{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 56253, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017776829680194833, "grad_norm": 21.56465601551301, "learning_rate": 3.5549235691432637e-07, "loss": 5.6532, "mean_abs_error": 2081.930504302646, "mean_abs_error_last_10": 1157.3236769115358, "mean_abs_error_last_25": 1311.5341069792416, "mean_abs_error_last_50": 1568.227565926835, "mean_pred_prob": 0.002759764289908162, "mean_pred_prob_last_10": 0.023155727088578715, "mean_pred_prob_last_25": 0.01020040972850893, "mean_pred_prob_last_50": 0.00535183289705401, "mean_token_accuracy": 0.8810494542121887, "step": 10 }, { "epoch": 0.00035553659360389666, "grad_norm": 19.390104793161274, "learning_rate": 7.109847138286527e-07, "loss": 5.7015, "mean_abs_error": 2331.858378250916, "mean_abs_error_last_10": 1016.6854896823843, "mean_abs_error_last_25": 1236.033771568496, "mean_abs_error_last_50": 1601.2208400047982, "mean_pred_prob": 0.002933594050455213, "mean_pred_prob_last_10": 0.023077253372412086, "mean_pred_prob_last_25": 0.010409332568406171, "mean_pred_prob_last_50": 0.005605366138070167, "mean_token_accuracy": 0.866948938369751, "step": 20 }, { "epoch": 0.000533304890405845, "grad_norm": 19.537250764828293, "learning_rate": 1.066477070742979e-06, "loss": 5.596, "mean_abs_error": 1630.6817071249727, "mean_abs_error_last_10": 163.892469402574, "mean_abs_error_last_25": 408.38223736009803, "mean_abs_error_last_50": 815.8360374823228, "mean_pred_prob": 0.00160296713875141, "mean_pred_prob_last_10": 0.01328270505182445, "mean_pred_prob_last_25": 0.00585737592773512, "mean_pred_prob_last_50": 0.0030927147774491457, "mean_token_accuracy": 0.8687206745147705, "step": 30 }, { "epoch": 0.0007110731872077933, "grad_norm": 15.281145801407135, "learning_rate": 1.4219694276573055e-06, "loss": 5.1245, "mean_abs_error": 887.3248161105614, "mean_abs_error_last_10": 89.2844296659351, "mean_abs_error_last_25": 222.26824116276686, "mean_abs_error_last_50": 443.91935799607893, "mean_pred_prob": 0.003571953947539441, "mean_pred_prob_last_10": 0.028344261180609464, "mean_pred_prob_last_25": 0.01279921995010227, "mean_pred_prob_last_50": 0.006825145054608584, "mean_token_accuracy": 0.8650642931461334, "step": 40 }, { "epoch": 0.0008888414840097417, "grad_norm": 7.966682168195539, "learning_rate": 1.7774617845716316e-06, "loss": 4.6873, "mean_abs_error": 1594.4163063429855, "mean_abs_error_last_10": 406.16588049997097, "mean_abs_error_last_25": 604.1472158461409, "mean_abs_error_last_50": 934.134631210899, "mean_pred_prob": 0.003209469100329443, "mean_pred_prob_last_10": 0.023389141393272438, "mean_pred_prob_last_25": 0.010984378527064109, "mean_pred_prob_last_50": 0.006039442555629648, "mean_token_accuracy": 0.8634457230567932, "step": 50 }, { "epoch": 0.00106660978081169, "grad_norm": 5.988517943854701, "learning_rate": 2.132954141485958e-06, "loss": 4.3271, "mean_abs_error": 554.5652841537153, "mean_abs_error_last_10": 55.19130860904731, "mean_abs_error_last_25": 137.92881631434528, "mean_abs_error_last_50": 277.05559562897076, "mean_pred_prob": 0.004954178433399648, "mean_pred_prob_last_10": 0.03502678368240595, "mean_pred_prob_last_25": 0.016687263059429823, "mean_pred_prob_last_50": 0.009234924148768187, "mean_token_accuracy": 0.8757996201515198, "step": 60 }, { "epoch": 0.0012443780776136384, "grad_norm": 4.277215585880209, "learning_rate": 2.4884464984002846e-06, "loss": 3.9224, "mean_abs_error": 2270.964285749419, "mean_abs_error_last_10": 832.3256008760014, "mean_abs_error_last_25": 1071.1693471641124, "mean_abs_error_last_50": 1471.4194552943668, "mean_pred_prob": 0.003042354926947155, "mean_pred_prob_last_10": 0.020789048136794008, "mean_pred_prob_last_25": 0.010093293342652031, "mean_pred_prob_last_50": 0.005617142186383717, "mean_token_accuracy": 0.8811011493206025, "step": 70 }, { "epoch": 0.0014221463744155866, "grad_norm": 2.8890857217690917, "learning_rate": 2.843938855314611e-06, "loss": 3.7098, "mean_abs_error": 2408.0559958019453, "mean_abs_error_last_10": 1146.5425775456463, "mean_abs_error_last_25": 1356.1794702093741, "mean_abs_error_last_50": 1706.560472583419, "mean_pred_prob": 0.004582271141953242, "mean_pred_prob_last_10": 0.02981863746572344, "mean_pred_prob_last_25": 0.014866494131274522, "mean_pred_prob_last_50": 0.008361453330508084, "mean_token_accuracy": 0.8666705131530762, "step": 80 }, { "epoch": 0.0015999146712175351, "grad_norm": 3.1269560717264895, "learning_rate": 3.1994312122289373e-06, "loss": 3.4749, "mean_abs_error": 563.815353528223, "mean_abs_error_last_10": 53.75586388121461, "mean_abs_error_last_25": 137.41359926908646, "mean_abs_error_last_50": 278.9239452310151, "mean_pred_prob": 0.005402373056858778, "mean_pred_prob_last_10": 0.036521103046834466, "mean_pred_prob_last_25": 0.01779519806150347, "mean_pred_prob_last_50": 0.00994066207204014, "mean_token_accuracy": 0.8727048158645629, "step": 90 }, { "epoch": 0.0017776829680194834, "grad_norm": 3.098700348181734, "learning_rate": 3.5549235691432632e-06, "loss": 3.4684, "mean_abs_error": 1113.5225115459375, "mean_abs_error_last_10": 104.59440122105306, "mean_abs_error_last_25": 270.4955315247438, "mean_abs_error_last_50": 551.2043810916613, "mean_pred_prob": 0.0027408788446336985, "mean_pred_prob_last_10": 0.019899966102093457, "mean_pred_prob_last_25": 0.00928574469871819, "mean_pred_prob_last_50": 0.0050865999888628725, "mean_token_accuracy": 0.8686346590518952, "step": 100 }, { "epoch": 0.0019554512648214317, "grad_norm": 2.84244378600975, "learning_rate": 3.91041592605759e-06, "loss": 3.3622, "mean_abs_error": 852.7746886771077, "mean_abs_error_last_10": 159.50290589466456, "mean_abs_error_last_25": 274.06680333881934, "mean_abs_error_last_50": 465.46278343840567, "mean_pred_prob": 0.008541963775496698, "mean_pred_prob_last_10": 0.051231324579566716, "mean_pred_prob_last_25": 0.02653015477117151, "mean_pred_prob_last_50": 0.015352774692291859, "mean_token_accuracy": 0.8642103552818299, "step": 110 }, { "epoch": 0.00213321956162338, "grad_norm": 2.316967581285157, "learning_rate": 4.265908282971916e-06, "loss": 3.252, "mean_abs_error": 947.6977126581376, "mean_abs_error_last_10": 90.91843562556595, "mean_abs_error_last_25": 229.27730026968294, "mean_abs_error_last_50": 466.73268013583873, "mean_pred_prob": 0.007869397418107837, "mean_pred_prob_last_10": 0.047728861961513755, "mean_pred_prob_last_25": 0.024681796948425472, "mean_pred_prob_last_50": 0.014266024064272641, "mean_token_accuracy": 0.8657439291477204, "step": 120 }, { "epoch": 0.0023109878584253282, "grad_norm": 2.713930312886545, "learning_rate": 4.621400639886243e-06, "loss": 3.1622, "mean_abs_error": 2340.4804498435187, "mean_abs_error_last_10": 1045.4872888180887, "mean_abs_error_last_25": 1259.426028761437, "mean_abs_error_last_50": 1617.9618096712077, "mean_pred_prob": 0.004003110169287538, "mean_pred_prob_last_10": 0.026776817531936104, "mean_pred_prob_last_25": 0.013081176747073186, "mean_pred_prob_last_50": 0.007342003330631996, "mean_token_accuracy": 0.88228999376297, "step": 130 }, { "epoch": 0.0024887561552272767, "grad_norm": 2.24940326682105, "learning_rate": 4.976892996800569e-06, "loss": 3.1445, "mean_abs_error": 1554.4139677204678, "mean_abs_error_last_10": 475.9274326623939, "mean_abs_error_last_25": 649.9142714616231, "mean_abs_error_last_50": 946.1156457071835, "mean_pred_prob": 0.00484159383631777, "mean_pred_prob_last_10": 0.03187266457171063, "mean_pred_prob_last_25": 0.01558424612157978, "mean_pred_prob_last_50": 0.008827065545483492, "mean_token_accuracy": 0.875438779592514, "step": 140 }, { "epoch": 0.0026665244520292252, "grad_norm": 2.021468840924213, "learning_rate": 5.3323853537148955e-06, "loss": 3.059, "mean_abs_error": 1323.8264396682525, "mean_abs_error_last_10": 463.26673877245537, "mean_abs_error_last_25": 600.4225849922701, "mean_abs_error_last_50": 838.7385045962199, "mean_pred_prob": 0.007110913134238217, "mean_pred_prob_last_10": 0.04540177144590416, "mean_pred_prob_last_25": 0.022780304442858324, "mean_pred_prob_last_50": 0.012907711106527131, "mean_token_accuracy": 0.8784827530384064, "step": 150 }, { "epoch": 0.0028442927488311733, "grad_norm": 2.087439137995113, "learning_rate": 5.687877710629222e-06, "loss": 3.0167, "mean_abs_error": 519.1295712345476, "mean_abs_error_last_10": 46.18892610376101, "mean_abs_error_last_25": 121.11355506097884, "mean_abs_error_last_50": 247.41578262908624, "mean_pred_prob": 0.0072121144970878955, "mean_pred_prob_last_10": 0.04582751411944628, "mean_pred_prob_last_25": 0.023026558570563793, "mean_pred_prob_last_50": 0.013130010105669498, "mean_token_accuracy": 0.879948514699936, "step": 160 }, { "epoch": 0.0030220610456331218, "grad_norm": 2.474292669902697, "learning_rate": 6.043370067543548e-06, "loss": 2.9572, "mean_abs_error": 519.0617908486936, "mean_abs_error_last_10": 47.967708388257755, "mean_abs_error_last_25": 118.3963391404493, "mean_abs_error_last_50": 247.32471780304658, "mean_pred_prob": 0.010639345296658576, "mean_pred_prob_last_10": 0.06512961629778147, "mean_pred_prob_last_25": 0.03337220004759729, "mean_pred_prob_last_50": 0.019152704742737114, "mean_token_accuracy": 0.867685079574585, "step": 170 }, { "epoch": 0.0031998293424350703, "grad_norm": 1.9731034713792588, "learning_rate": 6.3988624244578746e-06, "loss": 2.8837, "mean_abs_error": 1077.2163939622285, "mean_abs_error_last_10": 134.0421013471074, "mean_abs_error_last_25": 282.72481655740296, "mean_abs_error_last_50": 541.711447135665, "mean_pred_prob": 0.0057909313865820876, "mean_pred_prob_last_10": 0.03707271659513935, "mean_pred_prob_last_25": 0.018516538810217753, "mean_pred_prob_last_50": 0.01052040989161469, "mean_token_accuracy": 0.871594500541687, "step": 180 }, { "epoch": 0.0033775976392370183, "grad_norm": 1.834144424617192, "learning_rate": 6.7543547813722e-06, "loss": 2.8801, "mean_abs_error": 968.8608989938524, "mean_abs_error_last_10": 89.53896942123839, "mean_abs_error_last_25": 222.7473529108106, "mean_abs_error_last_50": 462.6986547223813, "mean_pred_prob": 0.006918796454556286, "mean_pred_prob_last_10": 0.04323723413981497, "mean_pred_prob_last_25": 0.021866503660567103, "mean_pred_prob_last_50": 0.012507545913103968, "mean_token_accuracy": 0.8680060863494873, "step": 190 }, { "epoch": 0.003555365936038967, "grad_norm": 1.9266826811814308, "learning_rate": 7.1098471382865265e-06, "loss": 2.7836, "mean_abs_error": 605.1713673747008, "mean_abs_error_last_10": 55.428564695277885, "mean_abs_error_last_25": 133.59519076286008, "mean_abs_error_last_50": 286.18696280215056, "mean_pred_prob": 0.008529906393960118, "mean_pred_prob_last_10": 0.053480475768446924, "mean_pred_prob_last_25": 0.02725484096445143, "mean_pred_prob_last_50": 0.015520522836595774, "mean_token_accuracy": 0.8688653945922852, "step": 200 }, { "epoch": 0.0037331342328409153, "grad_norm": 2.549215339704598, "learning_rate": 7.465339495200853e-06, "loss": 2.6775, "mean_abs_error": 592.7766510044673, "mean_abs_error_last_10": 56.72517736674031, "mean_abs_error_last_25": 140.28949215626764, "mean_abs_error_last_50": 283.5430666355529, "mean_pred_prob": 0.00758981672115624, "mean_pred_prob_last_10": 0.048552744835615155, "mean_pred_prob_last_25": 0.024158354150131345, "mean_pred_prob_last_50": 0.013694529072381556, "mean_token_accuracy": 0.8840866684913635, "step": 210 }, { "epoch": 0.003910902529642863, "grad_norm": 1.8059758807583652, "learning_rate": 7.82083185211518e-06, "loss": 2.6538, "mean_abs_error": 1054.3440104030074, "mean_abs_error_last_10": 94.45444507154149, "mean_abs_error_last_25": 243.2002735319114, "mean_abs_error_last_50": 508.20025311013615, "mean_pred_prob": 0.006361911119893193, "mean_pred_prob_last_10": 0.041475206660106775, "mean_pred_prob_last_25": 0.020599451567977666, "mean_pred_prob_last_50": 0.011568409134633839, "mean_token_accuracy": 0.8806471645832061, "step": 220 }, { "epoch": 0.004088670826444812, "grad_norm": 1.9816377736530213, "learning_rate": 8.176324209029507e-06, "loss": 2.6384, "mean_abs_error": 606.0201641779822, "mean_abs_error_last_10": 54.662685245633746, "mean_abs_error_last_25": 137.82832808947424, "mean_abs_error_last_50": 288.25882409896775, "mean_pred_prob": 0.008235820359550417, "mean_pred_prob_last_10": 0.05320109324529767, "mean_pred_prob_last_25": 0.026414087787270547, "mean_pred_prob_last_50": 0.01494171239901334, "mean_token_accuracy": 0.879311341047287, "step": 230 }, { "epoch": 0.00426643912324676, "grad_norm": 2.15076737273257, "learning_rate": 8.531816565943833e-06, "loss": 2.6638, "mean_abs_error": 482.5770697923057, "mean_abs_error_last_10": 47.47723716547646, "mean_abs_error_last_25": 113.67982898247267, "mean_abs_error_last_50": 234.54130040187016, "mean_pred_prob": 0.010570207727141678, "mean_pred_prob_last_10": 0.06628330275416375, "mean_pred_prob_last_25": 0.03331974460743368, "mean_pred_prob_last_50": 0.019061604421585797, "mean_token_accuracy": 0.8719656527042389, "step": 240 }, { "epoch": 0.004444207420048709, "grad_norm": 1.85653200672292, "learning_rate": 8.887308922858158e-06, "loss": 2.6545, "mean_abs_error": 1225.023372283465, "mean_abs_error_last_10": 352.1355008619001, "mean_abs_error_last_25": 491.07527167166, "mean_abs_error_last_50": 731.5074688973834, "mean_pred_prob": 0.011778680424322374, "mean_pred_prob_last_10": 0.07061846865399275, "mean_pred_prob_last_25": 0.035995377041399476, "mean_pred_prob_last_50": 0.020935313501104245, "mean_token_accuracy": 0.8574804723262787, "step": 250 }, { "epoch": 0.0046219757168506565, "grad_norm": 1.8440577591195355, "learning_rate": 9.242801279772485e-06, "loss": 2.5222, "mean_abs_error": 896.5009297049615, "mean_abs_error_last_10": 88.01842564265499, "mean_abs_error_last_25": 206.83091128968394, "mean_abs_error_last_50": 424.759403602191, "mean_pred_prob": 0.005963793327100575, "mean_pred_prob_last_10": 0.03918723408132792, "mean_pred_prob_last_25": 0.019235433731228112, "mean_pred_prob_last_50": 0.010863313917070627, "mean_token_accuracy": 0.8847413301467896, "step": 260 }, { "epoch": 0.004799744013652605, "grad_norm": 2.1632354215405405, "learning_rate": 9.598293636686811e-06, "loss": 2.4951, "mean_abs_error": 1473.5935476041427, "mean_abs_error_last_10": 505.3843147141253, "mean_abs_error_last_25": 657.9740605100309, "mean_abs_error_last_50": 924.0970524103335, "mean_pred_prob": 0.009516757843084634, "mean_pred_prob_last_10": 0.05872288571117679, "mean_pred_prob_last_25": 0.030045847074507036, "mean_pred_prob_last_50": 0.017148313058714847, "mean_token_accuracy": 0.8790044784545898, "step": 270 }, { "epoch": 0.0049775123104545535, "grad_norm": 1.9464255458074122, "learning_rate": 9.953785993601138e-06, "loss": 2.4955, "mean_abs_error": 867.9072044814945, "mean_abs_error_last_10": 88.49641558782135, "mean_abs_error_last_25": 205.57073093337013, "mean_abs_error_last_50": 413.2507368239817, "mean_pred_prob": 0.007027860882226377, "mean_pred_prob_last_10": 0.04583239033818245, "mean_pred_prob_last_25": 0.022582991002127527, "mean_pred_prob_last_50": 0.01277509715873748, "mean_token_accuracy": 0.8592814564704895, "step": 280 }, { "epoch": 0.005155280607256502, "grad_norm": 2.1902870818521927, "learning_rate": 1.0309278350515464e-05, "loss": 2.5737, "mean_abs_error": 1049.5598979076478, "mean_abs_error_last_10": 410.9816318680594, "mean_abs_error_last_25": 514.1152779887536, "mean_abs_error_last_50": 682.9086101784495, "mean_pred_prob": 0.009537877995171584, "mean_pred_prob_last_10": 0.062288162637560165, "mean_pred_prob_last_25": 0.03073213312018197, "mean_pred_prob_last_50": 0.017333480378874812, "mean_token_accuracy": 0.8578595221042633, "step": 290 }, { "epoch": 0.0053330489040584505, "grad_norm": 2.023616948168538, "learning_rate": 1.0664770707429791e-05, "loss": 2.5006, "mean_abs_error": 1018.2732846886818, "mean_abs_error_last_10": 263.52696070070675, "mean_abs_error_last_25": 385.1718874558384, "mean_abs_error_last_50": 591.3079564312234, "mean_pred_prob": 0.01022104129078798, "mean_pred_prob_last_10": 0.06475108017912135, "mean_pred_prob_last_25": 0.032355661343899556, "mean_pred_prob_last_50": 0.01849092512857169, "mean_token_accuracy": 0.8653875529766083, "step": 300 }, { "epoch": 0.005510817200860399, "grad_norm": 1.5824472073838847, "learning_rate": 1.1020263064344118e-05, "loss": 2.4733, "mean_abs_error": 188.321368615913, "mean_abs_error_last_10": 22.926868093662687, "mean_abs_error_last_25": 45.33684987286709, "mean_abs_error_last_50": 87.96577154538203, "mean_pred_prob": 0.020238003646954893, "mean_pred_prob_last_10": 0.1154051311314106, "mean_pred_prob_last_25": 0.06137069426476956, "mean_pred_prob_last_50": 0.03588896775618196, "mean_token_accuracy": 0.8693340182304382, "step": 310 }, { "epoch": 0.005688585497662347, "grad_norm": 1.6989605443186473, "learning_rate": 1.1375755421258444e-05, "loss": 2.3984, "mean_abs_error": 1291.3093896374912, "mean_abs_error_last_10": 852.7141162677171, "mean_abs_error_last_25": 925.2561349375512, "mean_abs_error_last_50": 1045.3754282327636, "mean_pred_prob": 0.019090815443269092, "mean_pred_prob_last_10": 0.11415900266365497, "mean_pred_prob_last_25": 0.059097370497329396, "mean_pred_prob_last_50": 0.03424374687456293, "mean_token_accuracy": 0.8708861231803894, "step": 320 }, { "epoch": 0.005866353794464295, "grad_norm": 2.870462274956278, "learning_rate": 1.173124777817277e-05, "loss": 2.393, "mean_abs_error": 1141.2222462478403, "mean_abs_error_last_10": 424.7388901408364, "mean_abs_error_last_25": 530.8500481425924, "mean_abs_error_last_50": 726.1258068269432, "mean_pred_prob": 0.00950929329628707, "mean_pred_prob_last_10": 0.06237069674825761, "mean_pred_prob_last_25": 0.030604067773674616, "mean_pred_prob_last_50": 0.017304870058433154, "mean_token_accuracy": 0.8719798266887665, "step": 330 }, { "epoch": 0.0060441220912662436, "grad_norm": 2.254076071294425, "learning_rate": 1.2086740135087096e-05, "loss": 2.4094, "mean_abs_error": 870.8997807174743, "mean_abs_error_last_10": 84.99280820293559, "mean_abs_error_last_25": 201.06173781807695, "mean_abs_error_last_50": 416.59079048428686, "mean_pred_prob": 0.00954373626736924, "mean_pred_prob_last_10": 0.06116091702133417, "mean_pred_prob_last_25": 0.030483981315046548, "mean_pred_prob_last_50": 0.01728116683661938, "mean_token_accuracy": 0.8579321563243866, "step": 340 }, { "epoch": 0.006221890388068192, "grad_norm": 2.2121616723973574, "learning_rate": 1.2442232492001422e-05, "loss": 2.4264, "mean_abs_error": 1915.7988978909991, "mean_abs_error_last_10": 632.0489229044824, "mean_abs_error_last_25": 829.5695665735723, "mean_abs_error_last_50": 1172.600997542209, "mean_pred_prob": 0.004584001460170839, "mean_pred_prob_last_10": 0.03132441192719852, "mean_pred_prob_last_25": 0.015027827540325233, "mean_pred_prob_last_50": 0.008384643621684517, "mean_token_accuracy": 0.863148307800293, "step": 350 }, { "epoch": 0.0063996586848701405, "grad_norm": 1.6185845787590674, "learning_rate": 1.2797724848915749e-05, "loss": 2.3644, "mean_abs_error": 1227.3584209497938, "mean_abs_error_last_10": 294.82510281824364, "mean_abs_error_last_25": 437.3302467028132, "mean_abs_error_last_50": 690.5316576159037, "mean_pred_prob": 0.008961939698201605, "mean_pred_prob_last_10": 0.05789901504758745, "mean_pred_prob_last_25": 0.0286689261556603, "mean_pred_prob_last_50": 0.016233670827932655, "mean_token_accuracy": 0.8736794292926788, "step": 360 }, { "epoch": 0.006577426981672088, "grad_norm": 2.3715264089006163, "learning_rate": 1.3153217205830076e-05, "loss": 2.3005, "mean_abs_error": 959.5934502469594, "mean_abs_error_last_10": 87.31838072808443, "mean_abs_error_last_25": 212.26112947752708, "mean_abs_error_last_50": 447.12665270216894, "mean_pred_prob": 0.013704229635186494, "mean_pred_prob_last_10": 0.08107962347567081, "mean_pred_prob_last_25": 0.04217994878999889, "mean_pred_prob_last_50": 0.024424228374846278, "mean_token_accuracy": 0.8772959232330322, "step": 370 }, { "epoch": 0.006755195278474037, "grad_norm": 1.7041309593519778, "learning_rate": 1.35087095627444e-05, "loss": 2.3115, "mean_abs_error": 1828.3378898676879, "mean_abs_error_last_10": 1039.4118543423663, "mean_abs_error_last_25": 1167.8939790116385, "mean_abs_error_last_50": 1384.1669522726897, "mean_pred_prob": 0.012250717550341506, "mean_pred_prob_last_10": 0.07694679126943811, "mean_pred_prob_last_25": 0.03870245263606194, "mean_pred_prob_last_50": 0.022147155855054734, "mean_token_accuracy": 0.8747943878173828, "step": 380 }, { "epoch": 0.006932963575275985, "grad_norm": 1.6658708611708162, "learning_rate": 1.3864201919658729e-05, "loss": 2.2997, "mean_abs_error": 667.3213676162903, "mean_abs_error_last_10": 67.38474132605934, "mean_abs_error_last_25": 147.95493424888141, "mean_abs_error_last_50": 303.34295559123166, "mean_pred_prob": 0.016221640154253692, "mean_pred_prob_last_10": 0.09169129505753518, "mean_pred_prob_last_25": 0.04829374472610652, "mean_pred_prob_last_50": 0.0287384559167549, "mean_token_accuracy": 0.8688858926296235, "step": 390 }, { "epoch": 0.007110731872077934, "grad_norm": 2.528365109158805, "learning_rate": 1.4219694276573053e-05, "loss": 2.363, "mean_abs_error": 1496.8946833166272, "mean_abs_error_last_10": 428.64551753295217, "mean_abs_error_last_25": 584.0081286456327, "mean_abs_error_last_50": 882.3138415627991, "mean_pred_prob": 0.008175968012074009, "mean_pred_prob_last_10": 0.05281105618923902, "mean_pred_prob_last_25": 0.02615624858008232, "mean_pred_prob_last_50": 0.014828649224364198, "mean_token_accuracy": 0.8609509706497193, "step": 400 }, { "epoch": 0.007288500168879882, "grad_norm": 2.871326451610253, "learning_rate": 1.4575186633487382e-05, "loss": 2.2303, "mean_abs_error": 458.22575574560005, "mean_abs_error_last_10": 56.62846149733025, "mean_abs_error_last_25": 108.83272785840254, "mean_abs_error_last_50": 215.2901743121704, "mean_pred_prob": 0.014735384215600788, "mean_pred_prob_last_10": 0.08800930930301547, "mean_pred_prob_last_25": 0.045208982285112144, "mean_pred_prob_last_50": 0.026297000609338282, "mean_token_accuracy": 0.8817260265350342, "step": 410 }, { "epoch": 0.007466268465681831, "grad_norm": 4.293971714865189, "learning_rate": 1.4930678990401706e-05, "loss": 2.3006, "mean_abs_error": 904.625557150116, "mean_abs_error_last_10": 166.8513990643067, "mean_abs_error_last_25": 272.91532799125497, "mean_abs_error_last_50": 482.4395537741696, "mean_pred_prob": 0.012956322419631761, "mean_pred_prob_last_10": 0.07518868602346629, "mean_pred_prob_last_25": 0.03964248215779662, "mean_pred_prob_last_50": 0.02303887490706984, "mean_token_accuracy": 0.8701554834842682, "step": 420 }, { "epoch": 0.007644036762483778, "grad_norm": 1.9843409498303985, "learning_rate": 1.5286171347316035e-05, "loss": 2.2477, "mean_abs_error": 586.2271861662778, "mean_abs_error_last_10": 82.57001623255475, "mean_abs_error_last_25": 144.96028829249548, "mean_abs_error_last_50": 278.3872765929403, "mean_pred_prob": 0.011252971645444632, "mean_pred_prob_last_10": 0.0703921752050519, "mean_pred_prob_last_25": 0.035518421325832605, "mean_pred_prob_last_50": 0.020311750285327435, "mean_token_accuracy": 0.8768968522548676, "step": 430 }, { "epoch": 0.007821805059285727, "grad_norm": 2.185085154792055, "learning_rate": 1.564166370423036e-05, "loss": 2.2088, "mean_abs_error": 1594.3459319047627, "mean_abs_error_last_10": 651.7444849550877, "mean_abs_error_last_25": 779.7627187836417, "mean_abs_error_last_50": 1038.1566929954201, "mean_pred_prob": 0.009119944146368653, "mean_pred_prob_last_10": 0.05795781663764501, "mean_pred_prob_last_25": 0.02891360480862204, "mean_pred_prob_last_50": 0.01643232995411381, "mean_token_accuracy": 0.8787733793258667, "step": 440 }, { "epoch": 0.007999573356087676, "grad_norm": 5.065522187860635, "learning_rate": 1.5997156061144686e-05, "loss": 2.2703, "mean_abs_error": 1481.2884143295312, "mean_abs_error_last_10": 598.734251977282, "mean_abs_error_last_25": 736.1132723517999, "mean_abs_error_last_50": 973.4173726279523, "mean_pred_prob": 0.010752669186331332, "mean_pred_prob_last_10": 0.06802239302705856, "mean_pred_prob_last_25": 0.034022045529127354, "mean_pred_prob_last_50": 0.01941067056322936, "mean_token_accuracy": 0.8670118749141693, "step": 450 }, { "epoch": 0.008177341652889624, "grad_norm": 2.4986494596555664, "learning_rate": 1.6352648418059014e-05, "loss": 2.2619, "mean_abs_error": 460.28919356913104, "mean_abs_error_last_10": 60.712548495067196, "mean_abs_error_last_25": 115.22187213094014, "mean_abs_error_last_50": 220.5640381576869, "mean_pred_prob": 0.012563385954126715, "mean_pred_prob_last_10": 0.07831333000212908, "mean_pred_prob_last_25": 0.039512136671692136, "mean_pred_prob_last_50": 0.022666459856554865, "mean_token_accuracy": 0.8731670200824737, "step": 460 }, { "epoch": 0.008355109949691571, "grad_norm": 1.3915248210067255, "learning_rate": 1.6708140774973337e-05, "loss": 2.2045, "mean_abs_error": 376.61915676975116, "mean_abs_error_last_10": 59.60849084183678, "mean_abs_error_last_25": 105.83406719832189, "mean_abs_error_last_50": 185.9217078958072, "mean_pred_prob": 0.014220494381152093, "mean_pred_prob_last_10": 0.08593862913548947, "mean_pred_prob_last_25": 0.043995227012783286, "mean_pred_prob_last_50": 0.02547692428342998, "mean_token_accuracy": 0.8758230030536651, "step": 470 }, { "epoch": 0.00853287824649352, "grad_norm": 2.0544709677676436, "learning_rate": 1.7063633131887666e-05, "loss": 2.1864, "mean_abs_error": 680.2480282137616, "mean_abs_error_last_10": 74.46104591842669, "mean_abs_error_last_25": 158.17518109073998, "mean_abs_error_last_50": 324.70978104269, "mean_pred_prob": 0.013759046082850546, "mean_pred_prob_last_10": 0.08721353262662887, "mean_pred_prob_last_25": 0.0437601042445749, "mean_pred_prob_last_50": 0.024972716136835514, "mean_token_accuracy": 0.8738494396209717, "step": 480 }, { "epoch": 0.008710646543295468, "grad_norm": 4.252265176152699, "learning_rate": 1.741912548880199e-05, "loss": 2.2936, "mean_abs_error": 1048.8857094266873, "mean_abs_error_last_10": 99.18361977346686, "mean_abs_error_last_25": 231.80062638562413, "mean_abs_error_last_50": 481.9152215500162, "mean_pred_prob": 0.008978489763103425, "mean_pred_prob_last_10": 0.05751702873967588, "mean_pred_prob_last_25": 0.02864655104931444, "mean_pred_prob_last_50": 0.016212323389481753, "mean_token_accuracy": 0.8629481732845307, "step": 490 }, { "epoch": 0.008888414840097418, "grad_norm": 2.3354421616845573, "learning_rate": 1.7774617845716317e-05, "loss": 2.2113, "mean_abs_error": 2832.3507332535273, "mean_abs_error_last_10": 1487.198689202002, "mean_abs_error_last_25": 1719.5340121198763, "mean_abs_error_last_50": 2078.2882279660666, "mean_pred_prob": 0.007081846299115568, "mean_pred_prob_last_10": 0.04641506027619471, "mean_pred_prob_last_25": 0.022763692228181753, "mean_pred_prob_last_50": 0.01288994691130938, "mean_token_accuracy": 0.8734384477138519, "step": 500 }, { "epoch": 0.009066183136899365, "grad_norm": 2.668537222462315, "learning_rate": 1.8130110202630642e-05, "loss": 2.099, "mean_abs_error": 1066.161262226587, "mean_abs_error_last_10": 385.48269678939477, "mean_abs_error_last_25": 476.8811631195966, "mean_abs_error_last_50": 665.9581368073424, "mean_pred_prob": 0.013287916274566668, "mean_pred_prob_last_10": 0.0820460313843796, "mean_pred_prob_last_25": 0.04201397840079153, "mean_pred_prob_last_50": 0.02400549318845151, "mean_token_accuracy": 0.877763545513153, "step": 510 }, { "epoch": 0.009243951433701313, "grad_norm": 1.5321504400484873, "learning_rate": 1.848560255954497e-05, "loss": 2.1217, "mean_abs_error": 920.6919958052513, "mean_abs_error_last_10": 201.46851103103418, "mean_abs_error_last_25": 308.38162159422586, "mean_abs_error_last_50": 502.38527198322345, "mean_pred_prob": 0.016040556573716457, "mean_pred_prob_last_10": 0.09016729884315282, "mean_pred_prob_last_25": 0.048219769526622255, "mean_pred_prob_last_50": 0.028219645307399333, "mean_token_accuracy": 0.8750094592571258, "step": 520 }, { "epoch": 0.009421719730503262, "grad_norm": 1.703477324700471, "learning_rate": 1.8841094916459297e-05, "loss": 2.1598, "mean_abs_error": 156.90206623056832, "mean_abs_error_last_10": 16.7736454338555, "mean_abs_error_last_25": 35.306307665205495, "mean_abs_error_last_50": 72.88596683756687, "mean_pred_prob": 0.02321328017860651, "mean_pred_prob_last_10": 0.130733310803771, "mean_pred_prob_last_25": 0.0704930204898119, "mean_pred_prob_last_50": 0.04102774877101183, "mean_token_accuracy": 0.8757313907146453, "step": 530 }, { "epoch": 0.00959948802730521, "grad_norm": 1.9506550493288948, "learning_rate": 1.9196587273373622e-05, "loss": 2.147, "mean_abs_error": 315.9528346374631, "mean_abs_error_last_10": 42.054912300964645, "mean_abs_error_last_25": 77.29161060554961, "mean_abs_error_last_50": 152.8392217409533, "mean_pred_prob": 0.016844726703129708, "mean_pred_prob_last_10": 0.10753452144563198, "mean_pred_prob_last_25": 0.05376288164407015, "mean_pred_prob_last_50": 0.03054785546846688, "mean_token_accuracy": 0.8716124296188354, "step": 540 }, { "epoch": 0.00977725632410716, "grad_norm": 2.6517380515579756, "learning_rate": 1.955207963028795e-05, "loss": 2.1054, "mean_abs_error": 1736.4480185029054, "mean_abs_error_last_10": 516.3424469484095, "mean_abs_error_last_25": 693.9998737793119, "mean_abs_error_last_50": 1026.3836638973814, "mean_pred_prob": 0.010861001965531613, "mean_pred_prob_last_10": 0.06458706780686044, "mean_pred_prob_last_25": 0.033499004881014115, "mean_pred_prob_last_50": 0.01938287666416727, "mean_token_accuracy": 0.8730883598327637, "step": 550 }, { "epoch": 0.009955024620909107, "grad_norm": 1.1866331902669858, "learning_rate": 1.9907571987202276e-05, "loss": 2.1293, "mean_abs_error": 2824.6053077952283, "mean_abs_error_last_10": 1451.4567147766115, "mean_abs_error_last_25": 1638.9384555711422, "mean_abs_error_last_50": 2012.19872956888, "mean_pred_prob": 0.007342773902564659, "mean_pred_prob_last_10": 0.048608132332446985, "mean_pred_prob_last_25": 0.023735088705871023, "mean_pred_prob_last_50": 0.01332449310211814, "mean_token_accuracy": 0.8691015720367432, "step": 560 }, { "epoch": 0.010132792917711055, "grad_norm": 3.0917291882506275, "learning_rate": 2.0263064344116602e-05, "loss": 2.1542, "mean_abs_error": 737.87469063591, "mean_abs_error_last_10": 77.34422806533105, "mean_abs_error_last_25": 168.14518384913097, "mean_abs_error_last_50": 343.54148630906286, "mean_pred_prob": 0.015261209139134735, "mean_pred_prob_last_10": 0.0947187471203506, "mean_pred_prob_last_25": 0.04794847429729998, "mean_pred_prob_last_50": 0.027447465062141418, "mean_token_accuracy": 0.8645118117332459, "step": 570 }, { "epoch": 0.010310561214513004, "grad_norm": 2.8619315576468054, "learning_rate": 2.0618556701030927e-05, "loss": 2.0733, "mean_abs_error": 164.8668915501724, "mean_abs_error_last_10": 14.095203620545693, "mean_abs_error_last_25": 33.85894573821178, "mean_abs_error_last_50": 71.47168210088118, "mean_pred_prob": 0.0190959557890892, "mean_pred_prob_last_10": 0.12050194293260574, "mean_pred_prob_last_25": 0.05989529825747013, "mean_pred_prob_last_50": 0.034393707476556304, "mean_token_accuracy": 0.8729984164237976, "step": 580 }, { "epoch": 0.010488329511314952, "grad_norm": 2.4866738424528214, "learning_rate": 2.0974049057945256e-05, "loss": 2.0644, "mean_abs_error": 1583.6528284873154, "mean_abs_error_last_10": 774.2781510434418, "mean_abs_error_last_25": 900.467127988253, "mean_abs_error_last_50": 1120.8854483315777, "mean_pred_prob": 0.01426253512763651, "mean_pred_prob_last_10": 0.08811859704583185, "mean_pred_prob_last_25": 0.04462597533565713, "mean_pred_prob_last_50": 0.025630311023269313, "mean_token_accuracy": 0.8791667282581329, "step": 590 }, { "epoch": 0.010666097808116901, "grad_norm": 2.514498740014458, "learning_rate": 2.1329541414859582e-05, "loss": 2.0625, "mean_abs_error": 2377.742488314011, "mean_abs_error_last_10": 1036.9049682603775, "mean_abs_error_last_25": 1209.1397727320116, "mean_abs_error_last_50": 1572.3164301271456, "mean_pred_prob": 0.008942610093436087, "mean_pred_prob_last_10": 0.05387141860774136, "mean_pred_prob_last_25": 0.027862814708350926, "mean_pred_prob_last_50": 0.016010001622635172, "mean_token_accuracy": 0.8608674466609955, "step": 600 }, { "epoch": 0.010843866104918849, "grad_norm": 1.2283392041930337, "learning_rate": 2.1685033771773907e-05, "loss": 2.0213, "mean_abs_error": 640.7290544237768, "mean_abs_error_last_10": 98.9267104688146, "mean_abs_error_last_25": 169.10820209633326, "mean_abs_error_last_50": 311.0738937630939, "mean_pred_prob": 0.012178101378958672, "mean_pred_prob_last_10": 0.07626647343859076, "mean_pred_prob_last_25": 0.03861610111780465, "mean_pred_prob_last_50": 0.022004452254623174, "mean_token_accuracy": 0.878367567062378, "step": 610 }, { "epoch": 0.011021634401720798, "grad_norm": 2.1697712287235293, "learning_rate": 2.2040526128688236e-05, "loss": 2.0906, "mean_abs_error": 1236.30746522545, "mean_abs_error_last_10": 335.45916390364096, "mean_abs_error_last_25": 475.97544559386796, "mean_abs_error_last_50": 719.6307064249104, "mean_pred_prob": 0.008841504625161178, "mean_pred_prob_last_10": 0.057492248716880565, "mean_pred_prob_last_25": 0.028390410376596266, "mean_pred_prob_last_50": 0.01606532171863364, "mean_token_accuracy": 0.865581351518631, "step": 620 }, { "epoch": 0.011199402698522746, "grad_norm": 1.681237836154761, "learning_rate": 2.239601848560256e-05, "loss": 2.0378, "mean_abs_error": 515.8539899589646, "mean_abs_error_last_10": 63.27866855944338, "mean_abs_error_last_25": 123.31772501892326, "mean_abs_error_last_50": 236.9312932496917, "mean_pred_prob": 0.014141409657895565, "mean_pred_prob_last_10": 0.08493936229497194, "mean_pred_prob_last_25": 0.044305666070431474, "mean_pred_prob_last_50": 0.02538998764939606, "mean_token_accuracy": 0.869401341676712, "step": 630 }, { "epoch": 0.011377170995324693, "grad_norm": 1.305072730964814, "learning_rate": 2.2751510842516887e-05, "loss": 2.0229, "mean_abs_error": 358.8225048239777, "mean_abs_error_last_10": 51.08643031868158, "mean_abs_error_last_25": 91.92472052550379, "mean_abs_error_last_50": 173.47991202125513, "mean_pred_prob": 0.01822275712620467, "mean_pred_prob_last_10": 0.10852665565907955, "mean_pred_prob_last_25": 0.05650969841517508, "mean_pred_prob_last_50": 0.03269628481939435, "mean_token_accuracy": 0.871983802318573, "step": 640 }, { "epoch": 0.011554939292126643, "grad_norm": 2.6169376261655253, "learning_rate": 2.3107003199431213e-05, "loss": 1.9642, "mean_abs_error": 1261.897388105161, "mean_abs_error_last_10": 302.0758909695843, "mean_abs_error_last_25": 441.19760429878954, "mean_abs_error_last_50": 711.4220279169816, "mean_pred_prob": 0.014144247584044933, "mean_pred_prob_last_10": 0.08660976126266177, "mean_pred_prob_last_25": 0.04378695606137626, "mean_pred_prob_last_50": 0.025358513215905987, "mean_token_accuracy": 0.8832934081554413, "step": 650 }, { "epoch": 0.01173270758892859, "grad_norm": 1.7359968702383757, "learning_rate": 2.346249555634554e-05, "loss": 2.0102, "mean_abs_error": 1250.963832702919, "mean_abs_error_last_10": 259.7844059293464, "mean_abs_error_last_25": 390.53343637234815, "mean_abs_error_last_50": 659.1934102813359, "mean_pred_prob": 0.010868448135443031, "mean_pred_prob_last_10": 0.06769845742965117, "mean_pred_prob_last_25": 0.03425572859705426, "mean_pred_prob_last_50": 0.01958601890364662, "mean_token_accuracy": 0.8774405181407928, "step": 660 }, { "epoch": 0.01191047588573054, "grad_norm": 1.4844673895443463, "learning_rate": 2.3817987913259864e-05, "loss": 2.0822, "mean_abs_error": 1146.3336744989215, "mean_abs_error_last_10": 218.77370642638977, "mean_abs_error_last_25": 341.470349061674, "mean_abs_error_last_50": 583.8155573782162, "mean_pred_prob": 0.007093241618713364, "mean_pred_prob_last_10": 0.046161491749808194, "mean_pred_prob_last_25": 0.022604218462947755, "mean_pred_prob_last_50": 0.012840600349591114, "mean_token_accuracy": 0.8695630013942719, "step": 670 }, { "epoch": 0.012088244182532487, "grad_norm": 1.8652414092504799, "learning_rate": 2.4173480270174193e-05, "loss": 2.0149, "mean_abs_error": 919.1412546203194, "mean_abs_error_last_10": 115.31818119187737, "mean_abs_error_last_25": 211.64646340183762, "mean_abs_error_last_50": 413.21387103691103, "mean_pred_prob": 0.013808391662314535, "mean_pred_prob_last_10": 0.08362286034971475, "mean_pred_prob_last_25": 0.04274397674016654, "mean_pred_prob_last_50": 0.024762386083602907, "mean_token_accuracy": 0.8698546469211579, "step": 680 }, { "epoch": 0.012266012479334435, "grad_norm": 1.4773233050053751, "learning_rate": 2.4528972627088522e-05, "loss": 1.9636, "mean_abs_error": 690.7700059683903, "mean_abs_error_last_10": 92.69090793888623, "mean_abs_error_last_25": 169.0415486978965, "mean_abs_error_last_50": 325.2813018675482, "mean_pred_prob": 0.011126081319525838, "mean_pred_prob_last_10": 0.0713454345241189, "mean_pred_prob_last_25": 0.03523978176526725, "mean_pred_prob_last_50": 0.02005343665368855, "mean_token_accuracy": 0.8714996635913849, "step": 690 }, { "epoch": 0.012443780776136384, "grad_norm": 1.4538005895962656, "learning_rate": 2.4884464984002844e-05, "loss": 2.0195, "mean_abs_error": 395.8720768716796, "mean_abs_error_last_10": 38.87219887595948, "mean_abs_error_last_25": 85.15273357055332, "mean_abs_error_last_50": 181.24616838872635, "mean_pred_prob": 0.019480216421652586, "mean_pred_prob_last_10": 0.11440928420051932, "mean_pred_prob_last_25": 0.0598274661693722, "mean_pred_prob_last_50": 0.03464309282135218, "mean_token_accuracy": 0.870962792634964, "step": 700 }, { "epoch": 0.012621549072938332, "grad_norm": 1.5612305220487113, "learning_rate": 2.523995734091717e-05, "loss": 1.9295, "mean_abs_error": 678.110013271695, "mean_abs_error_last_10": 89.38539332779234, "mean_abs_error_last_25": 154.028767317691, "mean_abs_error_last_50": 303.62155743629233, "mean_pred_prob": 0.015187637647613883, "mean_pred_prob_last_10": 0.0963128950446844, "mean_pred_prob_last_25": 0.04790309453383088, "mean_pred_prob_last_50": 0.027391615230590104, "mean_token_accuracy": 0.8830518543720245, "step": 710 }, { "epoch": 0.012799317369740281, "grad_norm": 1.2688584390483564, "learning_rate": 2.5595449697831498e-05, "loss": 1.9674, "mean_abs_error": 939.8892745199144, "mean_abs_error_last_10": 202.70112614721629, "mean_abs_error_last_25": 306.94186030753116, "mean_abs_error_last_50": 499.7639180073643, "mean_pred_prob": 0.01666767657879973, "mean_pred_prob_last_10": 0.10212612657924183, "mean_pred_prob_last_25": 0.051664927767706106, "mean_pred_prob_last_50": 0.029863061418291183, "mean_token_accuracy": 0.8744579315185547, "step": 720 }, { "epoch": 0.012977085666542229, "grad_norm": 2.273706554821732, "learning_rate": 2.5950942054745824e-05, "loss": 1.9692, "mean_abs_error": 563.7449537123586, "mean_abs_error_last_10": 75.87553948350245, "mean_abs_error_last_25": 138.76842203430436, "mean_abs_error_last_50": 269.3107981130676, "mean_pred_prob": 0.017579700570786373, "mean_pred_prob_last_10": 0.10835173162631691, "mean_pred_prob_last_25": 0.05517339418875054, "mean_pred_prob_last_50": 0.03158061256399378, "mean_token_accuracy": 0.872285521030426, "step": 730 }, { "epoch": 0.013154853963344176, "grad_norm": 1.7170628871121811, "learning_rate": 2.6306434411660153e-05, "loss": 1.9494, "mean_abs_error": 890.1963770639002, "mean_abs_error_last_10": 133.4123112099955, "mean_abs_error_last_25": 238.53386630338363, "mean_abs_error_last_50": 438.1370283780311, "mean_pred_prob": 0.010980807233136148, "mean_pred_prob_last_10": 0.06759763637091964, "mean_pred_prob_last_25": 0.034268400724977256, "mean_pred_prob_last_50": 0.01966385202249512, "mean_token_accuracy": 0.8721384108066559, "step": 740 }, { "epoch": 0.013332622260146126, "grad_norm": 2.4992781279775325, "learning_rate": 2.6661926768574475e-05, "loss": 1.9376, "mean_abs_error": 1194.1783397195616, "mean_abs_error_last_10": 152.45238396943967, "mean_abs_error_last_25": 286.1775200919947, "mean_abs_error_last_50": 570.8372731116503, "mean_pred_prob": 0.012864462664583697, "mean_pred_prob_last_10": 0.07518419970292597, "mean_pred_prob_last_25": 0.03906068851938471, "mean_pred_prob_last_50": 0.02280809109797701, "mean_token_accuracy": 0.8736965298652649, "step": 750 }, { "epoch": 0.013510390556948073, "grad_norm": 3.04393825523289, "learning_rate": 2.70174191254888e-05, "loss": 1.9668, "mean_abs_error": 1102.99161988572, "mean_abs_error_last_10": 535.4727590053774, "mean_abs_error_last_25": 628.8129480085584, "mean_abs_error_last_50": 776.7344661592607, "mean_pred_prob": 0.018029058331012492, "mean_pred_prob_last_10": 0.10830854249652475, "mean_pred_prob_last_25": 0.055749533871130554, "mean_pred_prob_last_50": 0.03227875068623689, "mean_token_accuracy": 0.8730711162090301, "step": 760 }, { "epoch": 0.013688158853750023, "grad_norm": 1.2901088578059605, "learning_rate": 2.737291148240313e-05, "loss": 1.9694, "mean_abs_error": 198.82654893313025, "mean_abs_error_last_10": 35.796612515799026, "mean_abs_error_last_25": 52.94233634208625, "mean_abs_error_last_50": 102.02554708290937, "mean_pred_prob": 0.017076343251392245, "mean_pred_prob_last_10": 0.10577074326574802, "mean_pred_prob_last_25": 0.05347934029996395, "mean_pred_prob_last_50": 0.030645430088043213, "mean_token_accuracy": 0.8723669588565827, "step": 770 }, { "epoch": 0.01386592715055197, "grad_norm": 4.074858380528386, "learning_rate": 2.7728403839317458e-05, "loss": 1.9545, "mean_abs_error": 1854.716657047721, "mean_abs_error_last_10": 718.2438708424681, "mean_abs_error_last_25": 887.8277869751595, "mean_abs_error_last_50": 1181.5846656096946, "mean_pred_prob": 0.010450689861318096, "mean_pred_prob_last_10": 0.06599661236832617, "mean_pred_prob_last_25": 0.03270999022643082, "mean_pred_prob_last_50": 0.018695221874804702, "mean_token_accuracy": 0.8692009925842286, "step": 780 }, { "epoch": 0.01404369544735392, "grad_norm": 1.9551521064409032, "learning_rate": 2.8083896196231784e-05, "loss": 1.9312, "mean_abs_error": 2272.087757220136, "mean_abs_error_last_10": 1430.8886366177442, "mean_abs_error_last_25": 1543.140642501075, "mean_abs_error_last_50": 1759.9394909924504, "mean_pred_prob": 0.011762436097342288, "mean_pred_prob_last_10": 0.07715875267313095, "mean_pred_prob_last_25": 0.03779442710292642, "mean_pred_prob_last_50": 0.021357737327343786, "mean_token_accuracy": 0.8702041864395141, "step": 790 }, { "epoch": 0.014221463744155867, "grad_norm": 1.9510710002905134, "learning_rate": 2.8439388553146106e-05, "loss": 1.9824, "mean_abs_error": 622.4777215199921, "mean_abs_error_last_10": 107.1956627213275, "mean_abs_error_last_25": 180.47874181845938, "mean_abs_error_last_50": 318.6401586611836, "mean_pred_prob": 0.011213159305043519, "mean_pred_prob_last_10": 0.07090895688161254, "mean_pred_prob_last_25": 0.03508679680526257, "mean_pred_prob_last_50": 0.020124022965319455, "mean_token_accuracy": 0.8705736696720123, "step": 800 }, { "epoch": 0.014399232040957815, "grad_norm": 1.700448253055252, "learning_rate": 2.8794880910060435e-05, "loss": 1.9627, "mean_abs_error": 1836.580701023951, "mean_abs_error_last_10": 814.7049893523158, "mean_abs_error_last_25": 947.122639030564, "mean_abs_error_last_50": 1211.382895773916, "mean_pred_prob": 0.008424591603397858, "mean_pred_prob_last_10": 0.05458187669428298, "mean_pred_prob_last_25": 0.02657283188746078, "mean_pred_prob_last_50": 0.015185364367789589, "mean_token_accuracy": 0.8667363226413727, "step": 810 }, { "epoch": 0.014577000337759764, "grad_norm": 1.6442456118334605, "learning_rate": 2.9150373266974764e-05, "loss": 1.8917, "mean_abs_error": 267.7457482144604, "mean_abs_error_last_10": 60.45190179382458, "mean_abs_error_last_25": 78.96987262957323, "mean_abs_error_last_50": 127.02621401447064, "mean_pred_prob": 0.01792753883637488, "mean_pred_prob_last_10": 0.10834306254982948, "mean_pred_prob_last_25": 0.05566256809979677, "mean_pred_prob_last_50": 0.03205112805590034, "mean_token_accuracy": 0.8792222797870636, "step": 820 }, { "epoch": 0.014754768634561712, "grad_norm": 2.051823082306337, "learning_rate": 2.950586562388909e-05, "loss": 1.9334, "mean_abs_error": 784.5161824669879, "mean_abs_error_last_10": 137.31070943866084, "mean_abs_error_last_25": 231.53058509607482, "mean_abs_error_last_50": 389.92609898389895, "mean_pred_prob": 0.012713406141847372, "mean_pred_prob_last_10": 0.0781934070168063, "mean_pred_prob_last_25": 0.03964521816233173, "mean_pred_prob_last_50": 0.022801025217631832, "mean_token_accuracy": 0.8693111419677735, "step": 830 }, { "epoch": 0.014932536931363661, "grad_norm": 2.6877931474931915, "learning_rate": 2.986135798080341e-05, "loss": 1.9031, "mean_abs_error": 949.4680011503494, "mean_abs_error_last_10": 198.18966359120458, "mean_abs_error_last_25": 270.6694540863852, "mean_abs_error_last_50": 459.4527602623787, "mean_pred_prob": 0.01122164874104783, "mean_pred_prob_last_10": 0.07149240635335445, "mean_pred_prob_last_25": 0.035261291265487674, "mean_pred_prob_last_50": 0.020196249685250224, "mean_token_accuracy": 0.8699019849300385, "step": 840 }, { "epoch": 0.015110305228165609, "grad_norm": 1.1938814805651374, "learning_rate": 3.021685033771774e-05, "loss": 1.9013, "mean_abs_error": 789.4757079414624, "mean_abs_error_last_10": 365.21534509410554, "mean_abs_error_last_25": 444.62367433524685, "mean_abs_error_last_50": 551.8888508821412, "mean_pred_prob": 0.023170994043175595, "mean_pred_prob_last_10": 0.13356267590133938, "mean_pred_prob_last_25": 0.07046620280452771, "mean_pred_prob_last_50": 0.04107400585053256, "mean_token_accuracy": 0.8703703820705414, "step": 850 }, { "epoch": 0.015288073524967557, "grad_norm": 1.4299975871621136, "learning_rate": 3.057234269463207e-05, "loss": 1.8694, "mean_abs_error": 1159.5680906895132, "mean_abs_error_last_10": 276.1243185436452, "mean_abs_error_last_25": 397.08560945085105, "mean_abs_error_last_50": 629.491142836064, "mean_pred_prob": 0.007546872389502824, "mean_pred_prob_last_10": 0.05163385007181205, "mean_pred_prob_last_25": 0.02474620609136764, "mean_pred_prob_last_50": 0.013816943537676707, "mean_token_accuracy": 0.8691913664340973, "step": 860 }, { "epoch": 0.015465841821769506, "grad_norm": 1.799656850332991, "learning_rate": 3.0927835051546395e-05, "loss": 1.8699, "mean_abs_error": 1022.426035117908, "mean_abs_error_last_10": 307.2437340849307, "mean_abs_error_last_25": 400.2580893558849, "mean_abs_error_last_50": 609.4406194284385, "mean_pred_prob": 0.016856661830388475, "mean_pred_prob_last_10": 0.1063677763711894, "mean_pred_prob_last_25": 0.05346951623796485, "mean_pred_prob_last_50": 0.030417878369917162, "mean_token_accuracy": 0.8703417956829071, "step": 870 }, { "epoch": 0.015643610118571454, "grad_norm": 1.5746084635682793, "learning_rate": 3.128332740846072e-05, "loss": 1.829, "mean_abs_error": 1776.756867499369, "mean_abs_error_last_10": 597.209915342234, "mean_abs_error_last_25": 727.8434566260664, "mean_abs_error_last_50": 1065.668745655366, "mean_pred_prob": 0.009453660799044883, "mean_pred_prob_last_10": 0.060107519039593174, "mean_pred_prob_last_25": 0.02981273629120551, "mean_pred_prob_last_50": 0.017004712486232164, "mean_token_accuracy": 0.8819795846939087, "step": 880 }, { "epoch": 0.015821378415373403, "grad_norm": 1.7675525144332762, "learning_rate": 3.1638819765375046e-05, "loss": 1.8595, "mean_abs_error": 415.81823605746547, "mean_abs_error_last_10": 65.70054663142842, "mean_abs_error_last_25": 104.91153655656888, "mean_abs_error_last_50": 197.56005557850034, "mean_pred_prob": 0.017150645866058765, "mean_pred_prob_last_10": 0.10636458825320005, "mean_pred_prob_last_25": 0.05371033223345876, "mean_pred_prob_last_50": 0.030896126013249158, "mean_token_accuracy": 0.8836559474468231, "step": 890 }, { "epoch": 0.015999146712175352, "grad_norm": 1.391100333781094, "learning_rate": 3.199431212228937e-05, "loss": 1.8422, "mean_abs_error": 1118.15462828101, "mean_abs_error_last_10": 141.0421466998079, "mean_abs_error_last_25": 248.17373655149177, "mean_abs_error_last_50": 501.6808730392657, "mean_pred_prob": 0.011353061185218394, "mean_pred_prob_last_10": 0.07062441455200315, "mean_pred_prob_last_25": 0.03508197502233088, "mean_pred_prob_last_50": 0.020328305941075087, "mean_token_accuracy": 0.8780898571014404, "step": 900 }, { "epoch": 0.016176915008977298, "grad_norm": 1.7157035092226958, "learning_rate": 3.23498044792037e-05, "loss": 1.9546, "mean_abs_error": 1460.544669465964, "mean_abs_error_last_10": 293.99155270199947, "mean_abs_error_last_25": 464.7012302997533, "mean_abs_error_last_50": 779.037938471085, "mean_pred_prob": 0.01763931735767983, "mean_pred_prob_last_10": 0.09858238942106254, "mean_pred_prob_last_25": 0.05222884498070925, "mean_pred_prob_last_50": 0.030947875438141637, "mean_token_accuracy": 0.8645983755588531, "step": 910 }, { "epoch": 0.016354683305779247, "grad_norm": 2.388677103789419, "learning_rate": 3.270529683611803e-05, "loss": 1.8239, "mean_abs_error": 352.38657304560036, "mean_abs_error_last_10": 74.42540016648738, "mean_abs_error_last_25": 100.23488977236381, "mean_abs_error_last_50": 172.02875425886958, "mean_pred_prob": 0.01650934328790754, "mean_pred_prob_last_10": 0.10241706855595112, "mean_pred_prob_last_25": 0.05193256651982665, "mean_pred_prob_last_50": 0.029635635390877723, "mean_token_accuracy": 0.8859499454498291, "step": 920 }, { "epoch": 0.016532451602581197, "grad_norm": 1.1417172073168043, "learning_rate": 3.306078919303235e-05, "loss": 1.7879, "mean_abs_error": 387.51454970522445, "mean_abs_error_last_10": 60.22805726051371, "mean_abs_error_last_25": 96.88855614784312, "mean_abs_error_last_50": 174.46566749084045, "mean_pred_prob": 0.019074664288200437, "mean_pred_prob_last_10": 0.11565193980932235, "mean_pred_prob_last_25": 0.0590334452688694, "mean_pred_prob_last_50": 0.03402203619480133, "mean_token_accuracy": 0.8866820991039276, "step": 930 }, { "epoch": 0.016710219899383143, "grad_norm": 0.8522703992400047, "learning_rate": 3.341628154994667e-05, "loss": 1.8178, "mean_abs_error": 921.8177681969488, "mean_abs_error_last_10": 130.57473449171954, "mean_abs_error_last_25": 220.4553408663403, "mean_abs_error_last_50": 428.2234006352572, "mean_pred_prob": 0.012845800269860774, "mean_pred_prob_last_10": 0.07869840385392309, "mean_pred_prob_last_25": 0.03987124338746071, "mean_pred_prob_last_50": 0.02293724149931222, "mean_token_accuracy": 0.8637508928775788, "step": 940 }, { "epoch": 0.016887988196185092, "grad_norm": 1.1143493852150883, "learning_rate": 3.3771773906861006e-05, "loss": 1.7975, "mean_abs_error": 791.5767735971212, "mean_abs_error_last_10": 147.2470301223841, "mean_abs_error_last_25": 207.96718131936683, "mean_abs_error_last_50": 373.75919223696667, "mean_pred_prob": 0.012750968779437244, "mean_pred_prob_last_10": 0.0727568524889648, "mean_pred_prob_last_25": 0.03831611895002425, "mean_pred_prob_last_50": 0.02250666478648782, "mean_token_accuracy": 0.8707780003547668, "step": 950 }, { "epoch": 0.01706575649298704, "grad_norm": 3.135254246943672, "learning_rate": 3.412726626377533e-05, "loss": 1.8528, "mean_abs_error": 1506.6141663845115, "mean_abs_error_last_10": 721.0126673992652, "mean_abs_error_last_25": 818.0770678717795, "mean_abs_error_last_50": 1041.7512767871913, "mean_pred_prob": 0.013559057725069578, "mean_pred_prob_last_10": 0.08272474928817246, "mean_pred_prob_last_25": 0.04206283366074785, "mean_pred_prob_last_50": 0.024278597063675987, "mean_token_accuracy": 0.8592511057853699, "step": 960 }, { "epoch": 0.017243524789788987, "grad_norm": 1.7731955049429104, "learning_rate": 3.4482758620689657e-05, "loss": 1.8361, "mean_abs_error": 1228.7601229380116, "mean_abs_error_last_10": 551.6612940146672, "mean_abs_error_last_25": 647.9102300693824, "mean_abs_error_last_50": 821.5149495085427, "mean_pred_prob": 0.0170124259282602, "mean_pred_prob_last_10": 0.10249593549233396, "mean_pred_prob_last_25": 0.05248799313267227, "mean_pred_prob_last_50": 0.03035119296837365, "mean_token_accuracy": 0.8721415996551514, "step": 970 }, { "epoch": 0.017421293086590937, "grad_norm": 3.426346439635406, "learning_rate": 3.483825097760398e-05, "loss": 1.83, "mean_abs_error": 1271.4081452638623, "mean_abs_error_last_10": 195.99769116757614, "mean_abs_error_last_25": 338.4374150943541, "mean_abs_error_last_50": 634.2268019782928, "mean_pred_prob": 0.013934458687435836, "mean_pred_prob_last_10": 0.08236947029363365, "mean_pred_prob_last_25": 0.04290784916956909, "mean_pred_prob_last_50": 0.02475573924020864, "mean_token_accuracy": 0.8713948369026184, "step": 980 }, { "epoch": 0.017599061383392886, "grad_norm": 2.9337993946023833, "learning_rate": 3.519374333451831e-05, "loss": 1.7965, "mean_abs_error": 1171.1453638726634, "mean_abs_error_last_10": 450.7282211050131, "mean_abs_error_last_25": 570.8657625673176, "mean_abs_error_last_50": 771.3085523805363, "mean_pred_prob": 0.014543696140754037, "mean_pred_prob_last_10": 0.08565511100168806, "mean_pred_prob_last_25": 0.04438669189694337, "mean_pred_prob_last_50": 0.02577585247636307, "mean_token_accuracy": 0.8743273198604584, "step": 990 }, { "epoch": 0.017776829680194835, "grad_norm": 1.6538505481638057, "learning_rate": 3.554923569143263e-05, "loss": 1.8365, "mean_abs_error": 798.4776272806124, "mean_abs_error_last_10": 131.7607174499592, "mean_abs_error_last_25": 201.1464797326379, "mean_abs_error_last_50": 369.43627904278003, "mean_pred_prob": 0.016061189142055808, "mean_pred_prob_last_10": 0.09514907784759999, "mean_pred_prob_last_25": 0.04919759496115148, "mean_pred_prob_last_50": 0.028585598827339707, "mean_token_accuracy": 0.8683942079544067, "step": 1000 }, { "epoch": 0.01795459797699678, "grad_norm": 3.4903256225774197, "learning_rate": 3.5904728048346965e-05, "loss": 1.8546, "mean_abs_error": 2566.423276671316, "mean_abs_error_last_10": 932.8186014880979, "mean_abs_error_last_25": 1148.2339938549362, "mean_abs_error_last_50": 1598.9968526535315, "mean_pred_prob": 0.005815385037567467, "mean_pred_prob_last_10": 0.037338141571672166, "mean_pred_prob_last_25": 0.01840359580965014, "mean_pred_prob_last_50": 0.010459195279690903, "mean_token_accuracy": 0.8596399068832398, "step": 1010 }, { "epoch": 0.01813236627379873, "grad_norm": 2.3589028834884305, "learning_rate": 3.6260220405261284e-05, "loss": 1.7614, "mean_abs_error": 502.9479967780443, "mean_abs_error_last_10": 61.173814673478546, "mean_abs_error_last_25": 125.72904578654496, "mean_abs_error_last_50": 225.1339834596143, "mean_pred_prob": 0.018519453471526504, "mean_pred_prob_last_10": 0.11165339164435864, "mean_pred_prob_last_25": 0.056954919919371606, "mean_pred_prob_last_50": 0.03296203657519072, "mean_token_accuracy": 0.8806879580020904, "step": 1020 }, { "epoch": 0.01831013457060068, "grad_norm": 1.9949582021506997, "learning_rate": 3.6615712762175616e-05, "loss": 1.7811, "mean_abs_error": 732.6041846268528, "mean_abs_error_last_10": 173.94183858004297, "mean_abs_error_last_25": 260.78377242350246, "mean_abs_error_last_50": 380.7570100124108, "mean_pred_prob": 0.012647727061994374, "mean_pred_prob_last_10": 0.07816586047410964, "mean_pred_prob_last_25": 0.0394464660435915, "mean_pred_prob_last_50": 0.022708412539213896, "mean_token_accuracy": 0.8742669999599457, "step": 1030 }, { "epoch": 0.018487902867402626, "grad_norm": 1.194455547654076, "learning_rate": 3.697120511908994e-05, "loss": 1.7612, "mean_abs_error": 1094.5171499425262, "mean_abs_error_last_10": 394.48825042849637, "mean_abs_error_last_25": 478.6277142505095, "mean_abs_error_last_50": 659.9807543319803, "mean_pred_prob": 0.013662523667153436, "mean_pred_prob_last_10": 0.08292334219731856, "mean_pred_prob_last_25": 0.04218540360452607, "mean_pred_prob_last_50": 0.0242992339815828, "mean_token_accuracy": 0.8757982075214386, "step": 1040 }, { "epoch": 0.018665671164204575, "grad_norm": 3.3974462445653493, "learning_rate": 3.732669747600427e-05, "loss": 1.792, "mean_abs_error": 137.84989115888425, "mean_abs_error_last_10": 18.17103876625423, "mean_abs_error_last_25": 33.682368640631, "mean_abs_error_last_50": 64.47027885349664, "mean_pred_prob": 0.031462850980460644, "mean_pred_prob_last_10": 0.16852411665022374, "mean_pred_prob_last_25": 0.09221723675727844, "mean_pred_prob_last_50": 0.05516141504049301, "mean_token_accuracy": 0.8749751925468445, "step": 1050 }, { "epoch": 0.018843439461006525, "grad_norm": 1.5672670260565884, "learning_rate": 3.768218983291859e-05, "loss": 1.7774, "mean_abs_error": 782.9939625903579, "mean_abs_error_last_10": 325.2178495975149, "mean_abs_error_last_25": 386.5282546004247, "mean_abs_error_last_50": 511.14071804076957, "mean_pred_prob": 0.02081765220791567, "mean_pred_prob_last_10": 0.12502183100732508, "mean_pred_prob_last_25": 0.06396414456248749, "mean_pred_prob_last_50": 0.03716356152726803, "mean_token_accuracy": 0.8692864537239074, "step": 1060 }, { "epoch": 0.019021207757808474, "grad_norm": 1.316057692013351, "learning_rate": 3.803768218983292e-05, "loss": 1.795, "mean_abs_error": 258.3239461593527, "mean_abs_error_last_10": 34.80486090991383, "mean_abs_error_last_25": 60.096535141701665, "mean_abs_error_last_50": 116.57143061317473, "mean_pred_prob": 0.022373476042412223, "mean_pred_prob_last_10": 0.13741958495229484, "mean_pred_prob_last_25": 0.06954212905839086, "mean_pred_prob_last_50": 0.04008861151523888, "mean_token_accuracy": 0.8732493400573731, "step": 1070 }, { "epoch": 0.01919897605461042, "grad_norm": 3.2780466503326964, "learning_rate": 3.8393174546747244e-05, "loss": 1.7719, "mean_abs_error": 414.14601380970254, "mean_abs_error_last_10": 60.33650926516975, "mean_abs_error_last_25": 112.43565223447165, "mean_abs_error_last_50": 196.4600143959736, "mean_pred_prob": 0.018827091553248465, "mean_pred_prob_last_10": 0.11465290244668722, "mean_pred_prob_last_25": 0.05815247260034084, "mean_pred_prob_last_50": 0.03356515956111252, "mean_token_accuracy": 0.8731638073921204, "step": 1080 }, { "epoch": 0.01937674435141237, "grad_norm": 1.894293496882138, "learning_rate": 3.8748666903661576e-05, "loss": 1.7808, "mean_abs_error": 922.0198035002373, "mean_abs_error_last_10": 215.60860164331388, "mean_abs_error_last_25": 293.01197437386463, "mean_abs_error_last_50": 490.09200002829505, "mean_pred_prob": 0.01904304513009265, "mean_pred_prob_last_10": 0.10939940047683194, "mean_pred_prob_last_25": 0.05780069094034843, "mean_pred_prob_last_50": 0.03343806531047448, "mean_token_accuracy": 0.8668041110038758, "step": 1090 }, { "epoch": 0.01955451264821432, "grad_norm": 1.9227690322452087, "learning_rate": 3.91041592605759e-05, "loss": 1.7665, "mean_abs_error": 1018.596391708535, "mean_abs_error_last_10": 387.6023444631213, "mean_abs_error_last_25": 431.14750015352195, "mean_abs_error_last_50": 595.6069574070218, "mean_pred_prob": 0.010625437472481281, "mean_pred_prob_last_10": 0.06746031008078716, "mean_pred_prob_last_25": 0.03366048101452179, "mean_pred_prob_last_50": 0.01923361492663389, "mean_token_accuracy": 0.8734578609466552, "step": 1100 }, { "epoch": 0.019732280945016265, "grad_norm": 1.7432105387296897, "learning_rate": 3.945965161749022e-05, "loss": 1.7903, "mean_abs_error": 2409.0837497875186, "mean_abs_error_last_10": 1152.5899444595877, "mean_abs_error_last_25": 1320.4812973212845, "mean_abs_error_last_50": 1657.8826437142402, "mean_pred_prob": 0.013468303617992205, "mean_pred_prob_last_10": 0.08123758834553882, "mean_pred_prob_last_25": 0.04171226581456722, "mean_pred_prob_last_50": 0.02397479374994873, "mean_token_accuracy": 0.8693491160869599, "step": 1110 }, { "epoch": 0.019910049241818214, "grad_norm": 4.227093413002487, "learning_rate": 3.981514397440455e-05, "loss": 1.8063, "mean_abs_error": 1136.8388771191267, "mean_abs_error_last_10": 347.2816502893123, "mean_abs_error_last_25": 477.351567442724, "mean_abs_error_last_50": 683.0643801224533, "mean_pred_prob": 0.012926711986074224, "mean_pred_prob_last_10": 0.08136999206908513, "mean_pred_prob_last_25": 0.040661781973904, "mean_pred_prob_last_50": 0.02331761016976088, "mean_token_accuracy": 0.875640046596527, "step": 1120 }, { "epoch": 0.020087817538620163, "grad_norm": 1.4731043939381796, "learning_rate": 4.017063633131888e-05, "loss": 1.7248, "mean_abs_error": 460.4541626624033, "mean_abs_error_last_10": 67.20491391719877, "mean_abs_error_last_25": 117.003875079422, "mean_abs_error_last_50": 219.04477147574863, "mean_pred_prob": 0.02215564085636288, "mean_pred_prob_last_10": 0.13042277842760086, "mean_pred_prob_last_25": 0.0680045303888619, "mean_pred_prob_last_50": 0.03944537336938083, "mean_token_accuracy": 0.8774117350578308, "step": 1130 }, { "epoch": 0.02026558583542211, "grad_norm": 1.386831263664436, "learning_rate": 4.0526128688233204e-05, "loss": 1.7556, "mean_abs_error": 774.2854743461437, "mean_abs_error_last_10": 172.41060089558252, "mean_abs_error_last_25": 242.513332875968, "mean_abs_error_last_50": 403.6237404810653, "mean_pred_prob": 0.014764376793755219, "mean_pred_prob_last_10": 0.09055730662075803, "mean_pred_prob_last_25": 0.04579618401476182, "mean_pred_prob_last_50": 0.026289607089711352, "mean_token_accuracy": 0.867761778831482, "step": 1140 }, { "epoch": 0.02044335413222406, "grad_norm": 1.5857809504429128, "learning_rate": 4.0881621045147536e-05, "loss": 1.7617, "mean_abs_error": 847.0560958864122, "mean_abs_error_last_10": 316.85273182694294, "mean_abs_error_last_25": 401.6959103621528, "mean_abs_error_last_50": 549.9790089466477, "mean_pred_prob": 0.01818620362173533, "mean_pred_prob_last_10": 0.11066286668647081, "mean_pred_prob_last_25": 0.05631548668025062, "mean_pred_prob_last_50": 0.03243793873407412, "mean_token_accuracy": 0.8791987597942352, "step": 1150 }, { "epoch": 0.020621122429026008, "grad_norm": 3.3714172788896866, "learning_rate": 4.1237113402061855e-05, "loss": 1.7697, "mean_abs_error": 908.5432884727932, "mean_abs_error_last_10": 142.17841978086963, "mean_abs_error_last_25": 238.04838392951532, "mean_abs_error_last_50": 427.3314001201414, "mean_pred_prob": 0.013138301880098879, "mean_pred_prob_last_10": 0.08276321720331907, "mean_pred_prob_last_25": 0.04115322092548013, "mean_pred_prob_last_50": 0.023574506887234746, "mean_token_accuracy": 0.8581507682800293, "step": 1160 }, { "epoch": 0.020798890725827957, "grad_norm": 3.830866756569064, "learning_rate": 4.159260575897618e-05, "loss": 1.7064, "mean_abs_error": 380.8883624117896, "mean_abs_error_last_10": 101.03132906996832, "mean_abs_error_last_25": 129.36175774101045, "mean_abs_error_last_50": 197.9088023525489, "mean_pred_prob": 0.01699998863041401, "mean_pred_prob_last_10": 0.10249288082122802, "mean_pred_prob_last_25": 0.05221557319164276, "mean_pred_prob_last_50": 0.030273619526997208, "mean_token_accuracy": 0.8729150593280792, "step": 1170 }, { "epoch": 0.020976659022629903, "grad_norm": 1.1953146918878386, "learning_rate": 4.194809811589051e-05, "loss": 1.7519, "mean_abs_error": 1529.4015244388397, "mean_abs_error_last_10": 332.3594842243122, "mean_abs_error_last_25": 466.609762657324, "mean_abs_error_last_50": 781.1064435840958, "mean_pred_prob": 0.010750412577181123, "mean_pred_prob_last_10": 0.059182931733084844, "mean_pred_prob_last_25": 0.03192682595399674, "mean_pred_prob_last_50": 0.018891690368764102, "mean_token_accuracy": 0.8723862111568451, "step": 1180 }, { "epoch": 0.021154427319431852, "grad_norm": 1.0163555305612189, "learning_rate": 4.230359047280484e-05, "loss": 1.7306, "mean_abs_error": 1199.391709776474, "mean_abs_error_last_10": 502.3183180236627, "mean_abs_error_last_25": 603.0482586754244, "mean_abs_error_last_50": 778.6019057221989, "mean_pred_prob": 0.01153360655735014, "mean_pred_prob_last_10": 0.07331931467633694, "mean_pred_prob_last_25": 0.03651584517356241, "mean_pred_prob_last_50": 0.02081673896173015, "mean_token_accuracy": 0.8669721484184265, "step": 1190 }, { "epoch": 0.021332195616233802, "grad_norm": 2.693221755587699, "learning_rate": 4.2659082829719164e-05, "loss": 1.781, "mean_abs_error": 711.431400253002, "mean_abs_error_last_10": 230.19075451347936, "mean_abs_error_last_25": 288.1062230848501, "mean_abs_error_last_50": 408.31921548927306, "mean_pred_prob": 0.018685152058606037, "mean_pred_prob_last_10": 0.11261431546299719, "mean_pred_prob_last_25": 0.057782104506623, "mean_pred_prob_last_50": 0.03337684312136844, "mean_token_accuracy": 0.8768482089042664, "step": 1200 }, { "epoch": 0.021509963913035748, "grad_norm": 2.976072810056008, "learning_rate": 4.301457518663349e-05, "loss": 1.7095, "mean_abs_error": 218.18241550864167, "mean_abs_error_last_10": 62.924964940411314, "mean_abs_error_last_25": 87.70460358782603, "mean_abs_error_last_50": 121.8925918987984, "mean_pred_prob": 0.024726191814988852, "mean_pred_prob_last_10": 0.14557988233864308, "mean_pred_prob_last_25": 0.07512320764362812, "mean_pred_prob_last_50": 0.04383192695677281, "mean_token_accuracy": 0.8800241708755493, "step": 1210 }, { "epoch": 0.021687732209837697, "grad_norm": 1.3288906316568765, "learning_rate": 4.3370067543547815e-05, "loss": 1.7444, "mean_abs_error": 1360.9306215686072, "mean_abs_error_last_10": 667.1267312227546, "mean_abs_error_last_25": 755.6802186704975, "mean_abs_error_last_50": 958.2153572752884, "mean_pred_prob": 0.020741933016688564, "mean_pred_prob_last_10": 0.12415464680670993, "mean_pred_prob_last_25": 0.06434715679060901, "mean_pred_prob_last_50": 0.03719582144258311, "mean_token_accuracy": 0.8623406648635864, "step": 1220 }, { "epoch": 0.021865500506639646, "grad_norm": 1.5807459066367042, "learning_rate": 4.372555990046214e-05, "loss": 1.7276, "mean_abs_error": 1728.8811328566576, "mean_abs_error_last_10": 821.614121213501, "mean_abs_error_last_25": 930.6263407237551, "mean_abs_error_last_50": 1177.6024280654908, "mean_pred_prob": 0.014740088855614886, "mean_pred_prob_last_10": 0.09063291987549746, "mean_pred_prob_last_25": 0.046171089461131486, "mean_pred_prob_last_50": 0.026449639211205068, "mean_token_accuracy": 0.8739423871040344, "step": 1230 }, { "epoch": 0.022043268803441596, "grad_norm": 2.771810702995387, "learning_rate": 4.408105225737647e-05, "loss": 1.7955, "mean_abs_error": 1363.4174951104214, "mean_abs_error_last_10": 622.4176903355525, "mean_abs_error_last_25": 765.7422155409104, "mean_abs_error_last_50": 973.7757017709271, "mean_pred_prob": 0.014849663050699746, "mean_pred_prob_last_10": 0.09117852678318741, "mean_pred_prob_last_25": 0.046261077192320955, "mean_pred_prob_last_50": 0.02665601033368148, "mean_token_accuracy": 0.873943167924881, "step": 1240 }, { "epoch": 0.02222103710024354, "grad_norm": 4.490949028155335, "learning_rate": 4.443654461429079e-05, "loss": 1.7354, "mean_abs_error": 783.8740403444231, "mean_abs_error_last_10": 128.54070772910572, "mean_abs_error_last_25": 195.4259088876924, "mean_abs_error_last_50": 361.5809945634127, "mean_pred_prob": 0.012472601979970932, "mean_pred_prob_last_10": 0.07169414442032576, "mean_pred_prob_last_25": 0.036845567356795075, "mean_pred_prob_last_50": 0.021652171621099114, "mean_token_accuracy": 0.8679419994354248, "step": 1250 }, { "epoch": 0.02239880539704549, "grad_norm": 1.0404106636879245, "learning_rate": 4.479203697120512e-05, "loss": 1.6271, "mean_abs_error": 325.12107014933315, "mean_abs_error_last_10": 65.83567493031721, "mean_abs_error_last_25": 92.0343436516817, "mean_abs_error_last_50": 148.40412093449055, "mean_pred_prob": 0.02224088490474969, "mean_pred_prob_last_10": 0.12592877447605133, "mean_pred_prob_last_25": 0.06604274129495025, "mean_pred_prob_last_50": 0.0391274911351502, "mean_token_accuracy": 0.8882584512233734, "step": 1260 }, { "epoch": 0.02257657369384744, "grad_norm": 2.900658205264011, "learning_rate": 4.514752932811945e-05, "loss": 1.7393, "mean_abs_error": 1831.964603658555, "mean_abs_error_last_10": 1064.1676793046572, "mean_abs_error_last_25": 1184.3919426549212, "mean_abs_error_last_50": 1365.7914328466848, "mean_pred_prob": 0.012685066641461162, "mean_pred_prob_last_10": 0.07854136666792329, "mean_pred_prob_last_25": 0.03941326633394056, "mean_pred_prob_last_50": 0.022690193518792513, "mean_token_accuracy": 0.8676756978034973, "step": 1270 }, { "epoch": 0.022754341990649386, "grad_norm": 1.4330403472965465, "learning_rate": 4.5503021685033775e-05, "loss": 1.6551, "mean_abs_error": 2200.8596597020614, "mean_abs_error_last_10": 722.4406635244954, "mean_abs_error_last_25": 918.0817410507403, "mean_abs_error_last_50": 1333.6101634821332, "mean_pred_prob": 0.011858207639306784, "mean_pred_prob_last_10": 0.07274235815857537, "mean_pred_prob_last_25": 0.036767235008301216, "mean_pred_prob_last_50": 0.021255412729806265, "mean_token_accuracy": 0.8821565330028533, "step": 1280 }, { "epoch": 0.022932110287451336, "grad_norm": 2.3311206873111154, "learning_rate": 4.58585140419481e-05, "loss": 1.6827, "mean_abs_error": 663.7251226164062, "mean_abs_error_last_10": 165.79253567076367, "mean_abs_error_last_25": 236.33142751882502, "mean_abs_error_last_50": 348.7018393294063, "mean_pred_prob": 0.012205583485774696, "mean_pred_prob_last_10": 0.07647895719856024, "mean_pred_prob_last_25": 0.038605465460568665, "mean_pred_prob_last_50": 0.02202528906054795, "mean_token_accuracy": 0.8861655414104461, "step": 1290 }, { "epoch": 0.023109878584253285, "grad_norm": 1.2760204873962806, "learning_rate": 4.6214006398862426e-05, "loss": 1.6861, "mean_abs_error": 1461.2284742228717, "mean_abs_error_last_10": 374.4712187433595, "mean_abs_error_last_25": 540.3524965349636, "mean_abs_error_last_50": 839.4619324855227, "mean_pred_prob": 0.013827448239317164, "mean_pred_prob_last_10": 0.07326876841543709, "mean_pred_prob_last_25": 0.040083008681540376, "mean_pred_prob_last_50": 0.023910775876720435, "mean_token_accuracy": 0.8757512331008911, "step": 1300 }, { "epoch": 0.02328764688105523, "grad_norm": 2.547146920924933, "learning_rate": 4.656949875577675e-05, "loss": 1.7163, "mean_abs_error": 625.3169388915469, "mean_abs_error_last_10": 147.23743313612533, "mean_abs_error_last_25": 185.2725402372138, "mean_abs_error_last_50": 302.32535087410633, "mean_pred_prob": 0.016194390691816808, "mean_pred_prob_last_10": 0.0991840717382729, "mean_pred_prob_last_25": 0.050059123709797856, "mean_pred_prob_last_50": 0.028747984766960145, "mean_token_accuracy": 0.8589400291442871, "step": 1310 }, { "epoch": 0.02346541517785718, "grad_norm": 1.2406318600955253, "learning_rate": 4.692499111269108e-05, "loss": 1.6622, "mean_abs_error": 358.1720427169492, "mean_abs_error_last_10": 120.70040176655382, "mean_abs_error_last_25": 164.97122560396087, "mean_abs_error_last_50": 213.70314938625143, "mean_pred_prob": 0.016951887216418982, "mean_pred_prob_last_10": 0.10578823070973158, "mean_pred_prob_last_25": 0.05338048357516527, "mean_pred_prob_last_50": 0.03046666909940541, "mean_token_accuracy": 0.878348195552826, "step": 1320 }, { "epoch": 0.02364318347465913, "grad_norm": 1.5175333869389254, "learning_rate": 4.728048346960541e-05, "loss": 1.6772, "mean_abs_error": 1850.412006611384, "mean_abs_error_last_10": 769.1074967576662, "mean_abs_error_last_25": 927.1245563653007, "mean_abs_error_last_50": 1216.8103764795637, "mean_pred_prob": 0.012833541560394224, "mean_pred_prob_last_10": 0.07120263357792282, "mean_pred_prob_last_25": 0.037890538331703284, "mean_pred_prob_last_50": 0.022491914365673438, "mean_token_accuracy": 0.858892560005188, "step": 1330 }, { "epoch": 0.02382095177146108, "grad_norm": 2.9352207365899172, "learning_rate": 4.763597582651973e-05, "loss": 1.6927, "mean_abs_error": 664.4368062919342, "mean_abs_error_last_10": 206.09287054490648, "mean_abs_error_last_25": 282.83673087780636, "mean_abs_error_last_50": 394.2393265628856, "mean_pred_prob": 0.01843541553535033, "mean_pred_prob_last_10": 0.11224828261183575, "mean_pred_prob_last_25": 0.05721057032933459, "mean_pred_prob_last_50": 0.0329409847734496, "mean_token_accuracy": 0.8683108985424042, "step": 1340 }, { "epoch": 0.023998720068263025, "grad_norm": 1.4996515801124846, "learning_rate": 4.799146818343406e-05, "loss": 1.6677, "mean_abs_error": 496.826729806619, "mean_abs_error_last_10": 68.62833825284147, "mean_abs_error_last_25": 115.43709265372165, "mean_abs_error_last_50": 222.05914156382715, "mean_pred_prob": 0.019055890548042954, "mean_pred_prob_last_10": 0.11140225529670715, "mean_pred_prob_last_25": 0.058383313659578565, "mean_pred_prob_last_50": 0.03413414331153035, "mean_token_accuracy": 0.8732155799865723, "step": 1350 }, { "epoch": 0.024176488365064974, "grad_norm": 1.5568999120893752, "learning_rate": 4.8346960540348386e-05, "loss": 1.6537, "mean_abs_error": 631.7300024963215, "mean_abs_error_last_10": 169.7443260778254, "mean_abs_error_last_25": 244.77253357273193, "mean_abs_error_last_50": 350.0546419982153, "mean_pred_prob": 0.020552265300648288, "mean_pred_prob_last_10": 0.1177038231282495, "mean_pred_prob_last_25": 0.06126359885092825, "mean_pred_prob_last_50": 0.03617326442035847, "mean_token_accuracy": 0.8771378517150878, "step": 1360 }, { "epoch": 0.024354256661866924, "grad_norm": 1.05582464307877, "learning_rate": 4.870245289726271e-05, "loss": 1.6585, "mean_abs_error": 430.88415438783903, "mean_abs_error_last_10": 88.55844752220936, "mean_abs_error_last_25": 108.47996128154526, "mean_abs_error_last_50": 196.10923457789985, "mean_pred_prob": 0.017525318521074952, "mean_pred_prob_last_10": 0.10739634223282338, "mean_pred_prob_last_25": 0.05519731235690415, "mean_pred_prob_last_50": 0.0314839132130146, "mean_token_accuracy": 0.8768384754657745, "step": 1370 }, { "epoch": 0.02453202495866887, "grad_norm": 2.1419222326533864, "learning_rate": 4.9057945254177043e-05, "loss": 1.6395, "mean_abs_error": 261.9195164322103, "mean_abs_error_last_10": 38.39407353606276, "mean_abs_error_last_25": 60.059510922172834, "mean_abs_error_last_50": 122.59620286807282, "mean_pred_prob": 0.02325552455149591, "mean_pred_prob_last_10": 0.1399512518197298, "mean_pred_prob_last_25": 0.07197072133421897, "mean_pred_prob_last_50": 0.04165874677710235, "mean_token_accuracy": 0.8821843028068542, "step": 1380 }, { "epoch": 0.02470979325547082, "grad_norm": 1.0390614903048585, "learning_rate": 4.941343761109136e-05, "loss": 1.7134, "mean_abs_error": 900.1907216500956, "mean_abs_error_last_10": 258.7763939080462, "mean_abs_error_last_25": 330.7926082496753, "mean_abs_error_last_50": 500.211982821524, "mean_pred_prob": 0.01151905078731943, "mean_pred_prob_last_10": 0.0722217482572887, "mean_pred_prob_last_25": 0.03597391823423095, "mean_pred_prob_last_50": 0.02065194031165447, "mean_token_accuracy": 0.8686380863189698, "step": 1390 }, { "epoch": 0.024887561552272768, "grad_norm": 1.796245711952674, "learning_rate": 4.976892996800569e-05, "loss": 1.6631, "mean_abs_error": 1369.8734028863034, "mean_abs_error_last_10": 455.524745112669, "mean_abs_error_last_25": 574.1310755240953, "mean_abs_error_last_50": 796.9426946593624, "mean_pred_prob": 0.012212960187753197, "mean_pred_prob_last_10": 0.07441967059858143, "mean_pred_prob_last_25": 0.03753824330924545, "mean_pred_prob_last_50": 0.021709928382188083, "mean_token_accuracy": 0.8656980872154236, "step": 1400 }, { "epoch": 0.025065329849074718, "grad_norm": 1.172725280195696, "learning_rate": 5.012442232492002e-05, "loss": 1.6522, "mean_abs_error": 1153.7599496711782, "mean_abs_error_last_10": 521.6678070277576, "mean_abs_error_last_25": 660.1023784265101, "mean_abs_error_last_50": 796.2370906267852, "mean_pred_prob": 0.02049506301555084, "mean_pred_prob_last_10": 0.11490977039793507, "mean_pred_prob_last_25": 0.06126039719965774, "mean_pred_prob_last_50": 0.036071852594614026, "mean_token_accuracy": 0.87753666639328, "step": 1410 }, { "epoch": 0.025243098145876663, "grad_norm": 2.293296312331662, "learning_rate": 5.047991468183434e-05, "loss": 1.6756, "mean_abs_error": 611.1587597506651, "mean_abs_error_last_10": 98.61027174336274, "mean_abs_error_last_25": 150.77402744642905, "mean_abs_error_last_50": 268.5102776706443, "mean_pred_prob": 0.016853158781304955, "mean_pred_prob_last_10": 0.10104551054537296, "mean_pred_prob_last_25": 0.05206631561741233, "mean_pred_prob_last_50": 0.030127573176287115, "mean_token_accuracy": 0.8771479606628418, "step": 1420 }, { "epoch": 0.025420866442678613, "grad_norm": 1.8348299029504, "learning_rate": 5.083540703874867e-05, "loss": 1.6365, "mean_abs_error": 130.12329722205095, "mean_abs_error_last_10": 19.656168447050273, "mean_abs_error_last_25": 37.04276633597995, "mean_abs_error_last_50": 63.599918069978706, "mean_pred_prob": 0.028734912164509296, "mean_pred_prob_last_10": 0.16688748449087143, "mean_pred_prob_last_25": 0.08729009050875902, "mean_pred_prob_last_50": 0.05090851942077279, "mean_token_accuracy": 0.8723637104034424, "step": 1430 }, { "epoch": 0.025598634739480562, "grad_norm": 1.0417851074693871, "learning_rate": 5.1190899395662997e-05, "loss": 1.6897, "mean_abs_error": 1614.2994374072628, "mean_abs_error_last_10": 525.1225734040531, "mean_abs_error_last_25": 670.6636998653315, "mean_abs_error_last_50": 959.7132630187094, "mean_pred_prob": 0.017556450168194716, "mean_pred_prob_last_10": 0.10190910654200706, "mean_pred_prob_last_25": 0.052836881959228775, "mean_pred_prob_last_50": 0.0309505109777092, "mean_token_accuracy": 0.8624301016330719, "step": 1440 }, { "epoch": 0.025776403036282508, "grad_norm": 1.95447585020774, "learning_rate": 5.1546391752577315e-05, "loss": 1.6157, "mean_abs_error": 343.9384514034344, "mean_abs_error_last_10": 61.82447248524424, "mean_abs_error_last_25": 109.47009247449725, "mean_abs_error_last_50": 178.43938922473805, "mean_pred_prob": 0.017620447487570346, "mean_pred_prob_last_10": 0.10689117405563593, "mean_pred_prob_last_25": 0.05448639411479235, "mean_pred_prob_last_50": 0.03137261224910617, "mean_token_accuracy": 0.873774915933609, "step": 1450 }, { "epoch": 0.025954171333084457, "grad_norm": 2.1145655948529805, "learning_rate": 5.190188410949165e-05, "loss": 1.5839, "mean_abs_error": 1838.8644751479958, "mean_abs_error_last_10": 759.0289276580133, "mean_abs_error_last_25": 892.7038044534181, "mean_abs_error_last_50": 1172.2741172164012, "mean_pred_prob": 0.007617874271818437, "mean_pred_prob_last_10": 0.047226824279641734, "mean_pred_prob_last_25": 0.023790172685403378, "mean_pred_prob_last_50": 0.013697520579444244, "mean_token_accuracy": 0.8732926726341248, "step": 1460 }, { "epoch": 0.026131939629886407, "grad_norm": 2.2223325605492095, "learning_rate": 5.225737646640597e-05, "loss": 1.6229, "mean_abs_error": 362.40008341396174, "mean_abs_error_last_10": 53.90778889322884, "mean_abs_error_last_25": 81.37194282899414, "mean_abs_error_last_50": 156.84928662013235, "mean_pred_prob": 0.0210807497613132, "mean_pred_prob_last_10": 0.12808519769459964, "mean_pred_prob_last_25": 0.06534194648265838, "mean_pred_prob_last_50": 0.03759522484615445, "mean_token_accuracy": 0.8683632493019104, "step": 1470 }, { "epoch": 0.026309707926688353, "grad_norm": 1.6883401483746625, "learning_rate": 5.2612868823320305e-05, "loss": 1.642, "mean_abs_error": 764.9783850540414, "mean_abs_error_last_10": 84.17480966742826, "mean_abs_error_last_25": 154.90654248290517, "mean_abs_error_last_50": 344.19134542630303, "mean_pred_prob": 0.015626492677256465, "mean_pred_prob_last_10": 0.08917769566178321, "mean_pred_prob_last_25": 0.04714779537171125, "mean_pred_prob_last_50": 0.027609174558892845, "mean_token_accuracy": 0.8783726215362548, "step": 1480 }, { "epoch": 0.026487476223490302, "grad_norm": 2.957165861894188, "learning_rate": 5.2968361180234624e-05, "loss": 1.6887, "mean_abs_error": 673.2307016267496, "mean_abs_error_last_10": 155.95711845035976, "mean_abs_error_last_25": 191.22947898547267, "mean_abs_error_last_50": 334.7054357217788, "mean_pred_prob": 0.016421052417717873, "mean_pred_prob_last_10": 0.09730357397347689, "mean_pred_prob_last_25": 0.04991132421419024, "mean_pred_prob_last_50": 0.029134996328502895, "mean_token_accuracy": 0.8790351152420044, "step": 1490 }, { "epoch": 0.02666524452029225, "grad_norm": 1.3743879606909575, "learning_rate": 5.332385353714895e-05, "loss": 1.6536, "mean_abs_error": 582.6220257257042, "mean_abs_error_last_10": 94.81026885308694, "mean_abs_error_last_25": 145.99493614179192, "mean_abs_error_last_50": 251.23723931366186, "mean_pred_prob": 0.015316131059080362, "mean_pred_prob_last_10": 0.09486680254340171, "mean_pred_prob_last_25": 0.04775863122195005, "mean_pred_prob_last_50": 0.027487032348290086, "mean_token_accuracy": 0.87495436668396, "step": 1500 }, { "epoch": 0.0268430128170942, "grad_norm": 1.2136446948400224, "learning_rate": 5.367934589406328e-05, "loss": 1.604, "mean_abs_error": 271.5404682548635, "mean_abs_error_last_10": 42.26587013543171, "mean_abs_error_last_25": 69.0240286794537, "mean_abs_error_last_50": 118.33463990316207, "mean_pred_prob": 0.023333221417851747, "mean_pred_prob_last_10": 0.14092536438256503, "mean_pred_prob_last_25": 0.07255019377917052, "mean_pred_prob_last_50": 0.04167223339900374, "mean_token_accuracy": 0.8748168110847473, "step": 1510 }, { "epoch": 0.027020781113896147, "grad_norm": 2.074457444914527, "learning_rate": 5.40348382509776e-05, "loss": 1.5893, "mean_abs_error": 703.2039677049427, "mean_abs_error_last_10": 107.94587962486064, "mean_abs_error_last_25": 197.32237191308997, "mean_abs_error_last_50": 338.13928380624077, "mean_pred_prob": 0.022046052373480052, "mean_pred_prob_last_10": 0.11726714558899402, "mean_pred_prob_last_25": 0.0631602173554711, "mean_pred_prob_last_50": 0.037845445633865896, "mean_token_accuracy": 0.8723510682582856, "step": 1520 }, { "epoch": 0.027198549410698096, "grad_norm": 3.309747060547089, "learning_rate": 5.439033060789194e-05, "loss": 1.6101, "mean_abs_error": 1333.395224936488, "mean_abs_error_last_10": 767.8799138303574, "mean_abs_error_last_25": 803.6747712396511, "mean_abs_error_last_50": 966.3998453920549, "mean_pred_prob": 0.010575642427284038, "mean_pred_prob_last_10": 0.06925488367269281, "mean_pred_prob_last_25": 0.03322051459545037, "mean_pred_prob_last_50": 0.018990975616907234, "mean_token_accuracy": 0.874940139055252, "step": 1530 }, { "epoch": 0.027376317707500045, "grad_norm": 5.090829394372666, "learning_rate": 5.474582296480626e-05, "loss": 1.6264, "mean_abs_error": 195.53054050094087, "mean_abs_error_last_10": 28.60281633169717, "mean_abs_error_last_25": 42.010228464765405, "mean_abs_error_last_50": 89.09557293623861, "mean_pred_prob": 0.026783701684325933, "mean_pred_prob_last_10": 0.15859664790332317, "mean_pred_prob_last_25": 0.0821695625782013, "mean_pred_prob_last_50": 0.04768695011734962, "mean_token_accuracy": 0.8640330672264099, "step": 1540 }, { "epoch": 0.02755408600430199, "grad_norm": 2.0509898422826365, "learning_rate": 5.5101315321720584e-05, "loss": 1.64, "mean_abs_error": 2837.0393884135997, "mean_abs_error_last_10": 1573.0331252209367, "mean_abs_error_last_25": 1761.0426804849133, "mean_abs_error_last_50": 2082.781124805765, "mean_pred_prob": 0.007650941025349312, "mean_pred_prob_last_10": 0.04717465443827677, "mean_pred_prob_last_25": 0.02386269053458818, "mean_pred_prob_last_50": 0.013700282296485965, "mean_token_accuracy": 0.8717111706733703, "step": 1550 }, { "epoch": 0.02773185430110394, "grad_norm": 1.4695698747988875, "learning_rate": 5.5456807678634916e-05, "loss": 1.5836, "mean_abs_error": 976.341765363821, "mean_abs_error_last_10": 235.46067437450455, "mean_abs_error_last_25": 340.5153818863637, "mean_abs_error_last_50": 521.1119155873195, "mean_pred_prob": 0.014482132554985584, "mean_pred_prob_last_10": 0.08961812836350873, "mean_pred_prob_last_25": 0.045025631098542365, "mean_pred_prob_last_50": 0.02593808394158259, "mean_token_accuracy": 0.8711534857749939, "step": 1560 }, { "epoch": 0.02790962259790589, "grad_norm": 1.0891826741682218, "learning_rate": 5.5812300035549235e-05, "loss": 1.641, "mean_abs_error": 466.630757929854, "mean_abs_error_last_10": 58.28325013584136, "mean_abs_error_last_25": 93.38802345324186, "mean_abs_error_last_50": 203.737699141441, "mean_pred_prob": 0.025549567560665308, "mean_pred_prob_last_10": 0.14820209611207247, "mean_pred_prob_last_25": 0.07790605491027236, "mean_pred_prob_last_50": 0.045226221438497305, "mean_token_accuracy": 0.8725174009799957, "step": 1570 }, { "epoch": 0.02808739089470784, "grad_norm": 2.5341689349122043, "learning_rate": 5.616779239246357e-05, "loss": 1.6767, "mean_abs_error": 233.38456684979406, "mean_abs_error_last_10": 65.82928516348095, "mean_abs_error_last_25": 107.8253360207265, "mean_abs_error_last_50": 141.7918328960554, "mean_pred_prob": 0.025333888037130235, "mean_pred_prob_last_10": 0.14392107874155044, "mean_pred_prob_last_25": 0.07643342316150666, "mean_pred_prob_last_50": 0.04481712123379111, "mean_token_accuracy": 0.8671004414558411, "step": 1580 }, { "epoch": 0.028265159191509785, "grad_norm": 1.7218864390707194, "learning_rate": 5.652328474937789e-05, "loss": 1.5433, "mean_abs_error": 819.9878745428523, "mean_abs_error_last_10": 116.41454792784512, "mean_abs_error_last_25": 189.34699220204556, "mean_abs_error_last_50": 383.95616180783577, "mean_pred_prob": 0.01952824933687225, "mean_pred_prob_last_10": 0.11227354658767581, "mean_pred_prob_last_25": 0.05902068605646491, "mean_pred_prob_last_50": 0.03446983862668276, "mean_token_accuracy": 0.8800671339035034, "step": 1590 }, { "epoch": 0.028442927488311735, "grad_norm": 1.739270618505877, "learning_rate": 5.687877710629221e-05, "loss": 1.5685, "mean_abs_error": 401.86450849940064, "mean_abs_error_last_10": 75.60645378406184, "mean_abs_error_last_25": 133.05065985245244, "mean_abs_error_last_50": 202.2722946891095, "mean_pred_prob": 0.021083915862254797, "mean_pred_prob_last_10": 0.12529561072587966, "mean_pred_prob_last_25": 0.06480386704206467, "mean_pred_prob_last_50": 0.03750073686242104, "mean_token_accuracy": 0.8815515398979187, "step": 1600 }, { "epoch": 0.028620695785113684, "grad_norm": 1.9413240161212477, "learning_rate": 5.7234269463206544e-05, "loss": 1.5788, "mean_abs_error": 552.5779006409238, "mean_abs_error_last_10": 147.82148585643358, "mean_abs_error_last_25": 217.25791633690042, "mean_abs_error_last_50": 313.45769093498217, "mean_pred_prob": 0.0173327493481338, "mean_pred_prob_last_10": 0.09667339641600847, "mean_pred_prob_last_25": 0.05238424791023135, "mean_pred_prob_last_50": 0.03070050470996648, "mean_token_accuracy": 0.8711834847927094, "step": 1610 }, { "epoch": 0.02879846408191563, "grad_norm": 4.760218867492203, "learning_rate": 5.758976182012087e-05, "loss": 1.6102, "mean_abs_error": 537.2962032917358, "mean_abs_error_last_10": 133.71456717178862, "mean_abs_error_last_25": 144.59312473684446, "mean_abs_error_last_50": 259.84047461241323, "mean_pred_prob": 0.01982226853724569, "mean_pred_prob_last_10": 0.1161421250551939, "mean_pred_prob_last_25": 0.06021426254883409, "mean_pred_prob_last_50": 0.03523043934255839, "mean_token_accuracy": 0.8688010454177857, "step": 1620 }, { "epoch": 0.02897623237871758, "grad_norm": 0.9562148198957264, "learning_rate": 5.79452541770352e-05, "loss": 1.5666, "mean_abs_error": 608.8049309880479, "mean_abs_error_last_10": 122.53952037571386, "mean_abs_error_last_25": 194.7763260535036, "mean_abs_error_last_50": 297.8645767474025, "mean_pred_prob": 0.02329258568934165, "mean_pred_prob_last_10": 0.1290183554403484, "mean_pred_prob_last_25": 0.06954050359781831, "mean_pred_prob_last_50": 0.04094533954048529, "mean_token_accuracy": 0.8797980606555938, "step": 1630 }, { "epoch": 0.02915400067551953, "grad_norm": 4.338158457488934, "learning_rate": 5.830074653394953e-05, "loss": 1.6502, "mean_abs_error": 1870.2107159738873, "mean_abs_error_last_10": 763.8161581604714, "mean_abs_error_last_25": 905.7666543339658, "mean_abs_error_last_50": 1183.1025714475225, "mean_pred_prob": 0.015906895249645458, "mean_pred_prob_last_10": 0.09359261288191192, "mean_pred_prob_last_25": 0.048152158239099664, "mean_pred_prob_last_50": 0.02818258294282714, "mean_token_accuracy": 0.8763019859790802, "step": 1640 }, { "epoch": 0.029331768972321474, "grad_norm": 2.773545344547665, "learning_rate": 5.8656238890863846e-05, "loss": 1.6277, "mean_abs_error": 197.70594378274762, "mean_abs_error_last_10": 46.815662089640085, "mean_abs_error_last_25": 62.992226874678956, "mean_abs_error_last_50": 102.12481604954444, "mean_pred_prob": 0.021113006910309196, "mean_pred_prob_last_10": 0.12841745056211948, "mean_pred_prob_last_25": 0.0654250094667077, "mean_pred_prob_last_50": 0.03781123040243983, "mean_token_accuracy": 0.8771360814571381, "step": 1650 }, { "epoch": 0.029509537269123424, "grad_norm": 1.9801353622311255, "learning_rate": 5.901173124777818e-05, "loss": 1.6556, "mean_abs_error": 522.052288599825, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 244.19840450882288, "mean_pred_prob": 0.05546427224762738, "mean_pred_prob_last_10": 0.13180350083857775, "mean_pred_prob_last_25": 0.0903711961582303, "mean_pred_prob_last_50": 0.07978674862533808, "mean_token_accuracy": 0.8654135823249817, "step": 1660 }, { "epoch": 0.029687305565925373, "grad_norm": 1.9063007988739415, "learning_rate": 5.9367223604692504e-05, "loss": 1.5956, "mean_abs_error": 1038.2882873168357, "mean_abs_error_last_10": 290.0047038511547, "mean_abs_error_last_25": 355.6637076851881, "mean_abs_error_last_50": 529.9444727962348, "mean_pred_prob": 0.011066706042038276, "mean_pred_prob_last_10": 0.0653849505353719, "mean_pred_prob_last_25": 0.03425437058322132, "mean_pred_prob_last_50": 0.019931732246186586, "mean_token_accuracy": 0.872611790895462, "step": 1670 }, { "epoch": 0.029865073862727323, "grad_norm": 2.7437533198120967, "learning_rate": 5.972271596160682e-05, "loss": 1.5692, "mean_abs_error": 674.0538440162305, "mean_abs_error_last_10": 190.9106514213754, "mean_abs_error_last_25": 261.17722321712563, "mean_abs_error_last_50": 353.34147381527293, "mean_pred_prob": 0.024172355281189085, "mean_pred_prob_last_10": 0.13708350732922553, "mean_pred_prob_last_25": 0.07249597855843604, "mean_pred_prob_last_50": 0.04257700800080784, "mean_token_accuracy": 0.876027524471283, "step": 1680 }, { "epoch": 0.03004284215952927, "grad_norm": 2.5616597703287476, "learning_rate": 6.0078208318521155e-05, "loss": 1.5723, "mean_abs_error": 347.27133589501494, "mean_abs_error_last_10": 83.39582267607224, "mean_abs_error_last_25": 110.20125551085974, "mean_abs_error_last_50": 171.9666027076735, "mean_pred_prob": 0.026383531838655473, "mean_pred_prob_last_10": 0.15729617979377508, "mean_pred_prob_last_25": 0.08091163467615843, "mean_pred_prob_last_50": 0.046870554750785234, "mean_token_accuracy": 0.8710489213466645, "step": 1690 }, { "epoch": 0.030220610456331218, "grad_norm": 1.276260847649076, "learning_rate": 6.043370067543548e-05, "loss": 1.5647, "mean_abs_error": 671.16524211304, "mean_abs_error_last_10": 120.52729785564993, "mean_abs_error_last_25": 166.48168395922738, "mean_abs_error_last_50": 310.1827886321435, "mean_pred_prob": 0.021867119963280855, "mean_pred_prob_last_10": 0.11948457397520543, "mean_pred_prob_last_25": 0.06476189894601703, "mean_pred_prob_last_50": 0.03825670485384762, "mean_token_accuracy": 0.8762826800346375, "step": 1700 }, { "epoch": 0.030398378753133167, "grad_norm": 1.5684628929206375, "learning_rate": 6.078919303234981e-05, "loss": 1.5317, "mean_abs_error": 438.00321841468804, "mean_abs_error_last_10": 57.80319821214397, "mean_abs_error_last_25": 111.93192808467866, "mean_abs_error_last_50": 208.12922325505912, "mean_pred_prob": 0.021061183093115686, "mean_pred_prob_last_10": 0.12810458466410637, "mean_pred_prob_last_25": 0.06523107895627618, "mean_pred_prob_last_50": 0.03758204765617847, "mean_token_accuracy": 0.8774802684783936, "step": 1710 }, { "epoch": 0.030576147049935113, "grad_norm": 0.9987057447795266, "learning_rate": 6.114468538926414e-05, "loss": 1.5643, "mean_abs_error": 742.1741089063935, "mean_abs_error_last_10": 159.83485233134985, "mean_abs_error_last_25": 234.84085686710085, "mean_abs_error_last_50": 388.4629738459844, "mean_pred_prob": 0.010205904580652713, "mean_pred_prob_last_10": 0.06500844247639179, "mean_pred_prob_last_25": 0.032107981760054825, "mean_pred_prob_last_50": 0.01835753065533936, "mean_token_accuracy": 0.8768974125385285, "step": 1720 }, { "epoch": 0.030753915346737062, "grad_norm": 1.3013829511783772, "learning_rate": 6.150017774617846e-05, "loss": 1.5867, "mean_abs_error": 552.4692836892517, "mean_abs_error_last_10": 132.8064506858449, "mean_abs_error_last_25": 183.97605660867197, "mean_abs_error_last_50": 290.8087689582435, "mean_pred_prob": 0.01324368487112224, "mean_pred_prob_last_10": 0.0834079721942544, "mean_pred_prob_last_25": 0.04184094136580825, "mean_pred_prob_last_50": 0.023831284232437612, "mean_token_accuracy": 0.8784031510353089, "step": 1730 }, { "epoch": 0.030931683643539012, "grad_norm": 2.5999137856415495, "learning_rate": 6.185567010309279e-05, "loss": 1.5284, "mean_abs_error": 587.1988690906535, "mean_abs_error_last_10": 127.55650379129422, "mean_abs_error_last_25": 180.46345706650965, "mean_abs_error_last_50": 296.3377320776302, "mean_pred_prob": 0.013537263497710228, "mean_pred_prob_last_10": 0.0841062942519784, "mean_pred_prob_last_25": 0.04270351603627205, "mean_pred_prob_last_50": 0.024421205371618272, "mean_token_accuracy": 0.8794539630413055, "step": 1740 }, { "epoch": 0.03110945194034096, "grad_norm": 3.434906820855681, "learning_rate": 6.221116246000711e-05, "loss": 1.5423, "mean_abs_error": 399.019114746026, "mean_abs_error_last_10": 84.39959051378108, "mean_abs_error_last_25": 129.09078295524057, "mean_abs_error_last_50": 207.45367041290987, "mean_pred_prob": 0.020439536636695267, "mean_pred_prob_last_10": 0.12038559354841709, "mean_pred_prob_last_25": 0.06165795428678393, "mean_pred_prob_last_50": 0.03587515805847943, "mean_token_accuracy": 0.8706774234771728, "step": 1750 }, { "epoch": 0.03128722023714291, "grad_norm": 2.7754345508775993, "learning_rate": 6.256665481692144e-05, "loss": 1.5488, "mean_abs_error": 319.4781088426692, "mean_abs_error_last_10": 58.03959434703607, "mean_abs_error_last_25": 92.79106287277246, "mean_abs_error_last_50": 164.72840272852324, "mean_pred_prob": 0.021947815921157597, "mean_pred_prob_last_10": 0.1303605128079653, "mean_pred_prob_last_25": 0.06771591445431113, "mean_pred_prob_last_50": 0.03917620284482837, "mean_token_accuracy": 0.8664877057075501, "step": 1760 }, { "epoch": 0.03146498853394485, "grad_norm": 1.5895719177334648, "learning_rate": 6.292214717383577e-05, "loss": 1.519, "mean_abs_error": 106.34410764985236, "mean_abs_error_last_10": 17.06002695458178, "mean_abs_error_last_25": 32.56161359202051, "mean_abs_error_last_50": 54.09640250065886, "mean_pred_prob": 0.03261647177860141, "mean_pred_prob_last_10": 0.18176692202687264, "mean_pred_prob_last_25": 0.09737813100218773, "mean_pred_prob_last_50": 0.05724500603973866, "mean_token_accuracy": 0.8731725513935089, "step": 1770 }, { "epoch": 0.031642756830746806, "grad_norm": 0.9085674844845095, "learning_rate": 6.327763953075009e-05, "loss": 1.619, "mean_abs_error": 2095.0235947098627, "mean_abs_error_last_10": 976.7177256159199, "mean_abs_error_last_25": 1109.7047072584762, "mean_abs_error_last_50": 1388.5015746191634, "mean_pred_prob": 0.0072246422045282085, "mean_pred_prob_last_10": 0.0459831616608426, "mean_pred_prob_last_25": 0.022795496054459363, "mean_pred_prob_last_50": 0.013041419589717407, "mean_token_accuracy": 0.8673090875148773, "step": 1780 }, { "epoch": 0.03182052512754875, "grad_norm": 1.7869306854774674, "learning_rate": 6.363313188766442e-05, "loss": 1.5846, "mean_abs_error": 602.1287195966402, "mean_abs_error_last_10": 112.68613892046157, "mean_abs_error_last_25": 178.09993920821043, "mean_abs_error_last_50": 299.7149719696226, "mean_pred_prob": 0.0160146995971445, "mean_pred_prob_last_10": 0.09730496476404368, "mean_pred_prob_last_25": 0.04984659764450043, "mean_pred_prob_last_50": 0.02869854572927579, "mean_token_accuracy": 0.870566862821579, "step": 1790 }, { "epoch": 0.031998293424350704, "grad_norm": 1.4590828089348737, "learning_rate": 6.398862424457874e-05, "loss": 1.4904, "mean_abs_error": 1368.3645876731894, "mean_abs_error_last_10": 611.2923746141815, "mean_abs_error_last_25": 751.161751235794, "mean_abs_error_last_50": 953.811963522895, "mean_pred_prob": 0.02022763122949982, "mean_pred_prob_last_10": 0.11736192463868064, "mean_pred_prob_last_25": 0.06178874644101597, "mean_pred_prob_last_50": 0.035889812621462625, "mean_token_accuracy": 0.8766325175762176, "step": 1800 }, { "epoch": 0.03217606172115265, "grad_norm": 3.121850569026762, "learning_rate": 6.434411660149307e-05, "loss": 1.5583, "mean_abs_error": 1324.1389415394672, "mean_abs_error_last_10": 583.241713368828, "mean_abs_error_last_25": 734.317807722689, "mean_abs_error_last_50": 916.0737873770937, "mean_pred_prob": 0.01461865454839426, "mean_pred_prob_last_10": 0.08968229025776964, "mean_pred_prob_last_25": 0.04571870612126076, "mean_pred_prob_last_50": 0.02612002687092172, "mean_token_accuracy": 0.8664892554283142, "step": 1810 }, { "epoch": 0.032353830017954596, "grad_norm": 2.08325545119682, "learning_rate": 6.46996089584074e-05, "loss": 1.5364, "mean_abs_error": 850.9300729108123, "mean_abs_error_last_10": 348.8674858256856, "mean_abs_error_last_25": 453.4842311111894, "mean_abs_error_last_50": 576.0415852342801, "mean_pred_prob": 0.017144905745226424, "mean_pred_prob_last_10": 0.10205489788204432, "mean_pred_prob_last_25": 0.051922300530713984, "mean_pred_prob_last_50": 0.030334768400643954, "mean_token_accuracy": 0.8722153604030609, "step": 1820 }, { "epoch": 0.03253159831475655, "grad_norm": 1.509078243505536, "learning_rate": 6.505510131532173e-05, "loss": 1.499, "mean_abs_error": 592.044505542018, "mean_abs_error_last_10": 136.83808041828507, "mean_abs_error_last_25": 152.67181511668178, "mean_abs_error_last_50": 248.321863570509, "mean_pred_prob": 0.014501491980627178, "mean_pred_prob_last_10": 0.08536313567310572, "mean_pred_prob_last_25": 0.04478878481313586, "mean_pred_prob_last_50": 0.0259655034635216, "mean_token_accuracy": 0.8788087964057922, "step": 1830 }, { "epoch": 0.032709366611558495, "grad_norm": 1.3664850562524102, "learning_rate": 6.541059367223606e-05, "loss": 1.5303, "mean_abs_error": 250.69714637341804, "mean_abs_error_last_10": 57.329806084664675, "mean_abs_error_last_25": 69.87653786618726, "mean_abs_error_last_50": 113.30665677239328, "mean_pred_prob": 0.03151176376268268, "mean_pred_prob_last_10": 0.165332149527967, "mean_pred_prob_last_25": 0.09057951355352997, "mean_pred_prob_last_50": 0.054573564976453784, "mean_token_accuracy": 0.8694211840629578, "step": 1840 }, { "epoch": 0.03288713490836044, "grad_norm": 1.5588046313508486, "learning_rate": 6.576608602915038e-05, "loss": 1.5442, "mean_abs_error": 256.8704676799697, "mean_abs_error_last_10": 39.902869139220506, "mean_abs_error_last_25": 94.11446250082646, "mean_abs_error_last_50": 149.4930599927083, "mean_pred_prob": 0.027006458304822446, "mean_pred_prob_last_10": 0.15488507710397242, "mean_pred_prob_last_25": 0.08208291567862033, "mean_pred_prob_last_50": 0.04770837966352701, "mean_token_accuracy": 0.8745441198348999, "step": 1850 }, { "epoch": 0.033064903205162394, "grad_norm": 1.1027295171417617, "learning_rate": 6.61215783860647e-05, "loss": 1.4969, "mean_abs_error": 725.571952600136, "mean_abs_error_last_10": 229.17476636988562, "mean_abs_error_last_25": 277.0394951143857, "mean_abs_error_last_50": 403.95183533144103, "mean_pred_prob": 0.01629258379107341, "mean_pred_prob_last_10": 0.10010996328783221, "mean_pred_prob_last_25": 0.05071856161521282, "mean_pred_prob_last_50": 0.029121151778963395, "mean_token_accuracy": 0.8714351952075958, "step": 1860 }, { "epoch": 0.03324267150196434, "grad_norm": 1.1384210323551462, "learning_rate": 6.647707074297903e-05, "loss": 1.5727, "mean_abs_error": 646.9405317721274, "mean_abs_error_last_10": 138.6504252142982, "mean_abs_error_last_25": 189.12180945914366, "mean_abs_error_last_50": 293.39516918847323, "mean_pred_prob": 0.01713652443140745, "mean_pred_prob_last_10": 0.10368854831904173, "mean_pred_prob_last_25": 0.05281994184479118, "mean_pred_prob_last_50": 0.030555179668590428, "mean_token_accuracy": 0.8642755508422851, "step": 1870 }, { "epoch": 0.033420439798766285, "grad_norm": 1.5032162550053922, "learning_rate": 6.683256309989335e-05, "loss": 1.537, "mean_abs_error": 327.54562212767235, "mean_abs_error_last_10": 77.42054285493793, "mean_abs_error_last_25": 112.64032384607538, "mean_abs_error_last_50": 173.65787814877206, "mean_pred_prob": 0.023344560386613013, "mean_pred_prob_last_10": 0.13596505671739578, "mean_pred_prob_last_25": 0.07065159939229489, "mean_pred_prob_last_50": 0.041286860127002, "mean_token_accuracy": 0.8689482510089874, "step": 1880 }, { "epoch": 0.03359820809556824, "grad_norm": 2.3247684816839858, "learning_rate": 6.718805545680769e-05, "loss": 1.529, "mean_abs_error": 1569.4907683066222, "mean_abs_error_last_10": 766.7385949606512, "mean_abs_error_last_25": 865.4123219291175, "mean_abs_error_last_50": 1090.5729080334763, "mean_pred_prob": 0.013699681993603007, "mean_pred_prob_last_10": 0.08342278415657348, "mean_pred_prob_last_25": 0.04229097023053328, "mean_pred_prob_last_50": 0.024369821799336933, "mean_token_accuracy": 0.8707821607589722, "step": 1890 }, { "epoch": 0.033775976392370184, "grad_norm": 1.5723857894179314, "learning_rate": 6.754354781372201e-05, "loss": 1.5207, "mean_abs_error": 693.9613714290181, "mean_abs_error_last_10": 211.60927966004516, "mean_abs_error_last_25": 256.87141124388387, "mean_abs_error_last_50": 388.1333809742622, "mean_pred_prob": 0.028452340033254586, "mean_pred_prob_last_10": 0.1485806784941815, "mean_pred_prob_last_25": 0.0822130378917791, "mean_pred_prob_last_50": 0.04943521540844813, "mean_token_accuracy": 0.8710512101650238, "step": 1900 }, { "epoch": 0.03395374468917213, "grad_norm": 1.0109345505070155, "learning_rate": 6.789904017063633e-05, "loss": 1.5331, "mean_abs_error": 1596.904972073618, "mean_abs_error_last_10": 609.3112095501845, "mean_abs_error_last_25": 775.0150462955846, "mean_abs_error_last_50": 1032.9014820394477, "mean_pred_prob": 0.014195032104908023, "mean_pred_prob_last_10": 0.07704635890841019, "mean_pred_prob_last_25": 0.040845472147339024, "mean_pred_prob_last_50": 0.024719995984924026, "mean_token_accuracy": 0.8776759505271912, "step": 1910 }, { "epoch": 0.03413151298597408, "grad_norm": 2.1945594190202855, "learning_rate": 6.825453252755066e-05, "loss": 1.4866, "mean_abs_error": 308.44214578047223, "mean_abs_error_last_10": 40.59408239778635, "mean_abs_error_last_25": 68.91874529926864, "mean_abs_error_last_50": 134.20405557937406, "mean_pred_prob": 0.02228613682091236, "mean_pred_prob_last_10": 0.13490310832858085, "mean_pred_prob_last_25": 0.0688202515244484, "mean_pred_prob_last_50": 0.039557581208646296, "mean_token_accuracy": 0.8734694719314575, "step": 1920 }, { "epoch": 0.03430928128277603, "grad_norm": 1.5933571658741632, "learning_rate": 6.861002488446498e-05, "loss": 1.4908, "mean_abs_error": 509.0646948181047, "mean_abs_error_last_10": 70.93164023003477, "mean_abs_error_last_25": 121.38965061856057, "mean_abs_error_last_50": 251.17415737486493, "mean_pred_prob": 0.025048462930135428, "mean_pred_prob_last_10": 0.1426616908982396, "mean_pred_prob_last_25": 0.07579188604140655, "mean_pred_prob_last_50": 0.04430403895676136, "mean_token_accuracy": 0.8740299820899964, "step": 1930 }, { "epoch": 0.034487049579577975, "grad_norm": 1.1985893581138394, "learning_rate": 6.896551724137931e-05, "loss": 1.4706, "mean_abs_error": 441.5786914381256, "mean_abs_error_last_10": 125.23975449875925, "mean_abs_error_last_25": 155.1023711596361, "mean_abs_error_last_50": 226.79056893464895, "mean_pred_prob": 0.02107554962858558, "mean_pred_prob_last_10": 0.1251268956810236, "mean_pred_prob_last_25": 0.06437811059877277, "mean_pred_prob_last_50": 0.03741060229949653, "mean_token_accuracy": 0.8757235527038574, "step": 1940 }, { "epoch": 0.03466481787637993, "grad_norm": 5.6212124958905525, "learning_rate": 6.932100959829365e-05, "loss": 1.4734, "mean_abs_error": 789.0948528466461, "mean_abs_error_last_10": 322.4056777420007, "mean_abs_error_last_25": 394.4399894083232, "mean_abs_error_last_50": 516.7991655691252, "mean_pred_prob": 0.02187536039709812, "mean_pred_prob_last_10": 0.12623235402279534, "mean_pred_prob_last_25": 0.06594297289848328, "mean_pred_prob_last_50": 0.03865760799089912, "mean_token_accuracy": 0.8854648411273957, "step": 1950 }, { "epoch": 0.03484258617318187, "grad_norm": 2.0698678249687674, "learning_rate": 6.967650195520796e-05, "loss": 1.5328, "mean_abs_error": 882.1460143205219, "mean_abs_error_last_10": 326.1064048378817, "mean_abs_error_last_25": 423.06386592453055, "mean_abs_error_last_50": 562.023517760025, "mean_pred_prob": 0.019922682692413218, "mean_pred_prob_last_10": 0.11619370584085117, "mean_pred_prob_last_25": 0.06081215747690294, "mean_pred_prob_last_50": 0.035206670881598255, "mean_token_accuracy": 0.8789740264415741, "step": 1960 }, { "epoch": 0.035020354469983826, "grad_norm": 3.045904868337824, "learning_rate": 7.00319943121223e-05, "loss": 1.5695, "mean_abs_error": 1626.1138597906356, "mean_abs_error_last_10": 720.1615736005165, "mean_abs_error_last_25": 899.2203095360564, "mean_abs_error_last_50": 1139.7289856054215, "mean_pred_prob": 0.01888087726838421, "mean_pred_prob_last_10": 0.10678932041919324, "mean_pred_prob_last_25": 0.05670565139153041, "mean_pred_prob_last_50": 0.0333149010213674, "mean_token_accuracy": 0.861934232711792, "step": 1970 }, { "epoch": 0.03519812276678577, "grad_norm": 2.4061756131436978, "learning_rate": 7.038748666903662e-05, "loss": 1.5392, "mean_abs_error": 654.1874400320804, "mean_abs_error_last_10": 90.80810758698347, "mean_abs_error_last_25": 146.32444266592296, "mean_abs_error_last_50": 298.81765628089266, "mean_pred_prob": 0.015856795071158558, "mean_pred_prob_last_10": 0.09785413332283496, "mean_pred_prob_last_25": 0.04924702518619597, "mean_pred_prob_last_50": 0.02824674784205854, "mean_token_accuracy": 0.8607281982898712, "step": 1980 }, { "epoch": 0.03537589106358772, "grad_norm": 1.105973400036506, "learning_rate": 7.074297902595095e-05, "loss": 1.4988, "mean_abs_error": 613.3508899856874, "mean_abs_error_last_10": 127.78742673590678, "mean_abs_error_last_25": 223.55404147475588, "mean_abs_error_last_50": 321.93478071724473, "mean_pred_prob": 0.021999703225446865, "mean_pred_prob_last_10": 0.1297244369634427, "mean_pred_prob_last_25": 0.06673638871870935, "mean_pred_prob_last_50": 0.03891880976734683, "mean_token_accuracy": 0.8775992333889008, "step": 1990 }, { "epoch": 0.03555365936038967, "grad_norm": 2.671603462813687, "learning_rate": 7.109847138286527e-05, "loss": 1.4815, "mean_abs_error": 163.89785767958878, "mean_abs_error_last_10": 40.03868944505946, "mean_abs_error_last_25": 58.404134555170174, "mean_abs_error_last_50": 94.75493273738047, "mean_pred_prob": 0.025376253202557562, "mean_pred_prob_last_10": 0.14582304060459136, "mean_pred_prob_last_25": 0.0768865890800953, "mean_pred_prob_last_50": 0.044826808571815493, "mean_token_accuracy": 0.8780671954154968, "step": 2000 }, { "epoch": 0.03573142765719162, "grad_norm": 1.1019224120175324, "learning_rate": 7.14539637397796e-05, "loss": 1.4876, "mean_abs_error": 248.9461985008208, "mean_abs_error_last_10": 28.43058310213516, "mean_abs_error_last_25": 57.76292913647927, "mean_abs_error_last_50": 121.06155961071813, "mean_pred_prob": 0.02357156276702881, "mean_pred_prob_last_10": 0.13727271892130374, "mean_pred_prob_last_25": 0.07163794981315733, "mean_pred_prob_last_50": 0.04164019878953695, "mean_token_accuracy": 0.8690985381603241, "step": 2010 }, { "epoch": 0.03590919595399356, "grad_norm": 1.1753045780586397, "learning_rate": 7.180945609669393e-05, "loss": 1.4765, "mean_abs_error": 575.0998285418551, "mean_abs_error_last_10": 141.69051184577341, "mean_abs_error_last_25": 180.5033979213782, "mean_abs_error_last_50": 283.20621575483773, "mean_pred_prob": 0.017395077151013537, "mean_pred_prob_last_10": 0.10459308256395161, "mean_pred_prob_last_25": 0.05379024280700832, "mean_pred_prob_last_50": 0.031081311288289726, "mean_token_accuracy": 0.8747635304927825, "step": 2020 }, { "epoch": 0.036086964250795515, "grad_norm": 1.1177797650312913, "learning_rate": 7.216494845360825e-05, "loss": 1.4675, "mean_abs_error": 349.7261607990957, "mean_abs_error_last_10": 43.34801454090759, "mean_abs_error_last_25": 77.97215569763725, "mean_abs_error_last_50": 155.84949546662284, "mean_pred_prob": 0.02237478126771748, "mean_pred_prob_last_10": 0.126967497728765, "mean_pred_prob_last_25": 0.06659916462376714, "mean_pred_prob_last_50": 0.03943214630708099, "mean_token_accuracy": 0.8782776355743408, "step": 2030 }, { "epoch": 0.03626473254759746, "grad_norm": 2.67698452351145, "learning_rate": 7.252044081052257e-05, "loss": 1.4687, "mean_abs_error": 285.6833601634861, "mean_abs_error_last_10": 59.0415610547915, "mean_abs_error_last_25": 81.41435841529464, "mean_abs_error_last_50": 128.2785061233124, "mean_pred_prob": 0.02200507977977395, "mean_pred_prob_last_10": 0.1349520083516836, "mean_pred_prob_last_25": 0.06814858494326473, "mean_pred_prob_last_50": 0.039208952663466336, "mean_token_accuracy": 0.8704730093479156, "step": 2040 }, { "epoch": 0.03644250084439941, "grad_norm": 1.0833800150603485, "learning_rate": 7.28759331674369e-05, "loss": 1.4369, "mean_abs_error": 335.6570818684259, "mean_abs_error_last_10": 67.03931694776517, "mean_abs_error_last_25": 97.77164592080106, "mean_abs_error_last_50": 162.8487441986293, "mean_pred_prob": 0.021268694335594772, "mean_pred_prob_last_10": 0.12416427750140428, "mean_pred_prob_last_25": 0.06444749226793647, "mean_pred_prob_last_50": 0.03766499552875757, "mean_token_accuracy": 0.8824263334274292, "step": 2050 }, { "epoch": 0.03662026914120136, "grad_norm": 1.543264403010905, "learning_rate": 7.323142552435123e-05, "loss": 1.4753, "mean_abs_error": 95.05964551662255, "mean_abs_error_last_10": 9.930454942288577, "mean_abs_error_last_25": 22.117373829476183, "mean_abs_error_last_50": 44.88989687075154, "mean_pred_prob": 0.034461659938097, "mean_pred_prob_last_10": 0.19712246507406234, "mean_pred_prob_last_25": 0.1042986899614334, "mean_pred_prob_last_50": 0.06094263829290867, "mean_token_accuracy": 0.876310408115387, "step": 2060 }, { "epoch": 0.036798037438003306, "grad_norm": 0.8774399679329445, "learning_rate": 7.358691788126557e-05, "loss": 1.4488, "mean_abs_error": 1057.191850644656, "mean_abs_error_last_10": 199.04970298842574, "mean_abs_error_last_25": 306.8608308611558, "mean_abs_error_last_50": 499.77539695795696, "mean_pred_prob": 0.01262541434261948, "mean_pred_prob_last_10": 0.0745397636666894, "mean_pred_prob_last_25": 0.03882303233258426, "mean_pred_prob_last_50": 0.022483765333890914, "mean_token_accuracy": 0.8769640922546387, "step": 2070 }, { "epoch": 0.03697580573480525, "grad_norm": 0.9475051645104966, "learning_rate": 7.394241023817988e-05, "loss": 1.4561, "mean_abs_error": 1286.2769845320374, "mean_abs_error_last_10": 569.3849027134299, "mean_abs_error_last_25": 656.8041119734572, "mean_abs_error_last_50": 847.283731242024, "mean_pred_prob": 0.02112683474260848, "mean_pred_prob_last_10": 0.11986383290786762, "mean_pred_prob_last_25": 0.06327548066328745, "mean_pred_prob_last_50": 0.037146298422885596, "mean_token_accuracy": 0.8625136852264405, "step": 2080 }, { "epoch": 0.037153574031607205, "grad_norm": 2.674889595659087, "learning_rate": 7.42979025950942e-05, "loss": 1.4958, "mean_abs_error": 845.7514926587133, "mean_abs_error_last_10": 253.76787403112317, "mean_abs_error_last_25": 323.3957205437871, "mean_abs_error_last_50": 488.15023785765806, "mean_pred_prob": 0.014881963640800678, "mean_pred_prob_last_10": 0.08897214998141863, "mean_pred_prob_last_25": 0.0458265813824255, "mean_pred_prob_last_50": 0.026563035510480403, "mean_token_accuracy": 0.868015444278717, "step": 2090 }, { "epoch": 0.03733134232840915, "grad_norm": 2.3989257132819404, "learning_rate": 7.465339495200854e-05, "loss": 1.4788, "mean_abs_error": 1547.7697196985557, "mean_abs_error_last_10": 547.6749509216954, "mean_abs_error_last_25": 697.7872922848289, "mean_abs_error_last_50": 975.0466748737639, "mean_pred_prob": 0.016053247825766448, "mean_pred_prob_last_10": 0.08961141730251257, "mean_pred_prob_last_25": 0.04742633909336291, "mean_pred_prob_last_50": 0.02814857368939556, "mean_token_accuracy": 0.8706414639949799, "step": 2100 }, { "epoch": 0.037509110625211096, "grad_norm": 0.8761728079143631, "learning_rate": 7.500888730892285e-05, "loss": 1.4874, "mean_abs_error": 866.6454023771361, "mean_abs_error_last_10": 472.1638217728984, "mean_abs_error_last_25": 518.4594898411551, "mean_abs_error_last_50": 593.1295999533954, "mean_pred_prob": 0.02712287523318082, "mean_pred_prob_last_10": 0.15454183696128893, "mean_pred_prob_last_25": 0.08139783061051276, "mean_pred_prob_last_50": 0.047729438747046514, "mean_token_accuracy": 0.870348972082138, "step": 2110 }, { "epoch": 0.03768687892201305, "grad_norm": 1.9938478475360044, "learning_rate": 7.536437966583719e-05, "loss": 1.4363, "mean_abs_error": 316.0822024917214, "mean_abs_error_last_10": 196.42704111044924, "mean_abs_error_last_25": 224.27411356624788, "mean_abs_error_last_50": 246.97435806950662, "mean_pred_prob": 0.020226318389177322, "mean_pred_prob_last_10": 0.1191339835524559, "mean_pred_prob_last_25": 0.06195482416078448, "mean_pred_prob_last_50": 0.03598826974630356, "mean_token_accuracy": 0.8933375835418701, "step": 2120 }, { "epoch": 0.037864647218814995, "grad_norm": 1.2327832608449139, "learning_rate": 7.571987202275152e-05, "loss": 1.481, "mean_abs_error": 435.18556977795026, "mean_abs_error_last_10": 147.23862295426784, "mean_abs_error_last_25": 205.34520523669553, "mean_abs_error_last_50": 247.70202853678273, "mean_pred_prob": 0.02664527411106974, "mean_pred_prob_last_10": 0.15547652281820773, "mean_pred_prob_last_25": 0.0807267738506198, "mean_pred_prob_last_50": 0.04713789124507457, "mean_token_accuracy": 0.8712724983692169, "step": 2130 }, { "epoch": 0.03804241551561695, "grad_norm": 2.787789838371725, "learning_rate": 7.607536437966584e-05, "loss": 1.4619, "mean_abs_error": 1233.5503919774433, "mean_abs_error_last_10": 522.7807520480208, "mean_abs_error_last_25": 603.3177275587567, "mean_abs_error_last_50": 784.5092775345295, "mean_pred_prob": 0.019202382351795676, "mean_pred_prob_last_10": 0.10766922209295444, "mean_pred_prob_last_25": 0.057208372691820844, "mean_pred_prob_last_50": 0.03387195191899082, "mean_token_accuracy": 0.8845961511135101, "step": 2140 }, { "epoch": 0.038220183812418894, "grad_norm": 1.6066539832569682, "learning_rate": 7.643085673658017e-05, "loss": 1.5596, "mean_abs_error": 1267.628176824235, "mean_abs_error_last_10": 414.29256939202804, "mean_abs_error_last_25": 530.4001849626392, "mean_abs_error_last_50": 750.6065600543754, "mean_pred_prob": 0.0126552949892357, "mean_pred_prob_last_10": 0.07019317646627314, "mean_pred_prob_last_25": 0.03708154439809732, "mean_pred_prob_last_50": 0.02194314318767283, "mean_token_accuracy": 0.8654808580875397, "step": 2150 }, { "epoch": 0.03839795210922084, "grad_norm": 2.5907103754352088, "learning_rate": 7.678634909349449e-05, "loss": 1.3954, "mean_abs_error": 379.8425088823994, "mean_abs_error_last_10": 65.48497475050529, "mean_abs_error_last_25": 115.69890004362057, "mean_abs_error_last_50": 175.01264676667495, "mean_pred_prob": 0.020785000710748135, "mean_pred_prob_last_10": 0.1221903944388032, "mean_pred_prob_last_25": 0.0632443075068295, "mean_pred_prob_last_50": 0.03683115905150771, "mean_token_accuracy": 0.8823951721191406, "step": 2160 }, { "epoch": 0.03857572040602279, "grad_norm": 2.662884788732967, "learning_rate": 7.714184145040882e-05, "loss": 1.4368, "mean_abs_error": 1596.1438610925775, "mean_abs_error_last_10": 746.5825454590879, "mean_abs_error_last_25": 872.6593040737928, "mean_abs_error_last_50": 1095.4087295753066, "mean_pred_prob": 0.01866176496987464, "mean_pred_prob_last_10": 0.1104726797901094, "mean_pred_prob_last_25": 0.056806436007900626, "mean_pred_prob_last_50": 0.0329940305033233, "mean_token_accuracy": 0.8757696032524109, "step": 2170 }, { "epoch": 0.03875348870282474, "grad_norm": 1.5081368095452088, "learning_rate": 7.749733380732315e-05, "loss": 1.4598, "mean_abs_error": 593.9054225334265, "mean_abs_error_last_10": 358.31170630651184, "mean_abs_error_last_25": 323.75673393202925, "mean_abs_error_last_50": 396.4525241045554, "mean_pred_prob": 0.022567958501167596, "mean_pred_prob_last_10": 0.1257085021585226, "mean_pred_prob_last_25": 0.06792964283376932, "mean_pred_prob_last_50": 0.03973197662271559, "mean_token_accuracy": 0.8731034696102142, "step": 2180 }, { "epoch": 0.038931256999626684, "grad_norm": 2.2571026405615826, "learning_rate": 7.785282616423747e-05, "loss": 1.48, "mean_abs_error": 1026.7454009268492, "mean_abs_error_last_10": 303.27793300071176, "mean_abs_error_last_25": 388.09671259092755, "mean_abs_error_last_50": 556.6515299580904, "mean_pred_prob": 0.01140565711830277, "mean_pred_prob_last_10": 0.06893818479729816, "mean_pred_prob_last_25": 0.03502801993745379, "mean_pred_prob_last_50": 0.02032846909132786, "mean_token_accuracy": 0.8653667032718658, "step": 2190 }, { "epoch": 0.03910902529642864, "grad_norm": 2.2660499691519336, "learning_rate": 7.82083185211518e-05, "loss": 1.4031, "mean_abs_error": 1036.3356007617037, "mean_abs_error_last_10": 638.4201152200997, "mean_abs_error_last_25": 694.9604873904921, "mean_abs_error_last_50": 796.5435261007406, "mean_pred_prob": 0.030079751355515327, "mean_pred_prob_last_10": 0.1727468586032046, "mean_pred_prob_last_25": 0.09099910175427794, "mean_pred_prob_last_50": 0.05328139155317331, "mean_token_accuracy": 0.8714746594429016, "step": 2200 }, { "epoch": 0.03928679359323058, "grad_norm": 2.4086484516451137, "learning_rate": 7.856381087806612e-05, "loss": 1.4356, "mean_abs_error": 1434.2279751471826, "mean_abs_error_last_10": 761.2960845307124, "mean_abs_error_last_25": 833.4125039077583, "mean_abs_error_last_50": 999.5554159433101, "mean_pred_prob": 0.016206205442722423, "mean_pred_prob_last_10": 0.09076676001132, "mean_pred_prob_last_25": 0.048056086688302455, "mean_pred_prob_last_50": 0.028366646477661562, "mean_token_accuracy": 0.8727989196777344, "step": 2210 }, { "epoch": 0.03946456189003253, "grad_norm": 1.3622096605934368, "learning_rate": 7.891930323498044e-05, "loss": 1.4549, "mean_abs_error": 571.9223713416774, "mean_abs_error_last_10": 95.83151992866985, "mean_abs_error_last_25": 165.04078394112125, "mean_abs_error_last_50": 285.0938800144818, "mean_pred_prob": 0.02105623740935698, "mean_pred_prob_last_10": 0.12283915146254003, "mean_pred_prob_last_25": 0.06329305642284452, "mean_pred_prob_last_50": 0.0371176905115135, "mean_token_accuracy": 0.869562977552414, "step": 2220 }, { "epoch": 0.03964233018683448, "grad_norm": 1.4102642817594295, "learning_rate": 7.927479559189477e-05, "loss": 1.3937, "mean_abs_error": 639.4956023429452, "mean_abs_error_last_10": 215.67348468112468, "mean_abs_error_last_25": 294.16357166674936, "mean_abs_error_last_50": 410.31541744595944, "mean_pred_prob": 0.023754495501634665, "mean_pred_prob_last_10": 0.1368926958355587, "mean_pred_prob_last_25": 0.0713156589306891, "mean_pred_prob_last_50": 0.04174524076806847, "mean_token_accuracy": 0.8837140798568726, "step": 2230 }, { "epoch": 0.03982009848363643, "grad_norm": 1.7721542824670276, "learning_rate": 7.96302879488091e-05, "loss": 1.499, "mean_abs_error": 1567.0883785199687, "mean_abs_error_last_10": 676.7283245034708, "mean_abs_error_last_25": 793.0226192517155, "mean_abs_error_last_50": 1012.1215142728, "mean_pred_prob": 0.02159112287336029, "mean_pred_prob_last_10": 0.11554847478400916, "mean_pred_prob_last_25": 0.0629061469939188, "mean_pred_prob_last_50": 0.037736118893371896, "mean_token_accuracy": 0.8679772198200226, "step": 2240 }, { "epoch": 0.039997866780438374, "grad_norm": 2.692647424620851, "learning_rate": 7.998578030572344e-05, "loss": 1.4331, "mean_abs_error": 137.57283274903884, "mean_abs_error_last_10": 50.24724250307189, "mean_abs_error_last_25": 59.510247806325026, "mean_abs_error_last_50": 76.81421996516954, "mean_pred_prob": 0.032181607000529765, "mean_pred_prob_last_10": 0.17341354079544544, "mean_pred_prob_last_25": 0.09489367585629224, "mean_pred_prob_last_50": 0.05624651368707419, "mean_token_accuracy": 0.878024697303772, "step": 2250 }, { "epoch": 0.040175635077240326, "grad_norm": 1.7899217074166505, "learning_rate": 8.034127266263776e-05, "loss": 1.3661, "mean_abs_error": 217.61034801586288, "mean_abs_error_last_10": 53.682954604679864, "mean_abs_error_last_25": 67.85644540418612, "mean_abs_error_last_50": 110.31939970139642, "mean_pred_prob": 0.03253350192680955, "mean_pred_prob_last_10": 0.1574793979525566, "mean_pred_prob_last_25": 0.09086486268788577, "mean_pred_prob_last_50": 0.05576998358592391, "mean_token_accuracy": 0.8873264193534851, "step": 2260 }, { "epoch": 0.04035340337404227, "grad_norm": 0.9914604212179906, "learning_rate": 8.069676501955208e-05, "loss": 1.5192, "mean_abs_error": 1726.9198675908526, "mean_abs_error_last_10": 775.8591878934939, "mean_abs_error_last_25": 975.5626822192262, "mean_abs_error_last_50": 1300.2347609372796, "mean_pred_prob": 0.012248406105209142, "mean_pred_prob_last_10": 0.0742674426321173, "mean_pred_prob_last_25": 0.03797715630789753, "mean_pred_prob_last_50": 0.021858744067139925, "mean_token_accuracy": 0.8693666696548462, "step": 2270 }, { "epoch": 0.04053117167084422, "grad_norm": 1.768266897155871, "learning_rate": 8.105225737646641e-05, "loss": 1.4282, "mean_abs_error": 817.0672897490061, "mean_abs_error_last_10": 308.5811628954177, "mean_abs_error_last_25": 390.04896602522996, "mean_abs_error_last_50": 517.2840716078365, "mean_pred_prob": 0.01713771012291545, "mean_pred_prob_last_10": 0.1008823530515656, "mean_pred_prob_last_25": 0.05200659952242859, "mean_pred_prob_last_50": 0.030258830307866448, "mean_token_accuracy": 0.8680357575416565, "step": 2280 }, { "epoch": 0.04070893996764617, "grad_norm": 2.1836194412317878, "learning_rate": 8.140774973338074e-05, "loss": 1.3907, "mean_abs_error": 348.6783811163762, "mean_abs_error_last_10": 82.20682948374173, "mean_abs_error_last_25": 116.17556015010003, "mean_abs_error_last_50": 165.57984876095014, "mean_pred_prob": 0.020485367719084026, "mean_pred_prob_last_10": 0.12310806345194578, "mean_pred_prob_last_25": 0.06333958078175783, "mean_pred_prob_last_50": 0.036540553672239184, "mean_token_accuracy": 0.8730456173419953, "step": 2290 }, { "epoch": 0.04088670826444812, "grad_norm": 1.0538220211423992, "learning_rate": 8.176324209029507e-05, "loss": 1.4575, "mean_abs_error": 836.2371736482922, "mean_abs_error_last_10": 276.74209255376695, "mean_abs_error_last_25": 275.006638436651, "mean_abs_error_last_50": 394.0397097487958, "mean_pred_prob": 0.013900854671373964, "mean_pred_prob_last_10": 0.08221056526526808, "mean_pred_prob_last_25": 0.04209397230297327, "mean_pred_prob_last_50": 0.024493978917598726, "mean_token_accuracy": 0.8800273418426514, "step": 2300 }, { "epoch": 0.04106447656125007, "grad_norm": 0.7896059788058761, "learning_rate": 8.211873444720939e-05, "loss": 1.4001, "mean_abs_error": 448.59469783326267, "mean_abs_error_last_10": 93.30708202289662, "mean_abs_error_last_25": 147.61764294632331, "mean_abs_error_last_50": 223.26320007336753, "mean_pred_prob": 0.030469438363797962, "mean_pred_prob_last_10": 0.1675234376336448, "mean_pred_prob_last_25": 0.08971620617667213, "mean_pred_prob_last_50": 0.05329057877534069, "mean_token_accuracy": 0.8699787199497223, "step": 2310 }, { "epoch": 0.041242244858052016, "grad_norm": 2.1290443347235444, "learning_rate": 8.247422680412371e-05, "loss": 1.4655, "mean_abs_error": 246.7892600828555, "mean_abs_error_last_10": 128.4516030035256, "mean_abs_error_last_25": 102.80406107829279, "mean_abs_error_last_50": 156.11467149517995, "mean_pred_prob": 0.022286389488726854, "mean_pred_prob_last_10": 0.12758622393012048, "mean_pred_prob_last_25": 0.06794970128685236, "mean_pred_prob_last_50": 0.03969409605488181, "mean_token_accuracy": 0.8639033555984497, "step": 2320 }, { "epoch": 0.04142001315485396, "grad_norm": 2.608833251050549, "learning_rate": 8.282971916103804e-05, "loss": 1.3631, "mean_abs_error": 242.24167102327206, "mean_abs_error_last_10": 40.9322026303173, "mean_abs_error_last_25": 68.98368710892557, "mean_abs_error_last_50": 123.02328404397251, "mean_pred_prob": 0.030879474338144065, "mean_pred_prob_last_10": 0.17391180340200663, "mean_pred_prob_last_25": 0.09239993896335363, "mean_pred_prob_last_50": 0.05433575096540153, "mean_token_accuracy": 0.8725074470043183, "step": 2330 }, { "epoch": 0.041597781451655914, "grad_norm": 5.070825793262644, "learning_rate": 8.318521151795236e-05, "loss": 1.4963, "mean_abs_error": 2690.4753234672335, "mean_abs_error_last_10": 1349.2635432835086, "mean_abs_error_last_25": 1546.9182401874264, "mean_abs_error_last_50": 1901.876056363075, "mean_pred_prob": 0.011174718644906535, "mean_pred_prob_last_10": 0.06611282883095555, "mean_pred_prob_last_25": 0.03405894412862835, "mean_pred_prob_last_50": 0.01973415841057431, "mean_token_accuracy": 0.8638275146484375, "step": 2340 }, { "epoch": 0.04177554974845786, "grad_norm": 1.3868759850950811, "learning_rate": 8.35407038748667e-05, "loss": 1.4019, "mean_abs_error": 1262.4846960442796, "mean_abs_error_last_10": 502.3648415693762, "mean_abs_error_last_25": 605.6958499880525, "mean_abs_error_last_50": 807.9739751831505, "mean_pred_prob": 0.0221475061160163, "mean_pred_prob_last_10": 0.10600625866791233, "mean_pred_prob_last_25": 0.0591496434120927, "mean_pred_prob_last_50": 0.037392078767879866, "mean_token_accuracy": 0.8806383907794952, "step": 2350 }, { "epoch": 0.041953318045259806, "grad_norm": 1.5521889297296299, "learning_rate": 8.389619623178103e-05, "loss": 1.3646, "mean_abs_error": 1198.4686218329598, "mean_abs_error_last_10": 534.6873363717147, "mean_abs_error_last_25": 584.7631771765375, "mean_abs_error_last_50": 773.3882053080642, "mean_pred_prob": 0.024215189897222444, "mean_pred_prob_last_10": 0.1319436389603652, "mean_pred_prob_last_25": 0.07056641895032953, "mean_pred_prob_last_50": 0.04209418435784755, "mean_token_accuracy": 0.8792049646377563, "step": 2360 }, { "epoch": 0.04213108634206176, "grad_norm": 1.793440135783173, "learning_rate": 8.425168858869534e-05, "loss": 1.3868, "mean_abs_error": 221.25165690170962, "mean_abs_error_last_10": 57.85754277467686, "mean_abs_error_last_25": 74.58502368581068, "mean_abs_error_last_50": 110.35516744029391, "mean_pred_prob": 0.022371622640639542, "mean_pred_prob_last_10": 0.1302551381289959, "mean_pred_prob_last_25": 0.0681624960154295, "mean_pred_prob_last_50": 0.03964403914287686, "mean_token_accuracy": 0.8770825862884521, "step": 2370 }, { "epoch": 0.042308854638863705, "grad_norm": 1.0805409924381248, "learning_rate": 8.460718094560968e-05, "loss": 1.4125, "mean_abs_error": 351.94016997966145, "mean_abs_error_last_10": 142.65995975281794, "mean_abs_error_last_25": 173.41845397489618, "mean_abs_error_last_50": 227.50081840652962, "mean_pred_prob": 0.021722259395755828, "mean_pred_prob_last_10": 0.12849817117676138, "mean_pred_prob_last_25": 0.06569513096474111, "mean_pred_prob_last_50": 0.03831472201272845, "mean_token_accuracy": 0.8721943020820617, "step": 2380 }, { "epoch": 0.04248662293566565, "grad_norm": 2.08800097285806, "learning_rate": 8.4962673302524e-05, "loss": 1.3784, "mean_abs_error": 966.0920386738696, "mean_abs_error_last_10": 471.64174369332784, "mean_abs_error_last_25": 524.1706182534651, "mean_abs_error_last_50": 652.0575546742632, "mean_pred_prob": 0.01913504074909724, "mean_pred_prob_last_10": 0.10957975371711655, "mean_pred_prob_last_25": 0.05742558960337192, "mean_pred_prob_last_50": 0.03374289653438609, "mean_token_accuracy": 0.8903157114982605, "step": 2390 }, { "epoch": 0.042664391232467604, "grad_norm": 1.9228343806374326, "learning_rate": 8.531816565943833e-05, "loss": 1.3768, "mean_abs_error": 1036.524066565907, "mean_abs_error_last_10": 494.75241691989766, "mean_abs_error_last_25": 575.4737797532532, "mean_abs_error_last_50": 717.2982536191295, "mean_pred_prob": 0.019194818247342482, "mean_pred_prob_last_10": 0.11014663770620245, "mean_pred_prob_last_25": 0.057739031728124245, "mean_pred_prob_last_50": 0.033996954570466184, "mean_token_accuracy": 0.8870917141437531, "step": 2400 }, { "epoch": 0.04284215952926955, "grad_norm": 1.5610293193027263, "learning_rate": 8.567365801635266e-05, "loss": 1.408, "mean_abs_error": 1378.9275068290158, "mean_abs_error_last_10": 586.8450176006322, "mean_abs_error_last_25": 686.5155185843189, "mean_abs_error_last_50": 914.7939382546168, "mean_pred_prob": 0.01680666036118055, "mean_pred_prob_last_10": 0.09801689594169147, "mean_pred_prob_last_25": 0.0513276175246574, "mean_pred_prob_last_50": 0.02990749362506904, "mean_token_accuracy": 0.8754072725772858, "step": 2410 }, { "epoch": 0.043019927826071495, "grad_norm": 1.5164924214527826, "learning_rate": 8.602915037326698e-05, "loss": 1.487, "mean_abs_error": 482.86675001351233, "mean_abs_error_last_10": 116.89811715507861, "mean_abs_error_last_25": 152.1003819297592, "mean_abs_error_last_50": 234.48828056581493, "mean_pred_prob": 0.01841066828928888, "mean_pred_prob_last_10": 0.11335194576531649, "mean_pred_prob_last_25": 0.05679952213540673, "mean_pred_prob_last_50": 0.0328007637988776, "mean_token_accuracy": 0.8699564874172211, "step": 2420 }, { "epoch": 0.04319769612287345, "grad_norm": 1.2946727715959947, "learning_rate": 8.638464273018131e-05, "loss": 1.44, "mean_abs_error": 519.5803250237972, "mean_abs_error_last_10": 167.14773818648985, "mean_abs_error_last_25": 214.8017327858296, "mean_abs_error_last_50": 302.4031824312427, "mean_pred_prob": 0.01553797300439328, "mean_pred_prob_last_10": 0.09539058841764927, "mean_pred_prob_last_25": 0.048569779470562936, "mean_pred_prob_last_50": 0.027707509510219098, "mean_token_accuracy": 0.8717435359954834, "step": 2430 }, { "epoch": 0.043375464419675394, "grad_norm": 1.3109125940948196, "learning_rate": 8.674013508709563e-05, "loss": 1.466, "mean_abs_error": 896.2426480776354, "mean_abs_error_last_10": 204.08912997304373, "mean_abs_error_last_25": 297.5546005760915, "mean_abs_error_last_50": 504.91694931977327, "mean_pred_prob": 0.014060532272560522, "mean_pred_prob_last_10": 0.08386842894833535, "mean_pred_prob_last_25": 0.04254677395801991, "mean_pred_prob_last_50": 0.024763495480874552, "mean_token_accuracy": 0.8676249861717225, "step": 2440 }, { "epoch": 0.04355323271647734, "grad_norm": 1.3524882606991169, "learning_rate": 8.709562744400995e-05, "loss": 1.4042, "mean_abs_error": 1077.2010888260327, "mean_abs_error_last_10": 487.672980211392, "mean_abs_error_last_25": 572.346383285004, "mean_abs_error_last_50": 731.6689354726345, "mean_pred_prob": 0.021736227092333138, "mean_pred_prob_last_10": 0.12158285329351201, "mean_pred_prob_last_25": 0.06487913657038007, "mean_pred_prob_last_50": 0.038225797691848126, "mean_token_accuracy": 0.8632249116897583, "step": 2450 }, { "epoch": 0.04373100101327929, "grad_norm": 1.784128175836519, "learning_rate": 8.745111980092428e-05, "loss": 1.3619, "mean_abs_error": 1042.2188096506177, "mean_abs_error_last_10": 524.2100226250103, "mean_abs_error_last_25": 595.7887098436189, "mean_abs_error_last_50": 723.5251914897965, "mean_pred_prob": 0.019151560572208837, "mean_pred_prob_last_10": 0.11312113021849654, "mean_pred_prob_last_25": 0.05883099424390821, "mean_pred_prob_last_50": 0.034084241234813815, "mean_token_accuracy": 0.8728459537029266, "step": 2460 }, { "epoch": 0.04390876931008124, "grad_norm": 1.553778587310398, "learning_rate": 8.780661215783861e-05, "loss": 1.396, "mean_abs_error": 597.5419057397434, "mean_abs_error_last_10": 229.9189027986587, "mean_abs_error_last_25": 244.60606585500122, "mean_abs_error_last_50": 347.6473639974316, "mean_pred_prob": 0.015232095681130885, "mean_pred_prob_last_10": 0.08847958762198686, "mean_pred_prob_last_25": 0.04596303245052695, "mean_pred_prob_last_50": 0.02683165716007352, "mean_token_accuracy": 0.8783308267593384, "step": 2470 }, { "epoch": 0.04408653760688319, "grad_norm": 1.0036923393562869, "learning_rate": 8.816210451475295e-05, "loss": 1.3634, "mean_abs_error": 708.3178029066462, "mean_abs_error_last_10": 212.3358179309692, "mean_abs_error_last_25": 282.50513957433293, "mean_abs_error_last_50": 430.87998479768385, "mean_pred_prob": 0.01628059662762098, "mean_pred_prob_last_10": 0.10193685098784044, "mean_pred_prob_last_25": 0.05124131162883714, "mean_pred_prob_last_50": 0.02930342661566101, "mean_token_accuracy": 0.8812481224536896, "step": 2480 }, { "epoch": 0.04426430590368514, "grad_norm": 1.7643865249745145, "learning_rate": 8.851759687166726e-05, "loss": 1.4069, "mean_abs_error": 593.9889796766304, "mean_abs_error_last_10": 112.08217844400329, "mean_abs_error_last_25": 150.3553051807124, "mean_abs_error_last_50": 291.866050249315, "mean_pred_prob": 0.018119816883699968, "mean_pred_prob_last_10": 0.10634122099727392, "mean_pred_prob_last_25": 0.05493758444208652, "mean_pred_prob_last_50": 0.03201218626927584, "mean_token_accuracy": 0.8664293885231018, "step": 2490 }, { "epoch": 0.04444207420048708, "grad_norm": 2.595867888610092, "learning_rate": 8.887308922858158e-05, "loss": 1.4738, "mean_abs_error": 845.5048564617739, "mean_abs_error_last_10": 389.1199925710325, "mean_abs_error_last_25": 466.23519249191077, "mean_abs_error_last_50": 582.0425109638342, "mean_pred_prob": 0.020219450956210493, "mean_pred_prob_last_10": 0.11268192474090029, "mean_pred_prob_last_25": 0.060353443029453044, "mean_pred_prob_last_50": 0.03560221061634365, "mean_token_accuracy": 0.8702459216117859, "step": 2500 }, { "epoch": 0.044619842497289036, "grad_norm": 1.5205144801921207, "learning_rate": 8.922858158549592e-05, "loss": 1.3995, "mean_abs_error": 1046.4506338406804, "mean_abs_error_last_10": 326.8146510800967, "mean_abs_error_last_25": 458.2718656105652, "mean_abs_error_last_50": 631.1907435725047, "mean_pred_prob": 0.01687683160416782, "mean_pred_prob_last_10": 0.09519101742771455, "mean_pred_prob_last_25": 0.0500571242388105, "mean_pred_prob_last_50": 0.029581698402762414, "mean_token_accuracy": 0.8740506708621979, "step": 2510 }, { "epoch": 0.04479761079409098, "grad_norm": 1.4414041676234288, "learning_rate": 8.958407394241023e-05, "loss": 1.3911, "mean_abs_error": 609.0108379485354, "mean_abs_error_last_10": 210.73914391643197, "mean_abs_error_last_25": 215.9299285889183, "mean_abs_error_last_50": 317.7326295279472, "mean_pred_prob": 0.013679533457616345, "mean_pred_prob_last_10": 0.08120364635251462, "mean_pred_prob_last_25": 0.042224413226358594, "mean_pred_prob_last_50": 0.02437299113953486, "mean_token_accuracy": 0.8787867963314057, "step": 2520 }, { "epoch": 0.04497537909089293, "grad_norm": 1.6105611262683701, "learning_rate": 8.993956629932458e-05, "loss": 1.3734, "mean_abs_error": 416.0914166974265, "mean_abs_error_last_10": 141.31872519907296, "mean_abs_error_last_25": 212.18096729162312, "mean_abs_error_last_50": 264.9223327162448, "mean_pred_prob": 0.015457681380212307, "mean_pred_prob_last_10": 0.09385848008096218, "mean_pred_prob_last_25": 0.04758939053863287, "mean_pred_prob_last_50": 0.02759367134422064, "mean_token_accuracy": 0.8722067952156067, "step": 2530 }, { "epoch": 0.04515314738769488, "grad_norm": 1.7919288732562886, "learning_rate": 9.02950586562389e-05, "loss": 1.3656, "mean_abs_error": 760.3896894013701, "mean_abs_error_last_10": 222.51322789411415, "mean_abs_error_last_25": 302.31333517902834, "mean_abs_error_last_50": 448.5021649689462, "mean_pred_prob": 0.016843117179814727, "mean_pred_prob_last_10": 0.1018050195532851, "mean_pred_prob_last_25": 0.05224751933128573, "mean_pred_prob_last_50": 0.0301998516311869, "mean_token_accuracy": 0.8713289558887481, "step": 2540 }, { "epoch": 0.04533091568449683, "grad_norm": 1.6380829142850537, "learning_rate": 9.065055101315322e-05, "loss": 1.3866, "mean_abs_error": 187.5086846359512, "mean_abs_error_last_10": 46.65573429855273, "mean_abs_error_last_25": 63.41978917074776, "mean_abs_error_last_50": 108.08325891702097, "mean_pred_prob": 0.026046785060316324, "mean_pred_prob_last_10": 0.14457310177385807, "mean_pred_prob_last_25": 0.07692298963665962, "mean_pred_prob_last_50": 0.04555275347083807, "mean_token_accuracy": 0.8741584062576294, "step": 2550 }, { "epoch": 0.04550868398129877, "grad_norm": 1.4276925575441162, "learning_rate": 9.100604337006755e-05, "loss": 1.3348, "mean_abs_error": 236.21458068743317, "mean_abs_error_last_10": 18.802153112988922, "mean_abs_error_last_25": 48.474076565616556, "mean_abs_error_last_50": 99.25513977885666, "mean_pred_prob": 0.02965658325701952, "mean_pred_prob_last_10": 0.16778781898319722, "mean_pred_prob_last_25": 0.0884100123308599, "mean_pred_prob_last_50": 0.052087059570476414, "mean_token_accuracy": 0.8868680596351624, "step": 2560 }, { "epoch": 0.045686452278100725, "grad_norm": 1.0691593080161768, "learning_rate": 9.136153572698187e-05, "loss": 1.3886, "mean_abs_error": 307.843549021491, "mean_abs_error_last_10": 67.30519029375981, "mean_abs_error_last_25": 123.5006857016886, "mean_abs_error_last_50": 186.5043587183604, "mean_pred_prob": 0.02729048244655132, "mean_pred_prob_last_10": 0.1532921176403761, "mean_pred_prob_last_25": 0.08169149663299322, "mean_pred_prob_last_50": 0.04800289161503315, "mean_token_accuracy": 0.8764169335365295, "step": 2570 }, { "epoch": 0.04586422057490267, "grad_norm": 1.2614235275487424, "learning_rate": 9.17170280838962e-05, "loss": 1.4057, "mean_abs_error": 728.2306984049272, "mean_abs_error_last_10": 231.6044701689767, "mean_abs_error_last_25": 311.51959060929863, "mean_abs_error_last_50": 404.7817058251377, "mean_pred_prob": 0.02232725938083604, "mean_pred_prob_last_10": 0.12628133967518806, "mean_pred_prob_last_25": 0.06694129284587688, "mean_pred_prob_last_50": 0.03927101053413935, "mean_token_accuracy": 0.8792701065540314, "step": 2580 }, { "epoch": 0.04604198887170462, "grad_norm": 1.3651875343601207, "learning_rate": 9.207252044081053e-05, "loss": 1.3367, "mean_abs_error": 794.0358694503309, "mean_abs_error_last_10": 262.44661671665233, "mean_abs_error_last_25": 376.5301577075606, "mean_abs_error_last_50": 513.0916467073881, "mean_pred_prob": 0.024554058554349466, "mean_pred_prob_last_10": 0.143917695429991, "mean_pred_prob_last_25": 0.07474960180115886, "mean_pred_prob_last_50": 0.04346262604813091, "mean_token_accuracy": 0.880507093667984, "step": 2590 }, { "epoch": 0.04621975716850657, "grad_norm": 1.1873507078352763, "learning_rate": 9.242801279772485e-05, "loss": 1.3558, "mean_abs_error": 608.6788722373703, "mean_abs_error_last_10": 106.97050890784965, "mean_abs_error_last_25": 187.8890152488661, "mean_abs_error_last_50": 308.5870612314857, "mean_pred_prob": 0.01947756396839395, "mean_pred_prob_last_10": 0.11357566113583743, "mean_pred_prob_last_25": 0.058208299276884645, "mean_pred_prob_last_50": 0.03425016263499856, "mean_token_accuracy": 0.8859395861625672, "step": 2600 }, { "epoch": 0.046397525465308516, "grad_norm": 0.832758275477244, "learning_rate": 9.278350515463918e-05, "loss": 1.4383, "mean_abs_error": 801.5042091129902, "mean_abs_error_last_10": 202.40588813258117, "mean_abs_error_last_25": 242.34034780791325, "mean_abs_error_last_50": 375.70753863528546, "mean_pred_prob": 0.01548094573081471, "mean_pred_prob_last_10": 0.09212340698577463, "mean_pred_prob_last_25": 0.04575446636881679, "mean_pred_prob_last_50": 0.026962820801418274, "mean_token_accuracy": 0.8773499131202698, "step": 2610 }, { "epoch": 0.04657529376211046, "grad_norm": 1.226301908104623, "learning_rate": 9.31389975115535e-05, "loss": 1.3855, "mean_abs_error": 664.0321126848047, "mean_abs_error_last_10": 311.7742484357608, "mean_abs_error_last_25": 338.8499414609673, "mean_abs_error_last_50": 410.0309571297265, "mean_pred_prob": 0.019657098269090057, "mean_pred_prob_last_10": 0.11303680036216975, "mean_pred_prob_last_25": 0.05826054741628468, "mean_pred_prob_last_50": 0.034239121712744236, "mean_token_accuracy": 0.8728158175945282, "step": 2620 }, { "epoch": 0.046753062058912415, "grad_norm": 1.4536762225704354, "learning_rate": 9.349448986846782e-05, "loss": 1.3482, "mean_abs_error": 516.5970268929462, "mean_abs_error_last_10": 194.48293451821388, "mean_abs_error_last_25": 267.1088209110664, "mean_abs_error_last_50": 317.4469392496221, "mean_pred_prob": 0.025660900643561035, "mean_pred_prob_last_10": 0.14644494992680848, "mean_pred_prob_last_25": 0.07711699311621487, "mean_pred_prob_last_50": 0.04522656474728137, "mean_token_accuracy": 0.8717858791351318, "step": 2630 }, { "epoch": 0.04693083035571436, "grad_norm": 0.9373644063755315, "learning_rate": 9.384998222538215e-05, "loss": 1.3252, "mean_abs_error": 342.8127973812637, "mean_abs_error_last_10": 61.39493754091586, "mean_abs_error_last_25": 97.21254368110834, "mean_abs_error_last_50": 151.8707939948917, "mean_pred_prob": 0.02147293563466519, "mean_pred_prob_last_10": 0.12630709819495678, "mean_pred_prob_last_25": 0.06598380859941244, "mean_pred_prob_last_50": 0.03833461478352547, "mean_token_accuracy": 0.8833292126655579, "step": 2640 }, { "epoch": 0.04710859865251631, "grad_norm": 2.459774840689786, "learning_rate": 9.420547458229649e-05, "loss": 1.3716, "mean_abs_error": 822.3215576328685, "mean_abs_error_last_10": 508.7676399320118, "mean_abs_error_last_25": 512.5469230235079, "mean_abs_error_last_50": 610.8925222495747, "mean_pred_prob": 0.028420815709978342, "mean_pred_prob_last_10": 0.1581932371365838, "mean_pred_prob_last_25": 0.08552391050616279, "mean_pred_prob_last_50": 0.04996545540052466, "mean_token_accuracy": 0.8771938800811767, "step": 2650 }, { "epoch": 0.04728636694931826, "grad_norm": 2.858848311954709, "learning_rate": 9.456096693921082e-05, "loss": 1.4178, "mean_abs_error": 1442.3948539561356, "mean_abs_error_last_10": 695.3925396946278, "mean_abs_error_last_25": 877.8619934708262, "mean_abs_error_last_50": 1053.3267450204323, "mean_pred_prob": 0.02640979439020157, "mean_pred_prob_last_10": 0.14182202333322494, "mean_pred_prob_last_25": 0.0754795120912604, "mean_pred_prob_last_50": 0.045637158227327744, "mean_token_accuracy": 0.8800195276737213, "step": 2660 }, { "epoch": 0.047464135246120205, "grad_norm": 1.0306533605677686, "learning_rate": 9.491645929612514e-05, "loss": 1.3778, "mean_abs_error": 765.7679489923096, "mean_abs_error_last_10": 300.4078776145713, "mean_abs_error_last_25": 389.33098524833514, "mean_abs_error_last_50": 521.7592094933696, "mean_pred_prob": 0.023933238972676917, "mean_pred_prob_last_10": 0.13389110737480223, "mean_pred_prob_last_25": 0.07082381276413799, "mean_pred_prob_last_50": 0.04177939467772376, "mean_token_accuracy": 0.8654900789260864, "step": 2670 }, { "epoch": 0.04764190354292216, "grad_norm": 1.8203657749655013, "learning_rate": 9.527195165303946e-05, "loss": 1.3803, "mean_abs_error": 723.7672617235869, "mean_abs_error_last_10": 231.6749322033108, "mean_abs_error_last_25": 291.483024562623, "mean_abs_error_last_50": 417.35506305409115, "mean_pred_prob": 0.01776633367408067, "mean_pred_prob_last_10": 0.10103725116932764, "mean_pred_prob_last_25": 0.0532133984786924, "mean_pred_prob_last_50": 0.03132151049794629, "mean_token_accuracy": 0.8747741103172302, "step": 2680 }, { "epoch": 0.047819671839724104, "grad_norm": 2.5528788040490826, "learning_rate": 9.562744400995379e-05, "loss": 1.3282, "mean_abs_error": 675.0046179256077, "mean_abs_error_last_10": 210.44086502126476, "mean_abs_error_last_25": 247.3909238148794, "mean_abs_error_last_50": 387.22663219167845, "mean_pred_prob": 0.022617942903889344, "mean_pred_prob_last_10": 0.13078979382407852, "mean_pred_prob_last_25": 0.06783113289857283, "mean_pred_prob_last_50": 0.039857899109483695, "mean_token_accuracy": 0.8753682374954224, "step": 2690 }, { "epoch": 0.04799744013652605, "grad_norm": 1.058539459104733, "learning_rate": 9.598293636686812e-05, "loss": 1.3734, "mean_abs_error": 724.1896142488442, "mean_abs_error_last_10": 379.4120315964865, "mean_abs_error_last_25": 407.53425972528197, "mean_abs_error_last_50": 454.2638766996135, "mean_pred_prob": 0.01627523056231439, "mean_pred_prob_last_10": 0.09591035805642605, "mean_pred_prob_last_25": 0.04925511991605162, "mean_pred_prob_last_50": 0.02862791595980525, "mean_token_accuracy": 0.867316085100174, "step": 2700 }, { "epoch": 0.048175208433328, "grad_norm": 3.938421586144895, "learning_rate": 9.633842872378245e-05, "loss": 1.4264, "mean_abs_error": 334.39077410425426, "mean_abs_error_last_10": 68.79926831770817, "mean_abs_error_last_25": 93.73137462505875, "mean_abs_error_last_50": 152.9838698735595, "mean_pred_prob": 0.02443231549113989, "mean_pred_prob_last_10": 0.1421042697504163, "mean_pred_prob_last_25": 0.07441211640834808, "mean_pred_prob_last_50": 0.04326636856421828, "mean_token_accuracy": 0.877708888053894, "step": 2710 }, { "epoch": 0.04835297673012995, "grad_norm": 1.077955499143471, "learning_rate": 9.669392108069677e-05, "loss": 1.3113, "mean_abs_error": 672.7756346856717, "mean_abs_error_last_10": 143.8349056723313, "mean_abs_error_last_25": 227.36399754562316, "mean_abs_error_last_50": 380.03659530645047, "mean_pred_prob": 0.015876413689693437, "mean_pred_prob_last_10": 0.098181393253617, "mean_pred_prob_last_25": 0.048838906455785036, "mean_pred_prob_last_50": 0.028031576215289532, "mean_token_accuracy": 0.8817533791065216, "step": 2720 }, { "epoch": 0.048530745026931894, "grad_norm": 3.9009565307923944, "learning_rate": 9.704941343761109e-05, "loss": 1.345, "mean_abs_error": 519.4638971164422, "mean_abs_error_last_10": 133.66281138646957, "mean_abs_error_last_25": 216.1154978337231, "mean_abs_error_last_50": 337.99948743411136, "mean_pred_prob": 0.02686829408048652, "mean_pred_prob_last_10": 0.14136830667266623, "mean_pred_prob_last_25": 0.07735591615783051, "mean_pred_prob_last_50": 0.04659032623749226, "mean_token_accuracy": 0.8768336772918701, "step": 2730 }, { "epoch": 0.04870851332373385, "grad_norm": 1.1920529614201698, "learning_rate": 9.740490579452542e-05, "loss": 1.3497, "mean_abs_error": 733.7008699214874, "mean_abs_error_last_10": 209.65558316738984, "mean_abs_error_last_25": 290.7716579219325, "mean_abs_error_last_50": 425.1259385925826, "mean_pred_prob": 0.01641454747878015, "mean_pred_prob_last_10": 0.09290263690054416, "mean_pred_prob_last_25": 0.04880076302215457, "mean_pred_prob_last_50": 0.02883575325831771, "mean_token_accuracy": 0.8635350465774536, "step": 2740 }, { "epoch": 0.04888628162053579, "grad_norm": 1.2916607918809788, "learning_rate": 9.776039815143974e-05, "loss": 1.3303, "mean_abs_error": 262.7005672530016, "mean_abs_error_last_10": 41.48003569576146, "mean_abs_error_last_25": 72.51694757392669, "mean_abs_error_last_50": 133.52957061381875, "mean_pred_prob": 0.03204985759221017, "mean_pred_prob_last_10": 0.17337275426834822, "mean_pred_prob_last_25": 0.09453126583248377, "mean_pred_prob_last_50": 0.055972461309283975, "mean_token_accuracy": 0.8765082359313965, "step": 2750 }, { "epoch": 0.04906404991733774, "grad_norm": 0.7481707761152282, "learning_rate": 9.811589050835409e-05, "loss": 1.3137, "mean_abs_error": 182.24615611057976, "mean_abs_error_last_10": 48.95281882020844, "mean_abs_error_last_25": 56.975982013644376, "mean_abs_error_last_50": 87.18556038788344, "mean_pred_prob": 0.028056084550917147, "mean_pred_prob_last_10": 0.15811471678316594, "mean_pred_prob_last_25": 0.08387599531561137, "mean_pred_prob_last_50": 0.04937024293467403, "mean_token_accuracy": 0.8826410889625549, "step": 2760 }, { "epoch": 0.04924181821413969, "grad_norm": 1.4552331659562756, "learning_rate": 9.84713828652684e-05, "loss": 1.3189, "mean_abs_error": 1531.8454682130762, "mean_abs_error_last_10": 741.8981286355938, "mean_abs_error_last_25": 825.2589340418575, "mean_abs_error_last_50": 1040.6112076171185, "mean_pred_prob": 0.015571997420920525, "mean_pred_prob_last_10": 0.0858284374640789, "mean_pred_prob_last_25": 0.0461947122879792, "mean_pred_prob_last_50": 0.0273718445256236, "mean_token_accuracy": 0.8762606799602508, "step": 2770 }, { "epoch": 0.04941958651094164, "grad_norm": 1.4237695690577914, "learning_rate": 9.882687522218272e-05, "loss": 1.3436, "mean_abs_error": 413.9627134743258, "mean_abs_error_last_10": 176.4112933059098, "mean_abs_error_last_25": 160.2883435006616, "mean_abs_error_last_50": 214.7527629093766, "mean_pred_prob": 0.017228148179128766, "mean_pred_prob_last_10": 0.09848855007439852, "mean_pred_prob_last_25": 0.05167413940653205, "mean_pred_prob_last_50": 0.030234634736552836, "mean_token_accuracy": 0.8815155386924743, "step": 2780 }, { "epoch": 0.049597354807743584, "grad_norm": 2.1142900514239296, "learning_rate": 9.918236757909706e-05, "loss": 1.3594, "mean_abs_error": 389.7877963795584, "mean_abs_error_last_10": 126.46268178172222, "mean_abs_error_last_25": 166.43784758388577, "mean_abs_error_last_50": 224.68870899927865, "mean_pred_prob": 0.027394849318079652, "mean_pred_prob_last_10": 0.1519575323909521, "mean_pred_prob_last_25": 0.08060132935643197, "mean_pred_prob_last_50": 0.04772143317386508, "mean_token_accuracy": 0.8693492174148559, "step": 2790 }, { "epoch": 0.049775123104545536, "grad_norm": 1.089760346057587, "learning_rate": 9.953785993601138e-05, "loss": 1.3567, "mean_abs_error": 910.4643538780143, "mean_abs_error_last_10": 298.49918617586, "mean_abs_error_last_25": 387.48097877281566, "mean_abs_error_last_50": 559.9160009328474, "mean_pred_prob": 0.018930918155820108, "mean_pred_prob_last_10": 0.10400932710035704, "mean_pred_prob_last_25": 0.05596826915862039, "mean_pred_prob_last_50": 0.03313727231870871, "mean_token_accuracy": 0.8670912563800812, "step": 2800 }, { "epoch": 0.04995289140134748, "grad_norm": 1.6127591660545753, "learning_rate": 9.989335229292571e-05, "loss": 1.3125, "mean_abs_error": 293.32969521559954, "mean_abs_error_last_10": 91.17576881974489, "mean_abs_error_last_25": 109.0777039422948, "mean_abs_error_last_50": 148.82470752462172, "mean_pred_prob": 0.02340920246206224, "mean_pred_prob_last_10": 0.13538450598716736, "mean_pred_prob_last_25": 0.07113211713731289, "mean_pred_prob_last_50": 0.04165651649236679, "mean_token_accuracy": 0.8729255318641662, "step": 2810 }, { "epoch": 0.050130659698149435, "grad_norm": 1.6474142186519187, "learning_rate": 0.0001, "loss": 1.361, "mean_abs_error": 1122.6063769096388, "mean_abs_error_last_10": 639.3867205592753, "mean_abs_error_last_25": 671.8164339541665, "mean_abs_error_last_50": 811.7502233932645, "mean_pred_prob": 0.023625894781434908, "mean_pred_prob_last_10": 0.1370586534132599, "mean_pred_prob_last_25": 0.07172456370171858, "mean_pred_prob_last_50": 0.04184752250439487, "mean_token_accuracy": 0.8699276566505432, "step": 2820 }, { "epoch": 0.05030842799495138, "grad_norm": 1.4079148651896984, "learning_rate": 0.0001, "loss": 1.3548, "mean_abs_error": 1030.8026958868968, "mean_abs_error_last_10": 285.4892958990027, "mean_abs_error_last_25": 385.9875490102789, "mean_abs_error_last_50": 604.6860603604948, "mean_pred_prob": 0.019697461641044356, "mean_pred_prob_last_10": 0.10681000062031672, "mean_pred_prob_last_25": 0.05898571689031087, "mean_pred_prob_last_50": 0.03401675926579628, "mean_token_accuracy": 0.8706597149372101, "step": 2830 }, { "epoch": 0.05048619629175333, "grad_norm": 1.6445575510521562, "learning_rate": 0.0001, "loss": 1.2875, "mean_abs_error": 289.4802605201906, "mean_abs_error_last_10": 205.47238771543306, "mean_abs_error_last_25": 198.53051862812086, "mean_abs_error_last_50": 192.66175628919308, "mean_pred_prob": 0.02914808285422623, "mean_pred_prob_last_10": 0.16099350340664387, "mean_pred_prob_last_25": 0.0869142958894372, "mean_pred_prob_last_50": 0.05121583119034767, "mean_token_accuracy": 0.8827132940292358, "step": 2840 }, { "epoch": 0.05066396458855528, "grad_norm": 2.475653809089289, "learning_rate": 0.0001, "loss": 1.3237, "mean_abs_error": 704.9857629883161, "mean_abs_error_last_10": 309.14644200487805, "mean_abs_error_last_25": 399.51287266235005, "mean_abs_error_last_50": 474.7867671627134, "mean_pred_prob": 0.04345453318092041, "mean_pred_prob_last_10": 0.21501370091282296, "mean_pred_prob_last_25": 0.12185932961583604, "mean_pred_prob_last_50": 0.07460691314772702, "mean_token_accuracy": 0.8738130271434784, "step": 2850 }, { "epoch": 0.050841732885357226, "grad_norm": 0.9334154359974357, "learning_rate": 0.0001, "loss": 1.3325, "mean_abs_error": 1214.528683465718, "mean_abs_error_last_10": 410.78705321755496, "mean_abs_error_last_25": 546.5478275831572, "mean_abs_error_last_50": 742.2374446582983, "mean_pred_prob": 0.012449456044123508, "mean_pred_prob_last_10": 0.0756226222612895, "mean_pred_prob_last_25": 0.03852968979044817, "mean_pred_prob_last_50": 0.022225853629061022, "mean_token_accuracy": 0.8695955276489258, "step": 2860 }, { "epoch": 0.05101950118215917, "grad_norm": 1.7237253956214436, "learning_rate": 0.0001, "loss": 1.2927, "mean_abs_error": 586.6059517194141, "mean_abs_error_last_10": 121.32984300395799, "mean_abs_error_last_25": 167.1656676502602, "mean_abs_error_last_50": 295.65806221125933, "mean_pred_prob": 0.018382964335614815, "mean_pred_prob_last_10": 0.10808810174930841, "mean_pred_prob_last_25": 0.05560403775889426, "mean_pred_prob_last_50": 0.03241160679608583, "mean_token_accuracy": 0.8834626078605652, "step": 2870 }, { "epoch": 0.051197269478961124, "grad_norm": 1.988554357797818, "learning_rate": 0.0001, "loss": 1.3729, "mean_abs_error": 853.4136344683309, "mean_abs_error_last_10": 333.72474961294046, "mean_abs_error_last_25": 422.5445201431553, "mean_abs_error_last_50": 561.6452538808958, "mean_pred_prob": 0.011576440616045148, "mean_pred_prob_last_10": 0.07012400145176798, "mean_pred_prob_last_25": 0.03536872131517157, "mean_pred_prob_last_50": 0.020551886747125537, "mean_token_accuracy": 0.8736047506332397, "step": 2880 }, { "epoch": 0.05137503777576307, "grad_norm": 0.9622999448198494, "learning_rate": 0.0001, "loss": 1.3102, "mean_abs_error": 262.68046029733694, "mean_abs_error_last_10": 50.564504513964025, "mean_abs_error_last_25": 79.86291433182949, "mean_abs_error_last_50": 133.272340377471, "mean_pred_prob": 0.024082083720713854, "mean_pred_prob_last_10": 0.1440281756222248, "mean_pred_prob_last_25": 0.07104890253394842, "mean_pred_prob_last_50": 0.041783005744218824, "mean_token_accuracy": 0.8609657108783721, "step": 2890 }, { "epoch": 0.051552806072565016, "grad_norm": 2.0564972777795245, "learning_rate": 0.0001, "loss": 1.3523, "mean_abs_error": 1625.4486456869595, "mean_abs_error_last_10": 851.9960664648604, "mean_abs_error_last_25": 940.0121274742572, "mean_abs_error_last_50": 1108.0730379561862, "mean_pred_prob": 0.015128001985431183, "mean_pred_prob_last_10": 0.08920111174375052, "mean_pred_prob_last_25": 0.0456168527220143, "mean_pred_prob_last_50": 0.026714421447832138, "mean_token_accuracy": 0.8680628299713135, "step": 2900 }, { "epoch": 0.05173057436936697, "grad_norm": 1.9244993477930916, "learning_rate": 0.0001, "loss": 1.2851, "mean_abs_error": 808.2685101144083, "mean_abs_error_last_10": 231.16373032744727, "mean_abs_error_last_25": 288.1654823607595, "mean_abs_error_last_50": 427.1599736629655, "mean_pred_prob": 0.013910174815100618, "mean_pred_prob_last_10": 0.08084329098928719, "mean_pred_prob_last_25": 0.04192192619666457, "mean_pred_prob_last_50": 0.024478516227100046, "mean_token_accuracy": 0.8844204723834992, "step": 2910 }, { "epoch": 0.051908342666168915, "grad_norm": 2.00165004751908, "learning_rate": 0.0001, "loss": 1.3085, "mean_abs_error": 884.6445856171019, "mean_abs_error_last_10": 270.8935891535236, "mean_abs_error_last_25": 337.80198878482736, "mean_abs_error_last_50": 506.4170579654036, "mean_pred_prob": 0.019764181177015415, "mean_pred_prob_last_10": 0.11440796740353107, "mean_pred_prob_last_25": 0.05960236122482456, "mean_pred_prob_last_50": 0.034781266085337846, "mean_token_accuracy": 0.881256514787674, "step": 2920 }, { "epoch": 0.05208611096297086, "grad_norm": 2.4541425995721857, "learning_rate": 0.0001, "loss": 1.3926, "mean_abs_error": 1444.3501434710208, "mean_abs_error_last_10": 610.8065179856947, "mean_abs_error_last_25": 737.2580600965036, "mean_abs_error_last_50": 979.996305563729, "mean_pred_prob": 0.012876936921384186, "mean_pred_prob_last_10": 0.07556469296250726, "mean_pred_prob_last_25": 0.03893907496822067, "mean_pred_prob_last_50": 0.022665116767166184, "mean_token_accuracy": 0.869570130109787, "step": 2930 }, { "epoch": 0.052263879259772814, "grad_norm": 1.4950581366016855, "learning_rate": 0.0001, "loss": 1.2877, "mean_abs_error": 808.9518962625998, "mean_abs_error_last_10": 293.9518931424215, "mean_abs_error_last_25": 353.0527910747987, "mean_abs_error_last_50": 494.60542212419494, "mean_pred_prob": 0.026269329673959875, "mean_pred_prob_last_10": 0.13738194819889032, "mean_pred_prob_last_25": 0.07504376295837574, "mean_pred_prob_last_50": 0.04530330446141306, "mean_token_accuracy": 0.8752997159957886, "step": 2940 }, { "epoch": 0.05244164755657476, "grad_norm": 1.8853325549634987, "learning_rate": 0.0001, "loss": 1.3527, "mean_abs_error": 386.55344909909104, "mean_abs_error_last_10": 150.91941981696263, "mean_abs_error_last_25": 245.53499964696, "mean_abs_error_last_50": 286.20094425818365, "mean_pred_prob": 0.02265359554439783, "mean_pred_prob_last_10": 0.13102542106062173, "mean_pred_prob_last_25": 0.06803735103458167, "mean_pred_prob_last_50": 0.039928844384849074, "mean_token_accuracy": 0.8656170308589936, "step": 2950 }, { "epoch": 0.052619415853376705, "grad_norm": 2.0442550895358416, "learning_rate": 0.0001, "loss": 1.3628, "mean_abs_error": 526.0097294710915, "mean_abs_error_last_10": 128.05082103936255, "mean_abs_error_last_25": 177.11746242325825, "mean_abs_error_last_50": 285.91130261037966, "mean_pred_prob": 0.02761065063532442, "mean_pred_prob_last_10": 0.16034314379794523, "mean_pred_prob_last_25": 0.08347872668527997, "mean_pred_prob_last_50": 0.048535390291363, "mean_token_accuracy": 0.8571675598621369, "step": 2960 }, { "epoch": 0.05279718415017866, "grad_norm": 1.8429714466053269, "learning_rate": 0.0001, "loss": 1.2996, "mean_abs_error": 624.9584163413051, "mean_abs_error_last_10": 112.84926945042677, "mean_abs_error_last_25": 173.12979392437833, "mean_abs_error_last_50": 321.43131639609953, "mean_pred_prob": 0.02056679255911149, "mean_pred_prob_last_10": 0.1166401629569009, "mean_pred_prob_last_25": 0.0612852178979665, "mean_pred_prob_last_50": 0.03604120656964369, "mean_token_accuracy": 0.8752633810043335, "step": 2970 }, { "epoch": 0.052974952446980604, "grad_norm": 1.3720605605821956, "learning_rate": 0.0001, "loss": 1.2925, "mean_abs_error": 132.12970660221447, "mean_abs_error_last_10": 29.515961465768036, "mean_abs_error_last_25": 65.64698499467795, "mean_abs_error_last_50": 84.24477710912691, "mean_pred_prob": 0.03529156958684325, "mean_pred_prob_last_10": 0.18627041652798654, "mean_pred_prob_last_25": 0.10266991723328829, "mean_pred_prob_last_50": 0.06117867194116115, "mean_token_accuracy": 0.8701725125312805, "step": 2980 }, { "epoch": 0.05315272074378256, "grad_norm": 1.8814753488622864, "learning_rate": 0.0001, "loss": 1.2955, "mean_abs_error": 742.8556608201145, "mean_abs_error_last_10": 246.56050692366566, "mean_abs_error_last_25": 288.2432514528252, "mean_abs_error_last_50": 392.04643666290593, "mean_pred_prob": 0.01765982685610652, "mean_pred_prob_last_10": 0.0977340629324317, "mean_pred_prob_last_25": 0.052321596816182134, "mean_pred_prob_last_50": 0.03123435187153518, "mean_token_accuracy": 0.8783878922462464, "step": 2990 }, { "epoch": 0.0533304890405845, "grad_norm": 1.3895486993268307, "learning_rate": 0.0001, "loss": 1.2498, "mean_abs_error": 675.3638616667827, "mean_abs_error_last_10": 204.02057871009396, "mean_abs_error_last_25": 269.5222168772532, "mean_abs_error_last_50": 399.3834351834122, "mean_pred_prob": 0.02820242104353383, "mean_pred_prob_last_10": 0.15360073612537234, "mean_pred_prob_last_25": 0.08277422201936133, "mean_pred_prob_last_50": 0.04929770027229097, "mean_token_accuracy": 0.8799628913402557, "step": 3000 }, { "epoch": 0.05350825733738645, "grad_norm": 1.2006132579527216, "learning_rate": 0.0001, "loss": 1.2652, "mean_abs_error": 370.6202596734865, "mean_abs_error_last_10": 112.94987692997798, "mean_abs_error_last_25": 130.2230299672255, "mean_abs_error_last_50": 189.46339622948727, "mean_pred_prob": 0.04171452506561764, "mean_pred_prob_last_10": 0.20766805172897876, "mean_pred_prob_last_25": 0.11794836334884166, "mean_pred_prob_last_50": 0.07140622227452695, "mean_token_accuracy": 0.8845410346984863, "step": 3010 }, { "epoch": 0.0536860256341884, "grad_norm": 0.7742668929102222, "learning_rate": 0.0001, "loss": 1.2507, "mean_abs_error": 952.7407347326459, "mean_abs_error_last_10": 309.96353158151896, "mean_abs_error_last_25": 361.91516127412285, "mean_abs_error_last_50": 491.86454729776676, "mean_pred_prob": 0.017648006122908556, "mean_pred_prob_last_10": 0.1030081931327004, "mean_pred_prob_last_25": 0.05367649127729237, "mean_pred_prob_last_50": 0.03124697692110203, "mean_token_accuracy": 0.8703360021114349, "step": 3020 }, { "epoch": 0.05386379393099035, "grad_norm": 1.4486609341990582, "learning_rate": 0.0001, "loss": 1.2888, "mean_abs_error": 990.5046095859104, "mean_abs_error_last_10": 232.5820335212921, "mean_abs_error_last_25": 337.65355272710184, "mean_abs_error_last_50": 552.4412363096061, "mean_pred_prob": 0.021609041435294785, "mean_pred_prob_last_10": 0.12101234173751436, "mean_pred_prob_last_25": 0.06418618996394798, "mean_pred_prob_last_50": 0.03775308388285339, "mean_token_accuracy": 0.8809057831764221, "step": 3030 }, { "epoch": 0.05404156222779229, "grad_norm": 2.080230712809839, "learning_rate": 0.0001, "loss": 1.326, "mean_abs_error": 795.810816519575, "mean_abs_error_last_10": 298.9607066226034, "mean_abs_error_last_25": 385.341786729464, "mean_abs_error_last_50": 551.9675641347685, "mean_pred_prob": 0.02432820619578706, "mean_pred_prob_last_10": 0.1401860710757319, "mean_pred_prob_last_25": 0.07340991538076196, "mean_pred_prob_last_50": 0.04287208302121144, "mean_token_accuracy": 0.8761277914047241, "step": 3040 }, { "epoch": 0.054219330524594246, "grad_norm": 1.6581220477897642, "learning_rate": 0.0001, "loss": 1.2609, "mean_abs_error": 1231.5437979533208, "mean_abs_error_last_10": 536.4206528470577, "mean_abs_error_last_25": 648.7315355960752, "mean_abs_error_last_50": 822.5790376200044, "mean_pred_prob": 0.025892842315079177, "mean_pred_prob_last_10": 0.1424584718362894, "mean_pred_prob_last_25": 0.07704243475163822, "mean_pred_prob_last_50": 0.04563570614263881, "mean_token_accuracy": 0.8810582458972931, "step": 3050 }, { "epoch": 0.05439709882139619, "grad_norm": 0.9900008210680528, "learning_rate": 0.0001, "loss": 1.3034, "mean_abs_error": 1109.3257825016556, "mean_abs_error_last_10": 358.6807599223109, "mean_abs_error_last_25": 413.19695665392726, "mean_abs_error_last_50": 650.537239321001, "mean_pred_prob": 0.02516643967537675, "mean_pred_prob_last_10": 0.13251468995586038, "mean_pred_prob_last_25": 0.07340922486037016, "mean_pred_prob_last_50": 0.04393428949988447, "mean_token_accuracy": 0.8709449529647827, "step": 3060 }, { "epoch": 0.05457486711819814, "grad_norm": 0.9040865339943892, "learning_rate": 0.0001, "loss": 1.3357, "mean_abs_error": 987.5396399282629, "mean_abs_error_last_10": 364.1672349578691, "mean_abs_error_last_25": 449.4802106667009, "mean_abs_error_last_50": 612.4318863849886, "mean_pred_prob": 0.0208185732975835, "mean_pred_prob_last_10": 0.11901256451092194, "mean_pred_prob_last_25": 0.06255009776214138, "mean_pred_prob_last_50": 0.03664808508183341, "mean_token_accuracy": 0.8758454501628876, "step": 3070 }, { "epoch": 0.05475263541500009, "grad_norm": 1.3521113135253842, "learning_rate": 0.0001, "loss": 1.2478, "mean_abs_error": 380.80029559285015, "mean_abs_error_last_10": 165.44097413626167, "mean_abs_error_last_25": 172.31331546782982, "mean_abs_error_last_50": 199.62961840258257, "mean_pred_prob": 0.028281019162386657, "mean_pred_prob_last_10": 0.13641851861029863, "mean_pred_prob_last_25": 0.07656853403896094, "mean_pred_prob_last_50": 0.04785336018539965, "mean_token_accuracy": 0.87413769364357, "step": 3080 }, { "epoch": 0.05493040371180204, "grad_norm": 1.9419518809772658, "learning_rate": 0.0001, "loss": 1.2615, "mean_abs_error": 1170.1113509803695, "mean_abs_error_last_10": 440.50537734411455, "mean_abs_error_last_25": 522.1816806014582, "mean_abs_error_last_50": 718.3509962892615, "mean_pred_prob": 0.02268009307590546, "mean_pred_prob_last_10": 0.12396736468654126, "mean_pred_prob_last_25": 0.06698846181679982, "mean_pred_prob_last_50": 0.03979775174811948, "mean_token_accuracy": 0.8720145523548126, "step": 3090 }, { "epoch": 0.05510817200860398, "grad_norm": 1.1734132871541207, "learning_rate": 0.0001, "loss": 1.2755, "mean_abs_error": 173.75555044706974, "mean_abs_error_last_10": 80.19816812493107, "mean_abs_error_last_25": 74.76638621694713, "mean_abs_error_last_50": 101.01752513562863, "mean_pred_prob": 0.03267046827822924, "mean_pred_prob_last_10": 0.17996100336313248, "mean_pred_prob_last_25": 0.09659301787614823, "mean_pred_prob_last_50": 0.0571459710597992, "mean_token_accuracy": 0.8832781672477722, "step": 3100 }, { "epoch": 0.055285940305405935, "grad_norm": 4.251917326192509, "learning_rate": 0.0001, "loss": 1.3083, "mean_abs_error": 1108.1626233483962, "mean_abs_error_last_10": 366.483733318459, "mean_abs_error_last_25": 444.14764653521013, "mean_abs_error_last_50": 617.3462428527399, "mean_pred_prob": 0.017834479254088365, "mean_pred_prob_last_10": 0.09992490921868011, "mean_pred_prob_last_25": 0.05255802797910292, "mean_pred_prob_last_50": 0.031124190159607677, "mean_token_accuracy": 0.8704800128936767, "step": 3110 }, { "epoch": 0.05546370860220788, "grad_norm": 1.702597613934745, "learning_rate": 0.0001, "loss": 1.3127, "mean_abs_error": 941.173300475084, "mean_abs_error_last_10": 234.1832359033023, "mean_abs_error_last_25": 272.4121257005132, "mean_abs_error_last_50": 444.5980114537709, "mean_pred_prob": 0.019104257627623154, "mean_pred_prob_last_10": 0.10808502957224846, "mean_pred_prob_last_25": 0.05799499093554914, "mean_pred_prob_last_50": 0.03358645734842867, "mean_token_accuracy": 0.8725282192230225, "step": 3120 }, { "epoch": 0.05564147689900983, "grad_norm": 1.6981621943308967, "learning_rate": 0.0001, "loss": 1.2166, "mean_abs_error": 444.4105820508087, "mean_abs_error_last_10": 128.28396838335885, "mean_abs_error_last_25": 153.59330230684745, "mean_abs_error_last_50": 237.1422018650979, "mean_pred_prob": 0.0304211214534007, "mean_pred_prob_last_10": 0.16931086354888975, "mean_pred_prob_last_25": 0.09202518165111542, "mean_pred_prob_last_50": 0.05375215436797589, "mean_token_accuracy": 0.8889124631881714, "step": 3130 }, { "epoch": 0.05581924519581178, "grad_norm": 1.9285749994792942, "learning_rate": 0.0001, "loss": 1.2689, "mean_abs_error": 346.7489504448008, "mean_abs_error_last_10": 125.69482819378015, "mean_abs_error_last_25": 125.87844321153325, "mean_abs_error_last_50": 176.02932414035917, "mean_pred_prob": 0.02739342409186065, "mean_pred_prob_last_10": 0.14785582665354013, "mean_pred_prob_last_25": 0.08045079577714205, "mean_pred_prob_last_50": 0.047812403691932556, "mean_token_accuracy": 0.8678937613964081, "step": 3140 }, { "epoch": 0.055997013492613726, "grad_norm": 0.9895204261237732, "learning_rate": 0.0001, "loss": 1.2878, "mean_abs_error": 745.2568477093221, "mean_abs_error_last_10": 215.6961500090521, "mean_abs_error_last_25": 255.25333196756415, "mean_abs_error_last_50": 373.6153166551726, "mean_pred_prob": 0.018493432871764526, "mean_pred_prob_last_10": 0.10751605171244591, "mean_pred_prob_last_25": 0.05468302530935034, "mean_pred_prob_last_50": 0.03228159516584128, "mean_token_accuracy": 0.8784126877784729, "step": 3150 }, { "epoch": 0.05617478178941568, "grad_norm": 2.44863111761698, "learning_rate": 0.0001, "loss": 1.3639, "mean_abs_error": 396.4779706334017, "mean_abs_error_last_10": 75.58199536492684, "mean_abs_error_last_25": 189.2331368041391, "mean_abs_error_last_50": 179.00964386176742, "mean_pred_prob": 0.03187702149152756, "mean_pred_prob_last_10": 0.17016653567552567, "mean_pred_prob_last_25": 0.09207275118678808, "mean_pred_prob_last_50": 0.05517488862387836, "mean_token_accuracy": 0.869613093137741, "step": 3160 }, { "epoch": 0.056352550086217625, "grad_norm": 3.1754232985247404, "learning_rate": 0.0001, "loss": 1.2633, "mean_abs_error": 588.4794324961047, "mean_abs_error_last_10": 135.2098030049365, "mean_abs_error_last_25": 159.8194919411804, "mean_abs_error_last_50": 267.2887135959424, "mean_pred_prob": 0.024781671026721595, "mean_pred_prob_last_10": 0.1389715252444148, "mean_pred_prob_last_25": 0.074109064694494, "mean_pred_prob_last_50": 0.04349518739618361, "mean_token_accuracy": 0.8832952499389648, "step": 3170 }, { "epoch": 0.05653031838301957, "grad_norm": 1.7327056481340977, "learning_rate": 0.0001, "loss": 1.3053, "mean_abs_error": 452.35153094478744, "mean_abs_error_last_10": 151.58845655187153, "mean_abs_error_last_25": 173.4295190884782, "mean_abs_error_last_50": 219.065433308717, "mean_pred_prob": 0.021552138752304016, "mean_pred_prob_last_10": 0.11992214117199182, "mean_pred_prob_last_25": 0.06349640255793929, "mean_pred_prob_last_50": 0.037502889474853876, "mean_token_accuracy": 0.8712295293807983, "step": 3180 }, { "epoch": 0.05670808667982152, "grad_norm": 2.1158680611057634, "learning_rate": 0.0001, "loss": 1.2713, "mean_abs_error": 359.6498108099059, "mean_abs_error_last_10": 91.80206292591194, "mean_abs_error_last_25": 137.179079776855, "mean_abs_error_last_50": 189.909802842535, "mean_pred_prob": 0.023086805432103574, "mean_pred_prob_last_10": 0.13192831221967935, "mean_pred_prob_last_25": 0.06988002760335803, "mean_pred_prob_last_50": 0.041184833785519, "mean_token_accuracy": 0.8630586326122284, "step": 3190 }, { "epoch": 0.05688585497662347, "grad_norm": 1.1551852161848692, "learning_rate": 0.0001, "loss": 1.2586, "mean_abs_error": 611.82979849745, "mean_abs_error_last_10": 159.7773187302923, "mean_abs_error_last_25": 200.21041204280172, "mean_abs_error_last_50": 295.5123253685896, "mean_pred_prob": 0.014440181944519282, "mean_pred_prob_last_10": 0.08826515898108482, "mean_pred_prob_last_25": 0.044580376520752904, "mean_pred_prob_last_50": 0.025806986493989825, "mean_token_accuracy": 0.8768698692321777, "step": 3200 }, { "epoch": 0.057063623273425415, "grad_norm": 1.14832199048262, "learning_rate": 0.0001, "loss": 1.3535, "mean_abs_error": 1087.647693808654, "mean_abs_error_last_10": 548.9228566206433, "mean_abs_error_last_25": 659.4612918985079, "mean_abs_error_last_50": 821.5304659093501, "mean_pred_prob": 0.019148146004590673, "mean_pred_prob_last_10": 0.10957334799168165, "mean_pred_prob_last_25": 0.05735779302194714, "mean_pred_prob_last_50": 0.03350320294703124, "mean_token_accuracy": 0.8582743406295776, "step": 3210 }, { "epoch": 0.05724139157022737, "grad_norm": 1.224668307748469, "learning_rate": 0.0001, "loss": 1.2642, "mean_abs_error": 1125.0576815819827, "mean_abs_error_last_10": 559.651422123151, "mean_abs_error_last_25": 660.8538149372805, "mean_abs_error_last_50": 781.0788941743274, "mean_pred_prob": 0.014824195677647367, "mean_pred_prob_last_10": 0.08385722937819082, "mean_pred_prob_last_25": 0.043937561646453106, "mean_pred_prob_last_50": 0.025962115134461783, "mean_token_accuracy": 0.8663264214992523, "step": 3220 }, { "epoch": 0.057419159867029314, "grad_norm": 1.3338511334396905, "learning_rate": 0.0001, "loss": 1.3429, "mean_abs_error": 196.8486627691594, "mean_abs_error_last_10": 61.918781165136394, "mean_abs_error_last_25": 76.42148121042212, "mean_abs_error_last_50": 105.515514483517, "mean_pred_prob": 0.026435026433318853, "mean_pred_prob_last_10": 0.15932936184108257, "mean_pred_prob_last_25": 0.08090301305055618, "mean_pred_prob_last_50": 0.04683505762368441, "mean_token_accuracy": 0.8624171555042267, "step": 3230 }, { "epoch": 0.05759692816383126, "grad_norm": 1.6037376512406782, "learning_rate": 0.0001, "loss": 1.3012, "mean_abs_error": 325.94685773735876, "mean_abs_error_last_10": 115.987551081257, "mean_abs_error_last_25": 146.14831101170196, "mean_abs_error_last_50": 202.70882769008568, "mean_pred_prob": 0.019035905692726375, "mean_pred_prob_last_10": 0.11025383695960045, "mean_pred_prob_last_25": 0.05713379196822643, "mean_pred_prob_last_50": 0.03349905051290989, "mean_token_accuracy": 0.8758323132991791, "step": 3240 }, { "epoch": 0.05777469646063321, "grad_norm": 1.5630224688629046, "learning_rate": 0.0001, "loss": 1.3827, "mean_abs_error": 968.6818979760877, "mean_abs_error_last_10": 286.5609814066396, "mean_abs_error_last_25": 382.74209705944793, "mean_abs_error_last_50": 573.1100361907573, "mean_pred_prob": 0.01577012744965032, "mean_pred_prob_last_10": 0.09555036083329468, "mean_pred_prob_last_25": 0.04827096770750359, "mean_pred_prob_last_50": 0.027940700130420736, "mean_token_accuracy": 0.8681124389171601, "step": 3250 }, { "epoch": 0.05795246475743516, "grad_norm": 1.7795049043686342, "learning_rate": 0.0001, "loss": 1.3195, "mean_abs_error": 494.6847999869101, "mean_abs_error_last_10": 226.13989310185215, "mean_abs_error_last_25": 256.79883507061436, "mean_abs_error_last_50": 325.7557596245025, "mean_pred_prob": 0.025543783674947917, "mean_pred_prob_last_10": 0.14310225285589695, "mean_pred_prob_last_25": 0.07585430117323995, "mean_pred_prob_last_50": 0.04467791637871414, "mean_token_accuracy": 0.8702375054359436, "step": 3260 }, { "epoch": 0.058130233054237104, "grad_norm": 1.126759107580002, "learning_rate": 0.0001, "loss": 1.2485, "mean_abs_error": 1242.2478041594454, "mean_abs_error_last_10": 482.019305036243, "mean_abs_error_last_25": 592.5784362981955, "mean_abs_error_last_50": 794.6263882977887, "mean_pred_prob": 0.02528655979549512, "mean_pred_prob_last_10": 0.13388744612748268, "mean_pred_prob_last_25": 0.07373295847792179, "mean_pred_prob_last_50": 0.04392703612684272, "mean_token_accuracy": 0.873485940694809, "step": 3270 }, { "epoch": 0.05830800135103906, "grad_norm": 2.5453510355419158, "learning_rate": 0.0001, "loss": 1.2635, "mean_abs_error": 1005.8167762747323, "mean_abs_error_last_10": 291.80961560135927, "mean_abs_error_last_25": 336.66741654733215, "mean_abs_error_last_50": 518.3773880952353, "mean_pred_prob": 0.019928364316001534, "mean_pred_prob_last_10": 0.10984986985567957, "mean_pred_prob_last_25": 0.059321457496844233, "mean_pred_prob_last_50": 0.03502506911754608, "mean_token_accuracy": 0.8615797162055969, "step": 3280 }, { "epoch": 0.058485769647841, "grad_norm": 1.821623994289705, "learning_rate": 0.0001, "loss": 1.3221, "mean_abs_error": 1883.439574196782, "mean_abs_error_last_10": 1109.9240412674394, "mean_abs_error_last_25": 1252.4236328966658, "mean_abs_error_last_50": 1502.1888636480726, "mean_pred_prob": 0.028475185457500628, "mean_pred_prob_last_10": 0.16319141496205702, "mean_pred_prob_last_25": 0.08543355311849155, "mean_pred_prob_last_50": 0.04988688361045206, "mean_token_accuracy": 0.865997701883316, "step": 3290 }, { "epoch": 0.05866353794464295, "grad_norm": 1.5723093745529029, "learning_rate": 0.0001, "loss": 1.3053, "mean_abs_error": 250.33434500188187, "mean_abs_error_last_10": 81.88973972146613, "mean_abs_error_last_25": 118.55333857780015, "mean_abs_error_last_50": 154.8031117109233, "mean_pred_prob": 0.024443189753219484, "mean_pred_prob_last_10": 0.14532718919217585, "mean_pred_prob_last_25": 0.0748808722011745, "mean_pred_prob_last_50": 0.043614540994167325, "mean_token_accuracy": 0.868739801645279, "step": 3300 }, { "epoch": 0.0588413062414449, "grad_norm": 1.0132631770130822, "learning_rate": 0.0001, "loss": 1.2734, "mean_abs_error": 350.50317232542045, "mean_abs_error_last_10": 143.61047032125032, "mean_abs_error_last_25": 178.4129746198041, "mean_abs_error_last_50": 252.80121291092223, "mean_pred_prob": 0.03244392615742982, "mean_pred_prob_last_10": 0.17708760369569063, "mean_pred_prob_last_25": 0.09645468760281802, "mean_pred_prob_last_50": 0.0570471006911248, "mean_token_accuracy": 0.8683993935585022, "step": 3310 }, { "epoch": 0.05901907453824685, "grad_norm": 1.3260613003598907, "learning_rate": 0.0001, "loss": 1.244, "mean_abs_error": 837.4433525724953, "mean_abs_error_last_10": 245.69986806044898, "mean_abs_error_last_25": 301.04130285433126, "mean_abs_error_last_50": 448.4222012266617, "mean_pred_prob": 0.022496663773199543, "mean_pred_prob_last_10": 0.11687988440971822, "mean_pred_prob_last_25": 0.06444987456197851, "mean_pred_prob_last_50": 0.038871088984888044, "mean_token_accuracy": 0.876492154598236, "step": 3320 }, { "epoch": 0.0591968428350488, "grad_norm": 1.6984871111330713, "learning_rate": 0.0001, "loss": 1.3054, "mean_abs_error": 1115.5471074155703, "mean_abs_error_last_10": 456.6080285257773, "mean_abs_error_last_25": 563.1069587738068, "mean_abs_error_last_50": 740.5301931325499, "mean_pred_prob": 0.022210383105266372, "mean_pred_prob_last_10": 0.1269996524148155, "mean_pred_prob_last_25": 0.06642433399392758, "mean_pred_prob_last_50": 0.039095886050199626, "mean_token_accuracy": 0.8720869541168212, "step": 3330 }, { "epoch": 0.059374611131850746, "grad_norm": 1.069050497184329, "learning_rate": 0.0001, "loss": 1.2371, "mean_abs_error": 187.67411474229743, "mean_abs_error_last_10": 57.256182254844816, "mean_abs_error_last_25": 59.132670455370565, "mean_abs_error_last_50": 102.07843909205455, "mean_pred_prob": 0.024021538719534875, "mean_pred_prob_last_10": 0.13940860703587532, "mean_pred_prob_last_25": 0.07333584539592267, "mean_pred_prob_last_50": 0.04258403209969401, "mean_token_accuracy": 0.8678958892822266, "step": 3340 }, { "epoch": 0.05955237942865269, "grad_norm": 1.7447350781748336, "learning_rate": 0.0001, "loss": 1.2565, "mean_abs_error": 1912.0667537163004, "mean_abs_error_last_10": 810.8829496323473, "mean_abs_error_last_25": 975.763125193081, "mean_abs_error_last_50": 1270.9974925976721, "mean_pred_prob": 0.017066897785116452, "mean_pred_prob_last_10": 0.0989992046088446, "mean_pred_prob_last_25": 0.05152787288825493, "mean_pred_prob_last_50": 0.030245681179803797, "mean_token_accuracy": 0.8696943044662475, "step": 3350 }, { "epoch": 0.059730147725454645, "grad_norm": 2.1410070048752043, "learning_rate": 0.0001, "loss": 1.2546, "mean_abs_error": 242.01514225660316, "mean_abs_error_last_10": 71.40942857685012, "mean_abs_error_last_25": 83.87422709717211, "mean_abs_error_last_50": 112.52505743030329, "mean_pred_prob": 0.030205979477614164, "mean_pred_prob_last_10": 0.1662285912781954, "mean_pred_prob_last_25": 0.09032595800235868, "mean_pred_prob_last_50": 0.05321606704965234, "mean_token_accuracy": 0.8691145777702332, "step": 3360 }, { "epoch": 0.05990791602225659, "grad_norm": 1.7225662912086661, "learning_rate": 0.0001, "loss": 1.2248, "mean_abs_error": 850.2204462725101, "mean_abs_error_last_10": 276.1775355164724, "mean_abs_error_last_25": 316.63348469962466, "mean_abs_error_last_50": 454.0795813233557, "mean_pred_prob": 0.013949672793387435, "mean_pred_prob_last_10": 0.08224031446152366, "mean_pred_prob_last_25": 0.042539674509316686, "mean_pred_prob_last_50": 0.02482876559952274, "mean_token_accuracy": 0.8779279470443726, "step": 3370 }, { "epoch": 0.06008568431905854, "grad_norm": 0.6631807942484758, "learning_rate": 0.0001, "loss": 1.2924, "mean_abs_error": 970.7421580998255, "mean_abs_error_last_10": 506.5177679566714, "mean_abs_error_last_25": 576.2783156001205, "mean_abs_error_last_50": 720.020923881109, "mean_pred_prob": 0.031124521246238147, "mean_pred_prob_last_10": 0.16894571077427828, "mean_pred_prob_last_25": 0.091693422505341, "mean_pred_prob_last_50": 0.0542945141409291, "mean_token_accuracy": 0.8630667626857758, "step": 3380 }, { "epoch": 0.06026345261586049, "grad_norm": 1.5089185792131476, "learning_rate": 0.0001, "loss": 1.2734, "mean_abs_error": 1365.9858421782965, "mean_abs_error_last_10": 429.96243053817807, "mean_abs_error_last_25": 581.1223778697225, "mean_abs_error_last_50": 786.960815920719, "mean_pred_prob": 0.01760986793669872, "mean_pred_prob_last_10": 0.09680626939516515, "mean_pred_prob_last_25": 0.05256606785405893, "mean_pred_prob_last_50": 0.03092479824263137, "mean_token_accuracy": 0.8674476623535157, "step": 3390 }, { "epoch": 0.060441220912662436, "grad_norm": 1.4043109210236266, "learning_rate": 0.0001, "loss": 1.2749, "mean_abs_error": 1004.1242223019017, "mean_abs_error_last_10": 405.07502764635376, "mean_abs_error_last_25": 464.0976279888919, "mean_abs_error_last_50": 629.9671067865168, "mean_pred_prob": 0.026491653207631315, "mean_pred_prob_last_10": 0.1485255744366441, "mean_pred_prob_last_25": 0.07960370954533573, "mean_pred_prob_last_50": 0.04679056151071563, "mean_token_accuracy": 0.8670904219150544, "step": 3400 }, { "epoch": 0.06061898920946438, "grad_norm": 1.9546511483975544, "learning_rate": 0.0001, "loss": 1.2696, "mean_abs_error": 746.9045989957314, "mean_abs_error_last_10": 198.06267820057127, "mean_abs_error_last_25": 264.8617414133413, "mean_abs_error_last_50": 404.2700498966904, "mean_pred_prob": 0.0320159202296054, "mean_pred_prob_last_10": 0.17024925025762058, "mean_pred_prob_last_25": 0.09240342471166514, "mean_pred_prob_last_50": 0.055160843610065055, "mean_token_accuracy": 0.8718893527984619, "step": 3410 }, { "epoch": 0.060796757506266334, "grad_norm": 0.9957011279536759, "learning_rate": 0.0001, "loss": 1.1874, "mean_abs_error": 514.602694411424, "mean_abs_error_last_10": 123.89003396110357, "mean_abs_error_last_25": 139.50888488163508, "mean_abs_error_last_50": 229.06473255714718, "mean_pred_prob": 0.02410198568832129, "mean_pred_prob_last_10": 0.1365866676904261, "mean_pred_prob_last_25": 0.07115588886663318, "mean_pred_prob_last_50": 0.04224070366472006, "mean_token_accuracy": 0.8768127799034119, "step": 3420 }, { "epoch": 0.06097452580306828, "grad_norm": 1.471078022581464, "learning_rate": 0.0001, "loss": 1.182, "mean_abs_error": 326.6466972940566, "mean_abs_error_last_10": 139.02477501134598, "mean_abs_error_last_25": 156.7802769721761, "mean_abs_error_last_50": 227.41099969407827, "mean_pred_prob": 0.022300514997914432, "mean_pred_prob_last_10": 0.12303454149514437, "mean_pred_prob_last_25": 0.06597820036113262, "mean_pred_prob_last_50": 0.038897321280092004, "mean_token_accuracy": 0.8755409836769104, "step": 3430 }, { "epoch": 0.061152294099870226, "grad_norm": 0.9471533547478835, "learning_rate": 0.0001, "loss": 1.2824, "mean_abs_error": 386.3101781411398, "mean_abs_error_last_10": 71.57894155707638, "mean_abs_error_last_25": 152.74445769878744, "mean_abs_error_last_50": 219.5542798576581, "mean_pred_prob": 0.03123675889801234, "mean_pred_prob_last_10": 0.17193492418155074, "mean_pred_prob_last_25": 0.09212956796400248, "mean_pred_prob_last_50": 0.05444481368176639, "mean_token_accuracy": 0.8697978436946869, "step": 3440 }, { "epoch": 0.06133006239667218, "grad_norm": 1.3589503427904237, "learning_rate": 0.0001, "loss": 1.2192, "mean_abs_error": 183.23205640448927, "mean_abs_error_last_10": 68.2332762327973, "mean_abs_error_last_25": 72.78208019035563, "mean_abs_error_last_50": 107.70689826242037, "mean_pred_prob": 0.03329795002937317, "mean_pred_prob_last_10": 0.18139988277107477, "mean_pred_prob_last_25": 0.0981624012812972, "mean_pred_prob_last_50": 0.0581998152192682, "mean_token_accuracy": 0.8793485462665558, "step": 3450 }, { "epoch": 0.061507830693474125, "grad_norm": 2.0441401509732104, "learning_rate": 0.0001, "loss": 1.2192, "mean_abs_error": 909.6235057671714, "mean_abs_error_last_10": 349.3776709660171, "mean_abs_error_last_25": 434.07180165435665, "mean_abs_error_last_50": 588.0542270360489, "mean_pred_prob": 0.018168885112390853, "mean_pred_prob_last_10": 0.10720954936696217, "mean_pred_prob_last_25": 0.055434971128124746, "mean_pred_prob_last_50": 0.03219517762772739, "mean_token_accuracy": 0.8770839631557464, "step": 3460 }, { "epoch": 0.06168559899027608, "grad_norm": 0.9616617402013786, "learning_rate": 0.0001, "loss": 1.2483, "mean_abs_error": 671.5076373354218, "mean_abs_error_last_10": 183.56628184525795, "mean_abs_error_last_25": 239.09325255840048, "mean_abs_error_last_50": 350.66855199897293, "mean_pred_prob": 0.022461514655151404, "mean_pred_prob_last_10": 0.1311771429143846, "mean_pred_prob_last_25": 0.06805643329280428, "mean_pred_prob_last_50": 0.03956167235737666, "mean_token_accuracy": 0.874638020992279, "step": 3470 }, { "epoch": 0.061863367287078024, "grad_norm": 1.251997694938154, "learning_rate": 0.0001, "loss": 1.1898, "mean_abs_error": 618.5082984917119, "mean_abs_error_last_10": 167.5762659599701, "mean_abs_error_last_25": 249.91959295602356, "mean_abs_error_last_50": 373.2965009006269, "mean_pred_prob": 0.018255505268462, "mean_pred_prob_last_10": 0.1007579306140542, "mean_pred_prob_last_25": 0.05431057456880808, "mean_pred_prob_last_50": 0.03199128182604909, "mean_token_accuracy": 0.8792300879955292, "step": 3480 }, { "epoch": 0.06204113558387997, "grad_norm": 1.1803579845570926, "learning_rate": 0.0001, "loss": 1.2431, "mean_abs_error": 361.7471861111128, "mean_abs_error_last_10": 147.43400940329528, "mean_abs_error_last_25": 227.9040375454387, "mean_abs_error_last_50": 253.30289141021453, "mean_pred_prob": 0.027714954921975732, "mean_pred_prob_last_10": 0.15124887889251112, "mean_pred_prob_last_25": 0.08193556172773242, "mean_pred_prob_last_50": 0.04867613895330578, "mean_token_accuracy": 0.8735067129135132, "step": 3490 }, { "epoch": 0.06221890388068192, "grad_norm": 2.314463909308319, "learning_rate": 0.0001, "loss": 1.2107, "mean_abs_error": 250.95255548778263, "mean_abs_error_last_10": 55.93931508041012, "mean_abs_error_last_25": 80.99670658042407, "mean_abs_error_last_50": 133.49235825648748, "mean_pred_prob": 0.027645385451614858, "mean_pred_prob_last_10": 0.1513938581570983, "mean_pred_prob_last_25": 0.0815082335844636, "mean_pred_prob_last_50": 0.04831523918546736, "mean_token_accuracy": 0.8796909511089325, "step": 3500 }, { "epoch": 0.06239667217748387, "grad_norm": 1.5799562671425829, "learning_rate": 0.0001, "loss": 1.2606, "mean_abs_error": 249.86704761940177, "mean_abs_error_last_10": 106.20160640825672, "mean_abs_error_last_25": 111.8246821876332, "mean_abs_error_last_50": 145.10766574000846, "mean_pred_prob": 0.02196557060815394, "mean_pred_prob_last_10": 0.11969360001385212, "mean_pred_prob_last_25": 0.06441388204693795, "mean_pred_prob_last_50": 0.03840046478435397, "mean_token_accuracy": 0.8702664613723755, "step": 3510 }, { "epoch": 0.06257444047428581, "grad_norm": 0.8796894374822296, "learning_rate": 0.0001, "loss": 1.2125, "mean_abs_error": 391.1531257007748, "mean_abs_error_last_10": 107.02350628957522, "mean_abs_error_last_25": 175.52218539107665, "mean_abs_error_last_50": 240.84835746646658, "mean_pred_prob": 0.020143682835623622, "mean_pred_prob_last_10": 0.11722710393369198, "mean_pred_prob_last_25": 0.061107791308313605, "mean_pred_prob_last_50": 0.03565197982825339, "mean_token_accuracy": 0.8704193830490112, "step": 3520 }, { "epoch": 0.06275220877108777, "grad_norm": 2.9220120842433235, "learning_rate": 0.0001, "loss": 1.2897, "mean_abs_error": 1033.0747476668778, "mean_abs_error_last_10": 556.446317640529, "mean_abs_error_last_25": 627.733537131856, "mean_abs_error_last_50": 760.5559410661838, "mean_pred_prob": 0.024926376610528676, "mean_pred_prob_last_10": 0.13744293188501616, "mean_pred_prob_last_25": 0.07363516040350078, "mean_pred_prob_last_50": 0.04353252703585895, "mean_token_accuracy": 0.870444941520691, "step": 3530 }, { "epoch": 0.0629299770678897, "grad_norm": 1.4431532650625674, "learning_rate": 0.0001, "loss": 1.2892, "mean_abs_error": 734.359009740604, "mean_abs_error_last_10": 148.9335795322041, "mean_abs_error_last_25": 189.73867061919435, "mean_abs_error_last_50": 332.9019384619205, "mean_pred_prob": 0.020615168078802527, "mean_pred_prob_last_10": 0.10135952085256576, "mean_pred_prob_last_25": 0.058098424877971414, "mean_pred_prob_last_50": 0.03536261082626879, "mean_token_accuracy": 0.8671153366565705, "step": 3540 }, { "epoch": 0.06310774536469166, "grad_norm": 1.9949204901561788, "learning_rate": 0.0001, "loss": 1.2125, "mean_abs_error": 632.0049746308957, "mean_abs_error_last_10": 160.8045191658207, "mean_abs_error_last_25": 262.3694175156409, "mean_abs_error_last_50": 395.8701804691051, "mean_pred_prob": 0.029521040603867732, "mean_pred_prob_last_10": 0.16521192645886912, "mean_pred_prob_last_25": 0.08741323816939257, "mean_pred_prob_last_50": 0.0518147561524529, "mean_token_accuracy": 0.8811122179031372, "step": 3550 }, { "epoch": 0.06328551366149361, "grad_norm": 1.766636007296537, "learning_rate": 0.0001, "loss": 1.2719, "mean_abs_error": 745.5912153164784, "mean_abs_error_last_10": 134.65129164144588, "mean_abs_error_last_25": 197.11276229201803, "mean_abs_error_last_50": 364.4075237136909, "mean_pred_prob": 0.019755520060425625, "mean_pred_prob_last_10": 0.10606902209110558, "mean_pred_prob_last_25": 0.05861947985831648, "mean_pred_prob_last_50": 0.03450920683098957, "mean_token_accuracy": 0.8729301333427429, "step": 3560 }, { "epoch": 0.06346328195829556, "grad_norm": 1.0998969307492008, "learning_rate": 0.0001, "loss": 1.2672, "mean_abs_error": 285.08250153965736, "mean_abs_error_last_10": 84.34580147825005, "mean_abs_error_last_25": 149.34268771598767, "mean_abs_error_last_50": 170.28633847127176, "mean_pred_prob": 0.027703737444244324, "mean_pred_prob_last_10": 0.14960814863443375, "mean_pred_prob_last_25": 0.08125562844797969, "mean_pred_prob_last_50": 0.04841694198548794, "mean_token_accuracy": 0.8626737236976624, "step": 3570 }, { "epoch": 0.0636410502550975, "grad_norm": 1.318610482230775, "learning_rate": 0.0001, "loss": 1.2467, "mean_abs_error": 1285.4713393105976, "mean_abs_error_last_10": 560.5070243273528, "mean_abs_error_last_25": 706.8467782780914, "mean_abs_error_last_50": 879.6955601863623, "mean_pred_prob": 0.01994218210456893, "mean_pred_prob_last_10": 0.11027658634411637, "mean_pred_prob_last_25": 0.05871643877471797, "mean_pred_prob_last_50": 0.03478442639752757, "mean_token_accuracy": 0.8608072698116302, "step": 3580 }, { "epoch": 0.06381881855189946, "grad_norm": 1.505119201174614, "learning_rate": 0.0001, "loss": 1.261, "mean_abs_error": 730.6927205635532, "mean_abs_error_last_10": 297.4420437827838, "mean_abs_error_last_25": 380.31636386504954, "mean_abs_error_last_50": 518.2507019229915, "mean_pred_prob": 0.01997418700484559, "mean_pred_prob_last_10": 0.10785864944919013, "mean_pred_prob_last_25": 0.05889981659129262, "mean_pred_prob_last_50": 0.03473221208259929, "mean_token_accuracy": 0.86872438788414, "step": 3590 }, { "epoch": 0.06399658684870141, "grad_norm": 1.7508528460751027, "learning_rate": 0.0001, "loss": 1.2536, "mean_abs_error": 837.6158632410097, "mean_abs_error_last_10": 315.1988487738614, "mean_abs_error_last_25": 410.9498048354429, "mean_abs_error_last_50": 502.5298582091814, "mean_pred_prob": 0.019595227637910283, "mean_pred_prob_last_10": 0.10734992126526777, "mean_pred_prob_last_25": 0.05733054628944956, "mean_pred_prob_last_50": 0.034164150079595855, "mean_token_accuracy": 0.8626857221126556, "step": 3600 }, { "epoch": 0.06417435514550335, "grad_norm": 2.277612217054273, "learning_rate": 0.0001, "loss": 1.2283, "mean_abs_error": 367.5505011696006, "mean_abs_error_last_10": 110.7152477319891, "mean_abs_error_last_25": 125.730504420198, "mean_abs_error_last_50": 197.2956093344081, "mean_pred_prob": 0.03136508848983795, "mean_pred_prob_last_10": 0.16861151661723853, "mean_pred_prob_last_25": 0.09027570746839046, "mean_pred_prob_last_50": 0.0548009738791734, "mean_token_accuracy": 0.8728612542152405, "step": 3610 }, { "epoch": 0.0643521234423053, "grad_norm": 1.571087875404634, "learning_rate": 0.0001, "loss": 1.2288, "mean_abs_error": 856.7078715760535, "mean_abs_error_last_10": 471.112053355032, "mean_abs_error_last_25": 517.5902439805009, "mean_abs_error_last_50": 609.6117943332915, "mean_pred_prob": 0.017850809903757182, "mean_pred_prob_last_10": 0.10487051560776309, "mean_pred_prob_last_25": 0.054111107645439915, "mean_pred_prob_last_50": 0.03155433778592851, "mean_token_accuracy": 0.8696537911891937, "step": 3620 }, { "epoch": 0.06452989173910725, "grad_norm": 1.361711748883097, "learning_rate": 0.0001, "loss": 1.2681, "mean_abs_error": 1066.2671299583467, "mean_abs_error_last_10": 348.47535521573144, "mean_abs_error_last_25": 377.2822798044421, "mean_abs_error_last_50": 558.0081094054419, "mean_pred_prob": 0.02154821475269273, "mean_pred_prob_last_10": 0.10738239526981488, "mean_pred_prob_last_25": 0.05932324988534674, "mean_pred_prob_last_50": 0.036197107745101674, "mean_token_accuracy": 0.865079152584076, "step": 3630 }, { "epoch": 0.06470766003590919, "grad_norm": 1.0137427457037276, "learning_rate": 0.0001, "loss": 1.2537, "mean_abs_error": 314.03812777995836, "mean_abs_error_last_10": 218.15051751772967, "mean_abs_error_last_25": 231.3737848693375, "mean_abs_error_last_50": 222.04133425234062, "mean_pred_prob": 0.01861880854703486, "mean_pred_prob_last_10": 0.11032761577516795, "mean_pred_prob_last_25": 0.05637896871194244, "mean_pred_prob_last_50": 0.032879711221903564, "mean_token_accuracy": 0.875254338979721, "step": 3640 }, { "epoch": 0.06488542833271115, "grad_norm": 0.8928739913926563, "learning_rate": 0.0001, "loss": 1.2468, "mean_abs_error": 1241.210726930491, "mean_abs_error_last_10": 626.9772474475322, "mean_abs_error_last_25": 726.9773283848307, "mean_abs_error_last_50": 893.1202478030016, "mean_pred_prob": 0.015640768822049723, "mean_pred_prob_last_10": 0.08711512419395148, "mean_pred_prob_last_25": 0.04646863156522159, "mean_pred_prob_last_50": 0.02743904209055472, "mean_token_accuracy": 0.8578274548053741, "step": 3650 }, { "epoch": 0.0650631966295131, "grad_norm": 1.2232791140892525, "learning_rate": 0.0001, "loss": 1.2784, "mean_abs_error": 1387.778735898962, "mean_abs_error_last_10": 564.1096200090658, "mean_abs_error_last_25": 716.127371634288, "mean_abs_error_last_50": 959.4677126583963, "mean_pred_prob": 0.01891453292191727, "mean_pred_prob_last_10": 0.11535786242166068, "mean_pred_prob_last_25": 0.058553160494193436, "mean_pred_prob_last_50": 0.03351391822070582, "mean_token_accuracy": 0.8768161475658417, "step": 3660 }, { "epoch": 0.06524096492631504, "grad_norm": 1.3366357930189, "learning_rate": 0.0001, "loss": 1.2796, "mean_abs_error": 1042.63465107734, "mean_abs_error_last_10": 674.2950654496848, "mean_abs_error_last_25": 721.357722903661, "mean_abs_error_last_50": 826.2905273599603, "mean_pred_prob": 0.04188208534906153, "mean_pred_prob_last_10": 0.20401392072235466, "mean_pred_prob_last_25": 0.11645016659604153, "mean_pred_prob_last_50": 0.07150568953802576, "mean_token_accuracy": 0.8715189635753632, "step": 3670 }, { "epoch": 0.06541873322311699, "grad_norm": 1.4323653847129827, "learning_rate": 0.0001, "loss": 1.2318, "mean_abs_error": 393.9274352287586, "mean_abs_error_last_10": 220.8984681453083, "mean_abs_error_last_25": 229.7460448922589, "mean_abs_error_last_50": 288.0877551534694, "mean_pred_prob": 0.026676149060949682, "mean_pred_prob_last_10": 0.14329752754420041, "mean_pred_prob_last_25": 0.07861620970070363, "mean_pred_prob_last_50": 0.046761102695018056, "mean_token_accuracy": 0.8768408894538879, "step": 3680 }, { "epoch": 0.06559650151991894, "grad_norm": 0.7358400257599174, "learning_rate": 0.0001, "loss": 1.2195, "mean_abs_error": 336.81765059358935, "mean_abs_error_last_10": 167.7334724780258, "mean_abs_error_last_25": 168.3648301154416, "mean_abs_error_last_50": 209.2918144642384, "mean_pred_prob": 0.026364106842083856, "mean_pred_prob_last_10": 0.14518493155483156, "mean_pred_prob_last_25": 0.07714701528893783, "mean_pred_prob_last_50": 0.04576209019869566, "mean_token_accuracy": 0.8701985955238343, "step": 3690 }, { "epoch": 0.06577426981672088, "grad_norm": 0.805756262707646, "learning_rate": 0.0001, "loss": 1.2484, "mean_abs_error": 1480.0648764542984, "mean_abs_error_last_10": 706.5752571472538, "mean_abs_error_last_25": 827.278572472215, "mean_abs_error_last_50": 1023.3363664579945, "mean_pred_prob": 0.01265515931590926, "mean_pred_prob_last_10": 0.07206073238339741, "mean_pred_prob_last_25": 0.03769265103037469, "mean_pred_prob_last_50": 0.02203268482699059, "mean_token_accuracy": 0.8696523070335388, "step": 3700 }, { "epoch": 0.06595203811352283, "grad_norm": 1.2547804845041222, "learning_rate": 0.0001, "loss": 1.241, "mean_abs_error": 1213.4731196091489, "mean_abs_error_last_10": 545.4711414920555, "mean_abs_error_last_25": 648.402967857696, "mean_abs_error_last_50": 821.8934991435592, "mean_pred_prob": 0.023417237217654475, "mean_pred_prob_last_10": 0.13324026504124048, "mean_pred_prob_last_25": 0.07028624558006413, "mean_pred_prob_last_50": 0.04125111938919872, "mean_token_accuracy": 0.8681505799293519, "step": 3710 }, { "epoch": 0.06612980641032479, "grad_norm": 1.727723563459397, "learning_rate": 0.0001, "loss": 1.2434, "mean_abs_error": 1489.929822317232, "mean_abs_error_last_10": 604.3195822432909, "mean_abs_error_last_25": 731.643308145563, "mean_abs_error_last_50": 972.3212108459938, "mean_pred_prob": 0.0200374482999905, "mean_pred_prob_last_10": 0.1073662146634888, "mean_pred_prob_last_25": 0.05825519085628912, "mean_pred_prob_last_50": 0.034647896853857676, "mean_token_accuracy": 0.874841821193695, "step": 3720 }, { "epoch": 0.06630757470712673, "grad_norm": 2.705630407822899, "learning_rate": 0.0001, "loss": 1.237, "mean_abs_error": 1504.1213877832845, "mean_abs_error_last_10": 801.0731517706821, "mean_abs_error_last_25": 889.3992019945584, "mean_abs_error_last_50": 1073.62054287638, "mean_pred_prob": 0.024773197463946416, "mean_pred_prob_last_10": 0.13540594003570733, "mean_pred_prob_last_25": 0.07239284566021524, "mean_pred_prob_last_50": 0.042947014304809274, "mean_token_accuracy": 0.8704407572746277, "step": 3730 }, { "epoch": 0.06648534300392868, "grad_norm": 1.8275618337377986, "learning_rate": 0.0001, "loss": 1.263, "mean_abs_error": 968.8799294912997, "mean_abs_error_last_10": 531.1449356336982, "mean_abs_error_last_25": 542.4269333055888, "mean_abs_error_last_50": 636.8208752809461, "mean_pred_prob": 0.028562897189112847, "mean_pred_prob_last_10": 0.15384428327379282, "mean_pred_prob_last_25": 0.08331693165237083, "mean_pred_prob_last_50": 0.049614893569378184, "mean_token_accuracy": 0.8603814601898193, "step": 3740 }, { "epoch": 0.06666311130073063, "grad_norm": 1.957028192639983, "learning_rate": 0.0001, "loss": 1.2538, "mean_abs_error": 1103.7910087897462, "mean_abs_error_last_10": 385.89993580270186, "mean_abs_error_last_25": 482.0706930439695, "mean_abs_error_last_50": 652.7997081476161, "mean_pred_prob": 0.02073825709812809, "mean_pred_prob_last_10": 0.1099419943871908, "mean_pred_prob_last_25": 0.0589944459265098, "mean_pred_prob_last_50": 0.0356668385618832, "mean_token_accuracy": 0.8715368270874023, "step": 3750 }, { "epoch": 0.06684087959753257, "grad_norm": 1.3389093482526186, "learning_rate": 0.0001, "loss": 1.1535, "mean_abs_error": 968.3791647129505, "mean_abs_error_last_10": 520.1882336291203, "mean_abs_error_last_25": 569.2011945817378, "mean_abs_error_last_50": 718.7640315044121, "mean_pred_prob": 0.02700225973385386, "mean_pred_prob_last_10": 0.15371502602356485, "mean_pred_prob_last_25": 0.0819598610396497, "mean_pred_prob_last_50": 0.04771527520060772, "mean_token_accuracy": 0.8688234686851501, "step": 3760 }, { "epoch": 0.06701864789433452, "grad_norm": 1.8529797932910763, "learning_rate": 0.0001, "loss": 1.2174, "mean_abs_error": 1242.6144560950129, "mean_abs_error_last_10": 493.9027901130063, "mean_abs_error_last_25": 601.2307421838467, "mean_abs_error_last_50": 804.3325107866254, "mean_pred_prob": 0.020395108725642784, "mean_pred_prob_last_10": 0.11029835717054084, "mean_pred_prob_last_25": 0.05957583267299924, "mean_pred_prob_last_50": 0.03545922941120807, "mean_token_accuracy": 0.8617818057537079, "step": 3770 }, { "epoch": 0.06719641619113648, "grad_norm": 1.7503753465082463, "learning_rate": 0.0001, "loss": 1.2583, "mean_abs_error": 262.1239485058586, "mean_abs_error_last_10": 94.33393819646761, "mean_abs_error_last_25": 84.40545240141094, "mean_abs_error_last_50": 126.45432221186677, "mean_pred_prob": 0.03750522704795003, "mean_pred_prob_last_10": 0.20190899148583413, "mean_pred_prob_last_25": 0.10940608084201812, "mean_pred_prob_last_50": 0.06503772297874093, "mean_token_accuracy": 0.865988838672638, "step": 3780 }, { "epoch": 0.06737418448793842, "grad_norm": 0.7862116632069145, "learning_rate": 0.0001, "loss": 1.2237, "mean_abs_error": 217.17998172362132, "mean_abs_error_last_10": 27.71796650259042, "mean_abs_error_last_25": 48.492257507590786, "mean_abs_error_last_50": 110.72353159668398, "mean_pred_prob": 0.03020478254184127, "mean_pred_prob_last_10": 0.1677361659705639, "mean_pred_prob_last_25": 0.08922525849193334, "mean_pred_prob_last_50": 0.05278393477201462, "mean_token_accuracy": 0.8695703268051147, "step": 3790 }, { "epoch": 0.06755195278474037, "grad_norm": 1.3470462536002372, "learning_rate": 0.0001, "loss": 1.1734, "mean_abs_error": 1496.7250486638604, "mean_abs_error_last_10": 711.7648045745731, "mean_abs_error_last_25": 824.4444862858157, "mean_abs_error_last_50": 1016.6088696202902, "mean_pred_prob": 0.02866428609850118, "mean_pred_prob_last_10": 0.1526426250115037, "mean_pred_prob_last_25": 0.08248847745126113, "mean_pred_prob_last_50": 0.049650818518421146, "mean_token_accuracy": 0.8638859450817108, "step": 3800 }, { "epoch": 0.06772972108154232, "grad_norm": 1.2583500064805768, "learning_rate": 0.0001, "loss": 1.1534, "mean_abs_error": 239.97808116894794, "mean_abs_error_last_10": 69.19171391113075, "mean_abs_error_last_25": 83.73831679377031, "mean_abs_error_last_50": 147.18429272435077, "mean_pred_prob": 0.03286901274695993, "mean_pred_prob_last_10": 0.1731117323040962, "mean_pred_prob_last_25": 0.09470875971019269, "mean_pred_prob_last_50": 0.057265691738575694, "mean_token_accuracy": 0.867527425289154, "step": 3810 }, { "epoch": 0.06790748937834426, "grad_norm": 3.114853495558707, "learning_rate": 0.0001, "loss": 1.1404, "mean_abs_error": 172.87262570231556, "mean_abs_error_last_10": 29.28953580540039, "mean_abs_error_last_25": 51.099782204280174, "mean_abs_error_last_50": 90.23552008998652, "mean_pred_prob": 0.03943806043826044, "mean_pred_prob_last_10": 0.20632098317146302, "mean_pred_prob_last_25": 0.11387666575610637, "mean_pred_prob_last_50": 0.06855000583454966, "mean_token_accuracy": 0.892344206571579, "step": 3820 }, { "epoch": 0.06808525767514621, "grad_norm": 1.3522520354940117, "learning_rate": 0.0001, "loss": 1.2117, "mean_abs_error": 672.1631400635252, "mean_abs_error_last_10": 333.7834305690783, "mean_abs_error_last_25": 383.60226556944605, "mean_abs_error_last_50": 470.2797579877089, "mean_pred_prob": 0.038940986484522, "mean_pred_prob_last_10": 0.21512400170904583, "mean_pred_prob_last_25": 0.11510693563031964, "mean_pred_prob_last_50": 0.06779956181126182, "mean_token_accuracy": 0.8763953864574432, "step": 3830 }, { "epoch": 0.06826302597194817, "grad_norm": 2.293742826684641, "learning_rate": 0.0001, "loss": 1.255, "mean_abs_error": 465.0047049987071, "mean_abs_error_last_10": 76.9215064049056, "mean_abs_error_last_25": 147.94043491193653, "mean_abs_error_last_50": 245.94619044558613, "mean_pred_prob": 0.020880427816882728, "mean_pred_prob_last_10": 0.1189275523647666, "mean_pred_prob_last_25": 0.06321138008497655, "mean_pred_prob_last_50": 0.03688841629773378, "mean_token_accuracy": 0.8688338875770569, "step": 3840 }, { "epoch": 0.0684407942687501, "grad_norm": 3.542702331838995, "learning_rate": 0.0001, "loss": 1.2404, "mean_abs_error": 416.45031871564436, "mean_abs_error_last_10": 146.58457886412415, "mean_abs_error_last_25": 191.17333889766678, "mean_abs_error_last_50": 239.3179386180052, "mean_pred_prob": 0.019512530788779257, "mean_pred_prob_last_10": 0.10560408886522055, "mean_pred_prob_last_25": 0.05622685290873051, "mean_pred_prob_last_50": 0.033722225856035945, "mean_token_accuracy": 0.872469562292099, "step": 3850 }, { "epoch": 0.06861856256555206, "grad_norm": 1.0865139228402796, "learning_rate": 0.0001, "loss": 1.2528, "mean_abs_error": 666.2466430133788, "mean_abs_error_last_10": 303.3858253897862, "mean_abs_error_last_25": 332.0161967598168, "mean_abs_error_last_50": 426.05684134878936, "mean_pred_prob": 0.02618629514472559, "mean_pred_prob_last_10": 0.14719901209464298, "mean_pred_prob_last_25": 0.07809958900325001, "mean_pred_prob_last_50": 0.04594585263403132, "mean_token_accuracy": 0.8700393676757813, "step": 3860 }, { "epoch": 0.06879633086235401, "grad_norm": 1.9218152933476627, "learning_rate": 0.0001, "loss": 1.1408, "mean_abs_error": 545.0858446821906, "mean_abs_error_last_10": 99.61128425570962, "mean_abs_error_last_25": 153.84763190618764, "mean_abs_error_last_50": 260.7439098564654, "mean_pred_prob": 0.02479274255456403, "mean_pred_prob_last_10": 0.1455352250719443, "mean_pred_prob_last_25": 0.07431514614727348, "mean_pred_prob_last_50": 0.04335513215046376, "mean_token_accuracy": 0.875678026676178, "step": 3870 }, { "epoch": 0.06897409915915595, "grad_norm": 2.287102258267253, "learning_rate": 0.0001, "loss": 1.2423, "mean_abs_error": 327.6087549091327, "mean_abs_error_last_10": 119.07730966966328, "mean_abs_error_last_25": 155.5653320997871, "mean_abs_error_last_50": 222.35955340569325, "mean_pred_prob": 0.023086568736471236, "mean_pred_prob_last_10": 0.12715756949037313, "mean_pred_prob_last_25": 0.06852779518812895, "mean_pred_prob_last_50": 0.04027453260496259, "mean_token_accuracy": 0.8701020836830139, "step": 3880 }, { "epoch": 0.0691518674559579, "grad_norm": 1.0949114617744031, "learning_rate": 0.0001, "loss": 1.2036, "mean_abs_error": 690.5129548758063, "mean_abs_error_last_10": 176.93275145863527, "mean_abs_error_last_25": 241.68799660979366, "mean_abs_error_last_50": 370.82068408206993, "mean_pred_prob": 0.026011726970318706, "mean_pred_prob_last_10": 0.13844364281976596, "mean_pred_prob_last_25": 0.07488798299455084, "mean_pred_prob_last_50": 0.044989800971234215, "mean_token_accuracy": 0.8751386880874634, "step": 3890 }, { "epoch": 0.06932963575275986, "grad_norm": 1.17580496853764, "learning_rate": 0.0001, "loss": 1.175, "mean_abs_error": 253.42903022658325, "mean_abs_error_last_10": 32.10577022510553, "mean_abs_error_last_25": 68.38237806712341, "mean_abs_error_last_50": 130.35450933798163, "mean_pred_prob": 0.02805436160415411, "mean_pred_prob_last_10": 0.16157254278659822, "mean_pred_prob_last_25": 0.0843699443154037, "mean_pred_prob_last_50": 0.04935942841693759, "mean_token_accuracy": 0.8792194545269012, "step": 3900 }, { "epoch": 0.06950740404956181, "grad_norm": 1.6004396282780406, "learning_rate": 0.0001, "loss": 1.1656, "mean_abs_error": 612.7249956611406, "mean_abs_error_last_10": 198.1554575153454, "mean_abs_error_last_25": 265.15036964516474, "mean_abs_error_last_50": 370.3863401144456, "mean_pred_prob": 0.028487520752241834, "mean_pred_prob_last_10": 0.15960078989737667, "mean_pred_prob_last_25": 0.08423587976722047, "mean_pred_prob_last_50": 0.04999610353552271, "mean_token_accuracy": 0.8729821801185608, "step": 3910 }, { "epoch": 0.06968517234636375, "grad_norm": 2.429996466957285, "learning_rate": 0.0001, "loss": 1.2849, "mean_abs_error": 487.4916382124954, "mean_abs_error_last_10": 132.2854658585849, "mean_abs_error_last_25": 136.70887480730295, "mean_abs_error_last_50": 232.71201698934996, "mean_pred_prob": 0.028304218733683228, "mean_pred_prob_last_10": 0.15907342750579118, "mean_pred_prob_last_25": 0.08473150739446282, "mean_pred_prob_last_50": 0.04964567604474723, "mean_token_accuracy": 0.8662286102771759, "step": 3920 }, { "epoch": 0.0698629406431657, "grad_norm": 1.1114050063700038, "learning_rate": 0.0001, "loss": 1.2043, "mean_abs_error": 375.0301942595811, "mean_abs_error_last_10": 98.3398392918842, "mean_abs_error_last_25": 148.50714068074117, "mean_abs_error_last_50": 198.81702723699397, "mean_pred_prob": 0.021395618887618185, "mean_pred_prob_last_10": 0.12102994415909052, "mean_pred_prob_last_25": 0.06260068183764815, "mean_pred_prob_last_50": 0.03703496977686882, "mean_token_accuracy": 0.8725180447101593, "step": 3930 }, { "epoch": 0.07004070893996765, "grad_norm": 1.9553506103107985, "learning_rate": 0.0001, "loss": 1.2109, "mean_abs_error": 1108.006960972333, "mean_abs_error_last_10": 600.8804443138876, "mean_abs_error_last_25": 683.2335925634925, "mean_abs_error_last_50": 831.0956101387337, "mean_pred_prob": 0.02053964002261637, "mean_pred_prob_last_10": 0.11512151707574958, "mean_pred_prob_last_25": 0.061460065242135896, "mean_pred_prob_last_50": 0.035986881171993446, "mean_token_accuracy": 0.8699237942695618, "step": 3940 }, { "epoch": 0.07021847723676959, "grad_norm": 2.4583937905574142, "learning_rate": 0.0001, "loss": 1.2018, "mean_abs_error": 985.2020487877651, "mean_abs_error_last_10": 256.1272820865218, "mean_abs_error_last_25": 342.6140226100434, "mean_abs_error_last_50": 529.4515384691074, "mean_pred_prob": 0.019647200603503733, "mean_pred_prob_last_10": 0.11286266706883907, "mean_pred_prob_last_25": 0.059256222331896424, "mean_pred_prob_last_50": 0.03461318633635528, "mean_token_accuracy": 0.870273107290268, "step": 3950 }, { "epoch": 0.07039624553357154, "grad_norm": 1.5978588216436704, "learning_rate": 0.0001, "loss": 1.2433, "mean_abs_error": 387.1953756688555, "mean_abs_error_last_10": 152.25229020182894, "mean_abs_error_last_25": 200.96604751290636, "mean_abs_error_last_50": 238.27517497810678, "mean_pred_prob": 0.021868110448122025, "mean_pred_prob_last_10": 0.12518390296027065, "mean_pred_prob_last_25": 0.06518488926813007, "mean_pred_prob_last_50": 0.0383638936560601, "mean_token_accuracy": 0.8657049298286438, "step": 3960 }, { "epoch": 0.0705740138303735, "grad_norm": 1.2209969032012578, "learning_rate": 0.0001, "loss": 1.171, "mean_abs_error": 792.105037507905, "mean_abs_error_last_10": 265.41794434903915, "mean_abs_error_last_25": 327.61386970336764, "mean_abs_error_last_50": 446.16264943675776, "mean_pred_prob": 0.033402095883502625, "mean_pred_prob_last_10": 0.17429355224594473, "mean_pred_prob_last_25": 0.09692209577187896, "mean_pred_prob_last_50": 0.05813343487970997, "mean_token_accuracy": 0.8806912004947662, "step": 3970 }, { "epoch": 0.07075178212717544, "grad_norm": 1.6890687032264333, "learning_rate": 0.0001, "loss": 1.2001, "mean_abs_error": 357.6171154392595, "mean_abs_error_last_10": 59.590765305488425, "mean_abs_error_last_25": 97.34877898222304, "mean_abs_error_last_50": 169.68093069203516, "mean_pred_prob": 0.028452963568270206, "mean_pred_prob_last_10": 0.1488586522638798, "mean_pred_prob_last_25": 0.08131217816844583, "mean_pred_prob_last_50": 0.04891628795303404, "mean_token_accuracy": 0.8759393990039825, "step": 3980 }, { "epoch": 0.07092955042397739, "grad_norm": 1.7752421422511273, "learning_rate": 0.0001, "loss": 1.2581, "mean_abs_error": 580.2392799020549, "mean_abs_error_last_10": 228.86320775051126, "mean_abs_error_last_25": 278.4235098438655, "mean_abs_error_last_50": 357.3025502957743, "mean_pred_prob": 0.023807524360017852, "mean_pred_prob_last_10": 0.1288306913804263, "mean_pred_prob_last_25": 0.06978662648471072, "mean_pred_prob_last_50": 0.04145668903947808, "mean_token_accuracy": 0.8609550535678864, "step": 3990 }, { "epoch": 0.07110731872077934, "grad_norm": 1.472165383674522, "learning_rate": 0.0001, "loss": 1.1727, "mean_abs_error": 80.13112514751955, "mean_abs_error_last_10": 17.444279001976327, "mean_abs_error_last_25": 31.894136686189096, "mean_abs_error_last_50": 51.64537135240564, "mean_pred_prob": 0.04760349905118346, "mean_pred_prob_last_10": 0.2515252411365509, "mean_pred_prob_last_25": 0.13594182580709457, "mean_pred_prob_last_50": 0.08182264603674412, "mean_token_accuracy": 0.8760374069213868, "step": 4000 }, { "epoch": 0.07128508701758128, "grad_norm": 0.8893046855586143, "learning_rate": 0.0001, "loss": 1.1536, "mean_abs_error": 786.043941134991, "mean_abs_error_last_10": 414.67500271014853, "mean_abs_error_last_25": 485.01749044928874, "mean_abs_error_last_50": 585.00184192004, "mean_pred_prob": 0.034528520506864877, "mean_pred_prob_last_10": 0.16928194256615825, "mean_pred_prob_last_25": 0.09574352946074213, "mean_pred_prob_last_50": 0.058828766230726616, "mean_token_accuracy": 0.8755454123020172, "step": 4010 }, { "epoch": 0.07146285531438323, "grad_norm": 1.1754480864393617, "learning_rate": 0.0001, "loss": 1.1592, "mean_abs_error": 806.1410376530987, "mean_abs_error_last_10": 411.07072689598226, "mean_abs_error_last_25": 423.8587506532316, "mean_abs_error_last_50": 478.1371991957778, "mean_pred_prob": 0.026032898318953814, "mean_pred_prob_last_10": 0.14268898649606854, "mean_pred_prob_last_25": 0.0770018967334181, "mean_pred_prob_last_50": 0.045526557846460494, "mean_token_accuracy": 0.866157591342926, "step": 4020 }, { "epoch": 0.07164062361118519, "grad_norm": 0.9523969850657539, "learning_rate": 0.0001, "loss": 1.1886, "mean_abs_error": 1213.5533606672993, "mean_abs_error_last_10": 719.1269064649662, "mean_abs_error_last_25": 791.2877361277357, "mean_abs_error_last_50": 922.8486289147015, "mean_pred_prob": 0.026939987123478205, "mean_pred_prob_last_10": 0.154243815211521, "mean_pred_prob_last_25": 0.08027182509395062, "mean_pred_prob_last_50": 0.046944390107819346, "mean_token_accuracy": 0.8822877943515778, "step": 4030 }, { "epoch": 0.07181839190798713, "grad_norm": 1.509152749245575, "learning_rate": 0.0001, "loss": 1.1652, "mean_abs_error": 651.8298953760177, "mean_abs_error_last_10": 176.8510372769492, "mean_abs_error_last_25": 234.8573071258067, "mean_abs_error_last_50": 397.5331244661485, "mean_pred_prob": 0.019992628257023172, "mean_pred_prob_last_10": 0.1128181344596669, "mean_pred_prob_last_25": 0.060023970319889486, "mean_pred_prob_last_50": 0.03500251100631431, "mean_token_accuracy": 0.8814369976520539, "step": 4040 }, { "epoch": 0.07199616020478908, "grad_norm": 1.9711475457559966, "learning_rate": 0.0001, "loss": 1.2013, "mean_abs_error": 455.1458693587832, "mean_abs_error_last_10": 137.79011955975201, "mean_abs_error_last_25": 149.62519924778795, "mean_abs_error_last_50": 212.31363712504435, "mean_pred_prob": 0.022916784859262407, "mean_pred_prob_last_10": 0.12698386907577514, "mean_pred_prob_last_25": 0.06791546689346432, "mean_pred_prob_last_50": 0.040377250593155625, "mean_token_accuracy": 0.8680610716342926, "step": 4050 }, { "epoch": 0.07217392850159103, "grad_norm": 2.342008328734572, "learning_rate": 0.0001, "loss": 1.144, "mean_abs_error": 339.11084167373394, "mean_abs_error_last_10": 123.59730282758385, "mean_abs_error_last_25": 132.43946304342057, "mean_abs_error_last_50": 211.49706471168793, "mean_pred_prob": 0.026809937274083494, "mean_pred_prob_last_10": 0.1515102492645383, "mean_pred_prob_last_25": 0.07991720521822572, "mean_pred_prob_last_50": 0.04699794789776206, "mean_token_accuracy": 0.8683563232421875, "step": 4060 }, { "epoch": 0.07235169679839297, "grad_norm": 1.6141054394014283, "learning_rate": 0.0001, "loss": 1.1559, "mean_abs_error": 469.2826188844997, "mean_abs_error_last_10": 145.14009373545915, "mean_abs_error_last_25": 160.25228777769922, "mean_abs_error_last_50": 253.23635717071753, "mean_pred_prob": 0.020500853937119246, "mean_pred_prob_last_10": 0.11328548192977905, "mean_pred_prob_last_25": 0.06088420264422893, "mean_pred_prob_last_50": 0.0359989897813648, "mean_token_accuracy": 0.8778667986392975, "step": 4070 }, { "epoch": 0.07252946509519492, "grad_norm": 1.2842510588226372, "learning_rate": 0.0001, "loss": 1.2029, "mean_abs_error": 862.5262480252961, "mean_abs_error_last_10": 486.34913321700844, "mean_abs_error_last_25": 554.7975801054014, "mean_abs_error_last_50": 640.8205137059961, "mean_pred_prob": 0.0327959094021935, "mean_pred_prob_last_10": 0.17856922212522477, "mean_pred_prob_last_25": 0.09620493795955554, "mean_pred_prob_last_50": 0.05718062432424631, "mean_token_accuracy": 0.8776798069477081, "step": 4080 }, { "epoch": 0.07270723339199688, "grad_norm": 2.207051928705425, "learning_rate": 0.0001, "loss": 1.1818, "mean_abs_error": 347.56624562338794, "mean_abs_error_last_10": 128.33196599143338, "mean_abs_error_last_25": 154.85334066097124, "mean_abs_error_last_50": 196.80116711412802, "mean_pred_prob": 0.03472067511174828, "mean_pred_prob_last_10": 0.17300507612526417, "mean_pred_prob_last_25": 0.09868194479495287, "mean_pred_prob_last_50": 0.059642641618847844, "mean_token_accuracy": 0.8675441563129425, "step": 4090 }, { "epoch": 0.07288500168879881, "grad_norm": 1.4327271026204969, "learning_rate": 0.0001, "loss": 1.1917, "mean_abs_error": 470.36258910253184, "mean_abs_error_last_10": 174.50170450102672, "mean_abs_error_last_25": 230.08806640044676, "mean_abs_error_last_50": 279.25431524384527, "mean_pred_prob": 0.037146425410173836, "mean_pred_prob_last_10": 0.1988143515191041, "mean_pred_prob_last_25": 0.10872627347125671, "mean_pred_prob_last_50": 0.0645954989711754, "mean_token_accuracy": 0.881640475988388, "step": 4100 }, { "epoch": 0.07306276998560077, "grad_norm": 3.2458076797348627, "learning_rate": 0.0001, "loss": 1.1895, "mean_abs_error": 434.5128152663443, "mean_abs_error_last_10": 102.57989979571137, "mean_abs_error_last_25": 147.58933527335628, "mean_abs_error_last_50": 247.26131048066054, "mean_pred_prob": 0.022483618254773317, "mean_pred_prob_last_10": 0.13161000087857247, "mean_pred_prob_last_25": 0.06768071521073579, "mean_pred_prob_last_50": 0.03934562774375081, "mean_token_accuracy": 0.8756135702133179, "step": 4110 }, { "epoch": 0.07324053828240272, "grad_norm": 1.9297465133895726, "learning_rate": 0.0001, "loss": 1.1173, "mean_abs_error": 431.42255337961376, "mean_abs_error_last_10": 106.39740761868661, "mean_abs_error_last_25": 140.058989564446, "mean_abs_error_last_50": 229.42534091761573, "mean_pred_prob": 0.0362959504884202, "mean_pred_prob_last_10": 0.19217998014064505, "mean_pred_prob_last_25": 0.10446523709688335, "mean_pred_prob_last_50": 0.06292781644733622, "mean_token_accuracy": 0.880069786310196, "step": 4120 }, { "epoch": 0.07341830657920466, "grad_norm": 1.0898770324314595, "learning_rate": 0.0001, "loss": 1.276, "mean_abs_error": 1098.1706291782264, "mean_abs_error_last_10": 619.828519881165, "mean_abs_error_last_25": 694.1185729661358, "mean_abs_error_last_50": 850.5567605989036, "mean_pred_prob": 0.021214373923430684, "mean_pred_prob_last_10": 0.12627066084824037, "mean_pred_prob_last_25": 0.06429401076893555, "mean_pred_prob_last_50": 0.037520193375530655, "mean_token_accuracy": 0.858642965555191, "step": 4130 }, { "epoch": 0.07359607487600661, "grad_norm": 1.3341222902196574, "learning_rate": 0.0001, "loss": 1.1624, "mean_abs_error": 655.33790228991, "mean_abs_error_last_10": 315.4224313071635, "mean_abs_error_last_25": 404.83487822577825, "mean_abs_error_last_50": 489.5411319707688, "mean_pred_prob": 0.034867659452720544, "mean_pred_prob_last_10": 0.18842223463580013, "mean_pred_prob_last_25": 0.10096227079629898, "mean_pred_prob_last_50": 0.0602427868230734, "mean_token_accuracy": 0.8734652161598205, "step": 4140 }, { "epoch": 0.07377384317280856, "grad_norm": 2.19568664628252, "learning_rate": 0.0001, "loss": 1.1556, "mean_abs_error": 208.09364913968275, "mean_abs_error_last_10": 147.733748840016, "mean_abs_error_last_25": 157.3096080861279, "mean_abs_error_last_50": 142.55261646464646, "mean_pred_prob": 0.02567280037328601, "mean_pred_prob_last_10": 0.15099987890571356, "mean_pred_prob_last_25": 0.07782887862995266, "mean_pred_prob_last_50": 0.04535002708435058, "mean_token_accuracy": 0.8743096470832825, "step": 4150 }, { "epoch": 0.0739516114696105, "grad_norm": 0.8507702616391266, "learning_rate": 0.0001, "loss": 1.205, "mean_abs_error": 680.995684459682, "mean_abs_error_last_10": 203.39042452067264, "mean_abs_error_last_25": 210.78173763751823, "mean_abs_error_last_50": 310.09635043473554, "mean_pred_prob": 0.023413407523185013, "mean_pred_prob_last_10": 0.1323966646566987, "mean_pred_prob_last_25": 0.07042449787259102, "mean_pred_prob_last_50": 0.041219582222402094, "mean_token_accuracy": 0.87879678606987, "step": 4160 }, { "epoch": 0.07412937976641246, "grad_norm": 2.395098497088867, "learning_rate": 0.0001, "loss": 1.2464, "mean_abs_error": 472.80717941954873, "mean_abs_error_last_10": 54.92171300258029, "mean_abs_error_last_25": 112.33537242698837, "mean_abs_error_last_50": 234.37927127154126, "mean_pred_prob": 0.03457570879836567, "mean_pred_prob_last_10": 0.1896958102704957, "mean_pred_prob_last_25": 0.10323862334480509, "mean_pred_prob_last_50": 0.06037073259940371, "mean_token_accuracy": 0.8675505876541137, "step": 4170 }, { "epoch": 0.07430714806321441, "grad_norm": 1.1067054895977926, "learning_rate": 0.0001, "loss": 1.1748, "mean_abs_error": 517.5486470952603, "mean_abs_error_last_10": 291.49811166074915, "mean_abs_error_last_25": 284.00556810344256, "mean_abs_error_last_50": 335.71013089171606, "mean_pred_prob": 0.022848860267549752, "mean_pred_prob_last_10": 0.1256267341086641, "mean_pred_prob_last_25": 0.0669925341499038, "mean_pred_prob_last_50": 0.03986957767046988, "mean_token_accuracy": 0.8719140827655792, "step": 4180 }, { "epoch": 0.07448491636001635, "grad_norm": 1.4489912313393127, "learning_rate": 0.0001, "loss": 1.1665, "mean_abs_error": 185.1968363069155, "mean_abs_error_last_10": 62.61007020930849, "mean_abs_error_last_25": 65.01547304168405, "mean_abs_error_last_50": 92.02002387850021, "mean_pred_prob": 0.04401418105699122, "mean_pred_prob_last_10": 0.2255860071629286, "mean_pred_prob_last_25": 0.12604550197720527, "mean_pred_prob_last_50": 0.07633684929460287, "mean_token_accuracy": 0.8730107367038726, "step": 4190 }, { "epoch": 0.0746626846568183, "grad_norm": 1.8563581884138318, "learning_rate": 0.0001, "loss": 1.1789, "mean_abs_error": 1538.3840319042076, "mean_abs_error_last_10": 465.9181680194221, "mean_abs_error_last_25": 596.760102878138, "mean_abs_error_last_50": 860.5391160669884, "mean_pred_prob": 0.01835979644965846, "mean_pred_prob_last_10": 0.09666890510707163, "mean_pred_prob_last_25": 0.053430907160509376, "mean_pred_prob_last_50": 0.032072893396252765, "mean_token_accuracy": 0.8715944647789001, "step": 4200 }, { "epoch": 0.07484045295362025, "grad_norm": 1.6299950021613623, "learning_rate": 0.0001, "loss": 1.1223, "mean_abs_error": 487.6678740859684, "mean_abs_error_last_10": 215.2954442711225, "mean_abs_error_last_25": 202.7275689808368, "mean_abs_error_last_50": 261.47805775478963, "mean_pred_prob": 0.020205732120666654, "mean_pred_prob_last_10": 0.11671465320978314, "mean_pred_prob_last_25": 0.06165433550486341, "mean_pred_prob_last_50": 0.035780783102381974, "mean_token_accuracy": 0.8760673761367798, "step": 4210 }, { "epoch": 0.07501822125042219, "grad_norm": 1.572052292266283, "learning_rate": 0.0001, "loss": 1.2084, "mean_abs_error": 1202.1590842644368, "mean_abs_error_last_10": 635.8426490603989, "mean_abs_error_last_25": 714.714693422517, "mean_abs_error_last_50": 888.5160267812855, "mean_pred_prob": 0.018467748364491852, "mean_pred_prob_last_10": 0.10214482808951289, "mean_pred_prob_last_25": 0.05427017527545104, "mean_pred_prob_last_50": 0.03205666333233239, "mean_token_accuracy": 0.8772783160209656, "step": 4220 }, { "epoch": 0.07519598954722415, "grad_norm": 0.9829401537946421, "learning_rate": 0.0001, "loss": 1.0902, "mean_abs_error": 576.0862927856742, "mean_abs_error_last_10": 195.36330367577273, "mean_abs_error_last_25": 256.64341254033576, "mean_abs_error_last_50": 360.6205072389352, "mean_pred_prob": 0.026982151824631727, "mean_pred_prob_last_10": 0.1430188341648318, "mean_pred_prob_last_25": 0.07826801720657386, "mean_pred_prob_last_50": 0.04674641111632809, "mean_token_accuracy": 0.8780793964862823, "step": 4230 }, { "epoch": 0.0753737578440261, "grad_norm": 1.3033067618723695, "learning_rate": 0.0001, "loss": 1.1622, "mean_abs_error": 845.5244001194066, "mean_abs_error_last_10": 406.8998456969267, "mean_abs_error_last_25": 494.4186547303424, "mean_abs_error_last_50": 596.0228160590726, "mean_pred_prob": 0.03172378012532136, "mean_pred_prob_last_10": 0.17145992656296585, "mean_pred_prob_last_25": 0.09197402700665407, "mean_pred_prob_last_50": 0.05502157370647183, "mean_token_accuracy": 0.8736338198184967, "step": 4240 }, { "epoch": 0.07555152614082805, "grad_norm": 1.8754949527965494, "learning_rate": 0.0001, "loss": 1.1181, "mean_abs_error": 719.2088184794073, "mean_abs_error_last_10": 343.7923368099765, "mean_abs_error_last_25": 436.3800614329122, "mean_abs_error_last_50": 535.7565828449078, "mean_pred_prob": 0.019976888969540595, "mean_pred_prob_last_10": 0.113464511372149, "mean_pred_prob_last_25": 0.06081867450848222, "mean_pred_prob_last_50": 0.035435298830270766, "mean_token_accuracy": 0.8764713048934937, "step": 4250 }, { "epoch": 0.07572929443762999, "grad_norm": 0.827233785966465, "learning_rate": 0.0001, "loss": 1.1694, "mean_abs_error": 1140.2665846514617, "mean_abs_error_last_10": 520.5236122853024, "mean_abs_error_last_25": 735.2405723437917, "mean_abs_error_last_50": 891.8921890608271, "mean_pred_prob": 0.025925544445635752, "mean_pred_prob_last_10": 0.1427337136148708, "mean_pred_prob_last_25": 0.07717757464270107, "mean_pred_prob_last_50": 0.045487412840884645, "mean_token_accuracy": 0.8680379271507264, "step": 4260 }, { "epoch": 0.07590706273443194, "grad_norm": 0.9652557078824074, "learning_rate": 0.0001, "loss": 1.1757, "mean_abs_error": 1183.9152475139529, "mean_abs_error_last_10": 423.24377982390206, "mean_abs_error_last_25": 500.13347976672105, "mean_abs_error_last_50": 721.8337462032106, "mean_pred_prob": 0.013311793384491465, "mean_pred_prob_last_10": 0.07918783606146462, "mean_pred_prob_last_25": 0.041176557209109885, "mean_pred_prob_last_50": 0.023640901420731098, "mean_token_accuracy": 0.8694011867046356, "step": 4270 }, { "epoch": 0.0760848310312339, "grad_norm": 1.9521779352651136, "learning_rate": 0.0001, "loss": 1.1179, "mean_abs_error": 535.4189235323277, "mean_abs_error_last_10": 166.5209849526547, "mean_abs_error_last_25": 240.39169170460454, "mean_abs_error_last_50": 337.6923158833292, "mean_pred_prob": 0.03501258474425413, "mean_pred_prob_last_10": 0.1886465058545582, "mean_pred_prob_last_25": 0.1023931488103699, "mean_pred_prob_last_50": 0.06158954862621613, "mean_token_accuracy": 0.8906591057777404, "step": 4280 }, { "epoch": 0.07626259932803583, "grad_norm": 1.2895325137262816, "learning_rate": 0.0001, "loss": 1.1572, "mean_abs_error": 1781.0976963694588, "mean_abs_error_last_10": 1104.9066486728425, "mean_abs_error_last_25": 1221.8556957510395, "mean_abs_error_last_50": 1380.4250030812648, "mean_pred_prob": 0.022081092302687465, "mean_pred_prob_last_10": 0.11421747676358791, "mean_pred_prob_last_25": 0.06295238838356454, "mean_pred_prob_last_50": 0.03800995857163798, "mean_token_accuracy": 0.8748943030834198, "step": 4290 }, { "epoch": 0.07644036762483779, "grad_norm": 0.7987776610780419, "learning_rate": 0.0001, "loss": 1.1072, "mean_abs_error": 421.20029362659824, "mean_abs_error_last_10": 95.15644497479163, "mean_abs_error_last_25": 112.04962707464469, "mean_abs_error_last_50": 203.9165299767726, "mean_pred_prob": 0.02564018138218671, "mean_pred_prob_last_10": 0.14466752130538224, "mean_pred_prob_last_25": 0.07617585202679038, "mean_pred_prob_last_50": 0.044988796161487696, "mean_token_accuracy": 0.8863897860050202, "step": 4300 }, { "epoch": 0.07661813592163974, "grad_norm": 1.2474458929577203, "learning_rate": 0.0001, "loss": 1.0797, "mean_abs_error": 497.9969507844827, "mean_abs_error_last_10": 155.72110099569193, "mean_abs_error_last_25": 195.20439589144422, "mean_abs_error_last_50": 258.5416965693958, "mean_pred_prob": 0.0277825690805912, "mean_pred_prob_last_10": 0.1561006661504507, "mean_pred_prob_last_25": 0.08350055798655376, "mean_pred_prob_last_50": 0.049044029344804584, "mean_token_accuracy": 0.8745853960514068, "step": 4310 }, { "epoch": 0.07679590421844168, "grad_norm": 2.1652892573251896, "learning_rate": 0.0001, "loss": 1.1895, "mean_abs_error": 373.5016392445844, "mean_abs_error_last_10": 72.03612704600746, "mean_abs_error_last_25": 136.08937892420263, "mean_abs_error_last_50": 221.9127598856233, "mean_pred_prob": 0.03303325757733546, "mean_pred_prob_last_10": 0.17674909438937902, "mean_pred_prob_last_25": 0.09555012602359056, "mean_pred_prob_last_50": 0.05674638206837699, "mean_token_accuracy": 0.8703523814678192, "step": 4320 }, { "epoch": 0.07697367251524363, "grad_norm": 2.0251837105473256, "learning_rate": 0.0001, "loss": 1.1964, "mean_abs_error": 672.2845602072964, "mean_abs_error_last_10": 269.22766201055344, "mean_abs_error_last_25": 312.35103080999204, "mean_abs_error_last_50": 418.69319193959734, "mean_pred_prob": 0.030524588737171142, "mean_pred_prob_last_10": 0.1624662804068066, "mean_pred_prob_last_25": 0.08773882692330517, "mean_pred_prob_last_50": 0.0530379664094653, "mean_token_accuracy": 0.8721356570720673, "step": 4330 }, { "epoch": 0.07715144081204559, "grad_norm": 3.659393101317384, "learning_rate": 0.0001, "loss": 1.1377, "mean_abs_error": 306.29252425092517, "mean_abs_error_last_10": 72.82348486426375, "mean_abs_error_last_25": 93.79714501216107, "mean_abs_error_last_50": 162.76569100093073, "mean_pred_prob": 0.03116602129302919, "mean_pred_prob_last_10": 0.17165471483021974, "mean_pred_prob_last_25": 0.09123256569728255, "mean_pred_prob_last_50": 0.0542080101557076, "mean_token_accuracy": 0.8816818952560425, "step": 4340 }, { "epoch": 0.07732920910884752, "grad_norm": 1.5114449231352858, "learning_rate": 0.0001, "loss": 1.2208, "mean_abs_error": 790.6001334858577, "mean_abs_error_last_10": 203.4324536117911, "mean_abs_error_last_25": 323.39225856666616, "mean_abs_error_last_50": 492.68421244172197, "mean_pred_prob": 0.018070297519443557, "mean_pred_prob_last_10": 0.10234548634616658, "mean_pred_prob_last_25": 0.053513235435821115, "mean_pred_prob_last_50": 0.0316579069243744, "mean_token_accuracy": 0.8587220430374145, "step": 4350 }, { "epoch": 0.07750697740564948, "grad_norm": 0.987753433279375, "learning_rate": 0.0001, "loss": 1.14, "mean_abs_error": 245.06358951038933, "mean_abs_error_last_10": 58.20572121440468, "mean_abs_error_last_25": 69.91330770154974, "mean_abs_error_last_50": 117.2026168961969, "mean_pred_prob": 0.036003915453329684, "mean_pred_prob_last_10": 0.19721438623964788, "mean_pred_prob_last_25": 0.10535328797996044, "mean_pred_prob_last_50": 0.06283705406822264, "mean_token_accuracy": 0.8713413894176483, "step": 4360 }, { "epoch": 0.07768474570245143, "grad_norm": 1.0639901401053349, "learning_rate": 0.0001, "loss": 1.2141, "mean_abs_error": 386.4197684597343, "mean_abs_error_last_10": 72.05040942842467, "mean_abs_error_last_25": 160.64157216628263, "mean_abs_error_last_50": 291.2760652379346, "mean_pred_prob": 0.02403686214238405, "mean_pred_prob_last_10": 0.135432181507349, "mean_pred_prob_last_25": 0.0717684868723154, "mean_pred_prob_last_50": 0.042165350448340175, "mean_token_accuracy": 0.8719731509685517, "step": 4370 }, { "epoch": 0.07786251399925337, "grad_norm": 1.9223151628933473, "learning_rate": 0.0001, "loss": 1.1845, "mean_abs_error": 1898.3534713942536, "mean_abs_error_last_10": 959.4361668348016, "mean_abs_error_last_25": 1114.3497996997455, "mean_abs_error_last_50": 1363.3756694286308, "mean_pred_prob": 0.020249983458779753, "mean_pred_prob_last_10": 0.10763895531417803, "mean_pred_prob_last_25": 0.05860283411893761, "mean_pred_prob_last_50": 0.03506890751741594, "mean_token_accuracy": 0.8635900855064392, "step": 4380 }, { "epoch": 0.07804028229605532, "grad_norm": 0.8557791356543931, "learning_rate": 0.0001, "loss": 1.0605, "mean_abs_error": 92.63283145838398, "mean_abs_error_last_10": 17.98666433479884, "mean_abs_error_last_25": 32.024544504910935, "mean_abs_error_last_50": 56.56994591045177, "mean_pred_prob": 0.04539233734831214, "mean_pred_prob_last_10": 0.22884779870510102, "mean_pred_prob_last_25": 0.12839478626847267, "mean_pred_prob_last_50": 0.07781665176153182, "mean_token_accuracy": 0.8740814507007599, "step": 4390 }, { "epoch": 0.07821805059285727, "grad_norm": 0.7689762091815079, "learning_rate": 0.0001, "loss": 1.1631, "mean_abs_error": 580.937602010922, "mean_abs_error_last_10": 296.65562581588506, "mean_abs_error_last_25": 325.76096615151096, "mean_abs_error_last_50": 356.891175208986, "mean_pred_prob": 0.02428022609092295, "mean_pred_prob_last_10": 0.12925268560647965, "mean_pred_prob_last_25": 0.07061897879466414, "mean_pred_prob_last_50": 0.042150520533323285, "mean_token_accuracy": 0.8791366755962372, "step": 4400 }, { "epoch": 0.07839581888965921, "grad_norm": 2.256898043583154, "learning_rate": 0.0001, "loss": 1.0956, "mean_abs_error": 291.30268462635553, "mean_abs_error_last_10": 44.17887458791937, "mean_abs_error_last_25": 75.25848983239592, "mean_abs_error_last_50": 137.83819874651195, "mean_pred_prob": 0.031697860127314924, "mean_pred_prob_last_10": 0.17430235594511032, "mean_pred_prob_last_25": 0.09339970573782921, "mean_pred_prob_last_50": 0.05542269665747881, "mean_token_accuracy": 0.8809987783432007, "step": 4410 }, { "epoch": 0.07857358718646117, "grad_norm": 1.1255774783664958, "learning_rate": 0.0001, "loss": 1.0933, "mean_abs_error": 380.9808350042674, "mean_abs_error_last_10": 99.4445793070727, "mean_abs_error_last_25": 116.02583546090038, "mean_abs_error_last_50": 205.53971386495112, "mean_pred_prob": 0.02385189151391387, "mean_pred_prob_last_10": 0.1403885878622532, "mean_pred_prob_last_25": 0.07253521103411913, "mean_pred_prob_last_50": 0.04216720210388303, "mean_token_accuracy": 0.8887575447559357, "step": 4420 }, { "epoch": 0.07875135548326312, "grad_norm": 2.0801369755226684, "learning_rate": 0.0001, "loss": 1.093, "mean_abs_error": 315.9131809404167, "mean_abs_error_last_10": 36.82140208187841, "mean_abs_error_last_25": 67.67734537371561, "mean_abs_error_last_50": 135.1956338310612, "mean_pred_prob": 0.03981845923699438, "mean_pred_prob_last_10": 0.19924406036734582, "mean_pred_prob_last_25": 0.1126536566996947, "mean_pred_prob_last_50": 0.06821918268688024, "mean_token_accuracy": 0.8856956362724304, "step": 4430 }, { "epoch": 0.07892912378006506, "grad_norm": 1.785186886753354, "learning_rate": 0.0001, "loss": 1.179, "mean_abs_error": 451.0052947970227, "mean_abs_error_last_10": 138.34987491278463, "mean_abs_error_last_25": 158.10121483322678, "mean_abs_error_last_50": 223.8533566127293, "mean_pred_prob": 0.02884667501784861, "mean_pred_prob_last_10": 0.1620663657784462, "mean_pred_prob_last_25": 0.08609420750290156, "mean_pred_prob_last_50": 0.050529098603874445, "mean_token_accuracy": 0.8671370387077332, "step": 4440 }, { "epoch": 0.07910689207686701, "grad_norm": 1.4628200151183466, "learning_rate": 0.0001, "loss": 1.2208, "mean_abs_error": 1245.7047783436756, "mean_abs_error_last_10": 397.0847755081337, "mean_abs_error_last_25": 562.4044273338595, "mean_abs_error_last_50": 787.5826398043021, "mean_pred_prob": 0.012321741998312064, "mean_pred_prob_last_10": 0.07412047928664833, "mean_pred_prob_last_25": 0.037344701297115535, "mean_pred_prob_last_50": 0.021788431314053013, "mean_token_accuracy": 0.8700671374797821, "step": 4450 }, { "epoch": 0.07928466037366896, "grad_norm": 1.6203133569476142, "learning_rate": 0.0001, "loss": 1.1154, "mean_abs_error": 584.5730941132268, "mean_abs_error_last_10": 90.14481771339891, "mean_abs_error_last_25": 150.47911145511557, "mean_abs_error_last_50": 283.0307052730582, "mean_pred_prob": 0.033398421696620065, "mean_pred_prob_last_10": 0.17514825828839092, "mean_pred_prob_last_25": 0.09601448532193899, "mean_pred_prob_last_50": 0.0576276978245005, "mean_token_accuracy": 0.8772350132465363, "step": 4460 }, { "epoch": 0.0794624286704709, "grad_norm": 2.5471991580103936, "learning_rate": 0.0001, "loss": 1.1876, "mean_abs_error": 399.6371798138677, "mean_abs_error_last_10": 130.50879087915388, "mean_abs_error_last_25": 157.23778778868763, "mean_abs_error_last_50": 255.19118921773907, "mean_pred_prob": 0.02333537315716967, "mean_pred_prob_last_10": 0.13705364535562695, "mean_pred_prob_last_25": 0.07073911267798394, "mean_pred_prob_last_50": 0.040794428228400646, "mean_token_accuracy": 0.8762804806232453, "step": 4470 }, { "epoch": 0.07964019696727286, "grad_norm": 1.0799645032689453, "learning_rate": 0.0001, "loss": 1.1697, "mean_abs_error": 1077.1544566189846, "mean_abs_error_last_10": 651.1637813753655, "mean_abs_error_last_25": 720.3190728454867, "mean_abs_error_last_50": 848.6500562719688, "mean_pred_prob": 0.021868107456248252, "mean_pred_prob_last_10": 0.12501543686375954, "mean_pred_prob_last_25": 0.06401626170700184, "mean_pred_prob_last_50": 0.03792732822330436, "mean_token_accuracy": 0.8663794040679932, "step": 4480 }, { "epoch": 0.07981796526407481, "grad_norm": 1.0435147509671392, "learning_rate": 0.0001, "loss": 1.1218, "mean_abs_error": 653.5274795415941, "mean_abs_error_last_10": 150.03494645614282, "mean_abs_error_last_25": 246.33100907825283, "mean_abs_error_last_50": 334.1847084385137, "mean_pred_prob": 0.024930040317121892, "mean_pred_prob_last_10": 0.1315264471806586, "mean_pred_prob_last_25": 0.07101414029020817, "mean_pred_prob_last_50": 0.0428305008681491, "mean_token_accuracy": 0.878891521692276, "step": 4490 }, { "epoch": 0.07999573356087675, "grad_norm": 1.8473810752367048, "learning_rate": 0.0001, "loss": 1.1933, "mean_abs_error": 527.1303404416683, "mean_abs_error_last_10": 130.65610429392984, "mean_abs_error_last_25": 163.28875465034037, "mean_abs_error_last_50": 273.19205966722126, "mean_pred_prob": 0.017530764266848565, "mean_pred_prob_last_10": 0.10358123835176229, "mean_pred_prob_last_25": 0.053521494194865224, "mean_pred_prob_last_50": 0.031008394621312618, "mean_token_accuracy": 0.8720980226993561, "step": 4500 }, { "epoch": 0.0801735018576787, "grad_norm": 2.179933830043799, "learning_rate": 0.0001, "loss": 1.206, "mean_abs_error": 519.3058041368752, "mean_abs_error_last_10": 219.49614027124852, "mean_abs_error_last_25": 363.7855373910528, "mean_abs_error_last_50": 397.17889244938203, "mean_pred_prob": 0.020864889747463165, "mean_pred_prob_last_10": 0.11629261374473572, "mean_pred_prob_last_25": 0.06103082452900708, "mean_pred_prob_last_50": 0.03634977634064853, "mean_token_accuracy": 0.8782523036003113, "step": 4510 }, { "epoch": 0.08035127015448065, "grad_norm": 1.1084586013319297, "learning_rate": 0.0001, "loss": 1.1202, "mean_abs_error": 557.1180693266559, "mean_abs_error_last_10": 95.64144423947043, "mean_abs_error_last_25": 157.93006606622617, "mean_abs_error_last_50": 275.4826096640057, "mean_pred_prob": 0.036476642001071015, "mean_pred_prob_last_10": 0.19759108260041103, "mean_pred_prob_last_25": 0.10627184470649809, "mean_pred_prob_last_50": 0.06333668705774471, "mean_token_accuracy": 0.8735647618770599, "step": 4520 }, { "epoch": 0.08052903845128259, "grad_norm": 1.7568514788774363, "learning_rate": 0.0001, "loss": 1.1148, "mean_abs_error": 320.0210797962282, "mean_abs_error_last_10": 63.27515885080197, "mean_abs_error_last_25": 98.43214670446, "mean_abs_error_last_50": 154.61437116049223, "mean_pred_prob": 0.022695653699338438, "mean_pred_prob_last_10": 0.1253625564277172, "mean_pred_prob_last_25": 0.06758050750941039, "mean_pred_prob_last_50": 0.039685890171676876, "mean_token_accuracy": 0.8789180934429168, "step": 4530 }, { "epoch": 0.08070680674808454, "grad_norm": 1.6378940650481435, "learning_rate": 0.0001, "loss": 1.1291, "mean_abs_error": 501.0571481353142, "mean_abs_error_last_10": 259.2317127921055, "mean_abs_error_last_25": 326.12184921390025, "mean_abs_error_last_50": 357.12765022670027, "mean_pred_prob": 0.027842614240944384, "mean_pred_prob_last_10": 0.15363050103187562, "mean_pred_prob_last_25": 0.08217059737071394, "mean_pred_prob_last_50": 0.048866494465619324, "mean_token_accuracy": 0.8758965015411377, "step": 4540 }, { "epoch": 0.0808845750448865, "grad_norm": 1.001030987346583, "learning_rate": 0.0001, "loss": 1.1289, "mean_abs_error": 364.45693729066414, "mean_abs_error_last_10": 172.4951197051069, "mean_abs_error_last_25": 152.09726819351977, "mean_abs_error_last_50": 228.11033020759277, "mean_pred_prob": 0.01974961464293301, "mean_pred_prob_last_10": 0.10398914068937301, "mean_pred_prob_last_25": 0.05702729085460305, "mean_pred_prob_last_50": 0.03443374549970031, "mean_token_accuracy": 0.8716545879840851, "step": 4550 }, { "epoch": 0.08106234334168844, "grad_norm": 1.3063870239892557, "learning_rate": 0.0001, "loss": 1.1217, "mean_abs_error": 417.84551719699675, "mean_abs_error_last_10": 111.65218224002017, "mean_abs_error_last_25": 121.67433229256642, "mean_abs_error_last_50": 199.0504472584806, "mean_pred_prob": 0.024286931729875505, "mean_pred_prob_last_10": 0.13684701845049857, "mean_pred_prob_last_25": 0.07225313046947121, "mean_pred_prob_last_50": 0.042561266524717214, "mean_token_accuracy": 0.8811209738254547, "step": 4560 }, { "epoch": 0.08124011163849039, "grad_norm": 1.6701605705785818, "learning_rate": 0.0001, "loss": 1.0941, "mean_abs_error": 499.8955734565919, "mean_abs_error_last_10": 190.11358276535697, "mean_abs_error_last_25": 192.287095378346, "mean_abs_error_last_50": 250.6757069765944, "mean_pred_prob": 0.020960092125460506, "mean_pred_prob_last_10": 0.11956835780292749, "mean_pred_prob_last_25": 0.06338954158127308, "mean_pred_prob_last_50": 0.037275431538000704, "mean_token_accuracy": 0.8812853455543518, "step": 4570 }, { "epoch": 0.08141787993529234, "grad_norm": 2.176983049360389, "learning_rate": 0.0001, "loss": 1.1724, "mean_abs_error": 725.1251405404342, "mean_abs_error_last_10": 112.53139292079638, "mean_abs_error_last_25": 218.1821451136379, "mean_abs_error_last_50": 380.1484493467985, "mean_pred_prob": 0.0274053574539721, "mean_pred_prob_last_10": 0.14317938347812742, "mean_pred_prob_last_25": 0.0787741586798802, "mean_pred_prob_last_50": 0.047377073741517964, "mean_token_accuracy": 0.8724276185035705, "step": 4580 }, { "epoch": 0.0815956482320943, "grad_norm": 2.634450305804368, "learning_rate": 0.0001, "loss": 1.173, "mean_abs_error": 676.9043464040917, "mean_abs_error_last_10": 151.39951458655307, "mean_abs_error_last_25": 244.07237100594617, "mean_abs_error_last_50": 387.81711073057056, "mean_pred_prob": 0.022616291511803867, "mean_pred_prob_last_10": 0.12464961297810077, "mean_pred_prob_last_25": 0.06660362733528018, "mean_pred_prob_last_50": 0.03968148659914732, "mean_token_accuracy": 0.8669964134693146, "step": 4590 }, { "epoch": 0.08177341652889623, "grad_norm": 1.0933072385601108, "learning_rate": 0.0001, "loss": 1.136, "mean_abs_error": 1265.1041573447515, "mean_abs_error_last_10": 594.7899738546333, "mean_abs_error_last_25": 830.9211540519634, "mean_abs_error_last_50": 981.7912107997021, "mean_pred_prob": 0.016078223325894214, "mean_pred_prob_last_10": 0.08546753249247559, "mean_pred_prob_last_25": 0.046448332472937184, "mean_pred_prob_last_50": 0.027800078957807273, "mean_token_accuracy": 0.8679300427436829, "step": 4600 }, { "epoch": 0.08195118482569819, "grad_norm": 1.3737044796522244, "learning_rate": 0.0001, "loss": 1.1454, "mean_abs_error": 620.8521238216979, "mean_abs_error_last_10": 210.41369916637467, "mean_abs_error_last_25": 278.08159422132314, "mean_abs_error_last_50": 373.84833015196017, "mean_pred_prob": 0.026314243039814757, "mean_pred_prob_last_10": 0.14109335843822918, "mean_pred_prob_last_25": 0.07748473469982856, "mean_pred_prob_last_50": 0.04591952351038344, "mean_token_accuracy": 0.8753147721290588, "step": 4610 }, { "epoch": 0.08212895312250014, "grad_norm": 0.9162072365283718, "learning_rate": 0.0001, "loss": 1.1768, "mean_abs_error": 1088.4778971058943, "mean_abs_error_last_10": 309.1710230487196, "mean_abs_error_last_25": 427.11654045515115, "mean_abs_error_last_50": 625.7557257618571, "mean_pred_prob": 0.017663751280633733, "mean_pred_prob_last_10": 0.08540576547384263, "mean_pred_prob_last_25": 0.04821080880356021, "mean_pred_prob_last_50": 0.030020778253674506, "mean_token_accuracy": 0.8630304336547852, "step": 4620 }, { "epoch": 0.08230672141930208, "grad_norm": 1.3966895908795323, "learning_rate": 0.0001, "loss": 1.2097, "mean_abs_error": 694.8698779681625, "mean_abs_error_last_10": 184.3675717096708, "mean_abs_error_last_25": 257.06203893339034, "mean_abs_error_last_50": 401.816423346619, "mean_pred_prob": 0.019716537394560873, "mean_pred_prob_last_10": 0.11100318963872269, "mean_pred_prob_last_25": 0.05897953126113862, "mean_pred_prob_last_50": 0.0348586872976739, "mean_token_accuracy": 0.8706077098846435, "step": 4630 }, { "epoch": 0.08248448971610403, "grad_norm": 2.169689787670887, "learning_rate": 0.0001, "loss": 1.115, "mean_abs_error": 425.03459199728366, "mean_abs_error_last_10": 46.51800703227642, "mean_abs_error_last_25": 103.10677816550984, "mean_abs_error_last_50": 216.06604403360043, "mean_pred_prob": 0.022542079258710147, "mean_pred_prob_last_10": 0.13263743929564953, "mean_pred_prob_last_25": 0.06872456837445498, "mean_pred_prob_last_50": 0.03965057604946196, "mean_token_accuracy": 0.8856098771095275, "step": 4640 }, { "epoch": 0.08266225801290598, "grad_norm": 1.3203308423313334, "learning_rate": 0.0001, "loss": 1.0735, "mean_abs_error": 239.62084510158303, "mean_abs_error_last_10": 66.95185860260558, "mean_abs_error_last_25": 88.41787702873, "mean_abs_error_last_50": 118.87513264915773, "mean_pred_prob": 0.030019336938858034, "mean_pred_prob_last_10": 0.16349619943648577, "mean_pred_prob_last_25": 0.08921497566625476, "mean_pred_prob_last_50": 0.05284306118264794, "mean_token_accuracy": 0.8767233908176422, "step": 4650 }, { "epoch": 0.08284002630970792, "grad_norm": 1.6613529421089122, "learning_rate": 0.0001, "loss": 1.1843, "mean_abs_error": 1578.6976077558033, "mean_abs_error_last_10": 829.3683175734477, "mean_abs_error_last_25": 909.6967937211142, "mean_abs_error_last_50": 1112.390916024638, "mean_pred_prob": 0.02328769987798296, "mean_pred_prob_last_10": 0.1290547479991801, "mean_pred_prob_last_25": 0.06880304382939358, "mean_pred_prob_last_50": 0.04076677187113091, "mean_token_accuracy": 0.8594026386737823, "step": 4660 }, { "epoch": 0.08301779460650988, "grad_norm": 1.9953254479460276, "learning_rate": 0.0001, "loss": 1.1344, "mean_abs_error": 704.2391162576268, "mean_abs_error_last_10": 363.96062525319314, "mean_abs_error_last_25": 361.9328150659527, "mean_abs_error_last_50": 472.18084315594325, "mean_pred_prob": 0.01518051878665574, "mean_pred_prob_last_10": 0.08497128753224388, "mean_pred_prob_last_25": 0.045286302198655905, "mean_pred_prob_last_50": 0.02660888565587811, "mean_token_accuracy": 0.8732579350471497, "step": 4670 }, { "epoch": 0.08319556290331183, "grad_norm": 0.8715553409721309, "learning_rate": 0.0001, "loss": 1.201, "mean_abs_error": 336.425450851053, "mean_abs_error_last_10": 90.9584247535667, "mean_abs_error_last_25": 107.0765267994379, "mean_abs_error_last_50": 159.49724926905785, "mean_pred_prob": 0.02599735534749925, "mean_pred_prob_last_10": 0.1447384048253298, "mean_pred_prob_last_25": 0.07764406315982342, "mean_pred_prob_last_50": 0.04586004065349698, "mean_token_accuracy": 0.8721972048282624, "step": 4680 }, { "epoch": 0.08337333120011377, "grad_norm": 2.230341664915398, "learning_rate": 0.0001, "loss": 1.1119, "mean_abs_error": 684.7820915532868, "mean_abs_error_last_10": 348.7659090735649, "mean_abs_error_last_25": 389.1705595965511, "mean_abs_error_last_50": 459.06377810602, "mean_pred_prob": 0.019648883235640825, "mean_pred_prob_last_10": 0.11445504107978195, "mean_pred_prob_last_25": 0.0588826454069931, "mean_pred_prob_last_50": 0.0343939614132978, "mean_token_accuracy": 0.8779527723789216, "step": 4690 }, { "epoch": 0.08355109949691572, "grad_norm": 2.6507223729907543, "learning_rate": 0.0001, "loss": 1.1973, "mean_abs_error": 200.3018969187296, "mean_abs_error_last_10": 60.07426735665977, "mean_abs_error_last_25": 73.59178862122262, "mean_abs_error_last_50": 116.65412557021322, "mean_pred_prob": 0.029222078574821352, "mean_pred_prob_last_10": 0.15980057902634143, "mean_pred_prob_last_25": 0.08612691927701235, "mean_pred_prob_last_50": 0.05083234310150146, "mean_token_accuracy": 0.8789471805095672, "step": 4700 }, { "epoch": 0.08372886779371767, "grad_norm": 1.6663410854230594, "learning_rate": 0.0001, "loss": 1.1117, "mean_abs_error": 639.0105671503982, "mean_abs_error_last_10": 213.36279661122109, "mean_abs_error_last_25": 248.7626692986727, "mean_abs_error_last_50": 362.9483377397936, "mean_pred_prob": 0.02971134044928476, "mean_pred_prob_last_10": 0.1492597372736782, "mean_pred_prob_last_25": 0.08420616743387654, "mean_pred_prob_last_50": 0.05087348538218066, "mean_token_accuracy": 0.8753738820552825, "step": 4710 }, { "epoch": 0.08390663609051961, "grad_norm": 1.4389150663253656, "learning_rate": 0.0001, "loss": 1.0718, "mean_abs_error": 248.93649513258129, "mean_abs_error_last_10": 132.14447824641144, "mean_abs_error_last_25": 135.3150923053651, "mean_abs_error_last_50": 162.72401455704988, "mean_pred_prob": 0.03229434108361602, "mean_pred_prob_last_10": 0.16694738939404488, "mean_pred_prob_last_25": 0.09177652411162854, "mean_pred_prob_last_50": 0.05537205934524536, "mean_token_accuracy": 0.870566314458847, "step": 4720 }, { "epoch": 0.08408440438732157, "grad_norm": 3.3136553663632022, "learning_rate": 0.0001, "loss": 1.1543, "mean_abs_error": 847.4624022157419, "mean_abs_error_last_10": 336.4431904917882, "mean_abs_error_last_25": 420.33595984427467, "mean_abs_error_last_50": 592.0113478536703, "mean_pred_prob": 0.025601463983184657, "mean_pred_prob_last_10": 0.14109971653670073, "mean_pred_prob_last_25": 0.07580812589731067, "mean_pred_prob_last_50": 0.04473189782002009, "mean_token_accuracy": 0.863091230392456, "step": 4730 }, { "epoch": 0.08426217268412352, "grad_norm": 1.0587258933109884, "learning_rate": 0.0001, "loss": 1.1003, "mean_abs_error": 1085.13038446754, "mean_abs_error_last_10": 472.5387061381255, "mean_abs_error_last_25": 560.0263144893628, "mean_abs_error_last_50": 725.1203200767455, "mean_pred_prob": 0.025375168732716703, "mean_pred_prob_last_10": 0.13507090345665346, "mean_pred_prob_last_25": 0.07431007355626207, "mean_pred_prob_last_50": 0.04415991866844706, "mean_token_accuracy": 0.8704761922359466, "step": 4740 }, { "epoch": 0.08443994098092546, "grad_norm": 1.183483233216599, "learning_rate": 0.0001, "loss": 1.0652, "mean_abs_error": 894.5281808557536, "mean_abs_error_last_10": 399.66436510830783, "mean_abs_error_last_25": 463.1506416007161, "mean_abs_error_last_50": 587.3403393731905, "mean_pred_prob": 0.026105798305070493, "mean_pred_prob_last_10": 0.14368653599522077, "mean_pred_prob_last_25": 0.07656030933139846, "mean_pred_prob_last_50": 0.04544079051411245, "mean_token_accuracy": 0.8669535338878631, "step": 4750 }, { "epoch": 0.08461770927772741, "grad_norm": 0.7874283581406069, "learning_rate": 0.0001, "loss": 1.1192, "mean_abs_error": 287.9416817981472, "mean_abs_error_last_10": 107.43128373458039, "mean_abs_error_last_25": 141.8771861567925, "mean_abs_error_last_50": 190.59667148948165, "mean_pred_prob": 0.03954427766148001, "mean_pred_prob_last_10": 0.19330720357829706, "mean_pred_prob_last_25": 0.11009202826535329, "mean_pred_prob_last_50": 0.06725591992144472, "mean_token_accuracy": 0.8700738906860351, "step": 4760 }, { "epoch": 0.08479547757452936, "grad_norm": 1.3049199642634999, "learning_rate": 0.0001, "loss": 1.0676, "mean_abs_error": 660.6502604895211, "mean_abs_error_last_10": 249.58938607785672, "mean_abs_error_last_25": 317.65628555804716, "mean_abs_error_last_50": 379.53412191084175, "mean_pred_prob": 0.024839676555711777, "mean_pred_prob_last_10": 0.13542982740327716, "mean_pred_prob_last_25": 0.07362072241958231, "mean_pred_prob_last_50": 0.04301793179474771, "mean_token_accuracy": 0.8704898297786713, "step": 4770 }, { "epoch": 0.0849732458713313, "grad_norm": 2.1183140156252, "learning_rate": 0.0001, "loss": 1.1627, "mean_abs_error": 507.32072814198534, "mean_abs_error_last_10": 152.62589781819239, "mean_abs_error_last_25": 226.53624214304537, "mean_abs_error_last_50": 301.68102903865577, "mean_pred_prob": 0.019898136093979703, "mean_pred_prob_last_10": 0.11399123349692672, "mean_pred_prob_last_25": 0.059486678382381794, "mean_pred_prob_last_50": 0.034981568471994254, "mean_token_accuracy": 0.8696892917156219, "step": 4780 }, { "epoch": 0.08515101416813325, "grad_norm": 0.8908865766629958, "learning_rate": 0.0001, "loss": 1.1281, "mean_abs_error": 323.60447007994213, "mean_abs_error_last_10": 303.59287062171205, "mean_abs_error_last_25": 281.98710130056236, "mean_abs_error_last_50": 279.0692501609569, "mean_pred_prob": 0.0340075601823628, "mean_pred_prob_last_10": 0.1775594636797905, "mean_pred_prob_last_25": 0.09914471697993577, "mean_pred_prob_last_50": 0.0589512703474611, "mean_token_accuracy": 0.8716906428337097, "step": 4790 }, { "epoch": 0.08532878246493521, "grad_norm": 1.43628775069396, "learning_rate": 0.0001, "loss": 1.1254, "mean_abs_error": 1714.0185881783134, "mean_abs_error_last_10": 764.7982253353008, "mean_abs_error_last_25": 882.6827763660397, "mean_abs_error_last_50": 1137.7063682376574, "mean_pred_prob": 0.019117278457270005, "mean_pred_prob_last_10": 0.10709441495710052, "mean_pred_prob_last_25": 0.05635618066880852, "mean_pred_prob_last_50": 0.03325955948384944, "mean_token_accuracy": 0.8752699494361877, "step": 4800 }, { "epoch": 0.08550655076173715, "grad_norm": 2.9312021577627427, "learning_rate": 0.0001, "loss": 1.1365, "mean_abs_error": 415.8840153271964, "mean_abs_error_last_10": 195.16995600773268, "mean_abs_error_last_25": 270.61679609989756, "mean_abs_error_last_50": 285.179283781135, "mean_pred_prob": 0.02619634729344398, "mean_pred_prob_last_10": 0.13941380009055138, "mean_pred_prob_last_25": 0.07543888418003916, "mean_pred_prob_last_50": 0.045425737369805576, "mean_token_accuracy": 0.8696941435337067, "step": 4810 }, { "epoch": 0.0856843190585391, "grad_norm": 1.4036539033000963, "learning_rate": 0.0001, "loss": 1.1703, "mean_abs_error": 571.4725714738248, "mean_abs_error_last_10": 150.7373674569202, "mean_abs_error_last_25": 226.15429287150474, "mean_abs_error_last_50": 325.81346573836106, "mean_pred_prob": 0.03493434457341209, "mean_pred_prob_last_10": 0.1816610662965104, "mean_pred_prob_last_25": 0.0997670114506036, "mean_pred_prob_last_50": 0.060007990745361894, "mean_token_accuracy": 0.8773126661777496, "step": 4820 }, { "epoch": 0.08586208735534105, "grad_norm": 1.248967514520284, "learning_rate": 0.0001, "loss": 1.1846, "mean_abs_error": 309.8671313290551, "mean_abs_error_last_10": 69.89734082786214, "mean_abs_error_last_25": 106.75116799383254, "mean_abs_error_last_50": 172.72816445069782, "mean_pred_prob": 0.03693825281225145, "mean_pred_prob_last_10": 0.19258835073560476, "mean_pred_prob_last_25": 0.10631552189588547, "mean_pred_prob_last_50": 0.06428431179374457, "mean_token_accuracy": 0.8651641190052033, "step": 4830 }, { "epoch": 0.08603985565214299, "grad_norm": 1.4861102207561085, "learning_rate": 0.0001, "loss": 1.2171, "mean_abs_error": 569.5187065407506, "mean_abs_error_last_10": 139.3953319646058, "mean_abs_error_last_25": 176.7680392929512, "mean_abs_error_last_50": 271.54519398208123, "mean_pred_prob": 0.022973644779995084, "mean_pred_prob_last_10": 0.12207811884582043, "mean_pred_prob_last_25": 0.06493558790534734, "mean_pred_prob_last_50": 0.03956853360868991, "mean_token_accuracy": 0.8671528160572052, "step": 4840 }, { "epoch": 0.08621762394894494, "grad_norm": 1.8683428271992046, "learning_rate": 0.0001, "loss": 1.1074, "mean_abs_error": 93.82799435064152, "mean_abs_error_last_10": 12.171741221617523, "mean_abs_error_last_25": 29.291969389794776, "mean_abs_error_last_50": 51.976019303856255, "mean_pred_prob": 0.03837143033742905, "mean_pred_prob_last_10": 0.21560814157128333, "mean_pred_prob_last_25": 0.11405877359211444, "mean_pred_prob_last_50": 0.06720423977822065, "mean_token_accuracy": 0.8742630660533905, "step": 4850 }, { "epoch": 0.0863953922457469, "grad_norm": 3.2608150866687304, "learning_rate": 0.0001, "loss": 1.2262, "mean_abs_error": 449.47303131085664, "mean_abs_error_last_10": 134.16336593204397, "mean_abs_error_last_25": 151.3302941899088, "mean_abs_error_last_50": 254.38694997167227, "mean_pred_prob": 0.02161081382073462, "mean_pred_prob_last_10": 0.12645612824708224, "mean_pred_prob_last_25": 0.06478813253343105, "mean_pred_prob_last_50": 0.037933959160000086, "mean_token_accuracy": 0.871285879611969, "step": 4860 }, { "epoch": 0.08657316054254884, "grad_norm": 1.1319995232731686, "learning_rate": 0.0001, "loss": 1.1008, "mean_abs_error": 1080.36691910229, "mean_abs_error_last_10": 502.8250557690296, "mean_abs_error_last_25": 645.5945295148168, "mean_abs_error_last_50": 776.5928354193942, "mean_pred_prob": 0.019608459864684846, "mean_pred_prob_last_10": 0.10738166117516812, "mean_pred_prob_last_25": 0.05729269857692998, "mean_pred_prob_last_50": 0.03422598197648767, "mean_token_accuracy": 0.8756619930267334, "step": 4870 }, { "epoch": 0.08675092883935079, "grad_norm": 0.9258481044979147, "learning_rate": 0.0001, "loss": 1.1086, "mean_abs_error": 944.0208942562324, "mean_abs_error_last_10": 501.09283278866167, "mean_abs_error_last_25": 592.2164377559205, "mean_abs_error_last_50": 686.6302005277659, "mean_pred_prob": 0.03893922934366856, "mean_pred_prob_last_10": 0.20736925845849327, "mean_pred_prob_last_25": 0.11206419873051346, "mean_pred_prob_last_50": 0.06720332482072991, "mean_token_accuracy": 0.8720599710941315, "step": 4880 }, { "epoch": 0.08692869713615274, "grad_norm": 1.5774477420465625, "learning_rate": 0.0001, "loss": 1.0767, "mean_abs_error": 283.2994316950919, "mean_abs_error_last_10": 121.29264420363006, "mean_abs_error_last_25": 113.52298174178239, "mean_abs_error_last_50": 143.32142813918176, "mean_pred_prob": 0.02745819939300418, "mean_pred_prob_last_10": 0.14352449998259545, "mean_pred_prob_last_25": 0.0780455636791885, "mean_pred_prob_last_50": 0.04719181242398918, "mean_token_accuracy": 0.8675102889537811, "step": 4890 }, { "epoch": 0.08710646543295468, "grad_norm": 1.6764980109731153, "learning_rate": 0.0001, "loss": 1.1578, "mean_abs_error": 231.78191305299302, "mean_abs_error_last_10": 82.9765614714727, "mean_abs_error_last_25": 111.94895937319515, "mean_abs_error_last_50": 149.53438471218556, "mean_pred_prob": 0.027291051764041185, "mean_pred_prob_last_10": 0.15764327086508273, "mean_pred_prob_last_25": 0.07983885500580072, "mean_pred_prob_last_50": 0.047325174510478976, "mean_token_accuracy": 0.8698634743690491, "step": 4900 }, { "epoch": 0.08728423372975663, "grad_norm": 1.7348217399640513, "learning_rate": 0.0001, "loss": 1.1988, "mean_abs_error": 547.2566283236355, "mean_abs_error_last_10": 84.77281009780113, "mean_abs_error_last_25": 120.5170172787919, "mean_abs_error_last_50": 265.50650031124576, "mean_pred_prob": 0.01515000225044787, "mean_pred_prob_last_10": 0.09005028083920479, "mean_pred_prob_last_25": 0.04696488510817289, "mean_pred_prob_last_50": 0.02709236051887274, "mean_token_accuracy": 0.8691994488239289, "step": 4910 }, { "epoch": 0.08746200202655859, "grad_norm": 1.527327929320477, "learning_rate": 0.0001, "loss": 1.1735, "mean_abs_error": 763.8637079706795, "mean_abs_error_last_10": 274.15593449255897, "mean_abs_error_last_25": 388.7918464291593, "mean_abs_error_last_50": 517.2313033964081, "mean_pred_prob": 0.02529272732208483, "mean_pred_prob_last_10": 0.14235178895760328, "mean_pred_prob_last_25": 0.07525455645518378, "mean_pred_prob_last_50": 0.04427875723631587, "mean_token_accuracy": 0.869351065158844, "step": 4920 }, { "epoch": 0.08763977032336054, "grad_norm": 1.7578451756919957, "learning_rate": 0.0001, "loss": 1.1034, "mean_abs_error": 218.8003184108672, "mean_abs_error_last_10": 28.709870268419888, "mean_abs_error_last_25": 61.30062225111842, "mean_abs_error_last_50": 116.15304283327546, "mean_pred_prob": 0.03020396032370627, "mean_pred_prob_last_10": 0.16289082467556, "mean_pred_prob_last_25": 0.08873046580702067, "mean_pred_prob_last_50": 0.05256151221692562, "mean_token_accuracy": 0.8717720925807952, "step": 4930 }, { "epoch": 0.08781753862016248, "grad_norm": 1.199269494084506, "learning_rate": 0.0001, "loss": 1.1552, "mean_abs_error": 573.2648768247388, "mean_abs_error_last_10": 275.58300755117824, "mean_abs_error_last_25": 266.8518321836567, "mean_abs_error_last_50": 347.28536257666127, "mean_pred_prob": 0.026519707537954674, "mean_pred_prob_last_10": 0.14456221726723015, "mean_pred_prob_last_25": 0.07759350303094834, "mean_pred_prob_last_50": 0.04608102252241224, "mean_token_accuracy": 0.8747597157955169, "step": 4940 }, { "epoch": 0.08799530691696443, "grad_norm": 2.1090123442470023, "learning_rate": 0.0001, "loss": 1.149, "mean_abs_error": 967.7177553888235, "mean_abs_error_last_10": 260.58937239991184, "mean_abs_error_last_25": 377.62458845319213, "mean_abs_error_last_50": 560.1190453187648, "mean_pred_prob": 0.018511746046715415, "mean_pred_prob_last_10": 0.10291655682958663, "mean_pred_prob_last_25": 0.053407634084578604, "mean_pred_prob_last_50": 0.03216553209349513, "mean_token_accuracy": 0.8658661484718323, "step": 4950 }, { "epoch": 0.08817307521376638, "grad_norm": 0.7826295957569807, "learning_rate": 0.0001, "loss": 1.0238, "mean_abs_error": 702.8854466036064, "mean_abs_error_last_10": 125.82543355804523, "mean_abs_error_last_25": 192.45010034740952, "mean_abs_error_last_50": 373.74133782958216, "mean_pred_prob": 0.028749128460185604, "mean_pred_prob_last_10": 0.15326416748575866, "mean_pred_prob_last_25": 0.08353062107926235, "mean_pred_prob_last_50": 0.050004919269122185, "mean_token_accuracy": 0.8823276221752167, "step": 4960 }, { "epoch": 0.08835084351056832, "grad_norm": 0.909591297195769, "learning_rate": 0.0001, "loss": 1.121, "mean_abs_error": 394.9131405797295, "mean_abs_error_last_10": 186.8371550829704, "mean_abs_error_last_25": 262.9300877915832, "mean_abs_error_last_50": 281.4802275996673, "mean_pred_prob": 0.027030159323476254, "mean_pred_prob_last_10": 0.14046008232980967, "mean_pred_prob_last_25": 0.07718469239771367, "mean_pred_prob_last_50": 0.046247320855036377, "mean_token_accuracy": 0.8657492280006409, "step": 4970 }, { "epoch": 0.08852861180737027, "grad_norm": 2.486686203216045, "learning_rate": 0.0001, "loss": 1.0522, "mean_abs_error": 78.20650858345493, "mean_abs_error_last_10": 14.377327627801597, "mean_abs_error_last_25": 28.526996116266446, "mean_abs_error_last_50": 46.06934786181897, "mean_pred_prob": 0.03764573652297258, "mean_pred_prob_last_10": 0.21278189420700072, "mean_pred_prob_last_25": 0.11173623837530614, "mean_pred_prob_last_50": 0.06579708307981491, "mean_token_accuracy": 0.8852830052375793, "step": 4980 }, { "epoch": 0.08870638010417223, "grad_norm": 2.4052367845873133, "learning_rate": 0.0001, "loss": 1.103, "mean_abs_error": 283.2415519286646, "mean_abs_error_last_10": 62.75849909571291, "mean_abs_error_last_25": 99.79353248646234, "mean_abs_error_last_50": 154.16973182409276, "mean_pred_prob": 0.02911479058675468, "mean_pred_prob_last_10": 0.16091703437268734, "mean_pred_prob_last_25": 0.08659274699166417, "mean_pred_prob_last_50": 0.051061036763712764, "mean_token_accuracy": 0.8676920711994172, "step": 4990 }, { "epoch": 0.08888414840097417, "grad_norm": 2.2820081975994935, "learning_rate": 0.0001, "loss": 1.1701, "mean_abs_error": 582.2231087974508, "mean_abs_error_last_10": 247.35967962530844, "mean_abs_error_last_25": 234.2870631936211, "mean_abs_error_last_50": 302.2463851193113, "mean_pred_prob": 0.020571812946582214, "mean_pred_prob_last_10": 0.11798392676282674, "mean_pred_prob_last_25": 0.061495358787942676, "mean_pred_prob_last_50": 0.03602127099875361, "mean_token_accuracy": 0.8694763839244842, "step": 5000 }, { "epoch": 0.08906191669777612, "grad_norm": 1.6867439621905473, "learning_rate": 0.0001, "loss": 1.1253, "mean_abs_error": 1832.8146838279695, "mean_abs_error_last_10": 956.5661780086748, "mean_abs_error_last_25": 1158.4420532940499, "mean_abs_error_last_50": 1401.2171964921413, "mean_pred_prob": 0.011114714038558304, "mean_pred_prob_last_10": 0.06886970377527177, "mean_pred_prob_last_25": 0.034229905119718754, "mean_pred_prob_last_50": 0.019793063791439636, "mean_token_accuracy": 0.8674332201480865, "step": 5010 }, { "epoch": 0.08923968499457807, "grad_norm": 0.8669608278794523, "learning_rate": 0.0001, "loss": 1.1586, "mean_abs_error": 1252.2063336189187, "mean_abs_error_last_10": 472.18946155315945, "mean_abs_error_last_25": 521.5471357622857, "mean_abs_error_last_50": 768.1187713962311, "mean_pred_prob": 0.017107946357282345, "mean_pred_prob_last_10": 0.0922876852680929, "mean_pred_prob_last_25": 0.04989596147206612, "mean_pred_prob_last_50": 0.029761041153687984, "mean_token_accuracy": 0.8700891315937043, "step": 5020 }, { "epoch": 0.08941745329138001, "grad_norm": 2.343632334901347, "learning_rate": 0.0001, "loss": 1.1256, "mean_abs_error": 1558.7392325450041, "mean_abs_error_last_10": 776.6018305046152, "mean_abs_error_last_25": 910.1051489715479, "mean_abs_error_last_50": 1135.6783576610064, "mean_pred_prob": 0.01661538025655318, "mean_pred_prob_last_10": 0.09534134952991735, "mean_pred_prob_last_25": 0.04942564321099781, "mean_pred_prob_last_50": 0.029014781153819058, "mean_token_accuracy": 0.875753515958786, "step": 5030 }, { "epoch": 0.08959522158818196, "grad_norm": 1.1864144524828844, "learning_rate": 0.0001, "loss": 1.0897, "mean_abs_error": 562.9682127368372, "mean_abs_error_last_10": 212.8733015782976, "mean_abs_error_last_25": 257.0234326655841, "mean_abs_error_last_50": 346.1252557542203, "mean_pred_prob": 0.03042644746310543, "mean_pred_prob_last_10": 0.16276061928947455, "mean_pred_prob_last_25": 0.08857738646911457, "mean_pred_prob_last_50": 0.05266836200898979, "mean_token_accuracy": 0.8667658746242524, "step": 5040 }, { "epoch": 0.08977298988498392, "grad_norm": 3.587488376493596, "learning_rate": 0.0001, "loss": 1.1666, "mean_abs_error": 1489.603249632624, "mean_abs_error_last_10": 641.6688104423281, "mean_abs_error_last_25": 695.921008385702, "mean_abs_error_last_50": 903.1053086724087, "mean_pred_prob": 0.022842685334035197, "mean_pred_prob_last_10": 0.10687242343556136, "mean_pred_prob_last_25": 0.062130803300533444, "mean_pred_prob_last_50": 0.03841111122164875, "mean_token_accuracy": 0.8777999877929688, "step": 5050 }, { "epoch": 0.08995075818178586, "grad_norm": 1.583669623774241, "learning_rate": 0.0001, "loss": 1.1823, "mean_abs_error": 258.92492831011197, "mean_abs_error_last_10": 89.24689839686012, "mean_abs_error_last_25": 143.655359631649, "mean_abs_error_last_50": 164.4067901486645, "mean_pred_prob": 0.031490382831543685, "mean_pred_prob_last_10": 0.15979250371456147, "mean_pred_prob_last_25": 0.08701636381447315, "mean_pred_prob_last_50": 0.05297737577930093, "mean_token_accuracy": 0.8637917101383209, "step": 5060 }, { "epoch": 0.09012852647858781, "grad_norm": 4.279150081123735, "learning_rate": 0.0001, "loss": 1.1439, "mean_abs_error": 1015.871818125171, "mean_abs_error_last_10": 369.94516951017624, "mean_abs_error_last_25": 455.62020675613337, "mean_abs_error_last_50": 605.6474681115535, "mean_pred_prob": 0.022411614551674574, "mean_pred_prob_last_10": 0.122523262683535, "mean_pred_prob_last_25": 0.065400853153551, "mean_pred_prob_last_50": 0.03880182011052966, "mean_token_accuracy": 0.8716198325157165, "step": 5070 }, { "epoch": 0.09030629477538976, "grad_norm": 1.115532916928188, "learning_rate": 0.0001, "loss": 1.1405, "mean_abs_error": 411.82073208932104, "mean_abs_error_last_10": 100.95084825249063, "mean_abs_error_last_25": 139.54404872054073, "mean_abs_error_last_50": 232.4065898218912, "mean_pred_prob": 0.04804078417364508, "mean_pred_prob_last_10": 0.19605498984456063, "mean_pred_prob_last_25": 0.1081656108610332, "mean_pred_prob_last_50": 0.07893851865082979, "mean_token_accuracy": 0.8650032997131347, "step": 5080 }, { "epoch": 0.0904840630721917, "grad_norm": 3.3314885207406495, "learning_rate": 0.0001, "loss": 1.077, "mean_abs_error": 427.4370223672254, "mean_abs_error_last_10": 160.31010892935657, "mean_abs_error_last_25": 192.3989407450863, "mean_abs_error_last_50": 263.02767987677146, "mean_pred_prob": 0.02663805296178907, "mean_pred_prob_last_10": 0.1494617907330394, "mean_pred_prob_last_25": 0.078741788957268, "mean_pred_prob_last_50": 0.046467928355559705, "mean_token_accuracy": 0.8830110132694244, "step": 5090 }, { "epoch": 0.09066183136899365, "grad_norm": 0.9235626665780834, "learning_rate": 0.0001, "loss": 1.0954, "mean_abs_error": 306.31313888880675, "mean_abs_error_last_10": 182.48314294065804, "mean_abs_error_last_25": 196.74501902780486, "mean_abs_error_last_50": 213.96249449666115, "mean_pred_prob": 0.02752052030991763, "mean_pred_prob_last_10": 0.15559644401073455, "mean_pred_prob_last_25": 0.08158685313537717, "mean_pred_prob_last_50": 0.04805904917884618, "mean_token_accuracy": 0.8740381240844727, "step": 5100 }, { "epoch": 0.0908395996657956, "grad_norm": 1.6205138648932826, "learning_rate": 0.0001, "loss": 1.16, "mean_abs_error": 1192.5283907382425, "mean_abs_error_last_10": 526.9120485201279, "mean_abs_error_last_25": 694.7369124895392, "mean_abs_error_last_50": 858.682825111932, "mean_pred_prob": 0.013492047958425245, "mean_pred_prob_last_10": 0.08028552322939504, "mean_pred_prob_last_25": 0.041049933640169914, "mean_pred_prob_last_50": 0.0238048563915072, "mean_token_accuracy": 0.8755265235900879, "step": 5110 }, { "epoch": 0.09101736796259755, "grad_norm": 1.5067973687213514, "learning_rate": 0.0001, "loss": 1.0581, "mean_abs_error": 198.7320859562697, "mean_abs_error_last_10": 48.17164900395413, "mean_abs_error_last_25": 81.5844204522607, "mean_abs_error_last_50": 119.48881966872143, "mean_pred_prob": 0.04111221586354077, "mean_pred_prob_last_10": 0.21704879254102707, "mean_pred_prob_last_25": 0.11936610024422407, "mean_pred_prob_last_50": 0.07058620871976018, "mean_token_accuracy": 0.8752039670944214, "step": 5120 }, { "epoch": 0.0911951362593995, "grad_norm": 2.070726690675538, "learning_rate": 0.0001, "loss": 1.1188, "mean_abs_error": 544.1108729231871, "mean_abs_error_last_10": 163.92340579056662, "mean_abs_error_last_25": 196.09623005563046, "mean_abs_error_last_50": 267.30112273628754, "mean_pred_prob": 0.01986444799695164, "mean_pred_prob_last_10": 0.11350088233593851, "mean_pred_prob_last_25": 0.05928117746952921, "mean_pred_prob_last_50": 0.03492796119535342, "mean_token_accuracy": 0.8831024587154388, "step": 5130 }, { "epoch": 0.09137290455620145, "grad_norm": 1.236540669068718, "learning_rate": 0.0001, "loss": 1.1139, "mean_abs_error": 456.85474742402965, "mean_abs_error_last_10": 209.0314195747158, "mean_abs_error_last_25": 299.06153768354886, "mean_abs_error_last_50": 332.18448380140546, "mean_pred_prob": 0.02702902932651341, "mean_pred_prob_last_10": 0.14587568063288928, "mean_pred_prob_last_25": 0.07807600302621723, "mean_pred_prob_last_50": 0.046777814044617116, "mean_token_accuracy": 0.8665916085243225, "step": 5140 }, { "epoch": 0.09155067285300339, "grad_norm": 0.609383663261542, "learning_rate": 0.0001, "loss": 1.1169, "mean_abs_error": 793.5983126506756, "mean_abs_error_last_10": 141.82882367255135, "mean_abs_error_last_25": 206.06267388385805, "mean_abs_error_last_50": 390.77737285689653, "mean_pred_prob": 0.02274495730525814, "mean_pred_prob_last_10": 0.12818279837956653, "mean_pred_prob_last_25": 0.06818265408510342, "mean_pred_prob_last_50": 0.03997104134177789, "mean_token_accuracy": 0.8602327883243561, "step": 5150 }, { "epoch": 0.09172844114980534, "grad_norm": 1.8500535650321537, "learning_rate": 0.0001, "loss": 1.1303, "mean_abs_error": 1450.5389274547665, "mean_abs_error_last_10": 645.5810197888485, "mean_abs_error_last_25": 741.2745301007225, "mean_abs_error_last_50": 1000.7799322124401, "mean_pred_prob": 0.017806697575724683, "mean_pred_prob_last_10": 0.08685511292424053, "mean_pred_prob_last_25": 0.04919214228866622, "mean_pred_prob_last_50": 0.03027000170550309, "mean_token_accuracy": 0.8618246018886566, "step": 5160 }, { "epoch": 0.0919062094466073, "grad_norm": 1.7597390356204323, "learning_rate": 0.0001, "loss": 1.1153, "mean_abs_error": 1144.273559902991, "mean_abs_error_last_10": 526.9566039050908, "mean_abs_error_last_25": 610.582581979198, "mean_abs_error_last_50": 759.5104076087406, "mean_pred_prob": 0.026627323242428245, "mean_pred_prob_last_10": 0.1395589256891981, "mean_pred_prob_last_25": 0.07488034375710413, "mean_pred_prob_last_50": 0.04567226747021778, "mean_token_accuracy": 0.8677672147750854, "step": 5170 }, { "epoch": 0.09208397774340923, "grad_norm": 1.6072028423839226, "learning_rate": 0.0001, "loss": 1.112, "mean_abs_error": 140.63305193636342, "mean_abs_error_last_10": 64.67429270971158, "mean_abs_error_last_25": 79.45107471202184, "mean_abs_error_last_50": 87.16620687274961, "mean_pred_prob": 0.04165639546699822, "mean_pred_prob_last_10": 0.21834360733628272, "mean_pred_prob_last_25": 0.11571627222001553, "mean_pred_prob_last_50": 0.07040369566529989, "mean_token_accuracy": 0.8766228497028351, "step": 5180 }, { "epoch": 0.09226174604021119, "grad_norm": 2.692269780432501, "learning_rate": 0.0001, "loss": 1.1902, "mean_abs_error": 328.07739926797905, "mean_abs_error_last_10": 179.96812201204796, "mean_abs_error_last_25": 247.3273142175884, "mean_abs_error_last_50": 269.0247097392782, "mean_pred_prob": 0.027808293374255298, "mean_pred_prob_last_10": 0.14539298322051764, "mean_pred_prob_last_25": 0.07928223935887217, "mean_pred_prob_last_50": 0.04789522560313344, "mean_token_accuracy": 0.8629862368106842, "step": 5190 }, { "epoch": 0.09243951433701314, "grad_norm": 1.1107379405666133, "learning_rate": 0.0001, "loss": 1.1171, "mean_abs_error": 539.9550428657383, "mean_abs_error_last_10": 109.65539645678321, "mean_abs_error_last_25": 162.82283661804428, "mean_abs_error_last_50": 318.88901972374134, "mean_pred_prob": 0.033481446484802294, "mean_pred_prob_last_10": 0.1810833258263301, "mean_pred_prob_last_25": 0.09905848376802169, "mean_pred_prob_last_50": 0.0587468249257654, "mean_token_accuracy": 0.8719001531600952, "step": 5200 }, { "epoch": 0.09261728263381508, "grad_norm": 1.0967277675408782, "learning_rate": 0.0001, "loss": 1.1005, "mean_abs_error": 491.814736364198, "mean_abs_error_last_10": 140.4215708768148, "mean_abs_error_last_25": 195.21935496706115, "mean_abs_error_last_50": 272.69488323954647, "mean_pred_prob": 0.02059527705423534, "mean_pred_prob_last_10": 0.11201784294098616, "mean_pred_prob_last_25": 0.06082157958298921, "mean_pred_prob_last_50": 0.03596998672001064, "mean_token_accuracy": 0.8705707728862763, "step": 5210 }, { "epoch": 0.09279505093061703, "grad_norm": 1.7908269549903262, "learning_rate": 0.0001, "loss": 1.0505, "mean_abs_error": 522.2971088467244, "mean_abs_error_last_10": 143.4752159717592, "mean_abs_error_last_25": 190.7493698084774, "mean_abs_error_last_50": 284.32236502718496, "mean_pred_prob": 0.020441006711917, "mean_pred_prob_last_10": 0.12104492071084678, "mean_pred_prob_last_25": 0.06204449268989265, "mean_pred_prob_last_50": 0.036264354863669725, "mean_token_accuracy": 0.8764260113239288, "step": 5220 }, { "epoch": 0.09297281922741898, "grad_norm": 3.2023307935126453, "learning_rate": 0.0001, "loss": 1.1409, "mean_abs_error": 493.29533821797304, "mean_abs_error_last_10": 86.79151454226317, "mean_abs_error_last_25": 116.65155687799245, "mean_abs_error_last_50": 214.7795668196765, "mean_pred_prob": 0.023945213318802415, "mean_pred_prob_last_10": 0.1364724325016141, "mean_pred_prob_last_25": 0.07103702053427696, "mean_pred_prob_last_50": 0.04194659315980971, "mean_token_accuracy": 0.8595689177513123, "step": 5230 }, { "epoch": 0.09315058752422092, "grad_norm": 1.4383313915811735, "learning_rate": 0.0001, "loss": 1.0944, "mean_abs_error": 410.7437007289565, "mean_abs_error_last_10": 159.97032808352373, "mean_abs_error_last_25": 185.98204503778507, "mean_abs_error_last_50": 227.94482232567375, "mean_pred_prob": 0.024081162619404496, "mean_pred_prob_last_10": 0.1320261809974909, "mean_pred_prob_last_25": 0.0701858988031745, "mean_pred_prob_last_50": 0.041830799402669074, "mean_token_accuracy": 0.8634596407413483, "step": 5240 }, { "epoch": 0.09332835582102288, "grad_norm": 1.7066343641867177, "learning_rate": 0.0001, "loss": 1.0918, "mean_abs_error": 602.2299343679006, "mean_abs_error_last_10": 192.3620220335153, "mean_abs_error_last_25": 227.90243711161742, "mean_abs_error_last_50": 344.54012379342674, "mean_pred_prob": 0.031093818537192418, "mean_pred_prob_last_10": 0.15595337152481079, "mean_pred_prob_last_25": 0.08779681056621484, "mean_pred_prob_last_50": 0.0532910653972067, "mean_token_accuracy": 0.8753671050071716, "step": 5250 }, { "epoch": 0.09350612411782483, "grad_norm": 1.6296034441290754, "learning_rate": 0.0001, "loss": 1.0812, "mean_abs_error": 225.65315298025425, "mean_abs_error_last_10": 109.43198440100954, "mean_abs_error_last_25": 212.71553843002903, "mean_abs_error_last_50": 226.1198637605578, "mean_pred_prob": 0.04870750240515918, "mean_pred_prob_last_10": 0.24136911612004042, "mean_pred_prob_last_25": 0.13755458844825624, "mean_pred_prob_last_50": 0.0836423930246383, "mean_token_accuracy": 0.8667048275470733, "step": 5260 }, { "epoch": 0.09368389241462678, "grad_norm": 0.9651960579594666, "learning_rate": 0.0001, "loss": 1.0459, "mean_abs_error": 687.3670513327108, "mean_abs_error_last_10": 182.25471860444662, "mean_abs_error_last_25": 230.35032763601663, "mean_abs_error_last_50": 362.9524510900738, "mean_pred_prob": 0.024317867308855056, "mean_pred_prob_last_10": 0.13677854724228383, "mean_pred_prob_last_25": 0.07245791060850024, "mean_pred_prob_last_50": 0.04252804177813232, "mean_token_accuracy": 0.8717190325260162, "step": 5270 }, { "epoch": 0.09386166071142872, "grad_norm": 0.9425174389168847, "learning_rate": 0.0001, "loss": 1.1159, "mean_abs_error": 164.98024228784521, "mean_abs_error_last_10": 48.3843334869586, "mean_abs_error_last_25": 81.83392295411838, "mean_abs_error_last_50": 98.65793556162176, "mean_pred_prob": 0.04083395614288747, "mean_pred_prob_last_10": 0.21555021181702613, "mean_pred_prob_last_25": 0.11750174146145582, "mean_pred_prob_last_50": 0.0708398967050016, "mean_token_accuracy": 0.868832266330719, "step": 5280 }, { "epoch": 0.09403942900823067, "grad_norm": 0.8845999906959364, "learning_rate": 0.0001, "loss": 1.1108, "mean_abs_error": 251.13305882290632, "mean_abs_error_last_10": 31.09153515208618, "mean_abs_error_last_25": 61.535327176403555, "mean_abs_error_last_50": 110.63398685754892, "mean_pred_prob": 0.033201128453947605, "mean_pred_prob_last_10": 0.1912896927446127, "mean_pred_prob_last_25": 0.10032079061493278, "mean_pred_prob_last_50": 0.058417541021481154, "mean_token_accuracy": 0.8782100796699523, "step": 5290 }, { "epoch": 0.09421719730503263, "grad_norm": 0.9361661035649731, "learning_rate": 0.0001, "loss": 0.9576, "mean_abs_error": 215.4200028551646, "mean_abs_error_last_10": 25.275361501767613, "mean_abs_error_last_25": 44.1930427040029, "mean_abs_error_last_50": 86.78346247147871, "mean_pred_prob": 0.0329899241682142, "mean_pred_prob_last_10": 0.1746735692024231, "mean_pred_prob_last_25": 0.09543195441365242, "mean_pred_prob_last_50": 0.0573797469958663, "mean_token_accuracy": 0.8958361089229584, "step": 5300 }, { "epoch": 0.09439496560183457, "grad_norm": 1.4612129182737426, "learning_rate": 0.0001, "loss": 1.1039, "mean_abs_error": 485.4371643704086, "mean_abs_error_last_10": 117.14994692105608, "mean_abs_error_last_25": 152.26030582152686, "mean_abs_error_last_50": 229.77389038770406, "mean_pred_prob": 0.02389079148415476, "mean_pred_prob_last_10": 0.13339641876518726, "mean_pred_prob_last_25": 0.07067631036043168, "mean_pred_prob_last_50": 0.04193281587213278, "mean_token_accuracy": 0.8707182765007019, "step": 5310 }, { "epoch": 0.09457273389863652, "grad_norm": 1.448504434808885, "learning_rate": 0.0001, "loss": 1.073, "mean_abs_error": 690.1170283752241, "mean_abs_error_last_10": 225.68644110479423, "mean_abs_error_last_25": 288.52252363928596, "mean_abs_error_last_50": 388.7312867415279, "mean_pred_prob": 0.021815106936264782, "mean_pred_prob_last_10": 0.12109999982640147, "mean_pred_prob_last_25": 0.06434553435537964, "mean_pred_prob_last_50": 0.0379788261372596, "mean_token_accuracy": 0.8791216075420379, "step": 5320 }, { "epoch": 0.09475050219543847, "grad_norm": 1.97832203006691, "learning_rate": 0.0001, "loss": 1.0839, "mean_abs_error": 296.0052383031948, "mean_abs_error_last_10": 168.82452028461753, "mean_abs_error_last_25": 187.7759640324804, "mean_abs_error_last_50": 212.52254548599134, "mean_pred_prob": 0.022378019383177163, "mean_pred_prob_last_10": 0.12318307999521494, "mean_pred_prob_last_25": 0.06611682465299965, "mean_pred_prob_last_50": 0.039110881136730315, "mean_token_accuracy": 0.8711007475852967, "step": 5330 }, { "epoch": 0.09492827049224041, "grad_norm": 0.8770391718184192, "learning_rate": 0.0001, "loss": 1.0592, "mean_abs_error": 161.57661874425054, "mean_abs_error_last_10": 40.79151784691497, "mean_abs_error_last_25": 49.747254220004564, "mean_abs_error_last_50": 84.16339278867784, "mean_pred_prob": 0.0375046617584303, "mean_pred_prob_last_10": 0.1971282310783863, "mean_pred_prob_last_25": 0.10836004326120019, "mean_pred_prob_last_50": 0.06488484917208552, "mean_token_accuracy": 0.8725644588470459, "step": 5340 }, { "epoch": 0.09510603878904236, "grad_norm": 1.4163811936713107, "learning_rate": 0.0001, "loss": 1.1722, "mean_abs_error": 911.7661918643222, "mean_abs_error_last_10": 545.4768548753981, "mean_abs_error_last_25": 621.8124684543138, "mean_abs_error_last_50": 691.2398835025558, "mean_pred_prob": 0.025306303435354494, "mean_pred_prob_last_10": 0.1379912528267596, "mean_pred_prob_last_25": 0.07288566231145524, "mean_pred_prob_last_50": 0.04397882959165145, "mean_token_accuracy": 0.869339120388031, "step": 5350 }, { "epoch": 0.09528380708584432, "grad_norm": 2.493784196999841, "learning_rate": 0.0001, "loss": 1.0497, "mean_abs_error": 529.5179791680961, "mean_abs_error_last_10": 189.71973947980013, "mean_abs_error_last_25": 202.20674039128426, "mean_abs_error_last_50": 260.745386359309, "mean_pred_prob": 0.027218521339818835, "mean_pred_prob_last_10": 0.15042857034131885, "mean_pred_prob_last_25": 0.0810598639305681, "mean_pred_prob_last_50": 0.047905238997191193, "mean_token_accuracy": 0.8830582082271576, "step": 5360 }, { "epoch": 0.09546157538264625, "grad_norm": 2.395870012222873, "learning_rate": 0.0001, "loss": 1.0335, "mean_abs_error": 417.7364911356, "mean_abs_error_last_10": 92.17968287320353, "mean_abs_error_last_25": 138.7409766530174, "mean_abs_error_last_50": 227.84478509561092, "mean_pred_prob": 0.029734724608715624, "mean_pred_prob_last_10": 0.16340644131414592, "mean_pred_prob_last_25": 0.0868566210498102, "mean_pred_prob_last_50": 0.05133834667503834, "mean_token_accuracy": 0.8756498992443085, "step": 5370 }, { "epoch": 0.09563934367944821, "grad_norm": 1.0136154106706365, "learning_rate": 0.0001, "loss": 1.1653, "mean_abs_error": 1414.2891423117749, "mean_abs_error_last_10": 719.9492099697642, "mean_abs_error_last_25": 794.7391985083214, "mean_abs_error_last_50": 959.2432860790695, "mean_pred_prob": 0.029511827835813166, "mean_pred_prob_last_10": 0.1658190904854564, "mean_pred_prob_last_25": 0.08830228051810991, "mean_pred_prob_last_50": 0.05191216917301063, "mean_token_accuracy": 0.8602613031864166, "step": 5380 }, { "epoch": 0.09581711197625016, "grad_norm": 1.7044276298458307, "learning_rate": 0.0001, "loss": 1.117, "mean_abs_error": 270.943071809973, "mean_abs_error_last_10": 138.60873908626058, "mean_abs_error_last_25": 152.9779748837769, "mean_abs_error_last_50": 165.4187656647575, "mean_pred_prob": 0.031284891674295066, "mean_pred_prob_last_10": 0.16724181585013867, "mean_pred_prob_last_25": 0.09164185198023915, "mean_pred_prob_last_50": 0.05458404347300529, "mean_token_accuracy": 0.8646478235721589, "step": 5390 }, { "epoch": 0.0959948802730521, "grad_norm": 1.0163555594456435, "learning_rate": 0.0001, "loss": 1.1266, "mean_abs_error": 522.8807947557248, "mean_abs_error_last_10": 175.82246978052814, "mean_abs_error_last_25": 243.2471598167771, "mean_abs_error_last_50": 314.21850468026395, "mean_pred_prob": 0.02238746958319098, "mean_pred_prob_last_10": 0.11998137850314379, "mean_pred_prob_last_25": 0.06405028747394681, "mean_pred_prob_last_50": 0.03865336873568594, "mean_token_accuracy": 0.8748334527015686, "step": 5400 }, { "epoch": 0.09617264856985405, "grad_norm": 2.2715143810205083, "learning_rate": 0.0001, "loss": 1.0928, "mean_abs_error": 410.0675359599574, "mean_abs_error_last_10": 82.38675124572167, "mean_abs_error_last_25": 161.66350165593877, "mean_abs_error_last_50": 300.96597935861735, "mean_pred_prob": 0.03376826741732657, "mean_pred_prob_last_10": 0.1820038601756096, "mean_pred_prob_last_25": 0.09813669677823782, "mean_pred_prob_last_50": 0.05851865028962493, "mean_token_accuracy": 0.8710188627243042, "step": 5410 }, { "epoch": 0.096350416866656, "grad_norm": 1.9794822428886027, "learning_rate": 0.0001, "loss": 1.0651, "mean_abs_error": 271.7187522574499, "mean_abs_error_last_10": 89.69646463312198, "mean_abs_error_last_25": 116.9860380482731, "mean_abs_error_last_50": 160.3258437233122, "mean_pred_prob": 0.03976277126930654, "mean_pred_prob_last_10": 0.20585141256451606, "mean_pred_prob_last_25": 0.11356302909553051, "mean_pred_prob_last_50": 0.06797019373625517, "mean_token_accuracy": 0.8688409745693206, "step": 5420 }, { "epoch": 0.09652818516345794, "grad_norm": 1.3032138541277163, "learning_rate": 0.0001, "loss": 1.0498, "mean_abs_error": 592.6434020657648, "mean_abs_error_last_10": 234.34864999328678, "mean_abs_error_last_25": 235.86394965946812, "mean_abs_error_last_50": 303.65695522883993, "mean_pred_prob": 0.02702655440662056, "mean_pred_prob_last_10": 0.139137258939445, "mean_pred_prob_last_25": 0.07799030719324947, "mean_pred_prob_last_50": 0.04702928415499628, "mean_token_accuracy": 0.8764529824256897, "step": 5430 }, { "epoch": 0.0967059534602599, "grad_norm": 1.0427416530410047, "learning_rate": 0.0001, "loss": 1.0784, "mean_abs_error": 689.5744635465628, "mean_abs_error_last_10": 339.7477758354947, "mean_abs_error_last_25": 348.38005640452036, "mean_abs_error_last_50": 452.2372907856923, "mean_pred_prob": 0.019343634985852988, "mean_pred_prob_last_10": 0.10822726450860501, "mean_pred_prob_last_25": 0.05653222737601027, "mean_pred_prob_last_50": 0.0335650390770752, "mean_token_accuracy": 0.8751103281974792, "step": 5440 }, { "epoch": 0.09688372175706185, "grad_norm": 0.8315926241704824, "learning_rate": 0.0001, "loss": 1.1636, "mean_abs_error": 837.0165279071625, "mean_abs_error_last_10": 560.2369869877857, "mean_abs_error_last_25": 556.8412407780927, "mean_abs_error_last_50": 599.139302744011, "mean_pred_prob": 0.013158080587163567, "mean_pred_prob_last_10": 0.0762719415128231, "mean_pred_prob_last_25": 0.039295157743617894, "mean_pred_prob_last_50": 0.023174081556499006, "mean_token_accuracy": 0.8551489770412445, "step": 5450 }, { "epoch": 0.09706149005386379, "grad_norm": 1.289410401820848, "learning_rate": 0.0001, "loss": 1.0658, "mean_abs_error": 1802.3489488632931, "mean_abs_error_last_10": 701.9571273245799, "mean_abs_error_last_25": 876.698744073053, "mean_abs_error_last_50": 1198.78624080787, "mean_pred_prob": 0.0131810808336013, "mean_pred_prob_last_10": 0.07895219524216372, "mean_pred_prob_last_25": 0.04042746088525746, "mean_pred_prob_last_50": 0.023389169386064168, "mean_token_accuracy": 0.8825616061687469, "step": 5460 }, { "epoch": 0.09723925835066574, "grad_norm": 1.2937050053947539, "learning_rate": 0.0001, "loss": 1.0933, "mean_abs_error": 949.9347910887193, "mean_abs_error_last_10": 372.9288989776774, "mean_abs_error_last_25": 577.8521784210209, "mean_abs_error_last_50": 714.9651388765675, "mean_pred_prob": 0.027906353696016593, "mean_pred_prob_last_10": 0.15029409285634757, "mean_pred_prob_last_25": 0.0801963713747682, "mean_pred_prob_last_50": 0.04801537948369514, "mean_token_accuracy": 0.8797843158245087, "step": 5470 }, { "epoch": 0.0974170266474677, "grad_norm": 1.3432410775180905, "learning_rate": 0.0001, "loss": 1.1369, "mean_abs_error": 531.0570367621466, "mean_abs_error_last_10": 201.8484985378242, "mean_abs_error_last_25": 254.26993338436714, "mean_abs_error_last_50": 280.83477726617804, "mean_pred_prob": 0.018822495662607252, "mean_pred_prob_last_10": 0.10644702482968568, "mean_pred_prob_last_25": 0.055841518007218836, "mean_pred_prob_last_50": 0.03302966579794884, "mean_token_accuracy": 0.8630094110965729, "step": 5480 }, { "epoch": 0.09759479494426963, "grad_norm": 1.0514541440985188, "learning_rate": 0.0001, "loss": 1.0682, "mean_abs_error": 742.250832514514, "mean_abs_error_last_10": 516.4596916941717, "mean_abs_error_last_25": 592.534730296874, "mean_abs_error_last_50": 657.1934515886132, "mean_pred_prob": 0.01759050239343196, "mean_pred_prob_last_10": 0.10320794321596623, "mean_pred_prob_last_25": 0.052372409123927356, "mean_pred_prob_last_50": 0.030730358557775617, "mean_token_accuracy": 0.8762460649013519, "step": 5490 }, { "epoch": 0.09777256324107159, "grad_norm": 1.3922935233711133, "learning_rate": 0.0001, "loss": 1.0588, "mean_abs_error": 315.60462885925324, "mean_abs_error_last_10": 113.43966208068628, "mean_abs_error_last_25": 124.56248107747629, "mean_abs_error_last_50": 165.18510509360271, "mean_pred_prob": 0.026717914454638957, "mean_pred_prob_last_10": 0.14623024575412275, "mean_pred_prob_last_25": 0.07851065434515477, "mean_pred_prob_last_50": 0.04667849964462221, "mean_token_accuracy": 0.8739736020565033, "step": 5500 }, { "epoch": 0.09795033153787354, "grad_norm": 1.4622441777408413, "learning_rate": 0.0001, "loss": 1.0151, "mean_abs_error": 212.49163750265933, "mean_abs_error_last_10": 112.20387546924194, "mean_abs_error_last_25": 132.6840777113875, "mean_abs_error_last_50": 147.16201358159438, "mean_pred_prob": 0.03693919430952519, "mean_pred_prob_last_10": 0.20006396006792784, "mean_pred_prob_last_25": 0.10893568899482489, "mean_pred_prob_last_50": 0.06418749624863267, "mean_token_accuracy": 0.8727891921997071, "step": 5510 }, { "epoch": 0.09812809983467548, "grad_norm": 0.6530597916308318, "learning_rate": 0.0001, "loss": 1.0356, "mean_abs_error": 245.1357765722436, "mean_abs_error_last_10": 65.24547133415325, "mean_abs_error_last_25": 97.04412360364984, "mean_abs_error_last_50": 136.17184249743363, "mean_pred_prob": 0.02870330144651234, "mean_pred_prob_last_10": 0.17044536210596561, "mean_pred_prob_last_25": 0.08580497968941928, "mean_pred_prob_last_50": 0.050086505338549615, "mean_token_accuracy": 0.8742558598518372, "step": 5520 }, { "epoch": 0.09830586813147743, "grad_norm": 1.5965480038974047, "learning_rate": 0.0001, "loss": 1.1202, "mean_abs_error": 879.8468996067668, "mean_abs_error_last_10": 254.71519201873122, "mean_abs_error_last_25": 353.26190529311583, "mean_abs_error_last_50": 536.5956655199632, "mean_pred_prob": 0.01501287198625505, "mean_pred_prob_last_10": 0.08880681581795216, "mean_pred_prob_last_25": 0.04561959421262145, "mean_pred_prob_last_50": 0.02640757872723043, "mean_token_accuracy": 0.8691257297992706, "step": 5530 }, { "epoch": 0.09848363642827938, "grad_norm": 1.1150288012804952, "learning_rate": 0.0001, "loss": 1.0879, "mean_abs_error": 437.5671343741258, "mean_abs_error_last_10": 136.76070299076835, "mean_abs_error_last_25": 177.49749522118128, "mean_abs_error_last_50": 238.48744624062715, "mean_pred_prob": 0.02978668020805344, "mean_pred_prob_last_10": 0.16568861580453814, "mean_pred_prob_last_25": 0.08719417699612678, "mean_pred_prob_last_50": 0.05196445691399276, "mean_token_accuracy": 0.875037956237793, "step": 5540 }, { "epoch": 0.09866140472508132, "grad_norm": 1.3295108715182133, "learning_rate": 0.0001, "loss": 1.0402, "mean_abs_error": 1448.7668732254874, "mean_abs_error_last_10": 867.9050373830858, "mean_abs_error_last_25": 915.1901820459046, "mean_abs_error_last_50": 1081.32503659517, "mean_pred_prob": 0.025033851567422973, "mean_pred_prob_last_10": 0.13273777001304551, "mean_pred_prob_last_25": 0.07210712229862111, "mean_pred_prob_last_50": 0.04335771302721696, "mean_token_accuracy": 0.8751733541488648, "step": 5550 }, { "epoch": 0.09883917302188328, "grad_norm": 2.3381068823495985, "learning_rate": 0.0001, "loss": 1.0513, "mean_abs_error": 1056.5887470536363, "mean_abs_error_last_10": 436.1111378212827, "mean_abs_error_last_25": 599.3008081466487, "mean_abs_error_last_50": 734.2856262242706, "mean_pred_prob": 0.02148562330257846, "mean_pred_prob_last_10": 0.12075388969969936, "mean_pred_prob_last_25": 0.06351657145423814, "mean_pred_prob_last_50": 0.03752511584898457, "mean_token_accuracy": 0.8710996687412262, "step": 5560 }, { "epoch": 0.09901694131868523, "grad_norm": 1.548039623210869, "learning_rate": 0.0001, "loss": 1.0852, "mean_abs_error": 679.668266730157, "mean_abs_error_last_10": 233.03895477880846, "mean_abs_error_last_25": 281.65471025534066, "mean_abs_error_last_50": 393.75468495620896, "mean_pred_prob": 0.014106995522161015, "mean_pred_prob_last_10": 0.08239357031998225, "mean_pred_prob_last_25": 0.0423145461070817, "mean_pred_prob_last_50": 0.02473865303909406, "mean_token_accuracy": 0.8687098741531372, "step": 5570 }, { "epoch": 0.09919470961548717, "grad_norm": 1.5174994505626944, "learning_rate": 0.0001, "loss": 1.0484, "mean_abs_error": 353.7493393692058, "mean_abs_error_last_10": 66.37412972542415, "mean_abs_error_last_25": 112.95641444088528, "mean_abs_error_last_50": 186.07500798148834, "mean_pred_prob": 0.031187429977580904, "mean_pred_prob_last_10": 0.17376140058040618, "mean_pred_prob_last_25": 0.09127539461478591, "mean_pred_prob_last_50": 0.05422111004590988, "mean_token_accuracy": 0.8859039664268493, "step": 5580 }, { "epoch": 0.09937247791228912, "grad_norm": 2.1832407638307494, "learning_rate": 0.0001, "loss": 1.0285, "mean_abs_error": 522.3994918336659, "mean_abs_error_last_10": 273.36618606502657, "mean_abs_error_last_25": 275.59870581862293, "mean_abs_error_last_50": 304.8865109712334, "mean_pred_prob": 0.031007777812192217, "mean_pred_prob_last_10": 0.16496543468674646, "mean_pred_prob_last_25": 0.09236177728744224, "mean_pred_prob_last_50": 0.054270278988406064, "mean_token_accuracy": 0.8794676721096039, "step": 5590 }, { "epoch": 0.09955024620909107, "grad_norm": 0.9158839002760566, "learning_rate": 0.0001, "loss": 1.0327, "mean_abs_error": 630.6069651452851, "mean_abs_error_last_10": 170.83443103122144, "mean_abs_error_last_25": 264.9639940897428, "mean_abs_error_last_50": 386.16849544039127, "mean_pred_prob": 0.02121395340363961, "mean_pred_prob_last_10": 0.11858040877850726, "mean_pred_prob_last_25": 0.06332274836022407, "mean_pred_prob_last_50": 0.03738876862335019, "mean_token_accuracy": 0.8752023994922637, "step": 5600 }, { "epoch": 0.09972801450589303, "grad_norm": 0.950160432702849, "learning_rate": 0.0001, "loss": 1.0213, "mean_abs_error": 786.7001182149835, "mean_abs_error_last_10": 250.95015571105463, "mean_abs_error_last_25": 293.1878943914799, "mean_abs_error_last_50": 428.88886701701415, "mean_pred_prob": 0.027544878888875246, "mean_pred_prob_last_10": 0.1453240836621262, "mean_pred_prob_last_25": 0.07957404861226677, "mean_pred_prob_last_50": 0.04770468414062634, "mean_token_accuracy": 0.8737191498279572, "step": 5610 }, { "epoch": 0.09990578280269496, "grad_norm": 1.1387852894035273, "learning_rate": 0.0001, "loss": 1.0751, "mean_abs_error": 133.85543430030435, "mean_abs_error_last_10": 27.035906640950703, "mean_abs_error_last_25": 56.01114227993524, "mean_abs_error_last_50": 86.0796193869647, "mean_pred_prob": 0.040286189038306476, "mean_pred_prob_last_10": 0.2193019852042198, "mean_pred_prob_last_25": 0.11675412878394127, "mean_pred_prob_last_50": 0.0695094633847475, "mean_token_accuracy": 0.8681089699268341, "step": 5620 }, { "epoch": 0.10008355109949692, "grad_norm": 1.6870047694066321, "learning_rate": 0.0001, "loss": 1.1207, "mean_abs_error": 1002.2862467510092, "mean_abs_error_last_10": 566.8346581443337, "mean_abs_error_last_25": 589.9097416848143, "mean_abs_error_last_50": 725.5933332950259, "mean_pred_prob": 0.02232239494624082, "mean_pred_prob_last_10": 0.11354836472019088, "mean_pred_prob_last_25": 0.0642415707145119, "mean_pred_prob_last_50": 0.038319760706508534, "mean_token_accuracy": 0.8629106342792511, "step": 5630 }, { "epoch": 0.10026131939629887, "grad_norm": 1.9327042831439374, "learning_rate": 0.0001, "loss": 1.0603, "mean_abs_error": 437.5547696662293, "mean_abs_error_last_10": 93.6609208112709, "mean_abs_error_last_25": 139.17041558590296, "mean_abs_error_last_50": 266.1929553805536, "mean_pred_prob": 0.03029517931281589, "mean_pred_prob_last_10": 0.16048043686896563, "mean_pred_prob_last_25": 0.08731415047077462, "mean_pred_prob_last_50": 0.05232847023289651, "mean_token_accuracy": 0.8738558828830719, "step": 5640 }, { "epoch": 0.10043908769310081, "grad_norm": 2.5735451239392995, "learning_rate": 0.0001, "loss": 1.1053, "mean_abs_error": 656.2391634058108, "mean_abs_error_last_10": 356.82485840949533, "mean_abs_error_last_25": 394.5998682186263, "mean_abs_error_last_50": 443.4011487994814, "mean_pred_prob": 0.030594920692965388, "mean_pred_prob_last_10": 0.15847061881795527, "mean_pred_prob_last_25": 0.0865578006953001, "mean_pred_prob_last_50": 0.05252033676952124, "mean_token_accuracy": 0.870152622461319, "step": 5650 }, { "epoch": 0.10061685598990276, "grad_norm": 2.343067725409613, "learning_rate": 0.0001, "loss": 1.0614, "mean_abs_error": 580.7190296947022, "mean_abs_error_last_10": 196.59987583190033, "mean_abs_error_last_25": 199.0042166027552, "mean_abs_error_last_50": 308.7211507728858, "mean_pred_prob": 0.023669387551490218, "mean_pred_prob_last_10": 0.1270497058518231, "mean_pred_prob_last_25": 0.06892453753389419, "mean_pred_prob_last_50": 0.04099069550866261, "mean_token_accuracy": 0.8725515604019165, "step": 5660 }, { "epoch": 0.10079462428670471, "grad_norm": 0.9356081327642299, "learning_rate": 0.0001, "loss": 1.0318, "mean_abs_error": 743.492239781836, "mean_abs_error_last_10": 354.0003514461485, "mean_abs_error_last_25": 454.53008634751114, "mean_abs_error_last_50": 565.18747665022, "mean_pred_prob": 0.03666989752091467, "mean_pred_prob_last_10": 0.19398410608700942, "mean_pred_prob_last_25": 0.10584478144301102, "mean_pred_prob_last_50": 0.0632512882584706, "mean_token_accuracy": 0.8755991280078887, "step": 5670 }, { "epoch": 0.10097239258350665, "grad_norm": 1.746926888276658, "learning_rate": 0.0001, "loss": 1.0355, "mean_abs_error": 385.5535267068746, "mean_abs_error_last_10": 79.90798418068835, "mean_abs_error_last_25": 113.22716943332628, "mean_abs_error_last_50": 197.31122433311276, "mean_pred_prob": 0.027738500316627324, "mean_pred_prob_last_10": 0.15724171050824226, "mean_pred_prob_last_25": 0.08301804915536196, "mean_pred_prob_last_50": 0.048774266103282574, "mean_token_accuracy": 0.8852488398551941, "step": 5680 }, { "epoch": 0.1011501608803086, "grad_norm": 0.9268927146695507, "learning_rate": 0.0001, "loss": 1.0227, "mean_abs_error": 902.7315825580578, "mean_abs_error_last_10": 323.16596435812835, "mean_abs_error_last_25": 411.6586878789594, "mean_abs_error_last_50": 595.9700323966509, "mean_pred_prob": 0.024485819006804375, "mean_pred_prob_last_10": 0.13230060107889585, "mean_pred_prob_last_25": 0.0703098054334987, "mean_pred_prob_last_50": 0.04190067437593825, "mean_token_accuracy": 0.8739616274833679, "step": 5690 }, { "epoch": 0.10132792917711056, "grad_norm": 1.9432838818996727, "learning_rate": 0.0001, "loss": 1.0982, "mean_abs_error": 1429.9782279654164, "mean_abs_error_last_10": 537.2602053658159, "mean_abs_error_last_25": 679.6799547530438, "mean_abs_error_last_50": 898.6956649377096, "mean_pred_prob": 0.017858434082882013, "mean_pred_prob_last_10": 0.09805451958964113, "mean_pred_prob_last_25": 0.0523678138240939, "mean_pred_prob_last_50": 0.031106952266418374, "mean_token_accuracy": 0.8679002165794373, "step": 5700 }, { "epoch": 0.1015056974739125, "grad_norm": 1.2122028868187145, "learning_rate": 0.0001, "loss": 1.006, "mean_abs_error": 174.07571651298613, "mean_abs_error_last_10": 30.479221430413737, "mean_abs_error_last_25": 55.45150807024837, "mean_abs_error_last_50": 90.91744485592015, "mean_pred_prob": 0.04240985880605876, "mean_pred_prob_last_10": 0.2033717606216669, "mean_pred_prob_last_25": 0.11669860724359751, "mean_pred_prob_last_50": 0.07199135730043053, "mean_token_accuracy": 0.8804899752140045, "step": 5710 }, { "epoch": 0.10168346577071445, "grad_norm": 1.7988322167342137, "learning_rate": 0.0001, "loss": 1.1029, "mean_abs_error": 896.8406694825778, "mean_abs_error_last_10": 274.84049606898236, "mean_abs_error_last_25": 394.8868773699857, "mean_abs_error_last_50": 573.4621456563035, "mean_pred_prob": 0.02577633389737457, "mean_pred_prob_last_10": 0.15524545792723074, "mean_pred_prob_last_25": 0.07702572286361828, "mean_pred_prob_last_50": 0.04471888846019283, "mean_token_accuracy": 0.8756594657897949, "step": 5720 }, { "epoch": 0.1018612340675164, "grad_norm": 1.4985498556949408, "learning_rate": 0.0001, "loss": 1.0519, "mean_abs_error": 2204.4255816249356, "mean_abs_error_last_10": 1320.4385533507634, "mean_abs_error_last_25": 1593.5027672724448, "mean_abs_error_last_50": 1825.4915400136888, "mean_pred_prob": 0.023302449169568716, "mean_pred_prob_last_10": 0.11822493189538363, "mean_pred_prob_last_25": 0.06647581888537388, "mean_pred_prob_last_50": 0.040211125733912924, "mean_token_accuracy": 0.8731114745140076, "step": 5730 }, { "epoch": 0.10203900236431834, "grad_norm": 1.0390246991275724, "learning_rate": 0.0001, "loss": 1.1007, "mean_abs_error": 338.8176920744022, "mean_abs_error_last_10": 110.56672095263602, "mean_abs_error_last_25": 135.91838507119797, "mean_abs_error_last_50": 190.30944898861384, "mean_pred_prob": 0.030855121184140445, "mean_pred_prob_last_10": 0.16770683198701591, "mean_pred_prob_last_25": 0.09069279568502679, "mean_pred_prob_last_50": 0.05367728479905054, "mean_token_accuracy": 0.8718061685562134, "step": 5740 }, { "epoch": 0.1022167706611203, "grad_norm": 1.2602140265861714, "learning_rate": 0.0001, "loss": 1.021, "mean_abs_error": 1091.7882831085233, "mean_abs_error_last_10": 629.3036858631223, "mean_abs_error_last_25": 681.567789284415, "mean_abs_error_last_50": 826.3777472895115, "mean_pred_prob": 0.03449885154259391, "mean_pred_prob_last_10": 0.17600764100498054, "mean_pred_prob_last_25": 0.0979609731293749, "mean_pred_prob_last_50": 0.059700583264930177, "mean_token_accuracy": 0.8710470139980316, "step": 5750 }, { "epoch": 0.10239453895792225, "grad_norm": 2.004932865070283, "learning_rate": 0.0001, "loss": 1.062, "mean_abs_error": 456.4328214318568, "mean_abs_error_last_10": 348.6484884621851, "mean_abs_error_last_25": 448.58104418199184, "mean_abs_error_last_50": 456.5223052836318, "mean_pred_prob": 0.025103466759901494, "mean_pred_prob_last_10": 0.13414519503712655, "mean_pred_prob_last_25": 0.07222986379638315, "mean_pred_prob_last_50": 0.04325131655205041, "mean_token_accuracy": 0.8818944990634918, "step": 5760 }, { "epoch": 0.10257230725472419, "grad_norm": 1.1833260044357106, "learning_rate": 0.0001, "loss": 1.1012, "mean_abs_error": 606.4003308222779, "mean_abs_error_last_10": 242.30134537883487, "mean_abs_error_last_25": 269.97831403847323, "mean_abs_error_last_50": 342.88790032926454, "mean_pred_prob": 0.018807798391208053, "mean_pred_prob_last_10": 0.10394846946001053, "mean_pred_prob_last_25": 0.05584295988082886, "mean_pred_prob_last_50": 0.03289044089615345, "mean_token_accuracy": 0.88002889752388, "step": 5770 }, { "epoch": 0.10275007555152614, "grad_norm": 3.0242088008058636, "learning_rate": 0.0001, "loss": 1.0833, "mean_abs_error": 447.1749267681338, "mean_abs_error_last_10": 197.04714451713897, "mean_abs_error_last_25": 269.33236323884785, "mean_abs_error_last_50": 313.79765448887764, "mean_pred_prob": 0.03419015477411449, "mean_pred_prob_last_10": 0.17406360022723674, "mean_pred_prob_last_25": 0.09653874216601252, "mean_pred_prob_last_50": 0.05870683128014207, "mean_token_accuracy": 0.8686318933963776, "step": 5780 }, { "epoch": 0.1029278438483281, "grad_norm": 0.9438725128983341, "learning_rate": 0.0001, "loss": 1.0919, "mean_abs_error": 1249.959867303014, "mean_abs_error_last_10": 465.0309089653116, "mean_abs_error_last_25": 571.6911783992834, "mean_abs_error_last_50": 790.2084326025727, "mean_pred_prob": 0.023007424673414788, "mean_pred_prob_last_10": 0.1207532406609971, "mean_pred_prob_last_25": 0.06640580037783365, "mean_pred_prob_last_50": 0.03998014458047692, "mean_token_accuracy": 0.8666800916194916, "step": 5790 }, { "epoch": 0.10310561214513003, "grad_norm": 2.1846873645667166, "learning_rate": 0.0001, "loss": 1.1162, "mean_abs_error": 339.32800263877743, "mean_abs_error_last_10": 84.3395447432708, "mean_abs_error_last_25": 132.98209137144676, "mean_abs_error_last_50": 173.22279535948118, "mean_pred_prob": 0.0306595460511744, "mean_pred_prob_last_10": 0.16474498063325882, "mean_pred_prob_last_25": 0.08724512122571468, "mean_pred_prob_last_50": 0.0526860699057579, "mean_token_accuracy": 0.8688371658325196, "step": 5800 }, { "epoch": 0.10328338044193199, "grad_norm": 1.2677545110217503, "learning_rate": 0.0001, "loss": 1.0541, "mean_abs_error": 1041.5424174917093, "mean_abs_error_last_10": 641.6179354285499, "mean_abs_error_last_25": 774.5865817629762, "mean_abs_error_last_50": 858.1942018865809, "mean_pred_prob": 0.027047370956279338, "mean_pred_prob_last_10": 0.14722552604653175, "mean_pred_prob_last_25": 0.07770974981103791, "mean_pred_prob_last_50": 0.04662567918858258, "mean_token_accuracy": 0.8689977824687958, "step": 5810 }, { "epoch": 0.10346114873873394, "grad_norm": 1.3058557362244132, "learning_rate": 0.0001, "loss": 1.085, "mean_abs_error": 730.7233882432599, "mean_abs_error_last_10": 440.08037355807085, "mean_abs_error_last_25": 480.2326045781503, "mean_abs_error_last_50": 577.388667269782, "mean_pred_prob": 0.03490980925125768, "mean_pred_prob_last_10": 0.17960734049847815, "mean_pred_prob_last_25": 0.09870064939022996, "mean_pred_prob_last_50": 0.059858658965094944, "mean_token_accuracy": 0.8728980898857117, "step": 5820 }, { "epoch": 0.10363891703553588, "grad_norm": 1.1220001890123439, "learning_rate": 0.0001, "loss": 1.0745, "mean_abs_error": 267.4918936856464, "mean_abs_error_last_10": 100.89329521672619, "mean_abs_error_last_25": 131.31587311557638, "mean_abs_error_last_50": 162.0836668950181, "mean_pred_prob": 0.03497600322589278, "mean_pred_prob_last_10": 0.19333846680819988, "mean_pred_prob_last_25": 0.10409568902105093, "mean_pred_prob_last_50": 0.06082646800205112, "mean_token_accuracy": 0.8799975991249085, "step": 5830 }, { "epoch": 0.10381668533233783, "grad_norm": 1.3226676650723415, "learning_rate": 0.0001, "loss": 1.0838, "mean_abs_error": 528.6234164860701, "mean_abs_error_last_10": 275.0631695405926, "mean_abs_error_last_25": 308.0971735866908, "mean_abs_error_last_50": 334.04689003659996, "mean_pred_prob": 0.023509150731842964, "mean_pred_prob_last_10": 0.12024923656135797, "mean_pred_prob_last_25": 0.06634880180936306, "mean_pred_prob_last_50": 0.04012540983967483, "mean_token_accuracy": 0.8728016793727875, "step": 5840 }, { "epoch": 0.10399445362913978, "grad_norm": 2.367237362903048, "learning_rate": 0.0001, "loss": 1.065, "mean_abs_error": 459.47418357041636, "mean_abs_error_last_10": 155.2745709400753, "mean_abs_error_last_25": 160.02770379253417, "mean_abs_error_last_50": 229.57989736734604, "mean_pred_prob": 0.021653511212207377, "mean_pred_prob_last_10": 0.11550574582070113, "mean_pred_prob_last_25": 0.06323194121941925, "mean_pred_prob_last_50": 0.037330771796405315, "mean_token_accuracy": 0.8700132191181182, "step": 5850 }, { "epoch": 0.10417222192594172, "grad_norm": 1.687056103461359, "learning_rate": 0.0001, "loss": 1.0551, "mean_abs_error": 167.81953517725722, "mean_abs_error_last_10": 35.97432858324127, "mean_abs_error_last_25": 67.65461618230685, "mean_abs_error_last_50": 116.84509060025562, "mean_pred_prob": 0.04162019044160843, "mean_pred_prob_last_10": 0.22586000226438047, "mean_pred_prob_last_25": 0.1220121968537569, "mean_pred_prob_last_50": 0.07260500052943826, "mean_token_accuracy": 0.877996176481247, "step": 5860 }, { "epoch": 0.10434999022274367, "grad_norm": 1.850286596440802, "learning_rate": 0.0001, "loss": 1.1658, "mean_abs_error": 479.77597199567936, "mean_abs_error_last_10": 161.67390590754388, "mean_abs_error_last_25": 218.39809703592246, "mean_abs_error_last_50": 293.28700825805095, "mean_pred_prob": 0.024812201294116674, "mean_pred_prob_last_10": 0.12457524687051773, "mean_pred_prob_last_25": 0.06984353177249432, "mean_pred_prob_last_50": 0.04232244654558599, "mean_token_accuracy": 0.8755463778972625, "step": 5870 }, { "epoch": 0.10452775851954563, "grad_norm": 0.7821662426722908, "learning_rate": 0.0001, "loss": 1.0663, "mean_abs_error": 813.3693931231703, "mean_abs_error_last_10": 269.9741433021882, "mean_abs_error_last_25": 377.0035167957772, "mean_abs_error_last_50": 514.9239180554707, "mean_pred_prob": 0.03492003679129994, "mean_pred_prob_last_10": 0.19428249254124239, "mean_pred_prob_last_25": 0.10248235193430447, "mean_pred_prob_last_50": 0.0608464462828124, "mean_token_accuracy": 0.8713288724422454, "step": 5880 }, { "epoch": 0.10470552681634757, "grad_norm": 1.4851211170782939, "learning_rate": 0.0001, "loss": 1.066, "mean_abs_error": 476.74091039340965, "mean_abs_error_last_10": 94.7454595271252, "mean_abs_error_last_25": 130.684612690389, "mean_abs_error_last_50": 251.3044889582107, "mean_pred_prob": 0.039476080442545934, "mean_pred_prob_last_10": 0.1958219767198898, "mean_pred_prob_last_25": 0.11035238554468378, "mean_pred_prob_last_50": 0.0675007137353532, "mean_token_accuracy": 0.8594565153121948, "step": 5890 }, { "epoch": 0.10488329511314952, "grad_norm": 1.4031290491262864, "learning_rate": 0.0001, "loss": 1.0891, "mean_abs_error": 441.02320056385076, "mean_abs_error_last_10": 241.49059018976305, "mean_abs_error_last_25": 273.792314065181, "mean_abs_error_last_50": 317.6665529214419, "mean_pred_prob": 0.03206640484277159, "mean_pred_prob_last_10": 0.16733831875026225, "mean_pred_prob_last_25": 0.0902469546534121, "mean_pred_prob_last_50": 0.054556878795847294, "mean_token_accuracy": 0.8775602757930756, "step": 5900 }, { "epoch": 0.10506106340995147, "grad_norm": 0.8400671256206331, "learning_rate": 0.0001, "loss": 1.0766, "mean_abs_error": 883.254750786007, "mean_abs_error_last_10": 519.8263411329011, "mean_abs_error_last_25": 596.2256637448025, "mean_abs_error_last_50": 694.3787428480107, "mean_pred_prob": 0.025257507666538004, "mean_pred_prob_last_10": 0.13304244676255622, "mean_pred_prob_last_25": 0.07208807628194336, "mean_pred_prob_last_50": 0.043190881707414516, "mean_token_accuracy": 0.8735886812210083, "step": 5910 }, { "epoch": 0.10523883170675341, "grad_norm": 1.4002504079116413, "learning_rate": 0.0001, "loss": 1.0592, "mean_abs_error": 330.0711379777661, "mean_abs_error_last_10": 199.22351021245078, "mean_abs_error_last_25": 268.95515818054656, "mean_abs_error_last_50": 314.8303119692522, "mean_pred_prob": 0.03525126266758889, "mean_pred_prob_last_10": 0.1696337226778269, "mean_pred_prob_last_25": 0.09892034698277712, "mean_pred_prob_last_50": 0.06093203374184668, "mean_token_accuracy": 0.8750094652175904, "step": 5920 }, { "epoch": 0.10541660000355536, "grad_norm": 0.8492375621762743, "learning_rate": 0.0001, "loss": 1.0781, "mean_abs_error": 763.501786920441, "mean_abs_error_last_10": 278.15913436117637, "mean_abs_error_last_25": 380.82156166565795, "mean_abs_error_last_50": 500.9291837080853, "mean_pred_prob": 0.02436214662156999, "mean_pred_prob_last_10": 0.11725514711579307, "mean_pred_prob_last_25": 0.06664216125500388, "mean_pred_prob_last_50": 0.040910924720810725, "mean_token_accuracy": 0.8765160858631134, "step": 5930 }, { "epoch": 0.10559436830035732, "grad_norm": 1.409207750059844, "learning_rate": 0.0001, "loss": 1.0995, "mean_abs_error": 411.9285857164202, "mean_abs_error_last_10": 63.25728517113574, "mean_abs_error_last_25": 112.65695425152369, "mean_abs_error_last_50": 204.60572782617265, "mean_pred_prob": 0.03348411154001951, "mean_pred_prob_last_10": 0.17592611368745564, "mean_pred_prob_last_25": 0.09758953144773841, "mean_pred_prob_last_50": 0.0586014477070421, "mean_token_accuracy": 0.8695766150951385, "step": 5940 }, { "epoch": 0.10577213659715927, "grad_norm": 1.091600838890581, "learning_rate": 0.0001, "loss": 1.073, "mean_abs_error": 332.76404178677296, "mean_abs_error_last_10": 233.4035686560623, "mean_abs_error_last_25": 243.02557597156732, "mean_abs_error_last_50": 206.49163486885732, "mean_pred_prob": 0.03881719135679305, "mean_pred_prob_last_10": 0.18068544529378414, "mean_pred_prob_last_25": 0.1075629296246916, "mean_pred_prob_last_50": 0.06597638740204274, "mean_token_accuracy": 0.8726301431655884, "step": 5950 }, { "epoch": 0.10594990489396121, "grad_norm": 0.966282181719109, "learning_rate": 0.0001, "loss": 1.0504, "mean_abs_error": 199.00093367159178, "mean_abs_error_last_10": 85.29428728260427, "mean_abs_error_last_25": 100.54227109883452, "mean_abs_error_last_50": 130.7752175753092, "mean_pred_prob": 0.04095807829871774, "mean_pred_prob_last_10": 0.21868530176579953, "mean_pred_prob_last_25": 0.11767314672470093, "mean_pred_prob_last_50": 0.07053065681830048, "mean_token_accuracy": 0.8755864500999451, "step": 5960 }, { "epoch": 0.10612767319076316, "grad_norm": 0.9634993738288994, "learning_rate": 0.0001, "loss": 0.9454, "mean_abs_error": 201.20299405289828, "mean_abs_error_last_10": 53.56028655234577, "mean_abs_error_last_25": 65.87822181943058, "mean_abs_error_last_50": 118.15333509588542, "mean_pred_prob": 0.03171414397656917, "mean_pred_prob_last_10": 0.1679906889796257, "mean_pred_prob_last_25": 0.0923348531126976, "mean_pred_prob_last_50": 0.05520674316212535, "mean_token_accuracy": 0.8826254725456237, "step": 5970 }, { "epoch": 0.10630544148756511, "grad_norm": 2.0490785001587737, "learning_rate": 0.0001, "loss": 1.0796, "mean_abs_error": 406.8142237035748, "mean_abs_error_last_10": 122.90365279078735, "mean_abs_error_last_25": 168.44117552741838, "mean_abs_error_last_50": 210.36925353181846, "mean_pred_prob": 0.029608566034585237, "mean_pred_prob_last_10": 0.15388379693031312, "mean_pred_prob_last_25": 0.08557479418814182, "mean_pred_prob_last_50": 0.051298028137534854, "mean_token_accuracy": 0.8784051597118377, "step": 5980 }, { "epoch": 0.10648320978436705, "grad_norm": 1.3913619130745878, "learning_rate": 0.0001, "loss": 1.0744, "mean_abs_error": 1714.9280601032326, "mean_abs_error_last_10": 823.6768985624703, "mean_abs_error_last_25": 993.0014146313355, "mean_abs_error_last_50": 1235.3653453108896, "mean_pred_prob": 0.024848363795899785, "mean_pred_prob_last_10": 0.14007865326711907, "mean_pred_prob_last_25": 0.07478531914239284, "mean_pred_prob_last_50": 0.043843232380459084, "mean_token_accuracy": 0.8600153446197509, "step": 5990 }, { "epoch": 0.106660978081169, "grad_norm": 1.1833417934223682, "learning_rate": 0.0001, "loss": 1.0697, "mean_abs_error": 434.5958687531687, "mean_abs_error_last_10": 226.01773118974052, "mean_abs_error_last_25": 217.91728341249762, "mean_abs_error_last_50": 250.78996691363696, "mean_pred_prob": 0.022005817817989735, "mean_pred_prob_last_10": 0.12088459033984691, "mean_pred_prob_last_25": 0.06447886553360149, "mean_pred_prob_last_50": 0.038197152246721086, "mean_token_accuracy": 0.8797934532165528, "step": 6000 }, { "epoch": 0.10683874637797096, "grad_norm": 1.2730595642991172, "learning_rate": 0.0001, "loss": 1.04, "mean_abs_error": 231.83611024374673, "mean_abs_error_last_10": 52.001530250033355, "mean_abs_error_last_25": 72.59890692200563, "mean_abs_error_last_50": 119.35061553762966, "mean_pred_prob": 0.04274396137334406, "mean_pred_prob_last_10": 0.22095959726721048, "mean_pred_prob_last_25": 0.12241620998829603, "mean_pred_prob_last_50": 0.07377951708622277, "mean_token_accuracy": 0.8775485098361969, "step": 6010 }, { "epoch": 0.1070165146747729, "grad_norm": 1.541895391776343, "learning_rate": 0.0001, "loss": 0.9657, "mean_abs_error": 435.3912443161092, "mean_abs_error_last_10": 97.71592020586243, "mean_abs_error_last_25": 157.93549703330334, "mean_abs_error_last_50": 284.3213665126032, "mean_pred_prob": 0.029629729816224425, "mean_pred_prob_last_10": 0.16616393765434623, "mean_pred_prob_last_25": 0.08832407053560018, "mean_pred_prob_last_50": 0.05177113532554358, "mean_token_accuracy": 0.878191328048706, "step": 6020 }, { "epoch": 0.10719428297157485, "grad_norm": 2.038111230246957, "learning_rate": 0.0001, "loss": 0.9946, "mean_abs_error": 227.6988502327143, "mean_abs_error_last_10": 77.18986043302468, "mean_abs_error_last_25": 102.74069434674661, "mean_abs_error_last_50": 140.1052269586176, "mean_pred_prob": 0.03165126470848918, "mean_pred_prob_last_10": 0.17779822275042534, "mean_pred_prob_last_25": 0.09370003072544933, "mean_pred_prob_last_50": 0.055060956813395026, "mean_token_accuracy": 0.8820796072483063, "step": 6030 }, { "epoch": 0.1073720512683768, "grad_norm": 0.9290511650973877, "learning_rate": 0.0001, "loss": 1.0773, "mean_abs_error": 716.6419197533173, "mean_abs_error_last_10": 235.44175434098355, "mean_abs_error_last_25": 288.6465992592938, "mean_abs_error_last_50": 429.1240921299408, "mean_pred_prob": 0.02623047480301466, "mean_pred_prob_last_10": 0.14342602886608802, "mean_pred_prob_last_25": 0.0779718766512815, "mean_pred_prob_last_50": 0.04604015541262925, "mean_token_accuracy": 0.868906033039093, "step": 6040 }, { "epoch": 0.10754981956517874, "grad_norm": 1.5933442256876504, "learning_rate": 0.0001, "loss": 1.0281, "mean_abs_error": 425.6778784145875, "mean_abs_error_last_10": 69.0213438789064, "mean_abs_error_last_25": 95.24319723918754, "mean_abs_error_last_50": 184.2487009575097, "mean_pred_prob": 0.02801236093509942, "mean_pred_prob_last_10": 0.15326157733798026, "mean_pred_prob_last_25": 0.08334832238033414, "mean_pred_prob_last_50": 0.04927019020542502, "mean_token_accuracy": 0.8749298512935638, "step": 6050 }, { "epoch": 0.1077275878619807, "grad_norm": 1.2597737642001823, "learning_rate": 0.0001, "loss": 1.078, "mean_abs_error": 402.3027608945541, "mean_abs_error_last_10": 115.62887708653147, "mean_abs_error_last_25": 153.09815600214273, "mean_abs_error_last_50": 209.46133350924688, "mean_pred_prob": 0.0341333297197707, "mean_pred_prob_last_10": 0.19064442433882506, "mean_pred_prob_last_25": 0.10200838288292288, "mean_pred_prob_last_50": 0.06002227405551821, "mean_token_accuracy": 0.8740025281906127, "step": 6060 }, { "epoch": 0.10790535615878265, "grad_norm": 0.9392464162822727, "learning_rate": 0.0001, "loss": 0.9775, "mean_abs_error": 489.8500396144799, "mean_abs_error_last_10": 218.78443808527626, "mean_abs_error_last_25": 253.1210238348871, "mean_abs_error_last_50": 361.8491703126869, "mean_pred_prob": 0.04579417516943067, "mean_pred_prob_last_10": 0.22691433510626666, "mean_pred_prob_last_25": 0.12749118194915354, "mean_pred_prob_last_50": 0.07796766477986239, "mean_token_accuracy": 0.8813792824745178, "step": 6070 }, { "epoch": 0.10808312445558459, "grad_norm": 2.5173421296313814, "learning_rate": 0.0001, "loss": 1.0579, "mean_abs_error": 1056.6102601280545, "mean_abs_error_last_10": 762.1536521539604, "mean_abs_error_last_25": 771.1218452276996, "mean_abs_error_last_50": 859.4524952224307, "mean_pred_prob": 0.025070364109706134, "mean_pred_prob_last_10": 0.144993617325963, "mean_pred_prob_last_25": 0.07397303883626591, "mean_pred_prob_last_50": 0.043365207628812644, "mean_token_accuracy": 0.8647451221942901, "step": 6080 }, { "epoch": 0.10826089275238654, "grad_norm": 2.627845663996207, "learning_rate": 0.0001, "loss": 1.1029, "mean_abs_error": 1231.9320044270037, "mean_abs_error_last_10": 521.5066254955178, "mean_abs_error_last_25": 618.8206583636181, "mean_abs_error_last_50": 809.6989345459533, "mean_pred_prob": 0.026383406401146205, "mean_pred_prob_last_10": 0.1314768690877827, "mean_pred_prob_last_25": 0.07429003875586204, "mean_pred_prob_last_50": 0.04528585314983502, "mean_token_accuracy": 0.8675226747989655, "step": 6090 }, { "epoch": 0.10843866104918849, "grad_norm": 1.240352479735365, "learning_rate": 0.0001, "loss": 1.0851, "mean_abs_error": 2365.393445588516, "mean_abs_error_last_10": 1214.1632628147229, "mean_abs_error_last_25": 1444.0479703482652, "mean_abs_error_last_50": 1744.497575922641, "mean_pred_prob": 0.01548225890728645, "mean_pred_prob_last_10": 0.08694273944565793, "mean_pred_prob_last_25": 0.04537178390455665, "mean_pred_prob_last_50": 0.0270592868240783, "mean_token_accuracy": 0.8762197136878968, "step": 6100 }, { "epoch": 0.10861642934599043, "grad_norm": 1.2383666081168827, "learning_rate": 0.0001, "loss": 1.0365, "mean_abs_error": 428.7180192241123, "mean_abs_error_last_10": 105.62576548552977, "mean_abs_error_last_25": 157.67297917463083, "mean_abs_error_last_50": 231.51786128682207, "mean_pred_prob": 0.03546270271763206, "mean_pred_prob_last_10": 0.18856632597744466, "mean_pred_prob_last_25": 0.1022789878770709, "mean_pred_prob_last_50": 0.06159797720611095, "mean_token_accuracy": 0.8729293882846832, "step": 6110 }, { "epoch": 0.10879419764279238, "grad_norm": 1.105097666412901, "learning_rate": 0.0001, "loss": 1.1499, "mean_abs_error": 661.6021105090399, "mean_abs_error_last_10": 333.5257060565176, "mean_abs_error_last_25": 448.5881061664675, "mean_abs_error_last_50": 504.2500911671511, "mean_pred_prob": 0.034092295356094836, "mean_pred_prob_last_10": 0.18037641996343154, "mean_pred_prob_last_25": 0.096770004282007, "mean_pred_prob_last_50": 0.05766387976182159, "mean_token_accuracy": 0.8754724204540253, "step": 6120 }, { "epoch": 0.10897196593959434, "grad_norm": 1.6637261586672407, "learning_rate": 0.0001, "loss": 1.0495, "mean_abs_error": 443.5925606909388, "mean_abs_error_last_10": 193.88114913854778, "mean_abs_error_last_25": 190.45115355562695, "mean_abs_error_last_50": 215.79035547220852, "mean_pred_prob": 0.03183453169185668, "mean_pred_prob_last_10": 0.1621743814088404, "mean_pred_prob_last_25": 0.08939444585703313, "mean_pred_prob_last_50": 0.05443942472338677, "mean_token_accuracy": 0.8748935461044312, "step": 6130 }, { "epoch": 0.10914973423639628, "grad_norm": 1.8266629141640538, "learning_rate": 0.0001, "loss": 0.9758, "mean_abs_error": 549.2424438005504, "mean_abs_error_last_10": 285.84574175968106, "mean_abs_error_last_25": 274.132800521503, "mean_abs_error_last_50": 334.1503247199649, "mean_pred_prob": 0.025015466706827284, "mean_pred_prob_last_10": 0.14334403867833317, "mean_pred_prob_last_25": 0.07568127402337269, "mean_pred_prob_last_50": 0.044295945088379085, "mean_token_accuracy": 0.8716962397098541, "step": 6140 }, { "epoch": 0.10932750253319823, "grad_norm": 1.749669532529086, "learning_rate": 0.0001, "loss": 1.0089, "mean_abs_error": 352.58382779548765, "mean_abs_error_last_10": 114.32755331664093, "mean_abs_error_last_25": 166.0025772819661, "mean_abs_error_last_50": 219.39989935784712, "mean_pred_prob": 0.022295934380963445, "mean_pred_prob_last_10": 0.12464765124022961, "mean_pred_prob_last_25": 0.06503867376595736, "mean_pred_prob_last_50": 0.03868717234581709, "mean_token_accuracy": 0.8844233632087708, "step": 6150 }, { "epoch": 0.10950527083000018, "grad_norm": 2.7322418706925564, "learning_rate": 0.0001, "loss": 1.1154, "mean_abs_error": 664.3815568109801, "mean_abs_error_last_10": 272.5744541344824, "mean_abs_error_last_25": 334.5624466532744, "mean_abs_error_last_50": 427.6544352760181, "mean_pred_prob": 0.01819900665432215, "mean_pred_prob_last_10": 0.09308054401772096, "mean_pred_prob_last_25": 0.05161896554054692, "mean_pred_prob_last_50": 0.03129666465101764, "mean_token_accuracy": 0.8691252827644348, "step": 6160 }, { "epoch": 0.10968303912680212, "grad_norm": 2.866526991671482, "learning_rate": 0.0001, "loss": 1.0205, "mean_abs_error": 361.6789513751172, "mean_abs_error_last_10": 170.82785559428012, "mean_abs_error_last_25": 281.63280844097716, "mean_abs_error_last_50": 287.5394181843275, "mean_pred_prob": 0.03614794497843832, "mean_pred_prob_last_10": 0.19055747780948878, "mean_pred_prob_last_25": 0.10389565508812666, "mean_pred_prob_last_50": 0.062477293657138944, "mean_token_accuracy": 0.8794253826141357, "step": 6170 }, { "epoch": 0.10986080742360407, "grad_norm": 0.8464109308191408, "learning_rate": 0.0001, "loss": 1.1599, "mean_abs_error": 1047.0244013288013, "mean_abs_error_last_10": 350.6716809916006, "mean_abs_error_last_25": 498.9403129225476, "mean_abs_error_last_50": 712.1023826122239, "mean_pred_prob": 0.027463773725321515, "mean_pred_prob_last_10": 0.14163048311602325, "mean_pred_prob_last_25": 0.0792635096295271, "mean_pred_prob_last_50": 0.047256243729498236, "mean_token_accuracy": 0.8537014067173004, "step": 6180 }, { "epoch": 0.11003857572040603, "grad_norm": 1.0344391391050363, "learning_rate": 0.0001, "loss": 0.978, "mean_abs_error": 218.54770232415416, "mean_abs_error_last_10": 73.02273425933888, "mean_abs_error_last_25": 79.11375615051911, "mean_abs_error_last_50": 120.9708817741734, "mean_pred_prob": 0.05119501727167517, "mean_pred_prob_last_10": 0.22609581649303437, "mean_pred_prob_last_25": 0.1376422789879143, "mean_pred_prob_last_50": 0.08602088689804077, "mean_token_accuracy": 0.8821293890476227, "step": 6190 }, { "epoch": 0.11021634401720797, "grad_norm": 0.7007253941660359, "learning_rate": 0.0001, "loss": 1.0632, "mean_abs_error": 1187.7767606282032, "mean_abs_error_last_10": 386.82399398992584, "mean_abs_error_last_25": 540.3356889088583, "mean_abs_error_last_50": 744.436335897319, "mean_pred_prob": 0.03402735928830225, "mean_pred_prob_last_10": 0.16675317173358054, "mean_pred_prob_last_25": 0.09434356056153774, "mean_pred_prob_last_50": 0.05761975278437603, "mean_token_accuracy": 0.8705656290054321, "step": 6200 }, { "epoch": 0.11039411231400992, "grad_norm": 6.085805059378028, "learning_rate": 0.0001, "loss": 1.047, "mean_abs_error": 103.42867777524437, "mean_abs_error_last_10": 18.407014061878392, "mean_abs_error_last_25": 37.26480197760838, "mean_abs_error_last_50": 62.0929132132296, "mean_pred_prob": 0.03791933599859476, "mean_pred_prob_last_10": 0.20613643974065782, "mean_pred_prob_last_25": 0.1101313006132841, "mean_pred_prob_last_50": 0.06563762128353119, "mean_token_accuracy": 0.8786141753196717, "step": 6210 }, { "epoch": 0.11057188061081187, "grad_norm": 1.7598013375296777, "learning_rate": 0.0001, "loss": 1.163, "mean_abs_error": 492.37570837432975, "mean_abs_error_last_10": 265.6772857137721, "mean_abs_error_last_25": 372.3988424231463, "mean_abs_error_last_50": 395.5075567907698, "mean_pred_prob": 0.036253048083744945, "mean_pred_prob_last_10": 0.16917843390256165, "mean_pred_prob_last_25": 0.09305414725095033, "mean_pred_prob_last_50": 0.06019815909676254, "mean_token_accuracy": 0.8724528908729553, "step": 6220 }, { "epoch": 0.11074964890761381, "grad_norm": 1.81765045531975, "learning_rate": 0.0001, "loss": 1.0302, "mean_abs_error": 206.3894114127893, "mean_abs_error_last_10": 50.55007954916569, "mean_abs_error_last_25": 69.37726974830312, "mean_abs_error_last_50": 102.68580949140862, "mean_pred_prob": 0.039430957194417714, "mean_pred_prob_last_10": 0.21056960970163346, "mean_pred_prob_last_25": 0.11568080075085163, "mean_pred_prob_last_50": 0.06862461054697633, "mean_token_accuracy": 0.8733793139457703, "step": 6230 }, { "epoch": 0.11092741720441576, "grad_norm": 2.581102898373577, "learning_rate": 0.0001, "loss": 1.1024, "mean_abs_error": 772.0951538284523, "mean_abs_error_last_10": 401.80290948031524, "mean_abs_error_last_25": 538.1002889459066, "mean_abs_error_last_50": 569.0247851148649, "mean_pred_prob": 0.007196567277424038, "mean_pred_prob_last_10": 0.046485453471541406, "mean_pred_prob_last_25": 0.021700836159288882, "mean_pred_prob_last_50": 0.012533452874049545, "mean_token_accuracy": 0.8786063194274902, "step": 6240 }, { "epoch": 0.11110518550121772, "grad_norm": 1.6313075062000502, "learning_rate": 0.0001, "loss": 1.1688, "mean_abs_error": 1845.6341883966202, "mean_abs_error_last_10": 1132.7871591401624, "mean_abs_error_last_25": 1284.4465172593448, "mean_abs_error_last_50": 1436.4411950240778, "mean_pred_prob": 0.020451380919257644, "mean_pred_prob_last_10": 0.11224181056968519, "mean_pred_prob_last_25": 0.05937097530259052, "mean_pred_prob_last_50": 0.03481385019185836, "mean_token_accuracy": 0.8617499768733978, "step": 6250 }, { "epoch": 0.11128295379801965, "grad_norm": 1.0856603156524522, "learning_rate": 0.0001, "loss": 1.0396, "mean_abs_error": 1712.6158226400562, "mean_abs_error_last_10": 922.0253359757728, "mean_abs_error_last_25": 1026.9152948473359, "mean_abs_error_last_50": 1219.6501197188186, "mean_pred_prob": 0.02075403952621855, "mean_pred_prob_last_10": 0.10317457655328326, "mean_pred_prob_last_25": 0.05832815793110058, "mean_pred_prob_last_50": 0.035488895670278, "mean_token_accuracy": 0.8687067270278931, "step": 6260 }, { "epoch": 0.11146072209482161, "grad_norm": 1.5022704278783505, "learning_rate": 0.0001, "loss": 1.0279, "mean_abs_error": 172.8391493891122, "mean_abs_error_last_10": 24.29108506718243, "mean_abs_error_last_25": 53.8438062480703, "mean_abs_error_last_50": 94.34283407141007, "mean_pred_prob": 0.03236910654231906, "mean_pred_prob_last_10": 0.18360806331038476, "mean_pred_prob_last_25": 0.0970656093209982, "mean_pred_prob_last_50": 0.0568889838643372, "mean_token_accuracy": 0.880213451385498, "step": 6270 }, { "epoch": 0.11163849039162356, "grad_norm": 0.8985095225103322, "learning_rate": 0.0001, "loss": 0.9946, "mean_abs_error": 352.81651061901823, "mean_abs_error_last_10": 187.5106413293908, "mean_abs_error_last_25": 186.9944925687488, "mean_abs_error_last_50": 202.02102375566338, "mean_pred_prob": 0.03281230872962624, "mean_pred_prob_last_10": 0.16430550944060088, "mean_pred_prob_last_25": 0.0919845730997622, "mean_pred_prob_last_50": 0.05599655075930059, "mean_token_accuracy": 0.8707937598228455, "step": 6280 }, { "epoch": 0.11181625868842551, "grad_norm": 0.8091970436790243, "learning_rate": 0.0001, "loss": 1.0132, "mean_abs_error": 615.9641879345523, "mean_abs_error_last_10": 148.08819671364446, "mean_abs_error_last_25": 188.74776380026896, "mean_abs_error_last_50": 325.01872768391, "mean_pred_prob": 0.03288046687957831, "mean_pred_prob_last_10": 0.17870219693286343, "mean_pred_prob_last_25": 0.09714509597979486, "mean_pred_prob_last_50": 0.05732295128982514, "mean_token_accuracy": 0.8708544492721557, "step": 6290 }, { "epoch": 0.11199402698522745, "grad_norm": 1.4545355928989794, "learning_rate": 0.0001, "loss": 1.1872, "mean_abs_error": 661.3839952400543, "mean_abs_error_last_10": 322.6236564639992, "mean_abs_error_last_25": 280.9136307943187, "mean_abs_error_last_50": 378.44803458022295, "mean_pred_prob": 0.018607922038063406, "mean_pred_prob_last_10": 0.1027538726106286, "mean_pred_prob_last_25": 0.05494703883305192, "mean_pred_prob_last_50": 0.03248086397070438, "mean_token_accuracy": 0.8699114203453064, "step": 6300 }, { "epoch": 0.1121717952820294, "grad_norm": 0.7905230668671367, "learning_rate": 0.0001, "loss": 1.0223, "mean_abs_error": 924.3815626487991, "mean_abs_error_last_10": 308.16432176208576, "mean_abs_error_last_25": 394.51668809993794, "mean_abs_error_last_50": 576.6811443869321, "mean_pred_prob": 0.02505504187429324, "mean_pred_prob_last_10": 0.1356183707714081, "mean_pred_prob_last_25": 0.07245608902303502, "mean_pred_prob_last_50": 0.042930606740992514, "mean_token_accuracy": 0.8769255399703979, "step": 6310 }, { "epoch": 0.11234956357883136, "grad_norm": 1.1236688171714118, "learning_rate": 0.0001, "loss": 1.0417, "mean_abs_error": 546.8877876263706, "mean_abs_error_last_10": 392.96117310126033, "mean_abs_error_last_25": 473.20614925319813, "mean_abs_error_last_50": 424.1152166127792, "mean_pred_prob": 0.02346212141565047, "mean_pred_prob_last_10": 0.12842806577682495, "mean_pred_prob_last_25": 0.06778683700831607, "mean_pred_prob_last_50": 0.04067793798167259, "mean_token_accuracy": 0.8762869834899902, "step": 6320 }, { "epoch": 0.1125273318756333, "grad_norm": 1.1272284740578271, "learning_rate": 0.0001, "loss": 1.0528, "mean_abs_error": 613.196670320431, "mean_abs_error_last_10": 257.1246410062744, "mean_abs_error_last_25": 261.263911319456, "mean_abs_error_last_50": 348.87165655507414, "mean_pred_prob": 0.04407594784861431, "mean_pred_prob_last_10": 0.21262305827112868, "mean_pred_prob_last_25": 0.12057734202826395, "mean_pred_prob_last_50": 0.07475273535237648, "mean_token_accuracy": 0.8548038601875305, "step": 6330 }, { "epoch": 0.11270510017243525, "grad_norm": 1.1799259931108144, "learning_rate": 0.0001, "loss": 1.0634, "mean_abs_error": 364.79052941892826, "mean_abs_error_last_10": 67.98163367554359, "mean_abs_error_last_25": 141.54512741561513, "mean_abs_error_last_50": 210.5386563175494, "mean_pred_prob": 0.025139868608675897, "mean_pred_prob_last_10": 0.1326398402452469, "mean_pred_prob_last_25": 0.07154685035347938, "mean_pred_prob_last_50": 0.042984932335093616, "mean_token_accuracy": 0.8664091765880585, "step": 6340 }, { "epoch": 0.1128828684692372, "grad_norm": 2.998990761855883, "learning_rate": 0.0001, "loss": 1.0449, "mean_abs_error": 1548.3446546583514, "mean_abs_error_last_10": 846.5276425217971, "mean_abs_error_last_25": 968.5098268607477, "mean_abs_error_last_50": 1140.9857182707074, "mean_pred_prob": 0.025440455356874735, "mean_pred_prob_last_10": 0.1411411382112419, "mean_pred_prob_last_25": 0.07490786042471882, "mean_pred_prob_last_50": 0.044320070193498394, "mean_token_accuracy": 0.8719528436660766, "step": 6350 }, { "epoch": 0.11306063676603914, "grad_norm": 2.395844082442459, "learning_rate": 0.0001, "loss": 1.1256, "mean_abs_error": 301.60822930482544, "mean_abs_error_last_10": 51.27245499349749, "mean_abs_error_last_25": 83.31270183233377, "mean_abs_error_last_50": 145.00266964718963, "mean_pred_prob": 0.03372497188393027, "mean_pred_prob_last_10": 0.19482371173799037, "mean_pred_prob_last_25": 0.099936752486974, "mean_pred_prob_last_50": 0.05876406799070537, "mean_token_accuracy": 0.8729860126972199, "step": 6360 }, { "epoch": 0.1132384050628411, "grad_norm": 0.7703812514092253, "learning_rate": 0.0001, "loss": 1.0251, "mean_abs_error": 573.8245164733296, "mean_abs_error_last_10": 186.06656339065952, "mean_abs_error_last_25": 289.72319298264887, "mean_abs_error_last_50": 337.8274893049303, "mean_pred_prob": 0.023989836469991133, "mean_pred_prob_last_10": 0.12553898990154266, "mean_pred_prob_last_25": 0.06878364438889548, "mean_pred_prob_last_50": 0.0414303153869696, "mean_token_accuracy": 0.8746422529220581, "step": 6370 }, { "epoch": 0.11341617335964305, "grad_norm": 1.0395829259960165, "learning_rate": 0.0001, "loss": 0.9841, "mean_abs_error": 210.69600377617243, "mean_abs_error_last_10": 103.9000470159946, "mean_abs_error_last_25": 169.3474807079645, "mean_abs_error_last_50": 176.33572888053362, "mean_pred_prob": 0.03704341873526573, "mean_pred_prob_last_10": 0.19203751850873232, "mean_pred_prob_last_25": 0.10557340225204825, "mean_pred_prob_last_50": 0.06345189600251615, "mean_token_accuracy": 0.8723196864128113, "step": 6380 }, { "epoch": 0.11359394165644499, "grad_norm": 1.943392313228147, "learning_rate": 0.0001, "loss": 1.027, "mean_abs_error": 122.49766640178336, "mean_abs_error_last_10": 52.5508053484776, "mean_abs_error_last_25": 60.920749386184674, "mean_abs_error_last_50": 85.0974250242327, "mean_pred_prob": 0.038009834941476583, "mean_pred_prob_last_10": 0.20043461434543133, "mean_pred_prob_last_25": 0.1085446123033762, "mean_pred_prob_last_50": 0.06548754880204796, "mean_token_accuracy": 0.8752770721912384, "step": 6390 }, { "epoch": 0.11377170995324694, "grad_norm": 2.243808110028991, "learning_rate": 0.0001, "loss": 1.0718, "mean_abs_error": 194.57747923066302, "mean_abs_error_last_10": 57.009035724396426, "mean_abs_error_last_25": 78.4888614417413, "mean_abs_error_last_50": 104.01369586193263, "mean_pred_prob": 0.03723122691735625, "mean_pred_prob_last_10": 0.20136011131107806, "mean_pred_prob_last_25": 0.10994442887604236, "mean_pred_prob_last_50": 0.06472832229919731, "mean_token_accuracy": 0.8638121902942657, "step": 6400 }, { "epoch": 0.11394947825004889, "grad_norm": 1.5332782902113917, "learning_rate": 0.0001, "loss": 1.0111, "mean_abs_error": 548.6437534979577, "mean_abs_error_last_10": 300.46269696832735, "mean_abs_error_last_25": 275.3121808073621, "mean_abs_error_last_50": 332.15213615166294, "mean_pred_prob": 0.02658189085777849, "mean_pred_prob_last_10": 0.14751454028300942, "mean_pred_prob_last_25": 0.07723419923568145, "mean_pred_prob_last_50": 0.04603743389598094, "mean_token_accuracy": 0.8767133474349975, "step": 6410 }, { "epoch": 0.11412724654685083, "grad_norm": 2.2493228132019807, "learning_rate": 0.0001, "loss": 1.0778, "mean_abs_error": 290.6762775953149, "mean_abs_error_last_10": 38.08889416161696, "mean_abs_error_last_25": 74.42494823174953, "mean_abs_error_last_50": 149.099773922315, "mean_pred_prob": 0.02977562849409878, "mean_pred_prob_last_10": 0.15811211317777635, "mean_pred_prob_last_25": 0.0865395569242537, "mean_pred_prob_last_50": 0.051454908307641746, "mean_token_accuracy": 0.8717272460460663, "step": 6420 }, { "epoch": 0.11430501484365278, "grad_norm": 1.1988067449359185, "learning_rate": 0.0001, "loss": 1.0298, "mean_abs_error": 592.1594229373216, "mean_abs_error_last_10": 239.3605034537589, "mean_abs_error_last_25": 274.94252629396937, "mean_abs_error_last_50": 355.9222635452406, "mean_pred_prob": 0.03699087215354666, "mean_pred_prob_last_10": 0.1915275508305058, "mean_pred_prob_last_25": 0.10334831871441566, "mean_pred_prob_last_50": 0.06292914033110719, "mean_token_accuracy": 0.8696025907993317, "step": 6430 }, { "epoch": 0.11448278314045474, "grad_norm": 4.860747527147742, "learning_rate": 0.0001, "loss": 1.0496, "mean_abs_error": 122.93621006269234, "mean_abs_error_last_10": 27.172878213219327, "mean_abs_error_last_25": 46.06941545308729, "mean_abs_error_last_50": 75.5359429133058, "mean_pred_prob": 0.038426338136196135, "mean_pred_prob_last_10": 0.20021793022751808, "mean_pred_prob_last_25": 0.10849145110696554, "mean_pred_prob_last_50": 0.06543768122792244, "mean_token_accuracy": 0.8670252025127411, "step": 6440 }, { "epoch": 0.11466055143725667, "grad_norm": 1.1227160905743683, "learning_rate": 0.0001, "loss": 0.9966, "mean_abs_error": 450.99450100035165, "mean_abs_error_last_10": 92.22959766092774, "mean_abs_error_last_25": 154.9849495332199, "mean_abs_error_last_50": 229.41480117753258, "mean_pred_prob": 0.02878626771271229, "mean_pred_prob_last_10": 0.1416452620178461, "mean_pred_prob_last_25": 0.07888417840003967, "mean_pred_prob_last_50": 0.048581157345324755, "mean_token_accuracy": 0.8852909803390503, "step": 6450 }, { "epoch": 0.11483831973405863, "grad_norm": 1.2472727473448544, "learning_rate": 0.0001, "loss": 1.0103, "mean_abs_error": 212.7373421732616, "mean_abs_error_last_10": 28.100487539374598, "mean_abs_error_last_25": 59.13599564717974, "mean_abs_error_last_50": 118.25343216824471, "mean_pred_prob": 0.03675549891777337, "mean_pred_prob_last_10": 0.20220339596271514, "mean_pred_prob_last_25": 0.10786756118759513, "mean_pred_prob_last_50": 0.06366637698374689, "mean_token_accuracy": 0.8855387926101684, "step": 6460 }, { "epoch": 0.11501608803086058, "grad_norm": 1.6186190694406026, "learning_rate": 0.0001, "loss": 0.9838, "mean_abs_error": 437.5064708103566, "mean_abs_error_last_10": 127.57895240664482, "mean_abs_error_last_25": 155.7843266371568, "mean_abs_error_last_50": 252.0031561933935, "mean_pred_prob": 0.02897826684638858, "mean_pred_prob_last_10": 0.16203667572699487, "mean_pred_prob_last_25": 0.08626932726474479, "mean_pred_prob_last_50": 0.05113067890051752, "mean_token_accuracy": 0.8863296866416931, "step": 6470 }, { "epoch": 0.11519385632766252, "grad_norm": 0.9979169323105951, "learning_rate": 0.0001, "loss": 1.018, "mean_abs_error": 825.6746381274909, "mean_abs_error_last_10": 385.5516529928089, "mean_abs_error_last_25": 364.4075329951445, "mean_abs_error_last_50": 480.39382494527365, "mean_pred_prob": 0.026267079170793295, "mean_pred_prob_last_10": 0.1363766711903736, "mean_pred_prob_last_25": 0.081293573777657, "mean_pred_prob_last_50": 0.04658693693345413, "mean_token_accuracy": 0.8678731977939605, "step": 6480 }, { "epoch": 0.11537162462446447, "grad_norm": 1.3116802085334713, "learning_rate": 0.0001, "loss": 0.962, "mean_abs_error": 163.536124798195, "mean_abs_error_last_10": 99.23215136101118, "mean_abs_error_last_25": 111.50396078121177, "mean_abs_error_last_50": 114.11526791581159, "mean_pred_prob": 0.03857658817432821, "mean_pred_prob_last_10": 0.19906884357333182, "mean_pred_prob_last_25": 0.1098292545415461, "mean_pred_prob_last_50": 0.06653477847576142, "mean_token_accuracy": 0.8874715387821197, "step": 6490 }, { "epoch": 0.11554939292126643, "grad_norm": 1.1829319957451483, "learning_rate": 0.0001, "loss": 1.0607, "mean_abs_error": 606.7777727135293, "mean_abs_error_last_10": 175.85868600207328, "mean_abs_error_last_25": 264.733122834652, "mean_abs_error_last_50": 366.7639313080372, "mean_pred_prob": 0.03349662403925322, "mean_pred_prob_last_10": 0.17635798810515552, "mean_pred_prob_last_25": 0.09550489471293985, "mean_pred_prob_last_50": 0.057688969932496545, "mean_token_accuracy": 0.8694349467754364, "step": 6500 }, { "epoch": 0.11572716121806836, "grad_norm": 1.1028384553133512, "learning_rate": 0.0001, "loss": 1.0169, "mean_abs_error": 321.2624250813343, "mean_abs_error_last_10": 231.23448889135062, "mean_abs_error_last_25": 210.11962130971546, "mean_abs_error_last_50": 210.73979244965636, "mean_pred_prob": 0.04484360263450071, "mean_pred_prob_last_10": 0.22421580246882514, "mean_pred_prob_last_25": 0.12655427085701376, "mean_pred_prob_last_50": 0.0767755211214535, "mean_token_accuracy": 0.8837724506855011, "step": 6510 }, { "epoch": 0.11590492951487032, "grad_norm": 0.8468047511135163, "learning_rate": 0.0001, "loss": 1.118, "mean_abs_error": 274.7649171567142, "mean_abs_error_last_10": 169.63492254776705, "mean_abs_error_last_25": 202.02292638098848, "mean_abs_error_last_50": 223.61239987321215, "mean_pred_prob": 0.03540713188704103, "mean_pred_prob_last_10": 0.18623794615268707, "mean_pred_prob_last_25": 0.1015484859701246, "mean_pred_prob_last_50": 0.06136285471729934, "mean_token_accuracy": 0.8658437728881836, "step": 6520 }, { "epoch": 0.11608269781167227, "grad_norm": 1.1679145430764213, "learning_rate": 0.0001, "loss": 1.0337, "mean_abs_error": 791.0570920527341, "mean_abs_error_last_10": 237.99255861132306, "mean_abs_error_last_25": 298.3080819319115, "mean_abs_error_last_50": 447.01340084244856, "mean_pred_prob": 0.02745631285069976, "mean_pred_prob_last_10": 0.15090818493627012, "mean_pred_prob_last_25": 0.08083368340157904, "mean_pred_prob_last_50": 0.04767478949506767, "mean_token_accuracy": 0.8719609200954437, "step": 6530 }, { "epoch": 0.11626046610847421, "grad_norm": 2.1280279515150395, "learning_rate": 0.0001, "loss": 1.065, "mean_abs_error": 847.6201260527893, "mean_abs_error_last_10": 410.81701812906766, "mean_abs_error_last_25": 498.72857304423525, "mean_abs_error_last_50": 614.9028638208351, "mean_pred_prob": 0.03460676524264272, "mean_pred_prob_last_10": 0.1816978476708755, "mean_pred_prob_last_25": 0.09912463031942025, "mean_pred_prob_last_50": 0.059731014486169444, "mean_token_accuracy": 0.8681483685970306, "step": 6540 }, { "epoch": 0.11643823440527616, "grad_norm": 1.1183734141070836, "learning_rate": 0.0001, "loss": 1.0329, "mean_abs_error": 315.64115146474626, "mean_abs_error_last_10": 122.87635992361818, "mean_abs_error_last_25": 133.6360189471455, "mean_abs_error_last_50": 177.63567005509998, "mean_pred_prob": 0.04261966297053732, "mean_pred_prob_last_10": 0.21532010599039494, "mean_pred_prob_last_25": 0.11832716093631461, "mean_pred_prob_last_50": 0.07240617955103516, "mean_token_accuracy": 0.8738926589488983, "step": 6550 }, { "epoch": 0.11661600270207811, "grad_norm": 1.0777181076968028, "learning_rate": 0.0001, "loss": 1.0027, "mean_abs_error": 292.41838022867745, "mean_abs_error_last_10": 112.61181265698458, "mean_abs_error_last_25": 147.98033087571056, "mean_abs_error_last_50": 155.10358341397986, "mean_pred_prob": 0.03239050309639424, "mean_pred_prob_last_10": 0.17168013714253902, "mean_pred_prob_last_25": 0.09058080809190869, "mean_pred_prob_last_50": 0.05538898557424545, "mean_token_accuracy": 0.8798310458660126, "step": 6560 }, { "epoch": 0.11679377099888005, "grad_norm": 1.986674564563899, "learning_rate": 0.0001, "loss": 1.0685, "mean_abs_error": 480.3365923713433, "mean_abs_error_last_10": 101.12493365537391, "mean_abs_error_last_25": 135.3853726905201, "mean_abs_error_last_50": 240.03288039425797, "mean_pred_prob": 0.01928187911398709, "mean_pred_prob_last_10": 0.11072466932237149, "mean_pred_prob_last_25": 0.05710223410278559, "mean_pred_prob_last_50": 0.033556099608540536, "mean_token_accuracy": 0.8715128183364869, "step": 6570 }, { "epoch": 0.116971539295682, "grad_norm": 2.259732405655745, "learning_rate": 0.0001, "loss": 1.0418, "mean_abs_error": 485.1587498838335, "mean_abs_error_last_10": 72.72916276459743, "mean_abs_error_last_25": 125.08097363526069, "mean_abs_error_last_50": 240.83554346936506, "mean_pred_prob": 0.027684696792857722, "mean_pred_prob_last_10": 0.1444040725938976, "mean_pred_prob_last_25": 0.07925790406297892, "mean_pred_prob_last_50": 0.04765037114266306, "mean_token_accuracy": 0.8831982851028443, "step": 6580 }, { "epoch": 0.11714930759248396, "grad_norm": 1.4436922146775581, "learning_rate": 0.0001, "loss": 0.973, "mean_abs_error": 327.1748182781271, "mean_abs_error_last_10": 69.24010213684403, "mean_abs_error_last_25": 103.02779328508184, "mean_abs_error_last_50": 190.410720549674, "mean_pred_prob": 0.0357840028591454, "mean_pred_prob_last_10": 0.1985281277447939, "mean_pred_prob_last_25": 0.10454375073313713, "mean_pred_prob_last_50": 0.061966663040220735, "mean_token_accuracy": 0.8767560422420502, "step": 6590 }, { "epoch": 0.1173270758892859, "grad_norm": 0.9916807656564144, "learning_rate": 0.0001, "loss": 1.1065, "mean_abs_error": 1185.9691495165002, "mean_abs_error_last_10": 340.97464437446797, "mean_abs_error_last_25": 484.87730741791285, "mean_abs_error_last_50": 678.7998913807979, "mean_pred_prob": 0.01595868405420333, "mean_pred_prob_last_10": 0.09660566697712056, "mean_pred_prob_last_25": 0.04821138971601613, "mean_pred_prob_last_50": 0.027781774831237272, "mean_token_accuracy": 0.8724577307701111, "step": 6600 }, { "epoch": 0.11750484418608785, "grad_norm": 0.9984039040105206, "learning_rate": 0.0001, "loss": 1.08, "mean_abs_error": 1988.939696295432, "mean_abs_error_last_10": 908.4796724297203, "mean_abs_error_last_25": 1207.7295017147837, "mean_abs_error_last_50": 1462.0035774189776, "mean_pred_prob": 0.017804552080633584, "mean_pred_prob_last_10": 0.09722073475422803, "mean_pred_prob_last_25": 0.05182042208762141, "mean_pred_prob_last_50": 0.030445046418753917, "mean_token_accuracy": 0.8678319036960602, "step": 6610 }, { "epoch": 0.1176826124828898, "grad_norm": 2.0696460705637554, "learning_rate": 0.0001, "loss": 1.1236, "mean_abs_error": 968.2256634975234, "mean_abs_error_last_10": 421.8393331031045, "mean_abs_error_last_25": 529.6058642380996, "mean_abs_error_last_50": 639.7633953894791, "mean_pred_prob": 0.022169905190821737, "mean_pred_prob_last_10": 0.1242944142606575, "mean_pred_prob_last_25": 0.06511322594597005, "mean_pred_prob_last_50": 0.038289548613829535, "mean_token_accuracy": 0.867077773809433, "step": 6620 }, { "epoch": 0.11786038077969176, "grad_norm": 1.0523322329746068, "learning_rate": 0.0001, "loss": 1.0645, "mean_abs_error": 1000.0238793521991, "mean_abs_error_last_10": 401.17783400676166, "mean_abs_error_last_25": 524.9395213976065, "mean_abs_error_last_50": 625.6632699094606, "mean_pred_prob": 0.020629179175011815, "mean_pred_prob_last_10": 0.10617371791158803, "mean_pred_prob_last_25": 0.05826766171376221, "mean_pred_prob_last_50": 0.0354241675697267, "mean_token_accuracy": 0.882358294725418, "step": 6630 }, { "epoch": 0.1180381490764937, "grad_norm": 1.3143959483915464, "learning_rate": 0.0001, "loss": 0.9788, "mean_abs_error": 197.27714039407573, "mean_abs_error_last_10": 46.809631022369295, "mean_abs_error_last_25": 57.95286860012756, "mean_abs_error_last_50": 107.86105479126618, "mean_pred_prob": 0.04373372769914567, "mean_pred_prob_last_10": 0.21099340692162513, "mean_pred_prob_last_25": 0.12113199532032012, "mean_pred_prob_last_50": 0.07430901862680912, "mean_token_accuracy": 0.8817349016666413, "step": 6640 }, { "epoch": 0.11821591737329565, "grad_norm": 0.9717880718067557, "learning_rate": 0.0001, "loss": 1.1754, "mean_abs_error": 510.9751260226343, "mean_abs_error_last_10": 312.4621971246226, "mean_abs_error_last_25": 265.09971652406, "mean_abs_error_last_50": 356.25367947243797, "mean_pred_prob": 0.03548161890357733, "mean_pred_prob_last_10": 0.18224159206729382, "mean_pred_prob_last_25": 0.09906192781636491, "mean_pred_prob_last_50": 0.06051981755299494, "mean_token_accuracy": 0.8673556029796601, "step": 6650 }, { "epoch": 0.1183936856700976, "grad_norm": 2.5460534933317693, "learning_rate": 0.0001, "loss": 1.0761, "mean_abs_error": 1589.3407052124217, "mean_abs_error_last_10": 690.6600569265772, "mean_abs_error_last_25": 799.222036442853, "mean_abs_error_last_50": 1033.0948442396484, "mean_pred_prob": 0.01693615770782344, "mean_pred_prob_last_10": 0.09420464439026546, "mean_pred_prob_last_25": 0.05015238245687215, "mean_pred_prob_last_50": 0.029468202430871317, "mean_token_accuracy": 0.8710126757621766, "step": 6660 }, { "epoch": 0.11857145396689954, "grad_norm": 1.6399690642679359, "learning_rate": 0.0001, "loss": 1.0137, "mean_abs_error": 566.5612095791224, "mean_abs_error_last_10": 166.23169097525292, "mean_abs_error_last_25": 214.65611259575365, "mean_abs_error_last_50": 329.29595110343257, "mean_pred_prob": 0.01674199749249965, "mean_pred_prob_last_10": 0.09478812403976918, "mean_pred_prob_last_25": 0.04948857638519257, "mean_pred_prob_last_50": 0.029053627920802683, "mean_token_accuracy": 0.8714057624340057, "step": 6670 }, { "epoch": 0.11874922226370149, "grad_norm": 1.4518293879803432, "learning_rate": 0.0001, "loss": 1.0017, "mean_abs_error": 1283.6715486578923, "mean_abs_error_last_10": 780.968354303181, "mean_abs_error_last_25": 851.337867584054, "mean_abs_error_last_50": 951.1838576301643, "mean_pred_prob": 0.027772248510154897, "mean_pred_prob_last_10": 0.14308252449554856, "mean_pred_prob_last_25": 0.07877469454833771, "mean_pred_prob_last_50": 0.047736089865793474, "mean_token_accuracy": 0.8734348535537719, "step": 6680 }, { "epoch": 0.11892699056050345, "grad_norm": 3.4424278942006454, "learning_rate": 0.0001, "loss": 1.1067, "mean_abs_error": 611.5182297025268, "mean_abs_error_last_10": 85.97503964676443, "mean_abs_error_last_25": 138.73543348690026, "mean_abs_error_last_50": 257.48333260711024, "mean_pred_prob": 0.027487488044425846, "mean_pred_prob_last_10": 0.14278810541145504, "mean_pred_prob_last_25": 0.08028937523486093, "mean_pred_prob_last_50": 0.04790990418987349, "mean_token_accuracy": 0.8741624534130097, "step": 6690 }, { "epoch": 0.11910475885730538, "grad_norm": 1.9037506467124183, "learning_rate": 0.0001, "loss": 1.0494, "mean_abs_error": 165.15056971908623, "mean_abs_error_last_10": 20.306190421977142, "mean_abs_error_last_25": 61.554345360811205, "mean_abs_error_last_50": 95.01164734093776, "mean_pred_prob": 0.04324042098596692, "mean_pred_prob_last_10": 0.23655605539679528, "mean_pred_prob_last_25": 0.1265144484117627, "mean_pred_prob_last_50": 0.07439329084008932, "mean_token_accuracy": 0.8679699957370758, "step": 6700 }, { "epoch": 0.11928252715410734, "grad_norm": 1.784075536695235, "learning_rate": 0.0001, "loss": 1.0389, "mean_abs_error": 63.34410994484815, "mean_abs_error_last_10": 5.5822900628883065, "mean_abs_error_last_25": 16.861911483509424, "mean_abs_error_last_50": 33.33027309265185, "mean_pred_prob": 0.05050036497414112, "mean_pred_prob_last_10": 0.2602718144655228, "mean_pred_prob_last_25": 0.14342403262853623, "mean_pred_prob_last_50": 0.08629596196115016, "mean_token_accuracy": 0.8764304935932159, "step": 6710 }, { "epoch": 0.11946029545090929, "grad_norm": 1.0374377243160837, "learning_rate": 0.0001, "loss": 0.9407, "mean_abs_error": 462.6775011829035, "mean_abs_error_last_10": 165.29597132790676, "mean_abs_error_last_25": 200.74516803850037, "mean_abs_error_last_50": 256.8529692675995, "mean_pred_prob": 0.02438846807926893, "mean_pred_prob_last_10": 0.13600642825476825, "mean_pred_prob_last_25": 0.0712827633600682, "mean_pred_prob_last_50": 0.04210469148820266, "mean_token_accuracy": 0.8811397790908814, "step": 6720 }, { "epoch": 0.11963806374771123, "grad_norm": 1.2639809031549465, "learning_rate": 0.0001, "loss": 1.0332, "mean_abs_error": 278.5928314984773, "mean_abs_error_last_10": 59.46328142930988, "mean_abs_error_last_25": 88.10805246673348, "mean_abs_error_last_50": 153.13522862866208, "mean_pred_prob": 0.03060507969930768, "mean_pred_prob_last_10": 0.16810584254562855, "mean_pred_prob_last_25": 0.09049352128058671, "mean_pred_prob_last_50": 0.053402674943208696, "mean_token_accuracy": 0.8786787807941436, "step": 6730 }, { "epoch": 0.11981583204451318, "grad_norm": 1.9476450727556125, "learning_rate": 0.0001, "loss": 1.0559, "mean_abs_error": 465.589475186661, "mean_abs_error_last_10": 134.29315571214605, "mean_abs_error_last_25": 148.65934286160947, "mean_abs_error_last_50": 218.39155884194707, "mean_pred_prob": 0.029126957536209374, "mean_pred_prob_last_10": 0.15739104709355162, "mean_pred_prob_last_25": 0.08522866026032716, "mean_pred_prob_last_50": 0.0505412264901679, "mean_token_accuracy": 0.8711486279964447, "step": 6740 }, { "epoch": 0.11999360034131513, "grad_norm": 1.3939345858746681, "learning_rate": 0.0001, "loss": 1.0305, "mean_abs_error": 354.21680357086206, "mean_abs_error_last_10": 49.115404840830266, "mean_abs_error_last_25": 110.60248331506827, "mean_abs_error_last_50": 177.16235887436974, "mean_pred_prob": 0.027293783333152532, "mean_pred_prob_last_10": 0.15178565680980682, "mean_pred_prob_last_25": 0.08036112552508712, "mean_pred_prob_last_50": 0.04769642036408186, "mean_token_accuracy": 0.8704035699367523, "step": 6750 }, { "epoch": 0.12017136863811707, "grad_norm": 1.2013379150206083, "learning_rate": 0.0001, "loss": 1.0998, "mean_abs_error": 564.4105206749025, "mean_abs_error_last_10": 258.67856769654475, "mean_abs_error_last_25": 324.4666240508574, "mean_abs_error_last_50": 362.59998778827674, "mean_pred_prob": 0.03318532953853719, "mean_pred_prob_last_10": 0.16713380317669363, "mean_pred_prob_last_25": 0.09510967910755426, "mean_pred_prob_last_50": 0.057134667341597375, "mean_token_accuracy": 0.8747122645378113, "step": 6760 }, { "epoch": 0.12034913693491903, "grad_norm": 0.9764633031750449, "learning_rate": 0.0001, "loss": 1.0674, "mean_abs_error": 106.64893332839488, "mean_abs_error_last_10": 18.12069013243128, "mean_abs_error_last_25": 37.32371554978111, "mean_abs_error_last_50": 67.2954310842921, "mean_pred_prob": 0.04323999220505357, "mean_pred_prob_last_10": 0.22884513214230537, "mean_pred_prob_last_25": 0.12484160810709, "mean_pred_prob_last_50": 0.07469654493033887, "mean_token_accuracy": 0.8735684394836426, "step": 6770 }, { "epoch": 0.12052690523172098, "grad_norm": 1.3774574802947508, "learning_rate": 0.0001, "loss": 1.0748, "mean_abs_error": 978.8539050722535, "mean_abs_error_last_10": 489.5530316611177, "mean_abs_error_last_25": 559.7078776083829, "mean_abs_error_last_50": 689.1514940749712, "mean_pred_prob": 0.024668339552590623, "mean_pred_prob_last_10": 0.14482023140881212, "mean_pred_prob_last_25": 0.07374207554385066, "mean_pred_prob_last_50": 0.04307279410131741, "mean_token_accuracy": 0.8582912564277649, "step": 6780 }, { "epoch": 0.12070467352852292, "grad_norm": 1.00646547542933, "learning_rate": 0.0001, "loss": 0.9875, "mean_abs_error": 1006.7384108663045, "mean_abs_error_last_10": 356.5788036324311, "mean_abs_error_last_25": 511.76857837711424, "mean_abs_error_last_50": 602.3943291796288, "mean_pred_prob": 0.012052354670595377, "mean_pred_prob_last_10": 0.06656661748420448, "mean_pred_prob_last_25": 0.035268988297320905, "mean_pred_prob_last_50": 0.02094888798892498, "mean_token_accuracy": 0.8649411737918854, "step": 6790 }, { "epoch": 0.12088244182532487, "grad_norm": 1.71583513271566, "learning_rate": 0.0001, "loss": 1.1178, "mean_abs_error": 1022.058228906878, "mean_abs_error_last_10": 428.809772939807, "mean_abs_error_last_25": 598.6340826823437, "mean_abs_error_last_50": 771.4588091028006, "mean_pred_prob": 0.030665835217223502, "mean_pred_prob_last_10": 0.1643183092965046, "mean_pred_prob_last_25": 0.09030903110979124, "mean_pred_prob_last_50": 0.05346670458384324, "mean_token_accuracy": 0.8672223567962647, "step": 6800 }, { "epoch": 0.12106021012212682, "grad_norm": 1.763064753471235, "learning_rate": 0.0001, "loss": 0.9766, "mean_abs_error": 386.58523641149475, "mean_abs_error_last_10": 107.08296655585445, "mean_abs_error_last_25": 125.49818159320418, "mean_abs_error_last_50": 211.7190351608101, "mean_pred_prob": 0.02700974796898663, "mean_pred_prob_last_10": 0.15296041388064624, "mean_pred_prob_last_25": 0.08199377004057169, "mean_pred_prob_last_50": 0.047598600317724046, "mean_token_accuracy": 0.8776887118816376, "step": 6810 }, { "epoch": 0.12123797841892876, "grad_norm": 1.5541072017481892, "learning_rate": 0.0001, "loss": 0.9741, "mean_abs_error": 419.33307155581105, "mean_abs_error_last_10": 104.30349066624137, "mean_abs_error_last_25": 158.06764413573399, "mean_abs_error_last_50": 255.2691196511085, "mean_pred_prob": 0.04535308317281306, "mean_pred_prob_last_10": 0.23146989531815051, "mean_pred_prob_last_25": 0.12846151934936642, "mean_pred_prob_last_50": 0.07812724327668548, "mean_token_accuracy": 0.8757771492004395, "step": 6820 }, { "epoch": 0.12141574671573072, "grad_norm": 1.9227241048280166, "learning_rate": 0.0001, "loss": 0.9958, "mean_abs_error": 659.9965176971237, "mean_abs_error_last_10": 230.4728976578271, "mean_abs_error_last_25": 275.51654427505406, "mean_abs_error_last_50": 392.78615049226346, "mean_pred_prob": 0.02376194520620629, "mean_pred_prob_last_10": 0.13025409823749215, "mean_pred_prob_last_25": 0.06973245043773205, "mean_pred_prob_last_50": 0.041196218319237234, "mean_token_accuracy": 0.8683109402656555, "step": 6830 }, { "epoch": 0.12159351501253267, "grad_norm": 1.072195761257642, "learning_rate": 0.0001, "loss": 1.0628, "mean_abs_error": 653.600419821687, "mean_abs_error_last_10": 244.1829629078054, "mean_abs_error_last_25": 295.5616795480253, "mean_abs_error_last_50": 390.59855018854915, "mean_pred_prob": 0.035825626872247086, "mean_pred_prob_last_10": 0.1831153424223885, "mean_pred_prob_last_25": 0.09975057221017777, "mean_pred_prob_last_50": 0.06096318666823208, "mean_token_accuracy": 0.8666876673698425, "step": 6840 }, { "epoch": 0.12177128330933461, "grad_norm": 1.3495454464958085, "learning_rate": 0.0001, "loss": 1.0169, "mean_abs_error": 256.1595509858254, "mean_abs_error_last_10": 86.01920042687499, "mean_abs_error_last_25": 135.18014130476223, "mean_abs_error_last_50": 189.21794813119328, "mean_pred_prob": 0.034024528600275515, "mean_pred_prob_last_10": 0.1806827113032341, "mean_pred_prob_last_25": 0.09686010917648673, "mean_pred_prob_last_50": 0.05834480281919241, "mean_token_accuracy": 0.8825622737407685, "step": 6850 }, { "epoch": 0.12194905160613656, "grad_norm": 1.993892315494954, "learning_rate": 0.0001, "loss": 0.9998, "mean_abs_error": 950.1587323047227, "mean_abs_error_last_10": 585.6705109388043, "mean_abs_error_last_25": 670.0299896125554, "mean_abs_error_last_50": 751.0660945336612, "mean_pred_prob": 0.0402483747646329, "mean_pred_prob_last_10": 0.1985210943093989, "mean_pred_prob_last_25": 0.11452583817590493, "mean_pred_prob_last_50": 0.06961424970941152, "mean_token_accuracy": 0.8645701825618743, "step": 6860 }, { "epoch": 0.12212681990293851, "grad_norm": 1.5871470962348162, "learning_rate": 0.0001, "loss": 1.1037, "mean_abs_error": 1086.4652816709665, "mean_abs_error_last_10": 548.8034592406727, "mean_abs_error_last_25": 624.0674152959235, "mean_abs_error_last_50": 787.5215809642899, "mean_pred_prob": 0.019216222201066557, "mean_pred_prob_last_10": 0.10837183861003723, "mean_pred_prob_last_25": 0.05653334168018773, "mean_pred_prob_last_50": 0.03345824264688417, "mean_token_accuracy": 0.8860175549983978, "step": 6870 }, { "epoch": 0.12230458819974045, "grad_norm": 1.21195003690334, "learning_rate": 0.0001, "loss": 1.0452, "mean_abs_error": 827.6965376571404, "mean_abs_error_last_10": 136.12299813580108, "mean_abs_error_last_25": 237.48973913071436, "mean_abs_error_last_50": 410.49727880834223, "mean_pred_prob": 0.026543632650282235, "mean_pred_prob_last_10": 0.1310079721035436, "mean_pred_prob_last_25": 0.0737211765605025, "mean_pred_prob_last_50": 0.045378949213773015, "mean_token_accuracy": 0.8594338715076446, "step": 6880 }, { "epoch": 0.1224823564965424, "grad_norm": 1.236571522017311, "learning_rate": 0.0001, "loss": 0.9736, "mean_abs_error": 808.8947292352664, "mean_abs_error_last_10": 382.1091109479483, "mean_abs_error_last_25": 392.49355633095104, "mean_abs_error_last_50": 541.5169487002279, "mean_pred_prob": 0.028895974830084013, "mean_pred_prob_last_10": 0.15787157463200857, "mean_pred_prob_last_25": 0.08388091729430017, "mean_pred_prob_last_50": 0.04981413729256019, "mean_token_accuracy": 0.861097902059555, "step": 6890 }, { "epoch": 0.12266012479334436, "grad_norm": 1.0341875393608915, "learning_rate": 0.0001, "loss": 1.0242, "mean_abs_error": 356.0295172856403, "mean_abs_error_last_10": 128.6535226641551, "mean_abs_error_last_25": 141.52194162449388, "mean_abs_error_last_50": 191.40117764120507, "mean_pred_prob": 0.030963010410778223, "mean_pred_prob_last_10": 0.16423486098647117, "mean_pred_prob_last_25": 0.08928160639479757, "mean_pred_prob_last_50": 0.05352379465475678, "mean_token_accuracy": 0.8668808281421662, "step": 6900 }, { "epoch": 0.1228378930901463, "grad_norm": 1.7029445536071008, "learning_rate": 0.0001, "loss": 1.0181, "mean_abs_error": 555.5675207724795, "mean_abs_error_last_10": 84.17602640976318, "mean_abs_error_last_25": 144.79862872543663, "mean_abs_error_last_50": 263.65392124181164, "mean_pred_prob": 0.03125435144174844, "mean_pred_prob_last_10": 0.17016230672597885, "mean_pred_prob_last_25": 0.0911023617722094, "mean_pred_prob_last_50": 0.05396441137418151, "mean_token_accuracy": 0.8632115423679352, "step": 6910 }, { "epoch": 0.12301566138694825, "grad_norm": 1.4311709388026017, "learning_rate": 0.0001, "loss": 0.9442, "mean_abs_error": 644.943195780336, "mean_abs_error_last_10": 198.87760933301666, "mean_abs_error_last_25": 244.88958657840053, "mean_abs_error_last_50": 349.77907416800684, "mean_pred_prob": 0.034975671902066095, "mean_pred_prob_last_10": 0.1822535629849881, "mean_pred_prob_last_25": 0.09951813067309559, "mean_pred_prob_last_50": 0.06016816012561321, "mean_token_accuracy": 0.8856506586074829, "step": 6920 }, { "epoch": 0.1231934296837502, "grad_norm": 0.990030126749568, "learning_rate": 0.0001, "loss": 1.0294, "mean_abs_error": 867.6966970721439, "mean_abs_error_last_10": 433.9533578722949, "mean_abs_error_last_25": 411.39130299178726, "mean_abs_error_last_50": 525.0658618839313, "mean_pred_prob": 0.023283020325470714, "mean_pred_prob_last_10": 0.11943479805486276, "mean_pred_prob_last_25": 0.06711705685593188, "mean_pred_prob_last_50": 0.040226695348974315, "mean_token_accuracy": 0.8730813980102539, "step": 6930 }, { "epoch": 0.12337119798055216, "grad_norm": 1.2105397658591848, "learning_rate": 0.0001, "loss": 1.0794, "mean_abs_error": 534.4979977596333, "mean_abs_error_last_10": 183.96967759967265, "mean_abs_error_last_25": 233.20161535885526, "mean_abs_error_last_50": 325.92612203299365, "mean_pred_prob": 0.01853646351955831, "mean_pred_prob_last_10": 0.10505666416138411, "mean_pred_prob_last_25": 0.05490374369546771, "mean_pred_prob_last_50": 0.03237405591644347, "mean_token_accuracy": 0.8772629797458649, "step": 6940 }, { "epoch": 0.1235489662773541, "grad_norm": 1.0712089310094008, "learning_rate": 0.0001, "loss": 1.0074, "mean_abs_error": 915.0167939549814, "mean_abs_error_last_10": 400.9193315921432, "mean_abs_error_last_25": 488.8565334134587, "mean_abs_error_last_50": 634.2972538130505, "mean_pred_prob": 0.02568076277675573, "mean_pred_prob_last_10": 0.1424980819807388, "mean_pred_prob_last_25": 0.07660176277568098, "mean_pred_prob_last_50": 0.04480982365494128, "mean_token_accuracy": 0.8704700112342835, "step": 6950 }, { "epoch": 0.12372673457415605, "grad_norm": 1.9470287679377616, "learning_rate": 0.0001, "loss": 1.0434, "mean_abs_error": 922.4263510891094, "mean_abs_error_last_10": 507.4307846156963, "mean_abs_error_last_25": 613.5011409538321, "mean_abs_error_last_50": 697.7325543826998, "mean_pred_prob": 0.03856245648057666, "mean_pred_prob_last_10": 0.19725361560704185, "mean_pred_prob_last_25": 0.10875212931714487, "mean_pred_prob_last_50": 0.06543034748028731, "mean_token_accuracy": 0.8779070198535919, "step": 6960 }, { "epoch": 0.123904502870958, "grad_norm": 1.024755434080857, "learning_rate": 0.0001, "loss": 1.0542, "mean_abs_error": 128.14773284484554, "mean_abs_error_last_10": 26.693288919570904, "mean_abs_error_last_25": 77.84610675810436, "mean_abs_error_last_50": 101.01841849602579, "mean_pred_prob": 0.03742154836654663, "mean_pred_prob_last_10": 0.19462176486849786, "mean_pred_prob_last_25": 0.10673657692968845, "mean_pred_prob_last_50": 0.06445392444729806, "mean_token_accuracy": 0.8739614844322204, "step": 6970 }, { "epoch": 0.12408227116775994, "grad_norm": 1.3849389593670982, "learning_rate": 0.0001, "loss": 1.0605, "mean_abs_error": 235.0127022548228, "mean_abs_error_last_10": 101.9471835751577, "mean_abs_error_last_25": 130.48563230893018, "mean_abs_error_last_50": 174.14417752163297, "mean_pred_prob": 0.03729690304026008, "mean_pred_prob_last_10": 0.18471907787024974, "mean_pred_prob_last_25": 0.10394950490444899, "mean_pred_prob_last_50": 0.0634116556495428, "mean_token_accuracy": 0.879130756855011, "step": 6980 }, { "epoch": 0.12426003946456189, "grad_norm": 0.6886604871668894, "learning_rate": 0.0001, "loss": 1.072, "mean_abs_error": 521.4388180023717, "mean_abs_error_last_10": 190.69151349940574, "mean_abs_error_last_25": 262.552727074386, "mean_abs_error_last_50": 363.7645936039134, "mean_pred_prob": 0.03362932293966878, "mean_pred_prob_last_10": 0.18169659067643806, "mean_pred_prob_last_25": 0.09746629753499292, "mean_pred_prob_last_50": 0.05784918750287034, "mean_token_accuracy": 0.8632077813148499, "step": 6990 }, { "epoch": 0.12443780776136384, "grad_norm": 1.3885008440542117, "learning_rate": 0.0001, "loss": 0.977, "mean_abs_error": 624.531314956447, "mean_abs_error_last_10": 200.03694020663085, "mean_abs_error_last_25": 281.579119670379, "mean_abs_error_last_50": 398.6851219707265, "mean_pred_prob": 0.032755226502195, "mean_pred_prob_last_10": 0.17120496186544187, "mean_pred_prob_last_25": 0.09314940753392875, "mean_pred_prob_last_50": 0.056172088926541616, "mean_token_accuracy": 0.8799328684806824, "step": 7000 }, { "epoch": 0.12461557605816578, "grad_norm": 1.3360350921830972, "learning_rate": 0.0001, "loss": 1.056, "mean_abs_error": 1692.1375850183165, "mean_abs_error_last_10": 966.4983920031007, "mean_abs_error_last_25": 1038.6501518313476, "mean_abs_error_last_50": 1227.7631940242104, "mean_pred_prob": 0.025993701422703453, "mean_pred_prob_last_10": 0.13877926309942268, "mean_pred_prob_last_25": 0.0751517840413726, "mean_pred_prob_last_50": 0.04459753810660914, "mean_token_accuracy": 0.8692611515522003, "step": 7010 }, { "epoch": 0.12479334435496774, "grad_norm": 3.270433604792075, "learning_rate": 0.0001, "loss": 0.9867, "mean_abs_error": 1114.1075531224278, "mean_abs_error_last_10": 478.1575781602843, "mean_abs_error_last_25": 535.8054268260266, "mean_abs_error_last_50": 681.6667219451043, "mean_pred_prob": 0.023313859829795548, "mean_pred_prob_last_10": 0.12879107955959626, "mean_pred_prob_last_25": 0.0684860776498681, "mean_pred_prob_last_50": 0.04070460629009176, "mean_token_accuracy": 0.8867936849594116, "step": 7020 }, { "epoch": 0.12497111265176969, "grad_norm": 1.5952544075924862, "learning_rate": 0.0001, "loss": 1.03, "mean_abs_error": 605.394199462055, "mean_abs_error_last_10": 313.25225295195867, "mean_abs_error_last_25": 313.07381842867653, "mean_abs_error_last_50": 328.16353031969965, "mean_pred_prob": 0.019606550491880627, "mean_pred_prob_last_10": 0.10698488445486873, "mean_pred_prob_last_25": 0.05725224378984421, "mean_pred_prob_last_50": 0.034058920911047605, "mean_token_accuracy": 0.8752908289432526, "step": 7030 }, { "epoch": 0.12514888094857163, "grad_norm": 1.7465348191687096, "learning_rate": 0.0001, "loss": 1.0345, "mean_abs_error": 201.20255527652853, "mean_abs_error_last_10": 54.24749945139022, "mean_abs_error_last_25": 73.22494789951789, "mean_abs_error_last_50": 105.23451440430306, "mean_pred_prob": 0.029758195858448744, "mean_pred_prob_last_10": 0.16025571078062056, "mean_pred_prob_last_25": 0.08485908936709166, "mean_pred_prob_last_50": 0.05083645256236195, "mean_token_accuracy": 0.8782920181751251, "step": 7040 }, { "epoch": 0.12532664924537357, "grad_norm": 1.2819244420034028, "learning_rate": 0.0001, "loss": 1.011, "mean_abs_error": 1123.710546159976, "mean_abs_error_last_10": 560.5944208832385, "mean_abs_error_last_25": 648.637605646113, "mean_abs_error_last_50": 840.9619372041377, "mean_pred_prob": 0.029299370318767615, "mean_pred_prob_last_10": 0.1533305078628473, "mean_pred_prob_last_25": 0.08292685190244811, "mean_pred_prob_last_50": 0.0499765447282698, "mean_token_accuracy": 0.8654457032680511, "step": 7050 }, { "epoch": 0.12550441754217553, "grad_norm": 1.8567773696634846, "learning_rate": 0.0001, "loss": 1.0856, "mean_abs_error": 1587.3961817303698, "mean_abs_error_last_10": 867.8339259819234, "mean_abs_error_last_25": 935.6329366336304, "mean_abs_error_last_50": 1135.951045224001, "mean_pred_prob": 0.0249953655351419, "mean_pred_prob_last_10": 0.1176054905707133, "mean_pred_prob_last_25": 0.06603113928722451, "mean_pred_prob_last_50": 0.04110174775851192, "mean_token_accuracy": 0.8644546508789063, "step": 7060 }, { "epoch": 0.12568218583897747, "grad_norm": 1.1197197701462633, "learning_rate": 0.0001, "loss": 1.0235, "mean_abs_error": 680.7182064331321, "mean_abs_error_last_10": 166.2360168693995, "mean_abs_error_last_25": 221.9522273805187, "mean_abs_error_last_50": 370.5869800014017, "mean_pred_prob": 0.026394860213622452, "mean_pred_prob_last_10": 0.1428583315340802, "mean_pred_prob_last_25": 0.07621719175949693, "mean_pred_prob_last_50": 0.04560897130286321, "mean_token_accuracy": 0.8670341670513153, "step": 7070 }, { "epoch": 0.1258599541357794, "grad_norm": 1.241626912427655, "learning_rate": 0.0001, "loss": 1.0195, "mean_abs_error": 416.7808442740958, "mean_abs_error_last_10": 108.4142144177338, "mean_abs_error_last_25": 143.4145821209291, "mean_abs_error_last_50": 200.19439243938777, "mean_pred_prob": 0.02674016370438039, "mean_pred_prob_last_10": 0.13056617472320795, "mean_pred_prob_last_25": 0.07231214307248593, "mean_pred_prob_last_50": 0.044864509347826244, "mean_token_accuracy": 0.8727743029594421, "step": 7080 }, { "epoch": 0.12603772243258138, "grad_norm": 1.2833303244607979, "learning_rate": 0.0001, "loss": 1.056, "mean_abs_error": 545.7707083274968, "mean_abs_error_last_10": 187.99575901449347, "mean_abs_error_last_25": 248.53788746010633, "mean_abs_error_last_50": 329.60708746779704, "mean_pred_prob": 0.0271534935105592, "mean_pred_prob_last_10": 0.14195550334407017, "mean_pred_prob_last_25": 0.07723347722785548, "mean_pred_prob_last_50": 0.04664545793784782, "mean_token_accuracy": 0.869595718383789, "step": 7090 }, { "epoch": 0.12621549072938332, "grad_norm": 1.0204266491637233, "learning_rate": 0.0001, "loss": 0.9745, "mean_abs_error": 304.7794708322223, "mean_abs_error_last_10": 90.73988955821234, "mean_abs_error_last_25": 117.90326032717749, "mean_abs_error_last_50": 201.4833393754653, "mean_pred_prob": 0.026862612552940847, "mean_pred_prob_last_10": 0.1422497395426035, "mean_pred_prob_last_25": 0.07704962473362684, "mean_pred_prob_last_50": 0.04652189128100872, "mean_token_accuracy": 0.879022866487503, "step": 7100 }, { "epoch": 0.12639325902618528, "grad_norm": 1.6218368463336046, "learning_rate": 0.0001, "loss": 0.9778, "mean_abs_error": 874.5707281909332, "mean_abs_error_last_10": 368.8064963027879, "mean_abs_error_last_25": 471.06458388270914, "mean_abs_error_last_50": 583.0002966776931, "mean_pred_prob": 0.014869731574435718, "mean_pred_prob_last_10": 0.08946654622559436, "mean_pred_prob_last_25": 0.045561355212703346, "mean_pred_prob_last_50": 0.02636160720139742, "mean_token_accuracy": 0.8725643575191497, "step": 7110 }, { "epoch": 0.12657102732298722, "grad_norm": 1.6016052686958864, "learning_rate": 0.0001, "loss": 1.0143, "mean_abs_error": 514.0891475075234, "mean_abs_error_last_10": 169.3395088368605, "mean_abs_error_last_25": 225.36893573925326, "mean_abs_error_last_50": 254.65313578506084, "mean_pred_prob": 0.02732297987677157, "mean_pred_prob_last_10": 0.14800260737538337, "mean_pred_prob_last_25": 0.08071344112977386, "mean_pred_prob_last_50": 0.04757651234976947, "mean_token_accuracy": 0.8756812751293183, "step": 7120 }, { "epoch": 0.12674879561978916, "grad_norm": 1.0231097853158015, "learning_rate": 0.0001, "loss": 1.0429, "mean_abs_error": 1024.5497754361527, "mean_abs_error_last_10": 341.3440499975526, "mean_abs_error_last_25": 539.6544453292885, "mean_abs_error_last_50": 647.68366900582, "mean_pred_prob": 0.0193986921251053, "mean_pred_prob_last_10": 0.11372510342625901, "mean_pred_prob_last_25": 0.05773286878829822, "mean_pred_prob_last_50": 0.03384265374625102, "mean_token_accuracy": 0.870025384426117, "step": 7130 }, { "epoch": 0.12692656391659113, "grad_norm": 0.8875667644130575, "learning_rate": 0.0001, "loss": 0.9309, "mean_abs_error": 142.11123635146038, "mean_abs_error_last_10": 44.11945913481061, "mean_abs_error_last_25": 73.84416661714664, "mean_abs_error_last_50": 118.42563688677299, "mean_pred_prob": 0.044349503424018624, "mean_pred_prob_last_10": 0.223784252256155, "mean_pred_prob_last_25": 0.12349377237260342, "mean_pred_prob_last_50": 0.07517472878098488, "mean_token_accuracy": 0.879103422164917, "step": 7140 }, { "epoch": 0.12710433221339307, "grad_norm": 1.4544144945069661, "learning_rate": 0.0001, "loss": 1.0244, "mean_abs_error": 1223.8599075387513, "mean_abs_error_last_10": 696.726575884279, "mean_abs_error_last_25": 869.6566477081478, "mean_abs_error_last_50": 987.5264995494351, "mean_pred_prob": 0.034983265519258566, "mean_pred_prob_last_10": 0.17838786302600057, "mean_pred_prob_last_25": 0.09915599916275823, "mean_pred_prob_last_50": 0.05969868096581195, "mean_token_accuracy": 0.8681376576423645, "step": 7150 }, { "epoch": 0.127282100510195, "grad_norm": 1.3243815381094848, "learning_rate": 0.0001, "loss": 0.9711, "mean_abs_error": 326.71762358627336, "mean_abs_error_last_10": 85.25933583389184, "mean_abs_error_last_25": 141.62483960407354, "mean_abs_error_last_50": 200.2337325942455, "mean_pred_prob": 0.03329826542176306, "mean_pred_prob_last_10": 0.17502080630511047, "mean_pred_prob_last_25": 0.09591192388907074, "mean_pred_prob_last_50": 0.057160034449771045, "mean_token_accuracy": 0.8720529913902283, "step": 7160 }, { "epoch": 0.12745986880699697, "grad_norm": 1.4356281718662973, "learning_rate": 0.0001, "loss": 0.9956, "mean_abs_error": 379.1648159606721, "mean_abs_error_last_10": 67.87937012335371, "mean_abs_error_last_25": 86.07350416300696, "mean_abs_error_last_50": 145.29264225569253, "mean_pred_prob": 0.03184756580740213, "mean_pred_prob_last_10": 0.17432451993227005, "mean_pred_prob_last_25": 0.0928506201133132, "mean_pred_prob_last_50": 0.05541362054646015, "mean_token_accuracy": 0.8770717144012451, "step": 7170 }, { "epoch": 0.1276376371037989, "grad_norm": 4.049822022002851, "learning_rate": 0.0001, "loss": 1.031, "mean_abs_error": 1288.6831417166238, "mean_abs_error_last_10": 679.8389798876842, "mean_abs_error_last_25": 796.4587666798235, "mean_abs_error_last_50": 985.7619596060628, "mean_pred_prob": 0.025802685209782794, "mean_pred_prob_last_10": 0.14411484263546298, "mean_pred_prob_last_25": 0.07696699784573865, "mean_pred_prob_last_50": 0.04513317316304892, "mean_token_accuracy": 0.872083204984665, "step": 7180 }, { "epoch": 0.12781540540060085, "grad_norm": 0.9826831843764169, "learning_rate": 0.0001, "loss": 1.0202, "mean_abs_error": 811.5730127472448, "mean_abs_error_last_10": 202.52916994971255, "mean_abs_error_last_25": 296.55101141929225, "mean_abs_error_last_50": 510.8797735492687, "mean_pred_prob": 0.025901206565322354, "mean_pred_prob_last_10": 0.13276820713072085, "mean_pred_prob_last_25": 0.07269556357059628, "mean_pred_prob_last_50": 0.04407591160852462, "mean_token_accuracy": 0.8777068257331848, "step": 7190 }, { "epoch": 0.12799317369740282, "grad_norm": 1.4002354340117074, "learning_rate": 0.0001, "loss": 0.9938, "mean_abs_error": 1062.2153130104573, "mean_abs_error_last_10": 474.5701280210542, "mean_abs_error_last_25": 660.4814219701357, "mean_abs_error_last_50": 833.6205042076715, "mean_pred_prob": 0.03050224614853505, "mean_pred_prob_last_10": 0.15619147435645572, "mean_pred_prob_last_25": 0.0854761129769031, "mean_pred_prob_last_50": 0.051890690624713895, "mean_token_accuracy": 0.8733309745788574, "step": 7200 }, { "epoch": 0.12817094199420476, "grad_norm": 0.8877239832898065, "learning_rate": 0.0001, "loss": 0.9781, "mean_abs_error": 1161.1684662452762, "mean_abs_error_last_10": 558.039649674952, "mean_abs_error_last_25": 632.2707809003833, "mean_abs_error_last_50": 820.2089213729703, "mean_pred_prob": 0.03132189711323008, "mean_pred_prob_last_10": 0.16058988235890864, "mean_pred_prob_last_25": 0.0878215600096155, "mean_pred_prob_last_50": 0.05379477463138756, "mean_token_accuracy": 0.8665588617324829, "step": 7210 }, { "epoch": 0.1283487102910067, "grad_norm": 0.7128555393361087, "learning_rate": 0.0001, "loss": 1.0064, "mean_abs_error": 670.32988768633, "mean_abs_error_last_10": 229.94905369528777, "mean_abs_error_last_25": 268.49422814652064, "mean_abs_error_last_50": 405.4136553234805, "mean_pred_prob": 0.01807222898933105, "mean_pred_prob_last_10": 0.10723383461590856, "mean_pred_prob_last_25": 0.05447193484287709, "mean_pred_prob_last_50": 0.03179777131881565, "mean_token_accuracy": 0.8807110726833344, "step": 7220 }, { "epoch": 0.12852647858780866, "grad_norm": 1.44217608997693, "learning_rate": 0.0001, "loss": 0.9581, "mean_abs_error": 54.117689144824645, "mean_abs_error_last_10": 8.060831868158184, "mean_abs_error_last_25": 20.79969141026897, "mean_abs_error_last_50": 36.420418851515265, "mean_pred_prob": 0.05298251397907734, "mean_pred_prob_last_10": 0.2631178319454193, "mean_pred_prob_last_25": 0.14692569449543952, "mean_pred_prob_last_50": 0.08985503762960434, "mean_token_accuracy": 0.8747910618782043, "step": 7230 }, { "epoch": 0.1287042468846106, "grad_norm": 1.0638867960658789, "learning_rate": 0.0001, "loss": 1.0396, "mean_abs_error": 709.1394131892714, "mean_abs_error_last_10": 229.89729243822347, "mean_abs_error_last_25": 287.59879626615384, "mean_abs_error_last_50": 403.03803828561934, "mean_pred_prob": 0.02941939905867912, "mean_pred_prob_last_10": 0.1598489630036056, "mean_pred_prob_last_25": 0.08570069064153359, "mean_pred_prob_last_50": 0.05088133094832301, "mean_token_accuracy": 0.8723481714725494, "step": 7240 }, { "epoch": 0.12888201518141254, "grad_norm": 0.8531977055002549, "learning_rate": 0.0001, "loss": 1.0056, "mean_abs_error": 263.70560191470634, "mean_abs_error_last_10": 130.53314201214812, "mean_abs_error_last_25": 180.24360379808235, "mean_abs_error_last_50": 194.23030444484056, "mean_pred_prob": 0.035265228152275084, "mean_pred_prob_last_10": 0.1796920219436288, "mean_pred_prob_last_25": 0.09996328735724092, "mean_pred_prob_last_50": 0.06036362466402352, "mean_token_accuracy": 0.8740422666072846, "step": 7250 }, { "epoch": 0.1290597834782145, "grad_norm": 1.163766931316086, "learning_rate": 0.0001, "loss": 1.0464, "mean_abs_error": 763.1450119949117, "mean_abs_error_last_10": 374.09126983156494, "mean_abs_error_last_25": 440.2699236418979, "mean_abs_error_last_50": 535.623972626666, "mean_pred_prob": 0.03946372137870639, "mean_pred_prob_last_10": 0.20372071737074293, "mean_pred_prob_last_25": 0.11340387610543985, "mean_pred_prob_last_50": 0.0684450342261698, "mean_token_accuracy": 0.8705991387367249, "step": 7260 }, { "epoch": 0.12923755177501645, "grad_norm": 2.025018311790723, "learning_rate": 0.0001, "loss": 0.9759, "mean_abs_error": 417.4470299784556, "mean_abs_error_last_10": 83.00114472842685, "mean_abs_error_last_25": 131.55944096849788, "mean_abs_error_last_50": 217.97857144721215, "mean_pred_prob": 0.03772405863273889, "mean_pred_prob_last_10": 0.20207023657858372, "mean_pred_prob_last_25": 0.10764936748892069, "mean_pred_prob_last_50": 0.0647955175023526, "mean_token_accuracy": 0.8764799356460571, "step": 7270 }, { "epoch": 0.12941532007181838, "grad_norm": 1.8934603570834374, "learning_rate": 0.0001, "loss": 0.9436, "mean_abs_error": 678.2803221239176, "mean_abs_error_last_10": 460.1486009032568, "mean_abs_error_last_25": 470.17297712281953, "mean_abs_error_last_50": 528.9959719035725, "mean_pred_prob": 0.020873096864670516, "mean_pred_prob_last_10": 0.12324273829581216, "mean_pred_prob_last_25": 0.06232607892015949, "mean_pred_prob_last_50": 0.03635422745719552, "mean_token_accuracy": 0.8775424063205719, "step": 7280 }, { "epoch": 0.12959308836862035, "grad_norm": 1.8545735627694384, "learning_rate": 0.0001, "loss": 1.0308, "mean_abs_error": 748.518645180197, "mean_abs_error_last_10": 200.39543395295416, "mean_abs_error_last_25": 265.07355164294336, "mean_abs_error_last_50": 463.87925130352534, "mean_pred_prob": 0.02158705393958371, "mean_pred_prob_last_10": 0.12626841652672738, "mean_pred_prob_last_25": 0.06618277882225812, "mean_pred_prob_last_50": 0.03776139205438085, "mean_token_accuracy": 0.8676936805248261, "step": 7290 }, { "epoch": 0.1297708566654223, "grad_norm": 1.280717189262386, "learning_rate": 0.0001, "loss": 1.0028, "mean_abs_error": 263.9617960361873, "mean_abs_error_last_10": 165.7970043589539, "mean_abs_error_last_25": 226.5837436013334, "mean_abs_error_last_50": 264.9913222022463, "mean_pred_prob": 0.04844165546819568, "mean_pred_prob_last_10": 0.2397888172417879, "mean_pred_prob_last_25": 0.13622239790856838, "mean_pred_prob_last_50": 0.08324484620243311, "mean_token_accuracy": 0.8762351930141449, "step": 7300 }, { "epoch": 0.12994862496222423, "grad_norm": 2.2311191732639477, "learning_rate": 0.0001, "loss": 1.0386, "mean_abs_error": 377.83422394103457, "mean_abs_error_last_10": 84.72781868852441, "mean_abs_error_last_25": 112.95436687830707, "mean_abs_error_last_50": 174.43251271349112, "mean_pred_prob": 0.04090065809432417, "mean_pred_prob_last_10": 0.18943451009690762, "mean_pred_prob_last_25": 0.11108212638646364, "mean_pred_prob_last_50": 0.06868806574493647, "mean_token_accuracy": 0.8710503935813904, "step": 7310 }, { "epoch": 0.1301263932590262, "grad_norm": 1.3356033850515017, "learning_rate": 0.0001, "loss": 1.0272, "mean_abs_error": 541.304629510296, "mean_abs_error_last_10": 150.01085635717803, "mean_abs_error_last_25": 191.39528324353532, "mean_abs_error_last_50": 302.7654730713639, "mean_pred_prob": 0.016581220971420407, "mean_pred_prob_last_10": 0.09388186000287532, "mean_pred_prob_last_25": 0.04799451930448413, "mean_pred_prob_last_50": 0.028526640264317393, "mean_token_accuracy": 0.8723614096641541, "step": 7320 }, { "epoch": 0.13030416155582814, "grad_norm": 2.159434210951882, "learning_rate": 0.0001, "loss": 0.9899, "mean_abs_error": 514.669924810977, "mean_abs_error_last_10": 157.47724938179132, "mean_abs_error_last_25": 180.1597539795535, "mean_abs_error_last_50": 252.605719792016, "mean_pred_prob": 0.014698981016408652, "mean_pred_prob_last_10": 0.08329494609497487, "mean_pred_prob_last_25": 0.043514070496894416, "mean_pred_prob_last_50": 0.02564294929616153, "mean_token_accuracy": 0.8716165661811829, "step": 7330 }, { "epoch": 0.13048192985263007, "grad_norm": 1.158203353796358, "learning_rate": 0.0001, "loss": 1.0108, "mean_abs_error": 738.1270677339495, "mean_abs_error_last_10": 301.1031477569144, "mean_abs_error_last_25": 399.6358730973865, "mean_abs_error_last_50": 504.2713670140844, "mean_pred_prob": 0.029711239319294692, "mean_pred_prob_last_10": 0.15963999805389903, "mean_pred_prob_last_25": 0.08573797991266474, "mean_pred_prob_last_50": 0.05144454234978184, "mean_token_accuracy": 0.8694653391838074, "step": 7340 }, { "epoch": 0.13065969814943204, "grad_norm": 1.6343611631426485, "learning_rate": 0.0001, "loss": 0.9735, "mean_abs_error": 653.568672138946, "mean_abs_error_last_10": 218.20048487390204, "mean_abs_error_last_25": 218.2797665641243, "mean_abs_error_last_50": 319.2539020738076, "mean_pred_prob": 0.022574137337505816, "mean_pred_prob_last_10": 0.12289872551336885, "mean_pred_prob_last_25": 0.0656773385591805, "mean_pred_prob_last_50": 0.039161273697391155, "mean_token_accuracy": 0.8780977010726929, "step": 7350 }, { "epoch": 0.13083746644623398, "grad_norm": 1.492584257193712, "learning_rate": 0.0001, "loss": 0.9869, "mean_abs_error": 708.5031621422984, "mean_abs_error_last_10": 222.42160004229186, "mean_abs_error_last_25": 285.2550864884097, "mean_abs_error_last_50": 437.42205606149884, "mean_pred_prob": 0.01700433061923832, "mean_pred_prob_last_10": 0.09866151474416256, "mean_pred_prob_last_25": 0.050703726895153525, "mean_pred_prob_last_50": 0.029749100003391503, "mean_token_accuracy": 0.8730491697788239, "step": 7360 }, { "epoch": 0.13101523474303592, "grad_norm": 1.2094889756323293, "learning_rate": 0.0001, "loss": 0.9281, "mean_abs_error": 304.2450089980754, "mean_abs_error_last_10": 93.63259188475682, "mean_abs_error_last_25": 116.03998607034589, "mean_abs_error_last_50": 178.73749683165198, "mean_pred_prob": 0.031134253926575185, "mean_pred_prob_last_10": 0.1613827059045434, "mean_pred_prob_last_25": 0.08877750169485807, "mean_pred_prob_last_50": 0.0535814942792058, "mean_token_accuracy": 0.8872477889060975, "step": 7370 }, { "epoch": 0.13119300303983789, "grad_norm": 2.2893004145435736, "learning_rate": 0.0001, "loss": 1.0501, "mean_abs_error": 989.2434655174642, "mean_abs_error_last_10": 347.6091129742661, "mean_abs_error_last_25": 507.9662243630293, "mean_abs_error_last_50": 691.3772121730383, "mean_pred_prob": 0.02787337748159189, "mean_pred_prob_last_10": 0.149124183220556, "mean_pred_prob_last_25": 0.08130335868918337, "mean_pred_prob_last_50": 0.04759950141014997, "mean_token_accuracy": 0.8809172868728637, "step": 7380 }, { "epoch": 0.13137077133663982, "grad_norm": 0.9117888339883461, "learning_rate": 0.0001, "loss": 1.0036, "mean_abs_error": 391.79208433625615, "mean_abs_error_last_10": 119.90647192796533, "mean_abs_error_last_25": 168.1947770782683, "mean_abs_error_last_50": 217.13084897613726, "mean_pred_prob": 0.03016528906300664, "mean_pred_prob_last_10": 0.1633408021181822, "mean_pred_prob_last_25": 0.08699119668453932, "mean_pred_prob_last_50": 0.05191663773730397, "mean_token_accuracy": 0.8698421478271484, "step": 7390 }, { "epoch": 0.13154853963344176, "grad_norm": 1.5931624528606814, "learning_rate": 0.0001, "loss": 0.9453, "mean_abs_error": 510.4277734239398, "mean_abs_error_last_10": 176.31182902979089, "mean_abs_error_last_25": 213.8458926870761, "mean_abs_error_last_50": 307.30919564255566, "mean_pred_prob": 0.03025385668152012, "mean_pred_prob_last_10": 0.16765635835472495, "mean_pred_prob_last_25": 0.08966895960038528, "mean_pred_prob_last_50": 0.05290495690423995, "mean_token_accuracy": 0.8769096195697784, "step": 7400 }, { "epoch": 0.13172630793024373, "grad_norm": 1.3653695186525818, "learning_rate": 0.0001, "loss": 0.9534, "mean_abs_error": 864.7779183689248, "mean_abs_error_last_10": 410.20834866324685, "mean_abs_error_last_25": 499.0514136814005, "mean_abs_error_last_50": 623.4347379144672, "mean_pred_prob": 0.028185926529113205, "mean_pred_prob_last_10": 0.15702424065675585, "mean_pred_prob_last_25": 0.08309083041094709, "mean_pred_prob_last_50": 0.048474627852556296, "mean_token_accuracy": 0.8822492480278015, "step": 7410 }, { "epoch": 0.13190407622704567, "grad_norm": 1.374072745500582, "learning_rate": 0.0001, "loss": 1.0729, "mean_abs_error": 924.7257986008639, "mean_abs_error_last_10": 297.6521663359374, "mean_abs_error_last_25": 466.10807581643564, "mean_abs_error_last_50": 610.4154539757926, "mean_pred_prob": 0.02470778786082519, "mean_pred_prob_last_10": 0.13595938050420955, "mean_pred_prob_last_25": 0.07337264628149569, "mean_pred_prob_last_50": 0.04311222772230394, "mean_token_accuracy": 0.8778780341148377, "step": 7420 }, { "epoch": 0.1320818445238476, "grad_norm": 1.1541444314559755, "learning_rate": 0.0001, "loss": 1.0384, "mean_abs_error": 620.3362268090336, "mean_abs_error_last_10": 259.26619097622813, "mean_abs_error_last_25": 330.3295538618434, "mean_abs_error_last_50": 382.984179949458, "mean_pred_prob": 0.02758078493643552, "mean_pred_prob_last_10": 0.14687142558395863, "mean_pred_prob_last_25": 0.08028578348457813, "mean_pred_prob_last_50": 0.04794441186822951, "mean_token_accuracy": 0.8706886827945709, "step": 7430 }, { "epoch": 0.13225961282064957, "grad_norm": 1.9334606191323098, "learning_rate": 0.0001, "loss": 1.0895, "mean_abs_error": 437.3468225300111, "mean_abs_error_last_10": 173.80888777753262, "mean_abs_error_last_25": 221.37689136565191, "mean_abs_error_last_50": 278.1944849629976, "mean_pred_prob": 0.030410420754924415, "mean_pred_prob_last_10": 0.15068505927920342, "mean_pred_prob_last_25": 0.08494576821103691, "mean_pred_prob_last_50": 0.05192525717429817, "mean_token_accuracy": 0.8769234001636506, "step": 7440 }, { "epoch": 0.1324373811174515, "grad_norm": 3.011942227907046, "learning_rate": 0.0001, "loss": 1.0265, "mean_abs_error": 232.79600286719543, "mean_abs_error_last_10": 167.59305256658587, "mean_abs_error_last_25": 241.99508786022102, "mean_abs_error_last_50": 206.6084170721311, "mean_pred_prob": 0.044330699183046815, "mean_pred_prob_last_10": 0.2252843076363206, "mean_pred_prob_last_25": 0.12595556722953916, "mean_pred_prob_last_50": 0.07601102045737207, "mean_token_accuracy": 0.8644443213939667, "step": 7450 }, { "epoch": 0.13261514941425345, "grad_norm": 1.6615449492345773, "learning_rate": 0.0001, "loss": 0.9821, "mean_abs_error": 266.92199941812635, "mean_abs_error_last_10": 72.48792594040552, "mean_abs_error_last_25": 82.31760826183925, "mean_abs_error_last_50": 132.16311348353048, "mean_pred_prob": 0.042485198774375024, "mean_pred_prob_last_10": 0.22939054481685162, "mean_pred_prob_last_25": 0.12346241772174835, "mean_pred_prob_last_50": 0.07346006194129587, "mean_token_accuracy": 0.855544638633728, "step": 7460 }, { "epoch": 0.13279291771105542, "grad_norm": 2.831054483826988, "learning_rate": 0.0001, "loss": 1.0239, "mean_abs_error": 469.242554290151, "mean_abs_error_last_10": 119.22521781706624, "mean_abs_error_last_25": 146.4873396433848, "mean_abs_error_last_50": 200.20860617882775, "mean_pred_prob": 0.025611169321928175, "mean_pred_prob_last_10": 0.1326808482874185, "mean_pred_prob_last_25": 0.07324531651102006, "mean_pred_prob_last_50": 0.044366909004747865, "mean_token_accuracy": 0.8719708383083343, "step": 7470 }, { "epoch": 0.13297068600785736, "grad_norm": 0.9858594224038777, "learning_rate": 0.0001, "loss": 1.0267, "mean_abs_error": 767.47819082304, "mean_abs_error_last_10": 252.73677270636466, "mean_abs_error_last_25": 310.0625740032468, "mean_abs_error_last_50": 436.9187354343259, "mean_pred_prob": 0.022498035701573826, "mean_pred_prob_last_10": 0.1253992690355517, "mean_pred_prob_last_25": 0.0660878598340787, "mean_pred_prob_last_50": 0.03901787942741066, "mean_token_accuracy": 0.867507404088974, "step": 7480 }, { "epoch": 0.1331484543046593, "grad_norm": 4.113710963624302, "learning_rate": 0.0001, "loss": 1.0762, "mean_abs_error": 438.32768675966855, "mean_abs_error_last_10": 218.3858326187054, "mean_abs_error_last_25": 212.7800219689504, "mean_abs_error_last_50": 243.51386168285518, "mean_pred_prob": 0.038455183728365226, "mean_pred_prob_last_10": 0.18440518353600055, "mean_pred_prob_last_25": 0.10483346360269934, "mean_pred_prob_last_50": 0.0652894229744561, "mean_token_accuracy": 0.8632186770439148, "step": 7490 }, { "epoch": 0.13332622260146126, "grad_norm": 0.9624746923875346, "learning_rate": 0.0001, "loss": 0.9643, "mean_abs_error": 343.1918860064096, "mean_abs_error_last_10": 63.203861434286274, "mean_abs_error_last_25": 83.79680368556126, "mean_abs_error_last_50": 157.09181050355454, "mean_pred_prob": 0.03552199348341674, "mean_pred_prob_last_10": 0.20240882690995932, "mean_pred_prob_last_25": 0.10555149503052234, "mean_pred_prob_last_50": 0.06221979237161577, "mean_token_accuracy": 0.8774604678153992, "step": 7500 }, { "epoch": 0.1335039908982632, "grad_norm": 1.7481828589120527, "learning_rate": 0.0001, "loss": 1.0303, "mean_abs_error": 447.74722211885194, "mean_abs_error_last_10": 111.30177476989797, "mean_abs_error_last_25": 156.0582722360727, "mean_abs_error_last_50": 258.40549239350736, "mean_pred_prob": 0.034016409004107115, "mean_pred_prob_last_10": 0.15903823543339968, "mean_pred_prob_last_25": 0.09272427726536989, "mean_pred_prob_last_50": 0.05748579683713615, "mean_token_accuracy": 0.8746147572994232, "step": 7510 }, { "epoch": 0.13368175919506514, "grad_norm": 1.1998062580697644, "learning_rate": 0.0001, "loss": 1.0514, "mean_abs_error": 1028.042321218775, "mean_abs_error_last_10": 512.085403983121, "mean_abs_error_last_25": 576.9269508444024, "mean_abs_error_last_50": 695.7459576826825, "mean_pred_prob": 0.02139370912918821, "mean_pred_prob_last_10": 0.11587152632710059, "mean_pred_prob_last_25": 0.06275692256458569, "mean_pred_prob_last_50": 0.03728551875683479, "mean_token_accuracy": 0.8663444459438324, "step": 7520 }, { "epoch": 0.1338595274918671, "grad_norm": 0.8372834465781208, "learning_rate": 0.0001, "loss": 0.9746, "mean_abs_error": 195.50194880764596, "mean_abs_error_last_10": 72.230978307409, "mean_abs_error_last_25": 135.90374806059194, "mean_abs_error_last_50": 153.75031912298732, "mean_pred_prob": 0.035869505070149896, "mean_pred_prob_last_10": 0.17885990925133227, "mean_pred_prob_last_25": 0.0992991903796792, "mean_pred_prob_last_50": 0.06109837079420686, "mean_token_accuracy": 0.8728898882865905, "step": 7530 }, { "epoch": 0.13403729578866905, "grad_norm": 1.253802702808421, "learning_rate": 0.0001, "loss": 0.9347, "mean_abs_error": 1369.9702829177302, "mean_abs_error_last_10": 977.7253578444231, "mean_abs_error_last_25": 1027.0339415619105, "mean_abs_error_last_50": 1092.3154539757184, "mean_pred_prob": 0.025649217949830928, "mean_pred_prob_last_10": 0.13190106739202748, "mean_pred_prob_last_25": 0.07213632339262403, "mean_pred_prob_last_50": 0.04374123525485629, "mean_token_accuracy": 0.8749939203262329, "step": 7540 }, { "epoch": 0.134215064085471, "grad_norm": 1.2357454202869536, "learning_rate": 0.0001, "loss": 0.9946, "mean_abs_error": 299.00109977642063, "mean_abs_error_last_10": 75.56750956052865, "mean_abs_error_last_25": 190.85691170518987, "mean_abs_error_last_50": 245.34413093248676, "mean_pred_prob": 0.031672802241519096, "mean_pred_prob_last_10": 0.16769991852343083, "mean_pred_prob_last_25": 0.09043459482491016, "mean_pred_prob_last_50": 0.05456355959177017, "mean_token_accuracy": 0.8683643758296966, "step": 7550 }, { "epoch": 0.13439283238227295, "grad_norm": 1.2476948282317473, "learning_rate": 0.0001, "loss": 0.9965, "mean_abs_error": 738.3234778811191, "mean_abs_error_last_10": 271.4174613879205, "mean_abs_error_last_25": 317.9154286541426, "mean_abs_error_last_50": 436.20516696630466, "mean_pred_prob": 0.023923257569549605, "mean_pred_prob_last_10": 0.1276574231334962, "mean_pred_prob_last_25": 0.06808031913824379, "mean_pred_prob_last_50": 0.040988129907054825, "mean_token_accuracy": 0.8696106970310211, "step": 7560 }, { "epoch": 0.1345706006790749, "grad_norm": 1.1980179306813359, "learning_rate": 0.0001, "loss": 1.0314, "mean_abs_error": 263.4062943152869, "mean_abs_error_last_10": 126.6870969018162, "mean_abs_error_last_25": 110.34190109688295, "mean_abs_error_last_50": 146.76527860187895, "mean_pred_prob": 0.03405646386090666, "mean_pred_prob_last_10": 0.17628007112070918, "mean_pred_prob_last_25": 0.09715975001454354, "mean_pred_prob_last_50": 0.05856317495927214, "mean_token_accuracy": 0.8800053715705871, "step": 7570 }, { "epoch": 0.13474836897587683, "grad_norm": 1.9191387682990144, "learning_rate": 0.0001, "loss": 0.9396, "mean_abs_error": 767.5651046817264, "mean_abs_error_last_10": 332.7991926545238, "mean_abs_error_last_25": 358.85148841928, "mean_abs_error_last_50": 463.99730446095253, "mean_pred_prob": 0.030312120172311552, "mean_pred_prob_last_10": 0.158996420429321, "mean_pred_prob_last_25": 0.08629901247331873, "mean_pred_prob_last_50": 0.05219049404549878, "mean_token_accuracy": 0.8710974395275116, "step": 7580 }, { "epoch": 0.1349261372726788, "grad_norm": 1.8327598847659883, "learning_rate": 0.0001, "loss": 0.9537, "mean_abs_error": 134.11005436705773, "mean_abs_error_last_10": 71.21063922445661, "mean_abs_error_last_25": 63.820576209082574, "mean_abs_error_last_50": 78.73658417576935, "mean_pred_prob": 0.043986250273883344, "mean_pred_prob_last_10": 0.2264338657259941, "mean_pred_prob_last_25": 0.12400385588407517, "mean_pred_prob_last_50": 0.07511499999091029, "mean_token_accuracy": 0.8795453906059265, "step": 7590 }, { "epoch": 0.13510390556948074, "grad_norm": 3.375206431001861, "learning_rate": 0.0001, "loss": 0.9824, "mean_abs_error": 1552.9402825606226, "mean_abs_error_last_10": 798.1582148051318, "mean_abs_error_last_25": 905.1238676718131, "mean_abs_error_last_50": 1161.1937760278327, "mean_pred_prob": 0.021398700714053122, "mean_pred_prob_last_10": 0.11497281575429952, "mean_pred_prob_last_25": 0.062161191544146274, "mean_pred_prob_last_50": 0.03665166522114305, "mean_token_accuracy": 0.8644430816173554, "step": 7600 }, { "epoch": 0.13528167386628268, "grad_norm": 1.4826046826625883, "learning_rate": 0.0001, "loss": 1.0275, "mean_abs_error": 619.0163729792359, "mean_abs_error_last_10": 144.766655123585, "mean_abs_error_last_25": 233.17396623919072, "mean_abs_error_last_50": 293.4146221124821, "mean_pred_prob": 0.029635353898629545, "mean_pred_prob_last_10": 0.15340820690616966, "mean_pred_prob_last_25": 0.0834285712102428, "mean_pred_prob_last_50": 0.050641917739994824, "mean_token_accuracy": 0.875834047794342, "step": 7610 }, { "epoch": 0.13545944216308464, "grad_norm": 2.0815530864393663, "learning_rate": 0.0001, "loss": 1.0478, "mean_abs_error": 256.8980546771073, "mean_abs_error_last_10": 37.95198557139433, "mean_abs_error_last_25": 80.80024774603932, "mean_abs_error_last_50": 127.04053728154568, "mean_pred_prob": 0.034051154367625715, "mean_pred_prob_last_10": 0.17757425159215928, "mean_pred_prob_last_25": 0.09831028003245593, "mean_pred_prob_last_50": 0.058721386920660736, "mean_token_accuracy": 0.8628671824932098, "step": 7620 }, { "epoch": 0.13563721045988658, "grad_norm": 1.2166883954858683, "learning_rate": 0.0001, "loss": 1.0646, "mean_abs_error": 1392.3613107601145, "mean_abs_error_last_10": 740.718093061501, "mean_abs_error_last_25": 873.5037128838827, "mean_abs_error_last_50": 1053.0247593771355, "mean_pred_prob": 0.02438085425965255, "mean_pred_prob_last_10": 0.13801395561313257, "mean_pred_prob_last_25": 0.07232565545855323, "mean_pred_prob_last_50": 0.042343273820006286, "mean_token_accuracy": 0.8665800571441651, "step": 7630 }, { "epoch": 0.13581497875668852, "grad_norm": 2.3096379869648076, "learning_rate": 0.0001, "loss": 0.976, "mean_abs_error": 1330.4993862471297, "mean_abs_error_last_10": 920.8293863761149, "mean_abs_error_last_25": 1007.5993276762771, "mean_abs_error_last_50": 1136.1817797944793, "mean_pred_prob": 0.024589031375217018, "mean_pred_prob_last_10": 0.12913786520075518, "mean_pred_prob_last_25": 0.07091884138644673, "mean_pred_prob_last_50": 0.04258653687502374, "mean_token_accuracy": 0.8725746452808381, "step": 7640 }, { "epoch": 0.1359927470534905, "grad_norm": 1.2273581691598587, "learning_rate": 0.0001, "loss": 0.9771, "mean_abs_error": 737.864767957611, "mean_abs_error_last_10": 272.1562124720082, "mean_abs_error_last_25": 317.0743919820347, "mean_abs_error_last_50": 437.6414172333428, "mean_pred_prob": 0.026158008590573444, "mean_pred_prob_last_10": 0.147047101467615, "mean_pred_prob_last_25": 0.07759236396523192, "mean_pred_prob_last_50": 0.045689606230007486, "mean_token_accuracy": 0.8873255312442779, "step": 7650 }, { "epoch": 0.13617051535029243, "grad_norm": 0.8151138811510322, "learning_rate": 0.0001, "loss": 0.9616, "mean_abs_error": 329.1217637411745, "mean_abs_error_last_10": 223.24208757550664, "mean_abs_error_last_25": 220.34529925157685, "mean_abs_error_last_50": 257.1840108181509, "mean_pred_prob": 0.036948929890058935, "mean_pred_prob_last_10": 0.18617438338696957, "mean_pred_prob_last_25": 0.10308684017509222, "mean_pred_prob_last_50": 0.06293805767782032, "mean_token_accuracy": 0.8712180376052856, "step": 7660 }, { "epoch": 0.13634828364709436, "grad_norm": 1.1688774781049875, "learning_rate": 0.0001, "loss": 1.0274, "mean_abs_error": 446.9813477365559, "mean_abs_error_last_10": 157.53259175971192, "mean_abs_error_last_25": 196.67528187884187, "mean_abs_error_last_50": 250.7439343752726, "mean_pred_prob": 0.029776359326206148, "mean_pred_prob_last_10": 0.16070360597223043, "mean_pred_prob_last_25": 0.08688275143504143, "mean_pred_prob_last_50": 0.05157658001407981, "mean_token_accuracy": 0.8734456241130829, "step": 7670 }, { "epoch": 0.13652605194389633, "grad_norm": 2.0684901946069356, "learning_rate": 0.0001, "loss": 0.9341, "mean_abs_error": 129.2253180292843, "mean_abs_error_last_10": 19.70346598526452, "mean_abs_error_last_25": 44.89910630234321, "mean_abs_error_last_50": 84.55798483724185, "mean_pred_prob": 0.0423933029640466, "mean_pred_prob_last_10": 0.21800039596855642, "mean_pred_prob_last_25": 0.12142270067706704, "mean_pred_prob_last_50": 0.07328796512447297, "mean_token_accuracy": 0.8777812480926513, "step": 7680 }, { "epoch": 0.13670382024069827, "grad_norm": 2.4261764933246517, "learning_rate": 0.0001, "loss": 0.8968, "mean_abs_error": 274.31857729508545, "mean_abs_error_last_10": 105.70475841557058, "mean_abs_error_last_25": 96.76852145804072, "mean_abs_error_last_50": 137.02157224929735, "mean_pred_prob": 0.03178223092108965, "mean_pred_prob_last_10": 0.17472021989524364, "mean_pred_prob_last_25": 0.09311377499252557, "mean_pred_prob_last_50": 0.05483362539671362, "mean_token_accuracy": 0.8775225818157196, "step": 7690 }, { "epoch": 0.1368815885375002, "grad_norm": 2.244441123108288, "learning_rate": 0.0001, "loss": 1.0433, "mean_abs_error": 893.4745620781887, "mean_abs_error_last_10": 298.06191211829116, "mean_abs_error_last_25": 335.8061772744459, "mean_abs_error_last_50": 466.50802205591225, "mean_pred_prob": 0.024444372244761325, "mean_pred_prob_last_10": 0.13033949087839575, "mean_pred_prob_last_25": 0.07110101956641302, "mean_pred_prob_last_50": 0.042640353488968684, "mean_token_accuracy": 0.8757319390773773, "step": 7700 }, { "epoch": 0.13705935683430218, "grad_norm": 1.8643114962886742, "learning_rate": 0.0001, "loss": 0.9691, "mean_abs_error": 157.77134375041575, "mean_abs_error_last_10": 24.51421968430154, "mean_abs_error_last_25": 62.013202944146315, "mean_abs_error_last_50": 96.98550594159141, "mean_pred_prob": 0.0481217207852751, "mean_pred_prob_last_10": 0.24608469493687152, "mean_pred_prob_last_25": 0.13632250651717187, "mean_pred_prob_last_50": 0.08242010800167918, "mean_token_accuracy": 0.8804807007312775, "step": 7710 }, { "epoch": 0.13723712513110412, "grad_norm": 2.2113861937054855, "learning_rate": 0.0001, "loss": 0.9806, "mean_abs_error": 977.8356611347878, "mean_abs_error_last_10": 571.1097311530036, "mean_abs_error_last_25": 641.2444159507032, "mean_abs_error_last_50": 769.1642038353535, "mean_pred_prob": 0.027455807440856007, "mean_pred_prob_last_10": 0.15288116255542264, "mean_pred_prob_last_25": 0.07965218095341697, "mean_pred_prob_last_50": 0.04726092663040617, "mean_token_accuracy": 0.8780654907226563, "step": 7720 }, { "epoch": 0.13741489342790605, "grad_norm": 1.795504190394274, "learning_rate": 0.0001, "loss": 0.9713, "mean_abs_error": 621.9636148255363, "mean_abs_error_last_10": 258.9695337504741, "mean_abs_error_last_25": 318.50910210756376, "mean_abs_error_last_50": 393.75809538983765, "mean_pred_prob": 0.02874123674700968, "mean_pred_prob_last_10": 0.13708948525600134, "mean_pred_prob_last_25": 0.07913865206064656, "mean_pred_prob_last_50": 0.04867036037612706, "mean_token_accuracy": 0.8691737353801727, "step": 7730 }, { "epoch": 0.13759266172470802, "grad_norm": 1.8636997480480688, "learning_rate": 0.0001, "loss": 0.9694, "mean_abs_error": 1122.02482752272, "mean_abs_error_last_10": 503.73239337212283, "mean_abs_error_last_25": 677.8744157779721, "mean_abs_error_last_50": 871.2329564002614, "mean_pred_prob": 0.029205284699855838, "mean_pred_prob_last_10": 0.15762978050042875, "mean_pred_prob_last_25": 0.08474645910609979, "mean_pred_prob_last_50": 0.050425639712193514, "mean_token_accuracy": 0.8728265523910522, "step": 7740 }, { "epoch": 0.13777043002150996, "grad_norm": 1.797415073564789, "learning_rate": 0.0001, "loss": 1.0164, "mean_abs_error": 800.1837728417752, "mean_abs_error_last_10": 336.6679842148549, "mean_abs_error_last_25": 401.54698601896604, "mean_abs_error_last_50": 567.6772685804293, "mean_pred_prob": 0.03256652382260654, "mean_pred_prob_last_10": 0.1521343508444261, "mean_pred_prob_last_25": 0.08967254877206869, "mean_pred_prob_last_50": 0.05535732068819925, "mean_token_accuracy": 0.8620778143405914, "step": 7750 }, { "epoch": 0.1379481983183119, "grad_norm": 2.5572984207880407, "learning_rate": 0.0001, "loss": 0.9457, "mean_abs_error": 275.81684765511295, "mean_abs_error_last_10": 95.93631338470718, "mean_abs_error_last_25": 138.6678541607406, "mean_abs_error_last_50": 154.91746651466093, "mean_pred_prob": 0.03304510819725692, "mean_pred_prob_last_10": 0.1804356247186661, "mean_pred_prob_last_25": 0.09566006576642394, "mean_pred_prob_last_50": 0.05729362643323839, "mean_token_accuracy": 0.876551729440689, "step": 7760 }, { "epoch": 0.13812596661511387, "grad_norm": 0.8615018967388468, "learning_rate": 0.0001, "loss": 1.0689, "mean_abs_error": 197.8076719379154, "mean_abs_error_last_10": 66.50334053093889, "mean_abs_error_last_25": 139.3786215444831, "mean_abs_error_last_50": 197.56052155617212, "mean_pred_prob": 0.03472980512306094, "mean_pred_prob_last_10": 0.19194166138768196, "mean_pred_prob_last_25": 0.10265027191489935, "mean_pred_prob_last_50": 0.060417222511023286, "mean_token_accuracy": 0.8721199929714203, "step": 7770 }, { "epoch": 0.1383037349119158, "grad_norm": 1.015706758506375, "learning_rate": 0.0001, "loss": 0.8779, "mean_abs_error": 587.6678259297631, "mean_abs_error_last_10": 227.67529456058847, "mean_abs_error_last_25": 305.5192803245035, "mean_abs_error_last_50": 395.4823333334935, "mean_pred_prob": 0.03311798072827514, "mean_pred_prob_last_10": 0.16354302353574895, "mean_pred_prob_last_25": 0.09040590766817332, "mean_pred_prob_last_50": 0.0558947580982931, "mean_token_accuracy": 0.8816867053508759, "step": 7780 }, { "epoch": 0.13848150320871777, "grad_norm": 1.6231548397268791, "learning_rate": 0.0001, "loss": 0.9225, "mean_abs_error": 292.5565134869722, "mean_abs_error_last_10": 110.14110188352745, "mean_abs_error_last_25": 201.25099208717975, "mean_abs_error_last_50": 195.96122049290284, "mean_pred_prob": 0.03029100098647177, "mean_pred_prob_last_10": 0.16392645109444856, "mean_pred_prob_last_25": 0.09010386522859334, "mean_pred_prob_last_50": 0.05266029518097639, "mean_token_accuracy": 0.8789017796516418, "step": 7790 }, { "epoch": 0.1386592715055197, "grad_norm": 0.8136060037957842, "learning_rate": 0.0001, "loss": 0.9777, "mean_abs_error": 292.3346380023492, "mean_abs_error_last_10": 58.17702231533103, "mean_abs_error_last_25": 141.04338296243583, "mean_abs_error_last_50": 185.63196533214287, "mean_pred_prob": 0.038575455197133124, "mean_pred_prob_last_10": 0.19285073801875113, "mean_pred_prob_last_25": 0.10730764409527183, "mean_pred_prob_last_50": 0.06582114025950432, "mean_token_accuracy": 0.8867436289787293, "step": 7800 }, { "epoch": 0.13883703980232165, "grad_norm": 2.2333145725097197, "learning_rate": 0.0001, "loss": 1.0215, "mean_abs_error": 255.6939440620842, "mean_abs_error_last_10": 192.19298878311665, "mean_abs_error_last_25": 183.89757185446152, "mean_abs_error_last_50": 183.68538377912168, "mean_pred_prob": 0.03658816707320511, "mean_pred_prob_last_10": 0.1900655409321189, "mean_pred_prob_last_25": 0.10441216323524713, "mean_pred_prob_last_50": 0.06308700568042695, "mean_token_accuracy": 0.869531261920929, "step": 7810 }, { "epoch": 0.13901480809912362, "grad_norm": 1.3787386311458434, "learning_rate": 0.0001, "loss": 1.0327, "mean_abs_error": 152.87833779890553, "mean_abs_error_last_10": 30.446241468515005, "mean_abs_error_last_25": 57.03074758998832, "mean_abs_error_last_50": 86.99728480025848, "mean_pred_prob": 0.03697875989601016, "mean_pred_prob_last_10": 0.19169110879302026, "mean_pred_prob_last_25": 0.10502463914453983, "mean_pred_prob_last_50": 0.06349807437509299, "mean_token_accuracy": 0.8710108757019043, "step": 7820 }, { "epoch": 0.13919257639592555, "grad_norm": 1.127189942148002, "learning_rate": 0.0001, "loss": 0.9331, "mean_abs_error": 435.34875642428625, "mean_abs_error_last_10": 120.94836387733994, "mean_abs_error_last_25": 174.89424454855654, "mean_abs_error_last_50": 244.0270378069426, "mean_pred_prob": 0.036450162215624006, "mean_pred_prob_last_10": 0.18277799957431853, "mean_pred_prob_last_25": 0.1033795200753957, "mean_pred_prob_last_50": 0.06267666136845947, "mean_token_accuracy": 0.8805067479610443, "step": 7830 }, { "epoch": 0.1393703446927275, "grad_norm": 1.661348651609658, "learning_rate": 0.0001, "loss": 0.935, "mean_abs_error": 383.9649175140235, "mean_abs_error_last_10": 120.46824000070191, "mean_abs_error_last_25": 156.03851816735943, "mean_abs_error_last_50": 229.79066614590573, "mean_pred_prob": 0.02958505938295275, "mean_pred_prob_last_10": 0.149999625608325, "mean_pred_prob_last_25": 0.08419002210721374, "mean_pred_prob_last_50": 0.050675107585266235, "mean_token_accuracy": 0.8722622871398926, "step": 7840 }, { "epoch": 0.13954811298952946, "grad_norm": 1.293258809785194, "learning_rate": 0.0001, "loss": 0.977, "mean_abs_error": 221.79584419025596, "mean_abs_error_last_10": 142.49905588987295, "mean_abs_error_last_25": 143.37272575484752, "mean_abs_error_last_50": 145.53562000460875, "mean_pred_prob": 0.03544639935716987, "mean_pred_prob_last_10": 0.18913957700133324, "mean_pred_prob_last_25": 0.10160346124321222, "mean_pred_prob_last_50": 0.06107741137966514, "mean_token_accuracy": 0.8645615875720978, "step": 7850 }, { "epoch": 0.1397258812863314, "grad_norm": 1.4567329577471788, "learning_rate": 0.0001, "loss": 1.0819, "mean_abs_error": 165.0978370008965, "mean_abs_error_last_10": 37.99019266475121, "mean_abs_error_last_25": 65.12984301400823, "mean_abs_error_last_50": 113.57465775097664, "mean_pred_prob": 0.032954431418329475, "mean_pred_prob_last_10": 0.1652780320495367, "mean_pred_prob_last_25": 0.0924058798700571, "mean_pred_prob_last_50": 0.055984947830438614, "mean_token_accuracy": 0.8637441158294678, "step": 7860 }, { "epoch": 0.13990364958313334, "grad_norm": 1.0936154542143024, "learning_rate": 0.0001, "loss": 0.9304, "mean_abs_error": 167.1692254969978, "mean_abs_error_last_10": 38.981940965805045, "mean_abs_error_last_25": 55.60571686895351, "mean_abs_error_last_50": 79.64406341522069, "mean_pred_prob": 0.03544316543266177, "mean_pred_prob_last_10": 0.18952481187880038, "mean_pred_prob_last_25": 0.10347598548978568, "mean_pred_prob_last_50": 0.06170851644128561, "mean_token_accuracy": 0.8817150950431824, "step": 7870 }, { "epoch": 0.1400814178799353, "grad_norm": 0.8029242172751969, "learning_rate": 0.0001, "loss": 0.9475, "mean_abs_error": 150.07715848926057, "mean_abs_error_last_10": 28.49932411716768, "mean_abs_error_last_25": 48.00710880376798, "mean_abs_error_last_50": 77.21295724055604, "mean_pred_prob": 0.04302531285211444, "mean_pred_prob_last_10": 0.2160622589290142, "mean_pred_prob_last_25": 0.11793334893882275, "mean_pred_prob_last_50": 0.0725900236517191, "mean_token_accuracy": 0.882079440355301, "step": 7880 }, { "epoch": 0.14025918617673724, "grad_norm": 1.250115937173779, "learning_rate": 0.0001, "loss": 0.8767, "mean_abs_error": 247.80464559192524, "mean_abs_error_last_10": 91.7852640743055, "mean_abs_error_last_25": 176.94051754166392, "mean_abs_error_last_50": 197.75622573565684, "mean_pred_prob": 0.04102007821202278, "mean_pred_prob_last_10": 0.22132818698883056, "mean_pred_prob_last_25": 0.11830725902691483, "mean_pred_prob_last_50": 0.07071621981449425, "mean_token_accuracy": 0.8856140792369842, "step": 7890 }, { "epoch": 0.14043695447353918, "grad_norm": 1.1534064508001622, "learning_rate": 0.0001, "loss": 0.9507, "mean_abs_error": 685.8681317996134, "mean_abs_error_last_10": 283.55382128972667, "mean_abs_error_last_25": 373.1321667213948, "mean_abs_error_last_50": 482.51283166320263, "mean_pred_prob": 0.040845907421316954, "mean_pred_prob_last_10": 0.21564630535431206, "mean_pred_prob_last_25": 0.11698809012304992, "mean_pred_prob_last_50": 0.06982753800111823, "mean_token_accuracy": 0.8749096870422364, "step": 7900 }, { "epoch": 0.14061472277034115, "grad_norm": 1.3054291494075156, "learning_rate": 0.0001, "loss": 1.0277, "mean_abs_error": 842.4062214261467, "mean_abs_error_last_10": 496.12535413784843, "mean_abs_error_last_25": 584.9477896989088, "mean_abs_error_last_50": 673.2757416600141, "mean_pred_prob": 0.01755806705041323, "mean_pred_prob_last_10": 0.09727251361473463, "mean_pred_prob_last_25": 0.053114817981258965, "mean_pred_prob_last_50": 0.030980657585314476, "mean_token_accuracy": 0.8688044130802155, "step": 7910 }, { "epoch": 0.1407924910671431, "grad_norm": 1.0113323557092635, "learning_rate": 0.0001, "loss": 1.065, "mean_abs_error": 564.3184471521652, "mean_abs_error_last_10": 236.99118020543224, "mean_abs_error_last_25": 236.46886029534713, "mean_abs_error_last_50": 298.157048174661, "mean_pred_prob": 0.015662120142951607, "mean_pred_prob_last_10": 0.09238069131970406, "mean_pred_prob_last_25": 0.04658011225983501, "mean_pred_prob_last_50": 0.02742367978207767, "mean_token_accuracy": 0.8693179726600647, "step": 7920 }, { "epoch": 0.14097025936394503, "grad_norm": 1.702723470750715, "learning_rate": 0.0001, "loss": 1.106, "mean_abs_error": 662.9131368948448, "mean_abs_error_last_10": 180.20232603589528, "mean_abs_error_last_25": 268.12820925086277, "mean_abs_error_last_50": 366.87364864477513, "mean_pred_prob": 0.031110420048935338, "mean_pred_prob_last_10": 0.16856812263140455, "mean_pred_prob_last_25": 0.09046585719333962, "mean_pred_prob_last_50": 0.053890831978060307, "mean_token_accuracy": 0.8574820458889008, "step": 7930 }, { "epoch": 0.141148027660747, "grad_norm": 1.6286508055234468, "learning_rate": 0.0001, "loss": 1.0114, "mean_abs_error": 2226.2280296939925, "mean_abs_error_last_10": 1240.4301608619703, "mean_abs_error_last_25": 1456.7014480993123, "mean_abs_error_last_50": 1706.0769197739526, "mean_pred_prob": 0.0222081301806611, "mean_pred_prob_last_10": 0.11514790822693613, "mean_pred_prob_last_25": 0.06355914445739472, "mean_pred_prob_last_50": 0.03840105291747022, "mean_token_accuracy": 0.8673229157924652, "step": 7940 }, { "epoch": 0.14132579595754893, "grad_norm": 1.650080215436744, "learning_rate": 0.0001, "loss": 0.9353, "mean_abs_error": 245.52899208972855, "mean_abs_error_last_10": 91.94228815426587, "mean_abs_error_last_25": 100.85964751341035, "mean_abs_error_last_50": 150.89881944438383, "mean_pred_prob": 0.038616533810272816, "mean_pred_prob_last_10": 0.19910478368401527, "mean_pred_prob_last_25": 0.10647984426468611, "mean_pred_prob_last_50": 0.06510875737294555, "mean_token_accuracy": 0.8813013374805451, "step": 7950 }, { "epoch": 0.14150356425435087, "grad_norm": 2.7931680564545207, "learning_rate": 0.0001, "loss": 0.9353, "mean_abs_error": 334.1469086202906, "mean_abs_error_last_10": 80.95342132429141, "mean_abs_error_last_25": 106.41087794176693, "mean_abs_error_last_50": 180.1407145252859, "mean_pred_prob": 0.033163536828942596, "mean_pred_prob_last_10": 0.17935733124613762, "mean_pred_prob_last_25": 0.09962185472249985, "mean_pred_prob_last_50": 0.05843294588848948, "mean_token_accuracy": 0.8782006800174713, "step": 7960 }, { "epoch": 0.14168133255115284, "grad_norm": 3.2270134605390743, "learning_rate": 0.0001, "loss": 0.9991, "mean_abs_error": 542.1330159943278, "mean_abs_error_last_10": 317.0176707534082, "mean_abs_error_last_25": 403.1065650419017, "mean_abs_error_last_50": 402.52019383377217, "mean_pred_prob": 0.023528882628306748, "mean_pred_prob_last_10": 0.1348161019384861, "mean_pred_prob_last_25": 0.07119187824428082, "mean_pred_prob_last_50": 0.0413186829071492, "mean_token_accuracy": 0.8691125690937043, "step": 7970 }, { "epoch": 0.14185910084795478, "grad_norm": 1.7140683161474197, "learning_rate": 0.0001, "loss": 1.001, "mean_abs_error": 1203.1499127340044, "mean_abs_error_last_10": 586.3697967890595, "mean_abs_error_last_25": 632.1867931156723, "mean_abs_error_last_50": 811.1031631760736, "mean_pred_prob": 0.02209353781945538, "mean_pred_prob_last_10": 0.12304015915142372, "mean_pred_prob_last_25": 0.06558763931971043, "mean_pred_prob_last_50": 0.03859339773189276, "mean_token_accuracy": 0.8780543327331543, "step": 7980 }, { "epoch": 0.14203686914475672, "grad_norm": 1.6498319596777367, "learning_rate": 0.0001, "loss": 1.0223, "mean_abs_error": 599.3610116364242, "mean_abs_error_last_10": 196.943448438606, "mean_abs_error_last_25": 243.35639265412405, "mean_abs_error_last_50": 336.0841486977245, "mean_pred_prob": 0.021721203619381412, "mean_pred_prob_last_10": 0.11791337502654642, "mean_pred_prob_last_25": 0.06268116541905329, "mean_pred_prob_last_50": 0.03761500212131068, "mean_token_accuracy": 0.8838137269020081, "step": 7990 }, { "epoch": 0.14221463744155868, "grad_norm": 1.1590574903697268, "learning_rate": 0.0001, "loss": 0.9647, "mean_abs_error": 342.3232572689685, "mean_abs_error_last_10": 67.6683189415266, "mean_abs_error_last_25": 122.61334407535341, "mean_abs_error_last_50": 196.33246527162925, "mean_pred_prob": 0.03434635098092258, "mean_pred_prob_last_10": 0.1742064768448472, "mean_pred_prob_last_25": 0.0961878347210586, "mean_pred_prob_last_50": 0.05879752193577588, "mean_token_accuracy": 0.8667628407478333, "step": 8000 }, { "epoch": 0.14239240573836062, "grad_norm": 0.7957067668262487, "learning_rate": 0.0001, "loss": 0.9524, "mean_abs_error": 367.2521036373684, "mean_abs_error_last_10": 279.89232867451284, "mean_abs_error_last_25": 320.20268452619456, "mean_abs_error_last_50": 299.8710792784224, "mean_pred_prob": 0.03137753696646541, "mean_pred_prob_last_10": 0.16434021554887296, "mean_pred_prob_last_25": 0.08677609330043197, "mean_pred_prob_last_50": 0.05281757297925651, "mean_token_accuracy": 0.871202963590622, "step": 8010 }, { "epoch": 0.14257017403516256, "grad_norm": 0.8488189473534475, "learning_rate": 0.0001, "loss": 0.8422, "mean_abs_error": 174.0564615168962, "mean_abs_error_last_10": 53.31877023007918, "mean_abs_error_last_25": 94.74061393047485, "mean_abs_error_last_50": 114.5392294866796, "mean_pred_prob": 0.03276804997585714, "mean_pred_prob_last_10": 0.167945958673954, "mean_pred_prob_last_25": 0.09255058597773314, "mean_pred_prob_last_50": 0.056066707149147986, "mean_token_accuracy": 0.8895693957805634, "step": 8020 }, { "epoch": 0.14274794233196453, "grad_norm": 1.8981368706988218, "learning_rate": 0.0001, "loss": 1.0061, "mean_abs_error": 556.3028149782392, "mean_abs_error_last_10": 218.75565229920113, "mean_abs_error_last_25": 196.24980254805155, "mean_abs_error_last_50": 307.50370708066293, "mean_pred_prob": 0.01886402815580368, "mean_pred_prob_last_10": 0.10697818498592823, "mean_pred_prob_last_25": 0.05668366255704314, "mean_pred_prob_last_50": 0.033206418843474236, "mean_token_accuracy": 0.8728706955909729, "step": 8030 }, { "epoch": 0.14292571062876647, "grad_norm": 2.8714134733510437, "learning_rate": 0.0001, "loss": 0.9915, "mean_abs_error": 1524.255273512464, "mean_abs_error_last_10": 624.2809840768889, "mean_abs_error_last_25": 748.5716375214278, "mean_abs_error_last_50": 1016.0566619864485, "mean_pred_prob": 0.02242175663850503, "mean_pred_prob_last_10": 0.1168396097200457, "mean_pred_prob_last_25": 0.06393271096749231, "mean_pred_prob_last_50": 0.03875414332433138, "mean_token_accuracy": 0.873145592212677, "step": 8040 }, { "epoch": 0.1431034789255684, "grad_norm": 1.0788300789601266, "learning_rate": 0.0001, "loss": 1.0389, "mean_abs_error": 390.1611194264253, "mean_abs_error_last_10": 180.07904613856962, "mean_abs_error_last_25": 180.5928639219225, "mean_abs_error_last_50": 206.07144279841683, "mean_pred_prob": 0.030569310905411838, "mean_pred_prob_last_10": 0.17050084825605155, "mean_pred_prob_last_25": 0.08877411624416709, "mean_pred_prob_last_50": 0.05283907186239958, "mean_token_accuracy": 0.8702591598033905, "step": 8050 }, { "epoch": 0.14328124722237037, "grad_norm": 1.2567458140702676, "learning_rate": 0.0001, "loss": 0.9616, "mean_abs_error": 402.7355240186427, "mean_abs_error_last_10": 160.06489981054966, "mean_abs_error_last_25": 184.02920387552544, "mean_abs_error_last_50": 242.29739467797407, "mean_pred_prob": 0.027517272322438658, "mean_pred_prob_last_10": 0.14381516769062727, "mean_pred_prob_last_25": 0.07787339347414672, "mean_pred_prob_last_50": 0.04711474317591637, "mean_token_accuracy": 0.8792863488197327, "step": 8060 }, { "epoch": 0.1434590155191723, "grad_norm": 1.0250642062341155, "learning_rate": 0.0001, "loss": 0.9759, "mean_abs_error": 1200.630163032273, "mean_abs_error_last_10": 437.89732827774816, "mean_abs_error_last_25": 568.2564398226535, "mean_abs_error_last_50": 792.7875975679359, "mean_pred_prob": 0.01351765419531148, "mean_pred_prob_last_10": 0.07777030477882363, "mean_pred_prob_last_25": 0.04003245609346777, "mean_pred_prob_last_50": 0.023433546046726407, "mean_token_accuracy": 0.8712161302566528, "step": 8070 }, { "epoch": 0.14363678381597425, "grad_norm": 0.9059070510256703, "learning_rate": 0.0001, "loss": 0.94, "mean_abs_error": 81.97626610816785, "mean_abs_error_last_10": 13.534866178210896, "mean_abs_error_last_25": 29.198872198728388, "mean_abs_error_last_50": 42.26110365552752, "mean_pred_prob": 0.04589300798252225, "mean_pred_prob_last_10": 0.21786485537886618, "mean_pred_prob_last_25": 0.12497834637761115, "mean_pred_prob_last_50": 0.07832106780260802, "mean_token_accuracy": 0.8805162727832794, "step": 8080 }, { "epoch": 0.14381455211277622, "grad_norm": 0.9965915202472917, "learning_rate": 0.0001, "loss": 1.0149, "mean_abs_error": 812.1195834064697, "mean_abs_error_last_10": 186.90940473917436, "mean_abs_error_last_25": 205.41624230034648, "mean_abs_error_last_50": 377.981074182227, "mean_pred_prob": 0.021360721986275165, "mean_pred_prob_last_10": 0.09933536387979984, "mean_pred_prob_last_25": 0.05775405308231711, "mean_pred_prob_last_50": 0.036057620600331575, "mean_token_accuracy": 0.8807790279388428, "step": 8090 }, { "epoch": 0.14399232040957816, "grad_norm": 0.9219706390797926, "learning_rate": 0.0001, "loss": 0.9857, "mean_abs_error": 354.7190604117043, "mean_abs_error_last_10": 129.63241976055642, "mean_abs_error_last_25": 147.37133770023723, "mean_abs_error_last_50": 198.8060226502946, "mean_pred_prob": 0.03518774665426463, "mean_pred_prob_last_10": 0.17777043357491493, "mean_pred_prob_last_25": 0.09735724115744233, "mean_pred_prob_last_50": 0.060058909887447956, "mean_token_accuracy": 0.8805581748485565, "step": 8100 }, { "epoch": 0.1441700887063801, "grad_norm": 1.3634701687817954, "learning_rate": 0.0001, "loss": 0.9701, "mean_abs_error": 1022.6693096441174, "mean_abs_error_last_10": 574.7616451064836, "mean_abs_error_last_25": 630.7273269210498, "mean_abs_error_last_50": 732.958786607904, "mean_pred_prob": 0.04152302921720548, "mean_pred_prob_last_10": 0.20875591373769567, "mean_pred_prob_last_25": 0.11744216460792814, "mean_pred_prob_last_50": 0.07127750475046923, "mean_token_accuracy": 0.8769596636295318, "step": 8110 }, { "epoch": 0.14434785700318206, "grad_norm": 1.8830365978372392, "learning_rate": 0.0001, "loss": 0.9715, "mean_abs_error": 664.8746168479116, "mean_abs_error_last_10": 239.5634199842205, "mean_abs_error_last_25": 344.947139576111, "mean_abs_error_last_50": 465.6295663400122, "mean_pred_prob": 0.006807490135543048, "mean_pred_prob_last_10": 0.04310966935008764, "mean_pred_prob_last_25": 0.020903304684907198, "mean_pred_prob_last_50": 0.012012589955702424, "mean_token_accuracy": 0.874862265586853, "step": 8120 }, { "epoch": 0.144525625299984, "grad_norm": 3.95616598854484, "learning_rate": 0.0001, "loss": 0.9408, "mean_abs_error": 278.5637562911983, "mean_abs_error_last_10": 240.96405065341347, "mean_abs_error_last_25": 191.2578728472945, "mean_abs_error_last_50": 195.86077617395404, "mean_pred_prob": 0.03742389818071388, "mean_pred_prob_last_10": 0.20203139495570213, "mean_pred_prob_last_25": 0.10913722253171727, "mean_pred_prob_last_50": 0.06465633120387793, "mean_token_accuracy": 0.8693073511123657, "step": 8130 }, { "epoch": 0.14470339359678594, "grad_norm": 1.408603658247875, "learning_rate": 0.0001, "loss": 0.9637, "mean_abs_error": 934.2970696673253, "mean_abs_error_last_10": 235.48708370456583, "mean_abs_error_last_25": 310.27872555560043, "mean_abs_error_last_50": 529.0711319325685, "mean_pred_prob": 0.021330144442617893, "mean_pred_prob_last_10": 0.12367667692014947, "mean_pred_prob_last_25": 0.06553476166445762, "mean_pred_prob_last_50": 0.03791064927936531, "mean_token_accuracy": 0.8742280781269074, "step": 8140 }, { "epoch": 0.1448811618935879, "grad_norm": 1.022964372986664, "learning_rate": 0.0001, "loss": 0.9388, "mean_abs_error": 529.2200223423677, "mean_abs_error_last_10": 175.77661584717987, "mean_abs_error_last_25": 225.71291219763035, "mean_abs_error_last_50": 336.1636705537734, "mean_pred_prob": 0.03475987605052069, "mean_pred_prob_last_10": 0.17394350891700014, "mean_pred_prob_last_25": 0.09734131972072646, "mean_pred_prob_last_50": 0.0594913384062238, "mean_token_accuracy": 0.8774481296539307, "step": 8150 }, { "epoch": 0.14505893019038985, "grad_norm": 1.4092125153071169, "learning_rate": 0.0001, "loss": 0.9958, "mean_abs_error": 873.5285679885376, "mean_abs_error_last_10": 523.2714306995106, "mean_abs_error_last_25": 595.6709660936392, "mean_abs_error_last_50": 714.3811468594344, "mean_pred_prob": 0.0441089901534724, "mean_pred_prob_last_10": 0.22561255351756698, "mean_pred_prob_last_25": 0.12381942490465007, "mean_pred_prob_last_50": 0.07489385667140595, "mean_token_accuracy": 0.8728093862533569, "step": 8160 }, { "epoch": 0.14523669848719178, "grad_norm": 1.8489029454264703, "learning_rate": 0.0001, "loss": 0.9971, "mean_abs_error": 794.5874205790242, "mean_abs_error_last_10": 132.40913516001427, "mean_abs_error_last_25": 212.73790595201203, "mean_abs_error_last_50": 405.32053337763193, "mean_pred_prob": 0.01908477513352409, "mean_pred_prob_last_10": 0.10919558173045515, "mean_pred_prob_last_25": 0.056312569254077974, "mean_pred_prob_last_50": 0.033012421592138706, "mean_token_accuracy": 0.8743179023265839, "step": 8170 }, { "epoch": 0.14541446678399375, "grad_norm": 1.056353598648479, "learning_rate": 0.0001, "loss": 0.9923, "mean_abs_error": 425.0358788582809, "mean_abs_error_last_10": 106.75267159064575, "mean_abs_error_last_25": 132.14799712080517, "mean_abs_error_last_50": 207.75373547642812, "mean_pred_prob": 0.02613776730722748, "mean_pred_prob_last_10": 0.12939011328853667, "mean_pred_prob_last_25": 0.07330905314302072, "mean_pred_prob_last_50": 0.0444655092433095, "mean_token_accuracy": 0.8620798110961914, "step": 8180 }, { "epoch": 0.1455922350807957, "grad_norm": 0.9018420172939066, "learning_rate": 0.0001, "loss": 0.9771, "mean_abs_error": 260.3475644825279, "mean_abs_error_last_10": 163.14963338137696, "mean_abs_error_last_25": 151.75637760162257, "mean_abs_error_last_50": 160.75439750383907, "mean_pred_prob": 0.03770841867662966, "mean_pred_prob_last_10": 0.18421482359990476, "mean_pred_prob_last_25": 0.10544820157811045, "mean_pred_prob_last_50": 0.06414108038879932, "mean_token_accuracy": 0.8827621221542359, "step": 8190 }, { "epoch": 0.14577000337759763, "grad_norm": 2.309396154394062, "learning_rate": 0.0001, "loss": 0.919, "mean_abs_error": 816.4944136228305, "mean_abs_error_last_10": 242.6105468035242, "mean_abs_error_last_25": 310.1863804848852, "mean_abs_error_last_50": 480.9490781474071, "mean_pred_prob": 0.039064362182398324, "mean_pred_prob_last_10": 0.18291202717809937, "mean_pred_prob_last_25": 0.10388521759887226, "mean_pred_prob_last_50": 0.06475027800770476, "mean_token_accuracy": 0.875568288564682, "step": 8200 }, { "epoch": 0.1459477716743996, "grad_norm": 1.0016568861800728, "learning_rate": 0.0001, "loss": 0.9021, "mean_abs_error": 907.5339648485658, "mean_abs_error_last_10": 287.8537609257117, "mean_abs_error_last_25": 372.6680335449689, "mean_abs_error_last_50": 539.3176385115961, "mean_pred_prob": 0.02674923107115319, "mean_pred_prob_last_10": 0.1437923631456215, "mean_pred_prob_last_25": 0.07455839537433348, "mean_pred_prob_last_50": 0.045491299493005496, "mean_token_accuracy": 0.886778074502945, "step": 8210 }, { "epoch": 0.14612553997120153, "grad_norm": 1.8542698974443694, "learning_rate": 0.0001, "loss": 0.9684, "mean_abs_error": 1096.5038119884894, "mean_abs_error_last_10": 608.4382864337178, "mean_abs_error_last_25": 691.2304639236414, "mean_abs_error_last_50": 827.0223347471687, "mean_pred_prob": 0.03411621464474592, "mean_pred_prob_last_10": 0.18010200003918725, "mean_pred_prob_last_25": 0.1002087636719807, "mean_pred_prob_last_50": 0.05946465825982159, "mean_token_accuracy": 0.8775330722332001, "step": 8220 }, { "epoch": 0.14630330826800347, "grad_norm": 1.5254822374526984, "learning_rate": 0.0001, "loss": 0.8891, "mean_abs_error": 290.7485745551965, "mean_abs_error_last_10": 46.50727695345795, "mean_abs_error_last_25": 82.53575821635084, "mean_abs_error_last_50": 166.6005226216357, "mean_pred_prob": 0.03319106660783291, "mean_pred_prob_last_10": 0.17206793520599603, "mean_pred_prob_last_25": 0.09540651580318808, "mean_pred_prob_last_50": 0.05717191644944251, "mean_token_accuracy": 0.8838252186775207, "step": 8230 }, { "epoch": 0.14648107656480544, "grad_norm": 2.8928451550579535, "learning_rate": 0.0001, "loss": 1.016, "mean_abs_error": 101.90370636284747, "mean_abs_error_last_10": 35.55277258283947, "mean_abs_error_last_25": 66.51119045922773, "mean_abs_error_last_50": 81.66445667862823, "mean_pred_prob": 0.040571216493844986, "mean_pred_prob_last_10": 0.22064266949892045, "mean_pred_prob_last_25": 0.11544430144131183, "mean_pred_prob_last_50": 0.06865673959255218, "mean_token_accuracy": 0.8688217520713806, "step": 8240 }, { "epoch": 0.14665884486160738, "grad_norm": 2.048688240713757, "learning_rate": 0.0001, "loss": 0.9258, "mean_abs_error": 843.4256838852627, "mean_abs_error_last_10": 445.5963009849026, "mean_abs_error_last_25": 529.0251022082407, "mean_abs_error_last_50": 641.4477152199755, "mean_pred_prob": 0.0358774011125206, "mean_pred_prob_last_10": 0.17664451939053832, "mean_pred_prob_last_25": 0.10042212819680571, "mean_pred_prob_last_50": 0.06162305206526071, "mean_token_accuracy": 0.8845133602619171, "step": 8250 }, { "epoch": 0.14683661315840932, "grad_norm": 1.8152277056559893, "learning_rate": 0.0001, "loss": 0.9311, "mean_abs_error": 492.2018737274616, "mean_abs_error_last_10": 178.91889377356787, "mean_abs_error_last_25": 211.17547612317725, "mean_abs_error_last_50": 284.07203104192143, "mean_pred_prob": 0.02905100675416179, "mean_pred_prob_last_10": 0.1558898315182887, "mean_pred_prob_last_25": 0.0837636960321106, "mean_pred_prob_last_50": 0.05016958446358331, "mean_token_accuracy": 0.8757084846496582, "step": 8260 }, { "epoch": 0.14701438145521128, "grad_norm": 2.297178906915578, "learning_rate": 0.0001, "loss": 0.9815, "mean_abs_error": 612.5299454290621, "mean_abs_error_last_10": 205.15831840804702, "mean_abs_error_last_25": 271.10093271967014, "mean_abs_error_last_50": 379.00972655282055, "mean_pred_prob": 0.016990448819706218, "mean_pred_prob_last_10": 0.09498300009872765, "mean_pred_prob_last_25": 0.048357721464708445, "mean_pred_prob_last_50": 0.028692183713428677, "mean_token_accuracy": 0.8794092535972595, "step": 8270 }, { "epoch": 0.14719214975201322, "grad_norm": 1.2218962536091942, "learning_rate": 0.0001, "loss": 0.878, "mean_abs_error": 910.1749101865901, "mean_abs_error_last_10": 503.6289048774747, "mean_abs_error_last_25": 507.88033821508805, "mean_abs_error_last_50": 596.2779211088576, "mean_pred_prob": 0.02999420265259687, "mean_pred_prob_last_10": 0.15143600961891934, "mean_pred_prob_last_25": 0.08362020990462042, "mean_pred_prob_last_50": 0.051318990203435534, "mean_token_accuracy": 0.8778450727462769, "step": 8280 }, { "epoch": 0.14736991804881516, "grad_norm": 2.203736554088176, "learning_rate": 0.0001, "loss": 0.9785, "mean_abs_error": 139.33811236123955, "mean_abs_error_last_10": 34.255471935243094, "mean_abs_error_last_25": 42.75094574431955, "mean_abs_error_last_50": 86.32710917479356, "mean_pred_prob": 0.046896815206855534, "mean_pred_prob_last_10": 0.2214297227561474, "mean_pred_prob_last_25": 0.12920625060796737, "mean_pred_prob_last_50": 0.07893396988511085, "mean_token_accuracy": 0.8823391616344451, "step": 8290 }, { "epoch": 0.14754768634561713, "grad_norm": 1.1548309189258963, "learning_rate": 0.0001, "loss": 1.0051, "mean_abs_error": 1548.6703295532525, "mean_abs_error_last_10": 869.4850323125933, "mean_abs_error_last_25": 981.4104223359103, "mean_abs_error_last_50": 1197.4834229617497, "mean_pred_prob": 0.02734022775402991, "mean_pred_prob_last_10": 0.14208415639295707, "mean_pred_prob_last_25": 0.07721557895783918, "mean_pred_prob_last_50": 0.0468822906492278, "mean_token_accuracy": 0.8777612924575806, "step": 8300 }, { "epoch": 0.14772545464241907, "grad_norm": 1.5490673627558211, "learning_rate": 0.0001, "loss": 0.9193, "mean_abs_error": 542.68640583625, "mean_abs_error_last_10": 225.76539202927725, "mean_abs_error_last_25": 360.2194062375887, "mean_abs_error_last_50": 456.5904155620574, "mean_pred_prob": 0.02520103151910007, "mean_pred_prob_last_10": 0.13677714336663485, "mean_pred_prob_last_25": 0.07431535329669714, "mean_pred_prob_last_50": 0.043824424222111705, "mean_token_accuracy": 0.8806394159793853, "step": 8310 }, { "epoch": 0.147903222939221, "grad_norm": 2.4327448979229565, "learning_rate": 0.0001, "loss": 0.9948, "mean_abs_error": 333.87125708083084, "mean_abs_error_last_10": 66.42059641843721, "mean_abs_error_last_25": 108.41612060525942, "mean_abs_error_last_50": 200.93072558249088, "mean_pred_prob": 0.03448551553301513, "mean_pred_prob_last_10": 0.18317539505660535, "mean_pred_prob_last_25": 0.10008079390972853, "mean_pred_prob_last_50": 0.05949627822265029, "mean_token_accuracy": 0.8684696912765503, "step": 8320 }, { "epoch": 0.14808099123602297, "grad_norm": 1.0349422239927244, "learning_rate": 0.0001, "loss": 1.0349, "mean_abs_error": 271.3992394680836, "mean_abs_error_last_10": 71.29476496302672, "mean_abs_error_last_25": 112.15236903803077, "mean_abs_error_last_50": 163.31027931748508, "mean_pred_prob": 0.03232055976986885, "mean_pred_prob_last_10": 0.16700758673250676, "mean_pred_prob_last_25": 0.08959743399173022, "mean_pred_prob_last_50": 0.05396759370341897, "mean_token_accuracy": 0.874687922000885, "step": 8330 }, { "epoch": 0.1482587595328249, "grad_norm": 1.2693940420423124, "learning_rate": 0.0001, "loss": 0.9924, "mean_abs_error": 235.10041384942028, "mean_abs_error_last_10": 112.0506305270947, "mean_abs_error_last_25": 117.42439316579278, "mean_abs_error_last_50": 162.0949058917526, "mean_pred_prob": 0.0360485318582505, "mean_pred_prob_last_10": 0.1868527766317129, "mean_pred_prob_last_25": 0.10314686130732298, "mean_pred_prob_last_50": 0.06179374866187572, "mean_token_accuracy": 0.8721838176250458, "step": 8340 }, { "epoch": 0.14843652782962685, "grad_norm": 1.9312350958445075, "learning_rate": 0.0001, "loss": 1.1068, "mean_abs_error": 446.8713537940462, "mean_abs_error_last_10": 102.20803400937399, "mean_abs_error_last_25": 133.1384764575958, "mean_abs_error_last_50": 232.87956335979356, "mean_pred_prob": 0.022205240884795786, "mean_pred_prob_last_10": 0.12175434101372958, "mean_pred_prob_last_25": 0.06342862052842975, "mean_pred_prob_last_50": 0.03815768286585808, "mean_token_accuracy": 0.8691914737224579, "step": 8350 }, { "epoch": 0.14861429612642882, "grad_norm": 0.6922761901564984, "learning_rate": 0.0001, "loss": 0.9412, "mean_abs_error": 390.65211619001906, "mean_abs_error_last_10": 278.8695622930306, "mean_abs_error_last_25": 318.36307190813574, "mean_abs_error_last_50": 285.28562232242456, "mean_pred_prob": 0.033602808904834094, "mean_pred_prob_last_10": 0.17609478011727334, "mean_pred_prob_last_25": 0.09493830865249038, "mean_pred_prob_last_50": 0.05685432655736804, "mean_token_accuracy": 0.8807458102703094, "step": 8360 }, { "epoch": 0.14879206442323076, "grad_norm": 0.6994211268517785, "learning_rate": 0.0001, "loss": 0.9833, "mean_abs_error": 488.77288440474905, "mean_abs_error_last_10": 199.7985511105243, "mean_abs_error_last_25": 232.7342012646036, "mean_abs_error_last_50": 316.7236300361786, "mean_pred_prob": 0.03070420109143015, "mean_pred_prob_last_10": 0.164276235725265, "mean_pred_prob_last_25": 0.08782981538097374, "mean_pred_prob_last_50": 0.05217988098156638, "mean_token_accuracy": 0.8805400252342224, "step": 8370 }, { "epoch": 0.1489698327200327, "grad_norm": 1.7335047759257944, "learning_rate": 0.0001, "loss": 1.0104, "mean_abs_error": 889.8684860178721, "mean_abs_error_last_10": 287.45333435315877, "mean_abs_error_last_25": 404.4003624921876, "mean_abs_error_last_50": 618.501327233843, "mean_pred_prob": 0.02781200533208903, "mean_pred_prob_last_10": 0.14507125092786738, "mean_pred_prob_last_25": 0.08128112948907074, "mean_pred_prob_last_50": 0.048196573369205, "mean_token_accuracy": 0.873328709602356, "step": 8380 }, { "epoch": 0.14914760101683466, "grad_norm": 1.451637743932516, "learning_rate": 0.0001, "loss": 0.9313, "mean_abs_error": 411.3285737329428, "mean_abs_error_last_10": 70.69897794164899, "mean_abs_error_last_25": 102.84764998343694, "mean_abs_error_last_50": 199.63891705289097, "mean_pred_prob": 0.02102099610492587, "mean_pred_prob_last_10": 0.11876482330262661, "mean_pred_prob_last_25": 0.062415814772248265, "mean_pred_prob_last_50": 0.03687387565150857, "mean_token_accuracy": 0.8828849852085113, "step": 8390 }, { "epoch": 0.1493253693136366, "grad_norm": 1.1533971022527052, "learning_rate": 0.0001, "loss": 1.0244, "mean_abs_error": 737.3045215897873, "mean_abs_error_last_10": 261.1852483173644, "mean_abs_error_last_25": 321.55620055899584, "mean_abs_error_last_50": 482.8200492045538, "mean_pred_prob": 0.03372141754371114, "mean_pred_prob_last_10": 0.18269229553407057, "mean_pred_prob_last_25": 0.09679157544742338, "mean_pred_prob_last_50": 0.05731726173544303, "mean_token_accuracy": 0.8672052562236786, "step": 8400 }, { "epoch": 0.14950313761043854, "grad_norm": 1.6147261084326896, "learning_rate": 0.0001, "loss": 0.9631, "mean_abs_error": 354.06205433268383, "mean_abs_error_last_10": 189.2875623478746, "mean_abs_error_last_25": 189.29489131537517, "mean_abs_error_last_50": 210.118668150228, "mean_pred_prob": 0.02905058308970183, "mean_pred_prob_last_10": 0.1568909775465727, "mean_pred_prob_last_25": 0.08415158195421099, "mean_pred_prob_last_50": 0.0500734297093004, "mean_token_accuracy": 0.8804804444313049, "step": 8410 }, { "epoch": 0.1496809059072405, "grad_norm": 2.104593140723458, "learning_rate": 0.0001, "loss": 1.0344, "mean_abs_error": 666.8992880397012, "mean_abs_error_last_10": 171.79931752119472, "mean_abs_error_last_25": 261.900426164413, "mean_abs_error_last_50": 395.0646242459139, "mean_pred_prob": 0.03031333523977082, "mean_pred_prob_last_10": 0.1626926859957166, "mean_pred_prob_last_25": 0.0884958665817976, "mean_pred_prob_last_50": 0.0525294020480942, "mean_token_accuracy": 0.8682598531246185, "step": 8420 }, { "epoch": 0.14985867420404245, "grad_norm": 1.2747882164135536, "learning_rate": 0.0001, "loss": 0.9486, "mean_abs_error": 521.9221983500876, "mean_abs_error_last_10": 330.6218230692119, "mean_abs_error_last_25": 445.21842510434334, "mean_abs_error_last_50": 504.1279277421898, "mean_pred_prob": 0.03483234237646684, "mean_pred_prob_last_10": 0.18681870764121414, "mean_pred_prob_last_25": 0.10102597828954459, "mean_pred_prob_last_50": 0.06010981686413288, "mean_token_accuracy": 0.8747802257537842, "step": 8430 }, { "epoch": 0.15003644250084439, "grad_norm": 0.8873283702832729, "learning_rate": 0.0001, "loss": 0.9594, "mean_abs_error": 1976.356331675439, "mean_abs_error_last_10": 1085.6286798968683, "mean_abs_error_last_25": 1150.255870871378, "mean_abs_error_last_50": 1390.9075484780308, "mean_pred_prob": 0.030440643518522846, "mean_pred_prob_last_10": 0.1491987554851221, "mean_pred_prob_last_25": 0.08456676422210876, "mean_pred_prob_last_50": 0.05143152640666813, "mean_token_accuracy": 0.867573595046997, "step": 8440 }, { "epoch": 0.15021421079764635, "grad_norm": 3.0677150510993916, "learning_rate": 0.0001, "loss": 1.063, "mean_abs_error": 159.6693910892833, "mean_abs_error_last_10": 31.438942600771746, "mean_abs_error_last_25": 51.00228805008827, "mean_abs_error_last_50": 83.66238828906981, "mean_pred_prob": 0.03796982727944851, "mean_pred_prob_last_10": 0.20222638137638568, "mean_pred_prob_last_25": 0.1070998864248395, "mean_pred_prob_last_50": 0.0642874265089631, "mean_token_accuracy": 0.8695107996463776, "step": 8450 }, { "epoch": 0.1503919790944483, "grad_norm": 1.7926982856609517, "learning_rate": 0.0001, "loss": 0.9918, "mean_abs_error": 515.8060000139426, "mean_abs_error_last_10": 225.85134526321957, "mean_abs_error_last_25": 318.7478431844679, "mean_abs_error_last_50": 375.9080118227618, "mean_pred_prob": 0.03148961896658875, "mean_pred_prob_last_10": 0.1725403384771198, "mean_pred_prob_last_25": 0.0914265011495445, "mean_pred_prob_last_50": 0.054377387155545874, "mean_token_accuracy": 0.8788747906684875, "step": 8460 }, { "epoch": 0.15056974739125026, "grad_norm": 0.7814421475716112, "learning_rate": 0.0001, "loss": 0.8981, "mean_abs_error": 99.74163629900558, "mean_abs_error_last_10": 29.34864277937378, "mean_abs_error_last_25": 32.22262933814286, "mean_abs_error_last_50": 46.590139745730355, "mean_pred_prob": 0.05691393557935953, "mean_pred_prob_last_10": 0.26173473708331585, "mean_pred_prob_last_25": 0.15505040138959886, "mean_pred_prob_last_50": 0.09680606480687856, "mean_token_accuracy": 0.8854730904102326, "step": 8470 }, { "epoch": 0.1507475156880522, "grad_norm": 1.555124090535098, "learning_rate": 0.0001, "loss": 0.8903, "mean_abs_error": 160.85799683199284, "mean_abs_error_last_10": 79.2681913587384, "mean_abs_error_last_25": 74.12973433403633, "mean_abs_error_last_50": 106.67121392367922, "mean_pred_prob": 0.03810444003902376, "mean_pred_prob_last_10": 0.19565930999815465, "mean_pred_prob_last_25": 0.10956695731729268, "mean_pred_prob_last_50": 0.06583103127777576, "mean_token_accuracy": 0.8850683450698853, "step": 8480 }, { "epoch": 0.15092528398485414, "grad_norm": 0.9860315641977399, "learning_rate": 0.0001, "loss": 0.8873, "mean_abs_error": 208.8633567228716, "mean_abs_error_last_10": 45.48728518903776, "mean_abs_error_last_25": 92.8875616205979, "mean_abs_error_last_50": 140.8412153426816, "mean_pred_prob": 0.03859870270825923, "mean_pred_prob_last_10": 0.19827707223594188, "mean_pred_prob_last_25": 0.1096294516697526, "mean_pred_prob_last_50": 0.06611006530001759, "mean_token_accuracy": 0.8704540014266968, "step": 8490 }, { "epoch": 0.1511030522816561, "grad_norm": 1.107862850581676, "learning_rate": 0.0001, "loss": 0.9119, "mean_abs_error": 411.9356272071359, "mean_abs_error_last_10": 88.92867520184856, "mean_abs_error_last_25": 168.68761875964375, "mean_abs_error_last_50": 266.05615625152774, "mean_pred_prob": 0.03076877580024302, "mean_pred_prob_last_10": 0.15406832210719584, "mean_pred_prob_last_25": 0.08500994518399238, "mean_pred_prob_last_50": 0.052116926619783045, "mean_token_accuracy": 0.8746440470218658, "step": 8500 }, { "epoch": 0.15128082057845804, "grad_norm": 1.3900917424326837, "learning_rate": 0.0001, "loss": 1.0318, "mean_abs_error": 273.5539832378396, "mean_abs_error_last_10": 74.9963493499528, "mean_abs_error_last_25": 153.33973777597288, "mean_abs_error_last_50": 248.56300417451945, "mean_pred_prob": 0.04108018600381911, "mean_pred_prob_last_10": 0.21270790025591851, "mean_pred_prob_last_25": 0.11670223707333208, "mean_pred_prob_last_50": 0.07048385217785835, "mean_token_accuracy": 0.8710053205490113, "step": 8510 }, { "epoch": 0.15145858887525998, "grad_norm": 1.349089841346309, "learning_rate": 0.0001, "loss": 0.854, "mean_abs_error": 619.9038909538083, "mean_abs_error_last_10": 187.8671815109746, "mean_abs_error_last_25": 281.13678840361, "mean_abs_error_last_50": 359.70319375237125, "mean_pred_prob": 0.03624900459544733, "mean_pred_prob_last_10": 0.17342065310804172, "mean_pred_prob_last_25": 0.09919728864333592, "mean_pred_prob_last_50": 0.061156915448373185, "mean_token_accuracy": 0.8726610600948334, "step": 8520 }, { "epoch": 0.15163635717206195, "grad_norm": 1.4847635092759548, "learning_rate": 0.0001, "loss": 1.0253, "mean_abs_error": 1302.5721967879297, "mean_abs_error_last_10": 614.8905652917352, "mean_abs_error_last_25": 745.1018968724668, "mean_abs_error_last_50": 875.8148370908733, "mean_pred_prob": 0.012558308755978942, "mean_pred_prob_last_10": 0.07280284425942227, "mean_pred_prob_last_25": 0.03674575664917938, "mean_pred_prob_last_50": 0.021718209714163095, "mean_token_accuracy": 0.8723556458950043, "step": 8530 }, { "epoch": 0.1518141254688639, "grad_norm": 3.5637242294273026, "learning_rate": 0.0001, "loss": 1.0155, "mean_abs_error": 216.51856038797843, "mean_abs_error_last_10": 26.69319652758846, "mean_abs_error_last_25": 71.08181045083583, "mean_abs_error_last_50": 116.01641116968617, "mean_pred_prob": 0.0342129172757268, "mean_pred_prob_last_10": 0.18930232524871826, "mean_pred_prob_last_25": 0.10032494217157364, "mean_pred_prob_last_50": 0.05963697023689747, "mean_token_accuracy": 0.8722657024860382, "step": 8540 }, { "epoch": 0.15199189376566583, "grad_norm": 2.777815762759865, "learning_rate": 0.0001, "loss": 0.9502, "mean_abs_error": 386.51123348148724, "mean_abs_error_last_10": 68.66228655268166, "mean_abs_error_last_25": 115.98637786223526, "mean_abs_error_last_50": 214.3361401215106, "mean_pred_prob": 0.03140885753091425, "mean_pred_prob_last_10": 0.15896077118813992, "mean_pred_prob_last_25": 0.08720733700320124, "mean_pred_prob_last_50": 0.053371391631662844, "mean_token_accuracy": 0.86485356092453, "step": 8550 }, { "epoch": 0.1521696620624678, "grad_norm": 1.0283143060959483, "learning_rate": 0.0001, "loss": 0.9602, "mean_abs_error": 412.596494466325, "mean_abs_error_last_10": 85.63201566448163, "mean_abs_error_last_25": 110.82289856728133, "mean_abs_error_last_50": 195.8623862960861, "mean_pred_prob": 0.03621171845588833, "mean_pred_prob_last_10": 0.17263223342597483, "mean_pred_prob_last_25": 0.09809874463826418, "mean_pred_prob_last_50": 0.0605185545515269, "mean_token_accuracy": 0.8804702401161194, "step": 8560 }, { "epoch": 0.15234743035926973, "grad_norm": 1.5060213157733762, "learning_rate": 0.0001, "loss": 0.9543, "mean_abs_error": 352.1995299243282, "mean_abs_error_last_10": 113.47082088244989, "mean_abs_error_last_25": 129.5650655007956, "mean_abs_error_last_50": 192.87793421156286, "mean_pred_prob": 0.02841062096413225, "mean_pred_prob_last_10": 0.1521051655523479, "mean_pred_prob_last_25": 0.08155096210539341, "mean_pred_prob_last_50": 0.04856403931044042, "mean_token_accuracy": 0.8684589326381683, "step": 8570 }, { "epoch": 0.15252519865607167, "grad_norm": 1.2008395341389375, "learning_rate": 0.0001, "loss": 0.9794, "mean_abs_error": 294.97394852387185, "mean_abs_error_last_10": 36.269975274357805, "mean_abs_error_last_25": 81.94433041377044, "mean_abs_error_last_50": 150.14822458023377, "mean_pred_prob": 0.03762126457877457, "mean_pred_prob_last_10": 0.20164504759013652, "mean_pred_prob_last_25": 0.11008202079683542, "mean_pred_prob_last_50": 0.06592277940362692, "mean_token_accuracy": 0.8692658364772796, "step": 8580 }, { "epoch": 0.15270296695287364, "grad_norm": 2.1833320120752697, "learning_rate": 0.0001, "loss": 0.9529, "mean_abs_error": 207.4224315614419, "mean_abs_error_last_10": 33.687249508124374, "mean_abs_error_last_25": 77.08956162750054, "mean_abs_error_last_50": 130.41728544920466, "mean_pred_prob": 0.056167021160945296, "mean_pred_prob_last_10": 0.24484813138842582, "mean_pred_prob_last_25": 0.1498604414984584, "mean_pred_prob_last_50": 0.09392561707645655, "mean_token_accuracy": 0.8751325666904449, "step": 8590 }, { "epoch": 0.15288073524967558, "grad_norm": 2.202473837564678, "learning_rate": 0.0001, "loss": 0.9477, "mean_abs_error": 355.0313063967295, "mean_abs_error_last_10": 214.73792863089392, "mean_abs_error_last_25": 200.02723035818659, "mean_abs_error_last_50": 280.4240956420293, "mean_pred_prob": 0.04004105036146939, "mean_pred_prob_last_10": 0.20541662052273751, "mean_pred_prob_last_25": 0.11443352270871401, "mean_pred_prob_last_50": 0.06848956164903938, "mean_token_accuracy": 0.8727002441883087, "step": 8600 }, { "epoch": 0.15305850354647751, "grad_norm": 1.1996909612497908, "learning_rate": 0.0001, "loss": 0.9853, "mean_abs_error": 636.4545281116509, "mean_abs_error_last_10": 125.95578038483748, "mean_abs_error_last_25": 225.74523628227325, "mean_abs_error_last_50": 385.42944601152124, "mean_pred_prob": 0.03188385450921487, "mean_pred_prob_last_10": 0.15958694654982536, "mean_pred_prob_last_25": 0.09029322982532903, "mean_pred_prob_last_50": 0.05455711228423752, "mean_token_accuracy": 0.8739429354667664, "step": 8610 }, { "epoch": 0.15323627184327948, "grad_norm": 1.2828262501597865, "learning_rate": 0.0001, "loss": 0.9387, "mean_abs_error": 469.95076922598156, "mean_abs_error_last_10": 121.11606459505231, "mean_abs_error_last_25": 153.864959593636, "mean_abs_error_last_50": 218.65533788270653, "mean_pred_prob": 0.030670307856053115, "mean_pred_prob_last_10": 0.1588564846664667, "mean_pred_prob_last_25": 0.08959347028285265, "mean_pred_prob_last_50": 0.05347522366791964, "mean_token_accuracy": 0.8833381474018097, "step": 8620 }, { "epoch": 0.15341404014008142, "grad_norm": 1.4832536227270796, "learning_rate": 0.0001, "loss": 0.9537, "mean_abs_error": 553.3342842242649, "mean_abs_error_last_10": 335.9880153882796, "mean_abs_error_last_25": 357.2361506869029, "mean_abs_error_last_50": 363.49047227370613, "mean_pred_prob": 0.01822021739790216, "mean_pred_prob_last_10": 0.10388091399800033, "mean_pred_prob_last_25": 0.05304409951204434, "mean_pred_prob_last_50": 0.0314707780838944, "mean_token_accuracy": 0.8731252193450928, "step": 8630 }, { "epoch": 0.15359180843688336, "grad_norm": 1.34884098899214, "learning_rate": 0.0001, "loss": 0.912, "mean_abs_error": 412.1492929550415, "mean_abs_error_last_10": 165.229718368596, "mean_abs_error_last_25": 126.31232413095252, "mean_abs_error_last_50": 185.25375978383033, "mean_pred_prob": 0.03745721243321896, "mean_pred_prob_last_10": 0.20762910973280668, "mean_pred_prob_last_25": 0.11144666643813253, "mean_pred_prob_last_50": 0.06552894739434123, "mean_token_accuracy": 0.8875636875629425, "step": 8640 }, { "epoch": 0.15376957673368533, "grad_norm": 2.0291670529091475, "learning_rate": 0.0001, "loss": 0.9974, "mean_abs_error": 996.5838688605736, "mean_abs_error_last_10": 589.0950689483365, "mean_abs_error_last_25": 629.7288901314535, "mean_abs_error_last_50": 712.8067408693969, "mean_pred_prob": 0.02753886592981871, "mean_pred_prob_last_10": 0.1448172225151211, "mean_pred_prob_last_25": 0.07768734779965598, "mean_pred_prob_last_50": 0.04692779900797177, "mean_token_accuracy": 0.865591287612915, "step": 8650 }, { "epoch": 0.15394734503048726, "grad_norm": 1.574841468326712, "learning_rate": 0.0001, "loss": 0.9437, "mean_abs_error": 653.5757513175466, "mean_abs_error_last_10": 402.52663930268966, "mean_abs_error_last_25": 418.55486563541973, "mean_abs_error_last_50": 454.4853703767516, "mean_pred_prob": 0.029363654588814826, "mean_pred_prob_last_10": 0.15366339762695133, "mean_pred_prob_last_25": 0.08483586194925011, "mean_pred_prob_last_50": 0.05138182641239837, "mean_token_accuracy": 0.8744226276874543, "step": 8660 }, { "epoch": 0.1541251133272892, "grad_norm": 2.1319679974420924, "learning_rate": 0.0001, "loss": 1.0441, "mean_abs_error": 1373.25571497285, "mean_abs_error_last_10": 629.6857780589428, "mean_abs_error_last_25": 725.6033457697991, "mean_abs_error_last_50": 968.0445050969349, "mean_pred_prob": 0.019511227415932808, "mean_pred_prob_last_10": 0.11211135576013476, "mean_pred_prob_last_25": 0.05785836283030221, "mean_pred_prob_last_50": 0.03404828717175405, "mean_token_accuracy": 0.8721859276294708, "step": 8670 }, { "epoch": 0.15430288162409117, "grad_norm": 1.1838511689296995, "learning_rate": 0.0001, "loss": 0.9355, "mean_abs_error": 253.77217887445454, "mean_abs_error_last_10": 102.469442988054, "mean_abs_error_last_25": 174.87427310739798, "mean_abs_error_last_50": 230.53216629643916, "mean_pred_prob": 0.037333969539031384, "mean_pred_prob_last_10": 0.2025271374732256, "mean_pred_prob_last_25": 0.10799201261252164, "mean_pred_prob_last_50": 0.06435022242367268, "mean_token_accuracy": 0.8777305960655213, "step": 8680 }, { "epoch": 0.1544806499208931, "grad_norm": 2.084061410237541, "learning_rate": 0.0001, "loss": 0.9655, "mean_abs_error": 375.9324220669033, "mean_abs_error_last_10": 286.2779780645068, "mean_abs_error_last_25": 251.31770339945768, "mean_abs_error_last_50": 231.88564862056742, "mean_pred_prob": 0.02673261360032484, "mean_pred_prob_last_10": 0.14284829990938305, "mean_pred_prob_last_25": 0.07757048236671835, "mean_pred_prob_last_50": 0.04620014001848176, "mean_token_accuracy": 0.8627127110958099, "step": 8690 }, { "epoch": 0.15465841821769505, "grad_norm": 3.7744036005827617, "learning_rate": 0.0001, "loss": 0.9267, "mean_abs_error": 131.29197681788736, "mean_abs_error_last_10": 29.457964626862008, "mean_abs_error_last_25": 62.524375602779195, "mean_abs_error_last_50": 84.08465095785778, "mean_pred_prob": 0.043738550040870905, "mean_pred_prob_last_10": 0.23575269058346748, "mean_pred_prob_last_25": 0.1258736800402403, "mean_pred_prob_last_50": 0.07545700836926698, "mean_token_accuracy": 0.8678321063518524, "step": 8700 }, { "epoch": 0.15483618651449702, "grad_norm": 1.3108610778659224, "learning_rate": 0.0001, "loss": 0.9459, "mean_abs_error": 691.3858448373555, "mean_abs_error_last_10": 221.4485314970506, "mean_abs_error_last_25": 268.7793880358246, "mean_abs_error_last_50": 412.5339904956172, "mean_pred_prob": 0.020276901056058706, "mean_pred_prob_last_10": 0.10190126886591315, "mean_pred_prob_last_25": 0.05753125436604023, "mean_pred_prob_last_50": 0.03474727333523333, "mean_token_accuracy": 0.8696551442146301, "step": 8710 }, { "epoch": 0.15501395481129895, "grad_norm": 2.0156303462297145, "learning_rate": 0.0001, "loss": 1.0005, "mean_abs_error": 248.85554063290152, "mean_abs_error_last_10": 53.129258519367795, "mean_abs_error_last_25": 82.29929807770314, "mean_abs_error_last_50": 131.40398915106104, "mean_pred_prob": 0.038865654123947026, "mean_pred_prob_last_10": 0.20626801960170268, "mean_pred_prob_last_25": 0.1126268420368433, "mean_pred_prob_last_50": 0.06702226884663105, "mean_token_accuracy": 0.8627240478992462, "step": 8720 }, { "epoch": 0.1551917231081009, "grad_norm": 1.1610701188728072, "learning_rate": 0.0001, "loss": 0.9397, "mean_abs_error": 465.7903114045988, "mean_abs_error_last_10": 183.9230946681628, "mean_abs_error_last_25": 173.2040292695813, "mean_abs_error_last_50": 250.1711990276927, "mean_pred_prob": 0.030732199549674988, "mean_pred_prob_last_10": 0.16452969387173652, "mean_pred_prob_last_25": 0.08834912688471377, "mean_pred_prob_last_50": 0.053174042422324416, "mean_token_accuracy": 0.8747472822666168, "step": 8730 }, { "epoch": 0.15536949140490286, "grad_norm": 2.0091663705693157, "learning_rate": 0.0001, "loss": 1.0035, "mean_abs_error": 201.73704365746264, "mean_abs_error_last_10": 21.625009416909062, "mean_abs_error_last_25": 48.07577072441042, "mean_abs_error_last_50": 94.54495045764986, "mean_pred_prob": 0.04106620023958385, "mean_pred_prob_last_10": 0.21915024444460868, "mean_pred_prob_last_25": 0.11936217732727528, "mean_pred_prob_last_50": 0.07092553144320846, "mean_token_accuracy": 0.8756813168525696, "step": 8740 }, { "epoch": 0.1555472597017048, "grad_norm": 3.4746235228746754, "learning_rate": 0.0001, "loss": 1.0348, "mean_abs_error": 872.7095318934628, "mean_abs_error_last_10": 331.8770714358369, "mean_abs_error_last_25": 443.2005290429349, "mean_abs_error_last_50": 575.8184464187909, "mean_pred_prob": 0.026271846058079973, "mean_pred_prob_last_10": 0.12825436101993545, "mean_pred_prob_last_25": 0.0735579127445817, "mean_pred_prob_last_50": 0.045314631145447495, "mean_token_accuracy": 0.8736590921878815, "step": 8750 }, { "epoch": 0.15572502799850674, "grad_norm": 1.1786071154119997, "learning_rate": 0.0001, "loss": 1.0218, "mean_abs_error": 957.6386841672145, "mean_abs_error_last_10": 466.02225313223073, "mean_abs_error_last_25": 613.3926486289165, "mean_abs_error_last_50": 728.9997095359072, "mean_pred_prob": 0.03125957166193984, "mean_pred_prob_last_10": 0.16135760924662462, "mean_pred_prob_last_25": 0.08981188500183634, "mean_pred_prob_last_50": 0.053507415193598716, "mean_token_accuracy": 0.8692900538444519, "step": 8760 }, { "epoch": 0.1559027962953087, "grad_norm": 1.1423065313762173, "learning_rate": 0.0001, "loss": 0.9699, "mean_abs_error": 387.11827427441284, "mean_abs_error_last_10": 149.23382559868517, "mean_abs_error_last_25": 184.97926477567438, "mean_abs_error_last_50": 228.094029176829, "mean_pred_prob": 0.02934029287425801, "mean_pred_prob_last_10": 0.16103764383587987, "mean_pred_prob_last_25": 0.08550880089169369, "mean_pred_prob_last_50": 0.05086383259040304, "mean_token_accuracy": 0.8634327590465546, "step": 8770 }, { "epoch": 0.15608056459211064, "grad_norm": 0.9705741084013838, "learning_rate": 0.0001, "loss": 0.9404, "mean_abs_error": 106.57348081519494, "mean_abs_error_last_10": 18.950067947438242, "mean_abs_error_last_25": 28.445921895975516, "mean_abs_error_last_50": 53.44236587124256, "mean_pred_prob": 0.044652410130947826, "mean_pred_prob_last_10": 0.21984962075948716, "mean_pred_prob_last_25": 0.12389718703925609, "mean_pred_prob_last_50": 0.07551699839532375, "mean_token_accuracy": 0.871343332529068, "step": 8780 }, { "epoch": 0.15625833288891258, "grad_norm": 0.9389007283779967, "learning_rate": 0.0001, "loss": 0.951, "mean_abs_error": 979.2005657943531, "mean_abs_error_last_10": 316.6961872382502, "mean_abs_error_last_25": 376.9257865338737, "mean_abs_error_last_50": 605.171733888331, "mean_pred_prob": 0.018825264877523295, "mean_pred_prob_last_10": 0.10540721418801695, "mean_pred_prob_last_25": 0.05602286572102457, "mean_pred_prob_last_50": 0.032798650735639964, "mean_token_accuracy": 0.8770860433578491, "step": 8790 }, { "epoch": 0.15643610118571455, "grad_norm": 1.3474579597640906, "learning_rate": 0.0001, "loss": 0.9606, "mean_abs_error": 101.75087926077072, "mean_abs_error_last_10": 16.830606833165596, "mean_abs_error_last_25": 25.783737720465627, "mean_abs_error_last_50": 51.73119161165025, "mean_pred_prob": 0.04609157498925924, "mean_pred_prob_last_10": 0.24261409044265747, "mean_pred_prob_last_25": 0.13407897129654883, "mean_pred_prob_last_50": 0.07946706637740135, "mean_token_accuracy": 0.865700751543045, "step": 8800 }, { "epoch": 0.1566138694825165, "grad_norm": 3.3680028943583316, "learning_rate": 0.0001, "loss": 0.965, "mean_abs_error": 1072.6332248412625, "mean_abs_error_last_10": 437.1901535572023, "mean_abs_error_last_25": 551.0084981461164, "mean_abs_error_last_50": 731.4200284333529, "mean_pred_prob": 0.041273710103996566, "mean_pred_prob_last_10": 0.2101505262311548, "mean_pred_prob_last_25": 0.11533637186221313, "mean_pred_prob_last_50": 0.07048166186141316, "mean_token_accuracy": 0.8636223793029785, "step": 8810 }, { "epoch": 0.15679163777931843, "grad_norm": 1.1424900975147079, "learning_rate": 0.0001, "loss": 1.049, "mean_abs_error": 1435.0411133984455, "mean_abs_error_last_10": 821.0351225753691, "mean_abs_error_last_25": 894.9147233739832, "mean_abs_error_last_50": 1066.7523895457944, "mean_pred_prob": 0.018056233205425088, "mean_pred_prob_last_10": 0.09595139255397953, "mean_pred_prob_last_25": 0.05194928708660882, "mean_pred_prob_last_50": 0.03109113944810815, "mean_token_accuracy": 0.8604698598384857, "step": 8820 }, { "epoch": 0.1569694060761204, "grad_norm": 0.8322641537356665, "learning_rate": 0.0001, "loss": 0.9222, "mean_abs_error": 673.0614379882179, "mean_abs_error_last_10": 367.90193512308144, "mean_abs_error_last_25": 367.7991872217547, "mean_abs_error_last_50": 452.2501812498291, "mean_pred_prob": 0.02325273986789398, "mean_pred_prob_last_10": 0.1350397958070971, "mean_pred_prob_last_25": 0.06946687620365992, "mean_pred_prob_last_50": 0.04077623514458537, "mean_token_accuracy": 0.8694359242916108, "step": 8830 }, { "epoch": 0.15714717437292233, "grad_norm": 1.4147255622618344, "learning_rate": 0.0001, "loss": 0.9166, "mean_abs_error": 570.2440807996486, "mean_abs_error_last_10": 273.58874316404456, "mean_abs_error_last_25": 333.4922354135092, "mean_abs_error_last_50": 394.76027687270096, "mean_pred_prob": 0.02320959835778922, "mean_pred_prob_last_10": 0.1218162203906104, "mean_pred_prob_last_25": 0.06602184292860329, "mean_pred_prob_last_50": 0.0396752069471404, "mean_token_accuracy": 0.8710846960544586, "step": 8840 }, { "epoch": 0.15732494266972427, "grad_norm": 0.7353342940875366, "learning_rate": 0.0001, "loss": 0.929, "mean_abs_error": 310.21838020600165, "mean_abs_error_last_10": 109.38018863619384, "mean_abs_error_last_25": 158.07064674908798, "mean_abs_error_last_50": 205.45706511667495, "mean_pred_prob": 0.03343075579032302, "mean_pred_prob_last_10": 0.174987262673676, "mean_pred_prob_last_25": 0.09434072887524962, "mean_pred_prob_last_50": 0.057208006642758845, "mean_token_accuracy": 0.8726669430732727, "step": 8850 }, { "epoch": 0.15750271096652624, "grad_norm": 1.7641377538034715, "learning_rate": 0.0001, "loss": 1.0234, "mean_abs_error": 365.81219965602315, "mean_abs_error_last_10": 103.5948858581796, "mean_abs_error_last_25": 144.36516910823053, "mean_abs_error_last_50": 204.1388419129268, "mean_pred_prob": 0.03186692360904999, "mean_pred_prob_last_10": 0.16599304158007727, "mean_pred_prob_last_25": 0.08912270474247634, "mean_pred_prob_last_50": 0.05456622525816783, "mean_token_accuracy": 0.8687568545341492, "step": 8860 }, { "epoch": 0.15768047926332818, "grad_norm": 0.9662545240648344, "learning_rate": 0.0001, "loss": 0.9813, "mean_abs_error": 388.94308883083795, "mean_abs_error_last_10": 193.6135378886413, "mean_abs_error_last_25": 186.04449225569255, "mean_abs_error_last_50": 214.45696841418612, "mean_pred_prob": 0.0339206759352237, "mean_pred_prob_last_10": 0.16738252881914378, "mean_pred_prob_last_25": 0.09273163909092545, "mean_pred_prob_last_50": 0.05657902625389397, "mean_token_accuracy": 0.8724518835544586, "step": 8870 }, { "epoch": 0.15785824756013012, "grad_norm": 1.9126481433126306, "learning_rate": 0.0001, "loss": 0.9163, "mean_abs_error": 134.05919276073524, "mean_abs_error_last_10": 29.622254026671857, "mean_abs_error_last_25": 55.14805760590666, "mean_abs_error_last_50": 82.44864138296113, "mean_pred_prob": 0.04005785621702671, "mean_pred_prob_last_10": 0.20780792385339736, "mean_pred_prob_last_25": 0.11354789324104786, "mean_pred_prob_last_50": 0.06857907008379698, "mean_token_accuracy": 0.8875118851661682, "step": 8880 }, { "epoch": 0.15803601585693208, "grad_norm": 1.378804607951434, "learning_rate": 0.0001, "loss": 0.916, "mean_abs_error": 451.88052510914775, "mean_abs_error_last_10": 143.18419227991203, "mean_abs_error_last_25": 195.19086106716537, "mean_abs_error_last_50": 253.94165823192202, "mean_pred_prob": 0.03513150658691302, "mean_pred_prob_last_10": 0.19436922870809212, "mean_pred_prob_last_25": 0.10321846386650577, "mean_pred_prob_last_50": 0.060837125888792795, "mean_token_accuracy": 0.8749491274356842, "step": 8890 }, { "epoch": 0.15821378415373402, "grad_norm": 0.9422094233349807, "learning_rate": 0.0001, "loss": 0.9659, "mean_abs_error": 510.81575842001473, "mean_abs_error_last_10": 144.91604336777007, "mean_abs_error_last_25": 191.75304164131518, "mean_abs_error_last_50": 288.7108731901217, "mean_pred_prob": 0.03090019117225893, "mean_pred_prob_last_10": 0.15129653464537113, "mean_pred_prob_last_25": 0.0883676647208631, "mean_pred_prob_last_50": 0.052603583014570174, "mean_token_accuracy": 0.8723876237869262, "step": 8900 }, { "epoch": 0.15839155245053596, "grad_norm": 1.2448137132707207, "learning_rate": 0.0001, "loss": 0.9181, "mean_abs_error": 973.7199532218881, "mean_abs_error_last_10": 415.27283461325044, "mean_abs_error_last_25": 495.01615569784065, "mean_abs_error_last_50": 633.4595191015867, "mean_pred_prob": 0.01922523891553283, "mean_pred_prob_last_10": 0.11084434589429293, "mean_pred_prob_last_25": 0.05699266372830607, "mean_pred_prob_last_50": 0.033508760257973336, "mean_token_accuracy": 0.8748551428318023, "step": 8910 }, { "epoch": 0.15856932074733793, "grad_norm": 1.8294347722625774, "learning_rate": 0.0001, "loss": 1.0478, "mean_abs_error": 857.1009685598186, "mean_abs_error_last_10": 308.0250407091004, "mean_abs_error_last_25": 358.2692035073095, "mean_abs_error_last_50": 510.8948212107892, "mean_pred_prob": 0.0279842958872905, "mean_pred_prob_last_10": 0.15450501503655686, "mean_pred_prob_last_25": 0.08193015561555513, "mean_pred_prob_last_50": 0.04842818773468025, "mean_token_accuracy": 0.8621814250946045, "step": 8920 }, { "epoch": 0.15874708904413987, "grad_norm": 1.897226870191739, "learning_rate": 0.0001, "loss": 0.9406, "mean_abs_error": 454.380620405546, "mean_abs_error_last_10": 173.3660139171504, "mean_abs_error_last_25": 237.34331447198028, "mean_abs_error_last_50": 274.5640539737579, "mean_pred_prob": 0.03695865101180971, "mean_pred_prob_last_10": 0.1890793913975358, "mean_pred_prob_last_25": 0.10398127473890781, "mean_pred_prob_last_50": 0.0635989970061928, "mean_token_accuracy": 0.8829162001609803, "step": 8930 }, { "epoch": 0.1589248573409418, "grad_norm": 1.3558619370975593, "learning_rate": 0.0001, "loss": 0.9586, "mean_abs_error": 572.0988526969915, "mean_abs_error_last_10": 179.19147306991596, "mean_abs_error_last_25": 186.7847019318324, "mean_abs_error_last_50": 283.4989630422484, "mean_pred_prob": 0.02692072603967972, "mean_pred_prob_last_10": 0.14431453251745552, "mean_pred_prob_last_25": 0.07857089143944904, "mean_pred_prob_last_50": 0.047158406139351426, "mean_token_accuracy": 0.8790627539157867, "step": 8940 }, { "epoch": 0.15910262563774377, "grad_norm": 2.081537823316243, "learning_rate": 0.0001, "loss": 0.9878, "mean_abs_error": 912.5589023516834, "mean_abs_error_last_10": 417.36438025835344, "mean_abs_error_last_25": 475.1705353287666, "mean_abs_error_last_50": 631.280307586737, "mean_pred_prob": 0.03159596115292516, "mean_pred_prob_last_10": 0.17431269718508702, "mean_pred_prob_last_25": 0.09313345944683533, "mean_pred_prob_last_50": 0.05473633057845291, "mean_token_accuracy": 0.870554941892624, "step": 8950 }, { "epoch": 0.1592803939345457, "grad_norm": 0.9604794564570398, "learning_rate": 0.0001, "loss": 0.8936, "mean_abs_error": 167.97660105550557, "mean_abs_error_last_10": 39.80191423678466, "mean_abs_error_last_25": 49.3864974239737, "mean_abs_error_last_50": 79.60858787113844, "mean_pred_prob": 0.04649556465446949, "mean_pred_prob_last_10": 0.22715945802628995, "mean_pred_prob_last_25": 0.13021697681397199, "mean_pred_prob_last_50": 0.08028056537732482, "mean_token_accuracy": 0.8704636335372925, "step": 8960 }, { "epoch": 0.15945816223134765, "grad_norm": 1.4988321719122213, "learning_rate": 0.0001, "loss": 1.1379, "mean_abs_error": 1143.9611685805253, "mean_abs_error_last_10": 546.4547668304677, "mean_abs_error_last_25": 630.6171602456417, "mean_abs_error_last_50": 796.5942486533994, "mean_pred_prob": 0.02425454670446925, "mean_pred_prob_last_10": 0.13045960528106662, "mean_pred_prob_last_25": 0.07216119693184737, "mean_pred_prob_last_50": 0.042762192903319375, "mean_token_accuracy": 0.8691952645778656, "step": 8970 }, { "epoch": 0.15963593052814962, "grad_norm": 0.92358569980053, "learning_rate": 0.0001, "loss": 1.0378, "mean_abs_error": 1492.8338820485776, "mean_abs_error_last_10": 597.4332649363389, "mean_abs_error_last_25": 753.9463718350156, "mean_abs_error_last_50": 1010.1399355940614, "mean_pred_prob": 0.021409959311131387, "mean_pred_prob_last_10": 0.10491100787767209, "mean_pred_prob_last_25": 0.05956879669392947, "mean_pred_prob_last_50": 0.03567057556065265, "mean_token_accuracy": 0.8739726722240448, "step": 8980 }, { "epoch": 0.15981369882495156, "grad_norm": 1.6895136724241102, "learning_rate": 0.0001, "loss": 0.9553, "mean_abs_error": 1136.7666909921252, "mean_abs_error_last_10": 482.3971644950012, "mean_abs_error_last_25": 566.6299514613581, "mean_abs_error_last_50": 721.3119860043514, "mean_pred_prob": 0.02668004691367969, "mean_pred_prob_last_10": 0.1464005684800213, "mean_pred_prob_last_25": 0.07740630531625356, "mean_pred_prob_last_50": 0.046166976119275206, "mean_token_accuracy": 0.8764701426029206, "step": 8990 }, { "epoch": 0.1599914671217535, "grad_norm": 1.4510672135675167, "learning_rate": 0.0001, "loss": 1.0334, "mean_abs_error": 828.998719659616, "mean_abs_error_last_10": 361.020247700755, "mean_abs_error_last_25": 525.7736692859247, "mean_abs_error_last_50": 620.4478263777016, "mean_pred_prob": 0.028320775704924018, "mean_pred_prob_last_10": 0.1430173776112497, "mean_pred_prob_last_25": 0.07952673159888946, "mean_pred_prob_last_50": 0.048305479815462606, "mean_token_accuracy": 0.8678466379642487, "step": 9000 }, { "epoch": 0.16016923541855546, "grad_norm": 4.384877278755595, "learning_rate": 0.0001, "loss": 1.0095, "mean_abs_error": 327.08636291935466, "mean_abs_error_last_10": 121.1672703956104, "mean_abs_error_last_25": 136.12453656159397, "mean_abs_error_last_50": 208.64364698105175, "mean_pred_prob": 0.03169573713093996, "mean_pred_prob_last_10": 0.17551838643848897, "mean_pred_prob_last_25": 0.09263098891824484, "mean_pred_prob_last_50": 0.05499247177504003, "mean_token_accuracy": 0.8789804935455322, "step": 9010 }, { "epoch": 0.1603470037153574, "grad_norm": 1.1266253387637712, "learning_rate": 0.0001, "loss": 0.9586, "mean_abs_error": 1429.8561086550042, "mean_abs_error_last_10": 736.5526035179266, "mean_abs_error_last_25": 798.9200359821364, "mean_abs_error_last_50": 1042.5206821863708, "mean_pred_prob": 0.02490756518673152, "mean_pred_prob_last_10": 0.12831815983518027, "mean_pred_prob_last_25": 0.06921085509529804, "mean_pred_prob_last_50": 0.04198220666439738, "mean_token_accuracy": 0.8751546144485474, "step": 9020 }, { "epoch": 0.16052477201215934, "grad_norm": 2.425979864622822, "learning_rate": 0.0001, "loss": 0.9675, "mean_abs_error": 328.2839330573703, "mean_abs_error_last_10": 91.12935763169494, "mean_abs_error_last_25": 134.98636039745452, "mean_abs_error_last_50": 211.87678841070593, "mean_pred_prob": 0.03765427260659635, "mean_pred_prob_last_10": 0.16953422017395497, "mean_pred_prob_last_25": 0.09970250949263573, "mean_pred_prob_last_50": 0.06334882993251086, "mean_token_accuracy": 0.8713601410388947, "step": 9030 }, { "epoch": 0.1607025403089613, "grad_norm": 2.0796146853037905, "learning_rate": 0.0001, "loss": 1.0116, "mean_abs_error": 475.56525171892645, "mean_abs_error_last_10": 140.86770749005294, "mean_abs_error_last_25": 187.2274734841695, "mean_abs_error_last_50": 276.4628651858038, "mean_pred_prob": 0.02271562998648733, "mean_pred_prob_last_10": 0.12333908192813396, "mean_pred_prob_last_25": 0.0660771762020886, "mean_pred_prob_last_50": 0.039566326700150964, "mean_token_accuracy": 0.8675564289093017, "step": 9040 }, { "epoch": 0.16088030860576324, "grad_norm": 1.2923377628639454, "learning_rate": 0.0001, "loss": 1.052, "mean_abs_error": 606.0179242608586, "mean_abs_error_last_10": 178.5448783614029, "mean_abs_error_last_25": 242.12153839088947, "mean_abs_error_last_50": 369.17170524196865, "mean_pred_prob": 0.01850186795927584, "mean_pred_prob_last_10": 0.11089272475801408, "mean_pred_prob_last_25": 0.056793077220208946, "mean_pred_prob_last_50": 0.03285930319689214, "mean_token_accuracy": 0.8699900805950165, "step": 9050 }, { "epoch": 0.16105807690256518, "grad_norm": 1.1898553012915898, "learning_rate": 0.0001, "loss": 0.8823, "mean_abs_error": 447.29918702084467, "mean_abs_error_last_10": 228.62451465748936, "mean_abs_error_last_25": 219.47834986765483, "mean_abs_error_last_50": 312.25874782088283, "mean_pred_prob": 0.030572226969525218, "mean_pred_prob_last_10": 0.15872835805639623, "mean_pred_prob_last_25": 0.08467724323272705, "mean_pred_prob_last_50": 0.051841414300724864, "mean_token_accuracy": 0.8714205145835876, "step": 9060 }, { "epoch": 0.16123584519936715, "grad_norm": 1.1713296934087776, "learning_rate": 0.0001, "loss": 0.9042, "mean_abs_error": 581.2936800799647, "mean_abs_error_last_10": 247.87164684106193, "mean_abs_error_last_25": 240.304238794239, "mean_abs_error_last_50": 308.7652075325315, "mean_pred_prob": 0.030196540924953295, "mean_pred_prob_last_10": 0.15776210505282506, "mean_pred_prob_last_25": 0.08694458506652154, "mean_pred_prob_last_50": 0.051698347483761606, "mean_token_accuracy": 0.869013887643814, "step": 9070 }, { "epoch": 0.1614136134961691, "grad_norm": 1.2278077243436722, "learning_rate": 0.0001, "loss": 0.9884, "mean_abs_error": 587.1488909630505, "mean_abs_error_last_10": 158.14870054879438, "mean_abs_error_last_25": 277.572598897785, "mean_abs_error_last_50": 360.3299210366233, "mean_pred_prob": 0.02590072783641517, "mean_pred_prob_last_10": 0.13338960893452168, "mean_pred_prob_last_25": 0.07328972348477691, "mean_pred_prob_last_50": 0.04439443340525031, "mean_token_accuracy": 0.8668269991874695, "step": 9080 }, { "epoch": 0.16159138179297103, "grad_norm": 2.1264079532574116, "learning_rate": 0.0001, "loss": 0.9195, "mean_abs_error": 349.73307963100876, "mean_abs_error_last_10": 111.37261089819128, "mean_abs_error_last_25": 159.10660751535602, "mean_abs_error_last_50": 203.7012650670048, "mean_pred_prob": 0.024984109262004495, "mean_pred_prob_last_10": 0.133075001090765, "mean_pred_prob_last_25": 0.07028558291494846, "mean_pred_prob_last_50": 0.042160564288496974, "mean_token_accuracy": 0.8730694651603699, "step": 9090 }, { "epoch": 0.161769150089773, "grad_norm": 1.2963451437163362, "learning_rate": 0.0001, "loss": 1.0708, "mean_abs_error": 651.6166041397566, "mean_abs_error_last_10": 200.9453698366213, "mean_abs_error_last_25": 241.57873491411266, "mean_abs_error_last_50": 338.16374998808794, "mean_pred_prob": 0.03290277157211676, "mean_pred_prob_last_10": 0.16099097817204894, "mean_pred_prob_last_25": 0.09175041014095768, "mean_pred_prob_last_50": 0.05613961850758642, "mean_token_accuracy": 0.8816075146198272, "step": 9100 }, { "epoch": 0.16194691838657493, "grad_norm": 0.998782824492887, "learning_rate": 0.0001, "loss": 0.9101, "mean_abs_error": 187.3110719839535, "mean_abs_error_last_10": 96.7181588904915, "mean_abs_error_last_25": 143.24845747395798, "mean_abs_error_last_50": 144.64040934283966, "mean_pred_prob": 0.04433342269621789, "mean_pred_prob_last_10": 0.20087506398558616, "mean_pred_prob_last_25": 0.11649454412981868, "mean_pred_prob_last_50": 0.0731861456297338, "mean_token_accuracy": 0.8867758989334107, "step": 9110 }, { "epoch": 0.16212468668337687, "grad_norm": 1.008361273808396, "learning_rate": 0.0001, "loss": 1.0253, "mean_abs_error": 315.8096462542203, "mean_abs_error_last_10": 90.08832687041367, "mean_abs_error_last_25": 126.19360864868506, "mean_abs_error_last_50": 185.76958694886466, "mean_pred_prob": 0.028976467391476034, "mean_pred_prob_last_10": 0.1588057428598404, "mean_pred_prob_last_25": 0.08293196186423302, "mean_pred_prob_last_50": 0.049598577246069905, "mean_token_accuracy": 0.8823255360126495, "step": 9120 }, { "epoch": 0.16230245498017884, "grad_norm": 2.1434332416809165, "learning_rate": 0.0001, "loss": 0.9874, "mean_abs_error": 456.57599770170066, "mean_abs_error_last_10": 116.2068206851366, "mean_abs_error_last_25": 131.15622648571394, "mean_abs_error_last_50": 244.9308752104775, "mean_pred_prob": 0.028163581562694162, "mean_pred_prob_last_10": 0.15317297205328942, "mean_pred_prob_last_25": 0.08340982391964644, "mean_pred_prob_last_50": 0.04908148964168504, "mean_token_accuracy": 0.8620415508747101, "step": 9130 }, { "epoch": 0.16248022327698078, "grad_norm": 2.340743892395845, "learning_rate": 0.0001, "loss": 0.9345, "mean_abs_error": 471.9232893594146, "mean_abs_error_last_10": 125.57997908422776, "mean_abs_error_last_25": 175.7124038955356, "mean_abs_error_last_50": 270.932390457351, "mean_pred_prob": 0.026777809811756016, "mean_pred_prob_last_10": 0.14550584089010954, "mean_pred_prob_last_25": 0.07767161708325147, "mean_pred_prob_last_50": 0.046159864077344535, "mean_token_accuracy": 0.8797707080841064, "step": 9140 }, { "epoch": 0.16265799157378275, "grad_norm": 1.1248894070386184, "learning_rate": 0.0001, "loss": 1.0279, "mean_abs_error": 1044.036095184203, "mean_abs_error_last_10": 618.9566803246914, "mean_abs_error_last_25": 687.5741439389675, "mean_abs_error_last_50": 794.1682127957727, "mean_pred_prob": 0.027331378060625865, "mean_pred_prob_last_10": 0.1355632486520335, "mean_pred_prob_last_25": 0.07808343079523183, "mean_pred_prob_last_50": 0.04705816523928661, "mean_token_accuracy": 0.8736721634864807, "step": 9150 }, { "epoch": 0.16283575987058468, "grad_norm": 1.0662827011450056, "learning_rate": 0.0001, "loss": 0.8975, "mean_abs_error": 232.38088214385192, "mean_abs_error_last_10": 45.786473044348746, "mean_abs_error_last_25": 81.2683590218926, "mean_abs_error_last_50": 124.51976561203324, "mean_pred_prob": 0.040695669641718266, "mean_pred_prob_last_10": 0.20974074229598044, "mean_pred_prob_last_25": 0.11180250532925129, "mean_pred_prob_last_50": 0.06888468917459249, "mean_token_accuracy": 0.87452592253685, "step": 9160 }, { "epoch": 0.16301352816738662, "grad_norm": 2.0687621612585496, "learning_rate": 0.0001, "loss": 0.8699, "mean_abs_error": 194.89290569927385, "mean_abs_error_last_10": 81.00715822292207, "mean_abs_error_last_25": 88.33150390867027, "mean_abs_error_last_50": 116.99439387151321, "mean_pred_prob": 0.05268524624407291, "mean_pred_prob_last_10": 0.27657724991440774, "mean_pred_prob_last_25": 0.15151672046631576, "mean_pred_prob_last_50": 0.0908192269038409, "mean_token_accuracy": 0.8750746846199036, "step": 9170 }, { "epoch": 0.1631912964641886, "grad_norm": 1.393863102854961, "learning_rate": 0.0001, "loss": 0.9958, "mean_abs_error": 711.2565450308105, "mean_abs_error_last_10": 168.27391153658763, "mean_abs_error_last_25": 242.15924541164767, "mean_abs_error_last_50": 379.88569717008977, "mean_pred_prob": 0.03179388050630223, "mean_pred_prob_last_10": 0.17439083590870724, "mean_pred_prob_last_25": 0.0939824764674995, "mean_pred_prob_last_50": 0.054987275903113184, "mean_token_accuracy": 0.8721781551837922, "step": 9180 }, { "epoch": 0.16336906476099053, "grad_norm": 1.408918176320399, "learning_rate": 0.0001, "loss": 0.9532, "mean_abs_error": 358.32784308262063, "mean_abs_error_last_10": 83.17712868548551, "mean_abs_error_last_25": 132.04492771587715, "mean_abs_error_last_50": 238.583421558731, "mean_pred_prob": 0.037563713174313305, "mean_pred_prob_last_10": 0.19134256821125745, "mean_pred_prob_last_25": 0.10613608350977302, "mean_pred_prob_last_50": 0.06466021360829473, "mean_token_accuracy": 0.8773897469043732, "step": 9190 }, { "epoch": 0.16354683305779247, "grad_norm": 1.861045546197282, "learning_rate": 0.0001, "loss": 1.0369, "mean_abs_error": 473.76545768184616, "mean_abs_error_last_10": 90.54271068934605, "mean_abs_error_last_25": 123.32492895647295, "mean_abs_error_last_50": 258.2193338912951, "mean_pred_prob": 0.019070198759436607, "mean_pred_prob_last_10": 0.10736471824347973, "mean_pred_prob_last_25": 0.05597408097237348, "mean_pred_prob_last_50": 0.03319996036589146, "mean_token_accuracy": 0.8683893084526062, "step": 9200 }, { "epoch": 0.16372460135459443, "grad_norm": 1.3575429457055224, "learning_rate": 0.0001, "loss": 0.9458, "mean_abs_error": 1481.2771344765156, "mean_abs_error_last_10": 898.9750773236477, "mean_abs_error_last_25": 930.7149728970728, "mean_abs_error_last_50": 1053.072904603238, "mean_pred_prob": 0.018250690546119587, "mean_pred_prob_last_10": 0.10310860597674036, "mean_pred_prob_last_25": 0.054959707382658964, "mean_pred_prob_last_50": 0.03208326896710787, "mean_token_accuracy": 0.8668370604515075, "step": 9210 }, { "epoch": 0.16390236965139637, "grad_norm": 1.7741896939176338, "learning_rate": 0.0001, "loss": 0.9465, "mean_abs_error": 235.91590347341133, "mean_abs_error_last_10": 44.90284173863874, "mean_abs_error_last_25": 85.86545580901728, "mean_abs_error_last_50": 136.68951579273403, "mean_pred_prob": 0.06382294474169611, "mean_pred_prob_last_10": 0.301652492582798, "mean_pred_prob_last_25": 0.17592766657471656, "mean_pred_prob_last_50": 0.10816487409174443, "mean_token_accuracy": 0.8761305928230285, "step": 9220 }, { "epoch": 0.1640801379481983, "grad_norm": 1.0206727242219975, "learning_rate": 0.0001, "loss": 0.8417, "mean_abs_error": 155.49186787999764, "mean_abs_error_last_10": 50.933864150236644, "mean_abs_error_last_25": 75.5252559934834, "mean_abs_error_last_50": 103.3037684417233, "mean_pred_prob": 0.04015068463049829, "mean_pred_prob_last_10": 0.2149441123008728, "mean_pred_prob_last_25": 0.11457460038363934, "mean_pred_prob_last_50": 0.0691754413768649, "mean_token_accuracy": 0.8756650626659394, "step": 9230 }, { "epoch": 0.16425790624500028, "grad_norm": 2.986343448203458, "learning_rate": 0.0001, "loss": 1.0038, "mean_abs_error": 372.08923279100617, "mean_abs_error_last_10": 257.6306332970928, "mean_abs_error_last_25": 332.0491062704892, "mean_abs_error_last_50": 287.93466386864884, "mean_pred_prob": 0.030044635664671658, "mean_pred_prob_last_10": 0.15743985064327717, "mean_pred_prob_last_25": 0.08678768817335367, "mean_pred_prob_last_50": 0.05174419111572206, "mean_token_accuracy": 0.8694498777389527, "step": 9240 }, { "epoch": 0.16443567454180222, "grad_norm": 3.531719225529527, "learning_rate": 0.0001, "loss": 0.979, "mean_abs_error": 169.77827339433097, "mean_abs_error_last_10": 16.22827506222057, "mean_abs_error_last_25": 46.330053521207375, "mean_abs_error_last_50": 92.64135303492803, "mean_pred_prob": 0.03790576197206974, "mean_pred_prob_last_10": 0.19745348393917084, "mean_pred_prob_last_25": 0.10869798045605421, "mean_pred_prob_last_50": 0.06532304352149368, "mean_token_accuracy": 0.8719028472900391, "step": 9250 }, { "epoch": 0.16461344283860416, "grad_norm": 1.3712241025927179, "learning_rate": 0.0001, "loss": 1.0055, "mean_abs_error": 579.3565498928235, "mean_abs_error_last_10": 107.45614096078603, "mean_abs_error_last_25": 188.85197448360913, "mean_abs_error_last_50": 328.6034888189758, "mean_pred_prob": 0.02281582252471708, "mean_pred_prob_last_10": 0.11701086601242423, "mean_pred_prob_last_25": 0.06410500479396433, "mean_pred_prob_last_50": 0.038843405072111634, "mean_token_accuracy": 0.8724782109260559, "step": 9260 }, { "epoch": 0.16479121113540612, "grad_norm": 1.2892631866853543, "learning_rate": 0.0001, "loss": 0.9133, "mean_abs_error": 367.9587449352881, "mean_abs_error_last_10": 93.01995491734101, "mean_abs_error_last_25": 209.48440875321415, "mean_abs_error_last_50": 259.44962377100785, "mean_pred_prob": 0.02848380331415683, "mean_pred_prob_last_10": 0.1518784025683999, "mean_pred_prob_last_25": 0.08093157084658742, "mean_pred_prob_last_50": 0.04885499263182282, "mean_token_accuracy": 0.8697392106056213, "step": 9270 }, { "epoch": 0.16496897943220806, "grad_norm": 2.0987505822705685, "learning_rate": 0.0001, "loss": 0.9439, "mean_abs_error": 1127.9185546362705, "mean_abs_error_last_10": 304.2670293868176, "mean_abs_error_last_25": 393.0643848053154, "mean_abs_error_last_50": 623.0881711984794, "mean_pred_prob": 0.019781966644222847, "mean_pred_prob_last_10": 0.11214979536598549, "mean_pred_prob_last_25": 0.05945571076590568, "mean_pred_prob_last_50": 0.03500957624637522, "mean_token_accuracy": 0.8756788313388825, "step": 9280 }, { "epoch": 0.16514674772901, "grad_norm": 1.0893330853064915, "learning_rate": 0.0001, "loss": 0.8566, "mean_abs_error": 303.7272937746922, "mean_abs_error_last_10": 75.55861101039812, "mean_abs_error_last_25": 109.40552808177192, "mean_abs_error_last_50": 165.90657684838635, "mean_pred_prob": 0.03617174602113664, "mean_pred_prob_last_10": 0.18374444637447596, "mean_pred_prob_last_25": 0.1022634033113718, "mean_pred_prob_last_50": 0.06203879090026021, "mean_token_accuracy": 0.876637202501297, "step": 9290 }, { "epoch": 0.16532451602581197, "grad_norm": 1.4437160342569815, "learning_rate": 0.0001, "loss": 1.0405, "mean_abs_error": 1882.0665442907343, "mean_abs_error_last_10": 1020.1929841215276, "mean_abs_error_last_25": 1106.8491321863933, "mean_abs_error_last_50": 1354.1017265836604, "mean_pred_prob": 0.02900375844765222, "mean_pred_prob_last_10": 0.13879158311028733, "mean_pred_prob_last_25": 0.07929134631121997, "mean_pred_prob_last_50": 0.048383145630941725, "mean_token_accuracy": 0.8739871740341186, "step": 9300 }, { "epoch": 0.1655022843226139, "grad_norm": 1.8884384399185632, "learning_rate": 0.0001, "loss": 1.0057, "mean_abs_error": 1492.6221998883761, "mean_abs_error_last_10": 902.8046146907369, "mean_abs_error_last_25": 958.5595847984393, "mean_abs_error_last_50": 1077.107203399259, "mean_pred_prob": 0.02382279337325599, "mean_pred_prob_last_10": 0.12398904767469503, "mean_pred_prob_last_25": 0.06748438311478822, "mean_pred_prob_last_50": 0.040688496326038145, "mean_token_accuracy": 0.8669520378112793, "step": 9310 }, { "epoch": 0.16568005261941585, "grad_norm": 1.22266137411043, "learning_rate": 0.0001, "loss": 0.965, "mean_abs_error": 581.550894402041, "mean_abs_error_last_10": 260.401343410149, "mean_abs_error_last_25": 336.1717575031367, "mean_abs_error_last_50": 440.7201290678868, "mean_pred_prob": 0.0275903042871505, "mean_pred_prob_last_10": 0.14645209470763804, "mean_pred_prob_last_25": 0.07783618601970374, "mean_pred_prob_last_50": 0.04714901563711464, "mean_token_accuracy": 0.8660113632678985, "step": 9320 }, { "epoch": 0.1658578209162178, "grad_norm": 1.2591166465728112, "learning_rate": 0.0001, "loss": 0.8914, "mean_abs_error": 211.63329995372797, "mean_abs_error_last_10": 67.85056003595287, "mean_abs_error_last_25": 83.46020064280728, "mean_abs_error_last_50": 125.8516770070629, "mean_pred_prob": 0.04017949961125851, "mean_pred_prob_last_10": 0.21718917060643433, "mean_pred_prob_last_25": 0.11627101441845297, "mean_pred_prob_last_50": 0.06923278137110174, "mean_token_accuracy": 0.8693394839763642, "step": 9330 }, { "epoch": 0.16603558921301975, "grad_norm": 1.0654945620970004, "learning_rate": 0.0001, "loss": 1.0084, "mean_abs_error": 668.7210143947765, "mean_abs_error_last_10": 383.8753256596275, "mean_abs_error_last_25": 462.06906572392165, "mean_abs_error_last_50": 515.9239021779674, "mean_pred_prob": 0.025785996450576932, "mean_pred_prob_last_10": 0.1481569585390389, "mean_pred_prob_last_25": 0.07399562885984778, "mean_pred_prob_last_50": 0.0434685624204576, "mean_token_accuracy": 0.858961421251297, "step": 9340 }, { "epoch": 0.1662133575098217, "grad_norm": 1.3319657748633853, "learning_rate": 0.0001, "loss": 0.9137, "mean_abs_error": 598.5246689797306, "mean_abs_error_last_10": 162.56032484464149, "mean_abs_error_last_25": 180.54333752120488, "mean_abs_error_last_50": 281.1549498880752, "mean_pred_prob": 0.027255410875659435, "mean_pred_prob_last_10": 0.14476290894672275, "mean_pred_prob_last_25": 0.0777293217368424, "mean_pred_prob_last_50": 0.046809287695214155, "mean_token_accuracy": 0.8746940791606903, "step": 9350 }, { "epoch": 0.16639112580662366, "grad_norm": 1.1981185495069553, "learning_rate": 0.0001, "loss": 0.954, "mean_abs_error": 324.28619732409464, "mean_abs_error_last_10": 145.43310957841976, "mean_abs_error_last_25": 271.91629971605937, "mean_abs_error_last_50": 318.77657499475754, "mean_pred_prob": 0.04172942689619959, "mean_pred_prob_last_10": 0.20669226367026566, "mean_pred_prob_last_25": 0.11544900294393301, "mean_pred_prob_last_50": 0.0706063115503639, "mean_token_accuracy": 0.8723278701305389, "step": 9360 }, { "epoch": 0.1665688941034256, "grad_norm": 2.223668084814584, "learning_rate": 0.0001, "loss": 1.03, "mean_abs_error": 1114.4769889713775, "mean_abs_error_last_10": 544.6726144275852, "mean_abs_error_last_25": 637.0021584675808, "mean_abs_error_last_50": 792.6783938523816, "mean_pred_prob": 0.022128204409091266, "mean_pred_prob_last_10": 0.12559783578617498, "mean_pred_prob_last_25": 0.06497147046757164, "mean_pred_prob_last_50": 0.03842175836471142, "mean_token_accuracy": 0.8636990129947663, "step": 9370 }, { "epoch": 0.16674666240022754, "grad_norm": 1.5267460283335832, "learning_rate": 0.0001, "loss": 0.9669, "mean_abs_error": 1547.2414108141043, "mean_abs_error_last_10": 676.7993004836818, "mean_abs_error_last_25": 779.0896383544016, "mean_abs_error_last_50": 1039.3308686395242, "mean_pred_prob": 0.017391019377100747, "mean_pred_prob_last_10": 0.10236135464801918, "mean_pred_prob_last_25": 0.052089436756796205, "mean_pred_prob_last_50": 0.030129331797070336, "mean_token_accuracy": 0.8646747469902039, "step": 9380 }, { "epoch": 0.1669244306970295, "grad_norm": 1.9380682159786145, "learning_rate": 0.0001, "loss": 0.9871, "mean_abs_error": 477.89354478144094, "mean_abs_error_last_10": 129.2853671457852, "mean_abs_error_last_25": 182.37363875488782, "mean_abs_error_last_50": 254.25473418890383, "mean_pred_prob": 0.025198882864788174, "mean_pred_prob_last_10": 0.13577580135315656, "mean_pred_prob_last_25": 0.07248872928321362, "mean_pred_prob_last_50": 0.04352750550024211, "mean_token_accuracy": 0.8677095234394073, "step": 9390 }, { "epoch": 0.16710219899383144, "grad_norm": 2.017461201075294, "learning_rate": 0.0001, "loss": 0.9537, "mean_abs_error": 199.0590901450221, "mean_abs_error_last_10": 54.344038304742455, "mean_abs_error_last_25": 75.29202458152528, "mean_abs_error_last_50": 107.7305760486308, "mean_pred_prob": 0.03842243002727628, "mean_pred_prob_last_10": 0.1905296202749014, "mean_pred_prob_last_25": 0.10624033212661743, "mean_pred_prob_last_50": 0.06517763268202544, "mean_token_accuracy": 0.8625871658325195, "step": 9400 }, { "epoch": 0.16727996729063338, "grad_norm": 1.3930074653819515, "learning_rate": 0.0001, "loss": 0.9567, "mean_abs_error": 242.50597815173796, "mean_abs_error_last_10": 39.467736901082894, "mean_abs_error_last_25": 135.05635296797985, "mean_abs_error_last_50": 172.54791348140512, "mean_pred_prob": 0.03207650715485215, "mean_pred_prob_last_10": 0.1694676909595728, "mean_pred_prob_last_25": 0.09315647203475237, "mean_pred_prob_last_50": 0.05520802615210414, "mean_token_accuracy": 0.8634455442428589, "step": 9410 }, { "epoch": 0.16745773558743535, "grad_norm": 1.2736513569804375, "learning_rate": 0.0001, "loss": 1.0286, "mean_abs_error": 190.31625452693703, "mean_abs_error_last_10": 43.68223753833339, "mean_abs_error_last_25": 59.25206897339176, "mean_abs_error_last_50": 114.18410978139953, "mean_pred_prob": 0.04100563363172114, "mean_pred_prob_last_10": 0.20519102737307549, "mean_pred_prob_last_25": 0.11503329202532768, "mean_pred_prob_last_50": 0.07015901217237115, "mean_token_accuracy": 0.8805606663227081, "step": 9420 }, { "epoch": 0.16763550388423729, "grad_norm": 2.014067539443353, "learning_rate": 0.0001, "loss": 0.9959, "mean_abs_error": 747.0039347179307, "mean_abs_error_last_10": 295.7069685030832, "mean_abs_error_last_25": 440.06802702303605, "mean_abs_error_last_50": 524.1625829798328, "mean_pred_prob": 0.014335236512124538, "mean_pred_prob_last_10": 0.08500379938632249, "mean_pred_prob_last_25": 0.042105302680283785, "mean_pred_prob_last_50": 0.024715524259954692, "mean_token_accuracy": 0.8715161561965943, "step": 9430 }, { "epoch": 0.16781327218103922, "grad_norm": 1.8208302690854494, "learning_rate": 0.0001, "loss": 1.018, "mean_abs_error": 530.1798585300845, "mean_abs_error_last_10": 105.80813032984115, "mean_abs_error_last_25": 162.7689560785833, "mean_abs_error_last_50": 295.14234514496553, "mean_pred_prob": 0.03179142128792591, "mean_pred_prob_last_10": 0.17188271390041338, "mean_pred_prob_last_25": 0.09166104939067736, "mean_pred_prob_last_50": 0.05490918750292621, "mean_token_accuracy": 0.8646873354911804, "step": 9440 }, { "epoch": 0.1679910404778412, "grad_norm": 1.3121084837409438, "learning_rate": 0.0001, "loss": 0.9133, "mean_abs_error": 375.5274191900127, "mean_abs_error_last_10": 131.88252677849763, "mean_abs_error_last_25": 131.4220570016579, "mean_abs_error_last_50": 202.35563945425324, "mean_pred_prob": 0.029833697411231697, "mean_pred_prob_last_10": 0.16453136764466764, "mean_pred_prob_last_25": 0.08689889174420387, "mean_pred_prob_last_50": 0.05149629469960928, "mean_token_accuracy": 0.8760707676410675, "step": 9450 }, { "epoch": 0.16816880877464313, "grad_norm": 1.6174286770372446, "learning_rate": 0.0001, "loss": 0.961, "mean_abs_error": 617.1288569836028, "mean_abs_error_last_10": 231.89692928954273, "mean_abs_error_last_25": 383.5937029397802, "mean_abs_error_last_50": 492.5880361240096, "mean_pred_prob": 0.030858990171691403, "mean_pred_prob_last_10": 0.16278871860122307, "mean_pred_prob_last_25": 0.08793935695430263, "mean_pred_prob_last_50": 0.053143121616449206, "mean_token_accuracy": 0.8688679695129394, "step": 9460 }, { "epoch": 0.16834657707144507, "grad_norm": 1.4786130719252457, "learning_rate": 0.0001, "loss": 0.9125, "mean_abs_error": 317.23158778565175, "mean_abs_error_last_10": 123.30710250491423, "mean_abs_error_last_25": 138.32239570546093, "mean_abs_error_last_50": 190.19011823550426, "mean_pred_prob": 0.036024968372657895, "mean_pred_prob_last_10": 0.15980896539986134, "mean_pred_prob_last_25": 0.09399960972368718, "mean_pred_prob_last_50": 0.05943438289687038, "mean_token_accuracy": 0.8760351955890655, "step": 9470 }, { "epoch": 0.16852434536824704, "grad_norm": 1.4641177014100983, "learning_rate": 0.0001, "loss": 0.949, "mean_abs_error": 340.1592102363913, "mean_abs_error_last_10": 58.08459680816064, "mean_abs_error_last_25": 101.70514291221573, "mean_abs_error_last_50": 152.5173663565138, "mean_pred_prob": 0.03541391433682293, "mean_pred_prob_last_10": 0.19474338684231043, "mean_pred_prob_last_25": 0.10204120660200715, "mean_pred_prob_last_50": 0.061192922806367275, "mean_token_accuracy": 0.8708173513412476, "step": 9480 }, { "epoch": 0.16870211366504897, "grad_norm": 1.3597937345473066, "learning_rate": 0.0001, "loss": 0.8759, "mean_abs_error": 170.62204482784858, "mean_abs_error_last_10": 41.259277125242846, "mean_abs_error_last_25": 46.045892770573055, "mean_abs_error_last_50": 87.74756304630621, "mean_pred_prob": 0.04472281574271619, "mean_pred_prob_last_10": 0.21622221283614634, "mean_pred_prob_last_25": 0.12371569313108921, "mean_pred_prob_last_50": 0.0762047715485096, "mean_token_accuracy": 0.8772116959095001, "step": 9490 }, { "epoch": 0.16887988196185091, "grad_norm": 1.5907008670653635, "learning_rate": 0.0001, "loss": 0.9374, "mean_abs_error": 427.63433683745507, "mean_abs_error_last_10": 154.9021652797681, "mean_abs_error_last_25": 192.604261283293, "mean_abs_error_last_50": 222.71424380106845, "mean_pred_prob": 0.0330814290442504, "mean_pred_prob_last_10": 0.1701622246298939, "mean_pred_prob_last_25": 0.0913168728351593, "mean_pred_prob_last_50": 0.05611654582899064, "mean_token_accuracy": 0.8677425980567932, "step": 9500 }, { "epoch": 0.16905765025865288, "grad_norm": 5.346695317480054, "learning_rate": 0.0001, "loss": 0.9006, "mean_abs_error": 469.8314698162911, "mean_abs_error_last_10": 120.5162694644328, "mean_abs_error_last_25": 144.38187412860853, "mean_abs_error_last_50": 204.8927543576658, "mean_pred_prob": 0.03110831945668906, "mean_pred_prob_last_10": 0.1646651491522789, "mean_pred_prob_last_25": 0.08989445515908301, "mean_pred_prob_last_50": 0.05353402253240347, "mean_token_accuracy": 0.8736241042613984, "step": 9510 }, { "epoch": 0.16923541855545482, "grad_norm": 1.6571006357545524, "learning_rate": 0.0001, "loss": 0.9346, "mean_abs_error": 351.3821843399896, "mean_abs_error_last_10": 211.57892507364804, "mean_abs_error_last_25": 270.27218761105803, "mean_abs_error_last_50": 239.6339910093799, "mean_pred_prob": 0.037065780046395956, "mean_pred_prob_last_10": 0.1801121074706316, "mean_pred_prob_last_25": 0.1013393871486187, "mean_pred_prob_last_50": 0.0625612975563854, "mean_token_accuracy": 0.8758323907852172, "step": 9520 }, { "epoch": 0.16941318685225676, "grad_norm": 2.1983183229339405, "learning_rate": 0.0001, "loss": 0.9116, "mean_abs_error": 643.031530903543, "mean_abs_error_last_10": 162.3473917581686, "mean_abs_error_last_25": 293.78072312200055, "mean_abs_error_last_50": 392.2964943248765, "mean_pred_prob": 0.03559699021279812, "mean_pred_prob_last_10": 0.18441469850949943, "mean_pred_prob_last_25": 0.10232098696287721, "mean_pred_prob_last_50": 0.06170188339892775, "mean_token_accuracy": 0.873927640914917, "step": 9530 }, { "epoch": 0.16959095514905873, "grad_norm": 2.4227519765015257, "learning_rate": 0.0001, "loss": 0.9408, "mean_abs_error": 1327.2816830813722, "mean_abs_error_last_10": 823.4501967675167, "mean_abs_error_last_25": 834.2927617821877, "mean_abs_error_last_50": 871.9231054314703, "mean_pred_prob": 0.02326677362434566, "mean_pred_prob_last_10": 0.11979442613082938, "mean_pred_prob_last_25": 0.06767752334999386, "mean_pred_prob_last_50": 0.04065338660147973, "mean_token_accuracy": 0.8769320905208587, "step": 9540 }, { "epoch": 0.16976872344586066, "grad_norm": 2.391338436443892, "learning_rate": 0.0001, "loss": 1.0667, "mean_abs_error": 1518.5041224587142, "mean_abs_error_last_10": 622.9154880260338, "mean_abs_error_last_25": 703.501779845891, "mean_abs_error_last_50": 894.1892272805017, "mean_pred_prob": 0.013057787661091424, "mean_pred_prob_last_10": 0.06998546080139931, "mean_pred_prob_last_25": 0.037498630193294955, "mean_pred_prob_last_50": 0.022768806805834173, "mean_token_accuracy": 0.869172352552414, "step": 9550 }, { "epoch": 0.1699464917426626, "grad_norm": 1.2012546975296696, "learning_rate": 0.0001, "loss": 0.9491, "mean_abs_error": 599.8764898646775, "mean_abs_error_last_10": 151.4066838919963, "mean_abs_error_last_25": 184.99834776559513, "mean_abs_error_last_50": 282.09835196754113, "mean_pred_prob": 0.0331242817628663, "mean_pred_prob_last_10": 0.14607108696363866, "mean_pred_prob_last_25": 0.08895002051722259, "mean_pred_prob_last_50": 0.05585434779059142, "mean_token_accuracy": 0.8639876127243042, "step": 9560 }, { "epoch": 0.17012426003946457, "grad_norm": 0.788955858659517, "learning_rate": 0.0001, "loss": 1.0021, "mean_abs_error": 170.98040199566466, "mean_abs_error_last_10": 49.88970549372008, "mean_abs_error_last_25": 64.88128361412006, "mean_abs_error_last_50": 120.65064867366314, "mean_pred_prob": 0.04794217757880688, "mean_pred_prob_last_10": 0.2412413891404867, "mean_pred_prob_last_25": 0.13591609727591275, "mean_pred_prob_last_50": 0.08211040534079075, "mean_token_accuracy": 0.8711798429489136, "step": 9570 }, { "epoch": 0.1703020283362665, "grad_norm": 1.1969738875238567, "learning_rate": 0.0001, "loss": 0.9185, "mean_abs_error": 249.06314730374748, "mean_abs_error_last_10": 66.811798517028, "mean_abs_error_last_25": 127.4898433271597, "mean_abs_error_last_50": 154.7991668264459, "mean_pred_prob": 0.038429459929466246, "mean_pred_prob_last_10": 0.19156362488865852, "mean_pred_prob_last_25": 0.10688067171722651, "mean_pred_prob_last_50": 0.06511504007503391, "mean_token_accuracy": 0.8694488525390625, "step": 9580 }, { "epoch": 0.17047979663306845, "grad_norm": 1.3724170394849766, "learning_rate": 0.0001, "loss": 0.9575, "mean_abs_error": 400.7619281398208, "mean_abs_error_last_10": 155.8280760962329, "mean_abs_error_last_25": 196.54509002143732, "mean_abs_error_last_50": 266.8397921001479, "mean_pred_prob": 0.019782516954001038, "mean_pred_prob_last_10": 0.11319289281964302, "mean_pred_prob_last_25": 0.05881586570758372, "mean_pred_prob_last_50": 0.03462201822549105, "mean_token_accuracy": 0.8698060154914856, "step": 9590 }, { "epoch": 0.17065756492987041, "grad_norm": 1.390836547588894, "learning_rate": 0.0001, "loss": 0.9432, "mean_abs_error": 610.2769635391865, "mean_abs_error_last_10": 100.38128996034123, "mean_abs_error_last_25": 176.29138208279892, "mean_abs_error_last_50": 364.06441504544193, "mean_pred_prob": 0.02540878658182919, "mean_pred_prob_last_10": 0.13113473784178495, "mean_pred_prob_last_25": 0.0711652456317097, "mean_pred_prob_last_50": 0.0431053658016026, "mean_token_accuracy": 0.8691487491130829, "step": 9600 }, { "epoch": 0.17083533322667235, "grad_norm": 0.6882400660019166, "learning_rate": 0.0001, "loss": 0.8901, "mean_abs_error": 86.58080868346215, "mean_abs_error_last_10": 17.5560412985919, "mean_abs_error_last_25": 41.2166895147394, "mean_abs_error_last_50": 60.70517724877898, "mean_pred_prob": 0.05002831295132637, "mean_pred_prob_last_10": 0.25892567485570905, "mean_pred_prob_last_25": 0.14167898893356323, "mean_pred_prob_last_50": 0.08584947064518929, "mean_token_accuracy": 0.874844366312027, "step": 9610 }, { "epoch": 0.1710131015234743, "grad_norm": 1.5181719012770676, "learning_rate": 0.0001, "loss": 1.0422, "mean_abs_error": 595.9033258975016, "mean_abs_error_last_10": 152.69079316994447, "mean_abs_error_last_25": 173.85059430528523, "mean_abs_error_last_50": 299.01688424877983, "mean_pred_prob": 0.034900564374402164, "mean_pred_prob_last_10": 0.18441162621602417, "mean_pred_prob_last_25": 0.1006285111187026, "mean_pred_prob_last_50": 0.05989212676649913, "mean_token_accuracy": 0.8724852979183197, "step": 9620 }, { "epoch": 0.17119086982027626, "grad_norm": 1.2112123925133897, "learning_rate": 0.0001, "loss": 0.9379, "mean_abs_error": 420.91672110287647, "mean_abs_error_last_10": 142.16747039197526, "mean_abs_error_last_25": 244.579594640778, "mean_abs_error_last_50": 294.7034779092363, "mean_pred_prob": 0.02393487561494112, "mean_pred_prob_last_10": 0.12656549476087092, "mean_pred_prob_last_25": 0.0688202727586031, "mean_pred_prob_last_50": 0.04130489844828844, "mean_token_accuracy": 0.8738430142402649, "step": 9630 }, { "epoch": 0.1713686381170782, "grad_norm": 1.557463716644872, "learning_rate": 0.0001, "loss": 0.976, "mean_abs_error": 300.58681770786245, "mean_abs_error_last_10": 121.16542834497213, "mean_abs_error_last_25": 176.63000977062862, "mean_abs_error_last_50": 204.43801278817713, "mean_pred_prob": 0.02499200915917754, "mean_pred_prob_last_10": 0.14118488691747189, "mean_pred_prob_last_25": 0.0725301681086421, "mean_pred_prob_last_50": 0.04301931895315647, "mean_token_accuracy": 0.8710956633090973, "step": 9640 }, { "epoch": 0.17154640641388014, "grad_norm": 1.6366166115587348, "learning_rate": 0.0001, "loss": 1.0108, "mean_abs_error": 637.2310694787324, "mean_abs_error_last_10": 331.22596654031605, "mean_abs_error_last_25": 328.495757550941, "mean_abs_error_last_50": 376.7698159652654, "mean_pred_prob": 0.027717448025941848, "mean_pred_prob_last_10": 0.14652637097751722, "mean_pred_prob_last_25": 0.0801559534156695, "mean_pred_prob_last_50": 0.047565531660802664, "mean_token_accuracy": 0.8712197065353393, "step": 9650 }, { "epoch": 0.1717241747106821, "grad_norm": 2.700737760415593, "learning_rate": 0.0001, "loss": 0.9786, "mean_abs_error": 414.6799642358499, "mean_abs_error_last_10": 103.6379477564623, "mean_abs_error_last_25": 123.46150950977963, "mean_abs_error_last_50": 229.0856583834324, "mean_pred_prob": 0.0337948122585658, "mean_pred_prob_last_10": 0.1759901992045343, "mean_pred_prob_last_25": 0.09518220981117338, "mean_pred_prob_last_50": 0.05756826644064859, "mean_token_accuracy": 0.8686131715774537, "step": 9660 }, { "epoch": 0.17190194300748404, "grad_norm": 1.0740896346576956, "learning_rate": 0.0001, "loss": 0.9764, "mean_abs_error": 334.96368269458327, "mean_abs_error_last_10": 51.84981063728087, "mean_abs_error_last_25": 80.45014682324222, "mean_abs_error_last_50": 167.59015151399782, "mean_pred_prob": 0.03763624373823404, "mean_pred_prob_last_10": 0.18682567682117224, "mean_pred_prob_last_25": 0.10335119720548391, "mean_pred_prob_last_50": 0.0637640562839806, "mean_token_accuracy": 0.874669075012207, "step": 9670 }, { "epoch": 0.17207971130428598, "grad_norm": 0.8282145047062692, "learning_rate": 0.0001, "loss": 0.9264, "mean_abs_error": 890.4234423840193, "mean_abs_error_last_10": 438.74912643538966, "mean_abs_error_last_25": 503.9069589943745, "mean_abs_error_last_50": 663.8244914522035, "mean_pred_prob": 0.04111129357333994, "mean_pred_prob_last_10": 0.20153768592863344, "mean_pred_prob_last_25": 0.11352457407629117, "mean_pred_prob_last_50": 0.06994643079378875, "mean_token_accuracy": 0.8834766387939453, "step": 9680 }, { "epoch": 0.17225747960108795, "grad_norm": 0.7472378283038137, "learning_rate": 0.0001, "loss": 0.9283, "mean_abs_error": 557.0783924010047, "mean_abs_error_last_10": 93.83902147531987, "mean_abs_error_last_25": 241.76485370651116, "mean_abs_error_last_50": 343.3167164106639, "mean_pred_prob": 0.02223081237170845, "mean_pred_prob_last_10": 0.11878050342202187, "mean_pred_prob_last_25": 0.06574035985395313, "mean_pred_prob_last_50": 0.03838085550814867, "mean_token_accuracy": 0.8724006175994873, "step": 9690 }, { "epoch": 0.1724352478978899, "grad_norm": 1.0560950547510883, "learning_rate": 0.0001, "loss": 0.897, "mean_abs_error": 321.5663415374837, "mean_abs_error_last_10": 48.54929261495349, "mean_abs_error_last_25": 73.01099687547699, "mean_abs_error_last_50": 144.25941554167133, "mean_pred_prob": 0.03181915928144008, "mean_pred_prob_last_10": 0.17282403502613305, "mean_pred_prob_last_25": 0.09310576030984521, "mean_pred_prob_last_50": 0.055065160430967805, "mean_token_accuracy": 0.8742137491703034, "step": 9700 }, { "epoch": 0.17261301619469183, "grad_norm": 0.7279948236173053, "learning_rate": 0.0001, "loss": 0.877, "mean_abs_error": 383.9559300096161, "mean_abs_error_last_10": 82.23113514621613, "mean_abs_error_last_25": 128.11684095587094, "mean_abs_error_last_50": 226.02961458223234, "mean_pred_prob": 0.03754613677738235, "mean_pred_prob_last_10": 0.19715569815598427, "mean_pred_prob_last_25": 0.10573822311125695, "mean_pred_prob_last_50": 0.06356564676389098, "mean_token_accuracy": 0.8680460572242736, "step": 9710 }, { "epoch": 0.1727907844914938, "grad_norm": 1.9537020963840859, "learning_rate": 0.0001, "loss": 0.91, "mean_abs_error": 301.94694523296437, "mean_abs_error_last_10": 107.57073970639608, "mean_abs_error_last_25": 170.9304343459982, "mean_abs_error_last_50": 243.3470543753981, "mean_pred_prob": 0.044531552214175464, "mean_pred_prob_last_10": 0.22660361230373383, "mean_pred_prob_last_25": 0.12888335511088372, "mean_pred_prob_last_50": 0.0766074256040156, "mean_token_accuracy": 0.878207266330719, "step": 9720 }, { "epoch": 0.17296855278829573, "grad_norm": 2.135589046452929, "learning_rate": 0.0001, "loss": 0.9575, "mean_abs_error": 153.90612570282127, "mean_abs_error_last_10": 24.51313581795953, "mean_abs_error_last_25": 53.6238026126599, "mean_abs_error_last_50": 86.7117824860068, "mean_pred_prob": 0.04403179045766592, "mean_pred_prob_last_10": 0.2066966976970434, "mean_pred_prob_last_25": 0.11904654335230588, "mean_pred_prob_last_50": 0.07367783403024078, "mean_token_accuracy": 0.8788594722747802, "step": 9730 }, { "epoch": 0.17314632108509767, "grad_norm": 0.858391180914884, "learning_rate": 0.0001, "loss": 1.0576, "mean_abs_error": 1180.290827674628, "mean_abs_error_last_10": 589.5100735621572, "mean_abs_error_last_25": 723.1866222738862, "mean_abs_error_last_50": 896.2585413262319, "mean_pred_prob": 0.022400748933432624, "mean_pred_prob_last_10": 0.10928624458028935, "mean_pred_prob_last_25": 0.060873893887037414, "mean_pred_prob_last_50": 0.03752711413835641, "mean_token_accuracy": 0.867158567905426, "step": 9740 }, { "epoch": 0.17332408938189964, "grad_norm": 2.466143051363783, "learning_rate": 0.0001, "loss": 0.8718, "mean_abs_error": 711.8360872704468, "mean_abs_error_last_10": 185.4116229793611, "mean_abs_error_last_25": 251.92544280441342, "mean_abs_error_last_50": 416.1231841744616, "mean_pred_prob": 0.024562719621462746, "mean_pred_prob_last_10": 0.12282758380752057, "mean_pred_prob_last_25": 0.06893527104984969, "mean_pred_prob_last_50": 0.04203626932576299, "mean_token_accuracy": 0.8825551271438599, "step": 9750 }, { "epoch": 0.17350185767870158, "grad_norm": 1.5542036742406355, "learning_rate": 0.0001, "loss": 0.9009, "mean_abs_error": 321.6731181601123, "mean_abs_error_last_10": 185.98482347307774, "mean_abs_error_last_25": 178.88592739760196, "mean_abs_error_last_50": 239.6730945283185, "mean_pred_prob": 0.030463904957287014, "mean_pred_prob_last_10": 0.16301854532212018, "mean_pred_prob_last_25": 0.08819616325199604, "mean_pred_prob_last_50": 0.05244678477756679, "mean_token_accuracy": 0.876161789894104, "step": 9760 }, { "epoch": 0.17367962597550352, "grad_norm": 1.5510489812789272, "learning_rate": 0.0001, "loss": 0.8832, "mean_abs_error": 134.93508670619798, "mean_abs_error_last_10": 51.99218779039457, "mean_abs_error_last_25": 68.16702928708432, "mean_abs_error_last_50": 101.14980627261166, "mean_pred_prob": 0.0452170723117888, "mean_pred_prob_last_10": 0.23331219777464868, "mean_pred_prob_last_25": 0.12825298942625524, "mean_pred_prob_last_50": 0.07745201736688614, "mean_token_accuracy": 0.8797067701816559, "step": 9770 }, { "epoch": 0.17385739427230548, "grad_norm": 2.0686000888437803, "learning_rate": 0.0001, "loss": 0.9312, "mean_abs_error": 378.2755714191881, "mean_abs_error_last_10": 106.09761568974231, "mean_abs_error_last_25": 117.1839920906345, "mean_abs_error_last_50": 192.3960114757742, "mean_pred_prob": 0.04371452003251761, "mean_pred_prob_last_10": 0.22087115196045487, "mean_pred_prob_last_25": 0.12256088971626014, "mean_pred_prob_last_50": 0.07483414147282019, "mean_token_accuracy": 0.8709948539733887, "step": 9780 }, { "epoch": 0.17403516256910742, "grad_norm": 2.0414424071238586, "learning_rate": 0.0001, "loss": 0.9381, "mean_abs_error": 346.7219497651496, "mean_abs_error_last_10": 99.64753555580042, "mean_abs_error_last_25": 123.69595912212796, "mean_abs_error_last_50": 212.53610763505452, "mean_pred_prob": 0.03391081267036498, "mean_pred_prob_last_10": 0.17986130956560373, "mean_pred_prob_last_25": 0.09679863220080734, "mean_pred_prob_last_50": 0.05813539773225784, "mean_token_accuracy": 0.8696958303451539, "step": 9790 }, { "epoch": 0.17421293086590936, "grad_norm": 2.5504463809244537, "learning_rate": 0.0001, "loss": 0.9506, "mean_abs_error": 525.0386726849883, "mean_abs_error_last_10": 344.4746917244351, "mean_abs_error_last_25": 364.69466572996294, "mean_abs_error_last_50": 325.33803180526354, "mean_pred_prob": 0.028106179391033947, "mean_pred_prob_last_10": 0.15369178727269173, "mean_pred_prob_last_25": 0.08117410270497202, "mean_pred_prob_last_50": 0.04882973576895892, "mean_token_accuracy": 0.8768425166606904, "step": 9800 }, { "epoch": 0.17439069916271133, "grad_norm": 1.2230962701614037, "learning_rate": 0.0001, "loss": 0.9412, "mean_abs_error": 739.0068793451562, "mean_abs_error_last_10": 401.96009150550105, "mean_abs_error_last_25": 503.15794009576683, "mean_abs_error_last_50": 562.156285978497, "mean_pred_prob": 0.03551801369176246, "mean_pred_prob_last_10": 0.19063845294876955, "mean_pred_prob_last_25": 0.10069011375308037, "mean_pred_prob_last_50": 0.06065909678582102, "mean_token_accuracy": 0.8752301275730133, "step": 9810 }, { "epoch": 0.17456846745951327, "grad_norm": 1.662601828555954, "learning_rate": 0.0001, "loss": 0.9616, "mean_abs_error": 310.4338412951916, "mean_abs_error_last_10": 38.3883063634036, "mean_abs_error_last_25": 74.65792085606493, "mean_abs_error_last_50": 161.22294639660896, "mean_pred_prob": 0.03267026403918862, "mean_pred_prob_last_10": 0.17106485478579997, "mean_pred_prob_last_25": 0.09287088122218848, "mean_pred_prob_last_50": 0.05588314272463322, "mean_token_accuracy": 0.8681463539600373, "step": 9820 }, { "epoch": 0.17474623575631523, "grad_norm": 1.0919640592422717, "learning_rate": 0.0001, "loss": 0.9606, "mean_abs_error": 675.182919633201, "mean_abs_error_last_10": 333.9731118335468, "mean_abs_error_last_25": 390.650860575343, "mean_abs_error_last_50": 486.8239839864117, "mean_pred_prob": 0.032102079159813, "mean_pred_prob_last_10": 0.18101220289827324, "mean_pred_prob_last_25": 0.09602311690687201, "mean_pred_prob_last_50": 0.05620915794861503, "mean_token_accuracy": 0.8699983119964599, "step": 9830 }, { "epoch": 0.17492400405311717, "grad_norm": 1.152035742503932, "learning_rate": 0.0001, "loss": 0.9422, "mean_abs_error": 557.2181857401077, "mean_abs_error_last_10": 182.2216721282427, "mean_abs_error_last_25": 234.95157764484867, "mean_abs_error_last_50": 324.0202238773393, "mean_pred_prob": 0.02239004576113075, "mean_pred_prob_last_10": 0.12184042250737548, "mean_pred_prob_last_25": 0.06420423216186463, "mean_pred_prob_last_50": 0.03858162718825042, "mean_token_accuracy": 0.86840261220932, "step": 9840 }, { "epoch": 0.1751017723499191, "grad_norm": 0.9427704789546165, "learning_rate": 0.0001, "loss": 0.9077, "mean_abs_error": 1360.8088601939346, "mean_abs_error_last_10": 755.7958215681181, "mean_abs_error_last_25": 815.8119190830155, "mean_abs_error_last_50": 961.615036351878, "mean_pred_prob": 0.0389854006250971, "mean_pred_prob_last_10": 0.20143902553390944, "mean_pred_prob_last_25": 0.11378717524203238, "mean_pred_prob_last_50": 0.0677164679611451, "mean_token_accuracy": 0.868871682882309, "step": 9850 }, { "epoch": 0.17527954064672108, "grad_norm": 1.991608797717531, "learning_rate": 0.0001, "loss": 0.9472, "mean_abs_error": 487.9907621872392, "mean_abs_error_last_10": 218.97546864316942, "mean_abs_error_last_25": 235.5140491283918, "mean_abs_error_last_50": 392.7268918498588, "mean_pred_prob": 0.02499451460316777, "mean_pred_prob_last_10": 0.12696912967367097, "mean_pred_prob_last_25": 0.0705811316263862, "mean_pred_prob_last_50": 0.04282429956947453, "mean_token_accuracy": 0.8673667132854461, "step": 9860 }, { "epoch": 0.17545730894352302, "grad_norm": 1.264062001913592, "learning_rate": 0.0001, "loss": 0.8955, "mean_abs_error": 522.67293457365, "mean_abs_error_last_10": 165.4204543078968, "mean_abs_error_last_25": 178.49964350895328, "mean_abs_error_last_50": 234.61340376591366, "mean_pred_prob": 0.03707531990366988, "mean_pred_prob_last_10": 0.1859727258561179, "mean_pred_prob_last_25": 0.10676980568096042, "mean_pred_prob_last_50": 0.06446458240970969, "mean_token_accuracy": 0.8684438347816468, "step": 9870 }, { "epoch": 0.17563507724032495, "grad_norm": 0.914144398382925, "learning_rate": 0.0001, "loss": 1.0164, "mean_abs_error": 642.1617461292308, "mean_abs_error_last_10": 224.50334557306184, "mean_abs_error_last_25": 318.431438453495, "mean_abs_error_last_50": 423.7035902871642, "mean_pred_prob": 0.0432182491349522, "mean_pred_prob_last_10": 0.19264454904478043, "mean_pred_prob_last_25": 0.11566541783395223, "mean_pred_prob_last_50": 0.07197751537314616, "mean_token_accuracy": 0.867998480796814, "step": 9880 }, { "epoch": 0.17581284553712692, "grad_norm": 1.8784261107930689, "learning_rate": 0.0001, "loss": 0.9852, "mean_abs_error": 769.3732914220451, "mean_abs_error_last_10": 183.98834297750446, "mean_abs_error_last_25": 302.5694104522365, "mean_abs_error_last_50": 421.8517045828971, "mean_pred_prob": 0.018194325553486122, "mean_pred_prob_last_10": 0.10530900213634595, "mean_pred_prob_last_25": 0.05425416394136846, "mean_pred_prob_last_50": 0.031730495142983275, "mean_token_accuracy": 0.8720765233039856, "step": 9890 }, { "epoch": 0.17599061383392886, "grad_norm": 1.538365034746651, "learning_rate": 0.0001, "loss": 0.8965, "mean_abs_error": 220.5204291836948, "mean_abs_error_last_10": 128.89034710220332, "mean_abs_error_last_25": 136.33692382222588, "mean_abs_error_last_50": 161.85549634061422, "mean_pred_prob": 0.04224701200146228, "mean_pred_prob_last_10": 0.21515376064926386, "mean_pred_prob_last_25": 0.11716636391356587, "mean_pred_prob_last_50": 0.0717560872901231, "mean_token_accuracy": 0.8656403243541717, "step": 9900 }, { "epoch": 0.1761683821307308, "grad_norm": 1.791222963310974, "learning_rate": 0.0001, "loss": 1.0998, "mean_abs_error": 569.8127801739793, "mean_abs_error_last_10": 223.77320278002725, "mean_abs_error_last_25": 322.6422487627006, "mean_abs_error_last_50": 393.2976236170589, "mean_pred_prob": 0.017621538275852798, "mean_pred_prob_last_10": 0.10115551482886076, "mean_pred_prob_last_25": 0.0505056768655777, "mean_pred_prob_last_50": 0.02971602133475244, "mean_token_accuracy": 0.8694757997989655, "step": 9910 }, { "epoch": 0.17634615042753277, "grad_norm": 1.4402534164129337, "learning_rate": 0.0001, "loss": 0.9181, "mean_abs_error": 285.3575679844713, "mean_abs_error_last_10": 106.53720103385203, "mean_abs_error_last_25": 119.11971759761028, "mean_abs_error_last_50": 175.41895193908084, "mean_pred_prob": 0.019406223297119142, "mean_pred_prob_last_10": 0.1114263366907835, "mean_pred_prob_last_25": 0.05759085174649954, "mean_pred_prob_last_50": 0.03369243326596916, "mean_token_accuracy": 0.8808084309101105, "step": 9920 }, { "epoch": 0.1765239187243347, "grad_norm": 1.0425613386755894, "learning_rate": 0.0001, "loss": 0.9319, "mean_abs_error": 296.55264455977346, "mean_abs_error_last_10": 69.21761491187354, "mean_abs_error_last_25": 99.36503597300793, "mean_abs_error_last_50": 145.79814587139586, "mean_pred_prob": 0.03052942855283618, "mean_pred_prob_last_10": 0.16370060052722693, "mean_pred_prob_last_25": 0.08868390470743179, "mean_pred_prob_last_50": 0.0531061502173543, "mean_token_accuracy": 0.8751763343811035, "step": 9930 }, { "epoch": 0.17670168702113664, "grad_norm": 2.352073059057281, "learning_rate": 0.0001, "loss": 0.916, "mean_abs_error": 262.00082025453116, "mean_abs_error_last_10": 86.60229626556414, "mean_abs_error_last_25": 105.97545980433082, "mean_abs_error_last_50": 161.67075005551905, "mean_pred_prob": 0.03509553670883179, "mean_pred_prob_last_10": 0.18575826659798622, "mean_pred_prob_last_25": 0.09951217258349061, "mean_pred_prob_last_50": 0.060239596106112, "mean_token_accuracy": 0.8731988310813904, "step": 9940 }, { "epoch": 0.1768794553179386, "grad_norm": 1.3043334269974818, "learning_rate": 0.0001, "loss": 1.0668, "mean_abs_error": 1244.936221728769, "mean_abs_error_last_10": 473.3191647841427, "mean_abs_error_last_25": 580.6514326065962, "mean_abs_error_last_50": 782.2863912847477, "mean_pred_prob": 0.01720026727707591, "mean_pred_prob_last_10": 0.08749050797778182, "mean_pred_prob_last_25": 0.0474625339731574, "mean_pred_prob_last_50": 0.028866571496473627, "mean_token_accuracy": 0.8701606035232544, "step": 9950 }, { "epoch": 0.17705722361474055, "grad_norm": 2.2135774572894937, "learning_rate": 0.0001, "loss": 0.9806, "mean_abs_error": 890.5566244408037, "mean_abs_error_last_10": 445.0698520925506, "mean_abs_error_last_25": 518.708754683935, "mean_abs_error_last_50": 640.036730291016, "mean_pred_prob": 0.029190942013519815, "mean_pred_prob_last_10": 0.1485593863792019, "mean_pred_prob_last_25": 0.08260401662264485, "mean_pred_prob_last_50": 0.049935689553967676, "mean_token_accuracy": 0.8617023348808288, "step": 9960 }, { "epoch": 0.1772349919115425, "grad_norm": 1.173072430438228, "learning_rate": 0.0001, "loss": 0.8933, "mean_abs_error": 515.23410027962, "mean_abs_error_last_10": 131.09292947930214, "mean_abs_error_last_25": 231.7923282533318, "mean_abs_error_last_50": 308.9895543680841, "mean_pred_prob": 0.02913737038616091, "mean_pred_prob_last_10": 0.14900480527430773, "mean_pred_prob_last_25": 0.08276504171080887, "mean_pred_prob_last_50": 0.0502351357601583, "mean_token_accuracy": 0.8734391808509827, "step": 9970 }, { "epoch": 0.17741276020834446, "grad_norm": 1.1921526126097726, "learning_rate": 0.0001, "loss": 0.8594, "mean_abs_error": 349.85491803151325, "mean_abs_error_last_10": 174.0880374278479, "mean_abs_error_last_25": 161.45287678620326, "mean_abs_error_last_50": 194.97183344902112, "mean_pred_prob": 0.03427940456895158, "mean_pred_prob_last_10": 0.18236708343029023, "mean_pred_prob_last_25": 0.09697354158852249, "mean_pred_prob_last_50": 0.05860629135277122, "mean_token_accuracy": 0.8756744265556335, "step": 9980 }, { "epoch": 0.1775905285051464, "grad_norm": 1.5501460761348123, "learning_rate": 0.0001, "loss": 0.9147, "mean_abs_error": 503.5523371655693, "mean_abs_error_last_10": 257.4478826318008, "mean_abs_error_last_25": 288.85546263393024, "mean_abs_error_last_50": 365.40185311968963, "mean_pred_prob": 0.028060657321475446, "mean_pred_prob_last_10": 0.1429728176444769, "mean_pred_prob_last_25": 0.07974357679486274, "mean_pred_prob_last_50": 0.048729362851008776, "mean_token_accuracy": 0.8689668416976929, "step": 9990 }, { "epoch": 0.17776829680194833, "grad_norm": 1.5421858363236918, "learning_rate": 0.0001, "loss": 0.9742, "mean_abs_error": 1234.5999047945547, "mean_abs_error_last_10": 657.1503929740123, "mean_abs_error_last_25": 770.0918879295475, "mean_abs_error_last_50": 930.7260201641686, "mean_pred_prob": 0.022543409019272077, "mean_pred_prob_last_10": 0.12008160613768268, "mean_pred_prob_last_25": 0.06544668565184111, "mean_pred_prob_last_50": 0.0388703274395084, "mean_token_accuracy": 0.8726638793945313, "step": 10000 }, { "epoch": 0.1779460650987503, "grad_norm": 1.1695543029197877, "learning_rate": 0.0001, "loss": 0.9617, "mean_abs_error": 398.3420901734403, "mean_abs_error_last_10": 119.26545233169654, "mean_abs_error_last_25": 230.69569009298567, "mean_abs_error_last_50": 235.40397150840627, "mean_pred_prob": 0.03200362122152001, "mean_pred_prob_last_10": 0.1534167880192399, "mean_pred_prob_last_25": 0.08367677591741085, "mean_pred_prob_last_50": 0.05257185115478933, "mean_token_accuracy": 0.8807270169258118, "step": 10010 }, { "epoch": 0.17812383339555224, "grad_norm": 2.229774464046708, "learning_rate": 0.0001, "loss": 0.9864, "mean_abs_error": 164.42660776750887, "mean_abs_error_last_10": 43.35713734054209, "mean_abs_error_last_25": 66.2255199996914, "mean_abs_error_last_50": 100.91437232806837, "mean_pred_prob": 0.031843716744333506, "mean_pred_prob_last_10": 0.1708318829536438, "mean_pred_prob_last_25": 0.09044618792831897, "mean_pred_prob_last_50": 0.05453200601041317, "mean_token_accuracy": 0.8669722497463226, "step": 10020 }, { "epoch": 0.17830160169235418, "grad_norm": 1.5538071854912583, "learning_rate": 0.0001, "loss": 0.9598, "mean_abs_error": 1102.9569167743464, "mean_abs_error_last_10": 551.4352808052475, "mean_abs_error_last_25": 613.6413851777672, "mean_abs_error_last_50": 802.5255384015622, "mean_pred_prob": 0.03043232149502728, "mean_pred_prob_last_10": 0.14572229794575833, "mean_pred_prob_last_25": 0.0844538723496953, "mean_pred_prob_last_50": 0.05260309058940038, "mean_token_accuracy": 0.8675291955471038, "step": 10030 }, { "epoch": 0.17847936998915614, "grad_norm": 1.503386163105334, "learning_rate": 0.0001, "loss": 0.9642, "mean_abs_error": 308.0912235329254, "mean_abs_error_last_10": 122.02128502554274, "mean_abs_error_last_25": 191.51651200907924, "mean_abs_error_last_50": 233.80464242958314, "mean_pred_prob": 0.03613452890422195, "mean_pred_prob_last_10": 0.16901918724179268, "mean_pred_prob_last_25": 0.0971453933045268, "mean_pred_prob_last_50": 0.06040890458971262, "mean_token_accuracy": 0.8736545741558075, "step": 10040 }, { "epoch": 0.17865713828595808, "grad_norm": 1.6541271695617716, "learning_rate": 0.0001, "loss": 0.8909, "mean_abs_error": 550.2681845519195, "mean_abs_error_last_10": 115.68223163328264, "mean_abs_error_last_25": 132.09895895886942, "mean_abs_error_last_50": 256.6190562293379, "mean_pred_prob": 0.026471570937428624, "mean_pred_prob_last_10": 0.14605187671259046, "mean_pred_prob_last_25": 0.07756191836670041, "mean_pred_prob_last_50": 0.0462007709313184, "mean_token_accuracy": 0.8732097148895264, "step": 10050 }, { "epoch": 0.17883490658276002, "grad_norm": 2.2347415467691074, "learning_rate": 0.0001, "loss": 0.9772, "mean_abs_error": 400.93253184542107, "mean_abs_error_last_10": 203.68524580783298, "mean_abs_error_last_25": 325.9340816407992, "mean_abs_error_last_50": 322.00579713611836, "mean_pred_prob": 0.02241119653917849, "mean_pred_prob_last_10": 0.11831236984580755, "mean_pred_prob_last_25": 0.06300136223435401, "mean_pred_prob_last_50": 0.03826134242117405, "mean_token_accuracy": 0.8733155429363251, "step": 10060 }, { "epoch": 0.179012674879562, "grad_norm": 1.2836173246306135, "learning_rate": 0.0001, "loss": 0.9819, "mean_abs_error": 383.1730219138099, "mean_abs_error_last_10": 100.10554180534902, "mean_abs_error_last_25": 147.24912770961708, "mean_abs_error_last_50": 237.13866345803459, "mean_pred_prob": 0.033111125929281116, "mean_pred_prob_last_10": 0.17648691330105065, "mean_pred_prob_last_25": 0.09525797823444009, "mean_pred_prob_last_50": 0.05728894034400582, "mean_token_accuracy": 0.8715047836303711, "step": 10070 }, { "epoch": 0.17919044317636393, "grad_norm": 1.7001705136963166, "learning_rate": 0.0001, "loss": 0.9993, "mean_abs_error": 348.07480960392184, "mean_abs_error_last_10": 295.71818989661585, "mean_abs_error_last_25": 269.7437346383509, "mean_abs_error_last_50": 225.38454788680633, "mean_pred_prob": 0.0403311227215454, "mean_pred_prob_last_10": 0.19772878726944326, "mean_pred_prob_last_25": 0.11026569004170597, "mean_pred_prob_last_50": 0.06777599072083831, "mean_token_accuracy": 0.8761146485805511, "step": 10080 }, { "epoch": 0.17936821147316587, "grad_norm": 0.7650454466648314, "learning_rate": 0.0001, "loss": 0.9367, "mean_abs_error": 640.4865437630507, "mean_abs_error_last_10": 123.22304635108101, "mean_abs_error_last_25": 155.66903339243723, "mean_abs_error_last_50": 310.16320874138444, "mean_pred_prob": 0.026847826293669642, "mean_pred_prob_last_10": 0.14845110829919578, "mean_pred_prob_last_25": 0.08017728282138706, "mean_pred_prob_last_50": 0.04698380557820201, "mean_token_accuracy": 0.8855403900146485, "step": 10090 }, { "epoch": 0.17954597976996783, "grad_norm": 2.045862411936353, "learning_rate": 0.0001, "loss": 0.9363, "mean_abs_error": 267.3616842633767, "mean_abs_error_last_10": 68.85688759756528, "mean_abs_error_last_25": 95.05058686137639, "mean_abs_error_last_50": 136.11701055528977, "mean_pred_prob": 0.03972994587384164, "mean_pred_prob_last_10": 0.19960988902021198, "mean_pred_prob_last_25": 0.11192894221749157, "mean_pred_prob_last_50": 0.067224834067747, "mean_token_accuracy": 0.874125999212265, "step": 10100 }, { "epoch": 0.17972374806676977, "grad_norm": 1.2936654884619287, "learning_rate": 0.0001, "loss": 0.8205, "mean_abs_error": 370.6990841889327, "mean_abs_error_last_10": 146.9827272917382, "mean_abs_error_last_25": 153.6312848336286, "mean_abs_error_last_50": 217.33680232527212, "mean_pred_prob": 0.03123388030799106, "mean_pred_prob_last_10": 0.1643039089627564, "mean_pred_prob_last_25": 0.09079797624144703, "mean_pred_prob_last_50": 0.054089033021591605, "mean_token_accuracy": 0.8870980262756347, "step": 10110 }, { "epoch": 0.1799015163635717, "grad_norm": 0.8019388954088751, "learning_rate": 0.0001, "loss": 1.0063, "mean_abs_error": 911.3895286965433, "mean_abs_error_last_10": 359.3150080007646, "mean_abs_error_last_25": 480.2796221567595, "mean_abs_error_last_50": 641.1098735183434, "mean_pred_prob": 0.03328796589630656, "mean_pred_prob_last_10": 0.16884377151727675, "mean_pred_prob_last_25": 0.09330048067495227, "mean_pred_prob_last_50": 0.05623109904990997, "mean_token_accuracy": 0.8741016626358032, "step": 10120 }, { "epoch": 0.18007928466037368, "grad_norm": 1.091925199163929, "learning_rate": 0.0001, "loss": 0.8847, "mean_abs_error": 238.83680537777568, "mean_abs_error_last_10": 151.17109193804632, "mean_abs_error_last_25": 186.9884266998156, "mean_abs_error_last_50": 187.19306033512186, "mean_pred_prob": 0.04409814162645489, "mean_pred_prob_last_10": 0.21989989057183265, "mean_pred_prob_last_25": 0.12201283508911728, "mean_pred_prob_last_50": 0.07486291308887302, "mean_token_accuracy": 0.8770721614360809, "step": 10130 }, { "epoch": 0.18025705295717562, "grad_norm": 1.0257423055612132, "learning_rate": 0.0001, "loss": 0.882, "mean_abs_error": 234.34990250955303, "mean_abs_error_last_10": 41.411136104931316, "mean_abs_error_last_25": 87.87020824518191, "mean_abs_error_last_50": 156.79876301468863, "mean_pred_prob": 0.041181283560581504, "mean_pred_prob_last_10": 0.2065578131005168, "mean_pred_prob_last_25": 0.11432186402380466, "mean_pred_prob_last_50": 0.06880385791882873, "mean_token_accuracy": 0.8658403158187866, "step": 10140 }, { "epoch": 0.18043482125397756, "grad_norm": 1.3968832740486703, "learning_rate": 0.0001, "loss": 0.9164, "mean_abs_error": 651.8702201203162, "mean_abs_error_last_10": 148.39306885324814, "mean_abs_error_last_25": 202.78230853778373, "mean_abs_error_last_50": 324.5787604315592, "mean_pred_prob": 0.025088996940758078, "mean_pred_prob_last_10": 0.13993975636549294, "mean_pred_prob_last_25": 0.07334930894430727, "mean_pred_prob_last_50": 0.04348169012228027, "mean_token_accuracy": 0.8778554618358612, "step": 10150 }, { "epoch": 0.18061258955077952, "grad_norm": 0.7274068179459794, "learning_rate": 0.0001, "loss": 0.9427, "mean_abs_error": 393.5268522626342, "mean_abs_error_last_10": 74.82167175033675, "mean_abs_error_last_25": 132.23226825112334, "mean_abs_error_last_50": 239.77362937170352, "mean_pred_prob": 0.051677640213165434, "mean_pred_prob_last_10": 0.25077635479392485, "mean_pred_prob_last_25": 0.1431105500436388, "mean_pred_prob_last_50": 0.08736695096013136, "mean_token_accuracy": 0.871183580160141, "step": 10160 }, { "epoch": 0.18079035784758146, "grad_norm": 1.8684569040494268, "learning_rate": 0.0001, "loss": 0.9025, "mean_abs_error": 183.42043941575793, "mean_abs_error_last_10": 40.48005502680104, "mean_abs_error_last_25": 59.35056795124594, "mean_abs_error_last_50": 102.21656923535319, "mean_pred_prob": 0.04661301081068814, "mean_pred_prob_last_10": 0.22153081484138964, "mean_pred_prob_last_25": 0.12232610397040844, "mean_pred_prob_last_50": 0.0772043315693736, "mean_token_accuracy": 0.8792178809642792, "step": 10170 }, { "epoch": 0.1809681261443834, "grad_norm": 1.7552507372996589, "learning_rate": 0.0001, "loss": 0.9054, "mean_abs_error": 587.4972475404678, "mean_abs_error_last_10": 165.23204805163203, "mean_abs_error_last_25": 202.63686289536355, "mean_abs_error_last_50": 305.3103818721825, "mean_pred_prob": 0.02563231666572392, "mean_pred_prob_last_10": 0.13701508212834596, "mean_pred_prob_last_25": 0.0719970241189003, "mean_pred_prob_last_50": 0.043493846198543906, "mean_token_accuracy": 0.8821603894233704, "step": 10180 }, { "epoch": 0.18114589444118537, "grad_norm": 1.9447953868086327, "learning_rate": 0.0001, "loss": 0.9078, "mean_abs_error": 844.0053196032771, "mean_abs_error_last_10": 349.90458667729206, "mean_abs_error_last_25": 430.29420088041354, "mean_abs_error_last_50": 561.1785362635061, "mean_pred_prob": 0.038465536024887115, "mean_pred_prob_last_10": 0.2079021984449355, "mean_pred_prob_last_25": 0.11078383148415014, "mean_pred_prob_last_50": 0.06568366078427061, "mean_token_accuracy": 0.8697259724140167, "step": 10190 }, { "epoch": 0.1813236627379873, "grad_norm": 1.1982899821395068, "learning_rate": 0.0001, "loss": 0.9514, "mean_abs_error": 901.0202194640067, "mean_abs_error_last_10": 247.87296068613426, "mean_abs_error_last_25": 344.8557835837849, "mean_abs_error_last_50": 511.1379887955874, "mean_pred_prob": 0.026890172026469372, "mean_pred_prob_last_10": 0.15205833753570913, "mean_pred_prob_last_25": 0.07915071792085655, "mean_pred_prob_last_50": 0.046195387170882896, "mean_token_accuracy": 0.8699334979057312, "step": 10200 }, { "epoch": 0.18150143103478925, "grad_norm": 1.3455816349973806, "learning_rate": 0.0001, "loss": 0.9223, "mean_abs_error": 367.2884150139304, "mean_abs_error_last_10": 306.89764507957204, "mean_abs_error_last_25": 359.5652736608257, "mean_abs_error_last_50": 341.076541526745, "mean_pred_prob": 0.03654275012668222, "mean_pred_prob_last_10": 0.18818271923810242, "mean_pred_prob_last_25": 0.10360777033492923, "mean_pred_prob_last_50": 0.062290049018338324, "mean_token_accuracy": 0.881381380558014, "step": 10210 }, { "epoch": 0.1816791993315912, "grad_norm": 2.019475583187189, "learning_rate": 0.0001, "loss": 0.9533, "mean_abs_error": 323.56831785915057, "mean_abs_error_last_10": 88.05665684542937, "mean_abs_error_last_25": 138.8126682038653, "mean_abs_error_last_50": 175.18730220455362, "mean_pred_prob": 0.04151124048512429, "mean_pred_prob_last_10": 0.21483809556812047, "mean_pred_prob_last_25": 0.11786420606076717, "mean_pred_prob_last_50": 0.07125838575884699, "mean_token_accuracy": 0.8755117654800415, "step": 10220 }, { "epoch": 0.18185696762839315, "grad_norm": 0.9100428608329292, "learning_rate": 0.0001, "loss": 0.8809, "mean_abs_error": 102.3009093353785, "mean_abs_error_last_10": 15.854954677307498, "mean_abs_error_last_25": 29.095708219214572, "mean_abs_error_last_50": 60.00349724964123, "mean_pred_prob": 0.045241430029273035, "mean_pred_prob_last_10": 0.23187543526291848, "mean_pred_prob_last_25": 0.12766494490206243, "mean_pred_prob_last_50": 0.07774032428860664, "mean_token_accuracy": 0.8755784034729004, "step": 10230 }, { "epoch": 0.1820347359251951, "grad_norm": 1.6382047981725134, "learning_rate": 0.0001, "loss": 0.9541, "mean_abs_error": 978.5866884115763, "mean_abs_error_last_10": 420.22579889006886, "mean_abs_error_last_25": 517.4726241967048, "mean_abs_error_last_50": 634.9714756030852, "mean_pred_prob": 0.020350052491994574, "mean_pred_prob_last_10": 0.1117016736388905, "mean_pred_prob_last_25": 0.057734266613260844, "mean_pred_prob_last_50": 0.03474979230959434, "mean_token_accuracy": 0.872404944896698, "step": 10240 }, { "epoch": 0.18221250422199706, "grad_norm": 1.0746504564142605, "learning_rate": 0.0001, "loss": 0.8759, "mean_abs_error": 354.99320316298514, "mean_abs_error_last_10": 92.82405998040915, "mean_abs_error_last_25": 93.25193029090431, "mean_abs_error_last_50": 158.79505242456247, "mean_pred_prob": 0.046421046694740654, "mean_pred_prob_last_10": 0.23762140199542045, "mean_pred_prob_last_25": 0.1310779744759202, "mean_pred_prob_last_50": 0.07917429520748556, "mean_token_accuracy": 0.8615385353565216, "step": 10250 }, { "epoch": 0.182390272518799, "grad_norm": 1.8093191972358382, "learning_rate": 0.0001, "loss": 0.9678, "mean_abs_error": 429.32719016584406, "mean_abs_error_last_10": 133.82509170718097, "mean_abs_error_last_25": 194.24651296037413, "mean_abs_error_last_50": 268.3606516153134, "mean_pred_prob": 0.0341653294628486, "mean_pred_prob_last_10": 0.18768725972622632, "mean_pred_prob_last_25": 0.09687800437677652, "mean_pred_prob_last_50": 0.05800298571120947, "mean_token_accuracy": 0.8687435746192932, "step": 10260 }, { "epoch": 0.18256804081560093, "grad_norm": 1.2244897318898162, "learning_rate": 0.0001, "loss": 0.9423, "mean_abs_error": 400.4054512399774, "mean_abs_error_last_10": 211.52792162808427, "mean_abs_error_last_25": 200.82079319015884, "mean_abs_error_last_50": 239.9111996937092, "mean_pred_prob": 0.030143641494214536, "mean_pred_prob_last_10": 0.16150768380612135, "mean_pred_prob_last_25": 0.08690602965652942, "mean_pred_prob_last_50": 0.051078618271276355, "mean_token_accuracy": 0.8795562148094177, "step": 10270 }, { "epoch": 0.1827458091124029, "grad_norm": 2.2478249870175393, "learning_rate": 0.0001, "loss": 0.9506, "mean_abs_error": 812.9586740439828, "mean_abs_error_last_10": 191.0821679156357, "mean_abs_error_last_25": 272.17303663874117, "mean_abs_error_last_50": 449.68284631684963, "mean_pred_prob": 0.04071216157171875, "mean_pred_prob_last_10": 0.20751966396346688, "mean_pred_prob_last_25": 0.11653343953657895, "mean_pred_prob_last_50": 0.07019633209565654, "mean_token_accuracy": 0.871318519115448, "step": 10280 }, { "epoch": 0.18292357740920484, "grad_norm": 0.8210149327596643, "learning_rate": 0.0001, "loss": 0.889, "mean_abs_error": 582.9168969075347, "mean_abs_error_last_10": 261.27075699267095, "mean_abs_error_last_25": 257.77446436164894, "mean_abs_error_last_50": 346.47105436151395, "mean_pred_prob": 0.031684171367669475, "mean_pred_prob_last_10": 0.1732253099209629, "mean_pred_prob_last_25": 0.09104233756661415, "mean_pred_prob_last_50": 0.05462345978012308, "mean_token_accuracy": 0.8768515527248383, "step": 10290 }, { "epoch": 0.18310134570600678, "grad_norm": 1.1330971866535557, "learning_rate": 0.0001, "loss": 0.864, "mean_abs_error": 1202.5384534200773, "mean_abs_error_last_10": 765.2197744178621, "mean_abs_error_last_25": 863.1586380037495, "mean_abs_error_last_50": 964.2315817343349, "mean_pred_prob": 0.03516722786516766, "mean_pred_prob_last_10": 0.18909890422655734, "mean_pred_prob_last_25": 0.10361220267514, "mean_pred_prob_last_50": 0.061745379312196744, "mean_token_accuracy": 0.8774532198905944, "step": 10300 }, { "epoch": 0.18327911400280875, "grad_norm": 1.2122190645294986, "learning_rate": 0.0001, "loss": 0.8541, "mean_abs_error": 635.6987553371008, "mean_abs_error_last_10": 110.63524140182103, "mean_abs_error_last_25": 169.68040803142483, "mean_abs_error_last_50": 319.8385258561582, "mean_pred_prob": 0.03323825381230563, "mean_pred_prob_last_10": 0.18095551633741708, "mean_pred_prob_last_25": 0.09830365306697786, "mean_pred_prob_last_50": 0.05820413309847936, "mean_token_accuracy": 0.8795140266418457, "step": 10310 }, { "epoch": 0.18345688229961069, "grad_norm": 1.2048476861288715, "learning_rate": 0.0001, "loss": 0.8783, "mean_abs_error": 149.34574681359328, "mean_abs_error_last_10": 58.58302013104541, "mean_abs_error_last_25": 83.9387491728935, "mean_abs_error_last_50": 100.51578980311737, "mean_pred_prob": 0.04449671637266874, "mean_pred_prob_last_10": 0.2221929943189025, "mean_pred_prob_last_25": 0.12404852975159883, "mean_pred_prob_last_50": 0.07516841376200319, "mean_token_accuracy": 0.8787298142910004, "step": 10320 }, { "epoch": 0.18363465059641262, "grad_norm": 1.570369875247792, "learning_rate": 0.0001, "loss": 0.9576, "mean_abs_error": 1023.1363956998111, "mean_abs_error_last_10": 589.7683442239592, "mean_abs_error_last_25": 661.3336980389197, "mean_abs_error_last_50": 807.0997968478871, "mean_pred_prob": 0.040573099949688184, "mean_pred_prob_last_10": 0.2098906122962944, "mean_pred_prob_last_25": 0.11780514149577356, "mean_pred_prob_last_50": 0.0704561133228708, "mean_token_accuracy": 0.8772179901599884, "step": 10330 }, { "epoch": 0.1838124188932146, "grad_norm": 1.4035137722943738, "learning_rate": 0.0001, "loss": 0.9102, "mean_abs_error": 423.2053933559331, "mean_abs_error_last_10": 162.75105449271922, "mean_abs_error_last_25": 165.67162845053895, "mean_abs_error_last_50": 242.1416876903798, "mean_pred_prob": 0.03145219273865223, "mean_pred_prob_last_10": 0.15827272981405258, "mean_pred_prob_last_25": 0.08930419557727873, "mean_pred_prob_last_50": 0.05358035379322246, "mean_token_accuracy": 0.8675364017486572, "step": 10340 }, { "epoch": 0.18399018719001653, "grad_norm": 1.6765067741189965, "learning_rate": 0.0001, "loss": 0.8911, "mean_abs_error": 309.23624434509145, "mean_abs_error_last_10": 144.60801399321676, "mean_abs_error_last_25": 167.01898433203115, "mean_abs_error_last_50": 229.23321493011417, "mean_pred_prob": 0.031655964581295847, "mean_pred_prob_last_10": 0.15476235933601856, "mean_pred_prob_last_25": 0.08684908729046584, "mean_pred_prob_last_50": 0.05393935907632112, "mean_token_accuracy": 0.8815401494503021, "step": 10350 }, { "epoch": 0.18416795548681847, "grad_norm": 1.026788297458729, "learning_rate": 0.0001, "loss": 0.9537, "mean_abs_error": 292.3129741535464, "mean_abs_error_last_10": 71.76207201711115, "mean_abs_error_last_25": 121.13322594742894, "mean_abs_error_last_50": 171.2923170042248, "mean_pred_prob": 0.03985386856365949, "mean_pred_prob_last_10": 0.1731703758239746, "mean_pred_prob_last_25": 0.10575026338919997, "mean_pred_prob_last_50": 0.06719761989079416, "mean_token_accuracy": 0.8663409590721131, "step": 10360 }, { "epoch": 0.18434572378362044, "grad_norm": 1.104962764431315, "learning_rate": 0.0001, "loss": 0.9239, "mean_abs_error": 913.8144100551184, "mean_abs_error_last_10": 614.4376818050274, "mean_abs_error_last_25": 675.2120818581707, "mean_abs_error_last_50": 736.462229104988, "mean_pred_prob": 0.033778367626655384, "mean_pred_prob_last_10": 0.17183561403944622, "mean_pred_prob_last_25": 0.09495311443461105, "mean_pred_prob_last_50": 0.05692081677843817, "mean_token_accuracy": 0.8646961808204651, "step": 10370 }, { "epoch": 0.18452349208042237, "grad_norm": 0.8177332967223353, "learning_rate": 0.0001, "loss": 0.911, "mean_abs_error": 313.85357209454935, "mean_abs_error_last_10": 187.58846679265068, "mean_abs_error_last_25": 233.7329352103187, "mean_abs_error_last_50": 260.708074810905, "mean_pred_prob": 0.04198427351657301, "mean_pred_prob_last_10": 0.20561109259724616, "mean_pred_prob_last_25": 0.11522715454921126, "mean_pred_prob_last_50": 0.07096017515286804, "mean_token_accuracy": 0.8790245890617371, "step": 10380 }, { "epoch": 0.1847012603772243, "grad_norm": 0.9167499661200875, "learning_rate": 0.0001, "loss": 0.941, "mean_abs_error": 707.1894088443336, "mean_abs_error_last_10": 257.28282254853264, "mean_abs_error_last_25": 305.6712334762797, "mean_abs_error_last_50": 438.611943706328, "mean_pred_prob": 0.019619417953072114, "mean_pred_prob_last_10": 0.10507331063854508, "mean_pred_prob_last_25": 0.05546289574122056, "mean_pred_prob_last_50": 0.03334225024154876, "mean_token_accuracy": 0.875463193655014, "step": 10390 }, { "epoch": 0.18487902867402628, "grad_norm": 2.213182536627936, "learning_rate": 0.0001, "loss": 1.056, "mean_abs_error": 1772.182729900777, "mean_abs_error_last_10": 856.3106802013826, "mean_abs_error_last_25": 999.3657139318941, "mean_abs_error_last_50": 1302.8604990728002, "mean_pred_prob": 0.02092892715081689, "mean_pred_prob_last_10": 0.11115828970214352, "mean_pred_prob_last_25": 0.06143685903516598, "mean_pred_prob_last_50": 0.036503413853642995, "mean_token_accuracy": 0.8678044438362121, "step": 10400 }, { "epoch": 0.18505679697082822, "grad_norm": 0.9757848304134273, "learning_rate": 0.0001, "loss": 0.7898, "mean_abs_error": 433.58401139308717, "mean_abs_error_last_10": 130.2276895453254, "mean_abs_error_last_25": 198.19655243425714, "mean_abs_error_last_50": 283.3477536690706, "mean_pred_prob": 0.048697110620560126, "mean_pred_prob_last_10": 0.23386565446853638, "mean_pred_prob_last_25": 0.13400068398332224, "mean_pred_prob_last_50": 0.0825766020687297, "mean_token_accuracy": 0.8811751484870911, "step": 10410 }, { "epoch": 0.18523456526763016, "grad_norm": 1.041522922151053, "learning_rate": 0.0001, "loss": 0.9112, "mean_abs_error": 511.53914334766796, "mean_abs_error_last_10": 187.0060528275845, "mean_abs_error_last_25": 201.32120721351683, "mean_abs_error_last_50": 297.2493415758251, "mean_pred_prob": 0.024514889507554472, "mean_pred_prob_last_10": 0.12644897121936083, "mean_pred_prob_last_25": 0.06933866180479527, "mean_pred_prob_last_50": 0.0420004066079855, "mean_token_accuracy": 0.8630122601985931, "step": 10420 }, { "epoch": 0.18541233356443212, "grad_norm": 1.4698357472008932, "learning_rate": 0.0001, "loss": 1.016, "mean_abs_error": 539.6139736942109, "mean_abs_error_last_10": 166.77856820922145, "mean_abs_error_last_25": 256.120452958398, "mean_abs_error_last_50": 314.43702158957166, "mean_pred_prob": 0.033239040023181586, "mean_pred_prob_last_10": 0.15772275277413428, "mean_pred_prob_last_25": 0.09267926292959601, "mean_pred_prob_last_50": 0.05628152892459184, "mean_token_accuracy": 0.8685356438159942, "step": 10430 }, { "epoch": 0.18559010186123406, "grad_norm": 0.9839971912140875, "learning_rate": 0.0001, "loss": 0.9046, "mean_abs_error": 1051.3206040958933, "mean_abs_error_last_10": 671.7674302572408, "mean_abs_error_last_25": 702.0415662150056, "mean_abs_error_last_50": 788.5967364462647, "mean_pred_prob": 0.027118655944650526, "mean_pred_prob_last_10": 0.14639547473634593, "mean_pred_prob_last_25": 0.07957045024959371, "mean_pred_prob_last_50": 0.04715267835126724, "mean_token_accuracy": 0.873064124584198, "step": 10440 }, { "epoch": 0.185767870158036, "grad_norm": 1.459993653109282, "learning_rate": 0.0001, "loss": 0.9795, "mean_abs_error": 464.43619168715725, "mean_abs_error_last_10": 97.24773763403023, "mean_abs_error_last_25": 166.13189357120612, "mean_abs_error_last_50": 265.44775342667543, "mean_pred_prob": 0.03201858788379468, "mean_pred_prob_last_10": 0.15932827102951705, "mean_pred_prob_last_25": 0.08873625490814448, "mean_pred_prob_last_50": 0.05468746998813003, "mean_token_accuracy": 0.8700173079967499, "step": 10450 }, { "epoch": 0.18594563845483797, "grad_norm": 1.0002146009751756, "learning_rate": 0.0001, "loss": 0.845, "mean_abs_error": 148.68694475954987, "mean_abs_error_last_10": 93.81537683283071, "mean_abs_error_last_25": 84.18426313990594, "mean_abs_error_last_50": 91.28746715000754, "mean_pred_prob": 0.04916103831492365, "mean_pred_prob_last_10": 0.23485649451613427, "mean_pred_prob_last_25": 0.13431155625730754, "mean_pred_prob_last_50": 0.08341629281640053, "mean_token_accuracy": 0.8855259001255036, "step": 10460 }, { "epoch": 0.1861234067516399, "grad_norm": 1.3966203461380715, "learning_rate": 0.0001, "loss": 0.8813, "mean_abs_error": 966.0598971052556, "mean_abs_error_last_10": 642.7309472037784, "mean_abs_error_last_25": 676.6966160492109, "mean_abs_error_last_50": 760.9846061827665, "mean_pred_prob": 0.03587935826508328, "mean_pred_prob_last_10": 0.18003020606265635, "mean_pred_prob_last_25": 0.10073265333194285, "mean_pred_prob_last_50": 0.06137767068430548, "mean_token_accuracy": 0.8798254311084748, "step": 10470 }, { "epoch": 0.18630117504844185, "grad_norm": 0.8970769441105394, "learning_rate": 0.0001, "loss": 0.9521, "mean_abs_error": 357.54040312344495, "mean_abs_error_last_10": 87.31787112340257, "mean_abs_error_last_25": 154.68917103145628, "mean_abs_error_last_50": 256.59427488657514, "mean_pred_prob": 0.022697329940274357, "mean_pred_prob_last_10": 0.12081646285951138, "mean_pred_prob_last_25": 0.06483612768352032, "mean_pred_prob_last_50": 0.03901942195370793, "mean_token_accuracy": 0.8699258089065551, "step": 10480 }, { "epoch": 0.18647894334524381, "grad_norm": 0.9381444654484794, "learning_rate": 0.0001, "loss": 0.9664, "mean_abs_error": 572.943527695242, "mean_abs_error_last_10": 418.1848574519986, "mean_abs_error_last_25": 556.8622591968476, "mean_abs_error_last_50": 531.7264116395944, "mean_pred_prob": 0.035311121586710216, "mean_pred_prob_last_10": 0.1705466851592064, "mean_pred_prob_last_25": 0.09690438979305327, "mean_pred_prob_last_50": 0.05925307632423937, "mean_token_accuracy": 0.8673048734664917, "step": 10490 }, { "epoch": 0.18665671164204575, "grad_norm": 1.8243746407811279, "learning_rate": 0.0001, "loss": 0.9354, "mean_abs_error": 802.5376547358414, "mean_abs_error_last_10": 419.98244021241834, "mean_abs_error_last_25": 469.50581529996515, "mean_abs_error_last_50": 584.1767798495509, "mean_pred_prob": 0.0370255456015002, "mean_pred_prob_last_10": 0.18477760326350107, "mean_pred_prob_last_25": 0.10440565412282013, "mean_pred_prob_last_50": 0.06324347182526253, "mean_token_accuracy": 0.8685093998908997, "step": 10500 }, { "epoch": 0.18683447993884772, "grad_norm": 1.6649859263473872, "learning_rate": 0.0001, "loss": 0.8688, "mean_abs_error": 106.21777738736697, "mean_abs_error_last_10": 22.424763946680784, "mean_abs_error_last_25": 33.827347886528614, "mean_abs_error_last_50": 54.60976576179896, "mean_pred_prob": 0.04636782575398683, "mean_pred_prob_last_10": 0.24064907841384411, "mean_pred_prob_last_25": 0.13304008189588784, "mean_pred_prob_last_50": 0.08011818658560514, "mean_token_accuracy": 0.890397697687149, "step": 10510 }, { "epoch": 0.18701224823564966, "grad_norm": 2.7043541309407364, "learning_rate": 0.0001, "loss": 1.012, "mean_abs_error": 141.46235014474863, "mean_abs_error_last_10": 40.26954033522913, "mean_abs_error_last_25": 49.236769721074964, "mean_abs_error_last_50": 71.25371628919297, "mean_pred_prob": 0.0562728900462389, "mean_pred_prob_last_10": 0.23423731215298177, "mean_pred_prob_last_25": 0.14530783519148827, "mean_pred_prob_last_50": 0.09265754725784063, "mean_token_accuracy": 0.8694595217704773, "step": 10520 }, { "epoch": 0.1871900165324516, "grad_norm": 3.3361528233959357, "learning_rate": 0.0001, "loss": 0.9588, "mean_abs_error": 697.0337421903796, "mean_abs_error_last_10": 392.923863025367, "mean_abs_error_last_25": 538.317441927304, "mean_abs_error_last_50": 568.1733616475144, "mean_pred_prob": 0.019704359077150003, "mean_pred_prob_last_10": 0.10939052668982185, "mean_pred_prob_last_25": 0.05751456049620174, "mean_pred_prob_last_50": 0.03419060396263376, "mean_token_accuracy": 0.86611368060112, "step": 10530 }, { "epoch": 0.18736778482925356, "grad_norm": 1.331673839379978, "learning_rate": 0.0001, "loss": 0.967, "mean_abs_error": 2048.973936759145, "mean_abs_error_last_10": 985.2091660358535, "mean_abs_error_last_25": 1316.1649565599323, "mean_abs_error_last_50": 1588.2906436328512, "mean_pred_prob": 0.018317250013933518, "mean_pred_prob_last_10": 0.10176198819244746, "mean_pred_prob_last_25": 0.05378047170524951, "mean_pred_prob_last_50": 0.03174558823811822, "mean_token_accuracy": 0.8641449093818665, "step": 10540 }, { "epoch": 0.1875455531260555, "grad_norm": 4.9421361851699634, "learning_rate": 0.0001, "loss": 0.8987, "mean_abs_error": 546.9572995492766, "mean_abs_error_last_10": 102.9472310216465, "mean_abs_error_last_25": 195.74994146398504, "mean_abs_error_last_50": 333.42654093923176, "mean_pred_prob": 0.025642917025834322, "mean_pred_prob_last_10": 0.13444465715438128, "mean_pred_prob_last_25": 0.07431152125354856, "mean_pred_prob_last_50": 0.0444983615539968, "mean_token_accuracy": 0.877744323015213, "step": 10550 }, { "epoch": 0.18772332142285744, "grad_norm": 3.4703975175352366, "learning_rate": 0.0001, "loss": 1.0458, "mean_abs_error": 1358.8523730089767, "mean_abs_error_last_10": 1033.0439487460403, "mean_abs_error_last_25": 1053.48173064357, "mean_abs_error_last_50": 1176.4168687850006, "mean_pred_prob": 0.032278455729829146, "mean_pred_prob_last_10": 0.17508452734327876, "mean_pred_prob_last_25": 0.10278016397642205, "mean_pred_prob_last_50": 0.05797588736895705, "mean_token_accuracy": 0.8656735599040986, "step": 10560 }, { "epoch": 0.1879010897196594, "grad_norm": 1.7585337354533312, "learning_rate": 0.0001, "loss": 0.8432, "mean_abs_error": 138.6801146878953, "mean_abs_error_last_10": 50.473661692825615, "mean_abs_error_last_25": 86.94715416931788, "mean_abs_error_last_50": 108.96337693185353, "mean_pred_prob": 0.0419452746398747, "mean_pred_prob_last_10": 0.22103793807327748, "mean_pred_prob_last_25": 0.12027325090020895, "mean_pred_prob_last_50": 0.07211028663441539, "mean_token_accuracy": 0.880774450302124, "step": 10570 }, { "epoch": 0.18807885801646135, "grad_norm": 1.5686703144635215, "learning_rate": 0.0001, "loss": 0.914, "mean_abs_error": 178.92872963366253, "mean_abs_error_last_10": 73.95050031836607, "mean_abs_error_last_25": 127.52838462406666, "mean_abs_error_last_50": 142.19440611816609, "mean_pred_prob": 0.04222273826599121, "mean_pred_prob_last_10": 0.2069223476573825, "mean_pred_prob_last_25": 0.11919661350548268, "mean_pred_prob_last_50": 0.07273259432986379, "mean_token_accuracy": 0.8666323959827423, "step": 10580 }, { "epoch": 0.1882566263132633, "grad_norm": 1.0856025433094423, "learning_rate": 0.0001, "loss": 0.9774, "mean_abs_error": 444.79434257287414, "mean_abs_error_last_10": 46.147710748272054, "mean_abs_error_last_25": 96.7365336619235, "mean_abs_error_last_50": 197.06306702713044, "mean_pred_prob": 0.034102820325642824, "mean_pred_prob_last_10": 0.18325323406606914, "mean_pred_prob_last_25": 0.09991818657144905, "mean_pred_prob_last_50": 0.059709182707592845, "mean_token_accuracy": 0.8728672146797181, "step": 10590 }, { "epoch": 0.18843439461006525, "grad_norm": 1.3115847361131612, "learning_rate": 0.0001, "loss": 0.8689, "mean_abs_error": 52.02712680308205, "mean_abs_error_last_10": 8.785035073747977, "mean_abs_error_last_25": 17.52136634689362, "mean_abs_error_last_50": 30.650358950106238, "mean_pred_prob": 0.054208160005509855, "mean_pred_prob_last_10": 0.26818373799324036, "mean_pred_prob_last_25": 0.15139450877904892, "mean_pred_prob_last_50": 0.09233314469456673, "mean_token_accuracy": 0.8716213285923005, "step": 10600 }, { "epoch": 0.1886121629068672, "grad_norm": 1.5355847759570427, "learning_rate": 0.0001, "loss": 0.8096, "mean_abs_error": 437.36584515668545, "mean_abs_error_last_10": 116.04350770606545, "mean_abs_error_last_25": 157.3811634486081, "mean_abs_error_last_50": 221.73411136984026, "mean_pred_prob": 0.037585279322229326, "mean_pred_prob_last_10": 0.1912148112198338, "mean_pred_prob_last_25": 0.1072559233289212, "mean_pred_prob_last_50": 0.06491293247090653, "mean_token_accuracy": 0.8804933369159699, "step": 10610 }, { "epoch": 0.18878993120366913, "grad_norm": 2.7962303676110127, "learning_rate": 0.0001, "loss": 0.8526, "mean_abs_error": 700.5574695579735, "mean_abs_error_last_10": 296.85114236594217, "mean_abs_error_last_25": 389.9313513158304, "mean_abs_error_last_50": 502.58454672718256, "mean_pred_prob": 0.03579926360398531, "mean_pred_prob_last_10": 0.16102100682328455, "mean_pred_prob_last_25": 0.09510884792252909, "mean_pred_prob_last_50": 0.05941940928169061, "mean_token_accuracy": 0.8814294397830963, "step": 10620 }, { "epoch": 0.1889676995004711, "grad_norm": 1.1750928392521378, "learning_rate": 0.0001, "loss": 0.8922, "mean_abs_error": 619.1443355090884, "mean_abs_error_last_10": 103.1466937401754, "mean_abs_error_last_25": 173.81226291583556, "mean_abs_error_last_50": 317.4346812741927, "mean_pred_prob": 0.02786586827132851, "mean_pred_prob_last_10": 0.142600791528821, "mean_pred_prob_last_25": 0.08005885677412153, "mean_pred_prob_last_50": 0.048081617383286354, "mean_token_accuracy": 0.870039439201355, "step": 10630 }, { "epoch": 0.18914546779727304, "grad_norm": 1.0769785930591946, "learning_rate": 0.0001, "loss": 0.8857, "mean_abs_error": 367.7164590262548, "mean_abs_error_last_10": 113.47655775552569, "mean_abs_error_last_25": 154.751260126665, "mean_abs_error_last_50": 229.03085639270677, "mean_pred_prob": 0.026897927140817048, "mean_pred_prob_last_10": 0.1334921533241868, "mean_pred_prob_last_25": 0.07406584499403834, "mean_pred_prob_last_50": 0.045388949755579236, "mean_token_accuracy": 0.8718258142471313, "step": 10640 }, { "epoch": 0.18932323609407498, "grad_norm": 1.0766524029722218, "learning_rate": 0.0001, "loss": 0.8123, "mean_abs_error": 621.0059167654044, "mean_abs_error_last_10": 85.80661765312408, "mean_abs_error_last_25": 152.34129135472014, "mean_abs_error_last_50": 315.1109426849273, "mean_pred_prob": 0.026994994736742228, "mean_pred_prob_last_10": 0.13660785583779217, "mean_pred_prob_last_25": 0.07389870281331241, "mean_pred_prob_last_50": 0.0456437906017527, "mean_token_accuracy": 0.8799801230430603, "step": 10650 }, { "epoch": 0.18950100439087694, "grad_norm": 1.3583340836532765, "learning_rate": 0.0001, "loss": 0.9044, "mean_abs_error": 156.67043894835777, "mean_abs_error_last_10": 27.957241245567513, "mean_abs_error_last_25": 34.46913939622008, "mean_abs_error_last_50": 67.54677362200684, "mean_pred_prob": 0.041921827010810375, "mean_pred_prob_last_10": 0.19541256912052632, "mean_pred_prob_last_25": 0.11428265012800694, "mean_pred_prob_last_50": 0.07089540529996156, "mean_token_accuracy": 0.8644586980342865, "step": 10660 }, { "epoch": 0.18967877268767888, "grad_norm": 2.1008226007382746, "learning_rate": 0.0001, "loss": 0.9377, "mean_abs_error": 901.4461803742785, "mean_abs_error_last_10": 491.45287100539974, "mean_abs_error_last_25": 549.6117509158086, "mean_abs_error_last_50": 686.6399784134644, "mean_pred_prob": 0.025532396690687166, "mean_pred_prob_last_10": 0.13566152462735773, "mean_pred_prob_last_25": 0.07408700032974594, "mean_pred_prob_last_50": 0.04437858448945917, "mean_token_accuracy": 0.8841465890407563, "step": 10670 }, { "epoch": 0.18985654098448082, "grad_norm": 1.0063766432938028, "learning_rate": 0.0001, "loss": 0.8654, "mean_abs_error": 958.6558634152, "mean_abs_error_last_10": 466.62504089172774, "mean_abs_error_last_25": 357.62240133725584, "mean_abs_error_last_50": 638.5000367226701, "mean_pred_prob": 0.030916381580755114, "mean_pred_prob_last_10": 0.14880390502512456, "mean_pred_prob_last_25": 0.08341097878292203, "mean_pred_prob_last_50": 0.05133009334094822, "mean_token_accuracy": 0.8737985670566559, "step": 10680 }, { "epoch": 0.1900343092812828, "grad_norm": 1.711105222897578, "learning_rate": 0.0001, "loss": 0.9444, "mean_abs_error": 131.60231842398974, "mean_abs_error_last_10": 32.11500851966785, "mean_abs_error_last_25": 45.88799497150421, "mean_abs_error_last_50": 72.01082023383607, "mean_pred_prob": 0.03843014193698764, "mean_pred_prob_last_10": 0.18390727564692497, "mean_pred_prob_last_25": 0.10922197289764882, "mean_pred_prob_last_50": 0.06638370603322982, "mean_token_accuracy": 0.8784407913684845, "step": 10690 }, { "epoch": 0.19021207757808473, "grad_norm": 2.716746716203429, "learning_rate": 0.0001, "loss": 0.9404, "mean_abs_error": 265.0706113860757, "mean_abs_error_last_10": 93.0624726077857, "mean_abs_error_last_25": 140.845069912015, "mean_abs_error_last_50": 175.2432050599681, "mean_pred_prob": 0.04588805390521884, "mean_pred_prob_last_10": 0.2396471232175827, "mean_pred_prob_last_25": 0.1304637413471937, "mean_pred_prob_last_50": 0.07888309005647898, "mean_token_accuracy": 0.8665535032749176, "step": 10700 }, { "epoch": 0.19038984587488667, "grad_norm": 1.6464603411290168, "learning_rate": 0.0001, "loss": 0.9413, "mean_abs_error": 275.67008419828744, "mean_abs_error_last_10": 145.4734810859521, "mean_abs_error_last_25": 159.7764637296645, "mean_abs_error_last_50": 190.45515741344767, "mean_pred_prob": 0.025231527350842952, "mean_pred_prob_last_10": 0.1374819826334715, "mean_pred_prob_last_25": 0.07331459885463118, "mean_pred_prob_last_50": 0.04391514789313078, "mean_token_accuracy": 0.8769650757312775, "step": 10710 }, { "epoch": 0.19056761417168863, "grad_norm": 1.0353325753834606, "learning_rate": 0.0001, "loss": 0.9511, "mean_abs_error": 909.7840946805118, "mean_abs_error_last_10": 406.43786809764487, "mean_abs_error_last_25": 456.9870728564345, "mean_abs_error_last_50": 613.5031047764378, "mean_pred_prob": 0.025511257798643782, "mean_pred_prob_last_10": 0.13820319415535778, "mean_pred_prob_last_25": 0.07330701939063147, "mean_pred_prob_last_50": 0.04415873415418901, "mean_token_accuracy": 0.8729529321193695, "step": 10720 }, { "epoch": 0.19074538246849057, "grad_norm": 2.5805036522815326, "learning_rate": 0.0001, "loss": 0.888, "mean_abs_error": 439.86660249886836, "mean_abs_error_last_10": 142.20240386921662, "mean_abs_error_last_25": 225.9733082570129, "mean_abs_error_last_50": 339.46783339939617, "mean_pred_prob": 0.0234389903023839, "mean_pred_prob_last_10": 0.12350566405802965, "mean_pred_prob_last_25": 0.06684039821848273, "mean_pred_prob_last_50": 0.04028449016623199, "mean_token_accuracy": 0.8747556746006012, "step": 10730 }, { "epoch": 0.1909231507652925, "grad_norm": 1.1118005146320746, "learning_rate": 0.0001, "loss": 0.9508, "mean_abs_error": 568.1080068782572, "mean_abs_error_last_10": 105.2072347879171, "mean_abs_error_last_25": 224.84827799127237, "mean_abs_error_last_50": 317.04200153394453, "mean_pred_prob": 0.03924629355315119, "mean_pred_prob_last_10": 0.2042391125112772, "mean_pred_prob_last_25": 0.11185699561610818, "mean_pred_prob_last_50": 0.06707587442360818, "mean_token_accuracy": 0.8718175232410431, "step": 10740 }, { "epoch": 0.19110091906209448, "grad_norm": 1.133660798481605, "learning_rate": 0.0001, "loss": 0.9053, "mean_abs_error": 334.0315536146187, "mean_abs_error_last_10": 110.22502374622613, "mean_abs_error_last_25": 161.07958854776604, "mean_abs_error_last_50": 216.00483462363854, "mean_pred_prob": 0.027931324066594244, "mean_pred_prob_last_10": 0.14152237251400948, "mean_pred_prob_last_25": 0.07656199540942907, "mean_pred_prob_last_50": 0.047279462683945894, "mean_token_accuracy": 0.8792529344558716, "step": 10750 }, { "epoch": 0.19127868735889642, "grad_norm": 0.8359113621248829, "learning_rate": 0.0001, "loss": 0.9237, "mean_abs_error": 857.7497420772577, "mean_abs_error_last_10": 382.40582864304247, "mean_abs_error_last_25": 435.8931458067709, "mean_abs_error_last_50": 595.5835012298163, "mean_pred_prob": 0.03039667895209277, "mean_pred_prob_last_10": 0.16473602747719268, "mean_pred_prob_last_25": 0.08901499859930481, "mean_pred_prob_last_50": 0.052623201147071086, "mean_token_accuracy": 0.8789534568786621, "step": 10760 }, { "epoch": 0.19145645565569835, "grad_norm": 1.6998351772314644, "learning_rate": 0.0001, "loss": 0.9344, "mean_abs_error": 240.44554551231485, "mean_abs_error_last_10": 78.25619803952206, "mean_abs_error_last_25": 112.46088948109896, "mean_abs_error_last_50": 129.430652951017, "mean_pred_prob": 0.04347365815192461, "mean_pred_prob_last_10": 0.21326360353268684, "mean_pred_prob_last_25": 0.12053553420118987, "mean_pred_prob_last_50": 0.07365812205243856, "mean_token_accuracy": 0.8783887803554535, "step": 10770 }, { "epoch": 0.19163422395250032, "grad_norm": 1.269044927368005, "learning_rate": 0.0001, "loss": 0.8638, "mean_abs_error": 363.55216491892554, "mean_abs_error_last_10": 133.39441720466223, "mean_abs_error_last_25": 180.80726688493291, "mean_abs_error_last_50": 245.99233082138022, "mean_pred_prob": 0.02651806684443727, "mean_pred_prob_last_10": 0.14874515060801058, "mean_pred_prob_last_25": 0.07657146388664841, "mean_pred_prob_last_50": 0.04536468792939559, "mean_token_accuracy": 0.8716764390468598, "step": 10780 }, { "epoch": 0.19181199224930226, "grad_norm": 1.4751177543986267, "learning_rate": 0.0001, "loss": 0.9118, "mean_abs_error": 272.9299354753145, "mean_abs_error_last_10": 99.67669019545306, "mean_abs_error_last_25": 102.92935304620214, "mean_abs_error_last_50": 167.25825208760236, "mean_pred_prob": 0.03335869535803795, "mean_pred_prob_last_10": 0.1748334366828203, "mean_pred_prob_last_25": 0.09549061693251133, "mean_pred_prob_last_50": 0.05729168178513646, "mean_token_accuracy": 0.8732129096984863, "step": 10790 }, { "epoch": 0.1919897605461042, "grad_norm": 1.1008224082250349, "learning_rate": 0.0001, "loss": 0.8512, "mean_abs_error": 1046.867860608516, "mean_abs_error_last_10": 392.9774645613067, "mean_abs_error_last_25": 535.6335224831412, "mean_abs_error_last_50": 728.1781262134682, "mean_pred_prob": 0.025785853355773724, "mean_pred_prob_last_10": 0.13284990111133083, "mean_pred_prob_last_25": 0.07019835722749121, "mean_pred_prob_last_50": 0.04272212536307052, "mean_token_accuracy": 0.8788237392902374, "step": 10800 }, { "epoch": 0.19216752884290617, "grad_norm": 1.0780939217774201, "learning_rate": 0.0001, "loss": 0.9592, "mean_abs_error": 513.7147016454943, "mean_abs_error_last_10": 136.1884190774639, "mean_abs_error_last_25": 188.04008893985392, "mean_abs_error_last_50": 289.6108159251104, "mean_pred_prob": 0.05018231266294606, "mean_pred_prob_last_10": 0.2430642092600465, "mean_pred_prob_last_25": 0.13847989264177157, "mean_pred_prob_last_50": 0.08497873616288416, "mean_token_accuracy": 0.8698426425457001, "step": 10810 }, { "epoch": 0.1923452971397081, "grad_norm": 1.1456785859042613, "learning_rate": 0.0001, "loss": 0.8548, "mean_abs_error": 651.7568060127222, "mean_abs_error_last_10": 314.1267939115457, "mean_abs_error_last_25": 340.39500926635253, "mean_abs_error_last_50": 427.13727530338537, "mean_pred_prob": 0.025077867577783762, "mean_pred_prob_last_10": 0.13789448118768632, "mean_pred_prob_last_25": 0.07195083039696329, "mean_pred_prob_last_50": 0.04282840978121385, "mean_token_accuracy": 0.8659793972969055, "step": 10820 }, { "epoch": 0.19252306543651004, "grad_norm": 1.2168574528245206, "learning_rate": 0.0001, "loss": 0.9368, "mean_abs_error": 483.3127371381207, "mean_abs_error_last_10": 82.23607584269841, "mean_abs_error_last_25": 164.6668922179657, "mean_abs_error_last_50": 285.53698083328334, "mean_pred_prob": 0.036835356790106744, "mean_pred_prob_last_10": 0.1888646713225171, "mean_pred_prob_last_25": 0.10452851857990027, "mean_pred_prob_last_50": 0.06395634178770707, "mean_token_accuracy": 0.8723727703094483, "step": 10830 }, { "epoch": 0.192700833733312, "grad_norm": 1.474572137214667, "learning_rate": 0.0001, "loss": 0.9949, "mean_abs_error": 626.4927642894163, "mean_abs_error_last_10": 111.3136638001333, "mean_abs_error_last_25": 185.62039727533224, "mean_abs_error_last_50": 330.9318817385546, "mean_pred_prob": 0.029358610010240226, "mean_pred_prob_last_10": 0.1562287791632116, "mean_pred_prob_last_25": 0.08465230468427762, "mean_pred_prob_last_50": 0.05041844024090096, "mean_token_accuracy": 0.8746150970458985, "step": 10840 }, { "epoch": 0.19287860203011395, "grad_norm": 1.361298041091606, "learning_rate": 0.0001, "loss": 0.8712, "mean_abs_error": 425.61560656574085, "mean_abs_error_last_10": 134.72454390115598, "mean_abs_error_last_25": 155.57841867699997, "mean_abs_error_last_50": 225.4053800794734, "mean_pred_prob": 0.033279647777089846, "mean_pred_prob_last_10": 0.174220577115193, "mean_pred_prob_last_25": 0.09553300053812563, "mean_pred_prob_last_50": 0.057523214409593494, "mean_token_accuracy": 0.8809720396995544, "step": 10850 }, { "epoch": 0.1930563703269159, "grad_norm": 1.0258531801008615, "learning_rate": 0.0001, "loss": 0.8188, "mean_abs_error": 463.92740169636755, "mean_abs_error_last_10": 95.62499998666128, "mean_abs_error_last_25": 175.44731825483265, "mean_abs_error_last_50": 300.02385727024915, "mean_pred_prob": 0.03536153637105599, "mean_pred_prob_last_10": 0.18664964779745788, "mean_pred_prob_last_25": 0.10108341837767512, "mean_pred_prob_last_50": 0.06043772514676675, "mean_token_accuracy": 0.8868654310703278, "step": 10860 }, { "epoch": 0.19323413862371785, "grad_norm": 1.5536497439799923, "learning_rate": 0.0001, "loss": 0.8559, "mean_abs_error": 404.20230650203985, "mean_abs_error_last_10": 117.16909848700872, "mean_abs_error_last_25": 145.60124308612137, "mean_abs_error_last_50": 231.1248333755713, "mean_pred_prob": 0.027052981988526882, "mean_pred_prob_last_10": 0.14599363319575787, "mean_pred_prob_last_25": 0.07949713766574859, "mean_pred_prob_last_50": 0.04750454253517091, "mean_token_accuracy": 0.885867565870285, "step": 10870 }, { "epoch": 0.1934119069205198, "grad_norm": 1.5631895216364238, "learning_rate": 0.0001, "loss": 0.8499, "mean_abs_error": 1168.6111980053695, "mean_abs_error_last_10": 823.2087415570888, "mean_abs_error_last_25": 881.8717405874961, "mean_abs_error_last_50": 965.914216887928, "mean_pred_prob": 0.03543468088937516, "mean_pred_prob_last_10": 0.17534822508823708, "mean_pred_prob_last_25": 0.09803666660445742, "mean_pred_prob_last_50": 0.060441970544343346, "mean_token_accuracy": 0.8844849348068238, "step": 10880 }, { "epoch": 0.19358967521732173, "grad_norm": 3.995532185198987, "learning_rate": 0.0001, "loss": 0.9201, "mean_abs_error": 341.95596196458087, "mean_abs_error_last_10": 111.57896452276464, "mean_abs_error_last_25": 117.91463657097742, "mean_abs_error_last_50": 186.7372269945101, "mean_pred_prob": 0.05545131888356991, "mean_pred_prob_last_10": 0.25622155220480636, "mean_pred_prob_last_25": 0.15212063662474976, "mean_pred_prob_last_50": 0.09333408682141453, "mean_token_accuracy": 0.8724929332733155, "step": 10890 }, { "epoch": 0.1937674435141237, "grad_norm": 2.203332977276919, "learning_rate": 0.0001, "loss": 0.8702, "mean_abs_error": 542.1068399040403, "mean_abs_error_last_10": 174.46811132635273, "mean_abs_error_last_25": 201.7322873420988, "mean_abs_error_last_50": 337.09452530537857, "mean_pred_prob": 0.04014759467099793, "mean_pred_prob_last_10": 0.21746099086594767, "mean_pred_prob_last_25": 0.11712213395512663, "mean_pred_prob_last_50": 0.06931307869963348, "mean_token_accuracy": 0.8644970178604126, "step": 10900 }, { "epoch": 0.19394521181092564, "grad_norm": 1.1212956901179227, "learning_rate": 0.0001, "loss": 1.0235, "mean_abs_error": 794.5869401642361, "mean_abs_error_last_10": 134.17305696512219, "mean_abs_error_last_25": 288.6793428347247, "mean_abs_error_last_50": 486.32832128374764, "mean_pred_prob": 0.025773311039665715, "mean_pred_prob_last_10": 0.14201275177765638, "mean_pred_prob_last_25": 0.07385660556610674, "mean_pred_prob_last_50": 0.04439036578405649, "mean_token_accuracy": 0.8688650071620941, "step": 10910 }, { "epoch": 0.19412298010772758, "grad_norm": 1.2409201671170433, "learning_rate": 0.0001, "loss": 0.9511, "mean_abs_error": 1369.5277490572694, "mean_abs_error_last_10": 1023.5504514776, "mean_abs_error_last_25": 1021.7681893978312, "mean_abs_error_last_50": 1072.5242500745296, "mean_pred_prob": 0.02174055885989219, "mean_pred_prob_last_10": 0.10981980260403361, "mean_pred_prob_last_25": 0.06245252338703722, "mean_pred_prob_last_50": 0.03792126834305236, "mean_token_accuracy": 0.8767410218715668, "step": 10920 }, { "epoch": 0.19430074840452954, "grad_norm": 0.7098940653571401, "learning_rate": 0.0001, "loss": 0.8819, "mean_abs_error": 151.92949044218437, "mean_abs_error_last_10": 48.96979995438379, "mean_abs_error_last_25": 58.86197144625844, "mean_abs_error_last_50": 90.21798154917363, "mean_pred_prob": 0.03699411624111235, "mean_pred_prob_last_10": 0.18377820625901223, "mean_pred_prob_last_25": 0.10138339269906282, "mean_pred_prob_last_50": 0.06255662981420755, "mean_token_accuracy": 0.8809450984001159, "step": 10930 }, { "epoch": 0.19447851670133148, "grad_norm": 1.225488391268053, "learning_rate": 0.0001, "loss": 0.9586, "mean_abs_error": 1313.3168513556807, "mean_abs_error_last_10": 710.8337613234481, "mean_abs_error_last_25": 813.8946130730337, "mean_abs_error_last_50": 993.8141841168533, "mean_pred_prob": 0.020485225194715895, "mean_pred_prob_last_10": 0.1137059592132573, "mean_pred_prob_last_25": 0.05781987489463063, "mean_pred_prob_last_50": 0.034852878724632316, "mean_token_accuracy": 0.8735537350177764, "step": 10940 }, { "epoch": 0.19465628499813342, "grad_norm": 3.208992539461623, "learning_rate": 0.0001, "loss": 0.8415, "mean_abs_error": 328.46182690725107, "mean_abs_error_last_10": 54.53663346847377, "mean_abs_error_last_25": 103.13015658745753, "mean_abs_error_last_50": 175.90304046865586, "mean_pred_prob": 0.038539633899927137, "mean_pred_prob_last_10": 0.18827849794179202, "mean_pred_prob_last_25": 0.1038442198652774, "mean_pred_prob_last_50": 0.0646806046133861, "mean_token_accuracy": 0.8730710029602051, "step": 10950 }, { "epoch": 0.1948340532949354, "grad_norm": 1.5385742236804756, "learning_rate": 0.0001, "loss": 0.9054, "mean_abs_error": 302.9185247696569, "mean_abs_error_last_10": 128.4099457707801, "mean_abs_error_last_25": 194.9651482429603, "mean_abs_error_last_50": 201.989539344631, "mean_pred_prob": 0.03973533548414707, "mean_pred_prob_last_10": 0.20884822346270085, "mean_pred_prob_last_25": 0.1141515238210559, "mean_pred_prob_last_50": 0.06812646724283695, "mean_token_accuracy": 0.8811823844909668, "step": 10960 }, { "epoch": 0.19501182159173733, "grad_norm": 1.0538798416634332, "learning_rate": 0.0001, "loss": 0.9412, "mean_abs_error": 635.9517163574762, "mean_abs_error_last_10": 463.5854678943345, "mean_abs_error_last_25": 506.3818421690506, "mean_abs_error_last_50": 478.80162645053935, "mean_pred_prob": 0.03162157970946282, "mean_pred_prob_last_10": 0.1512648842181079, "mean_pred_prob_last_25": 0.08836940965848043, "mean_pred_prob_last_50": 0.054335947544313964, "mean_token_accuracy": 0.862689608335495, "step": 10970 }, { "epoch": 0.19518958988853927, "grad_norm": 1.330005565569686, "learning_rate": 0.0001, "loss": 0.986, "mean_abs_error": 313.52849961214457, "mean_abs_error_last_10": 169.96193841399358, "mean_abs_error_last_25": 184.18254486280526, "mean_abs_error_last_50": 193.67140212615405, "mean_pred_prob": 0.04045684726443142, "mean_pred_prob_last_10": 0.2174405438825488, "mean_pred_prob_last_25": 0.12027344582602381, "mean_pred_prob_last_50": 0.07035157340578735, "mean_token_accuracy": 0.8659710884094238, "step": 10980 }, { "epoch": 0.19536735818534123, "grad_norm": 0.9711966999971389, "learning_rate": 0.0001, "loss": 0.8834, "mean_abs_error": 133.23896233558403, "mean_abs_error_last_10": 63.72223667716097, "mean_abs_error_last_25": 64.54155570735541, "mean_abs_error_last_50": 82.6491867924032, "mean_pred_prob": 0.043376680370420216, "mean_pred_prob_last_10": 0.22067477852106093, "mean_pred_prob_last_25": 0.12176754400134086, "mean_pred_prob_last_50": 0.07440138291567563, "mean_token_accuracy": 0.8722557187080383, "step": 10990 }, { "epoch": 0.19554512648214317, "grad_norm": 1.8002204100335184, "learning_rate": 0.0001, "loss": 0.9569, "mean_abs_error": 396.86104118959906, "mean_abs_error_last_10": 110.48736572151147, "mean_abs_error_last_25": 158.51402539091274, "mean_abs_error_last_50": 214.81398693572, "mean_pred_prob": 0.02645134520716965, "mean_pred_prob_last_10": 0.1334174394607544, "mean_pred_prob_last_25": 0.07254974823445082, "mean_pred_prob_last_50": 0.04452559142373502, "mean_token_accuracy": 0.8711114406585694, "step": 11000 }, { "epoch": 0.1957228947789451, "grad_norm": 1.2751095861704602, "learning_rate": 0.0001, "loss": 0.9425, "mean_abs_error": 784.4870076865691, "mean_abs_error_last_10": 379.8730819551878, "mean_abs_error_last_25": 460.7615479255011, "mean_abs_error_last_50": 563.4759081337639, "mean_pred_prob": 0.029736874607624485, "mean_pred_prob_last_10": 0.14357685143768323, "mean_pred_prob_last_25": 0.08169824947835877, "mean_pred_prob_last_50": 0.050205462242593056, "mean_token_accuracy": 0.8666167974472045, "step": 11010 }, { "epoch": 0.19590066307574708, "grad_norm": 1.6876038003409604, "learning_rate": 0.0001, "loss": 0.9313, "mean_abs_error": 327.46584474219014, "mean_abs_error_last_10": 116.70600330514222, "mean_abs_error_last_25": 190.49469995178313, "mean_abs_error_last_50": 277.3275401272103, "mean_pred_prob": 0.022036033356562255, "mean_pred_prob_last_10": 0.11015170589089393, "mean_pred_prob_last_25": 0.05950014973059296, "mean_pred_prob_last_50": 0.03669797801412642, "mean_token_accuracy": 0.8669535338878631, "step": 11020 }, { "epoch": 0.19607843137254902, "grad_norm": 1.5110569988252367, "learning_rate": 0.0001, "loss": 0.94, "mean_abs_error": 403.7926882101237, "mean_abs_error_last_10": 117.77005237382954, "mean_abs_error_last_25": 113.62283484212874, "mean_abs_error_last_50": 183.7160738596752, "mean_pred_prob": 0.039060625713318586, "mean_pred_prob_last_10": 0.2023260433226824, "mean_pred_prob_last_25": 0.10975286830216646, "mean_pred_prob_last_50": 0.06688067843206227, "mean_token_accuracy": 0.8662804424762726, "step": 11030 }, { "epoch": 0.19625619966935096, "grad_norm": 1.732587572803249, "learning_rate": 0.0001, "loss": 0.9103, "mean_abs_error": 215.64442259839478, "mean_abs_error_last_10": 103.98446278824568, "mean_abs_error_last_25": 108.8217928950398, "mean_abs_error_last_50": 121.90330874295857, "mean_pred_prob": 0.039645819924771786, "mean_pred_prob_last_10": 0.2003929728642106, "mean_pred_prob_last_25": 0.10911775548011064, "mean_pred_prob_last_50": 0.06685208766721189, "mean_token_accuracy": 0.8724344432353973, "step": 11040 }, { "epoch": 0.19643396796615292, "grad_norm": 3.469946391634885, "learning_rate": 0.0001, "loss": 0.8956, "mean_abs_error": 547.4354836175139, "mean_abs_error_last_10": 160.8341798446075, "mean_abs_error_last_25": 191.78237110642257, "mean_abs_error_last_50": 297.61990338162207, "mean_pred_prob": 0.02459199376171455, "mean_pred_prob_last_10": 0.12146462136879563, "mean_pred_prob_last_25": 0.06949816909618675, "mean_pred_prob_last_50": 0.04205814350862056, "mean_token_accuracy": 0.8776726484298706, "step": 11050 }, { "epoch": 0.19661173626295486, "grad_norm": 1.0206004612427282, "learning_rate": 0.0001, "loss": 0.9317, "mean_abs_error": 492.77152375879194, "mean_abs_error_last_10": 136.2243889354124, "mean_abs_error_last_25": 202.93684503304863, "mean_abs_error_last_50": 297.64605678446065, "mean_pred_prob": 0.022395960055291653, "mean_pred_prob_last_10": 0.11705002151429653, "mean_pred_prob_last_25": 0.06398654216900468, "mean_pred_prob_last_50": 0.03867805376648903, "mean_token_accuracy": 0.8682258427143097, "step": 11060 }, { "epoch": 0.1967895045597568, "grad_norm": 1.0888474508642523, "learning_rate": 0.0001, "loss": 0.9292, "mean_abs_error": 443.6610984464058, "mean_abs_error_last_10": 185.98761495494404, "mean_abs_error_last_25": 231.94226030079398, "mean_abs_error_last_50": 331.59865577765356, "mean_pred_prob": 0.03699422769714147, "mean_pred_prob_last_10": 0.1968392938375473, "mean_pred_prob_last_25": 0.10746871922165155, "mean_pred_prob_last_50": 0.06401776219718158, "mean_token_accuracy": 0.874212384223938, "step": 11070 }, { "epoch": 0.19696727285655877, "grad_norm": 1.3979220712390583, "learning_rate": 0.0001, "loss": 0.8472, "mean_abs_error": 501.37648350068366, "mean_abs_error_last_10": 148.25193854529778, "mean_abs_error_last_25": 215.20597217659497, "mean_abs_error_last_50": 288.19980852365165, "mean_pred_prob": 0.029347463912563397, "mean_pred_prob_last_10": 0.14901589655783026, "mean_pred_prob_last_25": 0.08203668252099305, "mean_pred_prob_last_50": 0.04996459329850041, "mean_token_accuracy": 0.8724275648593902, "step": 11080 }, { "epoch": 0.1971450411533607, "grad_norm": 1.4030157488675095, "learning_rate": 0.0001, "loss": 0.9355, "mean_abs_error": 538.0494425288184, "mean_abs_error_last_10": 176.87785048152423, "mean_abs_error_last_25": 250.60211095575295, "mean_abs_error_last_50": 351.1154524704446, "mean_pred_prob": 0.030874332768144087, "mean_pred_prob_last_10": 0.16923286172095686, "mean_pred_prob_last_25": 0.08955586303491145, "mean_pred_prob_last_50": 0.05348441688111052, "mean_token_accuracy": 0.8774020493030548, "step": 11090 }, { "epoch": 0.19732280945016265, "grad_norm": 1.3082360016807661, "learning_rate": 0.0001, "loss": 0.9134, "mean_abs_error": 656.467208907326, "mean_abs_error_last_10": 182.1057232666486, "mean_abs_error_last_25": 244.17300494242744, "mean_abs_error_last_50": 376.9106988369905, "mean_pred_prob": 0.020751273981295525, "mean_pred_prob_last_10": 0.11278587334090844, "mean_pred_prob_last_25": 0.059806485020089895, "mean_pred_prob_last_50": 0.03567674700170755, "mean_token_accuracy": 0.8676083803176879, "step": 11100 }, { "epoch": 0.1975005777469646, "grad_norm": 1.0763419755232582, "learning_rate": 0.0001, "loss": 0.8925, "mean_abs_error": 388.9866581846343, "mean_abs_error_last_10": 132.41109195208347, "mean_abs_error_last_25": 179.27005751600066, "mean_abs_error_last_50": 290.381525835691, "mean_pred_prob": 0.03258770238608122, "mean_pred_prob_last_10": 0.17768124788999556, "mean_pred_prob_last_25": 0.09595340555533767, "mean_pred_prob_last_50": 0.05707371640019119, "mean_token_accuracy": 0.879834508895874, "step": 11110 }, { "epoch": 0.19767834604376655, "grad_norm": 2.0184730324995472, "learning_rate": 0.0001, "loss": 1.0269, "mean_abs_error": 986.6093621426355, "mean_abs_error_last_10": 589.6098894487939, "mean_abs_error_last_25": 681.489988717974, "mean_abs_error_last_50": 748.2142697573413, "mean_pred_prob": 0.03419295931380475, "mean_pred_prob_last_10": 0.1628385575575521, "mean_pred_prob_last_25": 0.09346745688235387, "mean_pred_prob_last_50": 0.05781589404505212, "mean_token_accuracy": 0.8783744931221008, "step": 11120 }, { "epoch": 0.1978561143405685, "grad_norm": 1.196781240723309, "learning_rate": 0.0001, "loss": 0.836, "mean_abs_error": 466.91640981371046, "mean_abs_error_last_10": 258.330978593505, "mean_abs_error_last_25": 266.91850920791404, "mean_abs_error_last_50": 347.16581186474457, "mean_pred_prob": 0.03040162431425415, "mean_pred_prob_last_10": 0.16354443765012547, "mean_pred_prob_last_25": 0.0881679834332317, "mean_pred_prob_last_50": 0.05230971582350321, "mean_token_accuracy": 0.8735580146312714, "step": 11130 }, { "epoch": 0.19803388263737046, "grad_norm": 3.3028810680030056, "learning_rate": 0.0001, "loss": 0.8987, "mean_abs_error": 250.4536171266168, "mean_abs_error_last_10": 71.49965656099334, "mean_abs_error_last_25": 91.13431500624026, "mean_abs_error_last_50": 138.9519787697427, "mean_pred_prob": 0.03803221834823489, "mean_pred_prob_last_10": 0.1934048492461443, "mean_pred_prob_last_25": 0.10755870835855603, "mean_pred_prob_last_50": 0.06523426566272975, "mean_token_accuracy": 0.8683174669742584, "step": 11140 }, { "epoch": 0.1982116509341724, "grad_norm": 1.296764741602712, "learning_rate": 0.0001, "loss": 0.9496, "mean_abs_error": 305.8853221315348, "mean_abs_error_last_10": 53.07560455487112, "mean_abs_error_last_25": 89.70221680166784, "mean_abs_error_last_50": 156.84794799080547, "mean_pred_prob": 0.036071416456252337, "mean_pred_prob_last_10": 0.20271439515054226, "mean_pred_prob_last_25": 0.10641621071845293, "mean_pred_prob_last_50": 0.06256639873608946, "mean_token_accuracy": 0.8760196328163147, "step": 11150 }, { "epoch": 0.19838941923097433, "grad_norm": 3.1758747221068506, "learning_rate": 0.0001, "loss": 0.9566, "mean_abs_error": 1218.1021822475686, "mean_abs_error_last_10": 351.7396927362041, "mean_abs_error_last_25": 472.83193732195275, "mean_abs_error_last_50": 810.0613622131534, "mean_pred_prob": 0.013757079903734848, "mean_pred_prob_last_10": 0.07627955331699923, "mean_pred_prob_last_25": 0.040392870892537756, "mean_pred_prob_last_50": 0.02362976940930821, "mean_token_accuracy": 0.8677432775497437, "step": 11160 }, { "epoch": 0.1985671875277763, "grad_norm": 1.2960781041711655, "learning_rate": 0.0001, "loss": 0.8499, "mean_abs_error": 1210.3927782404555, "mean_abs_error_last_10": 480.1333194692649, "mean_abs_error_last_25": 590.6594743868762, "mean_abs_error_last_50": 797.7867341851272, "mean_pred_prob": 0.019992070185253397, "mean_pred_prob_last_10": 0.10889670073520392, "mean_pred_prob_last_25": 0.058073612907901404, "mean_pred_prob_last_50": 0.03453818193520419, "mean_token_accuracy": 0.8778709173202515, "step": 11170 }, { "epoch": 0.19874495582457824, "grad_norm": 1.4657956554676976, "learning_rate": 0.0001, "loss": 0.8287, "mean_abs_error": 553.4975108673085, "mean_abs_error_last_10": 116.0348672907127, "mean_abs_error_last_25": 138.2012685794816, "mean_abs_error_last_50": 280.76050835028377, "mean_pred_prob": 0.02689850840251893, "mean_pred_prob_last_10": 0.14943038625642657, "mean_pred_prob_last_25": 0.07742247749119997, "mean_pred_prob_last_50": 0.0457783785648644, "mean_token_accuracy": 0.878143697977066, "step": 11180 }, { "epoch": 0.1989227241213802, "grad_norm": 0.8661137069568058, "learning_rate": 0.0001, "loss": 0.914, "mean_abs_error": 1342.284884509624, "mean_abs_error_last_10": 623.4498940393288, "mean_abs_error_last_25": 722.5420241094696, "mean_abs_error_last_50": 954.2874065805712, "mean_pred_prob": 0.015093268736382014, "mean_pred_prob_last_10": 0.08528876383497845, "mean_pred_prob_last_25": 0.04487900734238792, "mean_pred_prob_last_50": 0.026552485922002232, "mean_token_accuracy": 0.868292385339737, "step": 11190 }, { "epoch": 0.19910049241818215, "grad_norm": 1.6166824664293487, "learning_rate": 0.0001, "loss": 0.8603, "mean_abs_error": 298.74573626142995, "mean_abs_error_last_10": 72.08030924700864, "mean_abs_error_last_25": 94.45164823242172, "mean_abs_error_last_50": 152.63851046272026, "mean_pred_prob": 0.0401707204291597, "mean_pred_prob_last_10": 0.19496657252311705, "mean_pred_prob_last_25": 0.11083662351593375, "mean_pred_prob_last_50": 0.06796722654253244, "mean_token_accuracy": 0.8843480885028839, "step": 11200 }, { "epoch": 0.19927826071498408, "grad_norm": 1.5043309266930711, "learning_rate": 0.0001, "loss": 0.8469, "mean_abs_error": 283.21595807424166, "mean_abs_error_last_10": 63.504659619687274, "mean_abs_error_last_25": 83.86084005456782, "mean_abs_error_last_50": 175.1557429934584, "mean_pred_prob": 0.03294236878864467, "mean_pred_prob_last_10": 0.17221885398030282, "mean_pred_prob_last_25": 0.09508730880916119, "mean_pred_prob_last_50": 0.0569153212942183, "mean_token_accuracy": 0.8677500426769257, "step": 11210 }, { "epoch": 0.19945602901178605, "grad_norm": 1.211614450249723, "learning_rate": 0.0001, "loss": 0.8651, "mean_abs_error": 727.6948844052224, "mean_abs_error_last_10": 313.9836359365563, "mean_abs_error_last_25": 471.08631196111475, "mean_abs_error_last_50": 520.4024713894648, "mean_pred_prob": 0.03814503291214351, "mean_pred_prob_last_10": 0.19666927848593332, "mean_pred_prob_last_25": 0.1084734719421249, "mean_pred_prob_last_50": 0.06567056628991849, "mean_token_accuracy": 0.8731130659580231, "step": 11220 }, { "epoch": 0.199633797308588, "grad_norm": 2.2073278444550084, "learning_rate": 0.0001, "loss": 0.8487, "mean_abs_error": 1183.6226258893716, "mean_abs_error_last_10": 767.5052641944459, "mean_abs_error_last_25": 796.11610219975, "mean_abs_error_last_50": 917.8095733422749, "mean_pred_prob": 0.03925041812981363, "mean_pred_prob_last_10": 0.20110240818175953, "mean_pred_prob_last_25": 0.11032143246993656, "mean_pred_prob_last_50": 0.06683814413554501, "mean_token_accuracy": 0.8854363679885864, "step": 11230 }, { "epoch": 0.19981156560538993, "grad_norm": 1.1889305618597183, "learning_rate": 0.0001, "loss": 0.8705, "mean_abs_error": 953.9697117702187, "mean_abs_error_last_10": 565.2795041398924, "mean_abs_error_last_25": 654.6022506461115, "mean_abs_error_last_50": 753.1511415447642, "mean_pred_prob": 0.030940933177771513, "mean_pred_prob_last_10": 0.15820532079960686, "mean_pred_prob_last_25": 0.08727646571933292, "mean_pred_prob_last_50": 0.05278227765520569, "mean_token_accuracy": 0.8748281002044678, "step": 11240 }, { "epoch": 0.1999893339021919, "grad_norm": 1.3200149596940736, "learning_rate": 0.0001, "loss": 0.9045, "mean_abs_error": 569.619796212681, "mean_abs_error_last_10": 360.8113736388275, "mean_abs_error_last_25": 436.6034739290734, "mean_abs_error_last_50": 518.701698159038, "mean_pred_prob": 0.021078515239059924, "mean_pred_prob_last_10": 0.1245119521394372, "mean_pred_prob_last_25": 0.06431548912078142, "mean_pred_prob_last_50": 0.0370773047208786, "mean_token_accuracy": 0.876952612400055, "step": 11250 }, { "epoch": 0.20016710219899383, "grad_norm": 2.1893443551754386, "learning_rate": 0.0001, "loss": 0.8141, "mean_abs_error": 60.65148071198614, "mean_abs_error_last_10": 9.193443252053786, "mean_abs_error_last_25": 23.72846377333093, "mean_abs_error_last_50": 40.10352485170487, "mean_pred_prob": 0.05026100166141987, "mean_pred_prob_last_10": 0.24860356599092484, "mean_pred_prob_last_25": 0.1408036857843399, "mean_pred_prob_last_50": 0.08562898375093937, "mean_token_accuracy": 0.8768066167831421, "step": 11260 }, { "epoch": 0.20034487049579577, "grad_norm": 1.1662630303008994, "learning_rate": 0.0001, "loss": 0.8456, "mean_abs_error": 516.1565248755539, "mean_abs_error_last_10": 103.30954765270674, "mean_abs_error_last_25": 232.27822106869112, "mean_abs_error_last_50": 353.7614352835983, "mean_pred_prob": 0.02311119104269892, "mean_pred_prob_last_10": 0.1349024584516883, "mean_pred_prob_last_25": 0.06902674939483404, "mean_pred_prob_last_50": 0.04025374026969075, "mean_token_accuracy": 0.8732221186161041, "step": 11270 }, { "epoch": 0.20052263879259774, "grad_norm": 1.558771634937419, "learning_rate": 0.0001, "loss": 0.9516, "mean_abs_error": 270.8330958091531, "mean_abs_error_last_10": 94.86752228399816, "mean_abs_error_last_25": 116.94787266638659, "mean_abs_error_last_50": 157.38957756123904, "mean_pred_prob": 0.031025674380362033, "mean_pred_prob_last_10": 0.15811052937060593, "mean_pred_prob_last_25": 0.0889727839268744, "mean_pred_prob_last_50": 0.05296036852523685, "mean_token_accuracy": 0.8754244863986969, "step": 11280 }, { "epoch": 0.20070040708939968, "grad_norm": 1.2482095813646676, "learning_rate": 0.0001, "loss": 0.889, "mean_abs_error": 546.6251098721148, "mean_abs_error_last_10": 209.15601963525788, "mean_abs_error_last_25": 242.08406320204503, "mean_abs_error_last_50": 332.77947094125057, "mean_pred_prob": 0.03854619909834582, "mean_pred_prob_last_10": 0.20984867979423144, "mean_pred_prob_last_25": 0.11221459588850849, "mean_pred_prob_last_50": 0.0667523447657004, "mean_token_accuracy": 0.871600067615509, "step": 11290 }, { "epoch": 0.20087817538620162, "grad_norm": 1.2333816426945352, "learning_rate": 0.0001, "loss": 0.8542, "mean_abs_error": 446.29843975565666, "mean_abs_error_last_10": 182.44935372785878, "mean_abs_error_last_25": 180.27789281354484, "mean_abs_error_last_50": 235.34100170616884, "mean_pred_prob": 0.038964980409946295, "mean_pred_prob_last_10": 0.1723558372235857, "mean_pred_prob_last_25": 0.10135084206704051, "mean_pred_prob_last_50": 0.06416244121501222, "mean_token_accuracy": 0.8678414046764373, "step": 11300 }, { "epoch": 0.20105594368300359, "grad_norm": 1.8045101749762742, "learning_rate": 0.0001, "loss": 0.8373, "mean_abs_error": 517.5706883505063, "mean_abs_error_last_10": 146.82663792747596, "mean_abs_error_last_25": 190.73063847539225, "mean_abs_error_last_50": 247.09272172479183, "mean_pred_prob": 0.030660398956388234, "mean_pred_prob_last_10": 0.15894126761704683, "mean_pred_prob_last_25": 0.08736710911616682, "mean_pred_prob_last_50": 0.052967631397768854, "mean_token_accuracy": 0.8839539289474487, "step": 11310 }, { "epoch": 0.20123371197980552, "grad_norm": 0.6751527245726718, "learning_rate": 0.0001, "loss": 0.8951, "mean_abs_error": 202.26493378051086, "mean_abs_error_last_10": 32.09147054234587, "mean_abs_error_last_25": 63.71780763346125, "mean_abs_error_last_50": 108.3052702986721, "mean_pred_prob": 0.03578896187245846, "mean_pred_prob_last_10": 0.19026724770665168, "mean_pred_prob_last_25": 0.10282229892909527, "mean_pred_prob_last_50": 0.06145996153354645, "mean_token_accuracy": 0.8723556876182557, "step": 11320 }, { "epoch": 0.20141148027660746, "grad_norm": 1.1467227208115673, "learning_rate": 0.0001, "loss": 0.8836, "mean_abs_error": 374.3514339927687, "mean_abs_error_last_10": 107.89995871643097, "mean_abs_error_last_25": 198.98909383791192, "mean_abs_error_last_50": 213.69002274087129, "mean_pred_prob": 0.03810546551831066, "mean_pred_prob_last_10": 0.1871168628334999, "mean_pred_prob_last_25": 0.10479878457263112, "mean_pred_prob_last_50": 0.06443503126502037, "mean_token_accuracy": 0.8700820684432984, "step": 11330 }, { "epoch": 0.20158924857340943, "grad_norm": 1.018508377654183, "learning_rate": 0.0001, "loss": 0.8307, "mean_abs_error": 552.9474000072402, "mean_abs_error_last_10": 106.92105423259208, "mean_abs_error_last_25": 146.95101932763274, "mean_abs_error_last_50": 281.17466767256496, "mean_pred_prob": 0.03050414322060533, "mean_pred_prob_last_10": 0.1603467793785967, "mean_pred_prob_last_25": 0.08690487036947162, "mean_pred_prob_last_50": 0.05209033691789955, "mean_token_accuracy": 0.8776413977146149, "step": 11340 }, { "epoch": 0.20176701687021137, "grad_norm": 1.4662095401409472, "learning_rate": 0.0001, "loss": 0.9154, "mean_abs_error": 516.8098186400085, "mean_abs_error_last_10": 249.9390790803679, "mean_abs_error_last_25": 258.61944289356336, "mean_abs_error_last_50": 391.29496353504646, "mean_pred_prob": 0.027132810559123755, "mean_pred_prob_last_10": 0.13788016941398382, "mean_pred_prob_last_25": 0.07662954609841108, "mean_pred_prob_last_50": 0.04610684500075877, "mean_token_accuracy": 0.8728831052780152, "step": 11350 }, { "epoch": 0.2019447851670133, "grad_norm": 2.334650237351431, "learning_rate": 0.0001, "loss": 0.8709, "mean_abs_error": 443.0941744232208, "mean_abs_error_last_10": 113.26130838911158, "mean_abs_error_last_25": 242.64022050458607, "mean_abs_error_last_50": 298.75915363348673, "mean_pred_prob": 0.044113854854367675, "mean_pred_prob_last_10": 0.2159362418577075, "mean_pred_prob_last_25": 0.1230186877772212, "mean_pred_prob_last_50": 0.07477128393948078, "mean_token_accuracy": 0.8759994328022003, "step": 11360 }, { "epoch": 0.20212255346381527, "grad_norm": 1.7970343085656193, "learning_rate": 0.0001, "loss": 0.9281, "mean_abs_error": 1644.0207652691854, "mean_abs_error_last_10": 851.7050386446178, "mean_abs_error_last_25": 978.455478984268, "mean_abs_error_last_50": 1219.8490703325633, "mean_pred_prob": 0.015578883337730077, "mean_pred_prob_last_10": 0.08271220930328127, "mean_pred_prob_last_25": 0.04438383919914486, "mean_pred_prob_last_50": 0.02673469491128344, "mean_token_accuracy": 0.8701817870140076, "step": 11370 }, { "epoch": 0.2023003217606172, "grad_norm": 1.002627376975498, "learning_rate": 0.0001, "loss": 0.9143, "mean_abs_error": 218.04726003662427, "mean_abs_error_last_10": 70.17837129526295, "mean_abs_error_last_25": 118.75180248058857, "mean_abs_error_last_50": 140.04241442936, "mean_pred_prob": 0.03015491096302867, "mean_pred_prob_last_10": 0.16319910120218992, "mean_pred_prob_last_25": 0.08607291374355555, "mean_pred_prob_last_50": 0.05174702969379723, "mean_token_accuracy": 0.8813162684440613, "step": 11380 }, { "epoch": 0.20247809005741915, "grad_norm": 0.9806245516343702, "learning_rate": 0.0001, "loss": 0.9034, "mean_abs_error": 242.0427940266236, "mean_abs_error_last_10": 66.01431396633782, "mean_abs_error_last_25": 112.34322846092748, "mean_abs_error_last_50": 152.82692137379667, "mean_pred_prob": 0.02375319804996252, "mean_pred_prob_last_10": 0.1286106374114752, "mean_pred_prob_last_25": 0.06868390403687955, "mean_pred_prob_last_50": 0.041059653554111716, "mean_token_accuracy": 0.8698828101158143, "step": 11390 }, { "epoch": 0.20265585835422112, "grad_norm": 1.1469838940071369, "learning_rate": 0.0001, "loss": 0.9688, "mean_abs_error": 1156.5841250043388, "mean_abs_error_last_10": 586.450119493562, "mean_abs_error_last_25": 712.5706044933718, "mean_abs_error_last_50": 849.3573407362977, "mean_pred_prob": 0.01985246988479048, "mean_pred_prob_last_10": 0.10617660112038721, "mean_pred_prob_last_25": 0.057461293810047206, "mean_pred_prob_last_50": 0.03452226358349435, "mean_token_accuracy": 0.8692553460597991, "step": 11400 }, { "epoch": 0.20283362665102306, "grad_norm": 1.2667526367874031, "learning_rate": 0.0001, "loss": 0.8016, "mean_abs_error": 589.7525548836427, "mean_abs_error_last_10": 188.93618595405772, "mean_abs_error_last_25": 287.785005854527, "mean_abs_error_last_50": 335.6670634533837, "mean_pred_prob": 0.022428348870016636, "mean_pred_prob_last_10": 0.1186062490567565, "mean_pred_prob_last_25": 0.06374907176941633, "mean_pred_prob_last_50": 0.03849151525646448, "mean_token_accuracy": 0.8736949443817139, "step": 11410 }, { "epoch": 0.203011394947825, "grad_norm": 1.1943380202408347, "learning_rate": 0.0001, "loss": 0.96, "mean_abs_error": 579.7446716976118, "mean_abs_error_last_10": 241.9581675343896, "mean_abs_error_last_25": 356.66810083994835, "mean_abs_error_last_50": 438.9617117459531, "mean_pred_prob": 0.016263558203354476, "mean_pred_prob_last_10": 0.09290844537317752, "mean_pred_prob_last_25": 0.04918693606741727, "mean_pred_prob_last_50": 0.028361658565700054, "mean_token_accuracy": 0.8739218473434448, "step": 11420 }, { "epoch": 0.20318916324462696, "grad_norm": 0.9204080943218399, "learning_rate": 0.0001, "loss": 0.8945, "mean_abs_error": 873.7385260417175, "mean_abs_error_last_10": 394.28979715169487, "mean_abs_error_last_25": 497.7701201561752, "mean_abs_error_last_50": 641.6676460965889, "mean_pred_prob": 0.03599365951376967, "mean_pred_prob_last_10": 0.195262576994719, "mean_pred_prob_last_25": 0.10302964732109103, "mean_pred_prob_last_50": 0.06164304989215452, "mean_token_accuracy": 0.8683933019638062, "step": 11430 }, { "epoch": 0.2033669315414289, "grad_norm": 2.3650293733458176, "learning_rate": 0.0001, "loss": 0.974, "mean_abs_error": 1018.063197863615, "mean_abs_error_last_10": 358.64820220233054, "mean_abs_error_last_25": 477.68755966318713, "mean_abs_error_last_50": 697.3089574038513, "mean_pred_prob": 0.02035800231387839, "mean_pred_prob_last_10": 0.11602053520036862, "mean_pred_prob_last_25": 0.05849454795825295, "mean_pred_prob_last_50": 0.03497867565602064, "mean_token_accuracy": 0.8769116103649139, "step": 11440 }, { "epoch": 0.20354469983823084, "grad_norm": 1.7317998434619464, "learning_rate": 0.0001, "loss": 0.8538, "mean_abs_error": 435.47482003351763, "mean_abs_error_last_10": 265.91512039851585, "mean_abs_error_last_25": 288.66514334769386, "mean_abs_error_last_50": 319.8932436645202, "mean_pred_prob": 0.02998356653843075, "mean_pred_prob_last_10": 0.1560944851487875, "mean_pred_prob_last_25": 0.08446579836308957, "mean_pred_prob_last_50": 0.05136880301870406, "mean_token_accuracy": 0.8672271609306336, "step": 11450 }, { "epoch": 0.2037224681350328, "grad_norm": 0.988626357958139, "learning_rate": 0.0001, "loss": 0.9042, "mean_abs_error": 402.04697432828476, "mean_abs_error_last_10": 83.575036109261, "mean_abs_error_last_25": 198.72406807294152, "mean_abs_error_last_50": 288.1316800887375, "mean_pred_prob": 0.03618712313473225, "mean_pred_prob_last_10": 0.18005412444472313, "mean_pred_prob_last_25": 0.10083128120750189, "mean_pred_prob_last_50": 0.060734827117994425, "mean_token_accuracy": 0.8766677796840667, "step": 11460 }, { "epoch": 0.20390023643183475, "grad_norm": 1.968583663833732, "learning_rate": 0.0001, "loss": 0.9193, "mean_abs_error": 489.6516867969937, "mean_abs_error_last_10": 90.59227196484952, "mean_abs_error_last_25": 117.68709407814143, "mean_abs_error_last_50": 207.44739306291075, "mean_pred_prob": 0.03217893107794225, "mean_pred_prob_last_10": 0.16570237157866358, "mean_pred_prob_last_25": 0.08971567694097757, "mean_pred_prob_last_50": 0.05473704505711794, "mean_token_accuracy": 0.8670540392398834, "step": 11470 }, { "epoch": 0.20407800472863669, "grad_norm": 4.035073359067404, "learning_rate": 0.0001, "loss": 0.9001, "mean_abs_error": 1870.5018985464444, "mean_abs_error_last_10": 974.4968362015472, "mean_abs_error_last_25": 1140.7846959982496, "mean_abs_error_last_50": 1404.992830309606, "mean_pred_prob": 0.0120474863491836, "mean_pred_prob_last_10": 0.0709099644300295, "mean_pred_prob_last_25": 0.03582749442430213, "mean_pred_prob_last_50": 0.021069799628457987, "mean_token_accuracy": 0.8786756515502929, "step": 11480 }, { "epoch": 0.20425577302543865, "grad_norm": 1.8566336093886426, "learning_rate": 0.0001, "loss": 0.9077, "mean_abs_error": 1146.1037744907658, "mean_abs_error_last_10": 437.0713201262267, "mean_abs_error_last_25": 558.9712971977981, "mean_abs_error_last_50": 732.3850320951767, "mean_pred_prob": 0.03987895030004438, "mean_pred_prob_last_10": 0.20115034048212693, "mean_pred_prob_last_25": 0.11293939856695942, "mean_pred_prob_last_50": 0.06837432276224717, "mean_token_accuracy": 0.8707727909088134, "step": 11490 }, { "epoch": 0.2044335413222406, "grad_norm": 2.0365217808407454, "learning_rate": 0.0001, "loss": 0.9148, "mean_abs_error": 539.1455544098233, "mean_abs_error_last_10": 337.06018142305334, "mean_abs_error_last_25": 298.6980181157484, "mean_abs_error_last_50": 318.4623989272926, "mean_pred_prob": 0.03173894794890657, "mean_pred_prob_last_10": 0.15327961503062398, "mean_pred_prob_last_25": 0.08811674322932958, "mean_pred_prob_last_50": 0.05419514407403767, "mean_token_accuracy": 0.8687439143657685, "step": 11500 }, { "epoch": 0.20461130961904253, "grad_norm": 3.0002017909470613, "learning_rate": 0.0001, "loss": 1.0449, "mean_abs_error": 1392.2191554742676, "mean_abs_error_last_10": 462.60755166222543, "mean_abs_error_last_25": 608.0727609511781, "mean_abs_error_last_50": 854.0165861230405, "mean_pred_prob": 0.015717830508947372, "mean_pred_prob_last_10": 0.0874187336070463, "mean_pred_prob_last_25": 0.04526258367695846, "mean_pred_prob_last_50": 0.027156822293181903, "mean_token_accuracy": 0.8607105851173401, "step": 11510 }, { "epoch": 0.2047890779158445, "grad_norm": 0.8716716851242272, "learning_rate": 0.0001, "loss": 0.9307, "mean_abs_error": 1462.6598057739598, "mean_abs_error_last_10": 401.87214449300933, "mean_abs_error_last_25": 585.7835605100396, "mean_abs_error_last_50": 921.5309004745393, "mean_pred_prob": 0.02492640517593827, "mean_pred_prob_last_10": 0.1279449574125465, "mean_pred_prob_last_25": 0.06754362702195067, "mean_pred_prob_last_50": 0.04184209186059888, "mean_token_accuracy": 0.869555675983429, "step": 11520 }, { "epoch": 0.20496684621264644, "grad_norm": 2.162328506300813, "learning_rate": 0.0001, "loss": 0.9362, "mean_abs_error": 275.84369220209175, "mean_abs_error_last_10": 147.76500996735118, "mean_abs_error_last_25": 165.510798626341, "mean_abs_error_last_50": 207.83761027530795, "mean_pred_prob": 0.03658590068225749, "mean_pred_prob_last_10": 0.16707379680592566, "mean_pred_prob_last_25": 0.09819218126358464, "mean_pred_prob_last_50": 0.060779649147298186, "mean_token_accuracy": 0.858466374874115, "step": 11530 }, { "epoch": 0.20514461450944838, "grad_norm": 1.6898270258904402, "learning_rate": 0.0001, "loss": 0.9127, "mean_abs_error": 797.2968155047081, "mean_abs_error_last_10": 325.278425602683, "mean_abs_error_last_25": 371.7019758197199, "mean_abs_error_last_50": 452.1198859315573, "mean_pred_prob": 0.014172308798879384, "mean_pred_prob_last_10": 0.07445647763088345, "mean_pred_prob_last_25": 0.04022519844584167, "mean_pred_prob_last_50": 0.02419558197725564, "mean_token_accuracy": 0.8714510560035705, "step": 11540 }, { "epoch": 0.20532238280625034, "grad_norm": 0.9579375728214515, "learning_rate": 0.0001, "loss": 0.8952, "mean_abs_error": 689.340235031193, "mean_abs_error_last_10": 224.6613937671391, "mean_abs_error_last_25": 312.13246259505985, "mean_abs_error_last_50": 427.3596911114162, "mean_pred_prob": 0.04306341695773881, "mean_pred_prob_last_10": 0.21508560960064643, "mean_pred_prob_last_25": 0.12032570402952843, "mean_pred_prob_last_50": 0.0731003895343747, "mean_token_accuracy": 0.8731038510799408, "step": 11550 }, { "epoch": 0.20550015110305228, "grad_norm": 1.4425252391671002, "learning_rate": 0.0001, "loss": 0.8701, "mean_abs_error": 565.7886535762366, "mean_abs_error_last_10": 178.0601133897099, "mean_abs_error_last_25": 198.62609657916175, "mean_abs_error_last_50": 322.1303971260248, "mean_pred_prob": 0.025633049418684096, "mean_pred_prob_last_10": 0.14569594992790372, "mean_pred_prob_last_25": 0.07627497628564014, "mean_pred_prob_last_50": 0.04468687869375572, "mean_token_accuracy": 0.8855255305767059, "step": 11560 }, { "epoch": 0.20567791939985422, "grad_norm": 1.3376270981974965, "learning_rate": 0.0001, "loss": 0.9361, "mean_abs_error": 777.8376621331465, "mean_abs_error_last_10": 101.55206896585814, "mean_abs_error_last_25": 187.27869542998934, "mean_abs_error_last_50": 356.1899388925591, "mean_pred_prob": 0.025294053286779673, "mean_pred_prob_last_10": 0.12568302056752145, "mean_pred_prob_last_25": 0.06981553708901629, "mean_pred_prob_last_50": 0.043028145970311016, "mean_token_accuracy": 0.8793414235115051, "step": 11570 }, { "epoch": 0.2058556876966562, "grad_norm": 2.2768506089846543, "learning_rate": 0.0001, "loss": 0.9159, "mean_abs_error": 367.74155354929275, "mean_abs_error_last_10": 101.19198737873948, "mean_abs_error_last_25": 150.91647483368334, "mean_abs_error_last_50": 217.98418395055143, "mean_pred_prob": 0.04747989673633128, "mean_pred_prob_last_10": 0.2380339432042092, "mean_pred_prob_last_25": 0.1368200274882838, "mean_pred_prob_last_50": 0.0821427098941058, "mean_token_accuracy": 0.8755077004432679, "step": 11580 }, { "epoch": 0.20603345599345813, "grad_norm": 1.1598420724553826, "learning_rate": 0.0001, "loss": 0.8785, "mean_abs_error": 174.29718456824952, "mean_abs_error_last_10": 44.32662957187081, "mean_abs_error_last_25": 68.76642297542034, "mean_abs_error_last_50": 98.54008526043904, "mean_pred_prob": 0.04863280551508069, "mean_pred_prob_last_10": 0.2450232483446598, "mean_pred_prob_last_25": 0.1363038571551442, "mean_pred_prob_last_50": 0.08307916792109608, "mean_token_accuracy": 0.870741456747055, "step": 11590 }, { "epoch": 0.20621122429026006, "grad_norm": 2.227010878909355, "learning_rate": 0.0001, "loss": 0.8984, "mean_abs_error": 295.9353265473315, "mean_abs_error_last_10": 167.53535201968288, "mean_abs_error_last_25": 164.76117149560653, "mean_abs_error_last_50": 202.22575800983958, "mean_pred_prob": 0.029385585570707917, "mean_pred_prob_last_10": 0.15130245443433524, "mean_pred_prob_last_25": 0.08310516849160195, "mean_pred_prob_last_50": 0.05033079888671636, "mean_token_accuracy": 0.8837242245674133, "step": 11600 }, { "epoch": 0.20638899258706203, "grad_norm": 1.9622510963615434, "learning_rate": 0.0001, "loss": 0.9319, "mean_abs_error": 1467.881348612902, "mean_abs_error_last_10": 751.4252049607893, "mean_abs_error_last_25": 842.6021342109964, "mean_abs_error_last_50": 1081.1232031581108, "mean_pred_prob": 0.026131113599694798, "mean_pred_prob_last_10": 0.13392427978105842, "mean_pred_prob_last_25": 0.07501666481548455, "mean_pred_prob_last_50": 0.04530697252921527, "mean_token_accuracy": 0.8649958491325378, "step": 11610 }, { "epoch": 0.20656676088386397, "grad_norm": 2.6742725428585548, "learning_rate": 0.0001, "loss": 0.8975, "mean_abs_error": 810.6378092506701, "mean_abs_error_last_10": 377.7697393438639, "mean_abs_error_last_25": 439.2159226870102, "mean_abs_error_last_50": 580.3049726965298, "mean_pred_prob": 0.030699370510410516, "mean_pred_prob_last_10": 0.159664971427992, "mean_pred_prob_last_25": 0.0885250057792291, "mean_pred_prob_last_50": 0.05256851418525912, "mean_token_accuracy": 0.8663679242134095, "step": 11620 }, { "epoch": 0.2067445291806659, "grad_norm": 2.4550934290642927, "learning_rate": 0.0001, "loss": 0.981, "mean_abs_error": 848.9771954921667, "mean_abs_error_last_10": 252.74274299677177, "mean_abs_error_last_25": 478.1510691895921, "mean_abs_error_last_50": 629.2893561310871, "mean_pred_prob": 0.024520535580813886, "mean_pred_prob_last_10": 0.12780540923122316, "mean_pred_prob_last_25": 0.06883988024201244, "mean_pred_prob_last_50": 0.04095005757408217, "mean_token_accuracy": 0.8685608863830566, "step": 11630 }, { "epoch": 0.20692229747746788, "grad_norm": 1.3032843399449858, "learning_rate": 0.0001, "loss": 0.8615, "mean_abs_error": 518.8481989388613, "mean_abs_error_last_10": 136.80437290369778, "mean_abs_error_last_25": 234.37405842916914, "mean_abs_error_last_50": 330.02528580527064, "mean_pred_prob": 0.03994699236354791, "mean_pred_prob_last_10": 0.20611927572172134, "mean_pred_prob_last_25": 0.11471885043429211, "mean_pred_prob_last_50": 0.06895323094213382, "mean_token_accuracy": 0.8748382449150085, "step": 11640 }, { "epoch": 0.20710006577426981, "grad_norm": 2.945875344474878, "learning_rate": 0.0001, "loss": 0.9759, "mean_abs_error": 451.06097290064207, "mean_abs_error_last_10": 170.29724796499445, "mean_abs_error_last_25": 217.09415860240682, "mean_abs_error_last_50": 267.9224176767698, "mean_pred_prob": 0.02649832561146468, "mean_pred_prob_last_10": 0.14554252810776233, "mean_pred_prob_last_25": 0.0771257346495986, "mean_pred_prob_last_50": 0.045903612906113264, "mean_token_accuracy": 0.8630534052848816, "step": 11650 }, { "epoch": 0.20727783407107175, "grad_norm": 2.1704842095467973, "learning_rate": 0.0001, "loss": 0.8427, "mean_abs_error": 482.15728940874016, "mean_abs_error_last_10": 259.65968628592384, "mean_abs_error_last_25": 336.94632917675574, "mean_abs_error_last_50": 327.16689688905626, "mean_pred_prob": 0.031410619302187116, "mean_pred_prob_last_10": 0.16699516519438476, "mean_pred_prob_last_25": 0.0879304934758693, "mean_pred_prob_last_50": 0.05377566659590229, "mean_token_accuracy": 0.8702224791049957, "step": 11660 }, { "epoch": 0.20745560236787372, "grad_norm": 0.8501203800719329, "learning_rate": 0.0001, "loss": 0.8334, "mean_abs_error": 589.4397637198376, "mean_abs_error_last_10": 213.53261366419923, "mean_abs_error_last_25": 302.92553007193794, "mean_abs_error_last_50": 417.1307814593723, "mean_pred_prob": 0.03619453281862661, "mean_pred_prob_last_10": 0.18362397794844582, "mean_pred_prob_last_25": 0.1040529171063099, "mean_pred_prob_last_50": 0.062091027735732496, "mean_token_accuracy": 0.8715105712413788, "step": 11670 }, { "epoch": 0.20763337066467566, "grad_norm": 1.7160477887324055, "learning_rate": 0.0001, "loss": 0.9022, "mean_abs_error": 1420.186207031207, "mean_abs_error_last_10": 647.97150321616, "mean_abs_error_last_25": 763.8456338871317, "mean_abs_error_last_50": 1023.56315204696, "mean_pred_prob": 0.03836535905720666, "mean_pred_prob_last_10": 0.1697756049761665, "mean_pred_prob_last_25": 0.09708157923887484, "mean_pred_prob_last_50": 0.061907166622404475, "mean_token_accuracy": 0.8688500642776489, "step": 11680 }, { "epoch": 0.2078111389614776, "grad_norm": 1.2698588118011087, "learning_rate": 0.0001, "loss": 0.9117, "mean_abs_error": 1266.4369297104645, "mean_abs_error_last_10": 616.7683482222858, "mean_abs_error_last_25": 676.9993041751317, "mean_abs_error_last_50": 828.8768567687754, "mean_pred_prob": 0.01633098365855403, "mean_pred_prob_last_10": 0.09019362067920156, "mean_pred_prob_last_25": 0.04699059972190298, "mean_pred_prob_last_50": 0.028127858581137845, "mean_token_accuracy": 0.8621604144573212, "step": 11690 }, { "epoch": 0.20798890725827957, "grad_norm": 0.6689258110365994, "learning_rate": 0.0001, "loss": 0.8636, "mean_abs_error": 532.8293871580821, "mean_abs_error_last_10": 187.11975460793226, "mean_abs_error_last_25": 291.6357356603268, "mean_abs_error_last_50": 383.2031945679362, "mean_pred_prob": 0.024789530446287244, "mean_pred_prob_last_10": 0.13988553350791336, "mean_pred_prob_last_25": 0.07080913840909489, "mean_pred_prob_last_50": 0.042516694474034014, "mean_token_accuracy": 0.8774095952510834, "step": 11700 }, { "epoch": 0.2081666755550815, "grad_norm": 1.7133720378962347, "learning_rate": 0.0001, "loss": 0.9519, "mean_abs_error": 607.3836746202934, "mean_abs_error_last_10": 91.8101141756372, "mean_abs_error_last_25": 189.85019972686288, "mean_abs_error_last_50": 331.51988265453133, "mean_pred_prob": 0.03687190898344852, "mean_pred_prob_last_10": 0.19488099133595824, "mean_pred_prob_last_25": 0.10583558201324195, "mean_pred_prob_last_50": 0.06347130064386874, "mean_token_accuracy": 0.8634154319763183, "step": 11710 }, { "epoch": 0.20834444385188344, "grad_norm": 1.3136749633685247, "learning_rate": 0.0001, "loss": 0.8624, "mean_abs_error": 376.4879995814432, "mean_abs_error_last_10": 75.80205435867718, "mean_abs_error_last_25": 131.77131932053004, "mean_abs_error_last_50": 187.67099284001475, "mean_pred_prob": 0.027292555687017737, "mean_pred_prob_last_10": 0.14867089092731475, "mean_pred_prob_last_25": 0.08039444014430046, "mean_pred_prob_last_50": 0.04742163186892867, "mean_token_accuracy": 0.8738796591758728, "step": 11720 }, { "epoch": 0.2085222121486854, "grad_norm": 2.5705137984462545, "learning_rate": 0.0001, "loss": 0.8836, "mean_abs_error": 158.76064323271788, "mean_abs_error_last_10": 68.23742968811743, "mean_abs_error_last_25": 115.78978455644858, "mean_abs_error_last_50": 138.83518831645162, "mean_pred_prob": 0.03417698545381427, "mean_pred_prob_last_10": 0.18034566082060338, "mean_pred_prob_last_25": 0.09484649300575257, "mean_pred_prob_last_50": 0.05805743932723999, "mean_token_accuracy": 0.8780906856060028, "step": 11730 }, { "epoch": 0.20869998044548735, "grad_norm": 1.6582314126247581, "learning_rate": 0.0001, "loss": 1.006, "mean_abs_error": 298.0528074164297, "mean_abs_error_last_10": 124.18066831530291, "mean_abs_error_last_25": 196.8089125861085, "mean_abs_error_last_50": 243.97125871994763, "mean_pred_prob": 0.0433656457811594, "mean_pred_prob_last_10": 0.19909239187836647, "mean_pred_prob_last_25": 0.11306123472750187, "mean_pred_prob_last_50": 0.07120178947225213, "mean_token_accuracy": 0.8652605593204499, "step": 11740 }, { "epoch": 0.2088777487422893, "grad_norm": 1.833842043108104, "learning_rate": 0.0001, "loss": 0.8921, "mean_abs_error": 810.4440833543565, "mean_abs_error_last_10": 289.6396862759308, "mean_abs_error_last_25": 436.3809486895053, "mean_abs_error_last_50": 573.0386248338929, "mean_pred_prob": 0.022780283889733254, "mean_pred_prob_last_10": 0.1197087058564648, "mean_pred_prob_last_25": 0.06287369575584308, "mean_pred_prob_last_50": 0.038207075028913094, "mean_token_accuracy": 0.8719025552272797, "step": 11750 }, { "epoch": 0.20905551703909125, "grad_norm": 1.206549161190342, "learning_rate": 0.0001, "loss": 0.8391, "mean_abs_error": 578.7903818354864, "mean_abs_error_last_10": 224.1548828382412, "mean_abs_error_last_25": 284.75177391344994, "mean_abs_error_last_50": 351.54892465372586, "mean_pred_prob": 0.027851469255983828, "mean_pred_prob_last_10": 0.15100044701248408, "mean_pred_prob_last_25": 0.08006727499887348, "mean_pred_prob_last_50": 0.04795614215545356, "mean_token_accuracy": 0.8746301054954528, "step": 11760 }, { "epoch": 0.2092332853358932, "grad_norm": 2.0771145218180758, "learning_rate": 0.0001, "loss": 0.9534, "mean_abs_error": 299.04618895088373, "mean_abs_error_last_10": 76.9948263576542, "mean_abs_error_last_25": 110.33089062059908, "mean_abs_error_last_50": 173.75542504163502, "mean_pred_prob": 0.03276241405401379, "mean_pred_prob_last_10": 0.17016585934907197, "mean_pred_prob_last_25": 0.09269735394045711, "mean_pred_prob_last_50": 0.055918258940801026, "mean_token_accuracy": 0.868646764755249, "step": 11770 }, { "epoch": 0.20941105363269513, "grad_norm": 1.8855145466086458, "learning_rate": 0.0001, "loss": 0.8619, "mean_abs_error": 687.2330950933986, "mean_abs_error_last_10": 747.3183120913096, "mean_abs_error_last_25": 787.1813805441122, "mean_abs_error_last_50": 644.5018410237964, "mean_pred_prob": 0.03376431326032616, "mean_pred_prob_last_10": 0.16178323344793172, "mean_pred_prob_last_25": 0.0923189205583185, "mean_pred_prob_last_50": 0.05675273227971047, "mean_token_accuracy": 0.8774254381656647, "step": 11780 }, { "epoch": 0.2095888219294971, "grad_norm": 1.62219203941866, "learning_rate": 0.0001, "loss": 0.9223, "mean_abs_error": 323.03525538264705, "mean_abs_error_last_10": 113.36393709144743, "mean_abs_error_last_25": 120.18505470162047, "mean_abs_error_last_50": 197.47878976888364, "mean_pred_prob": 0.03709734668955207, "mean_pred_prob_last_10": 0.18707998227328063, "mean_pred_prob_last_25": 0.10572175206616521, "mean_pred_prob_last_50": 0.0641197550110519, "mean_token_accuracy": 0.870939576625824, "step": 11790 }, { "epoch": 0.20976659022629904, "grad_norm": 1.2795120202212116, "learning_rate": 0.0001, "loss": 0.8488, "mean_abs_error": 381.06559703257057, "mean_abs_error_last_10": 83.88204941618777, "mean_abs_error_last_25": 126.57241308637126, "mean_abs_error_last_50": 214.7094094993965, "mean_pred_prob": 0.029765930166468023, "mean_pred_prob_last_10": 0.16767589282244444, "mean_pred_prob_last_25": 0.08737443508580327, "mean_pred_prob_last_50": 0.0519922150298953, "mean_token_accuracy": 0.8759040117263794, "step": 11800 }, { "epoch": 0.20994435852310098, "grad_norm": 1.7019056077516488, "learning_rate": 0.0001, "loss": 0.9572, "mean_abs_error": 1278.2068048809779, "mean_abs_error_last_10": 558.7638397398262, "mean_abs_error_last_25": 628.217415028355, "mean_abs_error_last_50": 869.5669377614615, "mean_pred_prob": 0.03602403218974359, "mean_pred_prob_last_10": 0.17851545487937984, "mean_pred_prob_last_25": 0.09845278953143861, "mean_pred_prob_last_50": 0.06060732159530744, "mean_token_accuracy": 0.8715672194957733, "step": 11810 }, { "epoch": 0.21012212681990294, "grad_norm": 2.34013195609284, "learning_rate": 0.0001, "loss": 0.8333, "mean_abs_error": 141.98836716904825, "mean_abs_error_last_10": 109.93361496169591, "mean_abs_error_last_25": 97.52218316926835, "mean_abs_error_last_50": 111.7176984798177, "mean_pred_prob": 0.04707644176669419, "mean_pred_prob_last_10": 0.23482532147318125, "mean_pred_prob_last_25": 0.1275540616363287, "mean_pred_prob_last_50": 0.07905022748745978, "mean_token_accuracy": 0.8694454193115234, "step": 11820 }, { "epoch": 0.21029989511670488, "grad_norm": 1.865119046966025, "learning_rate": 0.0001, "loss": 0.9281, "mean_abs_error": 84.91962409387648, "mean_abs_error_last_10": 17.459162890086468, "mean_abs_error_last_25": 27.719841185727745, "mean_abs_error_last_50": 42.083302577211796, "mean_pred_prob": 0.0476031880825758, "mean_pred_prob_last_10": 0.24702619016170502, "mean_pred_prob_last_25": 0.13606655448675156, "mean_pred_prob_last_50": 0.08178750090301037, "mean_token_accuracy": 0.8738372325897217, "step": 11830 }, { "epoch": 0.21047766341350682, "grad_norm": 0.9956536390902552, "learning_rate": 0.0001, "loss": 0.8691, "mean_abs_error": 300.97157058704533, "mean_abs_error_last_10": 98.0595122088724, "mean_abs_error_last_25": 144.73209642864876, "mean_abs_error_last_50": 177.7035505583172, "mean_pred_prob": 0.034835299057886, "mean_pred_prob_last_10": 0.16884384974837302, "mean_pred_prob_last_25": 0.09727880787104368, "mean_pred_prob_last_50": 0.05960243716835976, "mean_token_accuracy": 0.8833135604858399, "step": 11840 }, { "epoch": 0.2106554317103088, "grad_norm": 1.883514143829906, "learning_rate": 0.0001, "loss": 0.9377, "mean_abs_error": 384.98194648504307, "mean_abs_error_last_10": 59.569001848848, "mean_abs_error_last_25": 109.16058922594695, "mean_abs_error_last_50": 194.01824513722045, "mean_pred_prob": 0.024044283898547292, "mean_pred_prob_last_10": 0.13921173177659513, "mean_pred_prob_last_25": 0.07165742870420218, "mean_pred_prob_last_50": 0.04216693858616054, "mean_token_accuracy": 0.8692480862140656, "step": 11850 }, { "epoch": 0.21083320000711073, "grad_norm": 1.4002486978161848, "learning_rate": 0.0001, "loss": 0.852, "mean_abs_error": 1081.5780201772272, "mean_abs_error_last_10": 446.5710891047032, "mean_abs_error_last_25": 599.5267864379026, "mean_abs_error_last_50": 751.1063037591871, "mean_pred_prob": 0.024456515890778973, "mean_pred_prob_last_10": 0.13897562901838684, "mean_pred_prob_last_25": 0.07277657961531077, "mean_pred_prob_last_50": 0.04268435638514347, "mean_token_accuracy": 0.8702242553234101, "step": 11860 }, { "epoch": 0.2110109683039127, "grad_norm": 2.193028805172519, "learning_rate": 0.0001, "loss": 0.8667, "mean_abs_error": 345.094964621559, "mean_abs_error_last_10": 86.60657650484195, "mean_abs_error_last_25": 128.83685106802344, "mean_abs_error_last_50": 218.1172595455865, "mean_pred_prob": 0.030001259758137167, "mean_pred_prob_last_10": 0.16256921403110028, "mean_pred_prob_last_25": 0.08647894011810422, "mean_pred_prob_last_50": 0.05229145297780633, "mean_token_accuracy": 0.8752882599830627, "step": 11870 }, { "epoch": 0.21118873660071463, "grad_norm": 0.8013529463146414, "learning_rate": 0.0001, "loss": 0.8232, "mean_abs_error": 351.9861646030432, "mean_abs_error_last_10": 98.2470360353642, "mean_abs_error_last_25": 161.8073162279491, "mean_abs_error_last_50": 207.2567942372494, "mean_pred_prob": 0.030301515385508536, "mean_pred_prob_last_10": 0.15224042441695929, "mean_pred_prob_last_25": 0.08418765468522907, "mean_pred_prob_last_50": 0.05148760206066072, "mean_token_accuracy": 0.8767567574977875, "step": 11880 }, { "epoch": 0.21136650489751657, "grad_norm": 1.6761831495409976, "learning_rate": 0.0001, "loss": 0.8112, "mean_abs_error": 331.73076952984155, "mean_abs_error_last_10": 140.8035975875248, "mean_abs_error_last_25": 164.5999386000964, "mean_abs_error_last_50": 174.85594717751502, "mean_pred_prob": 0.03910260980483145, "mean_pred_prob_last_10": 0.20595889389514924, "mean_pred_prob_last_25": 0.108578621619381, "mean_pred_prob_last_50": 0.06577329862629995, "mean_token_accuracy": 0.8844096183776855, "step": 11890 }, { "epoch": 0.21154427319431854, "grad_norm": 1.5605649453236818, "learning_rate": 0.0001, "loss": 0.9502, "mean_abs_error": 528.3202320750609, "mean_abs_error_last_10": 240.01729464808926, "mean_abs_error_last_25": 305.05183538722116, "mean_abs_error_last_50": 356.86818395961075, "mean_pred_prob": 0.031221540999831633, "mean_pred_prob_last_10": 0.16960947931511328, "mean_pred_prob_last_25": 0.09016107801580801, "mean_pred_prob_last_50": 0.054109145398251714, "mean_token_accuracy": 0.8717269897460938, "step": 11900 }, { "epoch": 0.21172204149112048, "grad_norm": 1.2427092981267094, "learning_rate": 0.0001, "loss": 0.873, "mean_abs_error": 495.3027726278462, "mean_abs_error_last_10": 148.21801209197977, "mean_abs_error_last_25": 290.5459805656284, "mean_abs_error_last_50": 350.5451493989084, "mean_pred_prob": 0.027574619225924836, "mean_pred_prob_last_10": 0.14690505743492394, "mean_pred_prob_last_25": 0.07863961324328557, "mean_pred_prob_last_50": 0.04702147388015874, "mean_token_accuracy": 0.8644217371940612, "step": 11910 }, { "epoch": 0.21189980978792242, "grad_norm": 2.5048616152255825, "learning_rate": 0.0001, "loss": 0.84, "mean_abs_error": 710.4034428966092, "mean_abs_error_last_10": 259.14762966273076, "mean_abs_error_last_25": 308.4575652223145, "mean_abs_error_last_50": 426.1155660768721, "mean_pred_prob": 0.029602931428235023, "mean_pred_prob_last_10": 0.1433050338178873, "mean_pred_prob_last_25": 0.08303047413937747, "mean_pred_prob_last_50": 0.051062389835715295, "mean_token_accuracy": 0.8833662033081054, "step": 11920 }, { "epoch": 0.21207757808472438, "grad_norm": 1.3914579893213528, "learning_rate": 0.0001, "loss": 0.8728, "mean_abs_error": 568.2633652006255, "mean_abs_error_last_10": 228.80772964210527, "mean_abs_error_last_25": 314.80354427196687, "mean_abs_error_last_50": 401.11112385998604, "mean_pred_prob": 0.034141244628699496, "mean_pred_prob_last_10": 0.18089362488826738, "mean_pred_prob_last_25": 0.09641452333307825, "mean_pred_prob_last_50": 0.057947093847906216, "mean_token_accuracy": 0.8760661900043487, "step": 11930 }, { "epoch": 0.21225534638152632, "grad_norm": 1.4496936154867448, "learning_rate": 0.0001, "loss": 0.9835, "mean_abs_error": 1114.018777496861, "mean_abs_error_last_10": 509.26584100113143, "mean_abs_error_last_25": 622.3200837663719, "mean_abs_error_last_50": 876.6464420648223, "mean_pred_prob": 0.015141879059956408, "mean_pred_prob_last_10": 0.087033585307654, "mean_pred_prob_last_25": 0.04486298102128785, "mean_pred_prob_last_50": 0.025990688434103505, "mean_token_accuracy": 0.8691661894321442, "step": 11940 }, { "epoch": 0.21243311467832826, "grad_norm": 1.0330897544026971, "learning_rate": 0.0001, "loss": 0.8811, "mean_abs_error": 138.9447394613162, "mean_abs_error_last_10": 35.04314216994391, "mean_abs_error_last_25": 88.26541849362484, "mean_abs_error_last_50": 112.8976818651817, "mean_pred_prob": 0.056838497146964075, "mean_pred_prob_last_10": 0.27127439230680467, "mean_pred_prob_last_25": 0.15546264965087175, "mean_pred_prob_last_50": 0.09607106531038881, "mean_token_accuracy": 0.8649341583251953, "step": 11950 }, { "epoch": 0.21261088297513023, "grad_norm": 0.7264236559354892, "learning_rate": 0.0001, "loss": 0.9624, "mean_abs_error": 531.5121799071818, "mean_abs_error_last_10": 216.54406886569774, "mean_abs_error_last_25": 301.67194170965314, "mean_abs_error_last_50": 370.04509072405756, "mean_pred_prob": 0.030687318224227055, "mean_pred_prob_last_10": 0.161654373339843, "mean_pred_prob_last_25": 0.08815347524359822, "mean_pred_prob_last_50": 0.052776839351281524, "mean_token_accuracy": 0.873429411649704, "step": 11960 }, { "epoch": 0.21278865127193217, "grad_norm": 1.4630131473409151, "learning_rate": 0.0001, "loss": 0.9831, "mean_abs_error": 503.3580601441587, "mean_abs_error_last_10": 134.81461467234402, "mean_abs_error_last_25": 220.67330313814614, "mean_abs_error_last_50": 304.39683127157366, "mean_pred_prob": 0.03756377125973813, "mean_pred_prob_last_10": 0.17863762949127704, "mean_pred_prob_last_25": 0.10115101875271648, "mean_pred_prob_last_50": 0.06313878720393404, "mean_token_accuracy": 0.8603512644767761, "step": 11970 }, { "epoch": 0.2129664195687341, "grad_norm": 1.837826443057087, "learning_rate": 0.0001, "loss": 0.9609, "mean_abs_error": 364.99602459288565, "mean_abs_error_last_10": 111.76470474601379, "mean_abs_error_last_25": 221.8601229257687, "mean_abs_error_last_50": 276.7173831436068, "mean_pred_prob": 0.03303151414729655, "mean_pred_prob_last_10": 0.17720238715410233, "mean_pred_prob_last_25": 0.09481735806912184, "mean_pred_prob_last_50": 0.05701871938072145, "mean_token_accuracy": 0.880053174495697, "step": 11980 }, { "epoch": 0.21314418786553607, "grad_norm": 2.7689937751506353, "learning_rate": 0.0001, "loss": 0.8313, "mean_abs_error": 471.01160080939127, "mean_abs_error_last_10": 128.51122556210606, "mean_abs_error_last_25": 187.55146085338006, "mean_abs_error_last_50": 294.84725097093656, "mean_pred_prob": 0.03903324269340373, "mean_pred_prob_last_10": 0.1983731704065576, "mean_pred_prob_last_25": 0.10914467981783674, "mean_pred_prob_last_50": 0.06660091944504529, "mean_token_accuracy": 0.8774777770042419, "step": 11990 }, { "epoch": 0.213321956162338, "grad_norm": 2.9150121661105506, "learning_rate": 0.0001, "loss": 0.8402, "mean_abs_error": 208.7433138735252, "mean_abs_error_last_10": 26.202773191924923, "mean_abs_error_last_25": 52.256642733554955, "mean_abs_error_last_50": 106.08837911897577, "mean_pred_prob": 0.03510751649737358, "mean_pred_prob_last_10": 0.18568985313177108, "mean_pred_prob_last_25": 0.10127130132168531, "mean_pred_prob_last_50": 0.06073639951646328, "mean_token_accuracy": 0.8784161388874054, "step": 12000 }, { "epoch": 0.21349972445913995, "grad_norm": 1.176232792412275, "learning_rate": 0.0001, "loss": 0.8698, "mean_abs_error": 834.2798883515021, "mean_abs_error_last_10": 462.5388798700833, "mean_abs_error_last_25": 547.9871287544771, "mean_abs_error_last_50": 624.6900781943707, "mean_pred_prob": 0.034460165683412924, "mean_pred_prob_last_10": 0.1701855749008246, "mean_pred_prob_last_25": 0.09492736140673515, "mean_pred_prob_last_50": 0.058423302852315825, "mean_token_accuracy": 0.8719037890434265, "step": 12010 }, { "epoch": 0.21367749275594192, "grad_norm": 1.3303502448744504, "learning_rate": 0.0001, "loss": 0.9178, "mean_abs_error": 166.68915126190373, "mean_abs_error_last_10": 72.62059671536389, "mean_abs_error_last_25": 94.44458506948445, "mean_abs_error_last_50": 107.16907675285943, "mean_pred_prob": 0.050549618108198045, "mean_pred_prob_last_10": 0.24746068455278875, "mean_pred_prob_last_25": 0.1385601783171296, "mean_pred_prob_last_50": 0.08531993958167731, "mean_token_accuracy": 0.8756919860839844, "step": 12020 }, { "epoch": 0.21385526105274386, "grad_norm": 1.7697445938163092, "learning_rate": 0.0001, "loss": 0.8135, "mean_abs_error": 350.96152118317866, "mean_abs_error_last_10": 184.25968054467845, "mean_abs_error_last_25": 156.309548948608, "mean_abs_error_last_50": 209.02348942198228, "mean_pred_prob": 0.03803204589057714, "mean_pred_prob_last_10": 0.1800553185865283, "mean_pred_prob_last_25": 0.1026305228471756, "mean_pred_prob_last_50": 0.06437746961601079, "mean_token_accuracy": 0.8807295382022857, "step": 12030 }, { "epoch": 0.2140330293495458, "grad_norm": 0.9911133143510841, "learning_rate": 0.0001, "loss": 0.9343, "mean_abs_error": 176.92582262124043, "mean_abs_error_last_10": 60.447762088854006, "mean_abs_error_last_25": 73.60430749103034, "mean_abs_error_last_50": 109.95801059131782, "mean_pred_prob": 0.04227097383700311, "mean_pred_prob_last_10": 0.2230904433876276, "mean_pred_prob_last_25": 0.11774134803563356, "mean_pred_prob_last_50": 0.07125563202425837, "mean_token_accuracy": 0.8768994927406311, "step": 12040 }, { "epoch": 0.21421079764634776, "grad_norm": 1.6937860205524382, "learning_rate": 0.0001, "loss": 0.8726, "mean_abs_error": 77.43402834747442, "mean_abs_error_last_10": 19.81306710160775, "mean_abs_error_last_25": 36.59939534139063, "mean_abs_error_last_50": 49.80797964491305, "mean_pred_prob": 0.0633300831541419, "mean_pred_prob_last_10": 0.28396191373467444, "mean_pred_prob_last_25": 0.16811933368444443, "mean_pred_prob_last_50": 0.10558753702789545, "mean_token_accuracy": 0.8752695500850678, "step": 12050 }, { "epoch": 0.2143885659431497, "grad_norm": 1.8663748395153066, "learning_rate": 0.0001, "loss": 0.857, "mean_abs_error": 263.5534432323868, "mean_abs_error_last_10": 86.45829989881511, "mean_abs_error_last_25": 102.2100068449475, "mean_abs_error_last_50": 178.7714960843055, "mean_pred_prob": 0.04668718494940549, "mean_pred_prob_last_10": 0.20856768693774938, "mean_pred_prob_last_25": 0.12765393909066916, "mean_pred_prob_last_50": 0.07942569484002888, "mean_token_accuracy": 0.8710464477539063, "step": 12060 }, { "epoch": 0.21456633423995164, "grad_norm": 1.354076634750099, "learning_rate": 0.0001, "loss": 0.9125, "mean_abs_error": 1339.392999140241, "mean_abs_error_last_10": 761.6378040221307, "mean_abs_error_last_25": 887.6733235706275, "mean_abs_error_last_50": 1003.2802602433412, "mean_pred_prob": 0.02292611462471541, "mean_pred_prob_last_10": 0.13462980499098193, "mean_pred_prob_last_25": 0.06767850366377388, "mean_pred_prob_last_50": 0.03954172268422553, "mean_token_accuracy": 0.8719287395477295, "step": 12070 }, { "epoch": 0.2147441025367536, "grad_norm": 1.7886629374249237, "learning_rate": 0.0001, "loss": 0.9096, "mean_abs_error": 506.1285768943627, "mean_abs_error_last_10": 186.1764492395, "mean_abs_error_last_25": 210.4044056108919, "mean_abs_error_last_50": 291.633997987223, "mean_pred_prob": 0.024912057718029245, "mean_pred_prob_last_10": 0.11831965015735477, "mean_pred_prob_last_25": 0.06746058595599606, "mean_pred_prob_last_50": 0.042288495122920725, "mean_token_accuracy": 0.8729872345924378, "step": 12080 }, { "epoch": 0.21492187083355554, "grad_norm": 1.9908842757309606, "learning_rate": 0.0001, "loss": 0.8687, "mean_abs_error": 749.9998157171746, "mean_abs_error_last_10": 133.15324970672935, "mean_abs_error_last_25": 208.9452372462387, "mean_abs_error_last_50": 383.77621762483153, "mean_pred_prob": 0.024895629921229557, "mean_pred_prob_last_10": 0.12534790923818945, "mean_pred_prob_last_25": 0.06991620828630403, "mean_pred_prob_last_50": 0.04214724687626585, "mean_token_accuracy": 0.8682981431484222, "step": 12090 }, { "epoch": 0.21509963913035748, "grad_norm": 2.2381580707969957, "learning_rate": 0.0001, "loss": 0.8483, "mean_abs_error": 645.6446124392044, "mean_abs_error_last_10": 197.4357484077066, "mean_abs_error_last_25": 338.8462848137557, "mean_abs_error_last_50": 415.5400791698056, "mean_pred_prob": 0.015100969024933875, "mean_pred_prob_last_10": 0.08469270813511684, "mean_pred_prob_last_25": 0.04350617997115478, "mean_pred_prob_last_50": 0.02598056342685595, "mean_token_accuracy": 0.8733287990093231, "step": 12100 }, { "epoch": 0.21527740742715945, "grad_norm": 1.4160342579644987, "learning_rate": 0.0001, "loss": 0.8208, "mean_abs_error": 497.8113653390541, "mean_abs_error_last_10": 111.10662259734754, "mean_abs_error_last_25": 170.13817435435425, "mean_abs_error_last_50": 263.2833945685688, "mean_pred_prob": 0.025766613730229437, "mean_pred_prob_last_10": 0.1390642525628209, "mean_pred_prob_last_25": 0.07506646439433098, "mean_pred_prob_last_50": 0.04487234577536583, "mean_token_accuracy": 0.8738782942295075, "step": 12110 }, { "epoch": 0.2154551757239614, "grad_norm": 2.1885411381787288, "learning_rate": 0.0001, "loss": 0.9838, "mean_abs_error": 256.73103254945994, "mean_abs_error_last_10": 105.95948430421659, "mean_abs_error_last_25": 196.64847367348506, "mean_abs_error_last_50": 204.72515443049065, "mean_pred_prob": 0.037507379148155454, "mean_pred_prob_last_10": 0.17944371178746224, "mean_pred_prob_last_25": 0.0993470166809857, "mean_pred_prob_last_50": 0.06301564993336797, "mean_token_accuracy": 0.8738940536975861, "step": 12120 }, { "epoch": 0.21563294402076333, "grad_norm": 1.2418670142008805, "learning_rate": 0.0001, "loss": 0.8834, "mean_abs_error": 788.9414314781263, "mean_abs_error_last_10": 317.9965263623403, "mean_abs_error_last_25": 343.1799671895631, "mean_abs_error_last_50": 454.5149790809277, "mean_pred_prob": 0.037629319235566075, "mean_pred_prob_last_10": 0.1817912457510829, "mean_pred_prob_last_25": 0.1035319171322044, "mean_pred_prob_last_50": 0.06345035098493099, "mean_token_accuracy": 0.8770008683204651, "step": 12130 }, { "epoch": 0.2158107123175653, "grad_norm": 1.8509174289753496, "learning_rate": 0.0001, "loss": 0.841, "mean_abs_error": 1077.9516741851744, "mean_abs_error_last_10": 405.9777768319791, "mean_abs_error_last_25": 738.9803113775313, "mean_abs_error_last_50": 829.9032678046951, "mean_pred_prob": 0.025081541039980948, "mean_pred_prob_last_10": 0.13691175364656374, "mean_pred_prob_last_25": 0.07349845064454712, "mean_pred_prob_last_50": 0.043615946249337866, "mean_token_accuracy": 0.8792125463485718, "step": 12140 }, { "epoch": 0.21598848061436723, "grad_norm": 2.606368380933639, "learning_rate": 0.0001, "loss": 0.8252, "mean_abs_error": 376.50461963806805, "mean_abs_error_last_10": 102.39178538400131, "mean_abs_error_last_25": 137.36609659040602, "mean_abs_error_last_50": 226.11360388958974, "mean_pred_prob": 0.04269860079512, "mean_pred_prob_last_10": 0.20757140759378673, "mean_pred_prob_last_25": 0.11793078398331999, "mean_pred_prob_last_50": 0.07157147806137801, "mean_token_accuracy": 0.8770746946334839, "step": 12150 }, { "epoch": 0.21616624891116917, "grad_norm": 1.5101835738459475, "learning_rate": 0.0001, "loss": 0.9485, "mean_abs_error": 436.3168596643469, "mean_abs_error_last_10": 101.59879833922115, "mean_abs_error_last_25": 134.27638324807157, "mean_abs_error_last_50": 237.44594895443214, "mean_pred_prob": 0.04431054345332086, "mean_pred_prob_last_10": 0.22933156188810244, "mean_pred_prob_last_25": 0.12662672894075513, "mean_pred_prob_last_50": 0.07591363796964287, "mean_token_accuracy": 0.8775364756584167, "step": 12160 }, { "epoch": 0.21634401720797114, "grad_norm": 1.3565900140605391, "learning_rate": 0.0001, "loss": 0.9461, "mean_abs_error": 416.2796860669249, "mean_abs_error_last_10": 202.31638125842596, "mean_abs_error_last_25": 249.58203439343657, "mean_abs_error_last_50": 320.1490357279231, "mean_pred_prob": 0.032260246109217405, "mean_pred_prob_last_10": 0.17903459854424, "mean_pred_prob_last_25": 0.09294174304232002, "mean_pred_prob_last_50": 0.05502614974975586, "mean_token_accuracy": 0.8733980357646942, "step": 12170 }, { "epoch": 0.21652178550477308, "grad_norm": 1.5109980023027545, "learning_rate": 0.0001, "loss": 0.8872, "mean_abs_error": 879.4202846336706, "mean_abs_error_last_10": 270.9132631414336, "mean_abs_error_last_25": 352.3822612014826, "mean_abs_error_last_50": 517.8569758196943, "mean_pred_prob": 0.020374336349777877, "mean_pred_prob_last_10": 0.1118485847255215, "mean_pred_prob_last_25": 0.05884986475575715, "mean_pred_prob_last_50": 0.03517865779576823, "mean_token_accuracy": 0.8712960720062256, "step": 12180 }, { "epoch": 0.21669955380157502, "grad_norm": 1.1997581604733478, "learning_rate": 0.0001, "loss": 0.8534, "mean_abs_error": 525.8286228468039, "mean_abs_error_last_10": 144.81533448806235, "mean_abs_error_last_25": 193.2498000469436, "mean_abs_error_last_50": 269.1488212242073, "mean_pred_prob": 0.02599679506383836, "mean_pred_prob_last_10": 0.1346279125660658, "mean_pred_prob_last_25": 0.07366402642801403, "mean_pred_prob_last_50": 0.044709078315645454, "mean_token_accuracy": 0.8795737028121948, "step": 12190 }, { "epoch": 0.21687732209837698, "grad_norm": 1.430749974851287, "learning_rate": 0.0001, "loss": 0.8952, "mean_abs_error": 1197.1669231159967, "mean_abs_error_last_10": 640.3326535680882, "mean_abs_error_last_25": 793.0598564966243, "mean_abs_error_last_50": 912.6927075310032, "mean_pred_prob": 0.035533717962971424, "mean_pred_prob_last_10": 0.1564041061574244, "mean_pred_prob_last_25": 0.09039396289444994, "mean_pred_prob_last_50": 0.05751966694806469, "mean_token_accuracy": 0.8712430357933044, "step": 12200 }, { "epoch": 0.21705509039517892, "grad_norm": 1.3006849503513103, "learning_rate": 0.0001, "loss": 0.8895, "mean_abs_error": 722.6768254252107, "mean_abs_error_last_10": 239.18780345010964, "mean_abs_error_last_25": 328.38347186470435, "mean_abs_error_last_50": 413.123335270899, "mean_pred_prob": 0.02942771092057228, "mean_pred_prob_last_10": 0.14327189088799058, "mean_pred_prob_last_25": 0.08102077045477926, "mean_pred_prob_last_50": 0.05026488617295399, "mean_token_accuracy": 0.8721463024616242, "step": 12210 }, { "epoch": 0.21723285869198086, "grad_norm": 1.5771100731465753, "learning_rate": 0.0001, "loss": 0.8893, "mean_abs_error": 743.3488892467284, "mean_abs_error_last_10": 273.2563723685879, "mean_abs_error_last_25": 284.04224628210653, "mean_abs_error_last_50": 371.7064772951317, "mean_pred_prob": 0.019322017696686088, "mean_pred_prob_last_10": 0.10856681549921632, "mean_pred_prob_last_25": 0.057747541833668946, "mean_pred_prob_last_50": 0.03427345282398164, "mean_token_accuracy": 0.8757495164871216, "step": 12220 }, { "epoch": 0.21741062698878283, "grad_norm": 0.8161719963862795, "learning_rate": 0.0001, "loss": 0.8557, "mean_abs_error": 758.3275203734289, "mean_abs_error_last_10": 401.0200241705172, "mean_abs_error_last_25": 404.52670124703457, "mean_abs_error_last_50": 517.6251720754494, "mean_pred_prob": 0.04410554105998017, "mean_pred_prob_last_10": 0.21320886369794606, "mean_pred_prob_last_25": 0.12365113723790273, "mean_pred_prob_last_50": 0.07456204117916058, "mean_token_accuracy": 0.8658639311790466, "step": 12230 }, { "epoch": 0.21758839528558477, "grad_norm": 1.8731841405527698, "learning_rate": 0.0001, "loss": 0.795, "mean_abs_error": 400.51594773862587, "mean_abs_error_last_10": 136.47711478014983, "mean_abs_error_last_25": 180.52972988847952, "mean_abs_error_last_50": 233.72873669318852, "mean_pred_prob": 0.04008029322139919, "mean_pred_prob_last_10": 0.18853555498644708, "mean_pred_prob_last_25": 0.10935988435521722, "mean_pred_prob_last_50": 0.06751655529951676, "mean_token_accuracy": 0.8754282832145691, "step": 12240 }, { "epoch": 0.2177661635823867, "grad_norm": 3.1299275456157583, "learning_rate": 0.0001, "loss": 0.8239, "mean_abs_error": 513.4864176171646, "mean_abs_error_last_10": 79.19589844678433, "mean_abs_error_last_25": 150.83225181453562, "mean_abs_error_last_50": 259.3304446403314, "mean_pred_prob": 0.026717853778973222, "mean_pred_prob_last_10": 0.14840511102229356, "mean_pred_prob_last_25": 0.07575543923303485, "mean_pred_prob_last_50": 0.045185253489762545, "mean_token_accuracy": 0.8681615710258483, "step": 12250 }, { "epoch": 0.21794393187918867, "grad_norm": 1.4138458857187102, "learning_rate": 0.0001, "loss": 0.8923, "mean_abs_error": 648.1516336302933, "mean_abs_error_last_10": 227.8867198508132, "mean_abs_error_last_25": 414.73944068104856, "mean_abs_error_last_50": 503.74158584220515, "mean_pred_prob": 0.03197573780780658, "mean_pred_prob_last_10": 0.15313756447285415, "mean_pred_prob_last_25": 0.08626314478460699, "mean_pred_prob_last_50": 0.054039420536719264, "mean_token_accuracy": 0.8699383854866027, "step": 12260 }, { "epoch": 0.2181217001759906, "grad_norm": 3.605314235472834, "learning_rate": 0.0001, "loss": 0.8963, "mean_abs_error": 658.8991617700768, "mean_abs_error_last_10": 146.9484135763426, "mean_abs_error_last_25": 213.15635726512284, "mean_abs_error_last_50": 359.95690970054375, "mean_pred_prob": 0.03864694021176547, "mean_pred_prob_last_10": 0.20141743225394748, "mean_pred_prob_last_25": 0.10961840074160137, "mean_pred_prob_last_50": 0.06611565779312514, "mean_token_accuracy": 0.8659316658973694, "step": 12270 }, { "epoch": 0.21829946847279255, "grad_norm": 0.9925347081978025, "learning_rate": 0.0001, "loss": 0.8859, "mean_abs_error": 548.8948087935609, "mean_abs_error_last_10": 308.6914935819592, "mean_abs_error_last_25": 352.33411461534445, "mean_abs_error_last_50": 443.4534088002053, "mean_pred_prob": 0.05183462378627155, "mean_pred_prob_last_10": 0.22517069249006455, "mean_pred_prob_last_25": 0.12995621571608354, "mean_pred_prob_last_50": 0.08483723040262703, "mean_token_accuracy": 0.8756538271903992, "step": 12280 }, { "epoch": 0.21847723676959452, "grad_norm": 2.692830627764154, "learning_rate": 0.0001, "loss": 0.8792, "mean_abs_error": 449.96147215961753, "mean_abs_error_last_10": 86.04461664621809, "mean_abs_error_last_25": 141.34777320398277, "mean_abs_error_last_50": 253.03908674580816, "mean_pred_prob": 0.03757117809727788, "mean_pred_prob_last_10": 0.14980210606008768, "mean_pred_prob_last_25": 0.09075891384854913, "mean_pred_prob_last_50": 0.05931160906329751, "mean_token_accuracy": 0.8764506757259369, "step": 12290 }, { "epoch": 0.21865500506639646, "grad_norm": 1.0886119412117543, "learning_rate": 0.0001, "loss": 0.9428, "mean_abs_error": 1091.3045037440368, "mean_abs_error_last_10": 522.4644403349472, "mean_abs_error_last_25": 568.0882317073848, "mean_abs_error_last_50": 707.5052612202014, "mean_pred_prob": 0.034450269033550286, "mean_pred_prob_last_10": 0.17247338324086742, "mean_pred_prob_last_25": 0.09569538636133075, "mean_pred_prob_last_50": 0.057683359662769364, "mean_token_accuracy": 0.8724733531475067, "step": 12300 }, { "epoch": 0.2188327733631984, "grad_norm": 1.4054372793345533, "learning_rate": 0.0001, "loss": 0.7458, "mean_abs_error": 825.2833359817005, "mean_abs_error_last_10": 318.1559409018441, "mean_abs_error_last_25": 403.85932320127915, "mean_abs_error_last_50": 547.4807880363408, "mean_pred_prob": 0.036297055607428776, "mean_pred_prob_last_10": 0.17836550368228926, "mean_pred_prob_last_25": 0.09832267659367062, "mean_pred_prob_last_50": 0.06044499668641947, "mean_token_accuracy": 0.8880791187286377, "step": 12310 }, { "epoch": 0.21901054166000036, "grad_norm": 1.1822087801344385, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 1145.400876298922, "mean_abs_error_last_10": 576.9865043714539, "mean_abs_error_last_25": 627.3236842328691, "mean_abs_error_last_50": 765.9806473814875, "mean_pred_prob": 0.023214277750230394, "mean_pred_prob_last_10": 0.11709524180041626, "mean_pred_prob_last_25": 0.0651600843644701, "mean_pred_prob_last_50": 0.03968793603999075, "mean_token_accuracy": 0.8741910099983216, "step": 12320 }, { "epoch": 0.2191883099568023, "grad_norm": 1.2790690905027153, "learning_rate": 0.0001, "loss": 0.8814, "mean_abs_error": 645.5457827519225, "mean_abs_error_last_10": 294.1301102644579, "mean_abs_error_last_25": 340.06636124825275, "mean_abs_error_last_50": 414.3850283496419, "mean_pred_prob": 0.039774926169775425, "mean_pred_prob_last_10": 0.1943037582328543, "mean_pred_prob_last_25": 0.11019431701861322, "mean_pred_prob_last_50": 0.06747106526745483, "mean_token_accuracy": 0.8675763785839081, "step": 12330 }, { "epoch": 0.21936607825360424, "grad_norm": 1.0842825439437205, "learning_rate": 0.0001, "loss": 0.9002, "mean_abs_error": 873.8647605306663, "mean_abs_error_last_10": 502.46753680656394, "mean_abs_error_last_25": 513.9821775016005, "mean_abs_error_last_50": 595.2008176563753, "mean_pred_prob": 0.027985783544136212, "mean_pred_prob_last_10": 0.13731639471952803, "mean_pred_prob_last_25": 0.07840868753264658, "mean_pred_prob_last_50": 0.047826905152760446, "mean_token_accuracy": 0.8707474708557129, "step": 12340 }, { "epoch": 0.2195438465504062, "grad_norm": 1.1013144309348717, "learning_rate": 0.0001, "loss": 0.8933, "mean_abs_error": 451.66636349610724, "mean_abs_error_last_10": 157.11847007163627, "mean_abs_error_last_25": 197.52645974189824, "mean_abs_error_last_50": 270.2334842897035, "mean_pred_prob": 0.0440121776657179, "mean_pred_prob_last_10": 0.21080933641642333, "mean_pred_prob_last_25": 0.12003607125952839, "mean_pred_prob_last_50": 0.07440440217033029, "mean_token_accuracy": 0.8788997650146484, "step": 12350 }, { "epoch": 0.21972161484720815, "grad_norm": 1.175575438090727, "learning_rate": 0.0001, "loss": 0.7433, "mean_abs_error": 159.99456092165687, "mean_abs_error_last_10": 72.38558109927453, "mean_abs_error_last_25": 92.53986800744218, "mean_abs_error_last_50": 112.58933743645339, "mean_pred_prob": 0.037184218596667054, "mean_pred_prob_last_10": 0.19359052404761315, "mean_pred_prob_last_25": 0.10613568406552076, "mean_pred_prob_last_50": 0.06382783111184835, "mean_token_accuracy": 0.8790655255317688, "step": 12360 }, { "epoch": 0.21989938314401009, "grad_norm": 1.4595101325688948, "learning_rate": 0.0001, "loss": 0.938, "mean_abs_error": 555.882676734632, "mean_abs_error_last_10": 112.04607916521404, "mean_abs_error_last_25": 146.56644972635098, "mean_abs_error_last_50": 283.30995065870053, "mean_pred_prob": 0.032763570873066784, "mean_pred_prob_last_10": 0.1667237157933414, "mean_pred_prob_last_25": 0.0942890583537519, "mean_pred_prob_last_50": 0.056539533124305305, "mean_token_accuracy": 0.8731998562812805, "step": 12370 }, { "epoch": 0.22007715144081205, "grad_norm": 1.4554947762834767, "learning_rate": 0.0001, "loss": 0.8824, "mean_abs_error": 1086.0128312163813, "mean_abs_error_last_10": 483.1913880143458, "mean_abs_error_last_25": 540.8247234880797, "mean_abs_error_last_50": 704.5551861831384, "mean_pred_prob": 0.01899104166950565, "mean_pred_prob_last_10": 0.09691325352760032, "mean_pred_prob_last_25": 0.054538208368467164, "mean_pred_prob_last_50": 0.03312190828146413, "mean_token_accuracy": 0.8752659320831299, "step": 12380 }, { "epoch": 0.220254919737614, "grad_norm": 3.485080683485697, "learning_rate": 0.0001, "loss": 0.903, "mean_abs_error": 814.332293546854, "mean_abs_error_last_10": 155.18694286789278, "mean_abs_error_last_25": 253.36002381746553, "mean_abs_error_last_50": 444.91938539787577, "mean_pred_prob": 0.03297943680663593, "mean_pred_prob_last_10": 0.1688099523773417, "mean_pred_prob_last_25": 0.09370431349379942, "mean_pred_prob_last_50": 0.05601065853261389, "mean_token_accuracy": 0.866928106546402, "step": 12390 }, { "epoch": 0.22043268803441593, "grad_norm": 1.4181366668651734, "learning_rate": 0.0001, "loss": 0.7868, "mean_abs_error": 732.2170203748889, "mean_abs_error_last_10": 347.083433169758, "mean_abs_error_last_25": 370.05786117188603, "mean_abs_error_last_50": 432.2344828706123, "mean_pred_prob": 0.02877198893693276, "mean_pred_prob_last_10": 0.15177161254687235, "mean_pred_prob_last_25": 0.08439989912440068, "mean_pred_prob_last_50": 0.04955792461405508, "mean_token_accuracy": 0.8724436104297638, "step": 12400 }, { "epoch": 0.2206104563312179, "grad_norm": 1.3374644157319562, "learning_rate": 0.0001, "loss": 0.9354, "mean_abs_error": 265.3179423497956, "mean_abs_error_last_10": 148.5759010211993, "mean_abs_error_last_25": 174.76890572303301, "mean_abs_error_last_50": 172.93834692707236, "mean_pred_prob": 0.04211871169973165, "mean_pred_prob_last_10": 0.21220083236694337, "mean_pred_prob_last_25": 0.11968064000830055, "mean_pred_prob_last_50": 0.07252924232743681, "mean_token_accuracy": 0.8586375892162323, "step": 12410 }, { "epoch": 0.22078822462801984, "grad_norm": 1.4611455318863378, "learning_rate": 0.0001, "loss": 0.8196, "mean_abs_error": 225.01107155069303, "mean_abs_error_last_10": 37.31420140847327, "mean_abs_error_last_25": 67.86911052044182, "mean_abs_error_last_50": 110.53812933188951, "mean_pred_prob": 0.03828376829624176, "mean_pred_prob_last_10": 0.20073453076183795, "mean_pred_prob_last_25": 0.10757216494530439, "mean_pred_prob_last_50": 0.06537990793585777, "mean_token_accuracy": 0.8826544046401977, "step": 12420 }, { "epoch": 0.22096599292482177, "grad_norm": 1.0221348118353406, "learning_rate": 0.0001, "loss": 0.9744, "mean_abs_error": 233.26539930616497, "mean_abs_error_last_10": 35.80663321298796, "mean_abs_error_last_25": 51.70886697417171, "mean_abs_error_last_50": 118.97580032969161, "mean_pred_prob": 0.04148263824172318, "mean_pred_prob_last_10": 0.21415861882269382, "mean_pred_prob_last_25": 0.11947254687547684, "mean_pred_prob_last_50": 0.07165379449725151, "mean_token_accuracy": 0.8655694603919983, "step": 12430 }, { "epoch": 0.22114376122162374, "grad_norm": 3.57504978366468, "learning_rate": 0.0001, "loss": 0.916, "mean_abs_error": 203.46069951616204, "mean_abs_error_last_10": 48.66614863596646, "mean_abs_error_last_25": 77.56014021544713, "mean_abs_error_last_50": 133.10941854687738, "mean_pred_prob": 0.03478737436234951, "mean_pred_prob_last_10": 0.1755179151892662, "mean_pred_prob_last_25": 0.09959070533514022, "mean_pred_prob_last_50": 0.060189868602901694, "mean_token_accuracy": 0.8626110970973968, "step": 12440 }, { "epoch": 0.22132152951842568, "grad_norm": 0.7718397670359385, "learning_rate": 0.0001, "loss": 0.8122, "mean_abs_error": 358.55239753999393, "mean_abs_error_last_10": 93.51410659447306, "mean_abs_error_last_25": 107.65068902785697, "mean_abs_error_last_50": 174.3165400388852, "mean_pred_prob": 0.03407990352716297, "mean_pred_prob_last_10": 0.17579738274216652, "mean_pred_prob_last_25": 0.09768469696864486, "mean_pred_prob_last_50": 0.05849778414703906, "mean_token_accuracy": 0.8756913542747498, "step": 12450 }, { "epoch": 0.22149929781522762, "grad_norm": 2.2462571732833765, "learning_rate": 0.0001, "loss": 0.9251, "mean_abs_error": 912.7742789983917, "mean_abs_error_last_10": 393.9780020332554, "mean_abs_error_last_25": 510.5571932509771, "mean_abs_error_last_50": 670.3606478569739, "mean_pred_prob": 0.028972653731761965, "mean_pred_prob_last_10": 0.1575010034663137, "mean_pred_prob_last_25": 0.08300395528785884, "mean_pred_prob_last_50": 0.04926208814140409, "mean_token_accuracy": 0.8743777990341186, "step": 12460 }, { "epoch": 0.22167706611202959, "grad_norm": 1.2750276144214279, "learning_rate": 0.0001, "loss": 0.8322, "mean_abs_error": 359.93176027246574, "mean_abs_error_last_10": 105.93683665620362, "mean_abs_error_last_25": 169.42855183186225, "mean_abs_error_last_50": 236.12433276444693, "mean_pred_prob": 0.03857379332184792, "mean_pred_prob_last_10": 0.1836058171465993, "mean_pred_prob_last_25": 0.1068353259935975, "mean_pred_prob_last_50": 0.06471887403167784, "mean_token_accuracy": 0.8758804619312286, "step": 12470 }, { "epoch": 0.22185483440883152, "grad_norm": 2.1860978342998076, "learning_rate": 0.0001, "loss": 0.8613, "mean_abs_error": 460.5870609520715, "mean_abs_error_last_10": 186.06934560455815, "mean_abs_error_last_25": 188.65173964213142, "mean_abs_error_last_50": 263.94149136673104, "mean_pred_prob": 0.036052922648377714, "mean_pred_prob_last_10": 0.19452634127810597, "mean_pred_prob_last_25": 0.1040943696978502, "mean_pred_prob_last_50": 0.06212137686670758, "mean_token_accuracy": 0.8784567773342132, "step": 12480 }, { "epoch": 0.22203260270563346, "grad_norm": 1.6537974383521101, "learning_rate": 0.0001, "loss": 0.8388, "mean_abs_error": 207.24900835669942, "mean_abs_error_last_10": 56.4014070138483, "mean_abs_error_last_25": 97.67613708627592, "mean_abs_error_last_50": 151.78576413305112, "mean_pred_prob": 0.039564395789057016, "mean_pred_prob_last_10": 0.2022631995379925, "mean_pred_prob_last_25": 0.11026619244366884, "mean_pred_prob_last_50": 0.0667213637381792, "mean_token_accuracy": 0.8736485958099365, "step": 12490 }, { "epoch": 0.22221037100243543, "grad_norm": 1.9513331460147734, "learning_rate": 0.0001, "loss": 0.8785, "mean_abs_error": 273.84457284491884, "mean_abs_error_last_10": 85.59231280611486, "mean_abs_error_last_25": 130.7039755761698, "mean_abs_error_last_50": 178.12502052189308, "mean_pred_prob": 0.03533787957858294, "mean_pred_prob_last_10": 0.18189196065068244, "mean_pred_prob_last_25": 0.09929105332121253, "mean_pred_prob_last_50": 0.060408892761915925, "mean_token_accuracy": 0.8697553217411041, "step": 12500 }, { "epoch": 0.22238813929923737, "grad_norm": 1.163969553451681, "learning_rate": 0.0001, "loss": 0.8902, "mean_abs_error": 594.7349722765077, "mean_abs_error_last_10": 224.55308424104092, "mean_abs_error_last_25": 243.74439511871628, "mean_abs_error_last_50": 343.1423049439751, "mean_pred_prob": 0.029805874539306387, "mean_pred_prob_last_10": 0.16072792964987456, "mean_pred_prob_last_25": 0.08705914921592921, "mean_pred_prob_last_50": 0.050838996830862015, "mean_token_accuracy": 0.8718930065631867, "step": 12510 }, { "epoch": 0.2225659075960393, "grad_norm": 0.9203270371974057, "learning_rate": 0.0001, "loss": 0.9722, "mean_abs_error": 368.6192851591739, "mean_abs_error_last_10": 164.05581852478863, "mean_abs_error_last_25": 238.1123274170845, "mean_abs_error_last_50": 257.61673108077275, "mean_pred_prob": 0.030663894675672054, "mean_pred_prob_last_10": 0.15920035745948552, "mean_pred_prob_last_25": 0.08602606090717017, "mean_pred_prob_last_50": 0.05232313594315201, "mean_token_accuracy": 0.8670771002769471, "step": 12520 }, { "epoch": 0.22274367589284128, "grad_norm": 1.7342783752441935, "learning_rate": 0.0001, "loss": 0.8465, "mean_abs_error": 721.5190132458379, "mean_abs_error_last_10": 233.94431737764822, "mean_abs_error_last_25": 336.03709258712433, "mean_abs_error_last_50": 436.4830614009751, "mean_pred_prob": 0.03158955888357014, "mean_pred_prob_last_10": 0.16838312345789747, "mean_pred_prob_last_25": 0.09219858470605687, "mean_pred_prob_last_50": 0.05497871383558959, "mean_token_accuracy": 0.8725012719631196, "step": 12530 }, { "epoch": 0.22292144418964321, "grad_norm": 3.601552685515776, "learning_rate": 0.0001, "loss": 0.8613, "mean_abs_error": 805.9506909929166, "mean_abs_error_last_10": 258.0389372650926, "mean_abs_error_last_25": 412.88578796492027, "mean_abs_error_last_50": 562.6248680835927, "mean_pred_prob": 0.02948399709712248, "mean_pred_prob_last_10": 0.1671851779159624, "mean_pred_prob_last_25": 0.08606550726108253, "mean_pred_prob_last_50": 0.05035914470208809, "mean_token_accuracy": 0.8643257081508636, "step": 12540 }, { "epoch": 0.22309921248644518, "grad_norm": 0.9169543562813742, "learning_rate": 0.0001, "loss": 0.8269, "mean_abs_error": 1227.8005346552177, "mean_abs_error_last_10": 667.4956040356993, "mean_abs_error_last_25": 755.4141289610272, "mean_abs_error_last_50": 991.6001523673676, "mean_pred_prob": 0.028777374047786, "mean_pred_prob_last_10": 0.14387617735483219, "mean_pred_prob_last_25": 0.07930389934044797, "mean_pred_prob_last_50": 0.048810900129319636, "mean_token_accuracy": 0.8853861629962921, "step": 12550 }, { "epoch": 0.22327698078324712, "grad_norm": 1.5445939661594599, "learning_rate": 0.0001, "loss": 0.8193, "mean_abs_error": 189.44831317379644, "mean_abs_error_last_10": 45.681055400445004, "mean_abs_error_last_25": 78.32729321693651, "mean_abs_error_last_50": 123.1051399754399, "mean_pred_prob": 0.0347580892033875, "mean_pred_prob_last_10": 0.17792913019657136, "mean_pred_prob_last_25": 0.09669706132262945, "mean_pred_prob_last_50": 0.05877727381885052, "mean_token_accuracy": 0.876052051782608, "step": 12560 }, { "epoch": 0.22345474908004906, "grad_norm": 1.3437205292501493, "learning_rate": 0.0001, "loss": 0.8352, "mean_abs_error": 296.7283069600896, "mean_abs_error_last_10": 117.04001815779282, "mean_abs_error_last_25": 136.44290575204604, "mean_abs_error_last_50": 227.90473432244318, "mean_pred_prob": 0.04193559093400836, "mean_pred_prob_last_10": 0.19809479787945747, "mean_pred_prob_last_25": 0.11136716566979885, "mean_pred_prob_last_50": 0.06992630772292614, "mean_token_accuracy": 0.873814868927002, "step": 12570 }, { "epoch": 0.22363251737685103, "grad_norm": 1.9590265205056134, "learning_rate": 0.0001, "loss": 0.8567, "mean_abs_error": 298.0939663267922, "mean_abs_error_last_10": 130.24246341084506, "mean_abs_error_last_25": 170.2785097125054, "mean_abs_error_last_50": 212.88388686915982, "mean_pred_prob": 0.051063843490555885, "mean_pred_prob_last_10": 0.23004166316241026, "mean_pred_prob_last_25": 0.13722062185406686, "mean_pred_prob_last_50": 0.08554991567507386, "mean_token_accuracy": 0.8758633852005004, "step": 12580 }, { "epoch": 0.22381028567365296, "grad_norm": 1.3126863385778516, "learning_rate": 0.0001, "loss": 0.8594, "mean_abs_error": 795.3071824003304, "mean_abs_error_last_10": 281.90526610115364, "mean_abs_error_last_25": 346.5296782746985, "mean_abs_error_last_50": 478.49821960472065, "mean_pred_prob": 0.029307216184679418, "mean_pred_prob_last_10": 0.16634508831775746, "mean_pred_prob_last_25": 0.08664382622810081, "mean_pred_prob_last_50": 0.050275354098994286, "mean_token_accuracy": 0.8642687380313874, "step": 12590 }, { "epoch": 0.2239880539704549, "grad_norm": 2.6962353218752932, "learning_rate": 0.0001, "loss": 0.875, "mean_abs_error": 390.5264177253161, "mean_abs_error_last_10": 133.50951869800957, "mean_abs_error_last_25": 208.26081122545656, "mean_abs_error_last_50": 284.42582370285993, "mean_pred_prob": 0.0264605856500566, "mean_pred_prob_last_10": 0.14123608227819204, "mean_pred_prob_last_25": 0.07701879246160388, "mean_pred_prob_last_50": 0.04584545767866075, "mean_token_accuracy": 0.8769829988479614, "step": 12600 }, { "epoch": 0.22416582226725687, "grad_norm": 1.2970926411954695, "learning_rate": 0.0001, "loss": 0.823, "mean_abs_error": 818.2372079856978, "mean_abs_error_last_10": 294.715886351275, "mean_abs_error_last_25": 365.0755647136175, "mean_abs_error_last_50": 510.00853524812084, "mean_pred_prob": 0.023728633971768432, "mean_pred_prob_last_10": 0.12029497192124836, "mean_pred_prob_last_25": 0.06814703125855885, "mean_pred_prob_last_50": 0.040944242750992996, "mean_token_accuracy": 0.8753432929515839, "step": 12610 }, { "epoch": 0.2243435905640588, "grad_norm": 1.3427270530595206, "learning_rate": 0.0001, "loss": 0.9107, "mean_abs_error": 187.15659238171006, "mean_abs_error_last_10": 47.39604086676841, "mean_abs_error_last_25": 70.34766151610694, "mean_abs_error_last_50": 115.45696180427849, "mean_pred_prob": 0.03119883220642805, "mean_pred_prob_last_10": 0.1709839791059494, "mean_pred_prob_last_25": 0.09300694633275271, "mean_pred_prob_last_50": 0.05447311736643314, "mean_token_accuracy": 0.8730680584907532, "step": 12620 }, { "epoch": 0.22452135886086075, "grad_norm": 1.769164626643517, "learning_rate": 0.0001, "loss": 0.8583, "mean_abs_error": 66.15020756357362, "mean_abs_error_last_10": 13.347998376197566, "mean_abs_error_last_25": 27.231543895452536, "mean_abs_error_last_50": 50.27362668991747, "mean_pred_prob": 0.05931478757411242, "mean_pred_prob_last_10": 0.2960579484701157, "mean_pred_prob_last_25": 0.16553324684500695, "mean_pred_prob_last_50": 0.10017049387097358, "mean_token_accuracy": 0.8803815603256225, "step": 12630 }, { "epoch": 0.22469912715766271, "grad_norm": 3.2450953039400017, "learning_rate": 0.0001, "loss": 0.8, "mean_abs_error": 509.66385553125275, "mean_abs_error_last_10": 141.80656864294107, "mean_abs_error_last_25": 161.5214097606067, "mean_abs_error_last_50": 260.1874869985686, "mean_pred_prob": 0.044012455217307435, "mean_pred_prob_last_10": 0.22304567041574047, "mean_pred_prob_last_25": 0.12156164539046585, "mean_pred_prob_last_50": 0.074328510009218, "mean_token_accuracy": 0.8770194530487061, "step": 12640 }, { "epoch": 0.22487689545446465, "grad_norm": 0.6957137700908014, "learning_rate": 0.0001, "loss": 0.8877, "mean_abs_error": 1794.917215345705, "mean_abs_error_last_10": 757.541091287121, "mean_abs_error_last_25": 952.6494520041113, "mean_abs_error_last_50": 1211.8949021682913, "mean_pred_prob": 0.031103870159131474, "mean_pred_prob_last_10": 0.14734193052572664, "mean_pred_prob_last_25": 0.08209415356395766, "mean_pred_prob_last_50": 0.05157787670905236, "mean_token_accuracy": 0.8664799928665161, "step": 12650 }, { "epoch": 0.2250546637512666, "grad_norm": 1.522839052023044, "learning_rate": 0.0001, "loss": 0.943, "mean_abs_error": 165.17334989577367, "mean_abs_error_last_10": 24.008501462590903, "mean_abs_error_last_25": 45.184227285389476, "mean_abs_error_last_50": 92.74563954893513, "mean_pred_prob": 0.042024791799485685, "mean_pred_prob_last_10": 0.19770190864801407, "mean_pred_prob_last_25": 0.11224032416939736, "mean_pred_prob_last_50": 0.07070771809667349, "mean_token_accuracy": 0.8604459702968598, "step": 12660 }, { "epoch": 0.22523243204806856, "grad_norm": 1.069471072001482, "learning_rate": 0.0001, "loss": 0.9818, "mean_abs_error": 656.3253800973682, "mean_abs_error_last_10": 360.2119700932791, "mean_abs_error_last_25": 411.27994399204874, "mean_abs_error_last_50": 444.0511898845026, "mean_pred_prob": 0.030244267848320305, "mean_pred_prob_last_10": 0.15470323488116264, "mean_pred_prob_last_25": 0.08394672879949212, "mean_pred_prob_last_50": 0.051309557538479565, "mean_token_accuracy": 0.861564826965332, "step": 12670 }, { "epoch": 0.2254102003448705, "grad_norm": 1.012317196417666, "learning_rate": 0.0001, "loss": 0.8631, "mean_abs_error": 870.8382064595968, "mean_abs_error_last_10": 608.928835102425, "mean_abs_error_last_25": 608.8802759493485, "mean_abs_error_last_50": 695.4140380984367, "mean_pred_prob": 0.026969379626098088, "mean_pred_prob_last_10": 0.1546213855530368, "mean_pred_prob_last_25": 0.08081151729275007, "mean_pred_prob_last_50": 0.046516151570540384, "mean_token_accuracy": 0.8736763298511505, "step": 12680 }, { "epoch": 0.22558796864167244, "grad_norm": 1.3565872882705596, "learning_rate": 0.0001, "loss": 0.8113, "mean_abs_error": 275.280945755066, "mean_abs_error_last_10": 102.34948743010588, "mean_abs_error_last_25": 131.26554236081526, "mean_abs_error_last_50": 169.4121647792153, "mean_pred_prob": 0.033945142617449166, "mean_pred_prob_last_10": 0.18161139893345535, "mean_pred_prob_last_25": 0.09569248324260116, "mean_pred_prob_last_50": 0.05790933475364, "mean_token_accuracy": 0.8718905210494995, "step": 12690 }, { "epoch": 0.2257657369384744, "grad_norm": 0.8252111408830708, "learning_rate": 0.0001, "loss": 0.9145, "mean_abs_error": 242.2715745047421, "mean_abs_error_last_10": 74.09187529022985, "mean_abs_error_last_25": 120.32814887463162, "mean_abs_error_last_50": 156.33296820453182, "mean_pred_prob": 0.0300366242416203, "mean_pred_prob_last_10": 0.16741905286908149, "mean_pred_prob_last_25": 0.08877866622060537, "mean_pred_prob_last_50": 0.05270733190700412, "mean_token_accuracy": 0.8789088726043701, "step": 12700 }, { "epoch": 0.22594350523527634, "grad_norm": 1.0319128944680864, "learning_rate": 0.0001, "loss": 0.9117, "mean_abs_error": 395.058419820131, "mean_abs_error_last_10": 92.67378985104094, "mean_abs_error_last_25": 145.20649220635318, "mean_abs_error_last_50": 261.4751452660592, "mean_pred_prob": 0.029587647458538414, "mean_pred_prob_last_10": 0.15684194676578045, "mean_pred_prob_last_25": 0.08362506534904242, "mean_pred_prob_last_50": 0.05056725684553385, "mean_token_accuracy": 0.8668603420257568, "step": 12710 }, { "epoch": 0.22612127353207828, "grad_norm": 1.8963468300161532, "learning_rate": 0.0001, "loss": 0.9574, "mean_abs_error": 192.36700028699573, "mean_abs_error_last_10": 142.20634786055052, "mean_abs_error_last_25": 137.65444170641197, "mean_abs_error_last_50": 148.5935035682593, "mean_pred_prob": 0.03739644782617688, "mean_pred_prob_last_10": 0.20239728093147277, "mean_pred_prob_last_25": 0.10860011540353298, "mean_pred_prob_last_50": 0.06449717236682773, "mean_token_accuracy": 0.8628876268863678, "step": 12720 }, { "epoch": 0.22629904182888025, "grad_norm": 1.0984563813934431, "learning_rate": 0.0001, "loss": 0.8323, "mean_abs_error": 647.4027969148051, "mean_abs_error_last_10": 266.25518647188005, "mean_abs_error_last_25": 327.3130772537585, "mean_abs_error_last_50": 429.1173543915368, "mean_pred_prob": 0.03245757026888896, "mean_pred_prob_last_10": 0.17440691358642652, "mean_pred_prob_last_25": 0.09437460480839946, "mean_pred_prob_last_50": 0.05634900426666718, "mean_token_accuracy": 0.8785782814025879, "step": 12730 }, { "epoch": 0.2264768101256822, "grad_norm": 1.4053017116342505, "learning_rate": 0.0001, "loss": 0.8943, "mean_abs_error": 243.0945307326047, "mean_abs_error_last_10": 69.42802345005643, "mean_abs_error_last_25": 73.51200047670328, "mean_abs_error_last_50": 128.4151202169572, "mean_pred_prob": 0.041609922982752325, "mean_pred_prob_last_10": 0.21457737889140843, "mean_pred_prob_last_25": 0.11689077112823724, "mean_pred_prob_last_50": 0.07050613397732378, "mean_token_accuracy": 0.8656040966510773, "step": 12740 }, { "epoch": 0.22665457842248413, "grad_norm": 1.812875937404254, "learning_rate": 0.0001, "loss": 0.8699, "mean_abs_error": 893.484997685855, "mean_abs_error_last_10": 274.52403513531175, "mean_abs_error_last_25": 344.5162264348189, "mean_abs_error_last_50": 511.2453536135348, "mean_pred_prob": 0.04105402713466901, "mean_pred_prob_last_10": 0.2093687829445116, "mean_pred_prob_last_25": 0.11358527423581108, "mean_pred_prob_last_50": 0.0698033686261624, "mean_token_accuracy": 0.8767212927341461, "step": 12750 }, { "epoch": 0.2268323467192861, "grad_norm": 3.0712377316796777, "learning_rate": 0.0001, "loss": 0.9158, "mean_abs_error": 318.3790285059836, "mean_abs_error_last_10": 94.59997929662345, "mean_abs_error_last_25": 108.43943881397858, "mean_abs_error_last_50": 177.15117092229943, "mean_pred_prob": 0.03103778827935457, "mean_pred_prob_last_10": 0.16269465629011393, "mean_pred_prob_last_25": 0.08921342799440027, "mean_pred_prob_last_50": 0.05315853422507644, "mean_token_accuracy": 0.8694903969764709, "step": 12760 }, { "epoch": 0.22701011501608803, "grad_norm": 2.3832668692390118, "learning_rate": 0.0001, "loss": 0.912, "mean_abs_error": 288.16293573043816, "mean_abs_error_last_10": 59.24938526322465, "mean_abs_error_last_25": 101.9166724432209, "mean_abs_error_last_50": 151.68981631324988, "mean_pred_prob": 0.045725674158893524, "mean_pred_prob_last_10": 0.2254110960289836, "mean_pred_prob_last_25": 0.12555606672540306, "mean_pred_prob_last_50": 0.07751376791857183, "mean_token_accuracy": 0.8771076798439026, "step": 12770 }, { "epoch": 0.22718788331288997, "grad_norm": 0.9888512280142326, "learning_rate": 0.0001, "loss": 0.8111, "mean_abs_error": 492.532018557051, "mean_abs_error_last_10": 147.38348024396402, "mean_abs_error_last_25": 206.3943558594827, "mean_abs_error_last_50": 275.8195496526731, "mean_pred_prob": 0.02765959925018251, "mean_pred_prob_last_10": 0.14628804801031947, "mean_pred_prob_last_25": 0.0775118435965851, "mean_pred_prob_last_50": 0.04729524399153888, "mean_token_accuracy": 0.8780212819576263, "step": 12780 }, { "epoch": 0.22736565160969194, "grad_norm": 1.0388400270425258, "learning_rate": 0.0001, "loss": 0.93, "mean_abs_error": 614.584627393964, "mean_abs_error_last_10": 229.58853470289327, "mean_abs_error_last_25": 299.1875578955633, "mean_abs_error_last_50": 339.5555619499316, "mean_pred_prob": 0.022176662104902788, "mean_pred_prob_last_10": 0.1154401014209725, "mean_pred_prob_last_25": 0.062497797911055385, "mean_pred_prob_last_50": 0.03768012194195762, "mean_token_accuracy": 0.8637936592102051, "step": 12790 }, { "epoch": 0.22754341990649388, "grad_norm": 2.727387114442904, "learning_rate": 0.0001, "loss": 0.852, "mean_abs_error": 401.3156403574425, "mean_abs_error_last_10": 80.42418334375922, "mean_abs_error_last_25": 129.6387772229372, "mean_abs_error_last_50": 209.77818135893753, "mean_pred_prob": 0.028530127136036754, "mean_pred_prob_last_10": 0.1597752707079053, "mean_pred_prob_last_25": 0.08381096851080656, "mean_pred_prob_last_50": 0.049306431831791994, "mean_token_accuracy": 0.8728002309799194, "step": 12800 }, { "epoch": 0.22772118820329582, "grad_norm": 1.0569692482950719, "learning_rate": 0.0001, "loss": 0.9076, "mean_abs_error": 901.9564147951693, "mean_abs_error_last_10": 383.024944093053, "mean_abs_error_last_25": 484.1246179239389, "mean_abs_error_last_50": 617.4781747260438, "mean_pred_prob": 0.02368094026169274, "mean_pred_prob_last_10": 0.1268205433327239, "mean_pred_prob_last_25": 0.06784889512928202, "mean_pred_prob_last_50": 0.040938432546681724, "mean_token_accuracy": 0.8600219845771789, "step": 12810 }, { "epoch": 0.22789895650009778, "grad_norm": 1.9212346775772544, "learning_rate": 0.0001, "loss": 0.8363, "mean_abs_error": 524.1921470854678, "mean_abs_error_last_10": 184.76041798461432, "mean_abs_error_last_25": 248.76019179811607, "mean_abs_error_last_50": 356.43634983195807, "mean_pred_prob": 0.025891821761615575, "mean_pred_prob_last_10": 0.15015688110142947, "mean_pred_prob_last_25": 0.07573962453752756, "mean_pred_prob_last_50": 0.044624456111341713, "mean_token_accuracy": 0.8756346702575684, "step": 12820 }, { "epoch": 0.22807672479689972, "grad_norm": 2.4787623311080944, "learning_rate": 0.0001, "loss": 0.8893, "mean_abs_error": 465.96151186829593, "mean_abs_error_last_10": 51.044699502980926, "mean_abs_error_last_25": 120.3141738380386, "mean_abs_error_last_50": 265.9277387028883, "mean_pred_prob": 0.03238620364572853, "mean_pred_prob_last_10": 0.17540073301643133, "mean_pred_prob_last_25": 0.0922610692679882, "mean_pred_prob_last_50": 0.05539116645231843, "mean_token_accuracy": 0.8724618077278137, "step": 12830 }, { "epoch": 0.22825449309370166, "grad_norm": 0.699656837563501, "learning_rate": 0.0001, "loss": 0.7932, "mean_abs_error": 508.9355944478725, "mean_abs_error_last_10": 182.9705772644591, "mean_abs_error_last_25": 228.51972937552873, "mean_abs_error_last_50": 320.54829137955915, "mean_pred_prob": 0.04029373195953667, "mean_pred_prob_last_10": 0.18835526910261252, "mean_pred_prob_last_25": 0.10969023692887277, "mean_pred_prob_last_50": 0.0683726362825837, "mean_token_accuracy": 0.8807801723480224, "step": 12840 }, { "epoch": 0.22843226139050363, "grad_norm": 0.9910353036433336, "learning_rate": 0.0001, "loss": 0.9463, "mean_abs_error": 717.4552260618086, "mean_abs_error_last_10": 228.76268788983225, "mean_abs_error_last_25": 322.2529756979344, "mean_abs_error_last_50": 411.51363880630316, "mean_pred_prob": 0.025941802409943194, "mean_pred_prob_last_10": 0.1288102194201201, "mean_pred_prob_last_25": 0.07323260246776045, "mean_pred_prob_last_50": 0.04395050027524121, "mean_token_accuracy": 0.8618950784206391, "step": 12850 }, { "epoch": 0.22861002968730557, "grad_norm": 5.329451077609192, "learning_rate": 0.0001, "loss": 0.9163, "mean_abs_error": 96.76030507602346, "mean_abs_error_last_10": 18.214987066003214, "mean_abs_error_last_25": 39.813834051196736, "mean_abs_error_last_50": 61.63684498108156, "mean_pred_prob": 0.05202076770365238, "mean_pred_prob_last_10": 0.24548578187823294, "mean_pred_prob_last_25": 0.1410454448312521, "mean_pred_prob_last_50": 0.08793643433600665, "mean_token_accuracy": 0.8723444640636444, "step": 12860 }, { "epoch": 0.2287877979841075, "grad_norm": 1.5416249601811511, "learning_rate": 0.0001, "loss": 0.845, "mean_abs_error": 128.6943085063554, "mean_abs_error_last_10": 27.622052481819914, "mean_abs_error_last_25": 81.12146283634459, "mean_abs_error_last_50": 89.11046650474864, "mean_pred_prob": 0.05516036041080952, "mean_pred_prob_last_10": 0.27677141800522803, "mean_pred_prob_last_25": 0.15289798714220523, "mean_pred_prob_last_50": 0.0933132415637374, "mean_token_accuracy": 0.8713492572307586, "step": 12870 }, { "epoch": 0.22896556628090947, "grad_norm": 2.3362826547942346, "learning_rate": 0.0001, "loss": 0.91, "mean_abs_error": 973.7962000340209, "mean_abs_error_last_10": 198.0885173163287, "mean_abs_error_last_25": 461.8609249052791, "mean_abs_error_last_50": 755.6160852078638, "mean_pred_prob": 0.03509971732273698, "mean_pred_prob_last_10": 0.1664693433791399, "mean_pred_prob_last_25": 0.09489578567445278, "mean_pred_prob_last_50": 0.059309645229950546, "mean_token_accuracy": 0.8755462288856506, "step": 12880 }, { "epoch": 0.2291433345777114, "grad_norm": 1.9979867141239558, "learning_rate": 0.0001, "loss": 0.8923, "mean_abs_error": 1054.5956087405498, "mean_abs_error_last_10": 598.4409662673578, "mean_abs_error_last_25": 714.2091609185157, "mean_abs_error_last_50": 829.3827017861307, "mean_pred_prob": 0.02842426394781796, "mean_pred_prob_last_10": 0.15048633317783242, "mean_pred_prob_last_25": 0.08210617319709854, "mean_pred_prob_last_50": 0.04917644829693017, "mean_token_accuracy": 0.8709315776824951, "step": 12890 }, { "epoch": 0.22932110287451335, "grad_norm": 2.3631753802134408, "learning_rate": 0.0001, "loss": 0.9777, "mean_abs_error": 614.0937920461113, "mean_abs_error_last_10": 276.48109033430694, "mean_abs_error_last_25": 317.1893592399557, "mean_abs_error_last_50": 388.4204497390382, "mean_pred_prob": 0.03379342239932157, "mean_pred_prob_last_10": 0.19952831822447478, "mean_pred_prob_last_25": 0.10412923212861643, "mean_pred_prob_last_50": 0.059224029205506665, "mean_token_accuracy": 0.8639059662818909, "step": 12900 }, { "epoch": 0.22949887117131532, "grad_norm": 1.4872477487388784, "learning_rate": 0.0001, "loss": 0.8186, "mean_abs_error": 190.8605637073041, "mean_abs_error_last_10": 58.661840629697586, "mean_abs_error_last_25": 99.8872013948239, "mean_abs_error_last_50": 121.82823972708533, "mean_pred_prob": 0.052542126085609195, "mean_pred_prob_last_10": 0.2705815754830837, "mean_pred_prob_last_25": 0.1490311784669757, "mean_pred_prob_last_50": 0.08959813956171274, "mean_token_accuracy": 0.8772433757781982, "step": 12910 }, { "epoch": 0.22967663946811726, "grad_norm": 1.3807841968078345, "learning_rate": 0.0001, "loss": 0.8586, "mean_abs_error": 267.76522653977264, "mean_abs_error_last_10": 144.07910727112858, "mean_abs_error_last_25": 160.51562656226625, "mean_abs_error_last_50": 213.40988236093435, "mean_pred_prob": 0.04110349863767624, "mean_pred_prob_last_10": 0.215642024949193, "mean_pred_prob_last_25": 0.11583522735163569, "mean_pred_prob_last_50": 0.07000100929290057, "mean_token_accuracy": 0.8637454509735107, "step": 12920 }, { "epoch": 0.2298544077649192, "grad_norm": 2.134960679303704, "learning_rate": 0.0001, "loss": 0.8862, "mean_abs_error": 485.833813464345, "mean_abs_error_last_10": 135.63201524277957, "mean_abs_error_last_25": 236.5334925312427, "mean_abs_error_last_50": 327.9899820230143, "mean_pred_prob": 0.023141980567015708, "mean_pred_prob_last_10": 0.11168429004028439, "mean_pred_prob_last_25": 0.06174513977020979, "mean_pred_prob_last_50": 0.03856946232262999, "mean_token_accuracy": 0.8669802784919739, "step": 12930 }, { "epoch": 0.23003217606172116, "grad_norm": 1.4189722737784511, "learning_rate": 0.0001, "loss": 0.7302, "mean_abs_error": 1189.9398167503232, "mean_abs_error_last_10": 541.5167928134831, "mean_abs_error_last_25": 691.320550780698, "mean_abs_error_last_50": 836.7402976622254, "mean_pred_prob": 0.02942843213386368, "mean_pred_prob_last_10": 0.15254027557966765, "mean_pred_prob_last_25": 0.08385703347157687, "mean_pred_prob_last_50": 0.05064369544852525, "mean_token_accuracy": 0.8897927463054657, "step": 12940 }, { "epoch": 0.2302099443585231, "grad_norm": 0.9616501427997007, "learning_rate": 0.0001, "loss": 0.9352, "mean_abs_error": 1058.4007429214266, "mean_abs_error_last_10": 604.9306238245271, "mean_abs_error_last_25": 674.8848508750611, "mean_abs_error_last_50": 782.8609916316589, "mean_pred_prob": 0.033081129110360055, "mean_pred_prob_last_10": 0.170639137699618, "mean_pred_prob_last_25": 0.09481268976378487, "mean_pred_prob_last_50": 0.056687399113434365, "mean_token_accuracy": 0.863272750377655, "step": 12950 }, { "epoch": 0.23038771265532504, "grad_norm": 1.5371163918300426, "learning_rate": 0.0001, "loss": 0.8533, "mean_abs_error": 209.20494944006796, "mean_abs_error_last_10": 150.83035749576504, "mean_abs_error_last_25": 134.35082529829992, "mean_abs_error_last_50": 151.40892728943362, "mean_pred_prob": 0.04538357933051884, "mean_pred_prob_last_10": 0.22937191277742386, "mean_pred_prob_last_25": 0.12906583100557328, "mean_pred_prob_last_50": 0.07810501605272294, "mean_token_accuracy": 0.8681483089923858, "step": 12960 }, { "epoch": 0.230565480952127, "grad_norm": 0.982702450829024, "learning_rate": 0.0001, "loss": 0.8628, "mean_abs_error": 1070.8130159068905, "mean_abs_error_last_10": 301.4006535613914, "mean_abs_error_last_25": 474.16591215766266, "mean_abs_error_last_50": 656.869120253071, "mean_pred_prob": 0.030019667066517285, "mean_pred_prob_last_10": 0.16039174500619993, "mean_pred_prob_last_25": 0.08580746675725096, "mean_pred_prob_last_50": 0.050651985738659276, "mean_token_accuracy": 0.8684192419052124, "step": 12970 }, { "epoch": 0.23074324924892894, "grad_norm": 3.1166589739608406, "learning_rate": 0.0001, "loss": 0.9933, "mean_abs_error": 736.157603532252, "mean_abs_error_last_10": 177.88894798465503, "mean_abs_error_last_25": 222.29785465386163, "mean_abs_error_last_50": 380.2324217664314, "mean_pred_prob": 0.018930764799006284, "mean_pred_prob_last_10": 0.09222298078238964, "mean_pred_prob_last_25": 0.052086074091494086, "mean_pred_prob_last_50": 0.031990253692492844, "mean_token_accuracy": 0.8672313451766968, "step": 12980 }, { "epoch": 0.23092101754573088, "grad_norm": 1.1100567783626254, "learning_rate": 0.0001, "loss": 0.9355, "mean_abs_error": 173.04128490495503, "mean_abs_error_last_10": 42.097465836540366, "mean_abs_error_last_25": 78.0340410928417, "mean_abs_error_last_50": 99.96073642377469, "mean_pred_prob": 0.056759750470519066, "mean_pred_prob_last_10": 0.27433697897940873, "mean_pred_prob_last_25": 0.15176808973774314, "mean_pred_prob_last_50": 0.09397706119343638, "mean_token_accuracy": 0.8726705193519593, "step": 12990 }, { "epoch": 0.23109878584253285, "grad_norm": 1.5306918982470141, "learning_rate": 0.0001, "loss": 0.966, "mean_abs_error": 807.5603829765088, "mean_abs_error_last_10": 393.5698040742924, "mean_abs_error_last_25": 468.9196312350641, "mean_abs_error_last_50": 573.854713490396, "mean_pred_prob": 0.0279403364402242, "mean_pred_prob_last_10": 0.14172358610667288, "mean_pred_prob_last_25": 0.0798210130771622, "mean_pred_prob_last_50": 0.047331438964465636, "mean_token_accuracy": 0.8708134889602661, "step": 13000 }, { "epoch": 0.2312765541393348, "grad_norm": 2.1326809251171097, "learning_rate": 0.0001, "loss": 0.8711, "mean_abs_error": 437.851182360136, "mean_abs_error_last_10": 95.5434097951525, "mean_abs_error_last_25": 146.2201346159049, "mean_abs_error_last_50": 228.20109235914788, "mean_pred_prob": 0.02426367816515267, "mean_pred_prob_last_10": 0.12717255502939223, "mean_pred_prob_last_25": 0.06779773365706206, "mean_pred_prob_last_50": 0.04110380383208394, "mean_token_accuracy": 0.8738504767417907, "step": 13010 }, { "epoch": 0.23145432243613673, "grad_norm": 2.7715715360273108, "learning_rate": 0.0001, "loss": 0.8888, "mean_abs_error": 1093.9669876417415, "mean_abs_error_last_10": 476.4755070027633, "mean_abs_error_last_25": 563.5685838157159, "mean_abs_error_last_50": 825.1617652582406, "mean_pred_prob": 0.030931538974982686, "mean_pred_prob_last_10": 0.17009370188752654, "mean_pred_prob_last_25": 0.08853885287826416, "mean_pred_prob_last_50": 0.053122699947562066, "mean_token_accuracy": 0.8767788112163544, "step": 13020 }, { "epoch": 0.2316320907329387, "grad_norm": 0.9172790609251642, "learning_rate": 0.0001, "loss": 0.8202, "mean_abs_error": 839.1649374749113, "mean_abs_error_last_10": 270.0380632704123, "mean_abs_error_last_25": 330.20139497430193, "mean_abs_error_last_50": 495.74495128085965, "mean_pred_prob": 0.02928082530852407, "mean_pred_prob_last_10": 0.157870194647694, "mean_pred_prob_last_25": 0.08412915359949694, "mean_pred_prob_last_50": 0.0504174831265118, "mean_token_accuracy": 0.8731795728206635, "step": 13030 }, { "epoch": 0.23180985902974063, "grad_norm": 1.337773298093606, "learning_rate": 0.0001, "loss": 0.904, "mean_abs_error": 1132.254284852728, "mean_abs_error_last_10": 555.764647304914, "mean_abs_error_last_25": 639.8719766054759, "mean_abs_error_last_50": 741.1144580343132, "mean_pred_prob": 0.02604634408489801, "mean_pred_prob_last_10": 0.13941715423134154, "mean_pred_prob_last_25": 0.07443358985474333, "mean_pred_prob_last_50": 0.044928522166446785, "mean_token_accuracy": 0.8720694839954376, "step": 13040 }, { "epoch": 0.23198762732654257, "grad_norm": 1.6837543504269563, "learning_rate": 0.0001, "loss": 0.8863, "mean_abs_error": 187.1403183614061, "mean_abs_error_last_10": 29.895165557390975, "mean_abs_error_last_25": 55.84969544802165, "mean_abs_error_last_50": 87.36299130168683, "mean_pred_prob": 0.04023553398437798, "mean_pred_prob_last_10": 0.2092243805527687, "mean_pred_prob_last_25": 0.11511061824858189, "mean_pred_prob_last_50": 0.06926044085994362, "mean_token_accuracy": 0.880823940038681, "step": 13050 }, { "epoch": 0.23216539562334454, "grad_norm": 1.6631609740283235, "learning_rate": 0.0001, "loss": 0.8443, "mean_abs_error": 424.0233872624234, "mean_abs_error_last_10": 84.06926822903438, "mean_abs_error_last_25": 129.94974206730265, "mean_abs_error_last_50": 222.818871753863, "mean_pred_prob": 0.032360216544475406, "mean_pred_prob_last_10": 0.16114282389171422, "mean_pred_prob_last_25": 0.08829604000784456, "mean_pred_prob_last_50": 0.05414919690228999, "mean_token_accuracy": 0.8840370357036591, "step": 13060 }, { "epoch": 0.23234316392014648, "grad_norm": 1.226529683702483, "learning_rate": 0.0001, "loss": 0.9384, "mean_abs_error": 324.932009676804, "mean_abs_error_last_10": 53.493171715019535, "mean_abs_error_last_25": 99.07036831728792, "mean_abs_error_last_50": 158.8033299367509, "mean_pred_prob": 0.03274258929304778, "mean_pred_prob_last_10": 0.1809906281530857, "mean_pred_prob_last_25": 0.09729040693491697, "mean_pred_prob_last_50": 0.05661243610084057, "mean_token_accuracy": 0.8709251105785369, "step": 13070 }, { "epoch": 0.23252093221694842, "grad_norm": 1.5225568977189858, "learning_rate": 0.0001, "loss": 0.8564, "mean_abs_error": 651.6594821753133, "mean_abs_error_last_10": 173.54444814555998, "mean_abs_error_last_25": 246.30035632614113, "mean_abs_error_last_50": 394.946235155518, "mean_pred_prob": 0.02505810662987642, "mean_pred_prob_last_10": 0.13383902731584385, "mean_pred_prob_last_25": 0.07264792934875004, "mean_pred_prob_last_50": 0.04305872870609164, "mean_token_accuracy": 0.8802487671375274, "step": 13080 }, { "epoch": 0.23269870051375038, "grad_norm": 1.853614701701668, "learning_rate": 0.0001, "loss": 0.8625, "mean_abs_error": 335.22045452205714, "mean_abs_error_last_10": 88.8429362671005, "mean_abs_error_last_25": 141.53128188780923, "mean_abs_error_last_50": 216.27246415205914, "mean_pred_prob": 0.028429813450202346, "mean_pred_prob_last_10": 0.1411144223064184, "mean_pred_prob_last_25": 0.07770005539059639, "mean_pred_prob_last_50": 0.04757055025547743, "mean_token_accuracy": 0.8822384476661682, "step": 13090 }, { "epoch": 0.23287646881055232, "grad_norm": 1.0325966102795783, "learning_rate": 0.0001, "loss": 0.8948, "mean_abs_error": 369.18590484688593, "mean_abs_error_last_10": 82.11217059554097, "mean_abs_error_last_25": 123.37926205237015, "mean_abs_error_last_50": 240.75760677006284, "mean_pred_prob": 0.03568024691194296, "mean_pred_prob_last_10": 0.1796493260189891, "mean_pred_prob_last_25": 0.09983649095520378, "mean_pred_prob_last_50": 0.06099469061009586, "mean_token_accuracy": 0.8725189149379731, "step": 13100 }, { "epoch": 0.23305423710735426, "grad_norm": 0.8939731044335699, "learning_rate": 0.0001, "loss": 0.8312, "mean_abs_error": 466.39779296719036, "mean_abs_error_last_10": 134.96720546396688, "mean_abs_error_last_25": 199.433361860787, "mean_abs_error_last_50": 276.2487655586346, "mean_pred_prob": 0.03508849613717757, "mean_pred_prob_last_10": 0.17884813379496337, "mean_pred_prob_last_25": 0.09912901007337496, "mean_pred_prob_last_50": 0.05946904867887497, "mean_token_accuracy": 0.8740716397762298, "step": 13110 }, { "epoch": 0.23323200540415623, "grad_norm": 1.1297430312691434, "learning_rate": 0.0001, "loss": 0.8486, "mean_abs_error": 318.39829677904294, "mean_abs_error_last_10": 72.08842564570924, "mean_abs_error_last_25": 97.10159440738883, "mean_abs_error_last_50": 153.9384870372559, "mean_pred_prob": 0.026278402376919984, "mean_pred_prob_last_10": 0.14295539669692517, "mean_pred_prob_last_25": 0.0761263707652688, "mean_pred_prob_last_50": 0.04558262676000595, "mean_token_accuracy": 0.8831967711448669, "step": 13120 }, { "epoch": 0.23340977370095817, "grad_norm": 1.7639539672967701, "learning_rate": 0.0001, "loss": 0.8862, "mean_abs_error": 610.7878074483759, "mean_abs_error_last_10": 106.58639679942944, "mean_abs_error_last_25": 176.6496900933532, "mean_abs_error_last_50": 320.12103984580517, "mean_pred_prob": 0.03295235112309456, "mean_pred_prob_last_10": 0.17471484843408688, "mean_pred_prob_last_25": 0.09157452925574035, "mean_pred_prob_last_50": 0.055363617627881466, "mean_token_accuracy": 0.8869170248508453, "step": 13130 }, { "epoch": 0.2335875419977601, "grad_norm": 1.786771542365998, "learning_rate": 0.0001, "loss": 0.8362, "mean_abs_error": 1255.2443840168146, "mean_abs_error_last_10": 954.8254667301031, "mean_abs_error_last_25": 943.79602150629, "mean_abs_error_last_50": 1006.1382384289143, "mean_pred_prob": 0.025627765691024252, "mean_pred_prob_last_10": 0.1300692529490334, "mean_pred_prob_last_25": 0.07110792469175067, "mean_pred_prob_last_50": 0.043300608835124876, "mean_token_accuracy": 0.8835823476314545, "step": 13140 }, { "epoch": 0.23376531029456207, "grad_norm": 1.4460053334503524, "learning_rate": 0.0001, "loss": 0.9729, "mean_abs_error": 219.53050971081075, "mean_abs_error_last_10": 133.60243520809723, "mean_abs_error_last_25": 122.92142666898039, "mean_abs_error_last_50": 144.31991383080614, "mean_pred_prob": 0.048838916490785776, "mean_pred_prob_last_10": 0.23656492214649916, "mean_pred_prob_last_25": 0.13382806107401848, "mean_pred_prob_last_50": 0.08315326948650181, "mean_token_accuracy": 0.8609884977340698, "step": 13150 }, { "epoch": 0.233943078591364, "grad_norm": 1.0180575094070228, "learning_rate": 0.0001, "loss": 0.89, "mean_abs_error": 262.89194875669773, "mean_abs_error_last_10": 72.30950984657706, "mean_abs_error_last_25": 135.39924838069442, "mean_abs_error_last_50": 177.43689430905263, "mean_pred_prob": 0.03648360799998045, "mean_pred_prob_last_10": 0.1853721708059311, "mean_pred_prob_last_25": 0.10449043586850167, "mean_pred_prob_last_50": 0.06300211977213621, "mean_token_accuracy": 0.8736089527606964, "step": 13160 }, { "epoch": 0.23412084688816595, "grad_norm": 2.2320355581746414, "learning_rate": 0.0001, "loss": 0.8173, "mean_abs_error": 612.4909072113362, "mean_abs_error_last_10": 89.28377072535767, "mean_abs_error_last_25": 173.42342617835408, "mean_abs_error_last_50": 277.6854917514419, "mean_pred_prob": 0.04138740417547524, "mean_pred_prob_last_10": 0.19003064362332225, "mean_pred_prob_last_25": 0.10878967754542827, "mean_pred_prob_last_50": 0.06862624821951613, "mean_token_accuracy": 0.8707089960575104, "step": 13170 }, { "epoch": 0.23429861518496792, "grad_norm": 1.169407697690198, "learning_rate": 0.0001, "loss": 0.9522, "mean_abs_error": 353.92417444157456, "mean_abs_error_last_10": 242.82227835439397, "mean_abs_error_last_25": 259.5392463795557, "mean_abs_error_last_50": 256.7859207074843, "mean_pred_prob": 0.032859944039955735, "mean_pred_prob_last_10": 0.1692655708640814, "mean_pred_prob_last_25": 0.09012198448181152, "mean_pred_prob_last_50": 0.055207021068781614, "mean_token_accuracy": 0.8601599991321563, "step": 13180 }, { "epoch": 0.23447638348176986, "grad_norm": 1.4863625380940941, "learning_rate": 0.0001, "loss": 0.8547, "mean_abs_error": 186.91175824291207, "mean_abs_error_last_10": 25.045179671559172, "mean_abs_error_last_25": 55.884468892238374, "mean_abs_error_last_50": 93.09868898614658, "mean_pred_prob": 0.04847258660010993, "mean_pred_prob_last_10": 0.24243725277483463, "mean_pred_prob_last_25": 0.1366581406444311, "mean_pred_prob_last_50": 0.08379546646028757, "mean_token_accuracy": 0.8800470471382141, "step": 13190 }, { "epoch": 0.2346541517785718, "grad_norm": 1.391464261324021, "learning_rate": 0.0001, "loss": 0.9099, "mean_abs_error": 278.3742333503978, "mean_abs_error_last_10": 127.5380057564153, "mean_abs_error_last_25": 133.3143378409158, "mean_abs_error_last_50": 166.84416084556327, "mean_pred_prob": 0.03510820469819009, "mean_pred_prob_last_10": 0.1986284950748086, "mean_pred_prob_last_25": 0.10290555227547885, "mean_pred_prob_last_50": 0.0606811850797385, "mean_token_accuracy": 0.8746282756328583, "step": 13200 }, { "epoch": 0.23483192007537376, "grad_norm": 1.2326115912653124, "learning_rate": 0.0001, "loss": 0.9511, "mean_abs_error": 1377.2408790346694, "mean_abs_error_last_10": 685.7570213857687, "mean_abs_error_last_25": 837.6595476714641, "mean_abs_error_last_50": 981.2746437892927, "mean_pred_prob": 0.025914233972434884, "mean_pred_prob_last_10": 0.11426518251537346, "mean_pred_prob_last_25": 0.06688113119162153, "mean_pred_prob_last_50": 0.04280973607674241, "mean_token_accuracy": 0.8695816695690155, "step": 13210 }, { "epoch": 0.2350096883721757, "grad_norm": 1.0430341828983813, "learning_rate": 0.0001, "loss": 0.8706, "mean_abs_error": 614.0330663466564, "mean_abs_error_last_10": 232.4808107421411, "mean_abs_error_last_25": 324.82991028159296, "mean_abs_error_last_50": 415.2912564607095, "mean_pred_prob": 0.046679301443509756, "mean_pred_prob_last_10": 0.23228055767831393, "mean_pred_prob_last_25": 0.13130278973840176, "mean_pred_prob_last_50": 0.07964127327431925, "mean_token_accuracy": 0.8716065347194671, "step": 13220 }, { "epoch": 0.23518745666897767, "grad_norm": 2.4989485463793395, "learning_rate": 0.0001, "loss": 0.8386, "mean_abs_error": 852.6784116003635, "mean_abs_error_last_10": 354.31601970068084, "mean_abs_error_last_25": 430.61730822598304, "mean_abs_error_last_50": 571.0850359683487, "mean_pred_prob": 0.0277238530368777, "mean_pred_prob_last_10": 0.142797398485709, "mean_pred_prob_last_25": 0.07902250529150479, "mean_pred_prob_last_50": 0.047854135822854006, "mean_token_accuracy": 0.871122145652771, "step": 13230 }, { "epoch": 0.2353652249657796, "grad_norm": 1.0992503707027717, "learning_rate": 0.0001, "loss": 0.8234, "mean_abs_error": 429.38540207478025, "mean_abs_error_last_10": 92.4866872222171, "mean_abs_error_last_25": 146.11553518168552, "mean_abs_error_last_50": 263.6491810122236, "mean_pred_prob": 0.02998469434096478, "mean_pred_prob_last_10": 0.15889461748301983, "mean_pred_prob_last_25": 0.08512855239678174, "mean_pred_prob_last_50": 0.05087272456148639, "mean_token_accuracy": 0.8761606991291047, "step": 13240 }, { "epoch": 0.23554299326258155, "grad_norm": 3.5045116726840893, "learning_rate": 0.0001, "loss": 0.8376, "mean_abs_error": 158.72463987939312, "mean_abs_error_last_10": 38.642260194392286, "mean_abs_error_last_25": 62.85423879233923, "mean_abs_error_last_50": 92.60009747834222, "mean_pred_prob": 0.05237983246333897, "mean_pred_prob_last_10": 0.24127778597176075, "mean_pred_prob_last_25": 0.13908224646002054, "mean_pred_prob_last_50": 0.08702379390597344, "mean_token_accuracy": 0.8772764205932617, "step": 13250 }, { "epoch": 0.2357207615593835, "grad_norm": 1.4864293174207548, "learning_rate": 0.0001, "loss": 0.9537, "mean_abs_error": 782.0677908618593, "mean_abs_error_last_10": 357.45931780823787, "mean_abs_error_last_25": 457.84031323589977, "mean_abs_error_last_50": 556.1067564762559, "mean_pred_prob": 0.035758529894519595, "mean_pred_prob_last_10": 0.1637123336608056, "mean_pred_prob_last_25": 0.09437859341269358, "mean_pred_prob_last_50": 0.059767898192512804, "mean_token_accuracy": 0.8700472056865692, "step": 13260 }, { "epoch": 0.23589852985618545, "grad_norm": 1.0588063234962013, "learning_rate": 0.0001, "loss": 0.9058, "mean_abs_error": 677.6305080676415, "mean_abs_error_last_10": 356.32605985953325, "mean_abs_error_last_25": 429.26751012406487, "mean_abs_error_last_50": 538.1172277938787, "mean_pred_prob": 0.045682459541421847, "mean_pred_prob_last_10": 0.2280811046657618, "mean_pred_prob_last_25": 0.12629305899026805, "mean_pred_prob_last_50": 0.07705212024447974, "mean_token_accuracy": 0.8749590277671814, "step": 13270 }, { "epoch": 0.2360762981529874, "grad_norm": 2.6225112881340613, "learning_rate": 0.0001, "loss": 0.9207, "mean_abs_error": 490.30703955684476, "mean_abs_error_last_10": 298.71910220020675, "mean_abs_error_last_25": 311.5251072998945, "mean_abs_error_last_50": 363.4965870032801, "mean_pred_prob": 0.029034850746393205, "mean_pred_prob_last_10": 0.15017364639788866, "mean_pred_prob_last_25": 0.08159833746030927, "mean_pred_prob_last_50": 0.049331640172749755, "mean_token_accuracy": 0.8751125276088715, "step": 13280 }, { "epoch": 0.23625406644978936, "grad_norm": 0.8809482436231312, "learning_rate": 0.0001, "loss": 0.784, "mean_abs_error": 280.2484743213598, "mean_abs_error_last_10": 116.60471581282354, "mean_abs_error_last_25": 148.38942913973295, "mean_abs_error_last_50": 164.70034942939964, "mean_pred_prob": 0.05352730276063085, "mean_pred_prob_last_10": 0.23790479311719537, "mean_pred_prob_last_25": 0.1414451017975807, "mean_pred_prob_last_50": 0.08852216461673379, "mean_token_accuracy": 0.8764346837997437, "step": 13290 }, { "epoch": 0.2364318347465913, "grad_norm": 0.828463162700113, "learning_rate": 0.0001, "loss": 0.9182, "mean_abs_error": 712.8865703228487, "mean_abs_error_last_10": 374.102984333672, "mean_abs_error_last_25": 523.559631384456, "mean_abs_error_last_50": 531.3696460597499, "mean_pred_prob": 0.03162047461955808, "mean_pred_prob_last_10": 0.16128048647660762, "mean_pred_prob_last_25": 0.08734018462710083, "mean_pred_prob_last_50": 0.05323060335358605, "mean_token_accuracy": 0.8632344722747802, "step": 13300 }, { "epoch": 0.23660960304339324, "grad_norm": 1.2027333929555453, "learning_rate": 0.0001, "loss": 0.8536, "mean_abs_error": 629.7347468946185, "mean_abs_error_last_10": 354.0167827883638, "mean_abs_error_last_25": 429.8931905979372, "mean_abs_error_last_50": 498.5129678064276, "mean_pred_prob": 0.05030517922132276, "mean_pred_prob_last_10": 0.23920304241473786, "mean_pred_prob_last_25": 0.1386948786006542, "mean_pred_prob_last_50": 0.08536534291633871, "mean_token_accuracy": 0.8668614327907562, "step": 13310 }, { "epoch": 0.2367873713401952, "grad_norm": 1.7895544813103592, "learning_rate": 0.0001, "loss": 0.9132, "mean_abs_error": 639.7890852890661, "mean_abs_error_last_10": 183.84816936831027, "mean_abs_error_last_25": 272.99418246480843, "mean_abs_error_last_50": 404.4666943997982, "mean_pred_prob": 0.03549154635402374, "mean_pred_prob_last_10": 0.15751946098171174, "mean_pred_prob_last_25": 0.09372896165004932, "mean_pred_prob_last_50": 0.058236009045504036, "mean_token_accuracy": 0.8628230035305023, "step": 13320 }, { "epoch": 0.23696513963699714, "grad_norm": 0.9701535074089195, "learning_rate": 0.0001, "loss": 0.8172, "mean_abs_error": 557.9470505677559, "mean_abs_error_last_10": 262.59571634856496, "mean_abs_error_last_25": 289.6230142754435, "mean_abs_error_last_50": 414.1822807473699, "mean_pred_prob": 0.031212024204432965, "mean_pred_prob_last_10": 0.1669879298657179, "mean_pred_prob_last_25": 0.08920867284759879, "mean_pred_prob_last_50": 0.05351186888292432, "mean_token_accuracy": 0.8749695658683777, "step": 13330 }, { "epoch": 0.23714290793379908, "grad_norm": 2.178434830948322, "learning_rate": 0.0001, "loss": 0.9137, "mean_abs_error": 581.1008416706215, "mean_abs_error_last_10": 280.43883639737396, "mean_abs_error_last_25": 280.8703888123726, "mean_abs_error_last_50": 347.0766125481799, "mean_pred_prob": 0.034210804232861845, "mean_pred_prob_last_10": 0.16728310161270202, "mean_pred_prob_last_25": 0.09470381126739084, "mean_pred_prob_last_50": 0.05864639269420877, "mean_token_accuracy": 0.8580438733100891, "step": 13340 }, { "epoch": 0.23732067623060105, "grad_norm": 1.0084673855354658, "learning_rate": 0.0001, "loss": 0.8764, "mean_abs_error": 326.34385230596854, "mean_abs_error_last_10": 88.50404295819195, "mean_abs_error_last_25": 111.79515029780069, "mean_abs_error_last_50": 197.5958806173748, "mean_pred_prob": 0.03552676266990602, "mean_pred_prob_last_10": 0.18157072160393, "mean_pred_prob_last_25": 0.10204092981293797, "mean_pred_prob_last_50": 0.06061811530962587, "mean_token_accuracy": 0.871791535615921, "step": 13350 }, { "epoch": 0.23749844452740299, "grad_norm": 1.688705776584234, "learning_rate": 0.0001, "loss": 0.865, "mean_abs_error": 96.84696035694326, "mean_abs_error_last_10": 34.414810561158205, "mean_abs_error_last_25": 61.14078000095369, "mean_abs_error_last_50": 79.16999975064981, "mean_pred_prob": 0.048570492677390574, "mean_pred_prob_last_10": 0.24362876266241074, "mean_pred_prob_last_25": 0.13190627992153167, "mean_pred_prob_last_50": 0.0807470627129078, "mean_token_accuracy": 0.8723683595657349, "step": 13360 }, { "epoch": 0.23767621282420492, "grad_norm": 2.1375487563580577, "learning_rate": 0.0001, "loss": 0.9463, "mean_abs_error": 536.9448019095074, "mean_abs_error_last_10": 394.89067157581775, "mean_abs_error_last_25": 509.1060567713509, "mean_abs_error_last_50": 510.6848333802166, "mean_pred_prob": 0.02608198719099164, "mean_pred_prob_last_10": 0.13869107803329825, "mean_pred_prob_last_25": 0.07536662840284407, "mean_pred_prob_last_50": 0.044713138090446594, "mean_token_accuracy": 0.8693016350269318, "step": 13370 }, { "epoch": 0.2378539811210069, "grad_norm": 2.8506061528681745, "learning_rate": 0.0001, "loss": 1.0003, "mean_abs_error": 144.6492886331708, "mean_abs_error_last_10": 25.935405426937525, "mean_abs_error_last_25": 69.32516373686569, "mean_abs_error_last_50": 89.94897841945033, "mean_pred_prob": 0.05572262005880475, "mean_pred_prob_last_10": 0.2732754100114107, "mean_pred_prob_last_25": 0.15366836301982403, "mean_pred_prob_last_50": 0.09407096402719617, "mean_token_accuracy": 0.8687396585941315, "step": 13380 }, { "epoch": 0.23803174941780883, "grad_norm": 1.0270524913863723, "learning_rate": 0.0001, "loss": 0.8331, "mean_abs_error": 261.7927445483643, "mean_abs_error_last_10": 184.27364118657076, "mean_abs_error_last_25": 157.01583684437153, "mean_abs_error_last_50": 192.84136363438435, "mean_pred_prob": 0.055245566566009076, "mean_pred_prob_last_10": 0.25118911121971904, "mean_pred_prob_last_25": 0.1456696685985662, "mean_pred_prob_last_50": 0.09113743336638436, "mean_token_accuracy": 0.875937032699585, "step": 13390 }, { "epoch": 0.23820951771461077, "grad_norm": 1.2029839225995693, "learning_rate": 0.0001, "loss": 1.0164, "mean_abs_error": 647.7831745894157, "mean_abs_error_last_10": 378.7198445026733, "mean_abs_error_last_25": 389.06229254021594, "mean_abs_error_last_50": 425.56512416606546, "mean_pred_prob": 0.041558121173875404, "mean_pred_prob_last_10": 0.1895099639077671, "mean_pred_prob_last_25": 0.10697676448035054, "mean_pred_prob_last_50": 0.0682351837225724, "mean_token_accuracy": 0.8712934672832489, "step": 13400 }, { "epoch": 0.23838728601141274, "grad_norm": 0.8661858819482772, "learning_rate": 0.0001, "loss": 0.8466, "mean_abs_error": 861.671534488444, "mean_abs_error_last_10": 283.96291943856784, "mean_abs_error_last_25": 308.86317019526814, "mean_abs_error_last_50": 437.64347546518536, "mean_pred_prob": 0.019025505316676572, "mean_pred_prob_last_10": 0.1076913007418625, "mean_pred_prob_last_25": 0.05697904010303319, "mean_pred_prob_last_50": 0.03344925650162622, "mean_token_accuracy": 0.8616928339004517, "step": 13410 }, { "epoch": 0.23856505430821467, "grad_norm": 1.229852871543987, "learning_rate": 0.0001, "loss": 0.9177, "mean_abs_error": 879.1241039293906, "mean_abs_error_last_10": 310.64217379776113, "mean_abs_error_last_25": 365.4830372322899, "mean_abs_error_last_50": 470.03882569578656, "mean_pred_prob": 0.021322181244613603, "mean_pred_prob_last_10": 0.10912750400602818, "mean_pred_prob_last_25": 0.05957246976904571, "mean_pred_prob_last_50": 0.036299089167732744, "mean_token_accuracy": 0.8595185101032257, "step": 13420 }, { "epoch": 0.2387428226050166, "grad_norm": 1.2783395285597838, "learning_rate": 0.0001, "loss": 0.8591, "mean_abs_error": 479.8100914942558, "mean_abs_error_last_10": 80.23309503611038, "mean_abs_error_last_25": 157.49503957921212, "mean_abs_error_last_50": 246.26188258906973, "mean_pred_prob": 0.038456378900445995, "mean_pred_prob_last_10": 0.18274849001318216, "mean_pred_prob_last_25": 0.1030035969801247, "mean_pred_prob_last_50": 0.0643857357557863, "mean_token_accuracy": 0.8761913001537323, "step": 13430 }, { "epoch": 0.23892059090181858, "grad_norm": 1.056493931349728, "learning_rate": 0.0001, "loss": 0.904, "mean_abs_error": 188.02512266346463, "mean_abs_error_last_10": 52.21105170914846, "mean_abs_error_last_25": 64.1061622206273, "mean_abs_error_last_50": 111.92898225385997, "mean_pred_prob": 0.04910553307272494, "mean_pred_prob_last_10": 0.21710613630712033, "mean_pred_prob_last_25": 0.13095073476433755, "mean_pred_prob_last_50": 0.08242821116000414, "mean_token_accuracy": 0.873736995458603, "step": 13440 }, { "epoch": 0.23909835919862052, "grad_norm": 1.6891169746226105, "learning_rate": 0.0001, "loss": 0.8716, "mean_abs_error": 428.2548165866876, "mean_abs_error_last_10": 173.58178763688085, "mean_abs_error_last_25": 256.4856559120896, "mean_abs_error_last_50": 333.7586168611084, "mean_pred_prob": 0.03721034048940055, "mean_pred_prob_last_10": 0.19008737964322792, "mean_pred_prob_last_25": 0.10473605099832639, "mean_pred_prob_last_50": 0.06386824890505523, "mean_token_accuracy": 0.8705176770687103, "step": 13450 }, { "epoch": 0.23927612749542246, "grad_norm": 1.410317447949008, "learning_rate": 0.0001, "loss": 0.7872, "mean_abs_error": 170.19518070739272, "mean_abs_error_last_10": 25.708781315782197, "mean_abs_error_last_25": 66.16453783242969, "mean_abs_error_last_50": 97.44720242725462, "mean_pred_prob": 0.04696113532409072, "mean_pred_prob_last_10": 0.23632124923169612, "mean_pred_prob_last_25": 0.13097013104707003, "mean_pred_prob_last_50": 0.07855078624561429, "mean_token_accuracy": 0.8787395179271698, "step": 13460 }, { "epoch": 0.23945389579222442, "grad_norm": 2.3028729939926698, "learning_rate": 0.0001, "loss": 0.9823, "mean_abs_error": 361.87948033041823, "mean_abs_error_last_10": 195.1760779354212, "mean_abs_error_last_25": 253.58652034529223, "mean_abs_error_last_50": 310.80268526961015, "mean_pred_prob": 0.032082735444419086, "mean_pred_prob_last_10": 0.17349672894924878, "mean_pred_prob_last_25": 0.09266500491648913, "mean_pred_prob_last_50": 0.05537531604059041, "mean_token_accuracy": 0.863130921125412, "step": 13470 }, { "epoch": 0.23963166408902636, "grad_norm": 1.8263584982781325, "learning_rate": 0.0001, "loss": 0.8871, "mean_abs_error": 1184.9676534095056, "mean_abs_error_last_10": 541.389613122612, "mean_abs_error_last_25": 685.330505378864, "mean_abs_error_last_50": 895.6069526400652, "mean_pred_prob": 0.033028492816083596, "mean_pred_prob_last_10": 0.14901407967554406, "mean_pred_prob_last_25": 0.08727439672802575, "mean_pred_prob_last_50": 0.054874196446326094, "mean_token_accuracy": 0.8719025433063508, "step": 13480 }, { "epoch": 0.2398094323858283, "grad_norm": 1.726969013597408, "learning_rate": 0.0001, "loss": 0.891, "mean_abs_error": 384.10494757235267, "mean_abs_error_last_10": 118.32486647646056, "mean_abs_error_last_25": 143.7774131790646, "mean_abs_error_last_50": 206.72455023313515, "mean_pred_prob": 0.031351924198679625, "mean_pred_prob_last_10": 0.1668539757374674, "mean_pred_prob_last_25": 0.0895816890290007, "mean_pred_prob_last_50": 0.05298302584560588, "mean_token_accuracy": 0.8810888409614563, "step": 13490 }, { "epoch": 0.23998720068263027, "grad_norm": 0.7313646049651804, "learning_rate": 0.0001, "loss": 0.9058, "mean_abs_error": 1064.2182051682735, "mean_abs_error_last_10": 405.68477070005014, "mean_abs_error_last_25": 509.68577970998365, "mean_abs_error_last_50": 667.4416069380557, "mean_pred_prob": 0.04886217432358535, "mean_pred_prob_last_10": 0.22932409201748669, "mean_pred_prob_last_25": 0.13274547012988477, "mean_pred_prob_last_50": 0.08130243866762613, "mean_token_accuracy": 0.860860413312912, "step": 13500 }, { "epoch": 0.2401649689794322, "grad_norm": 1.2257622418755845, "learning_rate": 0.0001, "loss": 0.8506, "mean_abs_error": 926.9675879388242, "mean_abs_error_last_10": 493.6896908322835, "mean_abs_error_last_25": 552.6135140835798, "mean_abs_error_last_50": 704.804600103547, "mean_pred_prob": 0.028658846576581708, "mean_pred_prob_last_10": 0.15192273885477334, "mean_pred_prob_last_25": 0.08267264549212996, "mean_pred_prob_last_50": 0.04922321676276624, "mean_token_accuracy": 0.8851876258850098, "step": 13510 }, { "epoch": 0.24034273727623415, "grad_norm": 1.0214227256527748, "learning_rate": 0.0001, "loss": 0.8548, "mean_abs_error": 937.7178798613038, "mean_abs_error_last_10": 559.6276615069442, "mean_abs_error_last_25": 566.9206645723132, "mean_abs_error_last_50": 734.3223068287136, "mean_pred_prob": 0.032766517071286215, "mean_pred_prob_last_10": 0.17741939991246908, "mean_pred_prob_last_25": 0.09658172123308759, "mean_pred_prob_last_50": 0.05691847904381575, "mean_token_accuracy": 0.8807061076164245, "step": 13520 }, { "epoch": 0.24052050557303611, "grad_norm": 1.3664263419233789, "learning_rate": 0.0001, "loss": 0.8575, "mean_abs_error": 660.7913578820652, "mean_abs_error_last_10": 234.0302551817907, "mean_abs_error_last_25": 401.97014094505624, "mean_abs_error_last_50": 485.0155108303835, "mean_pred_prob": 0.06129711684188806, "mean_pred_prob_last_10": 0.270861013897229, "mean_pred_prob_last_25": 0.15954498735954986, "mean_pred_prob_last_50": 0.10130224882741459, "mean_token_accuracy": 0.8746324062347413, "step": 13530 }, { "epoch": 0.24069827386983805, "grad_norm": 1.0275188853602253, "learning_rate": 0.0001, "loss": 0.9341, "mean_abs_error": 853.0088502361425, "mean_abs_error_last_10": 359.97831606002035, "mean_abs_error_last_25": 535.9360316227077, "mean_abs_error_last_50": 658.4520465351671, "mean_pred_prob": 0.019036355512798763, "mean_pred_prob_last_10": 0.11377553021302447, "mean_pred_prob_last_25": 0.05791405207710341, "mean_pred_prob_last_50": 0.03357875845395029, "mean_token_accuracy": 0.8707859933376312, "step": 13540 }, { "epoch": 0.24087604216664, "grad_norm": 1.3883921481187889, "learning_rate": 0.0001, "loss": 0.8194, "mean_abs_error": 179.14832152242545, "mean_abs_error_last_10": 34.93483963194312, "mean_abs_error_last_25": 56.06917657782607, "mean_abs_error_last_50": 90.14790694445063, "mean_pred_prob": 0.04083855566568673, "mean_pred_prob_last_10": 0.21352566219866276, "mean_pred_prob_last_25": 0.11737004648894071, "mean_pred_prob_last_50": 0.06996240094304085, "mean_token_accuracy": 0.8761168956756592, "step": 13550 }, { "epoch": 0.24105381046344196, "grad_norm": 2.091501457905372, "learning_rate": 0.0001, "loss": 0.8636, "mean_abs_error": 418.06003435171687, "mean_abs_error_last_10": 69.78484665859237, "mean_abs_error_last_25": 125.28878088152797, "mean_abs_error_last_50": 239.60538148861733, "mean_pred_prob": 0.047172444820171223, "mean_pred_prob_last_10": 0.24742152406834067, "mean_pred_prob_last_25": 0.13543324244674296, "mean_pred_prob_last_50": 0.08124535115202888, "mean_token_accuracy": 0.8750670313835144, "step": 13560 }, { "epoch": 0.2412315787602439, "grad_norm": 1.2705041900366962, "learning_rate": 0.0001, "loss": 0.8796, "mean_abs_error": 87.05422398571898, "mean_abs_error_last_10": 20.42175073452777, "mean_abs_error_last_25": 27.062055950297555, "mean_abs_error_last_50": 44.754755948596575, "mean_pred_prob": 0.04425876662135124, "mean_pred_prob_last_10": 0.22930484339594842, "mean_pred_prob_last_25": 0.12795196250081062, "mean_pred_prob_last_50": 0.07623090744018554, "mean_token_accuracy": 0.874656742811203, "step": 13570 }, { "epoch": 0.24140934705704584, "grad_norm": 1.3428063552797405, "learning_rate": 0.0001, "loss": 0.8605, "mean_abs_error": 948.6460473773438, "mean_abs_error_last_10": 523.637044217942, "mean_abs_error_last_25": 584.1757580975388, "mean_abs_error_last_50": 666.1685740061517, "mean_pred_prob": 0.034419074036122765, "mean_pred_prob_last_10": 0.18498623069317546, "mean_pred_prob_last_25": 0.09941397751972544, "mean_pred_prob_last_50": 0.05949551457597409, "mean_token_accuracy": 0.8751284122467041, "step": 13580 }, { "epoch": 0.2415871153538478, "grad_norm": 1.978567619841908, "learning_rate": 0.0001, "loss": 0.8866, "mean_abs_error": 366.6349325281637, "mean_abs_error_last_10": 138.8586806821488, "mean_abs_error_last_25": 171.1200419722753, "mean_abs_error_last_50": 226.95223049890137, "mean_pred_prob": 0.04118329188786447, "mean_pred_prob_last_10": 0.20216532610356808, "mean_pred_prob_last_25": 0.11162155186757446, "mean_pred_prob_last_50": 0.06904486492276192, "mean_token_accuracy": 0.8737736165523529, "step": 13590 }, { "epoch": 0.24176488365064974, "grad_norm": 1.2275975688256584, "learning_rate": 0.0001, "loss": 0.8225, "mean_abs_error": 194.1563939934685, "mean_abs_error_last_10": 30.751148232784878, "mean_abs_error_last_25": 63.033922813660105, "mean_abs_error_last_50": 93.60581710801881, "mean_pred_prob": 0.04862502692267299, "mean_pred_prob_last_10": 0.24664140194654466, "mean_pred_prob_last_25": 0.13888680273666978, "mean_pred_prob_last_50": 0.08342278553172947, "mean_token_accuracy": 0.863960736989975, "step": 13600 }, { "epoch": 0.24194265194745168, "grad_norm": 1.2152134760149806, "learning_rate": 0.0001, "loss": 0.9054, "mean_abs_error": 1164.0413242316902, "mean_abs_error_last_10": 672.6118800186225, "mean_abs_error_last_25": 756.9696331688117, "mean_abs_error_last_50": 971.4602086505818, "mean_pred_prob": 0.02901528420334216, "mean_pred_prob_last_10": 0.14693846096051857, "mean_pred_prob_last_25": 0.08181086448021233, "mean_pred_prob_last_50": 0.049224217468872664, "mean_token_accuracy": 0.8730914831161499, "step": 13610 }, { "epoch": 0.24212042024425365, "grad_norm": 1.0664521725162828, "learning_rate": 0.0001, "loss": 0.9117, "mean_abs_error": 639.4301544302863, "mean_abs_error_last_10": 286.6832574364676, "mean_abs_error_last_25": 348.46847005162664, "mean_abs_error_last_50": 441.67794309927814, "mean_pred_prob": 0.03813975555240177, "mean_pred_prob_last_10": 0.18666208180366084, "mean_pred_prob_last_25": 0.10429898248403333, "mean_pred_prob_last_50": 0.06382503802888095, "mean_token_accuracy": 0.8650489151477814, "step": 13620 }, { "epoch": 0.2422981885410556, "grad_norm": 0.9141966929275213, "learning_rate": 0.0001, "loss": 0.8519, "mean_abs_error": 348.27395085022516, "mean_abs_error_last_10": 169.0913594941862, "mean_abs_error_last_25": 220.0723286236085, "mean_abs_error_last_50": 230.60092655611737, "mean_pred_prob": 0.037643374875187874, "mean_pred_prob_last_10": 0.1945824976079166, "mean_pred_prob_last_25": 0.1073744282592088, "mean_pred_prob_last_50": 0.06459321430884302, "mean_token_accuracy": 0.8746932446956635, "step": 13630 }, { "epoch": 0.24247595683785753, "grad_norm": 1.5247886979007705, "learning_rate": 0.0001, "loss": 0.841, "mean_abs_error": 1315.8780743342063, "mean_abs_error_last_10": 623.3236705229499, "mean_abs_error_last_25": 730.9195269283994, "mean_abs_error_last_50": 918.648038398908, "mean_pred_prob": 0.023289032392494847, "mean_pred_prob_last_10": 0.1243266366480384, "mean_pred_prob_last_25": 0.06532691275933758, "mean_pred_prob_last_50": 0.03925586613913765, "mean_token_accuracy": 0.8716314673423767, "step": 13640 }, { "epoch": 0.2426537251346595, "grad_norm": 2.6523282078867774, "learning_rate": 0.0001, "loss": 0.8254, "mean_abs_error": 290.1731969878975, "mean_abs_error_last_10": 84.89625780350913, "mean_abs_error_last_25": 139.19579401902837, "mean_abs_error_last_50": 188.1206149735256, "mean_pred_prob": 0.04649315611459315, "mean_pred_prob_last_10": 0.21616340465843678, "mean_pred_prob_last_25": 0.12157043563202023, "mean_pred_prob_last_50": 0.07692138715647161, "mean_token_accuracy": 0.8749559044837951, "step": 13650 }, { "epoch": 0.24283149343146143, "grad_norm": 0.923441072908688, "learning_rate": 0.0001, "loss": 0.9657, "mean_abs_error": 502.6792720166535, "mean_abs_error_last_10": 145.36422829057176, "mean_abs_error_last_25": 244.05808722625596, "mean_abs_error_last_50": 289.99710974892344, "mean_pred_prob": 0.03672494513448328, "mean_pred_prob_last_10": 0.20262352991849183, "mean_pred_prob_last_25": 0.10595645001158119, "mean_pred_prob_last_50": 0.06316521423868834, "mean_token_accuracy": 0.867387568950653, "step": 13660 }, { "epoch": 0.24300926172826337, "grad_norm": 2.4192318053859276, "learning_rate": 0.0001, "loss": 0.8577, "mean_abs_error": 513.0264223965105, "mean_abs_error_last_10": 260.53740041221533, "mean_abs_error_last_25": 269.977720549408, "mean_abs_error_last_50": 314.59612492081544, "mean_pred_prob": 0.03658332356717438, "mean_pred_prob_last_10": 0.18399215342942626, "mean_pred_prob_last_25": 0.1037768233451061, "mean_pred_prob_last_50": 0.06232571598375216, "mean_token_accuracy": 0.8771336793899536, "step": 13670 }, { "epoch": 0.24318703002506534, "grad_norm": 2.1293192986372, "learning_rate": 0.0001, "loss": 0.82, "mean_abs_error": 353.0638838643419, "mean_abs_error_last_10": 77.55826788199174, "mean_abs_error_last_25": 142.3707325185835, "mean_abs_error_last_50": 261.6795699961699, "mean_pred_prob": 0.02777929133735597, "mean_pred_prob_last_10": 0.15592887718230486, "mean_pred_prob_last_25": 0.08198169711977243, "mean_pred_prob_last_50": 0.04808244719170034, "mean_token_accuracy": 0.8824773252010345, "step": 13680 }, { "epoch": 0.24336479832186728, "grad_norm": 2.3610164541862737, "learning_rate": 0.0001, "loss": 0.8219, "mean_abs_error": 619.2471095645791, "mean_abs_error_last_10": 274.4963541653167, "mean_abs_error_last_25": 327.05963230688155, "mean_abs_error_last_50": 407.7465008412354, "mean_pred_prob": 0.040385385230183604, "mean_pred_prob_last_10": 0.18153109411359764, "mean_pred_prob_last_25": 0.10695525652263313, "mean_pred_prob_last_50": 0.06706993155530654, "mean_token_accuracy": 0.8681183397769928, "step": 13690 }, { "epoch": 0.24354256661866922, "grad_norm": 1.0581613012887028, "learning_rate": 0.0001, "loss": 0.8853, "mean_abs_error": 375.1196070982463, "mean_abs_error_last_10": 46.340660507095514, "mean_abs_error_last_25": 74.83343023648999, "mean_abs_error_last_50": 165.89534495277445, "mean_pred_prob": 0.037171464692801234, "mean_pred_prob_last_10": 0.17617277726531028, "mean_pred_prob_last_25": 0.10261079650372266, "mean_pred_prob_last_50": 0.06290308944880962, "mean_token_accuracy": 0.8797361552715302, "step": 13700 }, { "epoch": 0.24372033491547118, "grad_norm": 2.4969987736376433, "learning_rate": 0.0001, "loss": 0.8166, "mean_abs_error": 1312.5069275444075, "mean_abs_error_last_10": 444.6523886812853, "mean_abs_error_last_25": 535.6638428076096, "mean_abs_error_last_50": 775.7719960256612, "mean_pred_prob": 0.02191695159563096, "mean_pred_prob_last_10": 0.11350479925749823, "mean_pred_prob_last_25": 0.06138526465219911, "mean_pred_prob_last_50": 0.03712424845143687, "mean_token_accuracy": 0.8721427679061889, "step": 13710 }, { "epoch": 0.24389810321227312, "grad_norm": 1.2058982845478454, "learning_rate": 0.0001, "loss": 0.7701, "mean_abs_error": 730.3838883560195, "mean_abs_error_last_10": 247.4201489306998, "mean_abs_error_last_25": 331.26428335064406, "mean_abs_error_last_50": 482.20581622200996, "mean_pred_prob": 0.04164896758447867, "mean_pred_prob_last_10": 0.20965189149137586, "mean_pred_prob_last_25": 0.11418164311908185, "mean_pred_prob_last_50": 0.06966410296736285, "mean_token_accuracy": 0.8883793115615845, "step": 13720 }, { "epoch": 0.24407587150907506, "grad_norm": 1.583983084587434, "learning_rate": 0.0001, "loss": 0.844, "mean_abs_error": 1377.3824876661524, "mean_abs_error_last_10": 636.0867674720712, "mean_abs_error_last_25": 732.1946315959659, "mean_abs_error_last_50": 939.4795122082705, "mean_pred_prob": 0.02310331277840305, "mean_pred_prob_last_10": 0.12659680966171435, "mean_pred_prob_last_25": 0.06732871875283308, "mean_pred_prob_last_50": 0.03981004018714884, "mean_token_accuracy": 0.8748024225234985, "step": 13730 }, { "epoch": 0.24425363980587703, "grad_norm": 2.562770827344572, "learning_rate": 0.0001, "loss": 0.8788, "mean_abs_error": 254.85283375630826, "mean_abs_error_last_10": 79.61198296948837, "mean_abs_error_last_25": 103.77946102998605, "mean_abs_error_last_50": 167.25597404666198, "mean_pred_prob": 0.034162883181124926, "mean_pred_prob_last_10": 0.17155568562448026, "mean_pred_prob_last_25": 0.09543223055079579, "mean_pred_prob_last_50": 0.05780866229906678, "mean_token_accuracy": 0.8675204992294312, "step": 13740 }, { "epoch": 0.24443140810267897, "grad_norm": 1.1025571350633487, "learning_rate": 0.0001, "loss": 0.853, "mean_abs_error": 186.54939026260757, "mean_abs_error_last_10": 44.787354350633606, "mean_abs_error_last_25": 58.30595271107734, "mean_abs_error_last_50": 95.13660325424719, "mean_pred_prob": 0.055007158475928006, "mean_pred_prob_last_10": 0.2604725703597069, "mean_pred_prob_last_25": 0.1500138245522976, "mean_pred_prob_last_50": 0.09307378339581192, "mean_token_accuracy": 0.8773823857307435, "step": 13750 }, { "epoch": 0.2446091763994809, "grad_norm": 4.6111817237497, "learning_rate": 0.0001, "loss": 0.9296, "mean_abs_error": 1520.0559993206257, "mean_abs_error_last_10": 846.3830008265998, "mean_abs_error_last_25": 1091.1466440724507, "mean_abs_error_last_50": 1283.166091067285, "mean_pred_prob": 0.019573331986612173, "mean_pred_prob_last_10": 0.09823827459767927, "mean_pred_prob_last_25": 0.05511986667697784, "mean_pred_prob_last_50": 0.03367740760731976, "mean_token_accuracy": 0.8605895400047302, "step": 13760 }, { "epoch": 0.24478694469628287, "grad_norm": 0.9744365690529507, "learning_rate": 0.0001, "loss": 0.8221, "mean_abs_error": 366.6353385659159, "mean_abs_error_last_10": 186.70478368734902, "mean_abs_error_last_25": 247.5618711691471, "mean_abs_error_last_50": 253.14957200100952, "mean_pred_prob": 0.03654551503714174, "mean_pred_prob_last_10": 0.18677008282393218, "mean_pred_prob_last_25": 0.10115549918264151, "mean_pred_prob_last_50": 0.06193662472069263, "mean_token_accuracy": 0.8811097621917725, "step": 13770 }, { "epoch": 0.2449647129930848, "grad_norm": 0.8665299967814265, "learning_rate": 0.0001, "loss": 0.7345, "mean_abs_error": 233.11317768814405, "mean_abs_error_last_10": 85.98220063177114, "mean_abs_error_last_25": 167.85273687185799, "mean_abs_error_last_50": 178.03878735344773, "mean_pred_prob": 0.0401735267136246, "mean_pred_prob_last_10": 0.20762807335704564, "mean_pred_prob_last_25": 0.11178600965067745, "mean_pred_prob_last_50": 0.06803270890377462, "mean_token_accuracy": 0.8829091370105744, "step": 13780 }, { "epoch": 0.24514248128988675, "grad_norm": 1.5705213499475963, "learning_rate": 0.0001, "loss": 0.9092, "mean_abs_error": 629.3128161068386, "mean_abs_error_last_10": 411.38302715502385, "mean_abs_error_last_25": 432.64979999893757, "mean_abs_error_last_50": 519.4270138404625, "mean_pred_prob": 0.026165381341706963, "mean_pred_prob_last_10": 0.1330480654607527, "mean_pred_prob_last_25": 0.07344134034356102, "mean_pred_prob_last_50": 0.044168894516769794, "mean_token_accuracy": 0.8639975666999817, "step": 13790 }, { "epoch": 0.24532024958668872, "grad_norm": 1.2145263789003804, "learning_rate": 0.0001, "loss": 0.7632, "mean_abs_error": 253.25863234284074, "mean_abs_error_last_10": 71.78978291290869, "mean_abs_error_last_25": 87.00114217101891, "mean_abs_error_last_50": 123.3255206370264, "mean_pred_prob": 0.052664268855005504, "mean_pred_prob_last_10": 0.2514565458521247, "mean_pred_prob_last_25": 0.14285493725910783, "mean_pred_prob_last_50": 0.08896364783868194, "mean_token_accuracy": 0.8828565001487731, "step": 13800 }, { "epoch": 0.24549801788349065, "grad_norm": 1.3956025891528054, "learning_rate": 0.0001, "loss": 0.8998, "mean_abs_error": 314.983651686931, "mean_abs_error_last_10": 141.80304914796778, "mean_abs_error_last_25": 191.5792685701064, "mean_abs_error_last_50": 228.7806878444106, "mean_pred_prob": 0.05004249578341842, "mean_pred_prob_last_10": 0.25035325679928067, "mean_pred_prob_last_25": 0.14010263746604323, "mean_pred_prob_last_50": 0.08501412118785083, "mean_token_accuracy": 0.8691323041915894, "step": 13810 }, { "epoch": 0.2456757861802926, "grad_norm": 1.9955903893810336, "learning_rate": 0.0001, "loss": 0.9349, "mean_abs_error": 433.0557198113267, "mean_abs_error_last_10": 157.1198532762137, "mean_abs_error_last_25": 251.4934914970883, "mean_abs_error_last_50": 319.4551444065834, "mean_pred_prob": 0.017437661392614247, "mean_pred_prob_last_10": 0.10623673181980849, "mean_pred_prob_last_25": 0.05279205990955234, "mean_pred_prob_last_50": 0.030564474314451216, "mean_token_accuracy": 0.8731712877750397, "step": 13820 }, { "epoch": 0.24585355447709456, "grad_norm": 1.3083460422346154, "learning_rate": 0.0001, "loss": 0.7682, "mean_abs_error": 572.6953044877362, "mean_abs_error_last_10": 191.47864140376365, "mean_abs_error_last_25": 294.3566046117561, "mean_abs_error_last_50": 366.3669322799768, "mean_pred_prob": 0.0426681643089978, "mean_pred_prob_last_10": 0.20872760665370152, "mean_pred_prob_last_25": 0.1162807297310792, "mean_pred_prob_last_50": 0.07253483743988909, "mean_token_accuracy": 0.8692632734775543, "step": 13830 }, { "epoch": 0.2460313227738965, "grad_norm": 1.1390134006348638, "learning_rate": 0.0001, "loss": 0.8522, "mean_abs_error": 1268.2207998101017, "mean_abs_error_last_10": 656.7591940033446, "mean_abs_error_last_25": 750.3303929456508, "mean_abs_error_last_50": 917.4382522704652, "mean_pred_prob": 0.017626420433225577, "mean_pred_prob_last_10": 0.10043993000581394, "mean_pred_prob_last_25": 0.052307228030986155, "mean_pred_prob_last_50": 0.030733216596127023, "mean_token_accuracy": 0.8742846369743347, "step": 13840 }, { "epoch": 0.24620909107069844, "grad_norm": 1.6168637440635947, "learning_rate": 0.0001, "loss": 0.809, "mean_abs_error": 441.97917140494565, "mean_abs_error_last_10": 75.52478058506772, "mean_abs_error_last_25": 120.58046662125608, "mean_abs_error_last_50": 208.92187025219113, "mean_pred_prob": 0.026934986654669046, "mean_pred_prob_last_10": 0.14148054048418998, "mean_pred_prob_last_25": 0.07665241602808237, "mean_pred_prob_last_50": 0.04622940700501203, "mean_token_accuracy": 0.8781963109970092, "step": 13850 }, { "epoch": 0.2463868593675004, "grad_norm": 1.4213186391583865, "learning_rate": 0.0001, "loss": 0.8485, "mean_abs_error": 363.000657824584, "mean_abs_error_last_10": 110.02589813395227, "mean_abs_error_last_25": 123.7015109478264, "mean_abs_error_last_50": 203.69445294864184, "mean_pred_prob": 0.03411452141590417, "mean_pred_prob_last_10": 0.16626609545201063, "mean_pred_prob_last_25": 0.09321140553802251, "mean_pred_prob_last_50": 0.058088908717036244, "mean_token_accuracy": 0.8708656311035157, "step": 13860 }, { "epoch": 0.24656462766430234, "grad_norm": 1.250573959043146, "learning_rate": 0.0001, "loss": 0.8951, "mean_abs_error": 292.2252185531634, "mean_abs_error_last_10": 72.69231945491109, "mean_abs_error_last_25": 135.0149389998638, "mean_abs_error_last_50": 199.0017855255893, "mean_pred_prob": 0.03132957881316543, "mean_pred_prob_last_10": 0.1611503142863512, "mean_pred_prob_last_25": 0.08678492065519094, "mean_pred_prob_last_50": 0.0529765609651804, "mean_token_accuracy": 0.874553668498993, "step": 13870 }, { "epoch": 0.2467423959611043, "grad_norm": 1.5456097936252469, "learning_rate": 0.0001, "loss": 0.9468, "mean_abs_error": 263.069708142047, "mean_abs_error_last_10": 119.77176431659514, "mean_abs_error_last_25": 179.30326217883112, "mean_abs_error_last_50": 203.04387725234938, "mean_pred_prob": 0.042471754271537064, "mean_pred_prob_last_10": 0.2128001945093274, "mean_pred_prob_last_25": 0.11795386122539639, "mean_pred_prob_last_50": 0.0719949159771204, "mean_token_accuracy": 0.870544046163559, "step": 13880 }, { "epoch": 0.24692016425790625, "grad_norm": 1.0233103877960534, "learning_rate": 0.0001, "loss": 0.8405, "mean_abs_error": 202.5851980639282, "mean_abs_error_last_10": 74.18478498103573, "mean_abs_error_last_25": 93.93119450122215, "mean_abs_error_last_50": 136.81308678527154, "mean_pred_prob": 0.035479426151141524, "mean_pred_prob_last_10": 0.1852981775999069, "mean_pred_prob_last_25": 0.10102548468858004, "mean_pred_prob_last_50": 0.06104405065998435, "mean_token_accuracy": 0.8714420676231385, "step": 13890 }, { "epoch": 0.2470979325547082, "grad_norm": 1.0199743749907317, "learning_rate": 0.0001, "loss": 0.8735, "mean_abs_error": 361.5044156071766, "mean_abs_error_last_10": 115.91180785044371, "mean_abs_error_last_25": 173.50217455595535, "mean_abs_error_last_50": 233.67507000668024, "mean_pred_prob": 0.030168739380314946, "mean_pred_prob_last_10": 0.16116591095924376, "mean_pred_prob_last_25": 0.08659883094951511, "mean_pred_prob_last_50": 0.05190078029409051, "mean_token_accuracy": 0.870569360256195, "step": 13900 }, { "epoch": 0.24727570085151016, "grad_norm": 1.9412264914610649, "learning_rate": 0.0001, "loss": 0.8578, "mean_abs_error": 383.292284017688, "mean_abs_error_last_10": 127.77047622692442, "mean_abs_error_last_25": 175.19271927201675, "mean_abs_error_last_50": 225.75457615528663, "mean_pred_prob": 0.032254723034566266, "mean_pred_prob_last_10": 0.18405755376443267, "mean_pred_prob_last_25": 0.09416289143264293, "mean_pred_prob_last_50": 0.05568895946489647, "mean_token_accuracy": 0.8824355363845825, "step": 13910 }, { "epoch": 0.2474534691483121, "grad_norm": 0.842667827687648, "learning_rate": 0.0001, "loss": 0.7798, "mean_abs_error": 492.05843849777176, "mean_abs_error_last_10": 181.23517408444258, "mean_abs_error_last_25": 215.0559596131721, "mean_abs_error_last_50": 273.87270762084813, "mean_pred_prob": 0.0308861801517196, "mean_pred_prob_last_10": 0.15839458460104652, "mean_pred_prob_last_25": 0.08667151081608608, "mean_pred_prob_last_50": 0.05261752700898796, "mean_token_accuracy": 0.8676757633686065, "step": 13920 }, { "epoch": 0.24763123744511403, "grad_norm": 0.8893792769192218, "learning_rate": 0.0001, "loss": 0.8707, "mean_abs_error": 742.5262104634215, "mean_abs_error_last_10": 290.7220715545724, "mean_abs_error_last_25": 380.65977545048, "mean_abs_error_last_50": 488.4732902001877, "mean_pred_prob": 0.039008118756464684, "mean_pred_prob_last_10": 0.1891283608507365, "mean_pred_prob_last_25": 0.1090451922500506, "mean_pred_prob_last_50": 0.06608278318308294, "mean_token_accuracy": 0.8702419817447662, "step": 13930 }, { "epoch": 0.247809005741916, "grad_norm": 5.516946010653455, "learning_rate": 0.0001, "loss": 0.9088, "mean_abs_error": 339.1469080985353, "mean_abs_error_last_10": 127.2124984951334, "mean_abs_error_last_25": 170.65637656192936, "mean_abs_error_last_50": 231.06256804506438, "mean_pred_prob": 0.0321957825217396, "mean_pred_prob_last_10": 0.1609950166195631, "mean_pred_prob_last_25": 0.08958266656845808, "mean_pred_prob_last_50": 0.05481215743348002, "mean_token_accuracy": 0.8773647785186768, "step": 13940 }, { "epoch": 0.24798677403871794, "grad_norm": 1.8400519638624468, "learning_rate": 0.0001, "loss": 0.8097, "mean_abs_error": 991.7994263925863, "mean_abs_error_last_10": 365.8758347591171, "mean_abs_error_last_25": 505.91631450439854, "mean_abs_error_last_50": 656.7128203536902, "mean_pred_prob": 0.016247461450984702, "mean_pred_prob_last_10": 0.07908463284838944, "mean_pred_prob_last_25": 0.044238243444124234, "mean_pred_prob_last_50": 0.027574010158423333, "mean_token_accuracy": 0.8710750818252564, "step": 13950 }, { "epoch": 0.24816454233551988, "grad_norm": 2.880539056961255, "learning_rate": 0.0001, "loss": 0.8902, "mean_abs_error": 784.3450181767312, "mean_abs_error_last_10": 407.0582259411402, "mean_abs_error_last_25": 431.4740469960922, "mean_abs_error_last_50": 540.1076343031889, "mean_pred_prob": 0.02954759210406337, "mean_pred_prob_last_10": 0.15224603712558746, "mean_pred_prob_last_25": 0.08346825440239627, "mean_pred_prob_last_50": 0.05050981838430744, "mean_token_accuracy": 0.8735434114933014, "step": 13960 }, { "epoch": 0.24834231063232184, "grad_norm": 0.9974634360805733, "learning_rate": 0.0001, "loss": 0.87, "mean_abs_error": 412.7878826330126, "mean_abs_error_last_10": 193.71124878740167, "mean_abs_error_last_25": 308.1636289052693, "mean_abs_error_last_50": 325.8881729269057, "mean_pred_prob": 0.03562126597389579, "mean_pred_prob_last_10": 0.18886060789227485, "mean_pred_prob_last_25": 0.1001390383578837, "mean_pred_prob_last_50": 0.06040152446366846, "mean_token_accuracy": 0.8730429589748383, "step": 13970 }, { "epoch": 0.24852007892912378, "grad_norm": 0.8235555397632033, "learning_rate": 0.0001, "loss": 0.8774, "mean_abs_error": 210.83740345059346, "mean_abs_error_last_10": 53.092649247415274, "mean_abs_error_last_25": 102.74515641588668, "mean_abs_error_last_50": 142.46670596514102, "mean_pred_prob": 0.03867783430032432, "mean_pred_prob_last_10": 0.1985168792307377, "mean_pred_prob_last_25": 0.1089384151622653, "mean_pred_prob_last_50": 0.06434853207319975, "mean_token_accuracy": 0.8750421583652497, "step": 13980 }, { "epoch": 0.24869784722592572, "grad_norm": 1.2628184633046746, "learning_rate": 0.0001, "loss": 0.8419, "mean_abs_error": 419.1617540794217, "mean_abs_error_last_10": 234.8999929814736, "mean_abs_error_last_25": 249.18139966339157, "mean_abs_error_last_50": 273.85290371659966, "mean_pred_prob": 0.045807384746149184, "mean_pred_prob_last_10": 0.21585227511823177, "mean_pred_prob_last_25": 0.1245505808852613, "mean_pred_prob_last_50": 0.07770178220234811, "mean_token_accuracy": 0.8743957817554474, "step": 13990 }, { "epoch": 0.2488756155227277, "grad_norm": 1.7229147910347151, "learning_rate": 0.0001, "loss": 0.7803, "mean_abs_error": 310.956263210938, "mean_abs_error_last_10": 73.72451073025965, "mean_abs_error_last_25": 91.01348045609674, "mean_abs_error_last_50": 168.27539100693508, "mean_pred_prob": 0.03305134205147624, "mean_pred_prob_last_10": 0.1809630073606968, "mean_pred_prob_last_25": 0.09895358383655548, "mean_pred_prob_last_50": 0.05800867220386863, "mean_token_accuracy": 0.8765140175819397, "step": 14000 }, { "epoch": 0.24905338381952963, "grad_norm": 1.5881680802038545, "learning_rate": 0.0001, "loss": 0.8779, "mean_abs_error": 712.5593874563413, "mean_abs_error_last_10": 222.9186300196005, "mean_abs_error_last_25": 357.8110372970401, "mean_abs_error_last_50": 460.75329049708097, "mean_pred_prob": 0.017431301053147764, "mean_pred_prob_last_10": 0.09402377353981137, "mean_pred_prob_last_25": 0.051210156292654574, "mean_pred_prob_last_50": 0.030331041594035924, "mean_token_accuracy": 0.8615191400051116, "step": 14010 }, { "epoch": 0.24923115211633157, "grad_norm": 1.0343615054021555, "learning_rate": 0.0001, "loss": 0.7824, "mean_abs_error": 90.89743461901435, "mean_abs_error_last_10": 17.088108168207217, "mean_abs_error_last_25": 37.44112831495751, "mean_abs_error_last_50": 68.89781326235592, "mean_pred_prob": 0.055350602697581054, "mean_pred_prob_last_10": 0.2768467668443918, "mean_pred_prob_last_25": 0.15465899426490068, "mean_pred_prob_last_50": 0.09443831779062747, "mean_token_accuracy": 0.8690748393535614, "step": 14020 }, { "epoch": 0.24940892041313353, "grad_norm": 1.5701288432224345, "learning_rate": 0.0001, "loss": 1.0447, "mean_abs_error": 1241.0945898753723, "mean_abs_error_last_10": 600.0221266287069, "mean_abs_error_last_25": 754.7482308590068, "mean_abs_error_last_50": 908.9595743047132, "mean_pred_prob": 0.022495929749857167, "mean_pred_prob_last_10": 0.11379508241079747, "mean_pred_prob_last_25": 0.06351386198366527, "mean_pred_prob_last_50": 0.038275675498880446, "mean_token_accuracy": 0.8690486907958984, "step": 14030 }, { "epoch": 0.24958668870993547, "grad_norm": 0.9405449998897334, "learning_rate": 0.0001, "loss": 0.9088, "mean_abs_error": 522.928506708676, "mean_abs_error_last_10": 184.37973355649575, "mean_abs_error_last_25": 279.178112479376, "mean_abs_error_last_50": 397.48097542970766, "mean_pred_prob": 0.027857054118067028, "mean_pred_prob_last_10": 0.14861353766173124, "mean_pred_prob_last_25": 0.08064585486426949, "mean_pred_prob_last_50": 0.0477166089694947, "mean_token_accuracy": 0.8707204222679138, "step": 14040 }, { "epoch": 0.2497644570067374, "grad_norm": 1.2898680480290412, "learning_rate": 0.0001, "loss": 0.8995, "mean_abs_error": 972.7018980744479, "mean_abs_error_last_10": 335.6470646233041, "mean_abs_error_last_25": 443.7178093341925, "mean_abs_error_last_50": 589.0124365580882, "mean_pred_prob": 0.022992870735470204, "mean_pred_prob_last_10": 0.11539294348913245, "mean_pred_prob_last_25": 0.06251606113510207, "mean_pred_prob_last_50": 0.03810118095134385, "mean_token_accuracy": 0.8684328854084015, "step": 14050 }, { "epoch": 0.24994222530353938, "grad_norm": 4.274907413202517, "learning_rate": 0.0001, "loss": 0.873, "mean_abs_error": 1760.179666426773, "mean_abs_error_last_10": 1016.8275084704343, "mean_abs_error_last_25": 1112.3659278574694, "mean_abs_error_last_50": 1332.3751527330485, "mean_pred_prob": 0.02739835960819619, "mean_pred_prob_last_10": 0.14290862003981603, "mean_pred_prob_last_25": 0.07789445784583222, "mean_pred_prob_last_50": 0.046937745201285, "mean_token_accuracy": 0.8704766094684601, "step": 14060 }, { "epoch": 0.2501199936003413, "grad_norm": 1.224807341754209, "learning_rate": 0.0001, "loss": 0.8876, "mean_abs_error": 855.3483700633951, "mean_abs_error_last_10": 313.14510051967056, "mean_abs_error_last_25": 438.8722913948624, "mean_abs_error_last_50": 537.5818124318419, "mean_pred_prob": 0.022036187219782732, "mean_pred_prob_last_10": 0.1224754741997458, "mean_pred_prob_last_25": 0.06429362072958611, "mean_pred_prob_last_50": 0.03801996778347529, "mean_token_accuracy": 0.8776444256305694, "step": 14070 }, { "epoch": 0.25029776189714326, "grad_norm": 2.905525317171504, "learning_rate": 0.0001, "loss": 0.9142, "mean_abs_error": 785.0600664731406, "mean_abs_error_last_10": 321.03511917938977, "mean_abs_error_last_25": 332.9581816513984, "mean_abs_error_last_50": 506.16362694667777, "mean_pred_prob": 0.02343730116263032, "mean_pred_prob_last_10": 0.12141459975391626, "mean_pred_prob_last_25": 0.06518601085990668, "mean_pred_prob_last_50": 0.03974597756750882, "mean_token_accuracy": 0.8580931603908539, "step": 14080 }, { "epoch": 0.2504755301939452, "grad_norm": 0.8541133409900541, "learning_rate": 0.0001, "loss": 0.8494, "mean_abs_error": 762.3264187100045, "mean_abs_error_last_10": 227.62765345945704, "mean_abs_error_last_25": 315.49973684816695, "mean_abs_error_last_50": 445.3557052251446, "mean_pred_prob": 0.026764066569739953, "mean_pred_prob_last_10": 0.13075679116882383, "mean_pred_prob_last_25": 0.07449674576055258, "mean_pred_prob_last_50": 0.045360517123481256, "mean_token_accuracy": 0.8737325727939605, "step": 14090 }, { "epoch": 0.25065329849074713, "grad_norm": 1.4111192008776727, "learning_rate": 0.0001, "loss": 0.8243, "mean_abs_error": 449.73930061646496, "mean_abs_error_last_10": 152.46772099309044, "mean_abs_error_last_25": 165.8449154907981, "mean_abs_error_last_50": 277.7701020924039, "mean_pred_prob": 0.03317284313961864, "mean_pred_prob_last_10": 0.1810297946445644, "mean_pred_prob_last_25": 0.09575165514834225, "mean_pred_prob_last_50": 0.05719874147325754, "mean_token_accuracy": 0.877783614397049, "step": 14100 }, { "epoch": 0.25083106678754913, "grad_norm": 1.3130321417849946, "learning_rate": 0.0001, "loss": 0.802, "mean_abs_error": 174.71713325733157, "mean_abs_error_last_10": 23.100503554266705, "mean_abs_error_last_25": 50.186136492786076, "mean_abs_error_last_50": 98.46454754152334, "mean_pred_prob": 0.04281053463928401, "mean_pred_prob_last_10": 0.22521080486476422, "mean_pred_prob_last_25": 0.12374527808278799, "mean_pred_prob_last_50": 0.07307673618197441, "mean_token_accuracy": 0.8767200767993927, "step": 14110 }, { "epoch": 0.25100883508435107, "grad_norm": 2.2349566627093296, "learning_rate": 0.0001, "loss": 0.8827, "mean_abs_error": 112.54813852610819, "mean_abs_error_last_10": 28.2134054475055, "mean_abs_error_last_25": 73.36583449949362, "mean_abs_error_last_50": 106.77828145611048, "mean_pred_prob": 0.05160017916932702, "mean_pred_prob_last_10": 0.27455770373344424, "mean_pred_prob_last_25": 0.14769111443310975, "mean_pred_prob_last_50": 0.08827991914004088, "mean_token_accuracy": 0.8714948236942291, "step": 14120 }, { "epoch": 0.251186603381153, "grad_norm": 2.19089003331123, "learning_rate": 0.0001, "loss": 0.9365, "mean_abs_error": 435.4100289404758, "mean_abs_error_last_10": 165.86262222461022, "mean_abs_error_last_25": 173.66101169498347, "mean_abs_error_last_50": 249.77460098799696, "mean_pred_prob": 0.013905950239859521, "mean_pred_prob_last_10": 0.07907525803893804, "mean_pred_prob_last_25": 0.04145118398591876, "mean_pred_prob_last_50": 0.02423112103715539, "mean_token_accuracy": 0.8775747060775757, "step": 14130 }, { "epoch": 0.25136437167795495, "grad_norm": 1.730102756574621, "learning_rate": 0.0001, "loss": 0.9702, "mean_abs_error": 384.53807205880054, "mean_abs_error_last_10": 98.35030263163749, "mean_abs_error_last_25": 129.52293951382683, "mean_abs_error_last_50": 201.72382942299288, "mean_pred_prob": 0.0281753771007061, "mean_pred_prob_last_10": 0.15336387176066638, "mean_pred_prob_last_25": 0.08150872103869915, "mean_pred_prob_last_50": 0.04848169656470418, "mean_token_accuracy": 0.8741117656230927, "step": 14140 }, { "epoch": 0.2515421399747569, "grad_norm": 0.7665426393373275, "learning_rate": 0.0001, "loss": 0.9139, "mean_abs_error": 178.1679178004608, "mean_abs_error_last_10": 35.17542393010815, "mean_abs_error_last_25": 48.71251031850342, "mean_abs_error_last_50": 106.69858952569606, "mean_pred_prob": 0.04401914281770587, "mean_pred_prob_last_10": 0.2108828354626894, "mean_pred_prob_last_25": 0.11965167336165905, "mean_pred_prob_last_50": 0.07368589518591762, "mean_token_accuracy": 0.8688346207141876, "step": 14150 }, { "epoch": 0.2517199082715588, "grad_norm": 1.2609889116983106, "learning_rate": 0.0001, "loss": 0.8426, "mean_abs_error": 1108.8342676645548, "mean_abs_error_last_10": 518.2694771005066, "mean_abs_error_last_25": 592.5496945134831, "mean_abs_error_last_50": 720.9336460665994, "mean_pred_prob": 0.046529115780140275, "mean_pred_prob_last_10": 0.21639959284802898, "mean_pred_prob_last_25": 0.12516100431967062, "mean_pred_prob_last_50": 0.07821134369587526, "mean_token_accuracy": 0.8729375720024108, "step": 14160 }, { "epoch": 0.2518976765683608, "grad_norm": 1.6395876749490679, "learning_rate": 0.0001, "loss": 0.8063, "mean_abs_error": 217.66637409793321, "mean_abs_error_last_10": 89.04318240852666, "mean_abs_error_last_25": 118.38891088079421, "mean_abs_error_last_50": 167.10109551745185, "mean_pred_prob": 0.03267652373760939, "mean_pred_prob_last_10": 0.17254423275589942, "mean_pred_prob_last_25": 0.09521848578006029, "mean_pred_prob_last_50": 0.05630057528614998, "mean_token_accuracy": 0.8826554834842681, "step": 14170 }, { "epoch": 0.25207544486516276, "grad_norm": 1.754929649470318, "learning_rate": 0.0001, "loss": 0.9421, "mean_abs_error": 541.786898142441, "mean_abs_error_last_10": 279.7117365776664, "mean_abs_error_last_25": 325.05347895568946, "mean_abs_error_last_50": 348.01597851844383, "mean_pred_prob": 0.027427853224799036, "mean_pred_prob_last_10": 0.14870200993027538, "mean_pred_prob_last_25": 0.07916255041491241, "mean_pred_prob_last_50": 0.04737453714478761, "mean_token_accuracy": 0.8687612831592559, "step": 14180 }, { "epoch": 0.2522532131619647, "grad_norm": 1.4224534326808715, "learning_rate": 0.0001, "loss": 0.8769, "mean_abs_error": 277.33463232806565, "mean_abs_error_last_10": 66.01767534947618, "mean_abs_error_last_25": 112.71095189023588, "mean_abs_error_last_50": 162.0410044949252, "mean_pred_prob": 0.03388744681142271, "mean_pred_prob_last_10": 0.17134114895015956, "mean_pred_prob_last_25": 0.09248884227126837, "mean_pred_prob_last_50": 0.05726541914045811, "mean_token_accuracy": 0.8703486084938049, "step": 14190 }, { "epoch": 0.25243098145876663, "grad_norm": 2.4277640333593027, "learning_rate": 0.0001, "loss": 0.867, "mean_abs_error": 198.5397068294409, "mean_abs_error_last_10": 28.72111485710264, "mean_abs_error_last_25": 63.32892163522397, "mean_abs_error_last_50": 131.4872205230221, "mean_pred_prob": 0.03832327183336019, "mean_pred_prob_last_10": 0.2084938518702984, "mean_pred_prob_last_25": 0.11227780319750309, "mean_pred_prob_last_50": 0.06605458669364453, "mean_token_accuracy": 0.8753917276859283, "step": 14200 }, { "epoch": 0.2526087497555686, "grad_norm": 1.3730997035208827, "learning_rate": 0.0001, "loss": 0.9079, "mean_abs_error": 496.95372045578563, "mean_abs_error_last_10": 136.92494595100487, "mean_abs_error_last_25": 310.154172824767, "mean_abs_error_last_50": 405.5443849401073, "mean_pred_prob": 0.033350861095823345, "mean_pred_prob_last_10": 0.1733757894486189, "mean_pred_prob_last_25": 0.09142660293728114, "mean_pred_prob_last_50": 0.05587578006088734, "mean_token_accuracy": 0.8720085918903351, "step": 14210 }, { "epoch": 0.25278651805237057, "grad_norm": 2.7271466336679704, "learning_rate": 0.0001, "loss": 0.7791, "mean_abs_error": 259.5729836259451, "mean_abs_error_last_10": 71.49936894290934, "mean_abs_error_last_25": 155.47966484515922, "mean_abs_error_last_50": 185.36706567820323, "mean_pred_prob": 0.03944460772909224, "mean_pred_prob_last_10": 0.20674816742539406, "mean_pred_prob_last_25": 0.11269342917948962, "mean_pred_prob_last_50": 0.06759494533762336, "mean_token_accuracy": 0.8765312552452087, "step": 14220 }, { "epoch": 0.2529642863491725, "grad_norm": 1.5157622892480125, "learning_rate": 0.0001, "loss": 0.7538, "mean_abs_error": 158.68036124705517, "mean_abs_error_last_10": 46.5121405824281, "mean_abs_error_last_25": 57.820327477092256, "mean_abs_error_last_50": 86.5571884634063, "mean_pred_prob": 0.04762780568562448, "mean_pred_prob_last_10": 0.23205620087683201, "mean_pred_prob_last_25": 0.13251449912786484, "mean_pred_prob_last_50": 0.08123918874189258, "mean_token_accuracy": 0.8777973890304566, "step": 14230 }, { "epoch": 0.25314205464597445, "grad_norm": 1.090616701708572, "learning_rate": 0.0001, "loss": 0.861, "mean_abs_error": 392.218330435798, "mean_abs_error_last_10": 200.78730366673182, "mean_abs_error_last_25": 206.18821694286595, "mean_abs_error_last_50": 232.4695525877882, "mean_pred_prob": 0.03421314823208377, "mean_pred_prob_last_10": 0.16879807161167265, "mean_pred_prob_last_25": 0.09466462519485504, "mean_pred_prob_last_50": 0.05782867604866624, "mean_token_accuracy": 0.8702515721321106, "step": 14240 }, { "epoch": 0.2533198229427764, "grad_norm": 1.717882544650022, "learning_rate": 0.0001, "loss": 0.9555, "mean_abs_error": 695.0400210818832, "mean_abs_error_last_10": 203.26077977211588, "mean_abs_error_last_25": 301.00220909108396, "mean_abs_error_last_50": 412.65890265670953, "mean_pred_prob": 0.021520010256790557, "mean_pred_prob_last_10": 0.12513583332765849, "mean_pred_prob_last_25": 0.06406132411211729, "mean_pred_prob_last_50": 0.037404411984607575, "mean_token_accuracy": 0.8715497612953186, "step": 14250 }, { "epoch": 0.2534975912395783, "grad_norm": 1.7956539349583143, "learning_rate": 0.0001, "loss": 0.9438, "mean_abs_error": 322.82886291948546, "mean_abs_error_last_10": 68.31539694378725, "mean_abs_error_last_25": 168.99118507663937, "mean_abs_error_last_50": 185.77844700484303, "mean_pred_prob": 0.037266587186604735, "mean_pred_prob_last_10": 0.187230995669961, "mean_pred_prob_last_25": 0.10583773534744978, "mean_pred_prob_last_50": 0.06401775982230902, "mean_token_accuracy": 0.8659761369228363, "step": 14260 }, { "epoch": 0.25367535953638026, "grad_norm": 1.77071633452529, "learning_rate": 0.0001, "loss": 0.8151, "mean_abs_error": 249.87266776772464, "mean_abs_error_last_10": 69.0224764958014, "mean_abs_error_last_25": 146.77663566433503, "mean_abs_error_last_50": 211.6897011345135, "mean_pred_prob": 0.042229969054460526, "mean_pred_prob_last_10": 0.21008716486394405, "mean_pred_prob_last_25": 0.11780192842707038, "mean_pred_prob_last_50": 0.07163483537733555, "mean_token_accuracy": 0.8752397418022155, "step": 14270 }, { "epoch": 0.25385312783318226, "grad_norm": 1.0961855528263418, "learning_rate": 0.0001, "loss": 0.9539, "mean_abs_error": 528.1755554571595, "mean_abs_error_last_10": 144.61361146756775, "mean_abs_error_last_25": 199.2705146858041, "mean_abs_error_last_50": 294.7647345774318, "mean_pred_prob": 0.024056192394345997, "mean_pred_prob_last_10": 0.12583004869520664, "mean_pred_prob_last_25": 0.0671600604429841, "mean_pred_prob_last_50": 0.04058463028632105, "mean_token_accuracy": 0.8738227307796478, "step": 14280 }, { "epoch": 0.2540308961299842, "grad_norm": 1.4758372041765304, "learning_rate": 0.0001, "loss": 0.8924, "mean_abs_error": 283.130746678222, "mean_abs_error_last_10": 54.49030343183908, "mean_abs_error_last_25": 64.22988972383845, "mean_abs_error_last_50": 133.05101226483904, "mean_pred_prob": 0.039565386390313505, "mean_pred_prob_last_10": 0.19998530931770803, "mean_pred_prob_last_25": 0.11119791977107525, "mean_pred_prob_last_50": 0.06750903418287635, "mean_token_accuracy": 0.8773786425590515, "step": 14290 }, { "epoch": 0.25420866442678614, "grad_norm": 1.2017249250946547, "learning_rate": 0.0001, "loss": 0.8151, "mean_abs_error": 410.1479625821985, "mean_abs_error_last_10": 314.4306134296987, "mean_abs_error_last_25": 321.87872505167525, "mean_abs_error_last_50": 323.810730786123, "mean_pred_prob": 0.033996936661424115, "mean_pred_prob_last_10": 0.16422100631752984, "mean_pred_prob_last_25": 0.09304073646198958, "mean_pred_prob_last_50": 0.057663734536617994, "mean_token_accuracy": 0.8681372284889222, "step": 14300 }, { "epoch": 0.2543864327235881, "grad_norm": 1.6559307392910831, "learning_rate": 0.0001, "loss": 0.9392, "mean_abs_error": 1144.7219854013258, "mean_abs_error_last_10": 611.8365170426669, "mean_abs_error_last_25": 664.6963389557111, "mean_abs_error_last_50": 779.0365324021562, "mean_pred_prob": 0.02093786173063563, "mean_pred_prob_last_10": 0.11281827252532821, "mean_pred_prob_last_25": 0.06165342370950384, "mean_pred_prob_last_50": 0.03621517781721195, "mean_token_accuracy": 0.8686376571655273, "step": 14310 }, { "epoch": 0.25456420102039, "grad_norm": 1.44408907984519, "learning_rate": 0.0001, "loss": 0.9803, "mean_abs_error": 424.7966758336376, "mean_abs_error_last_10": 123.98641277002386, "mean_abs_error_last_25": 159.62702774093324, "mean_abs_error_last_50": 229.3751709134686, "mean_pred_prob": 0.03605179071892053, "mean_pred_prob_last_10": 0.19005597308278083, "mean_pred_prob_last_25": 0.10313533330336214, "mean_pred_prob_last_50": 0.06167748565785587, "mean_token_accuracy": 0.864632374048233, "step": 14320 }, { "epoch": 0.25474196931719195, "grad_norm": 1.158281333271008, "learning_rate": 0.0001, "loss": 0.8266, "mean_abs_error": 393.9298423189069, "mean_abs_error_last_10": 127.6931126388026, "mean_abs_error_last_25": 193.75329982909852, "mean_abs_error_last_50": 258.46397822838765, "mean_pred_prob": 0.021992406528443097, "mean_pred_prob_last_10": 0.11866647470742464, "mean_pred_prob_last_25": 0.06293706698343157, "mean_pred_prob_last_50": 0.03772062282077968, "mean_token_accuracy": 0.8786173939704895, "step": 14330 }, { "epoch": 0.25491973761399395, "grad_norm": 1.8851774111965696, "learning_rate": 0.0001, "loss": 0.9826, "mean_abs_error": 1286.321967670495, "mean_abs_error_last_10": 702.4130260093885, "mean_abs_error_last_25": 748.7045105613103, "mean_abs_error_last_50": 900.9925727601069, "mean_pred_prob": 0.0316152908388176, "mean_pred_prob_last_10": 0.14616246430086904, "mean_pred_prob_last_25": 0.08559137332485989, "mean_pred_prob_last_50": 0.05273010157397948, "mean_token_accuracy": 0.8677271783351899, "step": 14340 }, { "epoch": 0.2550975059107959, "grad_norm": 1.5006174407653368, "learning_rate": 0.0001, "loss": 0.9886, "mean_abs_error": 841.1068303672437, "mean_abs_error_last_10": 442.9670580172241, "mean_abs_error_last_25": 499.75459735733074, "mean_abs_error_last_50": 607.3431698604833, "mean_pred_prob": 0.029029409494251014, "mean_pred_prob_last_10": 0.16031559146067592, "mean_pred_prob_last_25": 0.0847574802202871, "mean_pred_prob_last_50": 0.05024959995062091, "mean_token_accuracy": 0.8647123277187347, "step": 14350 }, { "epoch": 0.2552752742075978, "grad_norm": 1.0171455618287824, "learning_rate": 0.0001, "loss": 0.9443, "mean_abs_error": 1043.4172623247828, "mean_abs_error_last_10": 439.42655323977914, "mean_abs_error_last_25": 534.9301693899341, "mean_abs_error_last_50": 639.6162284520162, "mean_pred_prob": 0.015483762705116533, "mean_pred_prob_last_10": 0.07871864826302044, "mean_pred_prob_last_25": 0.041331083502154795, "mean_pred_prob_last_50": 0.02577649866580032, "mean_token_accuracy": 0.8715389311313629, "step": 14360 }, { "epoch": 0.25545304250439976, "grad_norm": 0.7374607518980881, "learning_rate": 0.0001, "loss": 0.8143, "mean_abs_error": 717.3045940979016, "mean_abs_error_last_10": 360.0477941219728, "mean_abs_error_last_25": 426.32179752172686, "mean_abs_error_last_50": 531.2637882121446, "mean_pred_prob": 0.043790257108048534, "mean_pred_prob_last_10": 0.2183210093935486, "mean_pred_prob_last_25": 0.12265670747146942, "mean_pred_prob_last_50": 0.07419685031636618, "mean_token_accuracy": 0.8699722051620483, "step": 14370 }, { "epoch": 0.2556308108012017, "grad_norm": 1.7858131469612797, "learning_rate": 0.0001, "loss": 0.8478, "mean_abs_error": 766.1850162085044, "mean_abs_error_last_10": 201.47237547192714, "mean_abs_error_last_25": 256.10431169805986, "mean_abs_error_last_50": 445.34466251184404, "mean_pred_prob": 0.02932588068943005, "mean_pred_prob_last_10": 0.14418830818030984, "mean_pred_prob_last_25": 0.08318542390479707, "mean_pred_prob_last_50": 0.05059697854449041, "mean_token_accuracy": 0.8805811643600464, "step": 14380 }, { "epoch": 0.25580857909800364, "grad_norm": 1.42860639010549, "learning_rate": 0.0001, "loss": 0.861, "mean_abs_error": 129.532883697877, "mean_abs_error_last_10": 27.48409144284425, "mean_abs_error_last_25": 41.21781770085622, "mean_abs_error_last_50": 70.5030800639137, "mean_pred_prob": 0.047778353467583655, "mean_pred_prob_last_10": 0.23992249108850955, "mean_pred_prob_last_25": 0.13447112757712604, "mean_pred_prob_last_50": 0.08127784375101328, "mean_token_accuracy": 0.8793522894382477, "step": 14390 }, { "epoch": 0.25598634739480564, "grad_norm": 1.0237378703560815, "learning_rate": 0.0001, "loss": 0.8714, "mean_abs_error": 448.3026060281374, "mean_abs_error_last_10": 193.32977823961224, "mean_abs_error_last_25": 276.05475161420355, "mean_abs_error_last_50": 361.0287377482527, "mean_pred_prob": 0.037471760716289285, "mean_pred_prob_last_10": 0.18635302502661943, "mean_pred_prob_last_25": 0.10573464957997203, "mean_pred_prob_last_50": 0.06350895948708057, "mean_token_accuracy": 0.8669166207313538, "step": 14400 }, { "epoch": 0.2561641156916076, "grad_norm": 2.0423777468244575, "learning_rate": 0.0001, "loss": 0.7824, "mean_abs_error": 1167.7280590177852, "mean_abs_error_last_10": 758.3389285034428, "mean_abs_error_last_25": 792.7744067890549, "mean_abs_error_last_50": 867.4238178269404, "mean_pred_prob": 0.035978146579873284, "mean_pred_prob_last_10": 0.1747742971405387, "mean_pred_prob_last_25": 0.09798987624817528, "mean_pred_prob_last_50": 0.06071121357381344, "mean_token_accuracy": 0.8744948446750641, "step": 14410 }, { "epoch": 0.2563418839884095, "grad_norm": 1.74817825170023, "learning_rate": 0.0001, "loss": 0.8142, "mean_abs_error": 510.47306695738064, "mean_abs_error_last_10": 93.10982153628814, "mean_abs_error_last_25": 162.00443333615166, "mean_abs_error_last_50": 266.2244824149099, "mean_pred_prob": 0.027911472925916316, "mean_pred_prob_last_10": 0.15416395626962184, "mean_pred_prob_last_25": 0.08345206389203667, "mean_pred_prob_last_50": 0.048949177656322715, "mean_token_accuracy": 0.8691687226295471, "step": 14420 }, { "epoch": 0.25651965228521145, "grad_norm": 1.208775412800667, "learning_rate": 0.0001, "loss": 0.7807, "mean_abs_error": 196.49704610955726, "mean_abs_error_last_10": 53.25715811742593, "mean_abs_error_last_25": 110.81822528160691, "mean_abs_error_last_50": 160.05337544127724, "mean_pred_prob": 0.03289566291496158, "mean_pred_prob_last_10": 0.16927246637642385, "mean_pred_prob_last_25": 0.09106359332799911, "mean_pred_prob_last_50": 0.055339549854397777, "mean_token_accuracy": 0.8820864677429199, "step": 14430 }, { "epoch": 0.2566974205820134, "grad_norm": 1.1875021654631988, "learning_rate": 0.0001, "loss": 0.9122, "mean_abs_error": 305.80018426236876, "mean_abs_error_last_10": 45.04706080811857, "mean_abs_error_last_25": 82.5569705464622, "mean_abs_error_last_50": 174.14086684486588, "mean_pred_prob": 0.043537835427559915, "mean_pred_prob_last_10": 0.2242141429334879, "mean_pred_prob_last_25": 0.12419646866619587, "mean_pred_prob_last_50": 0.07490047155879438, "mean_token_accuracy": 0.8697650074958801, "step": 14440 }, { "epoch": 0.25687518887881533, "grad_norm": 0.7175320568572523, "learning_rate": 0.0001, "loss": 0.7988, "mean_abs_error": 1624.84987181387, "mean_abs_error_last_10": 953.7656558102702, "mean_abs_error_last_25": 1135.780070567012, "mean_abs_error_last_50": 1298.3721361172793, "mean_pred_prob": 0.04061987571039936, "mean_pred_prob_last_10": 0.20092684260453098, "mean_pred_prob_last_25": 0.11005804178857943, "mean_pred_prob_last_50": 0.0675160557279014, "mean_token_accuracy": 0.8796914756298065, "step": 14450 }, { "epoch": 0.2570529571756173, "grad_norm": 1.1832932922994943, "learning_rate": 0.0001, "loss": 0.8105, "mean_abs_error": 657.0363739783654, "mean_abs_error_last_10": 173.7779298857833, "mean_abs_error_last_25": 276.8659682699055, "mean_abs_error_last_50": 418.2991483240855, "mean_pred_prob": 0.034160653562867085, "mean_pred_prob_last_10": 0.17587556816870348, "mean_pred_prob_last_25": 0.09583604645449668, "mean_pred_prob_last_50": 0.05791493043361697, "mean_token_accuracy": 0.8734539926052094, "step": 14460 }, { "epoch": 0.25723072547241926, "grad_norm": 0.7516752542922331, "learning_rate": 0.0001, "loss": 0.8325, "mean_abs_error": 713.1734510813137, "mean_abs_error_last_10": 395.08057469298154, "mean_abs_error_last_25": 416.3686250775686, "mean_abs_error_last_50": 474.4519551088075, "mean_pred_prob": 0.02449700083816424, "mean_pred_prob_last_10": 0.12525005454663188, "mean_pred_prob_last_25": 0.06953225150937214, "mean_pred_prob_last_50": 0.04169718457851559, "mean_token_accuracy": 0.870624977350235, "step": 14470 }, { "epoch": 0.2574084937692212, "grad_norm": 1.1498044665983889, "learning_rate": 0.0001, "loss": 0.9349, "mean_abs_error": 1684.1109968681915, "mean_abs_error_last_10": 824.3546431132236, "mean_abs_error_last_25": 983.924072392117, "mean_abs_error_last_50": 1285.411747462825, "mean_pred_prob": 0.026989474052970763, "mean_pred_prob_last_10": 0.1343647770670941, "mean_pred_prob_last_25": 0.07578119998652255, "mean_pred_prob_last_50": 0.04588769913971191, "mean_token_accuracy": 0.8567633688449859, "step": 14480 }, { "epoch": 0.25758626206602314, "grad_norm": 1.4813883400868484, "learning_rate": 0.0001, "loss": 0.8007, "mean_abs_error": 1387.5064444952964, "mean_abs_error_last_10": 739.1918885426451, "mean_abs_error_last_25": 740.7894766058182, "mean_abs_error_last_50": 922.0271056324925, "mean_pred_prob": 0.0403000484657241, "mean_pred_prob_last_10": 0.1876424129237421, "mean_pred_prob_last_25": 0.10839936471311376, "mean_pred_prob_last_50": 0.06774270792957396, "mean_token_accuracy": 0.8737469017505646, "step": 14490 }, { "epoch": 0.2577640303628251, "grad_norm": 1.904724447293202, "learning_rate": 0.0001, "loss": 0.8626, "mean_abs_error": 460.69273609536503, "mean_abs_error_last_10": 226.11635484748496, "mean_abs_error_last_25": 285.0427505559638, "mean_abs_error_last_50": 333.29011656970914, "mean_pred_prob": 0.046959445887478066, "mean_pred_prob_last_10": 0.227476643800037, "mean_pred_prob_last_25": 0.12940468568704092, "mean_pred_prob_last_50": 0.07908044843934477, "mean_token_accuracy": 0.8685211598873138, "step": 14500 }, { "epoch": 0.257941798659627, "grad_norm": 1.116230036786072, "learning_rate": 0.0001, "loss": 0.9014, "mean_abs_error": 326.03874148708167, "mean_abs_error_last_10": 100.61559509331009, "mean_abs_error_last_25": 137.52522364538103, "mean_abs_error_last_50": 180.01588793493553, "mean_pred_prob": 0.03253259863704443, "mean_pred_prob_last_10": 0.16395014338195324, "mean_pred_prob_last_25": 0.09062898643314839, "mean_pred_prob_last_50": 0.055154819134622814, "mean_token_accuracy": 0.8678394556045532, "step": 14510 }, { "epoch": 0.258119566956429, "grad_norm": 1.5458646307383852, "learning_rate": 0.0001, "loss": 0.7797, "mean_abs_error": 537.6549296392444, "mean_abs_error_last_10": 176.806932980191, "mean_abs_error_last_25": 199.09275458061137, "mean_abs_error_last_50": 286.63779696003337, "mean_pred_prob": 0.03482324074138887, "mean_pred_prob_last_10": 0.16733234545681624, "mean_pred_prob_last_25": 0.09772984591545537, "mean_pred_prob_last_50": 0.05913200215436518, "mean_token_accuracy": 0.8779250919818878, "step": 14520 }, { "epoch": 0.25829733525323095, "grad_norm": 0.8833218132997153, "learning_rate": 0.0001, "loss": 0.9131, "mean_abs_error": 405.2004293482784, "mean_abs_error_last_10": 129.9309097192051, "mean_abs_error_last_25": 284.6097369510911, "mean_abs_error_last_50": 385.4094372655711, "mean_pred_prob": 0.033681785129010676, "mean_pred_prob_last_10": 0.18480443991720677, "mean_pred_prob_last_25": 0.09649134436622261, "mean_pred_prob_last_50": 0.057081344164907934, "mean_token_accuracy": 0.8739392876625061, "step": 14530 }, { "epoch": 0.2584751035500329, "grad_norm": 1.289881895386755, "learning_rate": 0.0001, "loss": 0.9275, "mean_abs_error": 608.5205724612507, "mean_abs_error_last_10": 254.21020266990595, "mean_abs_error_last_25": 357.9002092842381, "mean_abs_error_last_50": 410.37440307686165, "mean_pred_prob": 0.02124889788683504, "mean_pred_prob_last_10": 0.11160877449437975, "mean_pred_prob_last_25": 0.061260949540883305, "mean_pred_prob_last_50": 0.03679476724937558, "mean_token_accuracy": 0.8737135767936707, "step": 14540 }, { "epoch": 0.25865287184683483, "grad_norm": 1.8766897498047317, "learning_rate": 0.0001, "loss": 0.8002, "mean_abs_error": 258.5676981152134, "mean_abs_error_last_10": 137.66181388926185, "mean_abs_error_last_25": 161.60934714556225, "mean_abs_error_last_50": 197.84809753793076, "mean_pred_prob": 0.045730771217495206, "mean_pred_prob_last_10": 0.2555541810579598, "mean_pred_prob_last_25": 0.12896307832561432, "mean_pred_prob_last_50": 0.0761363689089194, "mean_token_accuracy": 0.8768078565597535, "step": 14550 }, { "epoch": 0.25883064014363677, "grad_norm": 1.2993657504900864, "learning_rate": 0.0001, "loss": 0.9013, "mean_abs_error": 1192.4249048784352, "mean_abs_error_last_10": 729.2975532570041, "mean_abs_error_last_25": 828.8755477884639, "mean_abs_error_last_50": 972.4121204261704, "mean_pred_prob": 0.03729438823356759, "mean_pred_prob_last_10": 0.1754008653195342, "mean_pred_prob_last_25": 0.10318703864904819, "mean_pred_prob_last_50": 0.06342930937244091, "mean_token_accuracy": 0.8687099337577819, "step": 14560 }, { "epoch": 0.2590084084404387, "grad_norm": 1.926212226227916, "learning_rate": 0.0001, "loss": 0.8073, "mean_abs_error": 149.4538665292742, "mean_abs_error_last_10": 33.150744489488275, "mean_abs_error_last_25": 65.93765534828044, "mean_abs_error_last_50": 91.1218681576761, "mean_pred_prob": 0.03944326592609286, "mean_pred_prob_last_10": 0.19208431541919707, "mean_pred_prob_last_25": 0.11057165935635567, "mean_pred_prob_last_50": 0.06662894356995822, "mean_token_accuracy": 0.8788119792938233, "step": 14570 }, { "epoch": 0.2591861767372407, "grad_norm": 2.7505179371953896, "learning_rate": 0.0001, "loss": 0.7838, "mean_abs_error": 195.59970267736702, "mean_abs_error_last_10": 28.097154412142817, "mean_abs_error_last_25": 50.32759355227835, "mean_abs_error_last_50": 100.1256362583669, "mean_pred_prob": 0.037433257792145014, "mean_pred_prob_last_10": 0.20551974568516015, "mean_pred_prob_last_25": 0.11164525449275971, "mean_pred_prob_last_50": 0.06550060445442796, "mean_token_accuracy": 0.881004136800766, "step": 14580 }, { "epoch": 0.25936394503404264, "grad_norm": 1.1009683173645537, "learning_rate": 0.0001, "loss": 0.9678, "mean_abs_error": 504.4114468930003, "mean_abs_error_last_10": 307.0052774734178, "mean_abs_error_last_25": 250.71275032732507, "mean_abs_error_last_50": 328.1193896673445, "mean_pred_prob": 0.028644542524125426, "mean_pred_prob_last_10": 0.15063959509134292, "mean_pred_prob_last_25": 0.08252355859149248, "mean_pred_prob_last_50": 0.04892430305480957, "mean_token_accuracy": 0.8681984007358551, "step": 14590 }, { "epoch": 0.2595417133308446, "grad_norm": 0.9632567945072257, "learning_rate": 0.0001, "loss": 0.9535, "mean_abs_error": 328.4607143061643, "mean_abs_error_last_10": 275.6228521326713, "mean_abs_error_last_25": 279.7557529180623, "mean_abs_error_last_50": 264.1953360292079, "mean_pred_prob": 0.032448221393860874, "mean_pred_prob_last_10": 0.1618251122534275, "mean_pred_prob_last_25": 0.08986640209332108, "mean_pred_prob_last_50": 0.055037437658756974, "mean_token_accuracy": 0.8699534475803375, "step": 14600 }, { "epoch": 0.2597194816276465, "grad_norm": 3.203111929815938, "learning_rate": 0.0001, "loss": 0.8394, "mean_abs_error": 821.4166748910415, "mean_abs_error_last_10": 443.5026259236166, "mean_abs_error_last_25": 567.0284163330844, "mean_abs_error_last_50": 659.662663579845, "mean_pred_prob": 0.024026762906578368, "mean_pred_prob_last_10": 0.13134701181552372, "mean_pred_prob_last_25": 0.06950153702637181, "mean_pred_prob_last_50": 0.04112242651754059, "mean_token_accuracy": 0.8708438634872436, "step": 14610 }, { "epoch": 0.25989724992444846, "grad_norm": 1.9070867684473607, "learning_rate": 0.0001, "loss": 0.8246, "mean_abs_error": 327.00396664358175, "mean_abs_error_last_10": 190.56850104731225, "mean_abs_error_last_25": 189.6031258260021, "mean_abs_error_last_50": 203.52976889421052, "mean_pred_prob": 0.03299289490096271, "mean_pred_prob_last_10": 0.16869947388768197, "mean_pred_prob_last_25": 0.09343015179038047, "mean_pred_prob_last_50": 0.055822957679629324, "mean_token_accuracy": 0.8763719737529755, "step": 14620 }, { "epoch": 0.2600750182212504, "grad_norm": 0.9625772724771788, "learning_rate": 0.0001, "loss": 0.7762, "mean_abs_error": 252.67984121263197, "mean_abs_error_last_10": 185.8115374503574, "mean_abs_error_last_25": 185.69267055965847, "mean_abs_error_last_50": 204.54307744522623, "mean_pred_prob": 0.04594567478634417, "mean_pred_prob_last_10": 0.21439975856337695, "mean_pred_prob_last_25": 0.1237764157820493, "mean_pred_prob_last_50": 0.07791409675264731, "mean_token_accuracy": 0.8824566781520844, "step": 14630 }, { "epoch": 0.2602527865180524, "grad_norm": 1.074911496089566, "learning_rate": 0.0001, "loss": 0.8129, "mean_abs_error": 244.82816450050632, "mean_abs_error_last_10": 111.8496595678957, "mean_abs_error_last_25": 163.4597345082188, "mean_abs_error_last_50": 156.3154685236587, "mean_pred_prob": 0.0438558598048985, "mean_pred_prob_last_10": 0.21626781821250915, "mean_pred_prob_last_25": 0.12368933884426951, "mean_pred_prob_last_50": 0.0754704428371042, "mean_token_accuracy": 0.8751575171947479, "step": 14640 }, { "epoch": 0.26043055481485433, "grad_norm": 1.5841612852046507, "learning_rate": 0.0001, "loss": 0.8663, "mean_abs_error": 437.23340383698786, "mean_abs_error_last_10": 101.3325922400913, "mean_abs_error_last_25": 242.45716104075723, "mean_abs_error_last_50": 305.95396827344473, "mean_pred_prob": 0.027294330927543343, "mean_pred_prob_last_10": 0.1582683503627777, "mean_pred_prob_last_25": 0.08215795904397964, "mean_pred_prob_last_50": 0.047496722126379606, "mean_token_accuracy": 0.8799603760242463, "step": 14650 }, { "epoch": 0.26060832311165627, "grad_norm": 1.6719140094802796, "learning_rate": 0.0001, "loss": 0.9841, "mean_abs_error": 1115.5647305791283, "mean_abs_error_last_10": 424.5597742007133, "mean_abs_error_last_25": 495.9986849993382, "mean_abs_error_last_50": 720.7092842781419, "mean_pred_prob": 0.024934746281360276, "mean_pred_prob_last_10": 0.13856431547319517, "mean_pred_prob_last_25": 0.07386978496797383, "mean_pred_prob_last_50": 0.042920020382734944, "mean_token_accuracy": 0.8644481360912323, "step": 14660 }, { "epoch": 0.2607860914084582, "grad_norm": 2.2336800797925087, "learning_rate": 0.0001, "loss": 1.0283, "mean_abs_error": 424.0165210512849, "mean_abs_error_last_10": 136.2762418251303, "mean_abs_error_last_25": 196.05305093374858, "mean_abs_error_last_50": 268.13087620808363, "mean_pred_prob": 0.019458272168412803, "mean_pred_prob_last_10": 0.10473815090954304, "mean_pred_prob_last_25": 0.055540656670928, "mean_pred_prob_last_50": 0.0331755580380559, "mean_token_accuracy": 0.8594849348068238, "step": 14670 }, { "epoch": 0.26096385970526015, "grad_norm": 1.5222876011274526, "learning_rate": 0.0001, "loss": 0.8399, "mean_abs_error": 568.4037859105538, "mean_abs_error_last_10": 405.5287559314138, "mean_abs_error_last_25": 331.91573076173074, "mean_abs_error_last_50": 401.65494221654427, "mean_pred_prob": 0.03310985015123151, "mean_pred_prob_last_10": 0.1637035074760206, "mean_pred_prob_last_25": 0.09343279838794842, "mean_pred_prob_last_50": 0.05655389952007681, "mean_token_accuracy": 0.8744799673557282, "step": 14680 }, { "epoch": 0.2611416280020621, "grad_norm": 1.5147238828184033, "learning_rate": 0.0001, "loss": 0.8274, "mean_abs_error": 815.9407519835012, "mean_abs_error_last_10": 246.78542572315072, "mean_abs_error_last_25": 336.46373114736326, "mean_abs_error_last_50": 475.3595454301379, "mean_pred_prob": 0.02606917840312235, "mean_pred_prob_last_10": 0.12438675733283162, "mean_pred_prob_last_25": 0.07293188816984184, "mean_pred_prob_last_50": 0.04411557323182933, "mean_token_accuracy": 0.8807965040206909, "step": 14690 }, { "epoch": 0.2613193962988641, "grad_norm": 1.2844537209693947, "learning_rate": 0.0001, "loss": 0.8984, "mean_abs_error": 948.7748664087501, "mean_abs_error_last_10": 309.34231573628114, "mean_abs_error_last_25": 489.03530934827467, "mean_abs_error_last_50": 657.3093447764983, "mean_pred_prob": 0.036796293567749674, "mean_pred_prob_last_10": 0.18952655323664658, "mean_pred_prob_last_25": 0.10241011368343607, "mean_pred_prob_last_50": 0.06233219550922513, "mean_token_accuracy": 0.8768412411212921, "step": 14700 }, { "epoch": 0.261497164595666, "grad_norm": 1.0697452639557588, "learning_rate": 0.0001, "loss": 0.8013, "mean_abs_error": 267.3318846961321, "mean_abs_error_last_10": 108.21446502957573, "mean_abs_error_last_25": 137.79312111790966, "mean_abs_error_last_50": 183.56747517239336, "mean_pred_prob": 0.02690626187250018, "mean_pred_prob_last_10": 0.1475750794634223, "mean_pred_prob_last_25": 0.07718694526702166, "mean_pred_prob_last_50": 0.04555430375039578, "mean_token_accuracy": 0.8757904589176178, "step": 14710 }, { "epoch": 0.26167493289246796, "grad_norm": 0.7973476623098088, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 325.3620527436186, "mean_abs_error_last_10": 140.2795157076947, "mean_abs_error_last_25": 133.22403264240643, "mean_abs_error_last_50": 171.71550917133638, "mean_pred_prob": 0.051412385609000924, "mean_pred_prob_last_10": 0.20813064463436604, "mean_pred_prob_last_25": 0.13125704051926732, "mean_pred_prob_last_50": 0.08473794152960182, "mean_token_accuracy": 0.8777957022190094, "step": 14720 }, { "epoch": 0.2618527011892699, "grad_norm": 2.752585193209947, "learning_rate": 0.0001, "loss": 0.8405, "mean_abs_error": 468.10576714379505, "mean_abs_error_last_10": 103.2533791861285, "mean_abs_error_last_25": 146.82293671329558, "mean_abs_error_last_50": 235.96124119213027, "mean_pred_prob": 0.0338053775485605, "mean_pred_prob_last_10": 0.18469781428575516, "mean_pred_prob_last_25": 0.09969610262196511, "mean_pred_prob_last_50": 0.05811747356783599, "mean_token_accuracy": 0.8737320005893707, "step": 14730 }, { "epoch": 0.26203046948607184, "grad_norm": 1.4412239986146076, "learning_rate": 0.0001, "loss": 0.7918, "mean_abs_error": 317.3869680278523, "mean_abs_error_last_10": 110.54910000417183, "mean_abs_error_last_25": 161.360345616767, "mean_abs_error_last_50": 235.23895570553213, "mean_pred_prob": 0.03671496310271323, "mean_pred_prob_last_10": 0.1725530406460166, "mean_pred_prob_last_25": 0.09937251387163996, "mean_pred_prob_last_50": 0.06177800367586315, "mean_token_accuracy": 0.8754999876022339, "step": 14740 }, { "epoch": 0.2622082377828738, "grad_norm": 1.6347244950719606, "learning_rate": 0.0001, "loss": 0.8809, "mean_abs_error": 534.7916484849991, "mean_abs_error_last_10": 133.11501520553503, "mean_abs_error_last_25": 246.15404712952022, "mean_abs_error_last_50": 355.8834851105772, "mean_pred_prob": 0.029166373377665877, "mean_pred_prob_last_10": 0.1511841758619994, "mean_pred_prob_last_25": 0.07981601328356192, "mean_pred_prob_last_50": 0.048474688909482214, "mean_token_accuracy": 0.8755241096019745, "step": 14750 }, { "epoch": 0.26238600607967577, "grad_norm": 1.3943078369269144, "learning_rate": 0.0001, "loss": 0.9812, "mean_abs_error": 1016.3377521445922, "mean_abs_error_last_10": 515.7342214350807, "mean_abs_error_last_25": 657.1554482214793, "mean_abs_error_last_50": 776.348679009669, "mean_pred_prob": 0.02536688808031613, "mean_pred_prob_last_10": 0.13552713693643453, "mean_pred_prob_last_25": 0.07214396411727649, "mean_pred_prob_last_50": 0.043396302028850184, "mean_token_accuracy": 0.863700807094574, "step": 14760 }, { "epoch": 0.2625637743764777, "grad_norm": 1.2953032671324145, "learning_rate": 0.0001, "loss": 0.8979, "mean_abs_error": 778.3410722479831, "mean_abs_error_last_10": 414.2290699715165, "mean_abs_error_last_25": 481.8929700271341, "mean_abs_error_last_50": 561.1125144428131, "mean_pred_prob": 0.037018559593707326, "mean_pred_prob_last_10": 0.1811582397815073, "mean_pred_prob_last_25": 0.10307770137442276, "mean_pred_prob_last_50": 0.06259423560404684, "mean_token_accuracy": 0.8723838031291962, "step": 14770 }, { "epoch": 0.26274154267327965, "grad_norm": 1.4223936945889142, "learning_rate": 0.0001, "loss": 0.865, "mean_abs_error": 626.5207632375178, "mean_abs_error_last_10": 172.93789167803297, "mean_abs_error_last_25": 294.5431648542409, "mean_abs_error_last_50": 399.7677220312865, "mean_pred_prob": 0.02478403099812567, "mean_pred_prob_last_10": 0.11442034710198641, "mean_pred_prob_last_25": 0.0659471033141017, "mean_pred_prob_last_50": 0.04154656399041414, "mean_token_accuracy": 0.874054777622223, "step": 14780 }, { "epoch": 0.2629193109700816, "grad_norm": 1.008824321911001, "learning_rate": 0.0001, "loss": 0.9618, "mean_abs_error": 581.5985842167372, "mean_abs_error_last_10": 125.37022758889206, "mean_abs_error_last_25": 193.0055652244309, "mean_abs_error_last_50": 324.4949939970719, "mean_pred_prob": 0.04108834164799191, "mean_pred_prob_last_10": 0.1967487015761435, "mean_pred_prob_last_25": 0.11323017468675971, "mean_pred_prob_last_50": 0.06837902803090401, "mean_token_accuracy": 0.8873584747314454, "step": 14790 }, { "epoch": 0.2630970792668835, "grad_norm": 2.049336792983231, "learning_rate": 0.0001, "loss": 1.0333, "mean_abs_error": 627.5387294301415, "mean_abs_error_last_10": 146.47336842184217, "mean_abs_error_last_25": 228.23518829664846, "mean_abs_error_last_50": 297.68552058921733, "mean_pred_prob": 0.037806146685034035, "mean_pred_prob_last_10": 0.17995055746287109, "mean_pred_prob_last_25": 0.10315519636496902, "mean_pred_prob_last_50": 0.06309639415703713, "mean_token_accuracy": 0.8718375265598297, "step": 14800 }, { "epoch": 0.26327484756368547, "grad_norm": 4.01959467948561, "learning_rate": 0.0001, "loss": 0.8591, "mean_abs_error": 503.31746049322084, "mean_abs_error_last_10": 236.2850291845357, "mean_abs_error_last_25": 283.47219413061856, "mean_abs_error_last_50": 360.99003830261825, "mean_pred_prob": 0.041621879988815634, "mean_pred_prob_last_10": 0.21689021250931545, "mean_pred_prob_last_25": 0.11683994579943828, "mean_pred_prob_last_50": 0.07110156775452196, "mean_token_accuracy": 0.8827427685260772, "step": 14810 }, { "epoch": 0.26345261586048746, "grad_norm": 2.8672975720798863, "learning_rate": 0.0001, "loss": 0.8543, "mean_abs_error": 695.3419536458996, "mean_abs_error_last_10": 284.64856983802065, "mean_abs_error_last_25": 423.5747741652992, "mean_abs_error_last_50": 497.811177937502, "mean_pred_prob": 0.030526748194824906, "mean_pred_prob_last_10": 0.1491554870211985, "mean_pred_prob_last_25": 0.08218452323926613, "mean_pred_prob_last_50": 0.0508128629240673, "mean_token_accuracy": 0.8745155394077301, "step": 14820 }, { "epoch": 0.2636303841572894, "grad_norm": 1.9612509612589337, "learning_rate": 0.0001, "loss": 0.7918, "mean_abs_error": 162.0241031840182, "mean_abs_error_last_10": 89.94634728256044, "mean_abs_error_last_25": 107.97748350942717, "mean_abs_error_last_50": 121.59028181689993, "mean_pred_prob": 0.03511724486015737, "mean_pred_prob_last_10": 0.18827176056802272, "mean_pred_prob_last_25": 0.09866813411936164, "mean_pred_prob_last_50": 0.060090317344293, "mean_token_accuracy": 0.8749865889549255, "step": 14830 }, { "epoch": 0.26380815245409134, "grad_norm": 2.403033594794947, "learning_rate": 0.0001, "loss": 0.9067, "mean_abs_error": 656.7363882741798, "mean_abs_error_last_10": 306.1679261015888, "mean_abs_error_last_25": 362.07975832128307, "mean_abs_error_last_50": 480.39331626830733, "mean_pred_prob": 0.04812277212622575, "mean_pred_prob_last_10": 0.2543796551937703, "mean_pred_prob_last_25": 0.13892597210069652, "mean_pred_prob_last_50": 0.08251669052406214, "mean_token_accuracy": 0.8608006060123443, "step": 14840 }, { "epoch": 0.2639859207508933, "grad_norm": 1.4915394788072514, "learning_rate": 0.0001, "loss": 0.9503, "mean_abs_error": 1104.8197192585667, "mean_abs_error_last_10": 651.0070029068202, "mean_abs_error_last_25": 701.7563717943794, "mean_abs_error_last_50": 843.375117073561, "mean_pred_prob": 0.032018977725238075, "mean_pred_prob_last_10": 0.16034619025158464, "mean_pred_prob_last_25": 0.09011631888715783, "mean_pred_prob_last_50": 0.0543356221911381, "mean_token_accuracy": 0.8668608367443085, "step": 14850 }, { "epoch": 0.2641636890476952, "grad_norm": 1.3769859840371643, "learning_rate": 0.0001, "loss": 0.8372, "mean_abs_error": 275.057856118288, "mean_abs_error_last_10": 89.48481395692292, "mean_abs_error_last_25": 139.8100146545948, "mean_abs_error_last_50": 180.63220347521678, "mean_pred_prob": 0.037871692550834266, "mean_pred_prob_last_10": 0.2041621534153819, "mean_pred_prob_last_25": 0.10945697413990274, "mean_pred_prob_last_50": 0.06532706836005672, "mean_token_accuracy": 0.8788729548454285, "step": 14860 }, { "epoch": 0.26434145734449715, "grad_norm": 0.9797524525181613, "learning_rate": 0.0001, "loss": 0.8052, "mean_abs_error": 961.28999295552, "mean_abs_error_last_10": 512.9609665842138, "mean_abs_error_last_25": 602.7174227350241, "mean_abs_error_last_50": 681.4849822075591, "mean_pred_prob": 0.024534891988150776, "mean_pred_prob_last_10": 0.12198934147018008, "mean_pred_prob_last_25": 0.07030327198153827, "mean_pred_prob_last_50": 0.04291539064142853, "mean_token_accuracy": 0.8738719522953033, "step": 14870 }, { "epoch": 0.26451922564129915, "grad_norm": 1.1054021594951124, "learning_rate": 0.0001, "loss": 0.8397, "mean_abs_error": 884.8348805894742, "mean_abs_error_last_10": 273.21219853695084, "mean_abs_error_last_25": 330.8222742833044, "mean_abs_error_last_50": 527.5560132367111, "mean_pred_prob": 0.032408338363165964, "mean_pred_prob_last_10": 0.1557078154582996, "mean_pred_prob_last_25": 0.08788775986176915, "mean_pred_prob_last_50": 0.054647296096663925, "mean_token_accuracy": 0.865203446149826, "step": 14880 }, { "epoch": 0.2646969939381011, "grad_norm": 1.5954005445686925, "learning_rate": 0.0001, "loss": 0.8699, "mean_abs_error": 103.7867575244251, "mean_abs_error_last_10": 12.502037186326683, "mean_abs_error_last_25": 40.539487261869155, "mean_abs_error_last_50": 62.303043647496324, "mean_pred_prob": 0.053178520314395426, "mean_pred_prob_last_10": 0.2748301036655903, "mean_pred_prob_last_25": 0.15286779422312974, "mean_pred_prob_last_50": 0.0914877480827272, "mean_token_accuracy": 0.8774262189865112, "step": 14890 }, { "epoch": 0.264874762234903, "grad_norm": 1.3357108905425783, "learning_rate": 0.0001, "loss": 0.9399, "mean_abs_error": 249.10842698747965, "mean_abs_error_last_10": 113.17914423089658, "mean_abs_error_last_25": 115.72318780345499, "mean_abs_error_last_50": 154.9788049902737, "mean_pred_prob": 0.03151436320040375, "mean_pred_prob_last_10": 0.16419172137975693, "mean_pred_prob_last_25": 0.08833122076466679, "mean_pred_prob_last_50": 0.05376245779916644, "mean_token_accuracy": 0.8727482795715332, "step": 14900 }, { "epoch": 0.26505253053170497, "grad_norm": 2.0111239641829632, "learning_rate": 0.0001, "loss": 0.7757, "mean_abs_error": 164.98412012022658, "mean_abs_error_last_10": 55.051241370447066, "mean_abs_error_last_25": 74.48255628687933, "mean_abs_error_last_50": 88.72501832331761, "mean_pred_prob": 0.04580938257277012, "mean_pred_prob_last_10": 0.2236955663189292, "mean_pred_prob_last_25": 0.12658562641590834, "mean_pred_prob_last_50": 0.07756535648368298, "mean_token_accuracy": 0.8653243601322174, "step": 14910 }, { "epoch": 0.2652302988285069, "grad_norm": 1.2802206350413712, "learning_rate": 0.0001, "loss": 0.863, "mean_abs_error": 461.4663947699072, "mean_abs_error_last_10": 264.91139406074313, "mean_abs_error_last_25": 323.78574224119876, "mean_abs_error_last_50": 359.00214607884993, "mean_pred_prob": 0.0264601044007577, "mean_pred_prob_last_10": 0.1473164940602146, "mean_pred_prob_last_25": 0.07789541605161503, "mean_pred_prob_last_50": 0.04601673687575385, "mean_token_accuracy": 0.8686109364032746, "step": 14920 }, { "epoch": 0.2654080671253089, "grad_norm": 1.170937093178388, "learning_rate": 0.0001, "loss": 0.9056, "mean_abs_error": 1061.9902897625934, "mean_abs_error_last_10": 491.09933738313265, "mean_abs_error_last_25": 572.0211524900626, "mean_abs_error_last_50": 705.7582099581016, "mean_pred_prob": 0.03184689769695979, "mean_pred_prob_last_10": 0.1577352108899504, "mean_pred_prob_last_25": 0.08867232263146434, "mean_pred_prob_last_50": 0.053924031139467844, "mean_token_accuracy": 0.8741096496582031, "step": 14930 }, { "epoch": 0.26558583542211084, "grad_norm": 2.3701087979507993, "learning_rate": 0.0001, "loss": 0.8743, "mean_abs_error": 203.96620704409457, "mean_abs_error_last_10": 69.2144182280347, "mean_abs_error_last_25": 156.6669486154032, "mean_abs_error_last_50": 151.2620187897084, "mean_pred_prob": 0.03859729510731995, "mean_pred_prob_last_10": 0.1926053550094366, "mean_pred_prob_last_25": 0.10661247875541449, "mean_pred_prob_last_50": 0.06475640721619129, "mean_token_accuracy": 0.8726322889328003, "step": 14940 }, { "epoch": 0.2657636037189128, "grad_norm": 1.117496005821038, "learning_rate": 0.0001, "loss": 0.8843, "mean_abs_error": 527.7859747762957, "mean_abs_error_last_10": 77.11700545610367, "mean_abs_error_last_25": 115.14619980979414, "mean_abs_error_last_50": 235.59099871206104, "mean_pred_prob": 0.05405448926612735, "mean_pred_prob_last_10": 0.2525528150843456, "mean_pred_prob_last_25": 0.14700009028892963, "mean_pred_prob_last_50": 0.09053324849810451, "mean_token_accuracy": 0.8792772233486176, "step": 14950 }, { "epoch": 0.2659413720157147, "grad_norm": 2.070996739692856, "learning_rate": 0.0001, "loss": 0.8313, "mean_abs_error": 831.6374642697829, "mean_abs_error_last_10": 271.57199612291936, "mean_abs_error_last_25": 367.53335838825905, "mean_abs_error_last_50": 515.8227267768709, "mean_pred_prob": 0.0383326021314133, "mean_pred_prob_last_10": 0.17232572137145324, "mean_pred_prob_last_25": 0.09827955782529899, "mean_pred_prob_last_50": 0.06341139795840718, "mean_token_accuracy": 0.8644954025745392, "step": 14960 }, { "epoch": 0.26611914031251666, "grad_norm": 1.5590693934664375, "learning_rate": 0.0001, "loss": 0.9011, "mean_abs_error": 1139.2099568998203, "mean_abs_error_last_10": 672.1507455253688, "mean_abs_error_last_25": 741.8734655688758, "mean_abs_error_last_50": 872.2441965813183, "mean_pred_prob": 0.03620803314115619, "mean_pred_prob_last_10": 0.19699751846055732, "mean_pred_prob_last_25": 0.10557270859135315, "mean_pred_prob_last_50": 0.062378958270710425, "mean_token_accuracy": 0.8726842939853668, "step": 14970 }, { "epoch": 0.2662969086093186, "grad_norm": 2.384539967064738, "learning_rate": 0.0001, "loss": 0.7912, "mean_abs_error": 295.62923296984115, "mean_abs_error_last_10": 46.48362087723088, "mean_abs_error_last_25": 93.713937034564, "mean_abs_error_last_50": 164.1106024384334, "mean_pred_prob": 0.03139179653953761, "mean_pred_prob_last_10": 0.1582672679796815, "mean_pred_prob_last_25": 0.0834562674164772, "mean_pred_prob_last_50": 0.0513124224729836, "mean_token_accuracy": 0.8792551755905151, "step": 14980 }, { "epoch": 0.2664746769061206, "grad_norm": 2.127627782616515, "learning_rate": 0.0001, "loss": 0.9663, "mean_abs_error": 1078.1742014650447, "mean_abs_error_last_10": 363.30719750525685, "mean_abs_error_last_25": 491.0871771203447, "mean_abs_error_last_50": 695.7583543888395, "mean_pred_prob": 0.02362455066468101, "mean_pred_prob_last_10": 0.13035797562333756, "mean_pred_prob_last_25": 0.06884734402992762, "mean_pred_prob_last_50": 0.04072299569961615, "mean_token_accuracy": 0.8675091505050659, "step": 14990 }, { "epoch": 0.26665244520292253, "grad_norm": 1.805185596582345, "learning_rate": 0.0001, "loss": 0.8578, "mean_abs_error": 648.1450079568439, "mean_abs_error_last_10": 250.07187857185104, "mean_abs_error_last_25": 388.27826213221783, "mean_abs_error_last_50": 486.2391449513927, "mean_pred_prob": 0.020248929923400282, "mean_pred_prob_last_10": 0.10636411225423217, "mean_pred_prob_last_25": 0.057405904540792106, "mean_pred_prob_last_50": 0.03476010048761964, "mean_token_accuracy": 0.8688831150531768, "step": 15000 }, { "epoch": 0.26683021349972447, "grad_norm": 1.055211898641243, "learning_rate": 0.0001, "loss": 0.7956, "mean_abs_error": 1050.2684008889994, "mean_abs_error_last_10": 384.61836694687804, "mean_abs_error_last_25": 546.9330125825975, "mean_abs_error_last_50": 747.3002237641626, "mean_pred_prob": 0.0336803866608534, "mean_pred_prob_last_10": 0.1824553510756232, "mean_pred_prob_last_25": 0.09698240864963736, "mean_pred_prob_last_50": 0.05754575734899845, "mean_token_accuracy": 0.8767897188663483, "step": 15010 }, { "epoch": 0.2670079817965264, "grad_norm": 2.40035305252267, "learning_rate": 0.0001, "loss": 0.8458, "mean_abs_error": 1105.092521833711, "mean_abs_error_last_10": 486.97580392349766, "mean_abs_error_last_25": 554.3835235959699, "mean_abs_error_last_50": 676.9546065434968, "mean_pred_prob": 0.012537238275399432, "mean_pred_prob_last_10": 0.06796256527304649, "mean_pred_prob_last_25": 0.03625834853737615, "mean_pred_prob_last_50": 0.02178482657764107, "mean_token_accuracy": 0.8818208813667298, "step": 15020 }, { "epoch": 0.26718575009332834, "grad_norm": 1.0510627451968009, "learning_rate": 0.0001, "loss": 0.8258, "mean_abs_error": 521.0831456588783, "mean_abs_error_last_10": 312.07120407069146, "mean_abs_error_last_25": 289.16658675998485, "mean_abs_error_last_50": 345.5213714897554, "mean_pred_prob": 0.021359736868180334, "mean_pred_prob_last_10": 0.12056312840431929, "mean_pred_prob_last_25": 0.06011770153418183, "mean_pred_prob_last_50": 0.0362246751319617, "mean_token_accuracy": 0.8710079431533814, "step": 15030 }, { "epoch": 0.2673635183901303, "grad_norm": 1.2382404998479226, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 487.50934890231247, "mean_abs_error_last_10": 99.25219741072087, "mean_abs_error_last_25": 153.20579493062195, "mean_abs_error_last_50": 266.88032815412276, "mean_pred_prob": 0.027079546777531504, "mean_pred_prob_last_10": 0.13560090642422437, "mean_pred_prob_last_25": 0.07491316767409444, "mean_pred_prob_last_50": 0.04592734985053539, "mean_token_accuracy": 0.878762173652649, "step": 15040 }, { "epoch": 0.2675412866869323, "grad_norm": 1.4540641444048483, "learning_rate": 0.0001, "loss": 0.8788, "mean_abs_error": 148.630140877255, "mean_abs_error_last_10": 29.05085621372489, "mean_abs_error_last_25": 52.193129571807006, "mean_abs_error_last_50": 103.66881534606334, "mean_pred_prob": 0.044938688864931464, "mean_pred_prob_last_10": 0.24219715483486653, "mean_pred_prob_last_25": 0.13250871263444425, "mean_pred_prob_last_50": 0.07789177987724542, "mean_token_accuracy": 0.8751834988594055, "step": 15050 }, { "epoch": 0.2677190549837342, "grad_norm": 0.8566259696299078, "learning_rate": 0.0001, "loss": 0.8117, "mean_abs_error": 734.3425976615073, "mean_abs_error_last_10": 232.42315996407433, "mean_abs_error_last_25": 309.658052307779, "mean_abs_error_last_50": 513.0842109907614, "mean_pred_prob": 0.026340797531884164, "mean_pred_prob_last_10": 0.14359307254198939, "mean_pred_prob_last_25": 0.07607266117120162, "mean_pred_prob_last_50": 0.045034411386586724, "mean_token_accuracy": 0.8816964626312256, "step": 15060 }, { "epoch": 0.26789682328053616, "grad_norm": 1.637967173662454, "learning_rate": 0.0001, "loss": 0.8602, "mean_abs_error": 719.2822583502909, "mean_abs_error_last_10": 230.83331862243477, "mean_abs_error_last_25": 300.6275425591431, "mean_abs_error_last_50": 418.56583757381725, "mean_pred_prob": 0.03091462654992938, "mean_pred_prob_last_10": 0.155561607319396, "mean_pred_prob_last_25": 0.08749267725506797, "mean_pred_prob_last_50": 0.05290440968819894, "mean_token_accuracy": 0.8750675201416016, "step": 15070 }, { "epoch": 0.2680745915773381, "grad_norm": 1.3313271617118938, "learning_rate": 0.0001, "loss": 0.8042, "mean_abs_error": 325.0860088907186, "mean_abs_error_last_10": 122.49406747628848, "mean_abs_error_last_25": 172.98744695319917, "mean_abs_error_last_50": 181.26683495798105, "mean_pred_prob": 0.035146378166973594, "mean_pred_prob_last_10": 0.180784098431468, "mean_pred_prob_last_25": 0.09756576418876647, "mean_pred_prob_last_50": 0.06009782999753952, "mean_token_accuracy": 0.8795769274234772, "step": 15080 }, { "epoch": 0.26825235987414003, "grad_norm": 2.0867147079032846, "learning_rate": 0.0001, "loss": 0.838, "mean_abs_error": 799.6189079840358, "mean_abs_error_last_10": 226.13649963565376, "mean_abs_error_last_25": 324.6961356948139, "mean_abs_error_last_50": 501.1841303231974, "mean_pred_prob": 0.03704979228496086, "mean_pred_prob_last_10": 0.1864794261287898, "mean_pred_prob_last_25": 0.10085241092019714, "mean_pred_prob_last_50": 0.06289045673329383, "mean_token_accuracy": 0.8695121109485626, "step": 15090 }, { "epoch": 0.268430128170942, "grad_norm": 2.806463422658218, "learning_rate": 0.0001, "loss": 0.8932, "mean_abs_error": 309.97422156558497, "mean_abs_error_last_10": 88.55331947956608, "mean_abs_error_last_25": 115.22543299006827, "mean_abs_error_last_50": 205.22194291588258, "mean_pred_prob": 0.034431288088671866, "mean_pred_prob_last_10": 0.19661543741822243, "mean_pred_prob_last_25": 0.10419762078672648, "mean_pred_prob_last_50": 0.06068664658814669, "mean_token_accuracy": 0.8767160654067994, "step": 15100 }, { "epoch": 0.26860789646774397, "grad_norm": 1.0144249247527675, "learning_rate": 0.0001, "loss": 0.9602, "mean_abs_error": 1918.444313137704, "mean_abs_error_last_10": 1164.9222233210082, "mean_abs_error_last_25": 1310.5428793004742, "mean_abs_error_last_50": 1561.6374475603188, "mean_pred_prob": 0.03134060056727321, "mean_pred_prob_last_10": 0.14916262593469581, "mean_pred_prob_last_25": 0.08292170506319962, "mean_pred_prob_last_50": 0.05222883299829846, "mean_token_accuracy": 0.8740988492965698, "step": 15110 }, { "epoch": 0.2687856647645459, "grad_norm": 1.066258865989434, "learning_rate": 0.0001, "loss": 0.8596, "mean_abs_error": 439.75218965189026, "mean_abs_error_last_10": 98.35221745409976, "mean_abs_error_last_25": 133.80555924676023, "mean_abs_error_last_50": 241.67411891943956, "mean_pred_prob": 0.0309684528503567, "mean_pred_prob_last_10": 0.1658648693934083, "mean_pred_prob_last_25": 0.08968111425638199, "mean_pred_prob_last_50": 0.05365268513560295, "mean_token_accuracy": 0.8696214258670807, "step": 15120 }, { "epoch": 0.26896343306134785, "grad_norm": 2.0345325972043344, "learning_rate": 0.0001, "loss": 0.8861, "mean_abs_error": 1624.3934715927548, "mean_abs_error_last_10": 952.318823860217, "mean_abs_error_last_25": 1030.2517340482668, "mean_abs_error_last_50": 1264.031066319019, "mean_pred_prob": 0.02500721951801097, "mean_pred_prob_last_10": 0.13666681032045744, "mean_pred_prob_last_25": 0.07126953576953383, "mean_pred_prob_last_50": 0.0421712428185856, "mean_token_accuracy": 0.8762621223926544, "step": 15130 }, { "epoch": 0.2691412013581498, "grad_norm": 3.0557824685062713, "learning_rate": 0.0001, "loss": 0.861, "mean_abs_error": 318.64912973065987, "mean_abs_error_last_10": 235.0142932870887, "mean_abs_error_last_25": 267.6314756346054, "mean_abs_error_last_50": 254.21842070563258, "mean_pred_prob": 0.03687804311048239, "mean_pred_prob_last_10": 0.1823043443262577, "mean_pred_prob_last_25": 0.1012232132256031, "mean_pred_prob_last_50": 0.06242759719025344, "mean_token_accuracy": 0.8735696852207184, "step": 15140 }, { "epoch": 0.2693189696549517, "grad_norm": 1.6579026753720612, "learning_rate": 0.0001, "loss": 0.9507, "mean_abs_error": 516.1736489992699, "mean_abs_error_last_10": 290.1530641921317, "mean_abs_error_last_25": 289.20115702432156, "mean_abs_error_last_50": 358.4234722602719, "mean_pred_prob": 0.026388170843711124, "mean_pred_prob_last_10": 0.13401271934853867, "mean_pred_prob_last_25": 0.07360158723313362, "mean_pred_prob_last_50": 0.044898529874626544, "mean_token_accuracy": 0.8705066084861756, "step": 15150 }, { "epoch": 0.26949673795175366, "grad_norm": 1.205445598332373, "learning_rate": 0.0001, "loss": 0.8616, "mean_abs_error": 231.02121319707607, "mean_abs_error_last_10": 148.24208857826778, "mean_abs_error_last_25": 207.41923765290522, "mean_abs_error_last_50": 198.41383529233298, "mean_pred_prob": 0.03931064144708216, "mean_pred_prob_last_10": 0.1948462450876832, "mean_pred_prob_last_25": 0.10775272035971284, "mean_pred_prob_last_50": 0.06626064302399755, "mean_token_accuracy": 0.8631694972515106, "step": 15160 }, { "epoch": 0.26967450624855566, "grad_norm": 1.251407137263006, "learning_rate": 0.0001, "loss": 0.8631, "mean_abs_error": 419.13401264717913, "mean_abs_error_last_10": 73.5819223496636, "mean_abs_error_last_25": 148.87816742379812, "mean_abs_error_last_50": 273.103481687722, "mean_pred_prob": 0.060331244580447675, "mean_pred_prob_last_10": 0.2809063887223601, "mean_pred_prob_last_25": 0.16465306654572487, "mean_pred_prob_last_50": 0.10213291244581342, "mean_token_accuracy": 0.8702337801456451, "step": 15170 }, { "epoch": 0.2698522745453576, "grad_norm": 1.462936496290174, "learning_rate": 0.0001, "loss": 0.855, "mean_abs_error": 416.86835984983827, "mean_abs_error_last_10": 143.80282176244083, "mean_abs_error_last_25": 170.245056504298, "mean_abs_error_last_50": 231.2610696421847, "mean_pred_prob": 0.03525058052036911, "mean_pred_prob_last_10": 0.16869691014289856, "mean_pred_prob_last_25": 0.09357276195660233, "mean_pred_prob_last_50": 0.05835501586552709, "mean_token_accuracy": 0.886011290550232, "step": 15180 }, { "epoch": 0.27003004284215953, "grad_norm": 1.132301885772741, "learning_rate": 0.0001, "loss": 0.7809, "mean_abs_error": 200.4163317096922, "mean_abs_error_last_10": 17.416778587035214, "mean_abs_error_last_25": 50.722254117970266, "mean_abs_error_last_50": 117.02776710275745, "mean_pred_prob": 0.035678531788289544, "mean_pred_prob_last_10": 0.18931617289781572, "mean_pred_prob_last_25": 0.1028917420655489, "mean_pred_prob_last_50": 0.061194530222564936, "mean_token_accuracy": 0.8830712080001831, "step": 15190 }, { "epoch": 0.2702078111389615, "grad_norm": 0.8862061139884946, "learning_rate": 0.0001, "loss": 0.8714, "mean_abs_error": 2117.516663752068, "mean_abs_error_last_10": 1260.2040260501558, "mean_abs_error_last_25": 1350.5825422625633, "mean_abs_error_last_50": 1624.3080901832707, "mean_pred_prob": 0.02392583029140951, "mean_pred_prob_last_10": 0.13557063553307672, "mean_pred_prob_last_25": 0.07159370685840258, "mean_pred_prob_last_50": 0.04184693558927392, "mean_token_accuracy": 0.8708284795284271, "step": 15200 }, { "epoch": 0.2703855794357634, "grad_norm": 0.8731872588397653, "learning_rate": 0.0001, "loss": 0.8502, "mean_abs_error": 978.6996172576553, "mean_abs_error_last_10": 613.7896602693918, "mean_abs_error_last_25": 727.8593285366243, "mean_abs_error_last_50": 785.1618309467052, "mean_pred_prob": 0.03190654192439979, "mean_pred_prob_last_10": 0.14121774593950248, "mean_pred_prob_last_25": 0.08092001934710424, "mean_pred_prob_last_50": 0.05238562319718767, "mean_token_accuracy": 0.871285218000412, "step": 15210 }, { "epoch": 0.27056334773256535, "grad_norm": 1.0973939579010625, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 261.40502917062474, "mean_abs_error_last_10": 78.8416479104221, "mean_abs_error_last_25": 101.78107279827354, "mean_abs_error_last_50": 184.20341726318975, "mean_pred_prob": 0.03180300248786807, "mean_pred_prob_last_10": 0.1683232370764017, "mean_pred_prob_last_25": 0.09198320601135493, "mean_pred_prob_last_50": 0.055383968539536, "mean_token_accuracy": 0.8646357715129852, "step": 15220 }, { "epoch": 0.27074111602936735, "grad_norm": 1.8784553742182521, "learning_rate": 0.0001, "loss": 0.805, "mean_abs_error": 312.8856744278997, "mean_abs_error_last_10": 64.78347491100018, "mean_abs_error_last_25": 124.1796701750902, "mean_abs_error_last_50": 202.48852539407494, "mean_pred_prob": 0.04259185660630464, "mean_pred_prob_last_10": 0.19938667081296443, "mean_pred_prob_last_25": 0.11452503129839897, "mean_pred_prob_last_50": 0.07125060725957155, "mean_token_accuracy": 0.8735065221786499, "step": 15230 }, { "epoch": 0.2709188843261693, "grad_norm": 2.1611663185840304, "learning_rate": 0.0001, "loss": 0.7867, "mean_abs_error": 1345.0701882567287, "mean_abs_error_last_10": 906.610953020614, "mean_abs_error_last_25": 990.1604356032028, "mean_abs_error_last_50": 1126.6575466876225, "mean_pred_prob": 0.027868046533694724, "mean_pred_prob_last_10": 0.14385497445910006, "mean_pred_prob_last_25": 0.07753187078196788, "mean_pred_prob_last_50": 0.04702587898209458, "mean_token_accuracy": 0.8772989630699157, "step": 15240 }, { "epoch": 0.2710966526229712, "grad_norm": 2.0710988024995487, "learning_rate": 0.0001, "loss": 0.8145, "mean_abs_error": 605.3908941038319, "mean_abs_error_last_10": 214.92572216679636, "mean_abs_error_last_25": 337.0904668231561, "mean_abs_error_last_50": 424.18500954730996, "mean_pred_prob": 0.035811613453552125, "mean_pred_prob_last_10": 0.17946300422772765, "mean_pred_prob_last_25": 0.09936675251228735, "mean_pred_prob_last_50": 0.06075553761329502, "mean_token_accuracy": 0.8745033860206604, "step": 15250 }, { "epoch": 0.27127442091977316, "grad_norm": 1.0623439907591459, "learning_rate": 0.0001, "loss": 0.8286, "mean_abs_error": 814.7060004586208, "mean_abs_error_last_10": 401.705232158445, "mean_abs_error_last_25": 461.9935135936721, "mean_abs_error_last_50": 580.8451408819143, "mean_pred_prob": 0.04464264499838464, "mean_pred_prob_last_10": 0.21344324810197576, "mean_pred_prob_last_25": 0.12185174309706781, "mean_pred_prob_last_50": 0.07566727032244672, "mean_token_accuracy": 0.8647628247737884, "step": 15260 }, { "epoch": 0.2714521892165751, "grad_norm": 1.3810210818386912, "learning_rate": 0.0001, "loss": 0.9103, "mean_abs_error": 1189.1665694369362, "mean_abs_error_last_10": 896.4021854897098, "mean_abs_error_last_25": 927.944186618334, "mean_abs_error_last_50": 1010.6949522515293, "mean_pred_prob": 0.03294425171188777, "mean_pred_prob_last_10": 0.16954423072820646, "mean_pred_prob_last_25": 0.09533420150182792, "mean_pred_prob_last_50": 0.05605807086103596, "mean_token_accuracy": 0.872936338186264, "step": 15270 }, { "epoch": 0.27162995751337704, "grad_norm": 1.0156221354086221, "learning_rate": 0.0001, "loss": 0.9305, "mean_abs_error": 727.6591462684784, "mean_abs_error_last_10": 244.40354147251938, "mean_abs_error_last_25": 289.29952890170415, "mean_abs_error_last_50": 380.39225733328226, "mean_pred_prob": 0.0320674785412848, "mean_pred_prob_last_10": 0.14448971913661807, "mean_pred_prob_last_25": 0.08244370387401431, "mean_pred_prob_last_50": 0.05313194064656272, "mean_token_accuracy": 0.8651672840118408, "step": 15280 }, { "epoch": 0.27180772581017903, "grad_norm": 1.4164293828147696, "learning_rate": 0.0001, "loss": 0.7756, "mean_abs_error": 694.7650561999405, "mean_abs_error_last_10": 314.91617362017814, "mean_abs_error_last_25": 405.05850858793116, "mean_abs_error_last_50": 507.97716749531884, "mean_pred_prob": 0.0413673831091728, "mean_pred_prob_last_10": 0.20086297765374184, "mean_pred_prob_last_25": 0.11271731398883275, "mean_pred_prob_last_50": 0.06941873864852824, "mean_token_accuracy": 0.8699567496776581, "step": 15290 }, { "epoch": 0.271985494106981, "grad_norm": 1.174356282559506, "learning_rate": 0.0001, "loss": 0.9298, "mean_abs_error": 479.81307224449273, "mean_abs_error_last_10": 155.57954629777691, "mean_abs_error_last_25": 187.71184160389086, "mean_abs_error_last_50": 270.3691640027929, "mean_pred_prob": 0.03705091548035853, "mean_pred_prob_last_10": 0.19859559824690223, "mean_pred_prob_last_25": 0.10903157493448816, "mean_pred_prob_last_50": 0.06498496970161796, "mean_token_accuracy": 0.8680202007293701, "step": 15300 }, { "epoch": 0.2721632624037829, "grad_norm": 0.9883743755432057, "learning_rate": 0.0001, "loss": 0.7728, "mean_abs_error": 522.7684000450205, "mean_abs_error_last_10": 256.4888661608756, "mean_abs_error_last_25": 262.4773810780194, "mean_abs_error_last_50": 329.13920468799313, "mean_pred_prob": 0.02693784032599069, "mean_pred_prob_last_10": 0.13850164230680093, "mean_pred_prob_last_25": 0.0759935190435499, "mean_pred_prob_last_50": 0.04605899396119639, "mean_token_accuracy": 0.8731039702892304, "step": 15310 }, { "epoch": 0.27234103070058485, "grad_norm": 1.427048903699474, "learning_rate": 0.0001, "loss": 0.7242, "mean_abs_error": 697.3668811453092, "mean_abs_error_last_10": 233.05798356723412, "mean_abs_error_last_25": 421.8948617505479, "mean_abs_error_last_50": 473.1702747143371, "mean_pred_prob": 0.03153450447134674, "mean_pred_prob_last_10": 0.16012352088000625, "mean_pred_prob_last_25": 0.08728102303575724, "mean_pred_prob_last_50": 0.053408558410592374, "mean_token_accuracy": 0.8804226398468018, "step": 15320 }, { "epoch": 0.2725187989973868, "grad_norm": 1.3352172498564012, "learning_rate": 0.0001, "loss": 0.793, "mean_abs_error": 166.65499857454802, "mean_abs_error_last_10": 36.170956873820884, "mean_abs_error_last_25": 75.7034386909246, "mean_abs_error_last_50": 115.16150987733297, "mean_pred_prob": 0.04346395498141646, "mean_pred_prob_last_10": 0.21632645577192305, "mean_pred_prob_last_25": 0.12395459413528442, "mean_pred_prob_last_50": 0.07438696110621094, "mean_token_accuracy": 0.8683727562427521, "step": 15330 }, { "epoch": 0.27269656729418873, "grad_norm": 1.6105353622583825, "learning_rate": 0.0001, "loss": 0.8435, "mean_abs_error": 450.60580415818123, "mean_abs_error_last_10": 123.08844977023978, "mean_abs_error_last_25": 185.98932458493732, "mean_abs_error_last_50": 267.0178326259025, "mean_pred_prob": 0.028434175305301325, "mean_pred_prob_last_10": 0.15261408514343203, "mean_pred_prob_last_25": 0.08076156995957717, "mean_pred_prob_last_50": 0.048833649122389036, "mean_token_accuracy": 0.8717295706272126, "step": 15340 }, { "epoch": 0.2728743355909907, "grad_norm": 3.468807219671661, "learning_rate": 0.0001, "loss": 0.877, "mean_abs_error": 192.09317036913274, "mean_abs_error_last_10": 82.87719734432113, "mean_abs_error_last_25": 94.45996715188862, "mean_abs_error_last_50": 120.48814231666488, "mean_pred_prob": 0.03536195182241499, "mean_pred_prob_last_10": 0.18289992064237595, "mean_pred_prob_last_25": 0.09824881739914418, "mean_pred_prob_last_50": 0.05978877535089851, "mean_token_accuracy": 0.8682318687438965, "step": 15350 }, { "epoch": 0.27305210388779266, "grad_norm": 1.2326967300723453, "learning_rate": 0.0001, "loss": 0.8357, "mean_abs_error": 387.9197929138787, "mean_abs_error_last_10": 63.99207283376936, "mean_abs_error_last_25": 106.74870136976372, "mean_abs_error_last_50": 270.7963252218374, "mean_pred_prob": 0.042530438583344224, "mean_pred_prob_last_10": 0.201826111972332, "mean_pred_prob_last_25": 0.11586670577526093, "mean_pred_prob_last_50": 0.071508132154122, "mean_token_accuracy": 0.8757938265800476, "step": 15360 }, { "epoch": 0.2732298721845946, "grad_norm": 1.4200593041700924, "learning_rate": 0.0001, "loss": 0.8474, "mean_abs_error": 322.8677098462428, "mean_abs_error_last_10": 121.03744637779991, "mean_abs_error_last_25": 106.8927259146579, "mean_abs_error_last_50": 170.87652817873533, "mean_pred_prob": 0.03580225000623614, "mean_pred_prob_last_10": 0.1741159690544009, "mean_pred_prob_last_25": 0.09648515256121755, "mean_pred_prob_last_50": 0.05958943162113428, "mean_token_accuracy": 0.8819763481616973, "step": 15370 }, { "epoch": 0.27340764048139654, "grad_norm": 2.1947090139944567, "learning_rate": 0.0001, "loss": 0.8954, "mean_abs_error": 593.9161304171932, "mean_abs_error_last_10": 190.64451699525853, "mean_abs_error_last_25": 270.99303988206043, "mean_abs_error_last_50": 412.3448660555575, "mean_pred_prob": 0.06211616077052895, "mean_pred_prob_last_10": 0.2671137263183482, "mean_pred_prob_last_25": 0.1612137242744211, "mean_pred_prob_last_50": 0.1030384722223971, "mean_token_accuracy": 0.8647128522396088, "step": 15380 }, { "epoch": 0.2735854087781985, "grad_norm": 1.3976829736254157, "learning_rate": 0.0001, "loss": 0.9144, "mean_abs_error": 285.86462631955936, "mean_abs_error_last_10": 54.544333121549755, "mean_abs_error_last_25": 83.08391714476788, "mean_abs_error_last_50": 153.558887520888, "mean_pred_prob": 0.036223152000457046, "mean_pred_prob_last_10": 0.2004458863288164, "mean_pred_prob_last_25": 0.10522636529058219, "mean_pred_prob_last_50": 0.06238612309098244, "mean_token_accuracy": 0.8702505826950073, "step": 15390 }, { "epoch": 0.2737631770750004, "grad_norm": 1.8441618177784713, "learning_rate": 0.0001, "loss": 0.8986, "mean_abs_error": 986.7862641019805, "mean_abs_error_last_10": 708.31865701634, "mean_abs_error_last_25": 716.0885762800254, "mean_abs_error_last_50": 802.2777617394299, "mean_pred_prob": 0.026283225079532714, "mean_pred_prob_last_10": 0.13683082971110708, "mean_pred_prob_last_25": 0.07296017803309951, "mean_pred_prob_last_50": 0.0447312827775022, "mean_token_accuracy": 0.8806046903133392, "step": 15400 }, { "epoch": 0.2739409453718024, "grad_norm": 0.8872229420823358, "learning_rate": 0.0001, "loss": 0.9112, "mean_abs_error": 705.9151593090166, "mean_abs_error_last_10": 246.75288086203813, "mean_abs_error_last_25": 373.235190138078, "mean_abs_error_last_50": 510.35276267999944, "mean_pred_prob": 0.037121520668733865, "mean_pred_prob_last_10": 0.18540319511666895, "mean_pred_prob_last_25": 0.10326648856862448, "mean_pred_prob_last_50": 0.06240841430844739, "mean_token_accuracy": 0.8772875964641571, "step": 15410 }, { "epoch": 0.27411871366860435, "grad_norm": 1.3456681321339397, "learning_rate": 0.0001, "loss": 0.88, "mean_abs_error": 842.480131067703, "mean_abs_error_last_10": 324.14117692058596, "mean_abs_error_last_25": 396.1564251304373, "mean_abs_error_last_50": 578.2567446854455, "mean_pred_prob": 0.028742577662342228, "mean_pred_prob_last_10": 0.14749595292378218, "mean_pred_prob_last_25": 0.08153349870699458, "mean_pred_prob_last_50": 0.04850776069215499, "mean_token_accuracy": 0.8724699735641479, "step": 15420 }, { "epoch": 0.2742964819654063, "grad_norm": 2.328511865452262, "learning_rate": 0.0001, "loss": 0.8197, "mean_abs_error": 326.41435837519896, "mean_abs_error_last_10": 57.14085672482876, "mean_abs_error_last_25": 114.14625594495132, "mean_abs_error_last_50": 173.4749173355458, "mean_pred_prob": 0.029744457546621562, "mean_pred_prob_last_10": 0.15072248876094818, "mean_pred_prob_last_25": 0.08415551632642745, "mean_pred_prob_last_50": 0.051001557148993015, "mean_token_accuracy": 0.8776917695999146, "step": 15430 }, { "epoch": 0.27447425026220823, "grad_norm": 35.69055911923346, "learning_rate": 0.0001, "loss": 0.8335, "mean_abs_error": 623.4655639751177, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 395.9775485614226, "mean_pred_prob": 0.06629563049064018, "mean_pred_prob_last_10": 0.21228857740061358, "mean_pred_prob_last_25": 0.14154606554075144, "mean_pred_prob_last_50": 0.10489768436236772, "mean_token_accuracy": 0.8786084532737732, "step": 15440 }, { "epoch": 0.27465201855901017, "grad_norm": 1.698043581575087, "learning_rate": 0.0001, "loss": 0.9374, "mean_abs_error": 430.7787081764203, "mean_abs_error_last_10": 206.4847164623658, "mean_abs_error_last_25": 202.25426140293075, "mean_abs_error_last_50": 239.7026118829177, "mean_pred_prob": 0.040008844539988785, "mean_pred_prob_last_10": 0.20059714298695325, "mean_pred_prob_last_25": 0.10807135563809425, "mean_pred_prob_last_50": 0.06700230039423331, "mean_token_accuracy": 0.8747044920921325, "step": 15450 }, { "epoch": 0.2748297868558121, "grad_norm": 1.5380467605500667, "learning_rate": 0.0001, "loss": 0.8723, "mean_abs_error": 1255.1189862290596, "mean_abs_error_last_10": 450.23152106049685, "mean_abs_error_last_25": 497.2927416246269, "mean_abs_error_last_50": 675.5578389931901, "mean_pred_prob": 0.028859799160272814, "mean_pred_prob_last_10": 0.12769611471449024, "mean_pred_prob_last_25": 0.07809999886667356, "mean_pred_prob_last_50": 0.048857742606196554, "mean_token_accuracy": 0.85931898355484, "step": 15460 }, { "epoch": 0.2750075551526141, "grad_norm": 0.9378559268562336, "learning_rate": 0.0001, "loss": 0.8599, "mean_abs_error": 197.35577806254778, "mean_abs_error_last_10": 61.60861283560773, "mean_abs_error_last_25": 88.60456474517306, "mean_abs_error_last_50": 135.70817919353968, "mean_pred_prob": 0.05508704455569387, "mean_pred_prob_last_10": 0.2679119072854519, "mean_pred_prob_last_25": 0.1521805187687278, "mean_pred_prob_last_50": 0.09346800222992897, "mean_token_accuracy": 0.8773142755031585, "step": 15470 }, { "epoch": 0.27518532344941604, "grad_norm": 1.9694438801309602, "learning_rate": 0.0001, "loss": 0.8293, "mean_abs_error": 475.4127871277733, "mean_abs_error_last_10": 172.50405635688804, "mean_abs_error_last_25": 219.11584123722847, "mean_abs_error_last_50": 309.0380802393171, "mean_pred_prob": 0.023561568302102386, "mean_pred_prob_last_10": 0.13235369324684143, "mean_pred_prob_last_25": 0.06706460323184729, "mean_pred_prob_last_50": 0.040070776175707576, "mean_token_accuracy": 0.871739786863327, "step": 15480 }, { "epoch": 0.275363091746218, "grad_norm": 2.2870724376529328, "learning_rate": 0.0001, "loss": 0.8995, "mean_abs_error": 926.9595721078574, "mean_abs_error_last_10": 308.63987942684133, "mean_abs_error_last_25": 359.41481469728086, "mean_abs_error_last_50": 520.0684943520216, "mean_pred_prob": 0.019045979378279298, "mean_pred_prob_last_10": 0.10060054347850382, "mean_pred_prob_last_25": 0.05452955716755241, "mean_pred_prob_last_50": 0.03268796318443492, "mean_token_accuracy": 0.8592640221118927, "step": 15490 }, { "epoch": 0.2755408600430199, "grad_norm": 0.894093649556221, "learning_rate": 0.0001, "loss": 0.894, "mean_abs_error": 523.3935921266894, "mean_abs_error_last_10": 156.42945328339394, "mean_abs_error_last_25": 191.52407274360328, "mean_abs_error_last_50": 336.16716113010483, "mean_pred_prob": 0.027784721250645815, "mean_pred_prob_last_10": 0.14981649379478768, "mean_pred_prob_last_25": 0.07971596732968464, "mean_pred_prob_last_50": 0.04749538945616223, "mean_token_accuracy": 0.8787936925888061, "step": 15500 }, { "epoch": 0.27571862833982186, "grad_norm": 1.504566636826375, "learning_rate": 0.0001, "loss": 0.8385, "mean_abs_error": 521.3145731623973, "mean_abs_error_last_10": 205.148439099607, "mean_abs_error_last_25": 229.55416378668852, "mean_abs_error_last_50": 327.96831495527164, "mean_pred_prob": 0.03781978628830984, "mean_pred_prob_last_10": 0.19108861351851375, "mean_pred_prob_last_25": 0.1037509214016609, "mean_pred_prob_last_50": 0.06309591222088784, "mean_token_accuracy": 0.8730526924133301, "step": 15510 }, { "epoch": 0.2758963966366238, "grad_norm": 1.572782513588474, "learning_rate": 0.0001, "loss": 0.8005, "mean_abs_error": 245.74227505916247, "mean_abs_error_last_10": 89.55518484920206, "mean_abs_error_last_25": 137.34382644325285, "mean_abs_error_last_50": 203.20140825219164, "mean_pred_prob": 0.03288864577189088, "mean_pred_prob_last_10": 0.1668690536171198, "mean_pred_prob_last_25": 0.09077448062598706, "mean_pred_prob_last_50": 0.055195328500121835, "mean_token_accuracy": 0.8807877182960511, "step": 15520 }, { "epoch": 0.2760741649334258, "grad_norm": 1.446166799880685, "learning_rate": 0.0001, "loss": 0.9258, "mean_abs_error": 851.3769985377478, "mean_abs_error_last_10": 496.9217701497588, "mean_abs_error_last_25": 589.8858270190301, "mean_abs_error_last_50": 675.52316544744, "mean_pred_prob": 0.030525269295321778, "mean_pred_prob_last_10": 0.17154808741470334, "mean_pred_prob_last_25": 0.08789689750410616, "mean_pred_prob_last_50": 0.05224516839080025, "mean_token_accuracy": 0.8781020641326904, "step": 15530 }, { "epoch": 0.27625193323022773, "grad_norm": 2.8681916205286644, "learning_rate": 0.0001, "loss": 0.8374, "mean_abs_error": 307.8678716180914, "mean_abs_error_last_10": 80.44353088805613, "mean_abs_error_last_25": 110.71305944210394, "mean_abs_error_last_50": 176.44947924604557, "mean_pred_prob": 0.04909799643792212, "mean_pred_prob_last_10": 0.23212698362767697, "mean_pred_prob_last_25": 0.13188689071685075, "mean_pred_prob_last_50": 0.08131814561784267, "mean_token_accuracy": 0.8672978937625885, "step": 15540 }, { "epoch": 0.27642970152702967, "grad_norm": 1.648654795774636, "learning_rate": 0.0001, "loss": 0.7973, "mean_abs_error": 206.24979014046056, "mean_abs_error_last_10": 103.99290688234676, "mean_abs_error_last_25": 98.6451979526609, "mean_abs_error_last_50": 131.58520404548693, "mean_pred_prob": 0.04447192982770502, "mean_pred_prob_last_10": 0.2255209444090724, "mean_pred_prob_last_25": 0.12521550571545959, "mean_pred_prob_last_50": 0.07636705646291375, "mean_token_accuracy": 0.8702200591564179, "step": 15550 }, { "epoch": 0.2766074698238316, "grad_norm": 1.0866174547997773, "learning_rate": 0.0001, "loss": 0.8819, "mean_abs_error": 582.8428399814073, "mean_abs_error_last_10": 225.7446169865485, "mean_abs_error_last_25": 321.16498239556785, "mean_abs_error_last_50": 400.4680780173663, "mean_pred_prob": 0.031945440045092255, "mean_pred_prob_last_10": 0.15962451373925432, "mean_pred_prob_last_25": 0.08859684228664264, "mean_pred_prob_last_50": 0.05399274682858959, "mean_token_accuracy": 0.8725794792175293, "step": 15560 }, { "epoch": 0.27678523812063355, "grad_norm": 0.9758432344492102, "learning_rate": 0.0001, "loss": 0.8441, "mean_abs_error": 435.71992944932174, "mean_abs_error_last_10": 130.23485750922055, "mean_abs_error_last_25": 156.82285146714298, "mean_abs_error_last_50": 238.9095036307157, "mean_pred_prob": 0.027499500126577914, "mean_pred_prob_last_10": 0.14082235684618355, "mean_pred_prob_last_25": 0.07635384541936219, "mean_pred_prob_last_50": 0.04662650129757821, "mean_token_accuracy": 0.8788472652435303, "step": 15570 }, { "epoch": 0.27696300641743554, "grad_norm": 1.5890941469053517, "learning_rate": 0.0001, "loss": 0.9916, "mean_abs_error": 654.8193513682506, "mean_abs_error_last_10": 381.3881874373313, "mean_abs_error_last_25": 500.47346608737416, "mean_abs_error_last_50": 508.5086804954352, "mean_pred_prob": 0.013512760982848704, "mean_pred_prob_last_10": 0.08325189054012298, "mean_pred_prob_last_25": 0.04048142898827791, "mean_pred_prob_last_50": 0.02365106586366892, "mean_token_accuracy": 0.8739813268184662, "step": 15580 }, { "epoch": 0.2771407747142375, "grad_norm": 3.0569390313524254, "learning_rate": 0.0001, "loss": 0.8846, "mean_abs_error": 1038.1296894321363, "mean_abs_error_last_10": 553.2617877033323, "mean_abs_error_last_25": 616.2390574378766, "mean_abs_error_last_50": 731.1846599502574, "mean_pred_prob": 0.028726226014259736, "mean_pred_prob_last_10": 0.1553808597382158, "mean_pred_prob_last_25": 0.08110709440952632, "mean_pred_prob_last_50": 0.04878189187729731, "mean_token_accuracy": 0.8683794438838959, "step": 15590 }, { "epoch": 0.2773185430110394, "grad_norm": 1.2125948966523241, "learning_rate": 0.0001, "loss": 0.8342, "mean_abs_error": 836.6530434755889, "mean_abs_error_last_10": 316.78659564123416, "mean_abs_error_last_25": 406.0945026838207, "mean_abs_error_last_50": 556.3817021151929, "mean_pred_prob": 0.023922137581394054, "mean_pred_prob_last_10": 0.1220718533033505, "mean_pred_prob_last_25": 0.06715646663797088, "mean_pred_prob_last_50": 0.0406704418186564, "mean_token_accuracy": 0.8670000076293946, "step": 15600 }, { "epoch": 0.27749631130784136, "grad_norm": 2.8094643936767683, "learning_rate": 0.0001, "loss": 0.8198, "mean_abs_error": 366.51900689976344, "mean_abs_error_last_10": 208.27210629977498, "mean_abs_error_last_25": 271.84512985531154, "mean_abs_error_last_50": 346.026066969276, "mean_pred_prob": 0.0389768184395507, "mean_pred_prob_last_10": 0.18242254238575698, "mean_pred_prob_last_25": 0.10456726900301874, "mean_pred_prob_last_50": 0.06554218428209424, "mean_token_accuracy": 0.8764152109622956, "step": 15610 }, { "epoch": 0.2776740796046433, "grad_norm": 1.4376013972263422, "learning_rate": 0.0001, "loss": 0.8822, "mean_abs_error": 515.3405013806541, "mean_abs_error_last_10": 128.68348514180116, "mean_abs_error_last_25": 168.41751954359302, "mean_abs_error_last_50": 279.28732489891, "mean_pred_prob": 0.02285837961244397, "mean_pred_prob_last_10": 0.1328556382097304, "mean_pred_prob_last_25": 0.0679492297815159, "mean_pred_prob_last_50": 0.04001305303536355, "mean_token_accuracy": 0.8676003515720367, "step": 15620 }, { "epoch": 0.27785184790144524, "grad_norm": 0.9138766966049525, "learning_rate": 0.0001, "loss": 0.8825, "mean_abs_error": 373.74672063054567, "mean_abs_error_last_10": 171.07656194111436, "mean_abs_error_last_25": 191.92881229575784, "mean_abs_error_last_50": 221.4748022108498, "mean_pred_prob": 0.03187447461532429, "mean_pred_prob_last_10": 0.15883921105414628, "mean_pred_prob_last_25": 0.08809262234717607, "mean_pred_prob_last_50": 0.05426327119348571, "mean_token_accuracy": 0.8739081799983979, "step": 15630 }, { "epoch": 0.27802961619824723, "grad_norm": 1.6958390266937653, "learning_rate": 0.0001, "loss": 0.8462, "mean_abs_error": 1015.9620909387191, "mean_abs_error_last_10": 260.1874245380207, "mean_abs_error_last_25": 391.80905661843786, "mean_abs_error_last_50": 658.4662260724866, "mean_pred_prob": 0.024388247626484373, "mean_pred_prob_last_10": 0.1194120085798204, "mean_pred_prob_last_25": 0.06776475679944269, "mean_pred_prob_last_50": 0.041506403521634636, "mean_token_accuracy": 0.8640803694725037, "step": 15640 }, { "epoch": 0.27820738449504917, "grad_norm": 2.336410987429802, "learning_rate": 0.0001, "loss": 0.983, "mean_abs_error": 94.21203657828131, "mean_abs_error_last_10": 24.887184910789593, "mean_abs_error_last_25": 37.38638374205589, "mean_abs_error_last_50": 59.17793642574772, "mean_pred_prob": 0.04501554667949677, "mean_pred_prob_last_10": 0.23599843308329582, "mean_pred_prob_last_25": 0.1276971623301506, "mean_pred_prob_last_50": 0.0764274826273322, "mean_token_accuracy": 0.8751289784908295, "step": 15650 }, { "epoch": 0.2783851527918511, "grad_norm": 1.351224305589011, "learning_rate": 0.0001, "loss": 0.8734, "mean_abs_error": 541.1275843882468, "mean_abs_error_last_10": 77.3723223058417, "mean_abs_error_last_25": 157.28641323387666, "mean_abs_error_last_50": 312.8016556798542, "mean_pred_prob": 0.019425627891905606, "mean_pred_prob_last_10": 0.11240192614495755, "mean_pred_prob_last_25": 0.05722635369747877, "mean_pred_prob_last_50": 0.03368681604042649, "mean_token_accuracy": 0.8765465378761291, "step": 15660 }, { "epoch": 0.27856292108865305, "grad_norm": 4.7465348890104675, "learning_rate": 0.0001, "loss": 0.9704, "mean_abs_error": 887.766857846975, "mean_abs_error_last_10": 307.72546792867695, "mean_abs_error_last_25": 420.53533634086523, "mean_abs_error_last_50": 567.6563143486146, "mean_pred_prob": 0.0342736022954341, "mean_pred_prob_last_10": 0.17431639113929123, "mean_pred_prob_last_25": 0.09785317368223331, "mean_pred_prob_last_50": 0.05844556128722615, "mean_token_accuracy": 0.8759430766105651, "step": 15670 }, { "epoch": 0.278740689385455, "grad_norm": 1.0023697484616936, "learning_rate": 0.0001, "loss": 0.8079, "mean_abs_error": 463.9976241139705, "mean_abs_error_last_10": 133.36494271851024, "mean_abs_error_last_25": 209.0447133898178, "mean_abs_error_last_50": 270.2040391570951, "mean_pred_prob": 0.04082071629818529, "mean_pred_prob_last_10": 0.20123327653855086, "mean_pred_prob_last_25": 0.11445942246355116, "mean_pred_prob_last_50": 0.07069890243001282, "mean_token_accuracy": 0.8727147459983826, "step": 15680 }, { "epoch": 0.2789184576822569, "grad_norm": 2.7148577585750417, "learning_rate": 0.0001, "loss": 0.8879, "mean_abs_error": 1833.741586618885, "mean_abs_error_last_10": 833.8293404664579, "mean_abs_error_last_25": 1064.9155932182457, "mean_abs_error_last_50": 1379.547008587512, "mean_pred_prob": 0.03163334613636835, "mean_pred_prob_last_10": 0.1617202379333321, "mean_pred_prob_last_25": 0.08565981989377178, "mean_pred_prob_last_50": 0.052912797805038284, "mean_token_accuracy": 0.8745788693428039, "step": 15690 }, { "epoch": 0.2790962259790589, "grad_norm": 1.5138112337548082, "learning_rate": 0.0001, "loss": 0.8665, "mean_abs_error": 810.1903099532371, "mean_abs_error_last_10": 318.7637580004262, "mean_abs_error_last_25": 410.7649209491031, "mean_abs_error_last_50": 520.8290322947445, "mean_pred_prob": 0.02776134993764572, "mean_pred_prob_last_10": 0.14754839688539506, "mean_pred_prob_last_25": 0.08121744308737107, "mean_pred_prob_last_50": 0.04841731364431325, "mean_token_accuracy": 0.8694192051887513, "step": 15700 }, { "epoch": 0.27927399427586086, "grad_norm": 1.5731125449016778, "learning_rate": 0.0001, "loss": 0.7642, "mean_abs_error": 142.93947494215269, "mean_abs_error_last_10": 19.90083118736543, "mean_abs_error_last_25": 42.12492207629862, "mean_abs_error_last_50": 81.29670242076935, "mean_pred_prob": 0.05195776119362563, "mean_pred_prob_last_10": 0.24374996237456797, "mean_pred_prob_last_25": 0.1408554296940565, "mean_pred_prob_last_50": 0.08735641869716346, "mean_token_accuracy": 0.8788270473480224, "step": 15710 }, { "epoch": 0.2794517625726628, "grad_norm": 0.7772318100455161, "learning_rate": 0.0001, "loss": 0.8584, "mean_abs_error": 345.8154656516388, "mean_abs_error_last_10": 157.36507512036124, "mean_abs_error_last_25": 207.6599516241984, "mean_abs_error_last_50": 220.5918613291873, "mean_pred_prob": 0.03974963864311576, "mean_pred_prob_last_10": 0.20224828384816645, "mean_pred_prob_last_25": 0.11384025113657117, "mean_pred_prob_last_50": 0.06831574896350504, "mean_token_accuracy": 0.8694485902786255, "step": 15720 }, { "epoch": 0.27962953086946474, "grad_norm": 1.1821125125361445, "learning_rate": 0.0001, "loss": 0.7889, "mean_abs_error": 223.48153355157947, "mean_abs_error_last_10": 62.46836037588819, "mean_abs_error_last_25": 80.50791485587794, "mean_abs_error_last_50": 145.23602552893675, "mean_pred_prob": 0.03124780566431582, "mean_pred_prob_last_10": 0.17664279397577048, "mean_pred_prob_last_25": 0.09208641080185771, "mean_pred_prob_last_50": 0.05373037150129676, "mean_token_accuracy": 0.8772769093513488, "step": 15730 }, { "epoch": 0.2798072991662667, "grad_norm": 1.4241072934022598, "learning_rate": 0.0001, "loss": 0.8709, "mean_abs_error": 323.2728362251152, "mean_abs_error_last_10": 132.28152329294844, "mean_abs_error_last_25": 148.16030512589413, "mean_abs_error_last_50": 184.73434585932176, "mean_pred_prob": 0.03365647052414715, "mean_pred_prob_last_10": 0.17954016253352165, "mean_pred_prob_last_25": 0.09682415304705501, "mean_pred_prob_last_50": 0.058223611675202847, "mean_token_accuracy": 0.8750811696052552, "step": 15740 }, { "epoch": 0.2799850674630686, "grad_norm": 1.6621460124545657, "learning_rate": 0.0001, "loss": 0.8163, "mean_abs_error": 379.6535570309084, "mean_abs_error_last_10": 100.44653070156815, "mean_abs_error_last_25": 159.20156924689695, "mean_abs_error_last_50": 250.92591497594708, "mean_pred_prob": 0.03332885126583278, "mean_pred_prob_last_10": 0.1754870543256402, "mean_pred_prob_last_25": 0.09482759926468134, "mean_pred_prob_last_50": 0.05718982564285398, "mean_token_accuracy": 0.8827701091766358, "step": 15750 }, { "epoch": 0.2801628357598706, "grad_norm": 2.52160418092829, "learning_rate": 0.0001, "loss": 0.8124, "mean_abs_error": 1389.1422852286082, "mean_abs_error_last_10": 1019.1254776936239, "mean_abs_error_last_25": 1058.2812399978977, "mean_abs_error_last_50": 1143.0804870186182, "mean_pred_prob": 0.02507896268944023, "mean_pred_prob_last_10": 0.12968920744242496, "mean_pred_prob_last_25": 0.06894462744239718, "mean_pred_prob_last_50": 0.04225510226096958, "mean_token_accuracy": 0.8670080959796905, "step": 15760 }, { "epoch": 0.28034060405667255, "grad_norm": 1.7137294603831474, "learning_rate": 0.0001, "loss": 0.8992, "mean_abs_error": 530.8067867771973, "mean_abs_error_last_10": 177.65148995261887, "mean_abs_error_last_25": 245.76881591405746, "mean_abs_error_last_50": 363.2125307179815, "mean_pred_prob": 0.031174553907476366, "mean_pred_prob_last_10": 0.1636307781096548, "mean_pred_prob_last_25": 0.09301248856936581, "mean_pred_prob_last_50": 0.05482353255501948, "mean_token_accuracy": 0.8710255146026611, "step": 15770 }, { "epoch": 0.2805183723534745, "grad_norm": 2.350942544790978, "learning_rate": 0.0001, "loss": 0.7711, "mean_abs_error": 610.3525354641815, "mean_abs_error_last_10": 293.2787215281647, "mean_abs_error_last_25": 365.8379488808263, "mean_abs_error_last_50": 438.8083582059561, "mean_pred_prob": 0.033051968307700005, "mean_pred_prob_last_10": 0.16546433514449746, "mean_pred_prob_last_25": 0.09338416755199433, "mean_pred_prob_last_50": 0.05592364897602238, "mean_token_accuracy": 0.87601917386055, "step": 15780 }, { "epoch": 0.2806961406502764, "grad_norm": 1.224873228615711, "learning_rate": 0.0001, "loss": 0.8101, "mean_abs_error": 689.3642739968393, "mean_abs_error_last_10": 97.74097768489659, "mean_abs_error_last_25": 189.36290922618252, "mean_abs_error_last_50": 350.4766693672877, "mean_pred_prob": 0.036411975754890594, "mean_pred_prob_last_10": 0.189597152126953, "mean_pred_prob_last_25": 0.10428925883024931, "mean_pred_prob_last_50": 0.06226978192571551, "mean_token_accuracy": 0.8762082517147064, "step": 15790 }, { "epoch": 0.28087390894707837, "grad_norm": 1.5371436420745972, "learning_rate": 0.0001, "loss": 0.8129, "mean_abs_error": 224.73383138036237, "mean_abs_error_last_10": 48.62241057021164, "mean_abs_error_last_25": 76.01556979372455, "mean_abs_error_last_50": 124.15043976127045, "mean_pred_prob": 0.04208340431796387, "mean_pred_prob_last_10": 0.2220490800216794, "mean_pred_prob_last_25": 0.11953104794956744, "mean_pred_prob_last_50": 0.07174169945064932, "mean_token_accuracy": 0.8708323180675507, "step": 15800 }, { "epoch": 0.2810516772438803, "grad_norm": 2.1028176343247904, "learning_rate": 0.0001, "loss": 0.7711, "mean_abs_error": 230.91563617966762, "mean_abs_error_last_10": 46.578147730407125, "mean_abs_error_last_25": 82.51060043898688, "mean_abs_error_last_50": 130.8343065940855, "mean_pred_prob": 0.03934053317643702, "mean_pred_prob_last_10": 0.21359494775533677, "mean_pred_prob_last_25": 0.11392729934304953, "mean_pred_prob_last_50": 0.0677202932536602, "mean_token_accuracy": 0.8728261947631836, "step": 15810 }, { "epoch": 0.2812294455406823, "grad_norm": 1.9728644305208127, "learning_rate": 0.0001, "loss": 0.8146, "mean_abs_error": 1878.1716149299966, "mean_abs_error_last_10": 921.0571450728321, "mean_abs_error_last_25": 1092.6508040841081, "mean_abs_error_last_50": 1393.0094887037837, "mean_pred_prob": 0.025055388221517205, "mean_pred_prob_last_10": 0.1251631970197195, "mean_pred_prob_last_25": 0.07085875694756397, "mean_pred_prob_last_50": 0.0424414159351727, "mean_token_accuracy": 0.8764574408531189, "step": 15820 }, { "epoch": 0.28140721383748424, "grad_norm": 1.3843092672437352, "learning_rate": 0.0001, "loss": 0.7967, "mean_abs_error": 920.5014230548597, "mean_abs_error_last_10": 507.07680966879633, "mean_abs_error_last_25": 626.272478449864, "mean_abs_error_last_50": 698.922135720369, "mean_pred_prob": 0.03324051150702871, "mean_pred_prob_last_10": 0.17635958043974825, "mean_pred_prob_last_25": 0.09790446334227454, "mean_pred_prob_last_50": 0.057288278490887024, "mean_token_accuracy": 0.8731565117835999, "step": 15830 }, { "epoch": 0.2815849821342862, "grad_norm": 0.977768309361437, "learning_rate": 0.0001, "loss": 0.7874, "mean_abs_error": 910.1553100249464, "mean_abs_error_last_10": 509.135002996193, "mean_abs_error_last_25": 654.3159904507395, "mean_abs_error_last_50": 646.8393133083612, "mean_pred_prob": 0.01972907287708949, "mean_pred_prob_last_10": 0.09805128342704847, "mean_pred_prob_last_25": 0.05402625286369585, "mean_pred_prob_last_50": 0.033098843292100354, "mean_token_accuracy": 0.8726844072341919, "step": 15840 }, { "epoch": 0.2817627504310881, "grad_norm": 1.6621529588161332, "learning_rate": 0.0001, "loss": 0.7949, "mean_abs_error": 515.6364535794203, "mean_abs_error_last_10": 110.62792988939903, "mean_abs_error_last_25": 153.78873368599494, "mean_abs_error_last_50": 285.38928095216977, "mean_pred_prob": 0.02867643381468952, "mean_pred_prob_last_10": 0.14994194973260164, "mean_pred_prob_last_25": 0.08343176497146487, "mean_pred_prob_last_50": 0.04969966658391058, "mean_token_accuracy": 0.8777317106723785, "step": 15850 }, { "epoch": 0.28194051872789005, "grad_norm": 1.6205269083543947, "learning_rate": 0.0001, "loss": 0.7686, "mean_abs_error": 641.4430808000479, "mean_abs_error_last_10": 371.0978640904862, "mean_abs_error_last_25": 385.22418228600884, "mean_abs_error_last_50": 476.72383381111405, "mean_pred_prob": 0.038070547199458814, "mean_pred_prob_last_10": 0.19720580375287683, "mean_pred_prob_last_25": 0.11010550256469287, "mean_pred_prob_last_50": 0.0662963022274198, "mean_token_accuracy": 0.8641337454319, "step": 15860 }, { "epoch": 0.282118287024692, "grad_norm": 1.7781096837655523, "learning_rate": 0.0001, "loss": 0.8512, "mean_abs_error": 424.3180266086289, "mean_abs_error_last_10": 233.10616073625278, "mean_abs_error_last_25": 337.9851646026465, "mean_abs_error_last_50": 325.0556375685427, "mean_pred_prob": 0.05453652278520167, "mean_pred_prob_last_10": 0.21882728056516498, "mean_pred_prob_last_25": 0.13329240609891713, "mean_pred_prob_last_50": 0.0883095050114207, "mean_token_accuracy": 0.8767775237560272, "step": 15870 }, { "epoch": 0.282296055321494, "grad_norm": 2.8656376026157497, "learning_rate": 0.0001, "loss": 0.7623, "mean_abs_error": 497.3051924227008, "mean_abs_error_last_10": 170.3093171140853, "mean_abs_error_last_25": 218.8595348530407, "mean_abs_error_last_50": 294.74621561782226, "mean_pred_prob": 0.03537605662713759, "mean_pred_prob_last_10": 0.18089878296013923, "mean_pred_prob_last_25": 0.09842256415868178, "mean_pred_prob_last_50": 0.05975665797595866, "mean_token_accuracy": 0.8837086856365204, "step": 15880 }, { "epoch": 0.2824738236182959, "grad_norm": 1.1789992838163537, "learning_rate": 0.0001, "loss": 0.8039, "mean_abs_error": 887.1664170110546, "mean_abs_error_last_10": 208.4251613227153, "mean_abs_error_last_25": 325.57302008505656, "mean_abs_error_last_50": 517.4593205602945, "mean_pred_prob": 0.027004933630814777, "mean_pred_prob_last_10": 0.13268322478979827, "mean_pred_prob_last_25": 0.07368368040770293, "mean_pred_prob_last_50": 0.045567602501250803, "mean_token_accuracy": 0.8728470027446746, "step": 15890 }, { "epoch": 0.28265159191509787, "grad_norm": 2.133116641841599, "learning_rate": 0.0001, "loss": 0.808, "mean_abs_error": 444.84586591127515, "mean_abs_error_last_10": 208.65926676865783, "mean_abs_error_last_25": 238.26612636811495, "mean_abs_error_last_50": 308.30608848126167, "mean_pred_prob": 0.04258513362146914, "mean_pred_prob_last_10": 0.20401627644896508, "mean_pred_prob_last_25": 0.11627049529924989, "mean_pred_prob_last_50": 0.07118834499269724, "mean_token_accuracy": 0.8720804929733277, "step": 15900 }, { "epoch": 0.2828293602118998, "grad_norm": 2.183446585730962, "learning_rate": 0.0001, "loss": 0.8567, "mean_abs_error": 1116.9245780572867, "mean_abs_error_last_10": 375.958629888023, "mean_abs_error_last_25": 526.8926140217533, "mean_abs_error_last_50": 686.1400864898849, "mean_pred_prob": 0.02603222684119828, "mean_pred_prob_last_10": 0.13110339798149653, "mean_pred_prob_last_25": 0.07060002095531673, "mean_pred_prob_last_50": 0.04314706122386269, "mean_token_accuracy": 0.8655690670013427, "step": 15910 }, { "epoch": 0.28300712850870174, "grad_norm": 2.715089472373794, "learning_rate": 0.0001, "loss": 0.8717, "mean_abs_error": 111.54223681900726, "mean_abs_error_last_10": 23.027489539311606, "mean_abs_error_last_25": 48.01559853066865, "mean_abs_error_last_50": 74.10681780571103, "mean_pred_prob": 0.04674707325175405, "mean_pred_prob_last_10": 0.23075617626309394, "mean_pred_prob_last_25": 0.1315686173737049, "mean_pred_prob_last_50": 0.07989918924868107, "mean_token_accuracy": 0.8690134108066558, "step": 15920 }, { "epoch": 0.2831848968055037, "grad_norm": 2.967992355209528, "learning_rate": 0.0001, "loss": 0.8507, "mean_abs_error": 457.3077028045338, "mean_abs_error_last_10": 130.77555107503056, "mean_abs_error_last_25": 191.45963674708293, "mean_abs_error_last_50": 290.96825374136614, "mean_pred_prob": 0.0392691925400868, "mean_pred_prob_last_10": 0.18950311453081667, "mean_pred_prob_last_25": 0.10636593415401877, "mean_pred_prob_last_50": 0.06656064006965608, "mean_token_accuracy": 0.8689844667911529, "step": 15930 }, { "epoch": 0.2833626651023057, "grad_norm": 1.9084499807066868, "learning_rate": 0.0001, "loss": 0.8416, "mean_abs_error": 219.7778467452253, "mean_abs_error_last_10": 53.692191216118104, "mean_abs_error_last_25": 77.02212896618101, "mean_abs_error_last_50": 124.87797063814192, "mean_pred_prob": 0.03889543022960425, "mean_pred_prob_last_10": 0.19978296421468258, "mean_pred_prob_last_25": 0.11351737584918738, "mean_pred_prob_last_50": 0.06790637476369739, "mean_token_accuracy": 0.8697868883609772, "step": 15940 }, { "epoch": 0.2835404333991076, "grad_norm": 0.8445856857829535, "learning_rate": 0.0001, "loss": 0.9299, "mean_abs_error": 624.532536855794, "mean_abs_error_last_10": 376.6912797505372, "mean_abs_error_last_25": 389.03124914955185, "mean_abs_error_last_50": 496.3408079642562, "mean_pred_prob": 0.03643208860303275, "mean_pred_prob_last_10": 0.18116685211425648, "mean_pred_prob_last_25": 0.10164354863227346, "mean_pred_prob_last_50": 0.06151344724057708, "mean_token_accuracy": 0.8684684753417968, "step": 15950 }, { "epoch": 0.28371820169590956, "grad_norm": 1.4636194301281706, "learning_rate": 0.0001, "loss": 0.8946, "mean_abs_error": 410.73181337907755, "mean_abs_error_last_10": 104.05562971799118, "mean_abs_error_last_25": 181.956738161252, "mean_abs_error_last_50": 267.98834796057133, "mean_pred_prob": 0.024359226995147764, "mean_pred_prob_last_10": 0.13424001801759006, "mean_pred_prob_last_25": 0.07106136521324516, "mean_pred_prob_last_50": 0.04186449912376702, "mean_token_accuracy": 0.8780608832836151, "step": 15960 }, { "epoch": 0.2838959699927115, "grad_norm": 2.6591852077678984, "learning_rate": 0.0001, "loss": 0.8948, "mean_abs_error": 634.3990477782836, "mean_abs_error_last_10": 170.5180645594728, "mean_abs_error_last_25": 207.73546274450956, "mean_abs_error_last_50": 334.4280227691117, "mean_pred_prob": 0.037593297008424995, "mean_pred_prob_last_10": 0.1619250737130642, "mean_pred_prob_last_25": 0.09576231380924582, "mean_pred_prob_last_50": 0.06184318327577785, "mean_token_accuracy": 0.8698773682117462, "step": 15970 }, { "epoch": 0.28407373828951343, "grad_norm": 1.0798571123017895, "learning_rate": 0.0001, "loss": 0.7977, "mean_abs_error": 1385.5322460979824, "mean_abs_error_last_10": 728.1935145364273, "mean_abs_error_last_25": 859.8813043356922, "mean_abs_error_last_50": 1017.1556450556116, "mean_pred_prob": 0.02350515414873371, "mean_pred_prob_last_10": 0.11038908685150091, "mean_pred_prob_last_25": 0.06390952383226249, "mean_pred_prob_last_50": 0.03961613083374686, "mean_token_accuracy": 0.8758500993251801, "step": 15980 }, { "epoch": 0.2842515065863154, "grad_norm": 1.493297943581486, "learning_rate": 0.0001, "loss": 0.8059, "mean_abs_error": 578.3484123237224, "mean_abs_error_last_10": 172.12631131532666, "mean_abs_error_last_25": 247.92773206052084, "mean_abs_error_last_50": 321.1515976659248, "mean_pred_prob": 0.02620391258969903, "mean_pred_prob_last_10": 0.13732850328087806, "mean_pred_prob_last_25": 0.07362134158611297, "mean_pred_prob_last_50": 0.04488451480865478, "mean_token_accuracy": 0.8767254114151001, "step": 15990 }, { "epoch": 0.28442927488311737, "grad_norm": 1.5872275301418808, "learning_rate": 0.0001, "loss": 0.857, "mean_abs_error": 1003.2445983540198, "mean_abs_error_last_10": 450.41097882943296, "mean_abs_error_last_25": 548.2211582371729, "mean_abs_error_last_50": 662.6773663618995, "mean_pred_prob": 0.026276862630038523, "mean_pred_prob_last_10": 0.1349811743479222, "mean_pred_prob_last_25": 0.07244423149386421, "mean_pred_prob_last_50": 0.044195190700702366, "mean_token_accuracy": 0.8750294327735901, "step": 16000 }, { "epoch": 0.2846070431799193, "grad_norm": 1.4730025792952297, "learning_rate": 0.0001, "loss": 0.7748, "mean_abs_error": 364.7233294479174, "mean_abs_error_last_10": 106.59886700324898, "mean_abs_error_last_25": 122.37503525417087, "mean_abs_error_last_50": 185.2016440544505, "mean_pred_prob": 0.042463147209491584, "mean_pred_prob_last_10": 0.1937830088660121, "mean_pred_prob_last_25": 0.11450210241600871, "mean_pred_prob_last_50": 0.07014075386105105, "mean_token_accuracy": 0.8727767169475555, "step": 16010 }, { "epoch": 0.28478481147672124, "grad_norm": 1.5137253685375085, "learning_rate": 0.0001, "loss": 0.8662, "mean_abs_error": 906.5251139484224, "mean_abs_error_last_10": 412.6563769114867, "mean_abs_error_last_25": 480.7332332361126, "mean_abs_error_last_50": 665.6062225613912, "mean_pred_prob": 0.037470565123658164, "mean_pred_prob_last_10": 0.18930458474787884, "mean_pred_prob_last_25": 0.10420740883273538, "mean_pred_prob_last_50": 0.06369974510016618, "mean_token_accuracy": 0.8653553664684296, "step": 16020 }, { "epoch": 0.2849625797735232, "grad_norm": 0.9229732261359701, "learning_rate": 0.0001, "loss": 0.8638, "mean_abs_error": 478.03755921004506, "mean_abs_error_last_10": 210.46976329412573, "mean_abs_error_last_25": 254.56481865001223, "mean_abs_error_last_50": 291.2796090706871, "mean_pred_prob": 0.018683638866059483, "mean_pred_prob_last_10": 0.09436071142554284, "mean_pred_prob_last_25": 0.05134185887873173, "mean_pred_prob_last_50": 0.03141672513447702, "mean_token_accuracy": 0.8796567440032959, "step": 16030 }, { "epoch": 0.2851403480703251, "grad_norm": 0.8851936965066769, "learning_rate": 0.0001, "loss": 0.8018, "mean_abs_error": 499.67920526832177, "mean_abs_error_last_10": 64.06535715211628, "mean_abs_error_last_25": 144.00134155125954, "mean_abs_error_last_50": 260.9108874075738, "mean_pred_prob": 0.05156060621375218, "mean_pred_prob_last_10": 0.24361292500980197, "mean_pred_prob_last_25": 0.14152695422526448, "mean_pred_prob_last_50": 0.08642145132180304, "mean_token_accuracy": 0.8707761406898499, "step": 16040 }, { "epoch": 0.28531811636712706, "grad_norm": 2.187714326581973, "learning_rate": 0.0001, "loss": 0.8208, "mean_abs_error": 655.0335335501142, "mean_abs_error_last_10": 284.73763807862963, "mean_abs_error_last_25": 355.8264266339284, "mean_abs_error_last_50": 451.8490942120954, "mean_pred_prob": 0.030198260355973616, "mean_pred_prob_last_10": 0.16311014974489807, "mean_pred_prob_last_25": 0.08832348553696648, "mean_pred_prob_last_50": 0.052215613360749556, "mean_token_accuracy": 0.8716414332389831, "step": 16050 }, { "epoch": 0.28549588466392906, "grad_norm": 1.85092023940123, "learning_rate": 0.0001, "loss": 0.7736, "mean_abs_error": 681.7385287186372, "mean_abs_error_last_10": 394.90785928021916, "mean_abs_error_last_25": 409.9244839421717, "mean_abs_error_last_50": 495.19403927285896, "mean_pred_prob": 0.03964425405138172, "mean_pred_prob_last_10": 0.19644379696110265, "mean_pred_prob_last_25": 0.11042747107567266, "mean_pred_prob_last_50": 0.06691587527166121, "mean_token_accuracy": 0.8855901777744293, "step": 16060 }, { "epoch": 0.285673652960731, "grad_norm": 1.2598374543662338, "learning_rate": 0.0001, "loss": 0.8845, "mean_abs_error": 667.5390414520369, "mean_abs_error_last_10": 256.93137464750345, "mean_abs_error_last_25": 306.1837400211331, "mean_abs_error_last_50": 416.7009931582376, "mean_pred_prob": 0.018122486188076437, "mean_pred_prob_last_10": 0.09700492694973946, "mean_pred_prob_last_25": 0.05133038110798225, "mean_pred_prob_last_50": 0.03085291419411078, "mean_token_accuracy": 0.8667800486087799, "step": 16070 }, { "epoch": 0.28585142125753293, "grad_norm": 1.80781895321593, "learning_rate": 0.0001, "loss": 0.8376, "mean_abs_error": 439.1663171970351, "mean_abs_error_last_10": 87.03167559745171, "mean_abs_error_last_25": 135.4243319593307, "mean_abs_error_last_50": 239.7201148024431, "mean_pred_prob": 0.035690196650102736, "mean_pred_prob_last_10": 0.18408233362715692, "mean_pred_prob_last_25": 0.10042451036861166, "mean_pred_prob_last_50": 0.060648541164118795, "mean_token_accuracy": 0.8756790816783905, "step": 16080 }, { "epoch": 0.2860291895543349, "grad_norm": 1.0902466127545927, "learning_rate": 0.0001, "loss": 0.8849, "mean_abs_error": 440.24924983286047, "mean_abs_error_last_10": 140.38258852525342, "mean_abs_error_last_25": 177.48495969723575, "mean_abs_error_last_50": 240.26023776346514, "mean_pred_prob": 0.026959635876119138, "mean_pred_prob_last_10": 0.13753154911100865, "mean_pred_prob_last_25": 0.0772725248709321, "mean_pred_prob_last_50": 0.046600382309406996, "mean_token_accuracy": 0.8623970091342926, "step": 16090 }, { "epoch": 0.2862069578511368, "grad_norm": 1.091106420074669, "learning_rate": 0.0001, "loss": 0.7902, "mean_abs_error": 277.5964839601155, "mean_abs_error_last_10": 82.77035898045764, "mean_abs_error_last_25": 96.15924643162215, "mean_abs_error_last_50": 148.7361547176782, "mean_pred_prob": 0.03025991371832788, "mean_pred_prob_last_10": 0.16418843530118465, "mean_pred_prob_last_25": 0.08756870031356812, "mean_pred_prob_last_50": 0.052690714597702026, "mean_token_accuracy": 0.875368183851242, "step": 16100 }, { "epoch": 0.28638472614793875, "grad_norm": 2.145291312271513, "learning_rate": 0.0001, "loss": 0.8032, "mean_abs_error": 360.9528757241965, "mean_abs_error_last_10": 116.83804647372419, "mean_abs_error_last_25": 139.5886866302576, "mean_abs_error_last_50": 218.00745949723301, "mean_pred_prob": 0.04079426820389927, "mean_pred_prob_last_10": 0.19361142944544554, "mean_pred_prob_last_25": 0.10864893160760403, "mean_pred_prob_last_50": 0.0683782578445971, "mean_token_accuracy": 0.8764783084392548, "step": 16110 }, { "epoch": 0.28656249444474075, "grad_norm": 2.518719970654617, "learning_rate": 0.0001, "loss": 0.9677, "mean_abs_error": 667.9069590382663, "mean_abs_error_last_10": 110.18896906016182, "mean_abs_error_last_25": 228.59546581679496, "mean_abs_error_last_50": 439.67750555153606, "mean_pred_prob": 0.02676154957152903, "mean_pred_prob_last_10": 0.14792623333632945, "mean_pred_prob_last_25": 0.07917361408472061, "mean_pred_prob_last_50": 0.046407104935497046, "mean_token_accuracy": 0.876318484544754, "step": 16120 }, { "epoch": 0.2867402627415427, "grad_norm": 1.0280234084245188, "learning_rate": 0.0001, "loss": 0.8777, "mean_abs_error": 273.7842034993112, "mean_abs_error_last_10": 232.3224640343053, "mean_abs_error_last_25": 201.09446253745676, "mean_abs_error_last_50": 182.62664031947156, "mean_pred_prob": 0.046456468477845195, "mean_pred_prob_last_10": 0.21172514259815217, "mean_pred_prob_last_25": 0.12666175402700902, "mean_pred_prob_last_50": 0.07822586931288242, "mean_token_accuracy": 0.8644209563732147, "step": 16130 }, { "epoch": 0.2869180310383446, "grad_norm": 1.8066310976874347, "learning_rate": 0.0001, "loss": 0.8014, "mean_abs_error": 2468.479897718149, "mean_abs_error_last_10": 1344.8350699435257, "mean_abs_error_last_25": 1502.4907600017273, "mean_abs_error_last_50": 1810.9267174107704, "mean_pred_prob": 0.030631983230705373, "mean_pred_prob_last_10": 0.13355026441568044, "mean_pred_prob_last_25": 0.07780008690606337, "mean_pred_prob_last_50": 0.0497958361331257, "mean_token_accuracy": 0.8740125477313996, "step": 16140 }, { "epoch": 0.28709579933514656, "grad_norm": 1.790551983458394, "learning_rate": 0.0001, "loss": 0.8889, "mean_abs_error": 319.69932330754364, "mean_abs_error_last_10": 69.99874922532283, "mean_abs_error_last_25": 127.37212101448767, "mean_abs_error_last_50": 172.65871667697405, "mean_pred_prob": 0.03073907932266593, "mean_pred_prob_last_10": 0.16555953547358512, "mean_pred_prob_last_25": 0.08989317640662194, "mean_pred_prob_last_50": 0.052671310119330884, "mean_token_accuracy": 0.8715299963951111, "step": 16150 }, { "epoch": 0.2872735676319485, "grad_norm": 2.084108627193677, "learning_rate": 0.0001, "loss": 0.9396, "mean_abs_error": 449.49403494163164, "mean_abs_error_last_10": 127.70924693486904, "mean_abs_error_last_25": 114.36470263350775, "mean_abs_error_last_50": 155.79182464793448, "mean_pred_prob": 0.04958405434153974, "mean_pred_prob_last_10": 0.23557800184935332, "mean_pred_prob_last_25": 0.13529981961473822, "mean_pred_prob_last_50": 0.08406308419071137, "mean_token_accuracy": 0.8736607670783997, "step": 16160 }, { "epoch": 0.28745133592875044, "grad_norm": 2.489068665084181, "learning_rate": 0.0001, "loss": 0.7661, "mean_abs_error": 145.0864935536694, "mean_abs_error_last_10": 47.79092296364165, "mean_abs_error_last_25": 56.255190298556194, "mean_abs_error_last_50": 94.76270961760406, "mean_pred_prob": 0.05111399432644248, "mean_pred_prob_last_10": 0.22728739716112614, "mean_pred_prob_last_25": 0.12853631153702735, "mean_pred_prob_last_50": 0.08418534640222788, "mean_token_accuracy": 0.8774177730083466, "step": 16170 }, { "epoch": 0.28762910422555243, "grad_norm": 1.1757894365749841, "learning_rate": 0.0001, "loss": 0.7726, "mean_abs_error": 776.0877670915036, "mean_abs_error_last_10": 326.21802133486835, "mean_abs_error_last_25": 511.5122672827999, "mean_abs_error_last_50": 582.353851287836, "mean_pred_prob": 0.04556920532486401, "mean_pred_prob_last_10": 0.21773889346513897, "mean_pred_prob_last_25": 0.12635229895240627, "mean_pred_prob_last_50": 0.07728370755212381, "mean_token_accuracy": 0.8658549010753631, "step": 16180 }, { "epoch": 0.2878068725223544, "grad_norm": 1.4427034990540555, "learning_rate": 0.0001, "loss": 0.8263, "mean_abs_error": 210.78425778096525, "mean_abs_error_last_10": 59.14265554785537, "mean_abs_error_last_25": 87.77221058833553, "mean_abs_error_last_50": 122.78309684939072, "mean_pred_prob": 0.04245963576249778, "mean_pred_prob_last_10": 0.19931362457573415, "mean_pred_prob_last_25": 0.11568204667419195, "mean_pred_prob_last_50": 0.07112974943593145, "mean_token_accuracy": 0.8801730692386627, "step": 16190 }, { "epoch": 0.2879846408191563, "grad_norm": 1.6726831992457203, "learning_rate": 0.0001, "loss": 0.7416, "mean_abs_error": 776.457980003231, "mean_abs_error_last_10": 558.6290713453117, "mean_abs_error_last_25": 507.226901217486, "mean_abs_error_last_50": 514.6689128191564, "mean_pred_prob": 0.024329629790736364, "mean_pred_prob_last_10": 0.10716126614715904, "mean_pred_prob_last_25": 0.06403778401436284, "mean_pred_prob_last_50": 0.04045786280184984, "mean_token_accuracy": 0.8741832852363587, "step": 16200 }, { "epoch": 0.28816240911595825, "grad_norm": 2.6785805837896364, "learning_rate": 0.0001, "loss": 0.8332, "mean_abs_error": 773.9114785932653, "mean_abs_error_last_10": 217.29029440663007, "mean_abs_error_last_25": 279.359153291419, "mean_abs_error_last_50": 425.09691699839107, "mean_pred_prob": 0.032420990039827305, "mean_pred_prob_last_10": 0.17587783981580288, "mean_pred_prob_last_25": 0.09426097266841679, "mean_pred_prob_last_50": 0.055473906843690204, "mean_token_accuracy": 0.8723778665065766, "step": 16210 }, { "epoch": 0.2883401774127602, "grad_norm": 2.0944683717781323, "learning_rate": 0.0001, "loss": 0.7878, "mean_abs_error": 1149.6015657926207, "mean_abs_error_last_10": 778.5024405319057, "mean_abs_error_last_25": 836.0247595748236, "mean_abs_error_last_50": 933.126991685868, "mean_pred_prob": 0.037827164138070656, "mean_pred_prob_last_10": 0.17969354530214332, "mean_pred_prob_last_25": 0.10544205620899447, "mean_pred_prob_last_50": 0.06409985364516615, "mean_token_accuracy": 0.873099273443222, "step": 16220 }, { "epoch": 0.28851794570956213, "grad_norm": 1.61455834666547, "learning_rate": 0.0001, "loss": 0.783, "mean_abs_error": 323.3156164200642, "mean_abs_error_last_10": 81.9548443893029, "mean_abs_error_last_25": 96.83751556678621, "mean_abs_error_last_50": 164.9985572688835, "mean_pred_prob": 0.03582557633053511, "mean_pred_prob_last_10": 0.17807059567421674, "mean_pred_prob_last_25": 0.0992770885117352, "mean_pred_prob_last_50": 0.06125515513122082, "mean_token_accuracy": 0.8701860904693604, "step": 16230 }, { "epoch": 0.2886957140063641, "grad_norm": 1.465565698413113, "learning_rate": 0.0001, "loss": 0.8707, "mean_abs_error": 680.7488430689613, "mean_abs_error_last_10": 210.65025674447807, "mean_abs_error_last_25": 229.4682505982601, "mean_abs_error_last_50": 363.7288768229762, "mean_pred_prob": 0.03091081891907379, "mean_pred_prob_last_10": 0.1664213692303747, "mean_pred_prob_last_25": 0.0908297102432698, "mean_pred_prob_last_50": 0.05427418826147914, "mean_token_accuracy": 0.869041258096695, "step": 16240 }, { "epoch": 0.28887348230316606, "grad_norm": 4.416687760065444, "learning_rate": 0.0001, "loss": 0.8784, "mean_abs_error": 663.801721418776, "mean_abs_error_last_10": 208.73086371453775, "mean_abs_error_last_25": 263.7708755008651, "mean_abs_error_last_50": 347.07806101493384, "mean_pred_prob": 0.02711689337156713, "mean_pred_prob_last_10": 0.13776565678417682, "mean_pred_prob_last_25": 0.07597301406785846, "mean_pred_prob_last_50": 0.04651500536128879, "mean_token_accuracy": 0.8767792880535126, "step": 16250 }, { "epoch": 0.289051250599968, "grad_norm": 1.055052171541141, "learning_rate": 0.0001, "loss": 0.8086, "mean_abs_error": 445.7890887728903, "mean_abs_error_last_10": 80.95316180635542, "mean_abs_error_last_25": 134.66493416170783, "mean_abs_error_last_50": 220.5498298574057, "mean_pred_prob": 0.0324097276898101, "mean_pred_prob_last_10": 0.1475151116028428, "mean_pred_prob_last_25": 0.08363419491797686, "mean_pred_prob_last_50": 0.05299284141510725, "mean_token_accuracy": 0.8779996395111084, "step": 16260 }, { "epoch": 0.28922901889676994, "grad_norm": 2.19617522485788, "learning_rate": 0.0001, "loss": 0.8337, "mean_abs_error": 665.4962004727654, "mean_abs_error_last_10": 328.45846750487476, "mean_abs_error_last_25": 373.737944180158, "mean_abs_error_last_50": 436.71171907419404, "mean_pred_prob": 0.037302627027384, "mean_pred_prob_last_10": 0.1797594727075193, "mean_pred_prob_last_25": 0.09996690641855821, "mean_pred_prob_last_50": 0.06203318256593775, "mean_token_accuracy": 0.8733096778392792, "step": 16270 }, { "epoch": 0.2894067871935719, "grad_norm": 1.138811505225543, "learning_rate": 0.0001, "loss": 0.8666, "mean_abs_error": 857.6686092271863, "mean_abs_error_last_10": 408.9397313331775, "mean_abs_error_last_25": 507.9241443136542, "mean_abs_error_last_50": 627.1994090123068, "mean_pred_prob": 0.04473161974165123, "mean_pred_prob_last_10": 0.21369657428876962, "mean_pred_prob_last_25": 0.11946291033818852, "mean_pred_prob_last_50": 0.07508810893341433, "mean_token_accuracy": 0.8785882353782654, "step": 16280 }, { "epoch": 0.2895845554903739, "grad_norm": 2.6782894582251218, "learning_rate": 0.0001, "loss": 0.8432, "mean_abs_error": 446.5420803294186, "mean_abs_error_last_10": 133.68375066749905, "mean_abs_error_last_25": 201.98413079492806, "mean_abs_error_last_50": 292.26029122970675, "mean_pred_prob": 0.034959007916040716, "mean_pred_prob_last_10": 0.18287874683737754, "mean_pred_prob_last_25": 0.0984980314038694, "mean_pred_prob_last_50": 0.060153722669929265, "mean_token_accuracy": 0.8660575985908509, "step": 16290 }, { "epoch": 0.2897623237871758, "grad_norm": 0.8760014495611992, "learning_rate": 0.0001, "loss": 0.8073, "mean_abs_error": 343.8111961086657, "mean_abs_error_last_10": 54.80992903017706, "mean_abs_error_last_25": 135.50146217351116, "mean_abs_error_last_50": 251.74988597327942, "mean_pred_prob": 0.04044352448545396, "mean_pred_prob_last_10": 0.19078230801969767, "mean_pred_prob_last_25": 0.11298661399632692, "mean_pred_prob_last_50": 0.06846022880636156, "mean_token_accuracy": 0.8785998046398162, "step": 16300 }, { "epoch": 0.28994009208397775, "grad_norm": 1.6193603525098654, "learning_rate": 0.0001, "loss": 0.8568, "mean_abs_error": 515.9988539286279, "mean_abs_error_last_10": 274.32419016855295, "mean_abs_error_last_25": 303.5358660878843, "mean_abs_error_last_50": 327.7941932872851, "mean_pred_prob": 0.02526406043325551, "mean_pred_prob_last_10": 0.12581645778845996, "mean_pred_prob_last_25": 0.07016608279664069, "mean_pred_prob_last_50": 0.042758250015322116, "mean_token_accuracy": 0.8624778389930725, "step": 16310 }, { "epoch": 0.2901178603807797, "grad_norm": 2.928493248525324, "learning_rate": 0.0001, "loss": 0.851, "mean_abs_error": 357.02515609541854, "mean_abs_error_last_10": 105.24139991305465, "mean_abs_error_last_25": 195.0140989375138, "mean_abs_error_last_50": 225.3123489103165, "mean_pred_prob": 0.027726639760658145, "mean_pred_prob_last_10": 0.14361699298024178, "mean_pred_prob_last_25": 0.07955311816185713, "mean_pred_prob_last_50": 0.04784067776054144, "mean_token_accuracy": 0.866520631313324, "step": 16320 }, { "epoch": 0.29029562867758163, "grad_norm": 2.6059046345923322, "learning_rate": 0.0001, "loss": 0.8891, "mean_abs_error": 1349.7104177980007, "mean_abs_error_last_10": 677.567211579825, "mean_abs_error_last_25": 756.0143981937539, "mean_abs_error_last_50": 946.8008940799631, "mean_pred_prob": 0.03739553209015867, "mean_pred_prob_last_10": 0.17464429077808746, "mean_pred_prob_last_25": 0.09626263286045286, "mean_pred_prob_last_50": 0.06097497072187252, "mean_token_accuracy": 0.8824638068675995, "step": 16330 }, { "epoch": 0.29047339697438357, "grad_norm": 1.257868132480819, "learning_rate": 0.0001, "loss": 0.7705, "mean_abs_error": 425.1436811715477, "mean_abs_error_last_10": 73.64539323515858, "mean_abs_error_last_25": 112.97305607669307, "mean_abs_error_last_50": 249.16245176503315, "mean_pred_prob": 0.03775194750633091, "mean_pred_prob_last_10": 0.18691090308129787, "mean_pred_prob_last_25": 0.1067334764637053, "mean_pred_prob_last_50": 0.06445925179868936, "mean_token_accuracy": 0.8729662358760834, "step": 16340 }, { "epoch": 0.29065116527118556, "grad_norm": 1.2902806723817846, "learning_rate": 0.0001, "loss": 0.8729, "mean_abs_error": 473.8927753237537, "mean_abs_error_last_10": 163.61611856535387, "mean_abs_error_last_25": 188.26097517953977, "mean_abs_error_last_50": 310.77193036254425, "mean_pred_prob": 0.018739643902517856, "mean_pred_prob_last_10": 0.10602017631754279, "mean_pred_prob_last_25": 0.05250098453834653, "mean_pred_prob_last_50": 0.03155003800056875, "mean_token_accuracy": 0.8668790400028229, "step": 16350 }, { "epoch": 0.2908289335679875, "grad_norm": 4.898698106475649, "learning_rate": 0.0001, "loss": 0.8394, "mean_abs_error": 552.2769595995227, "mean_abs_error_last_10": 213.85167098935017, "mean_abs_error_last_25": 286.9348020532416, "mean_abs_error_last_50": 357.4503317972536, "mean_pred_prob": 0.037902730610221626, "mean_pred_prob_last_10": 0.18902369178831577, "mean_pred_prob_last_25": 0.10485305646434426, "mean_pred_prob_last_50": 0.06368863866664469, "mean_token_accuracy": 0.8718199074268341, "step": 16360 }, { "epoch": 0.29100670186478944, "grad_norm": 1.302665455018566, "learning_rate": 0.0001, "loss": 0.8306, "mean_abs_error": 553.4090219955718, "mean_abs_error_last_10": 426.28744413018387, "mean_abs_error_last_25": 468.5089042422184, "mean_abs_error_last_50": 482.4037662018078, "mean_pred_prob": 0.039073982392437756, "mean_pred_prob_last_10": 0.18835444739088417, "mean_pred_prob_last_25": 0.10661457353271545, "mean_pred_prob_last_50": 0.06560638281516731, "mean_token_accuracy": 0.8848051190376282, "step": 16370 }, { "epoch": 0.2911844701615914, "grad_norm": 1.7694375563113671, "learning_rate": 0.0001, "loss": 0.8386, "mean_abs_error": 359.14272595441827, "mean_abs_error_last_10": 95.37231324077827, "mean_abs_error_last_25": 166.26077316989682, "mean_abs_error_last_50": 282.90393269080363, "mean_pred_prob": 0.034512009215541185, "mean_pred_prob_last_10": 0.1793016204610467, "mean_pred_prob_last_25": 0.09704171540215611, "mean_pred_prob_last_50": 0.0579158116132021, "mean_token_accuracy": 0.8711772799491883, "step": 16380 }, { "epoch": 0.2913622384583933, "grad_norm": 2.2800733171283905, "learning_rate": 0.0001, "loss": 0.8255, "mean_abs_error": 426.71811897681454, "mean_abs_error_last_10": 171.3909538688443, "mean_abs_error_last_25": 192.55789868593376, "mean_abs_error_last_50": 267.2504738221868, "mean_pred_prob": 0.03519701935583726, "mean_pred_prob_last_10": 0.17144716770853846, "mean_pred_prob_last_25": 0.09786288264440372, "mean_pred_prob_last_50": 0.0592671335907653, "mean_token_accuracy": 0.8722272872924804, "step": 16390 }, { "epoch": 0.29154000675519526, "grad_norm": 1.0323589036391032, "learning_rate": 0.0001, "loss": 0.821, "mean_abs_error": 344.4790410995821, "mean_abs_error_last_10": 114.41410019907043, "mean_abs_error_last_25": 120.9332765492239, "mean_abs_error_last_50": 246.89640080338, "mean_pred_prob": 0.04173367992043495, "mean_pred_prob_last_10": 0.19134726226329804, "mean_pred_prob_last_25": 0.10835377331823111, "mean_pred_prob_last_50": 0.06835346794687211, "mean_token_accuracy": 0.8746298313140869, "step": 16400 }, { "epoch": 0.29171777505199725, "grad_norm": 0.7662031768063026, "learning_rate": 0.0001, "loss": 0.8575, "mean_abs_error": 711.6359864758274, "mean_abs_error_last_10": 455.01711051802033, "mean_abs_error_last_25": 575.6400192194658, "mean_abs_error_last_50": 584.6115724675929, "mean_pred_prob": 0.011344313935842365, "mean_pred_prob_last_10": 0.06758489736821502, "mean_pred_prob_last_25": 0.033388123079203066, "mean_pred_prob_last_50": 0.01939587787492201, "mean_token_accuracy": 0.8681254804134368, "step": 16410 }, { "epoch": 0.2918955433487992, "grad_norm": 1.9694519341241294, "learning_rate": 0.0001, "loss": 0.7747, "mean_abs_error": 745.8326943405411, "mean_abs_error_last_10": 207.54604796863313, "mean_abs_error_last_25": 399.61936026512063, "mean_abs_error_last_50": 572.7437231254827, "mean_pred_prob": 0.01724838698282838, "mean_pred_prob_last_10": 0.09484263826161624, "mean_pred_prob_last_25": 0.05000109877437353, "mean_pred_prob_last_50": 0.029602206777781248, "mean_token_accuracy": 0.8780246317386627, "step": 16420 }, { "epoch": 0.29207331164560113, "grad_norm": 1.3912601246066463, "learning_rate": 0.0001, "loss": 0.8217, "mean_abs_error": 1143.813753733846, "mean_abs_error_last_10": 690.2581609596183, "mean_abs_error_last_25": 795.8935045285872, "mean_abs_error_last_50": 922.2576154335924, "mean_pred_prob": 0.03504184707489912, "mean_pred_prob_last_10": 0.1906110404379433, "mean_pred_prob_last_25": 0.10311908386356664, "mean_pred_prob_last_50": 0.06051906587381382, "mean_token_accuracy": 0.8736114859580993, "step": 16430 }, { "epoch": 0.29225107994240307, "grad_norm": 1.3095912936778236, "learning_rate": 0.0001, "loss": 0.7928, "mean_abs_error": 438.4003340179009, "mean_abs_error_last_10": 124.70638310188045, "mean_abs_error_last_25": 196.84489077797562, "mean_abs_error_last_50": 358.01036171228577, "mean_pred_prob": 0.03820436706300825, "mean_pred_prob_last_10": 0.20531498454511166, "mean_pred_prob_last_25": 0.11023369580507278, "mean_pred_prob_last_50": 0.06493361527100205, "mean_token_accuracy": 0.8774464368820191, "step": 16440 }, { "epoch": 0.292428848239205, "grad_norm": 3.045735824960627, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 406.34791364104615, "mean_abs_error_last_10": 140.95677689473567, "mean_abs_error_last_25": 150.22571000370274, "mean_abs_error_last_50": 237.33827241184036, "mean_pred_prob": 0.026068542944267394, "mean_pred_prob_last_10": 0.14360200576484203, "mean_pred_prob_last_25": 0.07539364350959658, "mean_pred_prob_last_50": 0.04458541264757514, "mean_token_accuracy": 0.8699637532234192, "step": 16450 }, { "epoch": 0.29260661653600695, "grad_norm": 1.632793159290793, "learning_rate": 0.0001, "loss": 0.7498, "mean_abs_error": 210.02988935428476, "mean_abs_error_last_10": 36.173465652646456, "mean_abs_error_last_25": 57.25267611725493, "mean_abs_error_last_50": 98.81415307968945, "mean_pred_prob": 0.03907536915503442, "mean_pred_prob_last_10": 0.20256073530763388, "mean_pred_prob_last_25": 0.11192260468378663, "mean_pred_prob_last_50": 0.06738857226446271, "mean_token_accuracy": 0.8747394442558288, "step": 16460 }, { "epoch": 0.29278438483280894, "grad_norm": 2.942430643083692, "learning_rate": 0.0001, "loss": 0.8604, "mean_abs_error": 734.8547754949761, "mean_abs_error_last_10": 328.93946248399754, "mean_abs_error_last_25": 404.8796809556792, "mean_abs_error_last_50": 549.5665403050662, "mean_pred_prob": 0.028426005961955526, "mean_pred_prob_last_10": 0.13780454713269136, "mean_pred_prob_last_25": 0.07841920761857182, "mean_pred_prob_last_50": 0.047876350954174994, "mean_token_accuracy": 0.8681694388389587, "step": 16470 }, { "epoch": 0.2929621531296109, "grad_norm": 2.0268000357718514, "learning_rate": 0.0001, "loss": 0.8323, "mean_abs_error": 155.74693526906103, "mean_abs_error_last_10": 70.07054447025993, "mean_abs_error_last_25": 100.88738333502948, "mean_abs_error_last_50": 115.47411021173498, "mean_pred_prob": 0.03601516280323267, "mean_pred_prob_last_10": 0.18565172664821147, "mean_pred_prob_last_25": 0.10146525558084249, "mean_pred_prob_last_50": 0.06170684490352869, "mean_token_accuracy": 0.867112672328949, "step": 16480 }, { "epoch": 0.2931399214264128, "grad_norm": 1.6298028290767763, "learning_rate": 0.0001, "loss": 0.7772, "mean_abs_error": 323.48268326336546, "mean_abs_error_last_10": 124.65790477727242, "mean_abs_error_last_25": 224.75772724498478, "mean_abs_error_last_50": 277.77336431935703, "mean_pred_prob": 0.03740915530361235, "mean_pred_prob_last_10": 0.20432860031723976, "mean_pred_prob_last_25": 0.10882340427488088, "mean_pred_prob_last_50": 0.06387837738730014, "mean_token_accuracy": 0.8798098206520081, "step": 16490 }, { "epoch": 0.29331768972321476, "grad_norm": 1.147276395233962, "learning_rate": 0.0001, "loss": 0.7316, "mean_abs_error": 238.188173533848, "mean_abs_error_last_10": 62.41490034808396, "mean_abs_error_last_25": 109.03569790004322, "mean_abs_error_last_50": 171.8366135623853, "mean_pred_prob": 0.034410003339871764, "mean_pred_prob_last_10": 0.17616142313927413, "mean_pred_prob_last_25": 0.09373420355841518, "mean_pred_prob_last_50": 0.05726274629123509, "mean_token_accuracy": 0.8862413167953491, "step": 16500 }, { "epoch": 0.2934954580200167, "grad_norm": 2.1208241710367153, "learning_rate": 0.0001, "loss": 0.8325, "mean_abs_error": 659.0552009797241, "mean_abs_error_last_10": 148.32767981855105, "mean_abs_error_last_25": 254.16578980616956, "mean_abs_error_last_50": 338.7309394183321, "mean_pred_prob": 0.037211820541415364, "mean_pred_prob_last_10": 0.1808951261686161, "mean_pred_prob_last_25": 0.10403865094995127, "mean_pred_prob_last_50": 0.06342891750391573, "mean_token_accuracy": 0.8732756674289703, "step": 16510 }, { "epoch": 0.29367322631681864, "grad_norm": 1.7417692148330708, "learning_rate": 0.0001, "loss": 0.8453, "mean_abs_error": 185.7120422903621, "mean_abs_error_last_10": 36.95641126468412, "mean_abs_error_last_25": 60.43156995565298, "mean_abs_error_last_50": 89.52929957744718, "mean_pred_prob": 0.041449145413935184, "mean_pred_prob_last_10": 0.21791665405035018, "mean_pred_prob_last_25": 0.11910947263240815, "mean_pred_prob_last_50": 0.07172522535547614, "mean_token_accuracy": 0.8683629512786866, "step": 16520 }, { "epoch": 0.29385099461362063, "grad_norm": 1.6300409336918722, "learning_rate": 0.0001, "loss": 0.9365, "mean_abs_error": 726.6179743547102, "mean_abs_error_last_10": 325.8848570910873, "mean_abs_error_last_25": 381.650463601046, "mean_abs_error_last_50": 494.2012529156915, "mean_pred_prob": 0.02856431910477113, "mean_pred_prob_last_10": 0.1495081031753216, "mean_pred_prob_last_25": 0.08046456996235066, "mean_pred_prob_last_50": 0.04870321209600661, "mean_token_accuracy": 0.8710467159748078, "step": 16530 }, { "epoch": 0.29402876291042257, "grad_norm": 2.095085629472145, "learning_rate": 0.0001, "loss": 0.7549, "mean_abs_error": 211.33499707947627, "mean_abs_error_last_10": 23.592754218135028, "mean_abs_error_last_25": 47.380460224819885, "mean_abs_error_last_50": 102.46595904858145, "mean_pred_prob": 0.05153708355501294, "mean_pred_prob_last_10": 0.2712034184485674, "mean_pred_prob_last_25": 0.14768022717908025, "mean_pred_prob_last_50": 0.08831896940246224, "mean_token_accuracy": 0.8837060272693634, "step": 16540 }, { "epoch": 0.2942065312072245, "grad_norm": 1.356297347026104, "learning_rate": 0.0001, "loss": 0.8277, "mean_abs_error": 612.4391745910534, "mean_abs_error_last_10": 230.43705238076032, "mean_abs_error_last_25": 303.9860461854299, "mean_abs_error_last_50": 381.86818458287513, "mean_pred_prob": 0.03239589336444624, "mean_pred_prob_last_10": 0.16866672117030249, "mean_pred_prob_last_25": 0.09408347180578858, "mean_pred_prob_last_50": 0.056508018914610145, "mean_token_accuracy": 0.8815709292888642, "step": 16550 }, { "epoch": 0.29438429950402645, "grad_norm": 1.4578611725156374, "learning_rate": 0.0001, "loss": 0.8129, "mean_abs_error": 461.8933718847047, "mean_abs_error_last_10": 118.10066808081982, "mean_abs_error_last_25": 203.25682326798182, "mean_abs_error_last_50": 291.2061670855672, "mean_pred_prob": 0.05068606604472734, "mean_pred_prob_last_10": 0.2583081841352396, "mean_pred_prob_last_25": 0.14256811663508415, "mean_pred_prob_last_50": 0.08709239506279118, "mean_token_accuracy": 0.8681075572967529, "step": 16560 }, { "epoch": 0.2945620678008284, "grad_norm": 1.1176100905705417, "learning_rate": 0.0001, "loss": 0.7533, "mean_abs_error": 535.6375454650661, "mean_abs_error_last_10": 159.96746346645241, "mean_abs_error_last_25": 231.52076399171796, "mean_abs_error_last_50": 348.23347900154016, "mean_pred_prob": 0.04322162801981903, "mean_pred_prob_last_10": 0.2057853938313201, "mean_pred_prob_last_25": 0.11311959588783793, "mean_pred_prob_last_50": 0.07100996705121361, "mean_token_accuracy": 0.8742067694664002, "step": 16570 }, { "epoch": 0.2947398360976303, "grad_norm": 0.9651825175658226, "learning_rate": 0.0001, "loss": 0.8687, "mean_abs_error": 439.95994457860854, "mean_abs_error_last_10": 101.51814562089274, "mean_abs_error_last_25": 141.07044812841872, "mean_abs_error_last_50": 226.99862760397178, "mean_pred_prob": 0.02552017434500158, "mean_pred_prob_last_10": 0.1445741828531027, "mean_pred_prob_last_25": 0.07540534595027566, "mean_pred_prob_last_50": 0.044324667379260066, "mean_token_accuracy": 0.8724375247955323, "step": 16580 }, { "epoch": 0.2949176043944323, "grad_norm": 1.5673157926440096, "learning_rate": 0.0001, "loss": 0.8062, "mean_abs_error": 430.21434988408794, "mean_abs_error_last_10": 111.07728362164838, "mean_abs_error_last_25": 197.4416735292332, "mean_abs_error_last_50": 278.8100225508339, "mean_pred_prob": 0.0294343201443553, "mean_pred_prob_last_10": 0.15068923607468604, "mean_pred_prob_last_25": 0.08155604470521212, "mean_pred_prob_last_50": 0.0492061335593462, "mean_token_accuracy": 0.8690225839614868, "step": 16590 }, { "epoch": 0.29509537269123426, "grad_norm": 3.0709757933360566, "learning_rate": 0.0001, "loss": 0.9044, "mean_abs_error": 308.1675222989655, "mean_abs_error_last_10": 97.91347333014292, "mean_abs_error_last_25": 127.56656177883181, "mean_abs_error_last_50": 179.2636833253006, "mean_pred_prob": 0.0277208274230361, "mean_pred_prob_last_10": 0.14584879484027624, "mean_pred_prob_last_25": 0.07900041304528713, "mean_pred_prob_last_50": 0.04796191491186619, "mean_token_accuracy": 0.8711359083652497, "step": 16600 }, { "epoch": 0.2952731409880362, "grad_norm": 1.0590906457906608, "learning_rate": 0.0001, "loss": 0.9279, "mean_abs_error": 981.144477913282, "mean_abs_error_last_10": 380.345231363575, "mean_abs_error_last_25": 522.1618505618312, "mean_abs_error_last_50": 725.86770750086, "mean_pred_prob": 0.0149536250741221, "mean_pred_prob_last_10": 0.08158339356305078, "mean_pred_prob_last_25": 0.04273427948937751, "mean_pred_prob_last_50": 0.02555116045405157, "mean_token_accuracy": 0.8794863760471344, "step": 16610 }, { "epoch": 0.29545090928483814, "grad_norm": 1.3243038188295637, "learning_rate": 0.0001, "loss": 0.9233, "mean_abs_error": 479.5890899669045, "mean_abs_error_last_10": 283.7577572335438, "mean_abs_error_last_25": 288.31317107679604, "mean_abs_error_last_50": 323.4316390210251, "mean_pred_prob": 0.026358884852379562, "mean_pred_prob_last_10": 0.1426201444119215, "mean_pred_prob_last_25": 0.07498876694589854, "mean_pred_prob_last_50": 0.045232650451362134, "mean_token_accuracy": 0.8631756126880645, "step": 16620 }, { "epoch": 0.2956286775816401, "grad_norm": 2.0511693378793923, "learning_rate": 0.0001, "loss": 0.7774, "mean_abs_error": 813.2395309597207, "mean_abs_error_last_10": 430.86531474283504, "mean_abs_error_last_25": 481.1868842204478, "mean_abs_error_last_50": 601.8073044398882, "mean_pred_prob": 0.03920838023914257, "mean_pred_prob_last_10": 0.18750314346980304, "mean_pred_prob_last_25": 0.10847226420592052, "mean_pred_prob_last_50": 0.06612717783282278, "mean_token_accuracy": 0.8805224061012268, "step": 16630 }, { "epoch": 0.295806445878442, "grad_norm": 2.56141713330339, "learning_rate": 0.0001, "loss": 0.8616, "mean_abs_error": 684.9324180280382, "mean_abs_error_last_10": 213.58470403165492, "mean_abs_error_last_25": 391.3837668810719, "mean_abs_error_last_50": 558.9915964684533, "mean_pred_prob": 0.029376425081864, "mean_pred_prob_last_10": 0.13580638784915208, "mean_pred_prob_last_25": 0.07733624251559376, "mean_pred_prob_last_50": 0.04773457241244614, "mean_token_accuracy": 0.8768994927406311, "step": 16640 }, { "epoch": 0.295984214175244, "grad_norm": 1.2731111851349763, "learning_rate": 0.0001, "loss": 0.8522, "mean_abs_error": 1721.3173455314402, "mean_abs_error_last_10": 672.0939416004304, "mean_abs_error_last_25": 863.4175193099352, "mean_abs_error_last_50": 1168.761055347551, "mean_pred_prob": 0.028215150759206154, "mean_pred_prob_last_10": 0.1433779568877071, "mean_pred_prob_last_25": 0.07948831340181642, "mean_pred_prob_last_50": 0.048308971180813384, "mean_token_accuracy": 0.8817414820194245, "step": 16650 }, { "epoch": 0.29616198247204595, "grad_norm": 2.006911951764558, "learning_rate": 0.0001, "loss": 0.8709, "mean_abs_error": 461.71486069657414, "mean_abs_error_last_10": 68.28177162859198, "mean_abs_error_last_25": 205.10662078138188, "mean_abs_error_last_50": 342.9064861386345, "mean_pred_prob": 0.034748908458277585, "mean_pred_prob_last_10": 0.1898945417255163, "mean_pred_prob_last_25": 0.09829272441565991, "mean_pred_prob_last_50": 0.058829475101083516, "mean_token_accuracy": 0.8742695629596711, "step": 16660 }, { "epoch": 0.2963397507688479, "grad_norm": 1.1652200473578236, "learning_rate": 0.0001, "loss": 0.8221, "mean_abs_error": 310.48473445633783, "mean_abs_error_last_10": 44.36455481694328, "mean_abs_error_last_25": 109.52853065493275, "mean_abs_error_last_50": 180.71863934354414, "mean_pred_prob": 0.038446677615866065, "mean_pred_prob_last_10": 0.20922880321741105, "mean_pred_prob_last_25": 0.11200766935944557, "mean_pred_prob_last_50": 0.06638070661574602, "mean_token_accuracy": 0.8704018771648407, "step": 16670 }, { "epoch": 0.2965175190656498, "grad_norm": 1.200661394193207, "learning_rate": 0.0001, "loss": 0.8213, "mean_abs_error": 473.1854441232782, "mean_abs_error_last_10": 70.83109627203017, "mean_abs_error_last_25": 121.2553916064837, "mean_abs_error_last_50": 248.57942264374861, "mean_pred_prob": 0.043471171433338895, "mean_pred_prob_last_10": 0.21431127786636353, "mean_pred_prob_last_25": 0.12257681635674089, "mean_pred_prob_last_50": 0.07434574065264314, "mean_token_accuracy": 0.8807635724544525, "step": 16680 }, { "epoch": 0.29669528736245177, "grad_norm": 1.6656517085101108, "learning_rate": 0.0001, "loss": 0.9073, "mean_abs_error": 467.38532333437706, "mean_abs_error_last_10": 240.38655675911878, "mean_abs_error_last_25": 261.49944906269644, "mean_abs_error_last_50": 358.1056357018295, "mean_pred_prob": 0.03371959659853019, "mean_pred_prob_last_10": 0.17431082756374963, "mean_pred_prob_last_25": 0.09531177289318293, "mean_pred_prob_last_50": 0.05745837462600321, "mean_token_accuracy": 0.8699725687503814, "step": 16690 }, { "epoch": 0.2968730556592537, "grad_norm": 2.6816833378963087, "learning_rate": 0.0001, "loss": 0.9175, "mean_abs_error": 535.8968068285591, "mean_abs_error_last_10": 129.9530400037649, "mean_abs_error_last_25": 192.4880498147494, "mean_abs_error_last_50": 300.5522858968312, "mean_pred_prob": 0.04654117938189302, "mean_pred_prob_last_10": 0.21502264427253975, "mean_pred_prob_last_25": 0.12369620762765407, "mean_pred_prob_last_50": 0.0771489798615221, "mean_token_accuracy": 0.8755419969558715, "step": 16700 }, { "epoch": 0.2970508239560557, "grad_norm": 0.9800059578854302, "learning_rate": 0.0001, "loss": 0.855, "mean_abs_error": 430.18930380071725, "mean_abs_error_last_10": 123.58976659530629, "mean_abs_error_last_25": 208.23413532188792, "mean_abs_error_last_50": 320.4950649861702, "mean_pred_prob": 0.04017371563240886, "mean_pred_prob_last_10": 0.19653040673583747, "mean_pred_prob_last_25": 0.11059670597314834, "mean_pred_prob_last_50": 0.06785084013827145, "mean_token_accuracy": 0.8787381768226623, "step": 16710 }, { "epoch": 0.29722859225285764, "grad_norm": 1.2594735052587782, "learning_rate": 0.0001, "loss": 1.1399, "mean_abs_error": 694.7308474084573, "mean_abs_error_last_10": 384.9974742132761, "mean_abs_error_last_25": 332.1621085764733, "mean_abs_error_last_50": 396.9882198815855, "mean_pred_prob": 0.016051424201577902, "mean_pred_prob_last_10": 0.0910261806100607, "mean_pred_prob_last_25": 0.04755354607477784, "mean_pred_prob_last_50": 0.027780857332982124, "mean_token_accuracy": 0.8565373063087464, "step": 16720 }, { "epoch": 0.2974063605496596, "grad_norm": 1.8093893644231076, "learning_rate": 0.0001, "loss": 0.7623, "mean_abs_error": 974.8196286049448, "mean_abs_error_last_10": 243.61818846509647, "mean_abs_error_last_25": 388.3929942899654, "mean_abs_error_last_50": 550.6510313002649, "mean_pred_prob": 0.0257933133863844, "mean_pred_prob_last_10": 0.12684410468209534, "mean_pred_prob_last_25": 0.07188950706040487, "mean_pred_prob_last_50": 0.04329593760776333, "mean_token_accuracy": 0.880879807472229, "step": 16730 }, { "epoch": 0.2975841288464615, "grad_norm": 0.8581312746985248, "learning_rate": 0.0001, "loss": 0.7094, "mean_abs_error": 609.9262856671858, "mean_abs_error_last_10": 305.2454489261344, "mean_abs_error_last_25": 373.7484866950395, "mean_abs_error_last_50": 454.213660454065, "mean_pred_prob": 0.045243127865251156, "mean_pred_prob_last_10": 0.2264759852841962, "mean_pred_prob_last_25": 0.12552284867560956, "mean_pred_prob_last_50": 0.07646399312943686, "mean_token_accuracy": 0.8821705460548401, "step": 16740 }, { "epoch": 0.29776189714326345, "grad_norm": 1.1869530140887437, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 1374.2721043706817, "mean_abs_error_last_10": 592.3995468408314, "mean_abs_error_last_25": 739.4148618911051, "mean_abs_error_last_50": 925.7769388629825, "mean_pred_prob": 0.03514907881908584, "mean_pred_prob_last_10": 0.16334277727291918, "mean_pred_prob_last_25": 0.09550925933872349, "mean_pred_prob_last_50": 0.05798388381663244, "mean_token_accuracy": 0.8708364784717559, "step": 16750 }, { "epoch": 0.2979396654400654, "grad_norm": 1.9115781661203888, "learning_rate": 0.0001, "loss": 0.8405, "mean_abs_error": 1045.0637302655523, "mean_abs_error_last_10": 656.8962046484259, "mean_abs_error_last_25": 794.6911951250801, "mean_abs_error_last_50": 906.879672252034, "mean_pred_prob": 0.04534456008987035, "mean_pred_prob_last_10": 0.2202121947775595, "mean_pred_prob_last_25": 0.12382045871781884, "mean_pred_prob_last_50": 0.0763455058724503, "mean_token_accuracy": 0.8741696655750275, "step": 16760 }, { "epoch": 0.2981174337368674, "grad_norm": 4.088720361743373, "learning_rate": 0.0001, "loss": 0.857, "mean_abs_error": 207.1488318219802, "mean_abs_error_last_10": 47.40243695688556, "mean_abs_error_last_25": 77.79947614293606, "mean_abs_error_last_50": 115.14127515824653, "mean_pred_prob": 0.04698004154488444, "mean_pred_prob_last_10": 0.22270233519375324, "mean_pred_prob_last_25": 0.12707321997731924, "mean_pred_prob_last_50": 0.07882788516581059, "mean_token_accuracy": 0.8690529286861419, "step": 16770 }, { "epoch": 0.2982952020336693, "grad_norm": 1.4520412196290908, "learning_rate": 0.0001, "loss": 0.8914, "mean_abs_error": 479.1718998687103, "mean_abs_error_last_10": 96.21805211824787, "mean_abs_error_last_25": 118.96722282964247, "mean_abs_error_last_50": 251.4515355846404, "mean_pred_prob": 0.037997267302125694, "mean_pred_prob_last_10": 0.18880804621148856, "mean_pred_prob_last_25": 0.10799612378468737, "mean_pred_prob_last_50": 0.06467432404169812, "mean_token_accuracy": 0.8818938553333282, "step": 16780 }, { "epoch": 0.29847297033047127, "grad_norm": 1.9078632371578566, "learning_rate": 0.0001, "loss": 0.735, "mean_abs_error": 503.0803003346291, "mean_abs_error_last_10": 120.88945665609064, "mean_abs_error_last_25": 135.73160622027288, "mean_abs_error_last_50": 205.17237486047915, "mean_pred_prob": 0.043393792025744914, "mean_pred_prob_last_10": 0.2090308628976345, "mean_pred_prob_last_25": 0.1219992795959115, "mean_pred_prob_last_50": 0.07428469285368919, "mean_token_accuracy": 0.8725406348705291, "step": 16790 }, { "epoch": 0.2986507386272732, "grad_norm": 1.6845508611311186, "learning_rate": 0.0001, "loss": 0.9009, "mean_abs_error": 915.7782328519955, "mean_abs_error_last_10": 577.4250187719985, "mean_abs_error_last_25": 591.2208230615777, "mean_abs_error_last_50": 689.4973873696665, "mean_pred_prob": 0.04772082316485467, "mean_pred_prob_last_10": 0.22023690085334238, "mean_pred_prob_last_25": 0.12499243739293889, "mean_pred_prob_last_50": 0.07942429705581162, "mean_token_accuracy": 0.8660770714282989, "step": 16800 }, { "epoch": 0.29882850692407514, "grad_norm": 2.2322092469146235, "learning_rate": 0.0001, "loss": 0.8543, "mean_abs_error": 681.8036292348992, "mean_abs_error_last_10": 105.33159608735215, "mean_abs_error_last_25": 197.80821312775947, "mean_abs_error_last_50": 315.12616716936674, "mean_pred_prob": 0.022254606301430613, "mean_pred_prob_last_10": 0.11542107069399207, "mean_pred_prob_last_25": 0.06228149071102962, "mean_pred_prob_last_50": 0.03787972987629473, "mean_token_accuracy": 0.8633684813976288, "step": 16810 }, { "epoch": 0.2990062752208771, "grad_norm": 2.0145429633101837, "learning_rate": 0.0001, "loss": 0.8582, "mean_abs_error": 606.6828447241861, "mean_abs_error_last_10": 327.28355403328436, "mean_abs_error_last_25": 453.01951492820535, "mean_abs_error_last_50": 526.4951727733614, "mean_pred_prob": 0.02841807993245311, "mean_pred_prob_last_10": 0.15151620832039042, "mean_pred_prob_last_25": 0.07969891346292571, "mean_pred_prob_last_50": 0.048337448824895546, "mean_token_accuracy": 0.8781107068061829, "step": 16820 }, { "epoch": 0.2991840435176791, "grad_norm": 3.388824344518303, "learning_rate": 0.0001, "loss": 0.9289, "mean_abs_error": 831.3577867093378, "mean_abs_error_last_10": 274.2567627784745, "mean_abs_error_last_25": 382.5317423519433, "mean_abs_error_last_50": 520.6158009191786, "mean_pred_prob": 0.04015721493342426, "mean_pred_prob_last_10": 0.19774829993257298, "mean_pred_prob_last_25": 0.11322113577043638, "mean_pred_prob_last_50": 0.06816176166175865, "mean_token_accuracy": 0.8689371764659881, "step": 16830 }, { "epoch": 0.299361811814481, "grad_norm": 1.1389732930475465, "learning_rate": 0.0001, "loss": 0.9409, "mean_abs_error": 1413.3821746853603, "mean_abs_error_last_10": 521.6938014332484, "mean_abs_error_last_25": 571.5466963663004, "mean_abs_error_last_50": 805.0106895506249, "mean_pred_prob": 0.026548401388572528, "mean_pred_prob_last_10": 0.14536042299005203, "mean_pred_prob_last_25": 0.0761304771353025, "mean_pred_prob_last_50": 0.04559383414452896, "mean_token_accuracy": 0.8721457481384277, "step": 16840 }, { "epoch": 0.29953958011128295, "grad_norm": 1.9864816672927805, "learning_rate": 0.0001, "loss": 0.8142, "mean_abs_error": 704.8166982577343, "mean_abs_error_last_10": 457.8579235302027, "mean_abs_error_last_25": 484.5560565112957, "mean_abs_error_last_50": 593.0590706245063, "mean_pred_prob": 0.038140749518061054, "mean_pred_prob_last_10": 0.19822295434423723, "mean_pred_prob_last_25": 0.10712904109386727, "mean_pred_prob_last_50": 0.06525225470250007, "mean_token_accuracy": 0.8756106734275818, "step": 16850 }, { "epoch": 0.2997173484080849, "grad_norm": 3.0970305522523547, "learning_rate": 0.0001, "loss": 0.8316, "mean_abs_error": 531.2481093797907, "mean_abs_error_last_10": 218.00034142987397, "mean_abs_error_last_25": 215.19309792678905, "mean_abs_error_last_50": 337.2210888547763, "mean_pred_prob": 0.032045129139442, "mean_pred_prob_last_10": 0.1618617594242096, "mean_pred_prob_last_25": 0.08918801659019664, "mean_pred_prob_last_50": 0.05460717953974381, "mean_token_accuracy": 0.8699212193489074, "step": 16860 }, { "epoch": 0.29989511670488683, "grad_norm": 1.4333927799797934, "learning_rate": 0.0001, "loss": 0.7219, "mean_abs_error": 577.453847841922, "mean_abs_error_last_10": 225.93288747137976, "mean_abs_error_last_25": 234.40573230722885, "mean_abs_error_last_50": 328.23487009626535, "mean_pred_prob": 0.02421621510293335, "mean_pred_prob_last_10": 0.12909215415129438, "mean_pred_prob_last_25": 0.06983262061839923, "mean_pred_prob_last_50": 0.04173573773005046, "mean_token_accuracy": 0.8769567310810089, "step": 16870 }, { "epoch": 0.30007288500168877, "grad_norm": 1.3107401204448506, "learning_rate": 0.0001, "loss": 0.813, "mean_abs_error": 545.6440951541655, "mean_abs_error_last_10": 132.93214793565696, "mean_abs_error_last_25": 159.96521023075672, "mean_abs_error_last_50": 286.7025559402044, "mean_pred_prob": 0.02293622388970107, "mean_pred_prob_last_10": 0.11452131159603596, "mean_pred_prob_last_25": 0.06450877720490097, "mean_pred_prob_last_50": 0.038837031740695235, "mean_token_accuracy": 0.8800544023513794, "step": 16880 }, { "epoch": 0.30025065329849077, "grad_norm": 1.675137818669849, "learning_rate": 0.0001, "loss": 0.8005, "mean_abs_error": 284.643316672484, "mean_abs_error_last_10": 81.73104430239513, "mean_abs_error_last_25": 145.65217615042937, "mean_abs_error_last_50": 178.64376489560271, "mean_pred_prob": 0.03969000545330346, "mean_pred_prob_last_10": 0.19391874372959136, "mean_pred_prob_last_25": 0.11046674586832524, "mean_pred_prob_last_50": 0.06745304176583886, "mean_token_accuracy": 0.8750507056713104, "step": 16890 }, { "epoch": 0.3004284215952927, "grad_norm": 1.3814381396051343, "learning_rate": 0.0001, "loss": 0.8158, "mean_abs_error": 821.4103480521995, "mean_abs_error_last_10": 428.12905558208905, "mean_abs_error_last_25": 469.5787458791862, "mean_abs_error_last_50": 581.7323612182741, "mean_pred_prob": 0.036721481334825515, "mean_pred_prob_last_10": 0.18251739150146024, "mean_pred_prob_last_25": 0.10214244372909889, "mean_pred_prob_last_50": 0.06233882834785618, "mean_token_accuracy": 0.8734703123569488, "step": 16900 }, { "epoch": 0.30060618989209464, "grad_norm": 1.6704349131423868, "learning_rate": 0.0001, "loss": 0.857, "mean_abs_error": 400.49188999089563, "mean_abs_error_last_10": 65.05302202598725, "mean_abs_error_last_25": 101.02244476247364, "mean_abs_error_last_50": 180.95281317187496, "mean_pred_prob": 0.035900743957608935, "mean_pred_prob_last_10": 0.18224120531231164, "mean_pred_prob_last_25": 0.10015693083405494, "mean_pred_prob_last_50": 0.06097510252147913, "mean_token_accuracy": 0.8733176171779633, "step": 16910 }, { "epoch": 0.3007839581888966, "grad_norm": 1.513285865712382, "learning_rate": 0.0001, "loss": 0.8539, "mean_abs_error": 1092.4311921396622, "mean_abs_error_last_10": 716.6206057601636, "mean_abs_error_last_25": 774.0660580946919, "mean_abs_error_last_50": 873.7053565119086, "mean_pred_prob": 0.027290082738909406, "mean_pred_prob_last_10": 0.15391010774037567, "mean_pred_prob_last_25": 0.08006738843250787, "mean_pred_prob_last_50": 0.047315898149099665, "mean_token_accuracy": 0.8684132516384124, "step": 16920 }, { "epoch": 0.3009617264856985, "grad_norm": 4.041424132236595, "learning_rate": 0.0001, "loss": 0.8157, "mean_abs_error": 533.4126490469107, "mean_abs_error_last_10": 205.2928772006719, "mean_abs_error_last_25": 273.27466966783106, "mean_abs_error_last_50": 363.48952799584157, "mean_pred_prob": 0.02888843241962604, "mean_pred_prob_last_10": 0.14476305118296295, "mean_pred_prob_last_25": 0.08008183162892238, "mean_pred_prob_last_50": 0.04881055754376575, "mean_token_accuracy": 0.873327910900116, "step": 16930 }, { "epoch": 0.3011394947825005, "grad_norm": 2.173033459726879, "learning_rate": 0.0001, "loss": 0.8777, "mean_abs_error": 250.84670695188225, "mean_abs_error_last_10": 112.86878522398453, "mean_abs_error_last_25": 159.3288239905279, "mean_abs_error_last_50": 173.25716309636448, "mean_pred_prob": 0.03596061216667294, "mean_pred_prob_last_10": 0.18689426966011524, "mean_pred_prob_last_25": 0.10143581721931696, "mean_pred_prob_last_50": 0.06120566986501217, "mean_token_accuracy": 0.8751903355121613, "step": 16940 }, { "epoch": 0.30131726307930246, "grad_norm": 1.6320947160006432, "learning_rate": 0.0001, "loss": 0.818, "mean_abs_error": 892.2922202209065, "mean_abs_error_last_10": 439.3293815766603, "mean_abs_error_last_25": 457.4867410074211, "mean_abs_error_last_50": 622.9031411604009, "mean_pred_prob": 0.037142867177317386, "mean_pred_prob_last_10": 0.18614801648946014, "mean_pred_prob_last_25": 0.10345200608135201, "mean_pred_prob_last_50": 0.06202729918295517, "mean_token_accuracy": 0.8757620692253113, "step": 16950 }, { "epoch": 0.3014950313761044, "grad_norm": 1.4802945486031476, "learning_rate": 0.0001, "loss": 0.8199, "mean_abs_error": 353.73920871066065, "mean_abs_error_last_10": 123.07920523804918, "mean_abs_error_last_25": 188.6596440216916, "mean_abs_error_last_50": 261.212028690779, "mean_pred_prob": 0.03075638522859663, "mean_pred_prob_last_10": 0.17403218187391759, "mean_pred_prob_last_25": 0.09126971568912268, "mean_pred_prob_last_50": 0.053542208345606926, "mean_token_accuracy": 0.8780357778072357, "step": 16960 }, { "epoch": 0.30167279967290633, "grad_norm": 0.7083996523168812, "learning_rate": 0.0001, "loss": 0.8118, "mean_abs_error": 429.1729983497139, "mean_abs_error_last_10": 204.18218461648306, "mean_abs_error_last_25": 312.96213003166423, "mean_abs_error_last_50": 393.7219262480672, "mean_pred_prob": 0.034751741029322145, "mean_pred_prob_last_10": 0.17186062764376403, "mean_pred_prob_last_25": 0.09698535725474358, "mean_pred_prob_last_50": 0.05859618312679231, "mean_token_accuracy": 0.8771585702896119, "step": 16970 }, { "epoch": 0.30185056796970827, "grad_norm": 2.060205388999481, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 467.19868604532746, "mean_abs_error_last_10": 91.30404420884634, "mean_abs_error_last_25": 150.79249059569923, "mean_abs_error_last_50": 264.0945108582225, "mean_pred_prob": 0.04481548229232431, "mean_pred_prob_last_10": 0.22243123257067055, "mean_pred_prob_last_25": 0.1255211654584855, "mean_pred_prob_last_50": 0.07609069283353165, "mean_token_accuracy": 0.8781582236289978, "step": 16980 }, { "epoch": 0.3020283362665102, "grad_norm": 2.181707461603876, "learning_rate": 0.0001, "loss": 0.9225, "mean_abs_error": 1351.7394629825142, "mean_abs_error_last_10": 171.04444180365405, "mean_abs_error_last_25": 329.79475645123296, "mean_abs_error_last_50": 626.5941423882182, "mean_pred_prob": 0.013973089557839557, "mean_pred_prob_last_10": 0.07655131986830384, "mean_pred_prob_last_25": 0.040564874059055, "mean_pred_prob_last_50": 0.02429749342845753, "mean_token_accuracy": 0.8712413907051086, "step": 16990 }, { "epoch": 0.3022061045633122, "grad_norm": 1.038773838568921, "learning_rate": 0.0001, "loss": 0.7347, "mean_abs_error": 320.83028223407314, "mean_abs_error_last_10": 170.5234490330876, "mean_abs_error_last_25": 200.10341546131093, "mean_abs_error_last_50": 237.36012181294905, "mean_pred_prob": 0.026800609938800336, "mean_pred_prob_last_10": 0.15077717415988445, "mean_pred_prob_last_25": 0.08031336041167378, "mean_pred_prob_last_50": 0.04708799729123712, "mean_token_accuracy": 0.8851471424102784, "step": 17000 }, { "epoch": 0.30238387286011414, "grad_norm": 3.1800195047131625, "learning_rate": 0.0001, "loss": 0.8231, "mean_abs_error": 559.4888970313804, "mean_abs_error_last_10": 142.9492869798815, "mean_abs_error_last_25": 195.6479600501051, "mean_abs_error_last_50": 363.3013879543378, "mean_pred_prob": 0.019168500509113073, "mean_pred_prob_last_10": 0.10730238314718007, "mean_pred_prob_last_25": 0.05500399470329285, "mean_pred_prob_last_50": 0.0328475012909621, "mean_token_accuracy": 0.8771286487579346, "step": 17010 }, { "epoch": 0.3025616411569161, "grad_norm": 1.5016124825240151, "learning_rate": 0.0001, "loss": 0.8065, "mean_abs_error": 550.1926897378884, "mean_abs_error_last_10": 146.30779180707844, "mean_abs_error_last_25": 262.1111924626374, "mean_abs_error_last_50": 358.7982615684524, "mean_pred_prob": 0.0330601736844983, "mean_pred_prob_last_10": 0.15420211935415865, "mean_pred_prob_last_25": 0.08858913918957115, "mean_pred_prob_last_50": 0.05665502940537408, "mean_token_accuracy": 0.8618213117122651, "step": 17020 }, { "epoch": 0.302739409453718, "grad_norm": 2.1114884219316075, "learning_rate": 0.0001, "loss": 0.8304, "mean_abs_error": 566.2317645243795, "mean_abs_error_last_10": 212.78769536056808, "mean_abs_error_last_25": 265.19969229587394, "mean_abs_error_last_50": 372.6908253592161, "mean_pred_prob": 0.02921227687038481, "mean_pred_prob_last_10": 0.14669399648555553, "mean_pred_prob_last_25": 0.08259527994669043, "mean_pred_prob_last_50": 0.050078549113823104, "mean_token_accuracy": 0.8689290344715118, "step": 17030 }, { "epoch": 0.30291717775051996, "grad_norm": 1.2186506792063632, "learning_rate": 0.0001, "loss": 0.8346, "mean_abs_error": 269.2411453441672, "mean_abs_error_last_10": 51.605009768927495, "mean_abs_error_last_25": 143.07920668462387, "mean_abs_error_last_50": 188.25127376968857, "mean_pred_prob": 0.031545839505270123, "mean_pred_prob_last_10": 0.16276017539203166, "mean_pred_prob_last_25": 0.08819014560431242, "mean_pred_prob_last_50": 0.053167980071157216, "mean_token_accuracy": 0.8802864909172058, "step": 17040 }, { "epoch": 0.3030949460473219, "grad_norm": 1.1587774850560157, "learning_rate": 0.0001, "loss": 0.733, "mean_abs_error": 120.2450566629092, "mean_abs_error_last_10": 22.51886176076325, "mean_abs_error_last_25": 32.99405596642511, "mean_abs_error_last_50": 50.370593677941955, "mean_pred_prob": 0.06001657987944782, "mean_pred_prob_last_10": 0.2808481890708208, "mean_pred_prob_last_25": 0.16363436114042998, "mean_pred_prob_last_50": 0.10215807212516666, "mean_token_accuracy": 0.8759193539619445, "step": 17050 }, { "epoch": 0.3032727143441239, "grad_norm": 1.5258873083364617, "learning_rate": 0.0001, "loss": 0.8564, "mean_abs_error": 230.2720545921574, "mean_abs_error_last_10": 40.601477746234565, "mean_abs_error_last_25": 64.33504782787611, "mean_abs_error_last_50": 110.55704363167547, "mean_pred_prob": 0.04881924246437848, "mean_pred_prob_last_10": 0.24848679937422274, "mean_pred_prob_last_25": 0.13829317670315505, "mean_pred_prob_last_50": 0.08361394796520472, "mean_token_accuracy": 0.8703040182590485, "step": 17060 }, { "epoch": 0.30345048264092583, "grad_norm": 1.5935663152009363, "learning_rate": 0.0001, "loss": 0.7876, "mean_abs_error": 162.24644062170208, "mean_abs_error_last_10": 72.35286740304659, "mean_abs_error_last_25": 77.78698178440713, "mean_abs_error_last_50": 105.89019144341842, "mean_pred_prob": 0.05280599785037339, "mean_pred_prob_last_10": 0.24350315369665623, "mean_pred_prob_last_25": 0.14458946082741023, "mean_pred_prob_last_50": 0.08847957747057081, "mean_token_accuracy": 0.8763740301132202, "step": 17070 }, { "epoch": 0.3036282509377278, "grad_norm": 1.5799626661211237, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 459.83890131542904, "mean_abs_error_last_10": 343.05176917819574, "mean_abs_error_last_25": 411.46896574447027, "mean_abs_error_last_50": 434.55674364176974, "mean_pred_prob": 0.048029710922855885, "mean_pred_prob_last_10": 0.21646263090660794, "mean_pred_prob_last_25": 0.1267595254466869, "mean_pred_prob_last_50": 0.08003400234738364, "mean_token_accuracy": 0.8841849803924561, "step": 17080 }, { "epoch": 0.3038060192345297, "grad_norm": 1.9865182319766501, "learning_rate": 0.0001, "loss": 0.8035, "mean_abs_error": 709.5840333021658, "mean_abs_error_last_10": 302.2607242524593, "mean_abs_error_last_25": 349.7904760488524, "mean_abs_error_last_50": 445.73053544871465, "mean_pred_prob": 0.04816260789521039, "mean_pred_prob_last_10": 0.22306585669866763, "mean_pred_prob_last_25": 0.13114779104944319, "mean_pred_prob_last_50": 0.08133009681769181, "mean_token_accuracy": 0.8694306015968323, "step": 17090 }, { "epoch": 0.30398378753133165, "grad_norm": 3.0667006535935437, "learning_rate": 0.0001, "loss": 0.9539, "mean_abs_error": 755.2924291790957, "mean_abs_error_last_10": 109.85548187590398, "mean_abs_error_last_25": 226.54740224990172, "mean_abs_error_last_50": 433.34906898854143, "mean_pred_prob": 0.023417045827955008, "mean_pred_prob_last_10": 0.1309388054534793, "mean_pred_prob_last_25": 0.06949447989463806, "mean_pred_prob_last_50": 0.04085826724767685, "mean_token_accuracy": 0.8695487082004547, "step": 17100 }, { "epoch": 0.3041615558281336, "grad_norm": 1.2771842500802428, "learning_rate": 0.0001, "loss": 0.8577, "mean_abs_error": 449.0279034602254, "mean_abs_error_last_10": 114.14941071955164, "mean_abs_error_last_25": 170.94145200335032, "mean_abs_error_last_50": 289.4663713296103, "mean_pred_prob": 0.04707150478498079, "mean_pred_prob_last_10": 0.24000515413936227, "mean_pred_prob_last_25": 0.13259242947679012, "mean_pred_prob_last_50": 0.07998846194823273, "mean_token_accuracy": 0.8731601595878601, "step": 17110 }, { "epoch": 0.3043393241249356, "grad_norm": 1.0094125392131694, "learning_rate": 0.0001, "loss": 0.7872, "mean_abs_error": 230.56596527501415, "mean_abs_error_last_10": 41.81142218862981, "mean_abs_error_last_25": 87.06381626162448, "mean_abs_error_last_50": 140.49401035318473, "mean_pred_prob": 0.045327283535152675, "mean_pred_prob_last_10": 0.21218632087111472, "mean_pred_prob_last_25": 0.12279789708554745, "mean_pred_prob_last_50": 0.0762148437090218, "mean_token_accuracy": 0.8733403146266937, "step": 17120 }, { "epoch": 0.3045170924217375, "grad_norm": 1.2417599606069918, "learning_rate": 0.0001, "loss": 0.7914, "mean_abs_error": 452.04747374627215, "mean_abs_error_last_10": 128.32121200050682, "mean_abs_error_last_25": 157.8185000244444, "mean_abs_error_last_50": 234.34400899761903, "mean_pred_prob": 0.037318507325835526, "mean_pred_prob_last_10": 0.17452774811536073, "mean_pred_prob_last_25": 0.10217309249565006, "mean_pred_prob_last_50": 0.06312391068786383, "mean_token_accuracy": 0.8736889481544494, "step": 17130 }, { "epoch": 0.30469486071853946, "grad_norm": 2.3902830206021055, "learning_rate": 0.0001, "loss": 0.8033, "mean_abs_error": 664.6248221333348, "mean_abs_error_last_10": 239.30160837099598, "mean_abs_error_last_25": 264.42381541024713, "mean_abs_error_last_50": 405.36597915353366, "mean_pred_prob": 0.034108413488138464, "mean_pred_prob_last_10": 0.17609023177064956, "mean_pred_prob_last_25": 0.09900022568181158, "mean_pred_prob_last_50": 0.059173076355364174, "mean_token_accuracy": 0.8745778560638428, "step": 17140 }, { "epoch": 0.3048726290153414, "grad_norm": 3.9910691731357137, "learning_rate": 0.0001, "loss": 1.0174, "mean_abs_error": 563.5349909604132, "mean_abs_error_last_10": 214.06813138499788, "mean_abs_error_last_25": 240.12702602476566, "mean_abs_error_last_50": 347.781599108105, "mean_pred_prob": 0.022318540926789865, "mean_pred_prob_last_10": 0.1187027727952227, "mean_pred_prob_last_25": 0.0638900766032748, "mean_pred_prob_last_50": 0.0384900382719934, "mean_token_accuracy": 0.8791634857654571, "step": 17150 }, { "epoch": 0.30505039731214334, "grad_norm": 2.686175151221667, "learning_rate": 0.0001, "loss": 0.8591, "mean_abs_error": 759.676120693364, "mean_abs_error_last_10": 290.2315866238007, "mean_abs_error_last_25": 337.01876654830636, "mean_abs_error_last_50": 477.09772740877435, "mean_pred_prob": 0.028854381866403857, "mean_pred_prob_last_10": 0.15154545198311098, "mean_pred_prob_last_25": 0.07956309175351635, "mean_pred_prob_last_50": 0.04859806470340118, "mean_token_accuracy": 0.8806165456771851, "step": 17160 }, { "epoch": 0.3052281656089453, "grad_norm": 1.2521850478535572, "learning_rate": 0.0001, "loss": 0.8874, "mean_abs_error": 661.064434017326, "mean_abs_error_last_10": 141.8617592612133, "mean_abs_error_last_25": 266.89924135134186, "mean_abs_error_last_50": 399.257751263178, "mean_pred_prob": 0.04674463379196823, "mean_pred_prob_last_10": 0.22386784383561462, "mean_pred_prob_last_25": 0.1272585066151805, "mean_pred_prob_last_50": 0.07885874278144911, "mean_token_accuracy": 0.8744144260883331, "step": 17170 }, { "epoch": 0.3054059339057473, "grad_norm": 1.8534210633029073, "learning_rate": 0.0001, "loss": 1.0282, "mean_abs_error": 1330.7729330910174, "mean_abs_error_last_10": 727.0361265562956, "mean_abs_error_last_25": 921.7715886246924, "mean_abs_error_last_50": 1124.8081139666133, "mean_pred_prob": 0.03843358638987411, "mean_pred_prob_last_10": 0.17113435920909978, "mean_pred_prob_last_25": 0.10039871471526567, "mean_pred_prob_last_50": 0.06424495535902679, "mean_token_accuracy": 0.8614472627639771, "step": 17180 }, { "epoch": 0.3055837022025492, "grad_norm": 1.6722080994189084, "learning_rate": 0.0001, "loss": 0.9683, "mean_abs_error": 490.54984507931215, "mean_abs_error_last_10": 138.72366346635815, "mean_abs_error_last_25": 222.0128349379003, "mean_abs_error_last_50": 304.036271005834, "mean_pred_prob": 0.034529497532639654, "mean_pred_prob_last_10": 0.17669861576287077, "mean_pred_prob_last_25": 0.09884090581908822, "mean_pred_prob_last_50": 0.058800566225545484, "mean_token_accuracy": 0.8667669177055359, "step": 17190 }, { "epoch": 0.30576147049935115, "grad_norm": 1.8832014796146042, "learning_rate": 0.0001, "loss": 0.9504, "mean_abs_error": 1018.3105753002679, "mean_abs_error_last_10": 426.60693795451397, "mean_abs_error_last_25": 524.2548799033185, "mean_abs_error_last_50": 674.8055778872744, "mean_pred_prob": 0.02351501985831419, "mean_pred_prob_last_10": 0.11915697142248974, "mean_pred_prob_last_25": 0.06593989689426963, "mean_pred_prob_last_50": 0.039937867817934604, "mean_token_accuracy": 0.8719330072402954, "step": 17200 }, { "epoch": 0.3059392387961531, "grad_norm": 0.901503708838529, "learning_rate": 0.0001, "loss": 0.7496, "mean_abs_error": 290.71733720492614, "mean_abs_error_last_10": 63.016809909235995, "mean_abs_error_last_25": 97.3311456196075, "mean_abs_error_last_50": 182.26475786605337, "mean_pred_prob": 0.03428150052204728, "mean_pred_prob_last_10": 0.178917570784688, "mean_pred_prob_last_25": 0.09822493437677622, "mean_pred_prob_last_50": 0.05890611028298735, "mean_token_accuracy": 0.8801178276538849, "step": 17210 }, { "epoch": 0.30611700709295503, "grad_norm": 3.10480122124877, "learning_rate": 0.0001, "loss": 0.8264, "mean_abs_error": 1009.4472477631886, "mean_abs_error_last_10": 321.666550647739, "mean_abs_error_last_25": 410.01086447395517, "mean_abs_error_last_50": 604.6752694518414, "mean_pred_prob": 0.02307104815263301, "mean_pred_prob_last_10": 0.11567330203251913, "mean_pred_prob_last_25": 0.06386920420918614, "mean_pred_prob_last_50": 0.03888987375539728, "mean_token_accuracy": 0.878751277923584, "step": 17220 }, { "epoch": 0.30629477538975697, "grad_norm": 3.463391282019668, "learning_rate": 0.0001, "loss": 0.9095, "mean_abs_error": 846.1217465806367, "mean_abs_error_last_10": 195.31883087927733, "mean_abs_error_last_25": 270.14945776036063, "mean_abs_error_last_50": 471.6866929775566, "mean_pred_prob": 0.027124508007545955, "mean_pred_prob_last_10": 0.1376056645764038, "mean_pred_prob_last_25": 0.07798297136323526, "mean_pred_prob_last_50": 0.04626955722924322, "mean_token_accuracy": 0.875123131275177, "step": 17230 }, { "epoch": 0.30647254368655896, "grad_norm": 2.600515297563393, "learning_rate": 0.0001, "loss": 0.8219, "mean_abs_error": 202.6567614853121, "mean_abs_error_last_10": 107.25964837459128, "mean_abs_error_last_25": 113.55169840786877, "mean_abs_error_last_50": 138.04916845223272, "mean_pred_prob": 0.03193170167505741, "mean_pred_prob_last_10": 0.16066286228597165, "mean_pred_prob_last_25": 0.08647869005799294, "mean_pred_prob_last_50": 0.05331913256086409, "mean_token_accuracy": 0.8725739419460297, "step": 17240 }, { "epoch": 0.3066503119833609, "grad_norm": 1.404891102238168, "learning_rate": 0.0001, "loss": 0.8802, "mean_abs_error": 341.4205648487026, "mean_abs_error_last_10": 153.73132099946693, "mean_abs_error_last_25": 170.312350165469, "mean_abs_error_last_50": 200.98318719316921, "mean_pred_prob": 0.04012123174034059, "mean_pred_prob_last_10": 0.19100450575351716, "mean_pred_prob_last_25": 0.105921071767807, "mean_pred_prob_last_50": 0.06684602643363177, "mean_token_accuracy": 0.8667860567569733, "step": 17250 }, { "epoch": 0.30682808028016284, "grad_norm": 1.9648315425179053, "learning_rate": 0.0001, "loss": 0.8595, "mean_abs_error": 356.7623471709188, "mean_abs_error_last_10": 122.17781565087878, "mean_abs_error_last_25": 159.44811959666873, "mean_abs_error_last_50": 203.30981023626583, "mean_pred_prob": 0.05317299515008926, "mean_pred_prob_last_10": 0.24531822216231375, "mean_pred_prob_last_25": 0.14350869392510504, "mean_pred_prob_last_50": 0.08970342017710209, "mean_token_accuracy": 0.8758174061775208, "step": 17260 }, { "epoch": 0.3070058485769648, "grad_norm": 1.3401302682432352, "learning_rate": 0.0001, "loss": 0.8344, "mean_abs_error": 1149.6542299786774, "mean_abs_error_last_10": 574.686085162378, "mean_abs_error_last_25": 658.6431515351513, "mean_abs_error_last_50": 842.215332740591, "mean_pred_prob": 0.025346150003315415, "mean_pred_prob_last_10": 0.1314440745918546, "mean_pred_prob_last_25": 0.07070910351176281, "mean_pred_prob_last_50": 0.042525218037189914, "mean_token_accuracy": 0.8681964218616486, "step": 17270 }, { "epoch": 0.3071836168737667, "grad_norm": 3.4813238365132997, "learning_rate": 0.0001, "loss": 0.8291, "mean_abs_error": 504.3401692192495, "mean_abs_error_last_10": 124.46720873660725, "mean_abs_error_last_25": 200.30414173840066, "mean_abs_error_last_50": 295.63568639649867, "mean_pred_prob": 0.02949362761573866, "mean_pred_prob_last_10": 0.16344837008509785, "mean_pred_prob_last_25": 0.08661953052505851, "mean_pred_prob_last_50": 0.05123631415190175, "mean_token_accuracy": 0.8735986828804017, "step": 17280 }, { "epoch": 0.30736138517056866, "grad_norm": 1.7016602130532308, "learning_rate": 0.0001, "loss": 0.9117, "mean_abs_error": 1068.8692561521682, "mean_abs_error_last_10": 272.42893515230156, "mean_abs_error_last_25": 453.35269234403785, "mean_abs_error_last_50": 696.0031923982717, "mean_pred_prob": 0.028025256187538615, "mean_pred_prob_last_10": 0.14892717065522448, "mean_pred_prob_last_25": 0.08233354008989409, "mean_pred_prob_last_50": 0.047907236957689746, "mean_token_accuracy": 0.864253431558609, "step": 17290 }, { "epoch": 0.30753915346737065, "grad_norm": 3.0918858642554676, "learning_rate": 0.0001, "loss": 0.8683, "mean_abs_error": 172.02534453764025, "mean_abs_error_last_10": 56.51121239045615, "mean_abs_error_last_25": 87.92851549167676, "mean_abs_error_last_50": 123.1653591457563, "mean_pred_prob": 0.044922450510784986, "mean_pred_prob_last_10": 0.22409401470795273, "mean_pred_prob_last_25": 0.1249963722191751, "mean_pred_prob_last_50": 0.07678899299353362, "mean_token_accuracy": 0.8703806161880493, "step": 17300 }, { "epoch": 0.3077169217641726, "grad_norm": 1.5622896851512464, "learning_rate": 0.0001, "loss": 0.8304, "mean_abs_error": 517.1324295018869, "mean_abs_error_last_10": 145.7093338253298, "mean_abs_error_last_25": 268.6284795233595, "mean_abs_error_last_50": 422.48043991807253, "mean_pred_prob": 0.029801554512232542, "mean_pred_prob_last_10": 0.1571751338429749, "mean_pred_prob_last_25": 0.08331342213787138, "mean_pred_prob_last_50": 0.05032697850838304, "mean_token_accuracy": 0.8688045501708984, "step": 17310 }, { "epoch": 0.30789469006097453, "grad_norm": 1.0711981799400405, "learning_rate": 0.0001, "loss": 0.8, "mean_abs_error": 860.3121940317066, "mean_abs_error_last_10": 366.6593711572041, "mean_abs_error_last_25": 479.30206203377367, "mean_abs_error_last_50": 619.7567233979344, "mean_pred_prob": 0.038312942703487354, "mean_pred_prob_last_10": 0.1821410387812648, "mean_pred_prob_last_25": 0.10442171494069044, "mean_pred_prob_last_50": 0.0647217626858037, "mean_token_accuracy": 0.8810513257980347, "step": 17320 }, { "epoch": 0.30807245835777647, "grad_norm": 1.5877119144321672, "learning_rate": 0.0001, "loss": 0.7891, "mean_abs_error": 564.781337401137, "mean_abs_error_last_10": 176.27109029182742, "mean_abs_error_last_25": 300.07560389213745, "mean_abs_error_last_50": 377.4390741691713, "mean_pred_prob": 0.04501769514172338, "mean_pred_prob_last_10": 0.22234915625303983, "mean_pred_prob_last_25": 0.11931215850636363, "mean_pred_prob_last_50": 0.0747421336302068, "mean_token_accuracy": 0.8713748395442963, "step": 17330 }, { "epoch": 0.3082502266545784, "grad_norm": 1.0118333164766857, "learning_rate": 0.0001, "loss": 0.7739, "mean_abs_error": 412.96007931742395, "mean_abs_error_last_10": 162.52872318804714, "mean_abs_error_last_25": 263.21775913269596, "mean_abs_error_last_50": 313.40397082266315, "mean_pred_prob": 0.025696603092364968, "mean_pred_prob_last_10": 0.13483766559511423, "mean_pred_prob_last_25": 0.0711659237742424, "mean_pred_prob_last_50": 0.04344171341508627, "mean_token_accuracy": 0.8854730784893036, "step": 17340 }, { "epoch": 0.30842799495138035, "grad_norm": 1.3095177147406445, "learning_rate": 0.0001, "loss": 0.8055, "mean_abs_error": 332.9165031936154, "mean_abs_error_last_10": 73.64809574658965, "mean_abs_error_last_25": 109.34772224565889, "mean_abs_error_last_50": 169.77064886100038, "mean_pred_prob": 0.046623405592981726, "mean_pred_prob_last_10": 0.23103144937194883, "mean_pred_prob_last_25": 0.13240579613484443, "mean_pred_prob_last_50": 0.07894354979507626, "mean_token_accuracy": 0.8776930510997772, "step": 17350 }, { "epoch": 0.30860576324818234, "grad_norm": 0.9652649496220018, "learning_rate": 0.0001, "loss": 0.8068, "mean_abs_error": 393.20380966572804, "mean_abs_error_last_10": 63.42432474207417, "mean_abs_error_last_25": 171.6374098374009, "mean_abs_error_last_50": 254.81807476312338, "mean_pred_prob": 0.03573705956805497, "mean_pred_prob_last_10": 0.18697629552334546, "mean_pred_prob_last_25": 0.10015963101759554, "mean_pred_prob_last_50": 0.06100535448640585, "mean_token_accuracy": 0.8789903879165649, "step": 17360 }, { "epoch": 0.3087835315449843, "grad_norm": 2.0307296636446663, "learning_rate": 0.0001, "loss": 0.8616, "mean_abs_error": 275.0358298900399, "mean_abs_error_last_10": 68.79225003484584, "mean_abs_error_last_25": 87.40985513950055, "mean_abs_error_last_50": 148.6152329605497, "mean_pred_prob": 0.027985162381082774, "mean_pred_prob_last_10": 0.1365830458700657, "mean_pred_prob_last_25": 0.07405479736626149, "mean_pred_prob_last_50": 0.046517598908394575, "mean_token_accuracy": 0.8662365019321442, "step": 17370 }, { "epoch": 0.3089612998417862, "grad_norm": 1.136496434560606, "learning_rate": 0.0001, "loss": 0.8812, "mean_abs_error": 526.6460009551099, "mean_abs_error_last_10": 309.7385299500096, "mean_abs_error_last_25": 339.381716710435, "mean_abs_error_last_50": 409.6756954100958, "mean_pred_prob": 0.03053590510971844, "mean_pred_prob_last_10": 0.16587431859225035, "mean_pred_prob_last_25": 0.08812320288270711, "mean_pred_prob_last_50": 0.053125917445868254, "mean_token_accuracy": 0.8535804390907288, "step": 17380 }, { "epoch": 0.30913906813858816, "grad_norm": 1.7777442452407073, "learning_rate": 0.0001, "loss": 0.8186, "mean_abs_error": 251.69247384952337, "mean_abs_error_last_10": 131.99709053261194, "mean_abs_error_last_25": 104.83082580402383, "mean_abs_error_last_50": 160.96580968508502, "mean_pred_prob": 0.04763899049721658, "mean_pred_prob_last_10": 0.21494294814765452, "mean_pred_prob_last_25": 0.1269912869669497, "mean_pred_prob_last_50": 0.07957118577323855, "mean_token_accuracy": 0.8674195230007171, "step": 17390 }, { "epoch": 0.3093168364353901, "grad_norm": 1.3406589174041612, "learning_rate": 0.0001, "loss": 0.7398, "mean_abs_error": 1854.7155370614612, "mean_abs_error_last_10": 1220.2867676948313, "mean_abs_error_last_25": 1319.5374585249265, "mean_abs_error_last_50": 1545.6024304042962, "mean_pred_prob": 0.02880995345840347, "mean_pred_prob_last_10": 0.14162367393219027, "mean_pred_prob_last_25": 0.07985405372164678, "mean_pred_prob_last_50": 0.04871029322021059, "mean_token_accuracy": 0.878741192817688, "step": 17400 }, { "epoch": 0.30949460473219204, "grad_norm": 1.8391168361720218, "learning_rate": 0.0001, "loss": 0.9444, "mean_abs_error": 340.48477444253166, "mean_abs_error_last_10": 112.20057481995158, "mean_abs_error_last_25": 159.3316859297578, "mean_abs_error_last_50": 219.54835021637754, "mean_pred_prob": 0.03543510320596397, "mean_pred_prob_last_10": 0.18058854825794696, "mean_pred_prob_last_25": 0.09987548338249326, "mean_pred_prob_last_50": 0.060614530881866815, "mean_token_accuracy": 0.867755800485611, "step": 17410 }, { "epoch": 0.30967237302899403, "grad_norm": 0.9833733817562805, "learning_rate": 0.0001, "loss": 0.8562, "mean_abs_error": 133.1654138475815, "mean_abs_error_last_10": 29.84609847769655, "mean_abs_error_last_25": 84.17425959676382, "mean_abs_error_last_50": 99.73333778743414, "mean_pred_prob": 0.065609423071146, "mean_pred_prob_last_10": 0.32370719201862813, "mean_pred_prob_last_25": 0.17736557088792324, "mean_pred_prob_last_50": 0.10909042339771986, "mean_token_accuracy": 0.8664541065692901, "step": 17420 }, { "epoch": 0.30985014132579597, "grad_norm": 2.3824886402751173, "learning_rate": 0.0001, "loss": 0.8594, "mean_abs_error": 292.26248670455095, "mean_abs_error_last_10": 120.29868847962773, "mean_abs_error_last_25": 162.45845643079295, "mean_abs_error_last_50": 189.94490343244053, "mean_pred_prob": 0.028643241338431834, "mean_pred_prob_last_10": 0.15755656752735375, "mean_pred_prob_last_25": 0.0791923007927835, "mean_pred_prob_last_50": 0.048316909559071065, "mean_token_accuracy": 0.8662546217441559, "step": 17430 }, { "epoch": 0.3100279096225979, "grad_norm": 0.9973132395701156, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 448.6415365558799, "mean_abs_error_last_10": 100.01487637708343, "mean_abs_error_last_25": 155.6204334905443, "mean_abs_error_last_50": 267.00704091048607, "mean_pred_prob": 0.026516842958517372, "mean_pred_prob_last_10": 0.14310781829990446, "mean_pred_prob_last_25": 0.07680160297313705, "mean_pred_prob_last_50": 0.04598046288592741, "mean_token_accuracy": 0.8682872593402863, "step": 17440 }, { "epoch": 0.31020567791939985, "grad_norm": 1.4937814740468014, "learning_rate": 0.0001, "loss": 0.776, "mean_abs_error": 654.9191820198319, "mean_abs_error_last_10": 222.06952271150013, "mean_abs_error_last_25": 353.57831590407324, "mean_abs_error_last_50": 426.44010313584283, "mean_pred_prob": 0.0218075183685869, "mean_pred_prob_last_10": 0.10984668917953969, "mean_pred_prob_last_25": 0.06010828223079443, "mean_pred_prob_last_50": 0.036783623648807405, "mean_token_accuracy": 0.867746114730835, "step": 17450 }, { "epoch": 0.3103834462162018, "grad_norm": 1.375311411111825, "learning_rate": 0.0001, "loss": 0.7457, "mean_abs_error": 448.6723224938663, "mean_abs_error_last_10": 147.1913569962461, "mean_abs_error_last_25": 168.4698794796369, "mean_abs_error_last_50": 251.1693582558812, "mean_pred_prob": 0.04540088952053338, "mean_pred_prob_last_10": 0.2220518819987774, "mean_pred_prob_last_25": 0.12609948753379285, "mean_pred_prob_last_50": 0.07755949532147496, "mean_token_accuracy": 0.8742976665496827, "step": 17460 }, { "epoch": 0.3105612145130037, "grad_norm": 1.0818307005486387, "learning_rate": 0.0001, "loss": 0.761, "mean_abs_error": 567.334258747291, "mean_abs_error_last_10": 222.27477349327506, "mean_abs_error_last_25": 299.5852651073589, "mean_abs_error_last_50": 392.23568835294395, "mean_pred_prob": 0.03742928566935007, "mean_pred_prob_last_10": 0.18364400813588871, "mean_pred_prob_last_25": 0.10408279933035373, "mean_pred_prob_last_50": 0.06370324835879729, "mean_token_accuracy": 0.8812455475330353, "step": 17470 }, { "epoch": 0.3107389828098057, "grad_norm": 2.236470459051246, "learning_rate": 0.0001, "loss": 0.8401, "mean_abs_error": 162.66069636613673, "mean_abs_error_last_10": 40.86920994769501, "mean_abs_error_last_25": 74.83748261771362, "mean_abs_error_last_50": 111.77822171369539, "mean_pred_prob": 0.03686748798936605, "mean_pred_prob_last_10": 0.19756082221865653, "mean_pred_prob_last_25": 0.10740384813398123, "mean_pred_prob_last_50": 0.06352688036859036, "mean_token_accuracy": 0.8733459055423737, "step": 17480 }, { "epoch": 0.31091675110660766, "grad_norm": 0.9247456763702656, "learning_rate": 0.0001, "loss": 0.7967, "mean_abs_error": 384.82102518324797, "mean_abs_error_last_10": 207.3988920611119, "mean_abs_error_last_25": 265.238120116505, "mean_abs_error_last_50": 245.77386455797918, "mean_pred_prob": 0.04200212614377961, "mean_pred_prob_last_10": 0.21033428052905948, "mean_pred_prob_last_25": 0.11938226133352145, "mean_pred_prob_last_50": 0.07128152345540002, "mean_token_accuracy": 0.8737075507640839, "step": 17490 }, { "epoch": 0.3110945194034096, "grad_norm": 2.2069045857260297, "learning_rate": 0.0001, "loss": 0.9811, "mean_abs_error": 282.9087956868069, "mean_abs_error_last_10": 125.58615886189925, "mean_abs_error_last_25": 162.36375171176567, "mean_abs_error_last_50": 196.24325907246757, "mean_pred_prob": 0.03387841498479247, "mean_pred_prob_last_10": 0.16947526521980763, "mean_pred_prob_last_25": 0.09290693588554859, "mean_pred_prob_last_50": 0.05719537013210356, "mean_token_accuracy": 0.8714612185955047, "step": 17500 }, { "epoch": 0.31127228770021154, "grad_norm": 3.5221735074091205, "learning_rate": 0.0001, "loss": 0.8253, "mean_abs_error": 1174.5882833682745, "mean_abs_error_last_10": 712.4354300210464, "mean_abs_error_last_25": 789.9680892711688, "mean_abs_error_last_50": 897.0449200572924, "mean_pred_prob": 0.03720562248781789, "mean_pred_prob_last_10": 0.17571773123636375, "mean_pred_prob_last_25": 0.10148449521657313, "mean_pred_prob_last_50": 0.062003104083123615, "mean_token_accuracy": 0.8754387080669404, "step": 17510 }, { "epoch": 0.3114500559970135, "grad_norm": 1.4580397913257743, "learning_rate": 0.0001, "loss": 0.7751, "mean_abs_error": 805.6383913634329, "mean_abs_error_last_10": 326.3664867774461, "mean_abs_error_last_25": 424.43090660775397, "mean_abs_error_last_50": 593.1246279050987, "mean_pred_prob": 0.030472734713112005, "mean_pred_prob_last_10": 0.1596458997402806, "mean_pred_prob_last_25": 0.087296357518062, "mean_pred_prob_last_50": 0.052251830042223446, "mean_token_accuracy": 0.869676285982132, "step": 17520 }, { "epoch": 0.3116278242938154, "grad_norm": 1.8351354165789353, "learning_rate": 0.0001, "loss": 0.8027, "mean_abs_error": 449.965758289106, "mean_abs_error_last_10": 146.78494729572108, "mean_abs_error_last_25": 146.3840837968497, "mean_abs_error_last_50": 206.62994533791397, "mean_pred_prob": 0.030389574682340027, "mean_pred_prob_last_10": 0.14795666160061954, "mean_pred_prob_last_25": 0.08668489367701113, "mean_pred_prob_last_50": 0.052166105480864644, "mean_token_accuracy": 0.8642026662826539, "step": 17530 }, { "epoch": 0.3118055925906174, "grad_norm": 1.5267432653275685, "learning_rate": 0.0001, "loss": 0.8347, "mean_abs_error": 210.95910550701106, "mean_abs_error_last_10": 30.31014773969472, "mean_abs_error_last_25": 47.3556750929381, "mean_abs_error_last_50": 86.75722320648846, "mean_pred_prob": 0.05421304190531373, "mean_pred_prob_last_10": 0.2754082839936018, "mean_pred_prob_last_25": 0.15246485751122235, "mean_pred_prob_last_50": 0.09258599085733295, "mean_token_accuracy": 0.856242173910141, "step": 17540 }, { "epoch": 0.31198336088741935, "grad_norm": 1.4215756998775428, "learning_rate": 0.0001, "loss": 0.7672, "mean_abs_error": 204.88621048938217, "mean_abs_error_last_10": 62.53079024039597, "mean_abs_error_last_25": 91.92114928125008, "mean_abs_error_last_50": 131.20081700625636, "mean_pred_prob": 0.04202724192291498, "mean_pred_prob_last_10": 0.21197536773979664, "mean_pred_prob_last_25": 0.11586941741406917, "mean_pred_prob_last_50": 0.07073742933571339, "mean_token_accuracy": 0.8791954755783081, "step": 17550 }, { "epoch": 0.3121611291842213, "grad_norm": 1.4980803494272077, "learning_rate": 0.0001, "loss": 0.8147, "mean_abs_error": 677.1173887259708, "mean_abs_error_last_10": 569.4665597502733, "mean_abs_error_last_25": 630.9433580248215, "mean_abs_error_last_50": 662.48002459297, "mean_pred_prob": 0.038953258690889926, "mean_pred_prob_last_10": 0.19516078351298347, "mean_pred_prob_last_25": 0.10766968199750408, "mean_pred_prob_last_50": 0.06649183654226362, "mean_token_accuracy": 0.8672073960304261, "step": 17560 }, { "epoch": 0.3123388974810232, "grad_norm": 0.7625226685035235, "learning_rate": 0.0001, "loss": 0.8507, "mean_abs_error": 309.4375351295132, "mean_abs_error_last_10": 107.70562468927847, "mean_abs_error_last_25": 188.160763974122, "mean_abs_error_last_50": 246.8488267385175, "mean_pred_prob": 0.03279638490639627, "mean_pred_prob_last_10": 0.16051871851086616, "mean_pred_prob_last_25": 0.09031958570703864, "mean_pred_prob_last_50": 0.055431805085390806, "mean_token_accuracy": 0.8722675204277038, "step": 17570 }, { "epoch": 0.31251666577782516, "grad_norm": 3.0319117990245736, "learning_rate": 0.0001, "loss": 0.7597, "mean_abs_error": 103.41945424946736, "mean_abs_error_last_10": 23.4768972801856, "mean_abs_error_last_25": 41.05525331208953, "mean_abs_error_last_50": 65.60314271055253, "mean_pred_prob": 0.04227452389895916, "mean_pred_prob_last_10": 0.22624619975686072, "mean_pred_prob_last_25": 0.1222646065056324, "mean_pred_prob_last_50": 0.07283128798007965, "mean_token_accuracy": 0.8912394881248474, "step": 17580 }, { "epoch": 0.3126944340746271, "grad_norm": 2.5618048663123987, "learning_rate": 0.0001, "loss": 0.8892, "mean_abs_error": 877.0395189909899, "mean_abs_error_last_10": 307.60639901822555, "mean_abs_error_last_25": 388.70056102044435, "mean_abs_error_last_50": 553.6699979437742, "mean_pred_prob": 0.02819760492129717, "mean_pred_prob_last_10": 0.14066916168667376, "mean_pred_prob_last_25": 0.0786373084585648, "mean_pred_prob_last_50": 0.04777398305595852, "mean_token_accuracy": 0.8666462659835815, "step": 17590 }, { "epoch": 0.3128722023714291, "grad_norm": 1.736068341479892, "learning_rate": 0.0001, "loss": 0.8853, "mean_abs_error": 575.2718914659665, "mean_abs_error_last_10": 139.11764269693498, "mean_abs_error_last_25": 201.38965096396265, "mean_abs_error_last_50": 300.2679912764734, "mean_pred_prob": 0.03377150020678528, "mean_pred_prob_last_10": 0.15760055594146252, "mean_pred_prob_last_25": 0.09204842884209938, "mean_pred_prob_last_50": 0.05658212286652997, "mean_token_accuracy": 0.8711968779563903, "step": 17600 }, { "epoch": 0.31304997066823104, "grad_norm": 1.2708308002776276, "learning_rate": 0.0001, "loss": 0.785, "mean_abs_error": 573.1485435542905, "mean_abs_error_last_10": 206.47895038846718, "mean_abs_error_last_25": 235.43104290330947, "mean_abs_error_last_50": 347.7084911966415, "mean_pred_prob": 0.02412086458061822, "mean_pred_prob_last_10": 0.12946056385990232, "mean_pred_prob_last_25": 0.06820968253305182, "mean_pred_prob_last_50": 0.0406005403958261, "mean_token_accuracy": 0.881771719455719, "step": 17610 }, { "epoch": 0.313227738965033, "grad_norm": 1.4820011938507474, "learning_rate": 0.0001, "loss": 0.691, "mean_abs_error": 121.91520677481515, "mean_abs_error_last_10": 15.215902298985378, "mean_abs_error_last_25": 40.67675659210307, "mean_abs_error_last_50": 71.17804494132014, "mean_pred_prob": 0.04490974275395274, "mean_pred_prob_last_10": 0.22161111123859883, "mean_pred_prob_last_25": 0.12189101707190275, "mean_pred_prob_last_50": 0.07456071991473437, "mean_token_accuracy": 0.8655403196811676, "step": 17620 }, { "epoch": 0.3134055072618349, "grad_norm": 1.7061314260045757, "learning_rate": 0.0001, "loss": 0.7892, "mean_abs_error": 344.246945540872, "mean_abs_error_last_10": 101.5298300552619, "mean_abs_error_last_25": 108.69695603405287, "mean_abs_error_last_50": 183.3410887215743, "mean_pred_prob": 0.02878520027734339, "mean_pred_prob_last_10": 0.14691150542348624, "mean_pred_prob_last_25": 0.08110137367621065, "mean_pred_prob_last_50": 0.049480016063898805, "mean_token_accuracy": 0.8750395476818085, "step": 17630 }, { "epoch": 0.31358327555863685, "grad_norm": 1.9482178939978918, "learning_rate": 0.0001, "loss": 0.746, "mean_abs_error": 550.2366067712574, "mean_abs_error_last_10": 99.65122601344157, "mean_abs_error_last_25": 123.08811649440415, "mean_abs_error_last_50": 250.5354053446531, "mean_pred_prob": 0.049294225336052475, "mean_pred_prob_last_10": 0.22687238845974206, "mean_pred_prob_last_25": 0.13154577352106572, "mean_pred_prob_last_50": 0.0822750776540488, "mean_token_accuracy": 0.8723645150661469, "step": 17640 }, { "epoch": 0.31376104385543885, "grad_norm": 2.4719329070709013, "learning_rate": 0.0001, "loss": 0.8219, "mean_abs_error": 239.91019755017078, "mean_abs_error_last_10": 130.9688850649183, "mean_abs_error_last_25": 178.09528423923834, "mean_abs_error_last_50": 179.17493002578496, "mean_pred_prob": 0.026797250332310795, "mean_pred_prob_last_10": 0.14420560747385025, "mean_pred_prob_last_25": 0.07541228756308556, "mean_pred_prob_last_50": 0.04557169508188963, "mean_token_accuracy": 0.8745075345039368, "step": 17650 }, { "epoch": 0.3139388121522408, "grad_norm": 1.9032714037393086, "learning_rate": 0.0001, "loss": 0.8318, "mean_abs_error": 244.85836272074275, "mean_abs_error_last_10": 167.27940793075442, "mean_abs_error_last_25": 219.10668763076828, "mean_abs_error_last_50": 192.11381697122712, "mean_pred_prob": 0.04623581958003342, "mean_pred_prob_last_10": 0.216115820966661, "mean_pred_prob_last_25": 0.1221383823081851, "mean_pred_prob_last_50": 0.07667765389196575, "mean_token_accuracy": 0.8696082949638366, "step": 17660 }, { "epoch": 0.3141165804490427, "grad_norm": 1.8492297561757667, "learning_rate": 0.0001, "loss": 0.8365, "mean_abs_error": 585.9270922106607, "mean_abs_error_last_10": 217.93732065754284, "mean_abs_error_last_25": 263.08476858346717, "mean_abs_error_last_50": 364.71115295092693, "mean_pred_prob": 0.02151646566344425, "mean_pred_prob_last_10": 0.11418804924469442, "mean_pred_prob_last_25": 0.06005106947850436, "mean_pred_prob_last_50": 0.03645034753717482, "mean_token_accuracy": 0.8706761479377747, "step": 17670 }, { "epoch": 0.31429434874584466, "grad_norm": 1.1588371316984865, "learning_rate": 0.0001, "loss": 0.8591, "mean_abs_error": 755.8364022002578, "mean_abs_error_last_10": 291.0631926408456, "mean_abs_error_last_25": 368.6471858393845, "mean_abs_error_last_50": 526.0877189091985, "mean_pred_prob": 0.03382602906640386, "mean_pred_prob_last_10": 0.1721220460778568, "mean_pred_prob_last_25": 0.09586380032706074, "mean_pred_prob_last_50": 0.05758843715011608, "mean_token_accuracy": 0.877311235666275, "step": 17680 }, { "epoch": 0.3144721170426466, "grad_norm": 1.3042786569782538, "learning_rate": 0.0001, "loss": 0.7304, "mean_abs_error": 78.62292909269603, "mean_abs_error_last_10": 10.91099749033574, "mean_abs_error_last_25": 24.05559993883322, "mean_abs_error_last_50": 44.30471027678425, "mean_pred_prob": 0.05544481333345175, "mean_pred_prob_last_10": 0.26792295202612876, "mean_pred_prob_last_25": 0.1515809316188097, "mean_pred_prob_last_50": 0.09343328513205051, "mean_token_accuracy": 0.8812942147254944, "step": 17690 }, { "epoch": 0.31464988533944854, "grad_norm": 1.6841891895929684, "learning_rate": 0.0001, "loss": 0.8082, "mean_abs_error": 269.7470099015826, "mean_abs_error_last_10": 40.191825002922755, "mean_abs_error_last_25": 93.25236546313047, "mean_abs_error_last_50": 151.6212866147393, "mean_pred_prob": 0.046997332386672495, "mean_pred_prob_last_10": 0.2230666872113943, "mean_pred_prob_last_25": 0.12727582156658174, "mean_pred_prob_last_50": 0.07905630571767688, "mean_token_accuracy": 0.8700463116168976, "step": 17700 }, { "epoch": 0.31482765363625054, "grad_norm": 1.973095222890209, "learning_rate": 0.0001, "loss": 0.828, "mean_abs_error": 735.2824249909927, "mean_abs_error_last_10": 150.90009943886986, "mean_abs_error_last_25": 215.70018615978384, "mean_abs_error_last_50": 378.2859414420022, "mean_pred_prob": 0.0385095477802679, "mean_pred_prob_last_10": 0.18107880457537248, "mean_pred_prob_last_25": 0.10514522768789902, "mean_pred_prob_last_50": 0.06548835394787603, "mean_token_accuracy": 0.8585861623287201, "step": 17710 }, { "epoch": 0.3150054219330525, "grad_norm": 1.0284380157426172, "learning_rate": 0.0001, "loss": 0.9439, "mean_abs_error": 563.3093093980262, "mean_abs_error_last_10": 207.70559913147326, "mean_abs_error_last_25": 199.24823280040852, "mean_abs_error_last_50": 294.75124816716396, "mean_pred_prob": 0.03010327855590731, "mean_pred_prob_last_10": 0.16397676460910587, "mean_pred_prob_last_25": 0.08567343959584832, "mean_pred_prob_last_50": 0.05167898046784103, "mean_token_accuracy": 0.8612244963645935, "step": 17720 }, { "epoch": 0.3151831902298544, "grad_norm": 1.1339097233607123, "learning_rate": 0.0001, "loss": 0.775, "mean_abs_error": 555.8668520339045, "mean_abs_error_last_10": 366.5365758816745, "mean_abs_error_last_25": 443.05772447040727, "mean_abs_error_last_50": 428.64715513917355, "mean_pred_prob": 0.044121962584904396, "mean_pred_prob_last_10": 0.20615767416311429, "mean_pred_prob_last_25": 0.11957746500265784, "mean_pred_prob_last_50": 0.0745934080448933, "mean_token_accuracy": 0.8737312912940979, "step": 17730 }, { "epoch": 0.31536095852665635, "grad_norm": 1.436104491499395, "learning_rate": 0.0001, "loss": 0.8993, "mean_abs_error": 1252.477392197777, "mean_abs_error_last_10": 357.9073160238832, "mean_abs_error_last_25": 518.5492661742477, "mean_abs_error_last_50": 693.4692002643601, "mean_pred_prob": 0.01710933118883986, "mean_pred_prob_last_10": 0.09130818575504236, "mean_pred_prob_last_25": 0.04879337103920989, "mean_pred_prob_last_50": 0.029321490338770673, "mean_token_accuracy": 0.8699865639209747, "step": 17740 }, { "epoch": 0.3155387268234583, "grad_norm": 3.183482165822054, "learning_rate": 0.0001, "loss": 0.9065, "mean_abs_error": 159.71918943064202, "mean_abs_error_last_10": 39.60173030595867, "mean_abs_error_last_25": 50.63862390305404, "mean_abs_error_last_50": 74.48836675222813, "mean_pred_prob": 0.05011433600448072, "mean_pred_prob_last_10": 0.24365312680602075, "mean_pred_prob_last_25": 0.1403118534013629, "mean_pred_prob_last_50": 0.08558272821828723, "mean_token_accuracy": 0.864046049118042, "step": 17750 }, { "epoch": 0.31571649512026023, "grad_norm": 2.0901485734904717, "learning_rate": 0.0001, "loss": 0.817, "mean_abs_error": 327.68516232092395, "mean_abs_error_last_10": 156.4815941883801, "mean_abs_error_last_25": 201.29693507406049, "mean_abs_error_last_50": 247.6233866424224, "mean_pred_prob": 0.04258213057182729, "mean_pred_prob_last_10": 0.20809911470860243, "mean_pred_prob_last_25": 0.11800511674955487, "mean_pred_prob_last_50": 0.07211348726414143, "mean_token_accuracy": 0.8700655400753021, "step": 17760 }, { "epoch": 0.3158942634170622, "grad_norm": 1.4723655871828372, "learning_rate": 0.0001, "loss": 0.7914, "mean_abs_error": 506.0857343659948, "mean_abs_error_last_10": 298.9966002026786, "mean_abs_error_last_25": 341.6086203196568, "mean_abs_error_last_50": 316.4594318036764, "mean_pred_prob": 0.02634863608982414, "mean_pred_prob_last_10": 0.12435725657269359, "mean_pred_prob_last_25": 0.07115731351077556, "mean_pred_prob_last_50": 0.04445574071723968, "mean_token_accuracy": 0.8638605117797852, "step": 17770 }, { "epoch": 0.31607203171386417, "grad_norm": 2.242622099336275, "learning_rate": 0.0001, "loss": 0.7307, "mean_abs_error": 771.1647703602239, "mean_abs_error_last_10": 368.0764639916875, "mean_abs_error_last_25": 424.4402114502369, "mean_abs_error_last_50": 533.9248538224319, "mean_pred_prob": 0.03285686957242433, "mean_pred_prob_last_10": 0.1767308484413661, "mean_pred_prob_last_25": 0.09439580611069687, "mean_pred_prob_last_50": 0.05647215721255634, "mean_token_accuracy": 0.8740766823291779, "step": 17780 }, { "epoch": 0.3162498000106661, "grad_norm": 2.1080023653884976, "learning_rate": 0.0001, "loss": 0.9171, "mean_abs_error": 346.2217944879196, "mean_abs_error_last_10": 117.38887007697531, "mean_abs_error_last_25": 209.6895322970982, "mean_abs_error_last_50": 262.4613311298177, "mean_pred_prob": 0.036978586483746766, "mean_pred_prob_last_10": 0.18740387838333844, "mean_pred_prob_last_25": 0.10122473426163196, "mean_pred_prob_last_50": 0.062310412200167774, "mean_token_accuracy": 0.8701388537883759, "step": 17790 }, { "epoch": 0.31642756830746804, "grad_norm": 1.9634368386686234, "learning_rate": 0.0001, "loss": 0.8612, "mean_abs_error": 407.6336466703523, "mean_abs_error_last_10": 152.90378671839935, "mean_abs_error_last_25": 190.38108837382237, "mean_abs_error_last_50": 253.62941353451106, "mean_pred_prob": 0.03042415310628712, "mean_pred_prob_last_10": 0.15939112640917302, "mean_pred_prob_last_25": 0.08622074788436293, "mean_pred_prob_last_50": 0.0521737125236541, "mean_token_accuracy": 0.8634999811649322, "step": 17800 }, { "epoch": 0.31660533660427, "grad_norm": 1.096847682081002, "learning_rate": 0.0001, "loss": 0.7416, "mean_abs_error": 409.25753700573523, "mean_abs_error_last_10": 61.402665844474676, "mean_abs_error_last_25": 114.42368832629947, "mean_abs_error_last_50": 222.75165577882802, "mean_pred_prob": 0.034876004606485364, "mean_pred_prob_last_10": 0.18255669213831424, "mean_pred_prob_last_25": 0.09880803655833006, "mean_pred_prob_last_50": 0.05946941804140806, "mean_token_accuracy": 0.8772039532661438, "step": 17810 }, { "epoch": 0.3167831049010719, "grad_norm": 0.9377848829578316, "learning_rate": 0.0001, "loss": 0.9784, "mean_abs_error": 311.2416800025479, "mean_abs_error_last_10": 132.30028717523558, "mean_abs_error_last_25": 152.7150985151496, "mean_abs_error_last_50": 204.0378842925773, "mean_pred_prob": 0.03537217942066491, "mean_pred_prob_last_10": 0.18046699650585651, "mean_pred_prob_last_25": 0.09790649730712175, "mean_pred_prob_last_50": 0.05995818595401943, "mean_token_accuracy": 0.8666439712047577, "step": 17820 }, { "epoch": 0.3169608731978739, "grad_norm": 0.9181190731970595, "learning_rate": 0.0001, "loss": 0.7472, "mean_abs_error": 1106.3836647432115, "mean_abs_error_last_10": 414.9401433076167, "mean_abs_error_last_25": 477.80988592470356, "mean_abs_error_last_50": 677.4834563288381, "mean_pred_prob": 0.02697209429752547, "mean_pred_prob_last_10": 0.1379558356711641, "mean_pred_prob_last_25": 0.07648707999032922, "mean_pred_prob_last_50": 0.04558123635069933, "mean_token_accuracy": 0.8770827174186706, "step": 17830 }, { "epoch": 0.31713864149467585, "grad_norm": 0.6810376945860855, "learning_rate": 0.0001, "loss": 0.8589, "mean_abs_error": 588.3296277819784, "mean_abs_error_last_10": 216.69501022654154, "mean_abs_error_last_25": 372.1847091900434, "mean_abs_error_last_50": 465.21806196500864, "mean_pred_prob": 0.018688770150765775, "mean_pred_prob_last_10": 0.10551589727401733, "mean_pred_prob_last_25": 0.05409014001488686, "mean_pred_prob_last_50": 0.03206679276190698, "mean_token_accuracy": 0.873387998342514, "step": 17840 }, { "epoch": 0.3173164097914778, "grad_norm": 1.549656187296525, "learning_rate": 0.0001, "loss": 1.0394, "mean_abs_error": 2562.2912144430247, "mean_abs_error_last_10": 1381.0167846439913, "mean_abs_error_last_25": 1573.6613198327045, "mean_abs_error_last_50": 1905.859067325382, "mean_pred_prob": 0.025450327663565985, "mean_pred_prob_last_10": 0.12705793332570464, "mean_pred_prob_last_25": 0.07204897603660357, "mean_pred_prob_last_50": 0.04364141720579937, "mean_token_accuracy": 0.8676361680030823, "step": 17850 }, { "epoch": 0.31749417808827973, "grad_norm": 2.514084468987201, "learning_rate": 0.0001, "loss": 0.855, "mean_abs_error": 227.38912008230187, "mean_abs_error_last_10": 80.33637364705017, "mean_abs_error_last_25": 107.65305701950828, "mean_abs_error_last_50": 137.64239316670597, "mean_pred_prob": 0.051737913745455444, "mean_pred_prob_last_10": 0.24100138414651157, "mean_pred_prob_last_25": 0.1380327483639121, "mean_pred_prob_last_50": 0.0861338522285223, "mean_token_accuracy": 0.8695049524307251, "step": 17860 }, { "epoch": 0.31767194638508167, "grad_norm": 1.3578342086294988, "learning_rate": 0.0001, "loss": 0.8624, "mean_abs_error": 421.5104578911202, "mean_abs_error_last_10": 159.97399552752023, "mean_abs_error_last_25": 195.04723129963193, "mean_abs_error_last_50": 256.01373303159664, "mean_pred_prob": 0.03715680928435176, "mean_pred_prob_last_10": 0.18913248660974205, "mean_pred_prob_last_25": 0.10326997693628073, "mean_pred_prob_last_50": 0.061973456363193694, "mean_token_accuracy": 0.8783282101154327, "step": 17870 }, { "epoch": 0.3178497146818836, "grad_norm": 1.112127308590956, "learning_rate": 0.0001, "loss": 0.8341, "mean_abs_error": 239.4313421634828, "mean_abs_error_last_10": 71.22479484807977, "mean_abs_error_last_25": 200.8935843718977, "mean_abs_error_last_50": 254.70463635317589, "mean_pred_prob": 0.04372003576718271, "mean_pred_prob_last_10": 0.19823015481233597, "mean_pred_prob_last_25": 0.11461332757025958, "mean_pred_prob_last_50": 0.07181195160374046, "mean_token_accuracy": 0.8779004335403442, "step": 17880 }, { "epoch": 0.3180274829786856, "grad_norm": 3.3995354234369164, "learning_rate": 0.0001, "loss": 0.8329, "mean_abs_error": 423.7887991015802, "mean_abs_error_last_10": 202.82670845992033, "mean_abs_error_last_25": 193.77639823747126, "mean_abs_error_last_50": 259.8149528693837, "mean_pred_prob": 0.027866776380687953, "mean_pred_prob_last_10": 0.15160862244665624, "mean_pred_prob_last_25": 0.08008962292224168, "mean_pred_prob_last_50": 0.04804350384511054, "mean_token_accuracy": 0.8703139781951904, "step": 17890 }, { "epoch": 0.31820525127548754, "grad_norm": 1.126844833153546, "learning_rate": 0.0001, "loss": 0.9498, "mean_abs_error": 299.67562568396346, "mean_abs_error_last_10": 71.0878720001958, "mean_abs_error_last_25": 114.6688331085087, "mean_abs_error_last_50": 170.79801370001107, "mean_pred_prob": 0.034397171542514114, "mean_pred_prob_last_10": 0.19103198223747314, "mean_pred_prob_last_25": 0.10054998141713441, "mean_pred_prob_last_50": 0.05950218981597573, "mean_token_accuracy": 0.8781204342842102, "step": 17900 }, { "epoch": 0.3183830195722895, "grad_norm": 1.065125259658855, "learning_rate": 0.0001, "loss": 0.8623, "mean_abs_error": 453.84534581362607, "mean_abs_error_last_10": 71.52435307370243, "mean_abs_error_last_25": 113.23033647143862, "mean_abs_error_last_50": 203.49601355884164, "mean_pred_prob": 0.02866328307427466, "mean_pred_prob_last_10": 0.1488202277570963, "mean_pred_prob_last_25": 0.08315755352377892, "mean_pred_prob_last_50": 0.04938764339312911, "mean_token_accuracy": 0.872767698764801, "step": 17910 }, { "epoch": 0.3185607878690914, "grad_norm": 0.9705050585574057, "learning_rate": 0.0001, "loss": 0.7736, "mean_abs_error": 233.85612733698022, "mean_abs_error_last_10": 65.05576929522768, "mean_abs_error_last_25": 98.00688486670484, "mean_abs_error_last_50": 135.78056522527646, "mean_pred_prob": 0.051043058373034, "mean_pred_prob_last_10": 0.26425811611115935, "mean_pred_prob_last_25": 0.1452750850468874, "mean_pred_prob_last_50": 0.08691985700279474, "mean_token_accuracy": 0.8807124316692352, "step": 17920 }, { "epoch": 0.31873855616589336, "grad_norm": 1.681689883128579, "learning_rate": 0.0001, "loss": 1.019, "mean_abs_error": 563.2424372915249, "mean_abs_error_last_10": 128.7759839703462, "mean_abs_error_last_25": 193.08651667120012, "mean_abs_error_last_50": 281.5379860218899, "mean_pred_prob": 0.03469182029366493, "mean_pred_prob_last_10": 0.16898061590036378, "mean_pred_prob_last_25": 0.09516180343925953, "mean_pred_prob_last_50": 0.058595739433076235, "mean_token_accuracy": 0.8687341868877411, "step": 17930 }, { "epoch": 0.3189163244626953, "grad_norm": 1.2959840126532018, "learning_rate": 0.0001, "loss": 0.8351, "mean_abs_error": 941.7095495296386, "mean_abs_error_last_10": 432.0460869780394, "mean_abs_error_last_25": 495.26981584283203, "mean_abs_error_last_50": 630.6306948782864, "mean_pred_prob": 0.04076778356684372, "mean_pred_prob_last_10": 0.1935367924394086, "mean_pred_prob_last_25": 0.11236633704975248, "mean_pred_prob_last_50": 0.06852921646204777, "mean_token_accuracy": 0.8719169616699218, "step": 17940 }, { "epoch": 0.3190940927594973, "grad_norm": 1.4765472791664238, "learning_rate": 0.0001, "loss": 0.8319, "mean_abs_error": 397.945547297147, "mean_abs_error_last_10": 103.2415230983693, "mean_abs_error_last_25": 207.2005777645117, "mean_abs_error_last_50": 251.67223120451163, "mean_pred_prob": 0.0427511302055791, "mean_pred_prob_last_10": 0.20378900766372682, "mean_pred_prob_last_25": 0.11839552512392401, "mean_pred_prob_last_50": 0.07217100290581584, "mean_token_accuracy": 0.8615862667560578, "step": 17950 }, { "epoch": 0.31927186105629923, "grad_norm": 1.6359173936275613, "learning_rate": 0.0001, "loss": 0.7624, "mean_abs_error": 228.94312560347726, "mean_abs_error_last_10": 71.47453534607209, "mean_abs_error_last_25": 103.78333791444511, "mean_abs_error_last_50": 184.83666445263833, "mean_pred_prob": 0.06993865247350187, "mean_pred_prob_last_10": 0.28751197941601275, "mean_pred_prob_last_25": 0.17406193520873786, "mean_pred_prob_last_50": 0.11288464651443064, "mean_token_accuracy": 0.8702744603157043, "step": 17960 }, { "epoch": 0.31944962935310117, "grad_norm": 1.1996751479967656, "learning_rate": 0.0001, "loss": 0.8182, "mean_abs_error": 395.13003168944545, "mean_abs_error_last_10": 161.66444586065057, "mean_abs_error_last_25": 184.39446466789818, "mean_abs_error_last_50": 243.75981960548728, "mean_pred_prob": 0.029139086068607867, "mean_pred_prob_last_10": 0.14642752092331648, "mean_pred_prob_last_25": 0.0809817597270012, "mean_pred_prob_last_50": 0.04931390024721623, "mean_token_accuracy": 0.8705956697463989, "step": 17970 }, { "epoch": 0.3196273976499031, "grad_norm": 2.3278822690727092, "learning_rate": 0.0001, "loss": 0.8791, "mean_abs_error": 1034.6836447006972, "mean_abs_error_last_10": 702.8875856251118, "mean_abs_error_last_25": 758.5255644188233, "mean_abs_error_last_50": 877.0580811199985, "mean_pred_prob": 0.040641202105325645, "mean_pred_prob_last_10": 0.20109410122968258, "mean_pred_prob_last_25": 0.11314718565117801, "mean_pred_prob_last_50": 0.06852727302757558, "mean_token_accuracy": 0.8723239600658417, "step": 17980 }, { "epoch": 0.31980516594670505, "grad_norm": 1.704764486612888, "learning_rate": 0.0001, "loss": 0.7738, "mean_abs_error": 245.87185266445832, "mean_abs_error_last_10": 69.44712157033446, "mean_abs_error_last_25": 100.83745143580941, "mean_abs_error_last_50": 149.579984742264, "mean_pred_prob": 0.040536425448954104, "mean_pred_prob_last_10": 0.17751115262508393, "mean_pred_prob_last_25": 0.10535937007516623, "mean_pred_prob_last_50": 0.0669251604937017, "mean_token_accuracy": 0.8646259069442749, "step": 17990 }, { "epoch": 0.319982934243507, "grad_norm": 1.4104023301069293, "learning_rate": 0.0001, "loss": 0.9029, "mean_abs_error": 824.4708531179831, "mean_abs_error_last_10": 519.9175933033267, "mean_abs_error_last_25": 527.9280928158057, "mean_abs_error_last_50": 604.6475715051471, "mean_pred_prob": 0.048348345208796675, "mean_pred_prob_last_10": 0.24829265194712208, "mean_pred_prob_last_25": 0.13940755399526097, "mean_pred_prob_last_50": 0.08318351681227795, "mean_token_accuracy": 0.8718900501728057, "step": 18000 }, { "epoch": 0.320160702540309, "grad_norm": 1.0287804306078896, "learning_rate": 0.0001, "loss": 0.8732, "mean_abs_error": 827.1960083157483, "mean_abs_error_last_10": 335.38847318851583, "mean_abs_error_last_25": 431.7414865412472, "mean_abs_error_last_50": 533.9895863175908, "mean_pred_prob": 0.022654327654163354, "mean_pred_prob_last_10": 0.11108647602959536, "mean_pred_prob_last_25": 0.06202022049692459, "mean_pred_prob_last_50": 0.03809146947460249, "mean_token_accuracy": 0.8737197935581207, "step": 18010 }, { "epoch": 0.3203384708371109, "grad_norm": 1.7490990425276451, "learning_rate": 0.0001, "loss": 0.9056, "mean_abs_error": 948.2349259537745, "mean_abs_error_last_10": 360.43385617632737, "mean_abs_error_last_25": 457.03726113738287, "mean_abs_error_last_50": 661.5154980455442, "mean_pred_prob": 0.021937812177930026, "mean_pred_prob_last_10": 0.10888875949895009, "mean_pred_prob_last_25": 0.060305268026422706, "mean_pred_prob_last_50": 0.03738858694559895, "mean_token_accuracy": 0.8697132229804992, "step": 18020 }, { "epoch": 0.32051623913391286, "grad_norm": 2.221717105183919, "learning_rate": 0.0001, "loss": 0.8753, "mean_abs_error": 222.06178243627966, "mean_abs_error_last_10": 70.28018437961941, "mean_abs_error_last_25": 118.88870764394088, "mean_abs_error_last_50": 163.30173846552182, "mean_pred_prob": 0.037194941332563757, "mean_pred_prob_last_10": 0.19916253853589297, "mean_pred_prob_last_25": 0.10427614450454711, "mean_pred_prob_last_50": 0.0628360572271049, "mean_token_accuracy": 0.8732763051986694, "step": 18030 }, { "epoch": 0.3206940074307148, "grad_norm": 1.822323945702761, "learning_rate": 0.0001, "loss": 0.7992, "mean_abs_error": 400.23409198040156, "mean_abs_error_last_10": 135.49723090394292, "mean_abs_error_last_25": 188.01686734233215, "mean_abs_error_last_50": 236.31365113141152, "mean_pred_prob": 0.044082579715177414, "mean_pred_prob_last_10": 0.21637576203793288, "mean_pred_prob_last_25": 0.12236755015328526, "mean_pred_prob_last_50": 0.07418169318698346, "mean_token_accuracy": 0.8740471363067627, "step": 18040 }, { "epoch": 0.32087177572751674, "grad_norm": 2.791305965471557, "learning_rate": 0.0001, "loss": 0.7772, "mean_abs_error": 530.0371646228982, "mean_abs_error_last_10": 223.17582590296684, "mean_abs_error_last_25": 279.1996934200534, "mean_abs_error_last_50": 303.5078295574027, "mean_pred_prob": 0.0366187364095822, "mean_pred_prob_last_10": 0.17326256446540356, "mean_pred_prob_last_25": 0.09808430848643183, "mean_pred_prob_last_50": 0.061754065193235876, "mean_token_accuracy": 0.8853955686092376, "step": 18050 }, { "epoch": 0.3210495440243187, "grad_norm": 2.056267989191715, "learning_rate": 0.0001, "loss": 0.8346, "mean_abs_error": 250.55463573931698, "mean_abs_error_last_10": 106.6645591168649, "mean_abs_error_last_25": 123.27607334068811, "mean_abs_error_last_50": 151.20392433920958, "mean_pred_prob": 0.03488744762726128, "mean_pred_prob_last_10": 0.17312279865145683, "mean_pred_prob_last_25": 0.09792441129684448, "mean_pred_prob_last_50": 0.05980641674250364, "mean_token_accuracy": 0.8714509963989258, "step": 18060 }, { "epoch": 0.3212273123211207, "grad_norm": 1.2629194044362089, "learning_rate": 0.0001, "loss": 0.8608, "mean_abs_error": 460.8332830235565, "mean_abs_error_last_10": 149.71141707311241, "mean_abs_error_last_25": 175.10278523683616, "mean_abs_error_last_50": 299.8213876618019, "mean_pred_prob": 0.02651064817328006, "mean_pred_prob_last_10": 0.14089675073046237, "mean_pred_prob_last_25": 0.07600998363923281, "mean_pred_prob_last_50": 0.045307177992071956, "mean_token_accuracy": 0.8850913345813751, "step": 18070 }, { "epoch": 0.3214050806179226, "grad_norm": 2.166608947291984, "learning_rate": 0.0001, "loss": 0.888, "mean_abs_error": 586.7995107299739, "mean_abs_error_last_10": 184.88345882346005, "mean_abs_error_last_25": 284.88592284833567, "mean_abs_error_last_50": 394.4086218082578, "mean_pred_prob": 0.02442877967841923, "mean_pred_prob_last_10": 0.13069597915746273, "mean_pred_prob_last_25": 0.07149201943539082, "mean_pred_prob_last_50": 0.042811528453603385, "mean_token_accuracy": 0.8669288694858551, "step": 18080 }, { "epoch": 0.32158284891472455, "grad_norm": 1.3228422535168123, "learning_rate": 0.0001, "loss": 0.8719, "mean_abs_error": 776.1445381645088, "mean_abs_error_last_10": 291.1253596180658, "mean_abs_error_last_25": 347.74052342358675, "mean_abs_error_last_50": 479.6581757329089, "mean_pred_prob": 0.02271247354510706, "mean_pred_prob_last_10": 0.13297290873597375, "mean_pred_prob_last_25": 0.06566262287087739, "mean_pred_prob_last_50": 0.03786797375651076, "mean_token_accuracy": 0.8650012791156769, "step": 18090 }, { "epoch": 0.3217606172115265, "grad_norm": 1.5270166872396365, "learning_rate": 0.0001, "loss": 0.8582, "mean_abs_error": 808.4895776320899, "mean_abs_error_last_10": 416.29189211097844, "mean_abs_error_last_25": 533.2692929047797, "mean_abs_error_last_50": 615.1633737343767, "mean_pred_prob": 0.03814596504380461, "mean_pred_prob_last_10": 0.1892145053629065, "mean_pred_prob_last_25": 0.1069736912031658, "mean_pred_prob_last_50": 0.06500805731629952, "mean_token_accuracy": 0.8708027064800262, "step": 18100 }, { "epoch": 0.32193838550832843, "grad_norm": 2.1034907437360943, "learning_rate": 0.0001, "loss": 0.7729, "mean_abs_error": 634.3146523091215, "mean_abs_error_last_10": 177.87889121281168, "mean_abs_error_last_25": 255.4299973932601, "mean_abs_error_last_50": 382.67450557295876, "mean_pred_prob": 0.026823211193550377, "mean_pred_prob_last_10": 0.14936893178964966, "mean_pred_prob_last_25": 0.07958589071058668, "mean_pred_prob_last_50": 0.046750845282804224, "mean_token_accuracy": 0.8782162725925445, "step": 18110 }, { "epoch": 0.32211615380513037, "grad_norm": 0.8952854607436131, "learning_rate": 0.0001, "loss": 0.8599, "mean_abs_error": 1308.1139624661646, "mean_abs_error_last_10": 650.1792337746576, "mean_abs_error_last_25": 698.3945799203029, "mean_abs_error_last_50": 871.3056820842114, "mean_pred_prob": 0.028661640553036705, "mean_pred_prob_last_10": 0.15321007028687744, "mean_pred_prob_last_25": 0.08291519983904436, "mean_pred_prob_last_50": 0.04949698147829622, "mean_token_accuracy": 0.8754890382289886, "step": 18120 }, { "epoch": 0.32229392210193236, "grad_norm": 2.1254432356459385, "learning_rate": 0.0001, "loss": 0.8438, "mean_abs_error": 618.6834670584662, "mean_abs_error_last_10": 265.7535299570765, "mean_abs_error_last_25": 303.24468774468204, "mean_abs_error_last_50": 386.06465825179464, "mean_pred_prob": 0.035234052821760994, "mean_pred_prob_last_10": 0.16845140544464812, "mean_pred_prob_last_25": 0.09563835022272542, "mean_pred_prob_last_50": 0.059313689521513877, "mean_token_accuracy": 0.8635033249855042, "step": 18130 }, { "epoch": 0.3224716903987343, "grad_norm": 1.9053033897375953, "learning_rate": 0.0001, "loss": 0.8226, "mean_abs_error": 696.749557151317, "mean_abs_error_last_10": 361.0340513928678, "mean_abs_error_last_25": 417.84773396607477, "mean_abs_error_last_50": 457.4206592820191, "mean_pred_prob": 0.04243210678105243, "mean_pred_prob_last_10": 0.1937501376902219, "mean_pred_prob_last_25": 0.1124532476067543, "mean_pred_prob_last_50": 0.07007789401104673, "mean_token_accuracy": 0.8737724900245667, "step": 18140 }, { "epoch": 0.32264945869553624, "grad_norm": 1.0715573811808563, "learning_rate": 0.0001, "loss": 0.906, "mean_abs_error": 954.3500972484399, "mean_abs_error_last_10": 453.9153546292565, "mean_abs_error_last_25": 668.457940060848, "mean_abs_error_last_50": 776.0366572338207, "mean_pred_prob": 0.018195606337394567, "mean_pred_prob_last_10": 0.09661661974387244, "mean_pred_prob_last_25": 0.05148514211177826, "mean_pred_prob_last_50": 0.030515771260252223, "mean_token_accuracy": 0.8617204487323761, "step": 18150 }, { "epoch": 0.3228272269923382, "grad_norm": 2.3452378126076106, "learning_rate": 0.0001, "loss": 0.8435, "mean_abs_error": 316.65106302935965, "mean_abs_error_last_10": 74.73004958307048, "mean_abs_error_last_25": 119.60026737033884, "mean_abs_error_last_50": 182.24031232248961, "mean_pred_prob": 0.028238156484439968, "mean_pred_prob_last_10": 0.1421505320817232, "mean_pred_prob_last_25": 0.0773399400524795, "mean_pred_prob_last_50": 0.04748846455477178, "mean_token_accuracy": 0.8726456463336945, "step": 18160 }, { "epoch": 0.3230049952891401, "grad_norm": 1.902726834796599, "learning_rate": 0.0001, "loss": 0.814, "mean_abs_error": 610.2017867263417, "mean_abs_error_last_10": 193.2399117827648, "mean_abs_error_last_25": 221.07860382318387, "mean_abs_error_last_50": 345.05493189394736, "mean_pred_prob": 0.0349300752277486, "mean_pred_prob_last_10": 0.16334110340103508, "mean_pred_prob_last_25": 0.09442800886463373, "mean_pred_prob_last_50": 0.05845018142135814, "mean_token_accuracy": 0.8639877140522003, "step": 18170 }, { "epoch": 0.32318276358594206, "grad_norm": 0.961901861935318, "learning_rate": 0.0001, "loss": 0.7815, "mean_abs_error": 312.3208301065457, "mean_abs_error_last_10": 290.4790092075035, "mean_abs_error_last_25": 266.63047606718237, "mean_abs_error_last_50": 294.254412760015, "mean_pred_prob": 0.038741938484599815, "mean_pred_prob_last_10": 0.20163245615549386, "mean_pred_prob_last_25": 0.10936555380467325, "mean_pred_prob_last_50": 0.06656458573415876, "mean_token_accuracy": 0.8680834352970124, "step": 18180 }, { "epoch": 0.32336053188274405, "grad_norm": 1.9657991870384195, "learning_rate": 0.0001, "loss": 0.7602, "mean_abs_error": 883.041715278078, "mean_abs_error_last_10": 308.128796042972, "mean_abs_error_last_25": 382.56425177486676, "mean_abs_error_last_50": 520.7955704622552, "mean_pred_prob": 0.025839619271573612, "mean_pred_prob_last_10": 0.10893203183077275, "mean_pred_prob_last_25": 0.06408857870846987, "mean_pred_prob_last_50": 0.0420797978003975, "mean_token_accuracy": 0.8674428701400757, "step": 18190 }, { "epoch": 0.323538300179546, "grad_norm": 1.6884796301105063, "learning_rate": 0.0001, "loss": 0.8058, "mean_abs_error": 568.0842577439869, "mean_abs_error_last_10": 240.5531251212004, "mean_abs_error_last_25": 291.8923569022005, "mean_abs_error_last_50": 390.4383697426504, "mean_pred_prob": 0.04826698828546796, "mean_pred_prob_last_10": 0.2444881010451354, "mean_pred_prob_last_25": 0.1351995050732512, "mean_pred_prob_last_50": 0.08103281386138406, "mean_token_accuracy": 0.8712025642395019, "step": 18200 }, { "epoch": 0.32371606847634793, "grad_norm": 1.116886167174442, "learning_rate": 0.0001, "loss": 0.7962, "mean_abs_error": 529.1893953378087, "mean_abs_error_last_10": 285.85180850649107, "mean_abs_error_last_25": 244.2377834765338, "mean_abs_error_last_50": 296.5409897556939, "mean_pred_prob": 0.03752595967962406, "mean_pred_prob_last_10": 0.1789314863854088, "mean_pred_prob_last_25": 0.10175964077934622, "mean_pred_prob_last_50": 0.06269467482343316, "mean_token_accuracy": 0.8689120709896088, "step": 18210 }, { "epoch": 0.32389383677314987, "grad_norm": 1.3546983426996353, "learning_rate": 0.0001, "loss": 0.773, "mean_abs_error": 212.19708692286258, "mean_abs_error_last_10": 116.75943633674379, "mean_abs_error_last_25": 217.52265737852017, "mean_abs_error_last_50": 198.57156448278283, "mean_pred_prob": 0.047191463212948294, "mean_pred_prob_last_10": 0.22360883513465524, "mean_pred_prob_last_25": 0.12510584702249616, "mean_pred_prob_last_50": 0.07829123344272375, "mean_token_accuracy": 0.8679891347885131, "step": 18220 }, { "epoch": 0.3240716050699518, "grad_norm": 2.1349009740233402, "learning_rate": 0.0001, "loss": 0.7324, "mean_abs_error": 179.5970414254588, "mean_abs_error_last_10": 29.385150001673576, "mean_abs_error_last_25": 46.303044142587126, "mean_abs_error_last_50": 85.93455359277293, "mean_pred_prob": 0.044756002351641656, "mean_pred_prob_last_10": 0.22416427582502366, "mean_pred_prob_last_25": 0.12735991925001144, "mean_pred_prob_last_50": 0.07622290272265672, "mean_token_accuracy": 0.8873585999011994, "step": 18230 }, { "epoch": 0.32424937336675375, "grad_norm": 1.3988675902931185, "learning_rate": 0.0001, "loss": 0.8933, "mean_abs_error": 275.00527994340496, "mean_abs_error_last_10": 52.94136392936216, "mean_abs_error_last_25": 98.61959566517073, "mean_abs_error_last_50": 204.3767618728283, "mean_pred_prob": 0.033493830473162234, "mean_pred_prob_last_10": 0.17620230056345462, "mean_pred_prob_last_25": 0.09467906421050429, "mean_pred_prob_last_50": 0.05683382372371852, "mean_token_accuracy": 0.877316164970398, "step": 18240 }, { "epoch": 0.32442714166355574, "grad_norm": 1.6993499944245325, "learning_rate": 0.0001, "loss": 0.7753, "mean_abs_error": 291.42983353960017, "mean_abs_error_last_10": 79.62640395450936, "mean_abs_error_last_25": 169.33925152052353, "mean_abs_error_last_50": 173.81459395079258, "mean_pred_prob": 0.05213732807897031, "mean_pred_prob_last_10": 0.23978570830076934, "mean_pred_prob_last_25": 0.14016832252964379, "mean_pred_prob_last_50": 0.08806600156240166, "mean_token_accuracy": 0.8712084710597991, "step": 18250 }, { "epoch": 0.3246049099603577, "grad_norm": 2.324161929363394, "learning_rate": 0.0001, "loss": 0.8172, "mean_abs_error": 396.7232881363617, "mean_abs_error_last_10": 78.04994899507008, "mean_abs_error_last_25": 108.61273662798332, "mean_abs_error_last_50": 214.40503069420325, "mean_pred_prob": 0.040053412318229675, "mean_pred_prob_last_10": 0.19960498809814453, "mean_pred_prob_last_25": 0.11106496267020702, "mean_pred_prob_last_50": 0.06787028545513749, "mean_token_accuracy": 0.8741066455841064, "step": 18260 }, { "epoch": 0.3247826782571596, "grad_norm": 1.9126829605262143, "learning_rate": 0.0001, "loss": 0.8739, "mean_abs_error": 1521.7132883233348, "mean_abs_error_last_10": 800.3222373575062, "mean_abs_error_last_25": 874.3385892525403, "mean_abs_error_last_50": 1065.479942802194, "mean_pred_prob": 0.021759696339722724, "mean_pred_prob_last_10": 0.11406204248487484, "mean_pred_prob_last_25": 0.06181759274040814, "mean_pred_prob_last_50": 0.03726156332122628, "mean_token_accuracy": 0.8745851039886474, "step": 18270 }, { "epoch": 0.32496044655396156, "grad_norm": 1.6386829067461333, "learning_rate": 0.0001, "loss": 0.7916, "mean_abs_error": 93.07905596202981, "mean_abs_error_last_10": 28.033360431873014, "mean_abs_error_last_25": 49.217668003172186, "mean_abs_error_last_50": 56.43551944623298, "mean_pred_prob": 0.04457079619169235, "mean_pred_prob_last_10": 0.2155615694820881, "mean_pred_prob_last_25": 0.11772014759480953, "mean_pred_prob_last_50": 0.07454419638961554, "mean_token_accuracy": 0.8823931574821472, "step": 18280 }, { "epoch": 0.3251382148507635, "grad_norm": 1.6075373414408813, "learning_rate": 0.0001, "loss": 0.8148, "mean_abs_error": 285.1001210356291, "mean_abs_error_last_10": 157.5289734647431, "mean_abs_error_last_25": 169.45576085715308, "mean_abs_error_last_50": 216.65683922372605, "mean_pred_prob": 0.03280070079490542, "mean_pred_prob_last_10": 0.16412412971258164, "mean_pred_prob_last_25": 0.09011124260723591, "mean_pred_prob_last_50": 0.05502726007252932, "mean_token_accuracy": 0.8785916566848755, "step": 18290 }, { "epoch": 0.3253159831475655, "grad_norm": 1.6241010274937742, "learning_rate": 0.0001, "loss": 0.7851, "mean_abs_error": 196.76410862079086, "mean_abs_error_last_10": 70.30149354386717, "mean_abs_error_last_25": 109.15136796879797, "mean_abs_error_last_50": 168.9771972612819, "mean_pred_prob": 0.03855428965762257, "mean_pred_prob_last_10": 0.1993277233093977, "mean_pred_prob_last_25": 0.1082953829318285, "mean_pred_prob_last_50": 0.06492542140185834, "mean_token_accuracy": 0.8791725754737854, "step": 18300 }, { "epoch": 0.32549375144436743, "grad_norm": 1.8818666805731323, "learning_rate": 0.0001, "loss": 0.8351, "mean_abs_error": 492.6776463434905, "mean_abs_error_last_10": 144.23583060344498, "mean_abs_error_last_25": 234.7337542891555, "mean_abs_error_last_50": 323.99807892200846, "mean_pred_prob": 0.03771644405787811, "mean_pred_prob_last_10": 0.18738606390543283, "mean_pred_prob_last_25": 0.1018166529480368, "mean_pred_prob_last_50": 0.0628347285790369, "mean_token_accuracy": 0.880917888879776, "step": 18310 }, { "epoch": 0.32567151974116937, "grad_norm": 2.020052835566804, "learning_rate": 0.0001, "loss": 0.8399, "mean_abs_error": 202.5813959853969, "mean_abs_error_last_10": 65.33906419178564, "mean_abs_error_last_25": 92.45138881122104, "mean_abs_error_last_50": 143.65813019339106, "mean_pred_prob": 0.038650613743811846, "mean_pred_prob_last_10": 0.20082074254751206, "mean_pred_prob_last_25": 0.10679200831800699, "mean_pred_prob_last_50": 0.06459994837641717, "mean_token_accuracy": 0.8780498445034027, "step": 18320 }, { "epoch": 0.3258492880379713, "grad_norm": 0.9512853212455407, "learning_rate": 0.0001, "loss": 0.7529, "mean_abs_error": 416.6341739781739, "mean_abs_error_last_10": 110.75473366379933, "mean_abs_error_last_25": 179.7177806959959, "mean_abs_error_last_50": 275.01341047664295, "mean_pred_prob": 0.03260158625198528, "mean_pred_prob_last_10": 0.15905254776589572, "mean_pred_prob_last_25": 0.0876653717365116, "mean_pred_prob_last_50": 0.05419018026441336, "mean_token_accuracy": 0.8838175535202026, "step": 18330 }, { "epoch": 0.32602705633477325, "grad_norm": 1.7089202373818546, "learning_rate": 0.0001, "loss": 0.9481, "mean_abs_error": 754.4807856584694, "mean_abs_error_last_10": 382.0130026491196, "mean_abs_error_last_25": 423.81747901560357, "mean_abs_error_last_50": 495.01804221703907, "mean_pred_prob": 0.0344590924854856, "mean_pred_prob_last_10": 0.1570190686150454, "mean_pred_prob_last_25": 0.09005470990668982, "mean_pred_prob_last_50": 0.05661203369381838, "mean_token_accuracy": 0.857575124502182, "step": 18340 }, { "epoch": 0.3262048246315752, "grad_norm": 0.776349074126145, "learning_rate": 0.0001, "loss": 0.7914, "mean_abs_error": 1618.441160776372, "mean_abs_error_last_10": 1186.902795967378, "mean_abs_error_last_25": 1225.302132401382, "mean_abs_error_last_50": 1320.3512789912534, "mean_pred_prob": 0.030039031150226946, "mean_pred_prob_last_10": 0.14771251318015857, "mean_pred_prob_last_25": 0.08347203008306678, "mean_pred_prob_last_50": 0.0515879333295743, "mean_token_accuracy": 0.8753811776638031, "step": 18350 }, { "epoch": 0.3263825929283772, "grad_norm": 2.338796883628838, "learning_rate": 0.0001, "loss": 0.7848, "mean_abs_error": 247.7659745526883, "mean_abs_error_last_10": 154.42595376438612, "mean_abs_error_last_25": 206.19658721183947, "mean_abs_error_last_50": 201.8007064970978, "mean_pred_prob": 0.048479388054693116, "mean_pred_prob_last_10": 0.23219931310741232, "mean_pred_prob_last_25": 0.13183535744901748, "mean_pred_prob_last_50": 0.08124720954801887, "mean_token_accuracy": 0.872379207611084, "step": 18360 }, { "epoch": 0.3265603612251791, "grad_norm": 1.669840110335666, "learning_rate": 0.0001, "loss": 0.7448, "mean_abs_error": 1052.0561395734474, "mean_abs_error_last_10": 256.63695490932923, "mean_abs_error_last_25": 344.99379604857984, "mean_abs_error_last_50": 546.7071635460813, "mean_pred_prob": 0.029024869605200364, "mean_pred_prob_last_10": 0.1495379071449861, "mean_pred_prob_last_25": 0.08347561914706603, "mean_pred_prob_last_50": 0.049757301615318286, "mean_token_accuracy": 0.8790387034416198, "step": 18370 }, { "epoch": 0.32673812952198106, "grad_norm": 1.2605787302720364, "learning_rate": 0.0001, "loss": 0.7967, "mean_abs_error": 138.7559387435859, "mean_abs_error_last_10": 58.9295388679534, "mean_abs_error_last_25": 64.39022333702127, "mean_abs_error_last_50": 91.29776793104192, "mean_pred_prob": 0.04867013175971806, "mean_pred_prob_last_10": 0.22350462935864926, "mean_pred_prob_last_25": 0.13049577102065085, "mean_pred_prob_last_50": 0.08068470442667604, "mean_token_accuracy": 0.8759751260280609, "step": 18380 }, { "epoch": 0.326915897818783, "grad_norm": 1.7247274964495225, "learning_rate": 0.0001, "loss": 0.7499, "mean_abs_error": 349.46914870745417, "mean_abs_error_last_10": 167.50618945777524, "mean_abs_error_last_25": 179.40047481748917, "mean_abs_error_last_50": 226.8755288618715, "mean_pred_prob": 0.019514894718304275, "mean_pred_prob_last_10": 0.11352579277008772, "mean_pred_prob_last_25": 0.059078690223395826, "mean_pred_prob_last_50": 0.03391144135966897, "mean_token_accuracy": 0.8733188688755036, "step": 18390 }, { "epoch": 0.32709366611558494, "grad_norm": 1.3463571342650864, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 290.81382865725055, "mean_abs_error_last_10": 37.19582422911425, "mean_abs_error_last_25": 75.16443855760708, "mean_abs_error_last_50": 142.68019562204137, "mean_pred_prob": 0.029066398972645402, "mean_pred_prob_last_10": 0.15452360287308692, "mean_pred_prob_last_25": 0.08223635517060757, "mean_pred_prob_last_50": 0.04947080314159393, "mean_token_accuracy": 0.8855311632156372, "step": 18400 }, { "epoch": 0.3272714344123869, "grad_norm": 1.5829755339033535, "learning_rate": 0.0001, "loss": 0.9069, "mean_abs_error": 489.20632813650974, "mean_abs_error_last_10": 193.8702560411495, "mean_abs_error_last_25": 233.81455904885374, "mean_abs_error_last_50": 288.0192431056451, "mean_pred_prob": 0.027750764915253967, "mean_pred_prob_last_10": 0.13814691887237132, "mean_pred_prob_last_25": 0.07690458153374494, "mean_pred_prob_last_50": 0.04734831021632999, "mean_token_accuracy": 0.8647927045822144, "step": 18410 }, { "epoch": 0.32744920270918887, "grad_norm": 1.696407410205119, "learning_rate": 0.0001, "loss": 0.8683, "mean_abs_error": 360.1383861117687, "mean_abs_error_last_10": 70.98086738993652, "mean_abs_error_last_25": 113.18036292789793, "mean_abs_error_last_50": 206.29483049845726, "mean_pred_prob": 0.028487358149141072, "mean_pred_prob_last_10": 0.1464202620089054, "mean_pred_prob_last_25": 0.08003448564559221, "mean_pred_prob_last_50": 0.048058060463517904, "mean_token_accuracy": 0.8687456607818603, "step": 18420 }, { "epoch": 0.3276269710059908, "grad_norm": 1.3126810837890448, "learning_rate": 0.0001, "loss": 0.8278, "mean_abs_error": 791.0669052175543, "mean_abs_error_last_10": 205.7601231056214, "mean_abs_error_last_25": 243.82533212388662, "mean_abs_error_last_50": 423.92007410618373, "mean_pred_prob": 0.018239586404524742, "mean_pred_prob_last_10": 0.1056964315706864, "mean_pred_prob_last_25": 0.05516592503990978, "mean_pred_prob_last_50": 0.03211760655976832, "mean_token_accuracy": 0.8784541666507721, "step": 18430 }, { "epoch": 0.32780473930279275, "grad_norm": 1.5594039334468133, "learning_rate": 0.0001, "loss": 0.6892, "mean_abs_error": 145.3439268994535, "mean_abs_error_last_10": 36.69607800650509, "mean_abs_error_last_25": 75.2094688580985, "mean_abs_error_last_50": 124.53735503090243, "mean_pred_prob": 0.042434232262894514, "mean_pred_prob_last_10": 0.20400940291583539, "mean_pred_prob_last_25": 0.1152411088347435, "mean_pred_prob_last_50": 0.07083306778222323, "mean_token_accuracy": 0.881483668088913, "step": 18440 }, { "epoch": 0.3279825075995947, "grad_norm": 2.8533534974416455, "learning_rate": 0.0001, "loss": 0.8436, "mean_abs_error": 1395.528428258202, "mean_abs_error_last_10": 725.8625861533487, "mean_abs_error_last_25": 805.1862895738177, "mean_abs_error_last_50": 985.0338093070916, "mean_pred_prob": 0.03804795195173938, "mean_pred_prob_last_10": 0.18863058698188978, "mean_pred_prob_last_25": 0.10427528805157635, "mean_pred_prob_last_50": 0.06230500489182304, "mean_token_accuracy": 0.8565630912780762, "step": 18450 }, { "epoch": 0.3281602758963966, "grad_norm": 1.340211177447792, "learning_rate": 0.0001, "loss": 0.8247, "mean_abs_error": 735.1166957531242, "mean_abs_error_last_10": 279.08942423343353, "mean_abs_error_last_25": 339.80886522093067, "mean_abs_error_last_50": 501.6542433661555, "mean_pred_prob": 0.026136301271617413, "mean_pred_prob_last_10": 0.11692690346390008, "mean_pred_prob_last_25": 0.06731386790052056, "mean_pred_prob_last_50": 0.043188280425965786, "mean_token_accuracy": 0.8743503987789154, "step": 18460 }, { "epoch": 0.32833804419319856, "grad_norm": 0.9667213551135564, "learning_rate": 0.0001, "loss": 0.7624, "mean_abs_error": 368.34512664535976, "mean_abs_error_last_10": 90.24844088017048, "mean_abs_error_last_25": 137.25428411209802, "mean_abs_error_last_50": 233.86710049511368, "mean_pred_prob": 0.031182594457641245, "mean_pred_prob_last_10": 0.1674908075481653, "mean_pred_prob_last_25": 0.0903501246124506, "mean_pred_prob_last_50": 0.05412388313561678, "mean_token_accuracy": 0.8763676643371582, "step": 18470 }, { "epoch": 0.32851581249000056, "grad_norm": 1.6123779951278827, "learning_rate": 0.0001, "loss": 0.775, "mean_abs_error": 636.9141991193903, "mean_abs_error_last_10": 196.01242286493292, "mean_abs_error_last_25": 288.6297180988016, "mean_abs_error_last_50": 482.5805499550619, "mean_pred_prob": 0.03647345526551362, "mean_pred_prob_last_10": 0.1861514512100257, "mean_pred_prob_last_25": 0.10321226321393624, "mean_pred_prob_last_50": 0.062281684286426754, "mean_token_accuracy": 0.8749654412269592, "step": 18480 }, { "epoch": 0.3286935807868025, "grad_norm": 2.085386807545115, "learning_rate": 0.0001, "loss": 0.8361, "mean_abs_error": 733.2673669099773, "mean_abs_error_last_10": 205.68537999113522, "mean_abs_error_last_25": 280.0938756146688, "mean_abs_error_last_50": 444.9193173336647, "mean_pred_prob": 0.03206635061069392, "mean_pred_prob_last_10": 0.16793340498115866, "mean_pred_prob_last_25": 0.09464395438553766, "mean_pred_prob_last_50": 0.05535303192446008, "mean_token_accuracy": 0.8672299087047577, "step": 18490 }, { "epoch": 0.32887134908360444, "grad_norm": 1.218473084378851, "learning_rate": 0.0001, "loss": 0.8587, "mean_abs_error": 673.6949625576566, "mean_abs_error_last_10": 260.83391945406146, "mean_abs_error_last_25": 394.4348636508901, "mean_abs_error_last_50": 503.1600579112207, "mean_pred_prob": 0.02928116304683499, "mean_pred_prob_last_10": 0.14105685586109756, "mean_pred_prob_last_25": 0.07720308325369843, "mean_pred_prob_last_50": 0.048331803031032905, "mean_token_accuracy": 0.8723182439804077, "step": 18500 }, { "epoch": 0.3290491173804064, "grad_norm": 1.3330453520115857, "learning_rate": 0.0001, "loss": 0.7443, "mean_abs_error": 89.48138332353116, "mean_abs_error_last_10": 23.193868220661834, "mean_abs_error_last_25": 32.36313212622134, "mean_abs_error_last_50": 51.63124036942772, "mean_pred_prob": 0.05864855283871293, "mean_pred_prob_last_10": 0.27944047674536704, "mean_pred_prob_last_25": 0.1587781384587288, "mean_pred_prob_last_50": 0.09868807960301637, "mean_token_accuracy": 0.8808895766735076, "step": 18510 }, { "epoch": 0.3292268856772083, "grad_norm": 2.2445341611898217, "learning_rate": 0.0001, "loss": 0.8106, "mean_abs_error": 824.5565118541908, "mean_abs_error_last_10": 349.23311568646363, "mean_abs_error_last_25": 404.16429970525013, "mean_abs_error_last_50": 590.2053808873991, "mean_pred_prob": 0.033194558182731274, "mean_pred_prob_last_10": 0.17023599526728503, "mean_pred_prob_last_25": 0.09391593622858636, "mean_pred_prob_last_50": 0.05686689692665823, "mean_token_accuracy": 0.8780743241310119, "step": 18520 }, { "epoch": 0.32940465397401025, "grad_norm": 2.0630729656412026, "learning_rate": 0.0001, "loss": 0.7802, "mean_abs_error": 275.2553889713362, "mean_abs_error_last_10": 56.326414604798615, "mean_abs_error_last_25": 85.3051898502384, "mean_abs_error_last_50": 154.62357464700966, "mean_pred_prob": 0.03375731930136681, "mean_pred_prob_last_10": 0.19397749081254007, "mean_pred_prob_last_25": 0.10011036228388548, "mean_pred_prob_last_50": 0.05852513313293457, "mean_token_accuracy": 0.8751586735248565, "step": 18530 }, { "epoch": 0.32958242227081225, "grad_norm": 0.9502829815872511, "learning_rate": 0.0001, "loss": 0.8006, "mean_abs_error": 327.9930592859726, "mean_abs_error_last_10": 138.12980742766615, "mean_abs_error_last_25": 191.6991288196304, "mean_abs_error_last_50": 219.21928405668035, "mean_pred_prob": 0.030865928111597895, "mean_pred_prob_last_10": 0.1638521861284971, "mean_pred_prob_last_25": 0.08600162556394934, "mean_pred_prob_last_50": 0.05156432129442692, "mean_token_accuracy": 0.8816343665122985, "step": 18540 }, { "epoch": 0.3297601905676142, "grad_norm": 1.7882575635076425, "learning_rate": 0.0001, "loss": 0.7883, "mean_abs_error": 232.0357963310631, "mean_abs_error_last_10": 96.2687035643592, "mean_abs_error_last_25": 120.37423793819269, "mean_abs_error_last_50": 156.02213849699842, "mean_pred_prob": 0.03133946498855948, "mean_pred_prob_last_10": 0.16845901533961297, "mean_pred_prob_last_25": 0.08558905571699142, "mean_pred_prob_last_50": 0.05279449848458171, "mean_token_accuracy": 0.871747100353241, "step": 18550 }, { "epoch": 0.3299379588644161, "grad_norm": 1.9917256223728972, "learning_rate": 0.0001, "loss": 0.8727, "mean_abs_error": 376.42503399351114, "mean_abs_error_last_10": 61.636856914730245, "mean_abs_error_last_25": 202.65626968475584, "mean_abs_error_last_50": 305.55089259962205, "mean_pred_prob": 0.04057723726145923, "mean_pred_prob_last_10": 0.20251231752336024, "mean_pred_prob_last_25": 0.11090642353519797, "mean_pred_prob_last_50": 0.06798492707312107, "mean_token_accuracy": 0.8742652177810669, "step": 18560 }, { "epoch": 0.33011572716121806, "grad_norm": 1.1794620096253052, "learning_rate": 0.0001, "loss": 0.8098, "mean_abs_error": 816.2863725746734, "mean_abs_error_last_10": 402.0288663234593, "mean_abs_error_last_25": 485.6262332465694, "mean_abs_error_last_50": 533.0091144508411, "mean_pred_prob": 0.021979802811983973, "mean_pred_prob_last_10": 0.12162137170089408, "mean_pred_prob_last_25": 0.06286340473452583, "mean_pred_prob_last_50": 0.03759432890801691, "mean_token_accuracy": 0.8724806904792786, "step": 18570 }, { "epoch": 0.33029349545802, "grad_norm": 1.0516516943072152, "learning_rate": 0.0001, "loss": 0.8115, "mean_abs_error": 254.18756169045636, "mean_abs_error_last_10": 119.85589606935278, "mean_abs_error_last_25": 128.36691194061933, "mean_abs_error_last_50": 148.89175204331286, "mean_pred_prob": 0.033987704757601024, "mean_pred_prob_last_10": 0.17064560130238532, "mean_pred_prob_last_25": 0.09428427312523127, "mean_pred_prob_last_50": 0.057562863314524294, "mean_token_accuracy": 0.8703697979450226, "step": 18580 }, { "epoch": 0.33047126375482194, "grad_norm": 1.7757422017036102, "learning_rate": 0.0001, "loss": 0.8631, "mean_abs_error": 360.66286595189564, "mean_abs_error_last_10": 122.28872793446185, "mean_abs_error_last_25": 204.41411782331852, "mean_abs_error_last_50": 261.2654247013722, "mean_pred_prob": 0.03432408394291997, "mean_pred_prob_last_10": 0.17361882161349057, "mean_pred_prob_last_25": 0.09737260024994612, "mean_pred_prob_last_50": 0.05891823337879032, "mean_token_accuracy": 0.8775656819343567, "step": 18590 }, { "epoch": 0.33064903205162394, "grad_norm": 1.5333461616517976, "learning_rate": 0.0001, "loss": 0.8185, "mean_abs_error": 191.43658554980902, "mean_abs_error_last_10": 44.13983769594692, "mean_abs_error_last_25": 61.85153428827117, "mean_abs_error_last_50": 101.88376877950134, "mean_pred_prob": 0.034062767401337624, "mean_pred_prob_last_10": 0.17254307940602304, "mean_pred_prob_last_25": 0.09463264737278224, "mean_pred_prob_last_50": 0.05805373527109623, "mean_token_accuracy": 0.8817846655845643, "step": 18600 }, { "epoch": 0.3308268003484259, "grad_norm": 3.695454309531637, "learning_rate": 0.0001, "loss": 0.8394, "mean_abs_error": 326.5888227251326, "mean_abs_error_last_10": 42.16130551944908, "mean_abs_error_last_25": 94.61210811341638, "mean_abs_error_last_50": 174.656088388492, "mean_pred_prob": 0.030300757521763443, "mean_pred_prob_last_10": 0.1682271510362625, "mean_pred_prob_last_25": 0.09014905244112015, "mean_pred_prob_last_50": 0.05288312891498208, "mean_token_accuracy": 0.8755519390106201, "step": 18610 }, { "epoch": 0.3310045686452278, "grad_norm": 1.0041482343906072, "learning_rate": 0.0001, "loss": 0.8092, "mean_abs_error": 481.7989123154442, "mean_abs_error_last_10": 90.27738274316539, "mean_abs_error_last_25": 121.58143257315048, "mean_abs_error_last_50": 221.8441741563948, "mean_pred_prob": 0.0340229501365684, "mean_pred_prob_last_10": 0.16276951427571476, "mean_pred_prob_last_25": 0.09193083131685853, "mean_pred_prob_last_50": 0.0570507348049432, "mean_token_accuracy": 0.8773792386054993, "step": 18620 }, { "epoch": 0.33118233694202975, "grad_norm": 0.8813199611650245, "learning_rate": 0.0001, "loss": 0.7879, "mean_abs_error": 1218.7529986724123, "mean_abs_error_last_10": 647.372849229563, "mean_abs_error_last_25": 746.0516499719248, "mean_abs_error_last_50": 870.9008065752334, "mean_pred_prob": 0.04303925128333504, "mean_pred_prob_last_10": 0.2168667523568729, "mean_pred_prob_last_25": 0.1199086051841732, "mean_pred_prob_last_50": 0.07295448654331267, "mean_token_accuracy": 0.8652234137058258, "step": 18630 }, { "epoch": 0.3313601052388317, "grad_norm": 1.8157067131097862, "learning_rate": 0.0001, "loss": 0.9086, "mean_abs_error": 409.2475425052263, "mean_abs_error_last_10": 29.412177370056536, "mean_abs_error_last_25": 150.10042448973297, "mean_abs_error_last_50": 274.8722625365431, "mean_pred_prob": 0.0504309335257858, "mean_pred_prob_last_10": 0.24057886153459548, "mean_pred_prob_last_25": 0.1377820611000061, "mean_pred_prob_last_50": 0.08426203923299909, "mean_token_accuracy": 0.8688515961170197, "step": 18640 }, { "epoch": 0.33153787353563363, "grad_norm": 1.8758581662289726, "learning_rate": 0.0001, "loss": 0.7591, "mean_abs_error": 406.4326548479052, "mean_abs_error_last_10": 209.7401232498981, "mean_abs_error_last_25": 230.02566575422628, "mean_abs_error_last_50": 248.51858643032637, "mean_pred_prob": 0.030940111738163977, "mean_pred_prob_last_10": 0.16739771292777733, "mean_pred_prob_last_25": 0.08839829955250025, "mean_pred_prob_last_50": 0.05285218695644289, "mean_token_accuracy": 0.8773141205310822, "step": 18650 }, { "epoch": 0.3317156418324356, "grad_norm": 1.1674989869608678, "learning_rate": 0.0001, "loss": 0.8131, "mean_abs_error": 483.6362088759741, "mean_abs_error_last_10": 115.37511349567323, "mean_abs_error_last_25": 205.02841441150062, "mean_abs_error_last_50": 322.2976921168518, "mean_pred_prob": 0.036240383010590446, "mean_pred_prob_last_10": 0.18424006893765182, "mean_pred_prob_last_25": 0.09999196211574599, "mean_pred_prob_last_50": 0.061031876463675874, "mean_token_accuracy": 0.8794430613517761, "step": 18660 }, { "epoch": 0.33189341012923756, "grad_norm": 1.2721227216801918, "learning_rate": 0.0001, "loss": 0.8364, "mean_abs_error": 589.94309993199, "mean_abs_error_last_10": 123.77268446433341, "mean_abs_error_last_25": 278.5878911524212, "mean_abs_error_last_50": 421.14647325952666, "mean_pred_prob": 0.03447618497884832, "mean_pred_prob_last_10": 0.17475206074304878, "mean_pred_prob_last_25": 0.0967979741981253, "mean_pred_prob_last_50": 0.05863655796856619, "mean_token_accuracy": 0.8746762216091156, "step": 18670 }, { "epoch": 0.3320711784260395, "grad_norm": 0.9634238200374777, "learning_rate": 0.0001, "loss": 0.7424, "mean_abs_error": 196.06604702710746, "mean_abs_error_last_10": 55.92100281238341, "mean_abs_error_last_25": 78.01633106274637, "mean_abs_error_last_50": 115.14057969226283, "mean_pred_prob": 0.04469002531841397, "mean_pred_prob_last_10": 0.2285363208502531, "mean_pred_prob_last_25": 0.12658938784152268, "mean_pred_prob_last_50": 0.07555224150419235, "mean_token_accuracy": 0.884108978509903, "step": 18680 }, { "epoch": 0.33224894672284144, "grad_norm": 2.5466764864449374, "learning_rate": 0.0001, "loss": 0.8009, "mean_abs_error": 767.7175131439692, "mean_abs_error_last_10": 206.9268197360565, "mean_abs_error_last_25": 226.15036258094295, "mean_abs_error_last_50": 382.78219116538986, "mean_pred_prob": 0.026837825652910395, "mean_pred_prob_last_10": 0.12914552607107907, "mean_pred_prob_last_25": 0.07383736647898331, "mean_pred_prob_last_50": 0.04539728492964059, "mean_token_accuracy": 0.8734067738056183, "step": 18690 }, { "epoch": 0.3324267150196434, "grad_norm": 1.1582541563266233, "learning_rate": 0.0001, "loss": 0.9508, "mean_abs_error": 1073.9895544073092, "mean_abs_error_last_10": 620.9071989403149, "mean_abs_error_last_25": 708.2219354560668, "mean_abs_error_last_50": 826.9213490216947, "mean_pred_prob": 0.03963486780776293, "mean_pred_prob_last_10": 0.1991481154662324, "mean_pred_prob_last_25": 0.11090007830644026, "mean_pred_prob_last_50": 0.06786989071260904, "mean_token_accuracy": 0.8688885569572449, "step": 18700 }, { "epoch": 0.3326044833164453, "grad_norm": 1.276976098085854, "learning_rate": 0.0001, "loss": 0.8308, "mean_abs_error": 325.0162810580708, "mean_abs_error_last_10": 66.70309136687578, "mean_abs_error_last_25": 89.84447792478142, "mean_abs_error_last_50": 156.3783706539271, "mean_pred_prob": 0.039248327165842055, "mean_pred_prob_last_10": 0.20473566465079784, "mean_pred_prob_last_25": 0.11243942473083735, "mean_pred_prob_last_50": 0.06733413366600871, "mean_token_accuracy": 0.8766953766345977, "step": 18710 }, { "epoch": 0.3327822516132473, "grad_norm": 1.433945234560548, "learning_rate": 0.0001, "loss": 0.8987, "mean_abs_error": 885.4834415858265, "mean_abs_error_last_10": 506.65827442248127, "mean_abs_error_last_25": 569.3203038484298, "mean_abs_error_last_50": 630.680407505331, "mean_pred_prob": 0.019822400389239193, "mean_pred_prob_last_10": 0.09394922347273678, "mean_pred_prob_last_25": 0.05448156115598977, "mean_pred_prob_last_50": 0.033314400375820695, "mean_token_accuracy": 0.8669476866722107, "step": 18720 }, { "epoch": 0.33296001991004925, "grad_norm": 1.05856852982901, "learning_rate": 0.0001, "loss": 0.8905, "mean_abs_error": 932.5366946519098, "mean_abs_error_last_10": 441.69374455915784, "mean_abs_error_last_25": 553.5625646864536, "mean_abs_error_last_50": 625.1009055630951, "mean_pred_prob": 0.01088567201513797, "mean_pred_prob_last_10": 0.06706480588763952, "mean_pred_prob_last_25": 0.03259131945669651, "mean_pred_prob_last_50": 0.01903510936535895, "mean_token_accuracy": 0.8670451164245605, "step": 18730 }, { "epoch": 0.3331377882068512, "grad_norm": 0.9567694101444102, "learning_rate": 0.0001, "loss": 0.7457, "mean_abs_error": 145.95546921423625, "mean_abs_error_last_10": 71.09202057244153, "mean_abs_error_last_25": 78.97923226166193, "mean_abs_error_last_50": 93.8583637123384, "mean_pred_prob": 0.0505803216714412, "mean_pred_prob_last_10": 0.25446309596300126, "mean_pred_prob_last_25": 0.14292063629254698, "mean_pred_prob_last_50": 0.08689959496259689, "mean_token_accuracy": 0.8756300330162048, "step": 18740 }, { "epoch": 0.33331555650365313, "grad_norm": 2.6536827659030675, "learning_rate": 0.0001, "loss": 0.8219, "mean_abs_error": 478.9775855078212, "mean_abs_error_last_10": 84.3212039971329, "mean_abs_error_last_25": 171.0609300919931, "mean_abs_error_last_50": 291.3147377457723, "mean_pred_prob": 0.024800535198301076, "mean_pred_prob_last_10": 0.1445433009415865, "mean_pred_prob_last_25": 0.07447862792760133, "mean_pred_prob_last_50": 0.04357613725587726, "mean_token_accuracy": 0.8845582008361816, "step": 18750 }, { "epoch": 0.33349332480045507, "grad_norm": 1.2205032924195338, "learning_rate": 0.0001, "loss": 0.9262, "mean_abs_error": 148.1819918104207, "mean_abs_error_last_10": 45.90215810068385, "mean_abs_error_last_25": 63.465266555643176, "mean_abs_error_last_50": 97.68290839990101, "mean_pred_prob": 0.03319445122033358, "mean_pred_prob_last_10": 0.17342077605426312, "mean_pred_prob_last_25": 0.09543259404599666, "mean_pred_prob_last_50": 0.056475557200610635, "mean_token_accuracy": 0.8800024032592774, "step": 18760 }, { "epoch": 0.333671093097257, "grad_norm": 1.7204053091503928, "learning_rate": 0.0001, "loss": 0.7355, "mean_abs_error": 179.66225255222443, "mean_abs_error_last_10": 88.39679711205159, "mean_abs_error_last_25": 102.58121832574781, "mean_abs_error_last_50": 128.6429944518577, "mean_pred_prob": 0.0639046428608708, "mean_pred_prob_last_10": 0.28079352881759406, "mean_pred_prob_last_25": 0.16711463500978424, "mean_pred_prob_last_50": 0.10623506978154182, "mean_token_accuracy": 0.8714821577072144, "step": 18770 }, { "epoch": 0.333848861394059, "grad_norm": 1.6335045826010879, "learning_rate": 0.0001, "loss": 0.8605, "mean_abs_error": 455.02202868139983, "mean_abs_error_last_10": 105.14353579871575, "mean_abs_error_last_25": 120.52324724596653, "mean_abs_error_last_50": 241.65239981573419, "mean_pred_prob": 0.030714378162520005, "mean_pred_prob_last_10": 0.16424842313863336, "mean_pred_prob_last_25": 0.09287388131488114, "mean_pred_prob_last_50": 0.053919819695875046, "mean_token_accuracy": 0.8691600263118744, "step": 18780 }, { "epoch": 0.33402662969086094, "grad_norm": 1.0342952085332102, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 216.61428607319, "mean_abs_error_last_10": 119.61990864924796, "mean_abs_error_last_25": 113.51374489729658, "mean_abs_error_last_50": 164.58099317909418, "mean_pred_prob": 0.037755302619189024, "mean_pred_prob_last_10": 0.19546212535351515, "mean_pred_prob_last_25": 0.10799449533224106, "mean_pred_prob_last_50": 0.06387549010105431, "mean_token_accuracy": 0.8745408236980439, "step": 18790 }, { "epoch": 0.3342043979876629, "grad_norm": 1.1664407585316152, "learning_rate": 0.0001, "loss": 0.7323, "mean_abs_error": 1309.7109668459357, "mean_abs_error_last_10": 953.4030979367413, "mean_abs_error_last_25": 1008.3385218231842, "mean_abs_error_last_50": 1068.9350785323925, "mean_pred_prob": 0.021335294857999544, "mean_pred_prob_last_10": 0.11969922811767901, "mean_pred_prob_last_25": 0.06131674098287476, "mean_pred_prob_last_50": 0.03658369092445355, "mean_token_accuracy": 0.8781621813774109, "step": 18800 }, { "epoch": 0.3343821662844648, "grad_norm": 1.0017562970533698, "learning_rate": 0.0001, "loss": 0.7709, "mean_abs_error": 164.66647599875162, "mean_abs_error_last_10": 38.22370667861966, "mean_abs_error_last_25": 71.62143676961684, "mean_abs_error_last_50": 121.49559295596919, "mean_pred_prob": 0.050243648467585444, "mean_pred_prob_last_10": 0.24219829700887202, "mean_pred_prob_last_25": 0.13684084359556437, "mean_pred_prob_last_50": 0.08339278288185596, "mean_token_accuracy": 0.8722711324691772, "step": 18810 }, { "epoch": 0.33455993458126676, "grad_norm": 1.5344096704377514, "learning_rate": 0.0001, "loss": 0.7533, "mean_abs_error": 211.69193998879345, "mean_abs_error_last_10": 54.86453780244602, "mean_abs_error_last_25": 72.85834908988599, "mean_abs_error_last_50": 108.12386461166616, "mean_pred_prob": 0.041993035236373544, "mean_pred_prob_last_10": 0.20326083786785604, "mean_pred_prob_last_25": 0.11568291112780571, "mean_pred_prob_last_50": 0.07125315060839058, "mean_token_accuracy": 0.8763066291809082, "step": 18820 }, { "epoch": 0.3347377028780687, "grad_norm": 1.8436431525453179, "learning_rate": 0.0001, "loss": 0.8501, "mean_abs_error": 304.646161847446, "mean_abs_error_last_10": 53.617421617787045, "mean_abs_error_last_25": 169.1191523938335, "mean_abs_error_last_50": 210.30903720501541, "mean_pred_prob": 0.039796673925593495, "mean_pred_prob_last_10": 0.20506888646632432, "mean_pred_prob_last_25": 0.11045397026464343, "mean_pred_prob_last_50": 0.06779924682341516, "mean_token_accuracy": 0.8758321046829224, "step": 18830 }, { "epoch": 0.3349154711748707, "grad_norm": 1.4017295185031573, "learning_rate": 0.0001, "loss": 0.7941, "mean_abs_error": 395.9890278513287, "mean_abs_error_last_10": 317.61599640510155, "mean_abs_error_last_25": 389.99307322374796, "mean_abs_error_last_50": 368.75333839023017, "mean_pred_prob": 0.0338026309851557, "mean_pred_prob_last_10": 0.16059159906581044, "mean_pred_prob_last_25": 0.09387416038662195, "mean_pred_prob_last_50": 0.05717314116191119, "mean_token_accuracy": 0.8838113903999328, "step": 18840 }, { "epoch": 0.33509323947167263, "grad_norm": 1.8776788880417066, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 150.23836695816925, "mean_abs_error_last_10": 55.75645233347127, "mean_abs_error_last_25": 68.64258339523772, "mean_abs_error_last_50": 90.13829394272844, "mean_pred_prob": 0.039283493813127277, "mean_pred_prob_last_10": 0.19398999065160752, "mean_pred_prob_last_25": 0.10757679603993893, "mean_pred_prob_last_50": 0.06609913464635611, "mean_token_accuracy": 0.8781963646411896, "step": 18850 }, { "epoch": 0.33527100776847457, "grad_norm": 0.7993072174203081, "learning_rate": 0.0001, "loss": 0.8655, "mean_abs_error": 777.1651744263141, "mean_abs_error_last_10": 123.49009705549445, "mean_abs_error_last_25": 201.1292902462638, "mean_abs_error_last_50": 393.6580840608327, "mean_pred_prob": 0.021926904236897825, "mean_pred_prob_last_10": 0.11109475998673587, "mean_pred_prob_last_25": 0.06082462056074291, "mean_pred_prob_last_50": 0.03565782653167844, "mean_token_accuracy": 0.8723459899425506, "step": 18860 }, { "epoch": 0.3354487760652765, "grad_norm": 1.6005497471883627, "learning_rate": 0.0001, "loss": 0.8584, "mean_abs_error": 384.56578740053374, "mean_abs_error_last_10": 77.05231173246025, "mean_abs_error_last_25": 125.84510091844822, "mean_abs_error_last_50": 187.96832715715252, "mean_pred_prob": 0.02444009790197015, "mean_pred_prob_last_10": 0.1404099442064762, "mean_pred_prob_last_25": 0.07133517023175955, "mean_pred_prob_last_50": 0.041848906222730874, "mean_token_accuracy": 0.8712727904319764, "step": 18870 }, { "epoch": 0.33562654436207845, "grad_norm": 1.3121122508785907, "learning_rate": 0.0001, "loss": 0.7394, "mean_abs_error": 498.31218204652896, "mean_abs_error_last_10": 181.38088547043915, "mean_abs_error_last_25": 207.75781070861632, "mean_abs_error_last_50": 333.7247889036074, "mean_pred_prob": 0.03529544236371294, "mean_pred_prob_last_10": 0.17488250695168972, "mean_pred_prob_last_25": 0.09889924642629921, "mean_pred_prob_last_50": 0.060696720122359694, "mean_token_accuracy": 0.8770423591136932, "step": 18880 }, { "epoch": 0.3358043126588804, "grad_norm": 1.5862318171219871, "learning_rate": 0.0001, "loss": 0.8165, "mean_abs_error": 350.49223356556115, "mean_abs_error_last_10": 92.73955760329011, "mean_abs_error_last_25": 103.5978877100368, "mean_abs_error_last_50": 163.86302390710105, "mean_pred_prob": 0.03866209569387138, "mean_pred_prob_last_10": 0.18455955814570188, "mean_pred_prob_last_25": 0.10569262537173926, "mean_pred_prob_last_50": 0.06525035738013685, "mean_token_accuracy": 0.8718003571033478, "step": 18890 }, { "epoch": 0.3359820809556824, "grad_norm": 2.406442889567447, "learning_rate": 0.0001, "loss": 0.8362, "mean_abs_error": 1038.7732723901227, "mean_abs_error_last_10": 255.55106674594018, "mean_abs_error_last_25": 349.33909289529777, "mean_abs_error_last_50": 660.1836803795302, "mean_pred_prob": 0.02553876050224062, "mean_pred_prob_last_10": 0.13607101038796826, "mean_pred_prob_last_25": 0.07332047898089514, "mean_pred_prob_last_50": 0.043390571110649034, "mean_token_accuracy": 0.8708150863647461, "step": 18900 }, { "epoch": 0.3361598492524843, "grad_norm": 2.182691496770027, "learning_rate": 0.0001, "loss": 0.833, "mean_abs_error": 459.70662749147823, "mean_abs_error_last_10": 127.13208975861376, "mean_abs_error_last_25": 212.23685355374488, "mean_abs_error_last_50": 327.69846871134143, "mean_pred_prob": 0.03307981246616691, "mean_pred_prob_last_10": 0.15817802995443345, "mean_pred_prob_last_25": 0.08918333789333702, "mean_pred_prob_last_50": 0.0553124578204006, "mean_token_accuracy": 0.8717093586921691, "step": 18910 }, { "epoch": 0.33633761754928626, "grad_norm": 1.8635028452204556, "learning_rate": 0.0001, "loss": 0.7699, "mean_abs_error": 622.8562853154825, "mean_abs_error_last_10": 224.55196667100049, "mean_abs_error_last_25": 303.4396631449114, "mean_abs_error_last_50": 451.99636956536534, "mean_pred_prob": 0.041699137014802545, "mean_pred_prob_last_10": 0.2085760237649083, "mean_pred_prob_last_25": 0.11373098031035625, "mean_pred_prob_last_50": 0.06954877943499013, "mean_token_accuracy": 0.8723546743392945, "step": 18920 }, { "epoch": 0.3365153858460882, "grad_norm": 2.164668307059103, "learning_rate": 0.0001, "loss": 0.7637, "mean_abs_error": 523.4995006841351, "mean_abs_error_last_10": 273.70326766098844, "mean_abs_error_last_25": 280.2523515204403, "mean_abs_error_last_50": 355.5714796283934, "mean_pred_prob": 0.03397686444222927, "mean_pred_prob_last_10": 0.16657390783075243, "mean_pred_prob_last_25": 0.09584509078413248, "mean_pred_prob_last_50": 0.057837102125631645, "mean_token_accuracy": 0.8709526598453522, "step": 18930 }, { "epoch": 0.33669315414289014, "grad_norm": 4.189952302954235, "learning_rate": 0.0001, "loss": 0.8857, "mean_abs_error": 224.4355843491391, "mean_abs_error_last_10": 97.75530798671082, "mean_abs_error_last_25": 108.19295529639226, "mean_abs_error_last_50": 123.3189636565852, "mean_pred_prob": 0.052851582178846, "mean_pred_prob_last_10": 0.26594656424131247, "mean_pred_prob_last_25": 0.14436592808924614, "mean_pred_prob_last_50": 0.08876051810802892, "mean_token_accuracy": 0.8765715062618256, "step": 18940 }, { "epoch": 0.3368709224396921, "grad_norm": 1.1239087241890378, "learning_rate": 0.0001, "loss": 0.8013, "mean_abs_error": 133.1625980684466, "mean_abs_error_last_10": 48.052326771385324, "mean_abs_error_last_25": 53.97491099280952, "mean_abs_error_last_50": 72.77747856699435, "mean_pred_prob": 0.04919148553162813, "mean_pred_prob_last_10": 0.23836033903062342, "mean_pred_prob_last_25": 0.13336877059191465, "mean_pred_prob_last_50": 0.08264417639002204, "mean_token_accuracy": 0.8880542337894439, "step": 18950 }, { "epoch": 0.33704869073649407, "grad_norm": 1.658398302223944, "learning_rate": 0.0001, "loss": 0.9069, "mean_abs_error": 1083.1016029646128, "mean_abs_error_last_10": 515.3851673239722, "mean_abs_error_last_25": 558.8453981781223, "mean_abs_error_last_50": 747.019149085184, "mean_pred_prob": 0.01825024001300335, "mean_pred_prob_last_10": 0.09900743567850441, "mean_pred_prob_last_25": 0.051401373330736536, "mean_pred_prob_last_50": 0.031233024448738433, "mean_token_accuracy": 0.8635547816753387, "step": 18960 }, { "epoch": 0.337226459033296, "grad_norm": 1.2531124148528756, "learning_rate": 0.0001, "loss": 0.7899, "mean_abs_error": 249.6622380933301, "mean_abs_error_last_10": 91.14084109894263, "mean_abs_error_last_25": 112.42148024800392, "mean_abs_error_last_50": 206.1982787460086, "mean_pred_prob": 0.048887722403742374, "mean_pred_prob_last_10": 0.23645736817270518, "mean_pred_prob_last_25": 0.13576405411586165, "mean_pred_prob_last_50": 0.08266162672080099, "mean_token_accuracy": 0.8736804485321045, "step": 18970 }, { "epoch": 0.33740422733009795, "grad_norm": 1.2502833867767456, "learning_rate": 0.0001, "loss": 0.713, "mean_abs_error": 463.3652526339195, "mean_abs_error_last_10": 176.80127913315613, "mean_abs_error_last_25": 215.46435961861735, "mean_abs_error_last_50": 287.42967191096693, "mean_pred_prob": 0.025095845758914947, "mean_pred_prob_last_10": 0.13043982107192278, "mean_pred_prob_last_25": 0.07158436449244618, "mean_pred_prob_last_50": 0.04280700013041496, "mean_token_accuracy": 0.8906546473503113, "step": 18980 }, { "epoch": 0.3375819956268999, "grad_norm": 3.209540718854706, "learning_rate": 0.0001, "loss": 0.8305, "mean_abs_error": 367.5933166231227, "mean_abs_error_last_10": 154.76701165043445, "mean_abs_error_last_25": 197.23149370692852, "mean_abs_error_last_50": 284.74681599579765, "mean_pred_prob": 0.04113089230377227, "mean_pred_prob_last_10": 0.20201239585876465, "mean_pred_prob_last_25": 0.10909438068047166, "mean_pred_prob_last_50": 0.06730891577899456, "mean_token_accuracy": 0.8659824967384339, "step": 18990 }, { "epoch": 0.33775976392370183, "grad_norm": 2.64993569233286, "learning_rate": 0.0001, "loss": 0.7722, "mean_abs_error": 541.6469064307104, "mean_abs_error_last_10": 232.62961151217468, "mean_abs_error_last_25": 301.28403887415345, "mean_abs_error_last_50": 358.8092860164017, "mean_pred_prob": 0.036942539020674306, "mean_pred_prob_last_10": 0.18021889424417167, "mean_pred_prob_last_25": 0.10111862427438609, "mean_pred_prob_last_50": 0.062429829477332534, "mean_token_accuracy": 0.8629999816417694, "step": 19000 }, { "epoch": 0.3379375322205038, "grad_norm": 1.6772724467494828, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 282.46310586430934, "mean_abs_error_last_10": 46.488128300720234, "mean_abs_error_last_25": 62.667735476210495, "mean_abs_error_last_50": 112.3360639957325, "mean_pred_prob": 0.0419644673820585, "mean_pred_prob_last_10": 0.2123332254588604, "mean_pred_prob_last_25": 0.12051563365384936, "mean_pred_prob_last_50": 0.07195346849039197, "mean_token_accuracy": 0.8773443043231964, "step": 19010 }, { "epoch": 0.33811530051730576, "grad_norm": 1.20655584274432, "learning_rate": 0.0001, "loss": 0.9065, "mean_abs_error": 676.8230773934745, "mean_abs_error_last_10": 311.6624104923895, "mean_abs_error_last_25": 357.21223672908536, "mean_abs_error_last_50": 462.34353450805895, "mean_pred_prob": 0.02570545784838032, "mean_pred_prob_last_10": 0.14122123692068272, "mean_pred_prob_last_25": 0.07624213846283964, "mean_pred_prob_last_50": 0.04401717369037215, "mean_token_accuracy": 0.8727153897285461, "step": 19020 }, { "epoch": 0.3382930688141077, "grad_norm": 3.9416179767025756, "learning_rate": 0.0001, "loss": 0.7956, "mean_abs_error": 696.0446988664872, "mean_abs_error_last_10": 224.85839657957294, "mean_abs_error_last_25": 290.00070176512384, "mean_abs_error_last_50": 407.38482453208843, "mean_pred_prob": 0.015170322731137276, "mean_pred_prob_last_10": 0.08624171253759413, "mean_pred_prob_last_25": 0.04390843546716496, "mean_pred_prob_last_50": 0.026064149144804104, "mean_token_accuracy": 0.8805941343307495, "step": 19030 }, { "epoch": 0.33847083711090964, "grad_norm": 1.5870635704989013, "learning_rate": 0.0001, "loss": 0.8954, "mean_abs_error": 297.8031634585406, "mean_abs_error_last_10": 179.989190682195, "mean_abs_error_last_25": 161.20566754773637, "mean_abs_error_last_50": 185.0065564144561, "mean_pred_prob": 0.030593648075591773, "mean_pred_prob_last_10": 0.156383161037229, "mean_pred_prob_last_25": 0.08593764214310795, "mean_pred_prob_last_50": 0.05156342318514362, "mean_token_accuracy": 0.8710981607437134, "step": 19040 }, { "epoch": 0.3386486054077116, "grad_norm": 1.0916743496363726, "learning_rate": 0.0001, "loss": 0.83, "mean_abs_error": 454.05551903301875, "mean_abs_error_last_10": 245.47014974260583, "mean_abs_error_last_25": 300.89683772625773, "mean_abs_error_last_50": 350.51500365847164, "mean_pred_prob": 0.029250521771609785, "mean_pred_prob_last_10": 0.14246948882937432, "mean_pred_prob_last_25": 0.07944637406617402, "mean_pred_prob_last_50": 0.04918036540038884, "mean_token_accuracy": 0.8742889046669007, "step": 19050 }, { "epoch": 0.3388263737045135, "grad_norm": 1.2759906423106806, "learning_rate": 0.0001, "loss": 0.8489, "mean_abs_error": 474.67498785618784, "mean_abs_error_last_10": 186.92639817088224, "mean_abs_error_last_25": 268.919074892489, "mean_abs_error_last_50": 426.8472774293547, "mean_pred_prob": 0.03504406183492392, "mean_pred_prob_last_10": 0.18855799213051797, "mean_pred_prob_last_25": 0.10227532098069787, "mean_pred_prob_last_50": 0.059636954125016926, "mean_token_accuracy": 0.858293890953064, "step": 19060 }, { "epoch": 0.3390041420013155, "grad_norm": 0.9107889539858214, "learning_rate": 0.0001, "loss": 0.7689, "mean_abs_error": 119.68613025675324, "mean_abs_error_last_10": 27.766997201952933, "mean_abs_error_last_25": 43.630845929361925, "mean_abs_error_last_50": 53.63237247840393, "mean_pred_prob": 0.0518961563706398, "mean_pred_prob_last_10": 0.2680643979460001, "mean_pred_prob_last_25": 0.14643009155988693, "mean_pred_prob_last_50": 0.08895630333572627, "mean_token_accuracy": 0.8773150384426117, "step": 19070 }, { "epoch": 0.33918191029811745, "grad_norm": 2.604124045859223, "learning_rate": 0.0001, "loss": 0.8838, "mean_abs_error": 1717.4901283940258, "mean_abs_error_last_10": 817.8001103555275, "mean_abs_error_last_25": 960.6342692160099, "mean_abs_error_last_50": 1176.9469434892355, "mean_pred_prob": 0.015666168222378474, "mean_pred_prob_last_10": 0.07811115944932681, "mean_pred_prob_last_25": 0.04458512029668782, "mean_pred_prob_last_50": 0.026350114337401465, "mean_token_accuracy": 0.8586983621120453, "step": 19080 }, { "epoch": 0.3393596785949194, "grad_norm": 1.4659932198790178, "learning_rate": 0.0001, "loss": 0.7745, "mean_abs_error": 401.02044134662896, "mean_abs_error_last_10": 147.6715347228799, "mean_abs_error_last_25": 252.09101236055463, "mean_abs_error_last_50": 304.8177294041414, "mean_pred_prob": 0.03499156408943236, "mean_pred_prob_last_10": 0.18031570557504892, "mean_pred_prob_last_25": 0.09818110922351479, "mean_pred_prob_last_50": 0.059245721297338606, "mean_token_accuracy": 0.8748675465583802, "step": 19090 }, { "epoch": 0.33953744689172133, "grad_norm": 2.1269117152010715, "learning_rate": 0.0001, "loss": 0.7504, "mean_abs_error": 422.31796458395786, "mean_abs_error_last_10": 141.66238989833408, "mean_abs_error_last_25": 132.88859673896172, "mean_abs_error_last_50": 196.53480066782078, "mean_pred_prob": 0.03146009456831962, "mean_pred_prob_last_10": 0.15733129829168319, "mean_pred_prob_last_25": 0.08854143358767033, "mean_pred_prob_last_50": 0.053973091999068856, "mean_token_accuracy": 0.8815648019313812, "step": 19100 }, { "epoch": 0.33971521518852327, "grad_norm": 1.7629259548847025, "learning_rate": 0.0001, "loss": 0.7973, "mean_abs_error": 184.60584955120748, "mean_abs_error_last_10": 52.21441499941452, "mean_abs_error_last_25": 122.29640893641688, "mean_abs_error_last_50": 158.2817434790407, "mean_pred_prob": 0.03619007761590183, "mean_pred_prob_last_10": 0.18736298717558383, "mean_pred_prob_last_25": 0.1020710825920105, "mean_pred_prob_last_50": 0.061058012302964926, "mean_token_accuracy": 0.868034154176712, "step": 19110 }, { "epoch": 0.3398929834853252, "grad_norm": 2.8889849979763205, "learning_rate": 0.0001, "loss": 0.7732, "mean_abs_error": 318.58993474895505, "mean_abs_error_last_10": 125.36078828032642, "mean_abs_error_last_25": 130.0520396369633, "mean_abs_error_last_50": 171.46645918149247, "mean_pred_prob": 0.03406032426282764, "mean_pred_prob_last_10": 0.1717142377048731, "mean_pred_prob_last_25": 0.09373455187305808, "mean_pred_prob_last_50": 0.05728543843142688, "mean_token_accuracy": 0.8701669812202454, "step": 19120 }, { "epoch": 0.3400707517821272, "grad_norm": 1.4210244162059438, "learning_rate": 0.0001, "loss": 0.8814, "mean_abs_error": 409.50264338147207, "mean_abs_error_last_10": 160.75520977545906, "mean_abs_error_last_25": 226.92387463360942, "mean_abs_error_last_50": 312.39084734627534, "mean_pred_prob": 0.03087518973625265, "mean_pred_prob_last_10": 0.1619942829653155, "mean_pred_prob_last_25": 0.08534698613220826, "mean_pred_prob_last_50": 0.05126271895715036, "mean_token_accuracy": 0.8762723803520203, "step": 19130 }, { "epoch": 0.34024852007892914, "grad_norm": 1.7580187156287799, "learning_rate": 0.0001, "loss": 1.0103, "mean_abs_error": 595.0543680854266, "mean_abs_error_last_10": 315.09490699608676, "mean_abs_error_last_25": 372.89738331635834, "mean_abs_error_last_50": 411.041274071911, "mean_pred_prob": 0.03235837361426093, "mean_pred_prob_last_10": 0.15312631393317133, "mean_pred_prob_last_25": 0.08763839243911206, "mean_pred_prob_last_50": 0.05355238829506561, "mean_token_accuracy": 0.8566324949264527, "step": 19140 }, { "epoch": 0.3404262883757311, "grad_norm": 1.0811935573916285, "learning_rate": 0.0001, "loss": 0.852, "mean_abs_error": 243.10124403065365, "mean_abs_error_last_10": 90.81875044528803, "mean_abs_error_last_25": 110.20568061863597, "mean_abs_error_last_50": 135.27273947919576, "mean_pred_prob": 0.03290743688121438, "mean_pred_prob_last_10": 0.16156349331140518, "mean_pred_prob_last_25": 0.0896424489095807, "mean_pred_prob_last_50": 0.05606019608676434, "mean_token_accuracy": 0.8733970880508423, "step": 19150 }, { "epoch": 0.340604056672533, "grad_norm": 2.105975993361351, "learning_rate": 0.0001, "loss": 0.8985, "mean_abs_error": 395.9938889352445, "mean_abs_error_last_10": 87.12705681020188, "mean_abs_error_last_25": 105.15842513005016, "mean_abs_error_last_50": 208.3264390526565, "mean_pred_prob": 0.036229472229024395, "mean_pred_prob_last_10": 0.18000861462205647, "mean_pred_prob_last_25": 0.1016680542845279, "mean_pred_prob_last_50": 0.06057538357563317, "mean_token_accuracy": 0.8746414899826049, "step": 19160 }, { "epoch": 0.34078182496933496, "grad_norm": 1.288879209303499, "learning_rate": 0.0001, "loss": 0.8449, "mean_abs_error": 1210.9887428432935, "mean_abs_error_last_10": 768.2047205911516, "mean_abs_error_last_25": 809.0559379083604, "mean_abs_error_last_50": 951.3507471040518, "mean_pred_prob": 0.03238262714148732, "mean_pred_prob_last_10": 0.16170312721806113, "mean_pred_prob_last_25": 0.08912267626146786, "mean_pred_prob_last_50": 0.05412948281591525, "mean_token_accuracy": 0.8658572912216187, "step": 19170 }, { "epoch": 0.3409595932661369, "grad_norm": 1.074462722432834, "learning_rate": 0.0001, "loss": 0.7941, "mean_abs_error": 596.1965073221486, "mean_abs_error_last_10": 255.74339762239947, "mean_abs_error_last_25": 349.72740507943195, "mean_abs_error_last_50": 417.1611023706344, "mean_pred_prob": 0.032160817756084724, "mean_pred_prob_last_10": 0.1610266154515557, "mean_pred_prob_last_25": 0.08951776300091296, "mean_pred_prob_last_50": 0.054390139656607064, "mean_token_accuracy": 0.8722610831260681, "step": 19180 }, { "epoch": 0.3411373615629389, "grad_norm": 1.085219371363448, "learning_rate": 0.0001, "loss": 1.085, "mean_abs_error": 397.18419398644164, "mean_abs_error_last_10": 120.63654653407393, "mean_abs_error_last_25": 139.6558483874453, "mean_abs_error_last_50": 209.8412841546251, "mean_pred_prob": 0.036174070608103646, "mean_pred_prob_last_10": 0.187045154965017, "mean_pred_prob_last_25": 0.1005927974358201, "mean_pred_prob_last_50": 0.06074723927304149, "mean_token_accuracy": 0.8718560934066772, "step": 19190 }, { "epoch": 0.34131512985974083, "grad_norm": 1.8298429507570957, "learning_rate": 0.0001, "loss": 0.7998, "mean_abs_error": 281.92609824806175, "mean_abs_error_last_10": 316.2127605039458, "mean_abs_error_last_25": 304.06472798817856, "mean_abs_error_last_50": 288.1367755000704, "mean_pred_prob": 0.0417409154237248, "mean_pred_prob_last_10": 0.20112620142754167, "mean_pred_prob_last_25": 0.11196671708021314, "mean_pred_prob_last_50": 0.06969555970281363, "mean_token_accuracy": 0.863196974992752, "step": 19200 }, { "epoch": 0.34149289815654277, "grad_norm": 1.9971790178556141, "learning_rate": 0.0001, "loss": 0.795, "mean_abs_error": 965.1293308700702, "mean_abs_error_last_10": 549.2232605505201, "mean_abs_error_last_25": 649.1909982986381, "mean_abs_error_last_50": 762.3781245102907, "mean_pred_prob": 0.04838238663796801, "mean_pred_prob_last_10": 0.22136428222584073, "mean_pred_prob_last_25": 0.12480494044575607, "mean_pred_prob_last_50": 0.07920303535356651, "mean_token_accuracy": 0.8689371466636657, "step": 19210 }, { "epoch": 0.3416706664533447, "grad_norm": 1.356908923641322, "learning_rate": 0.0001, "loss": 0.7752, "mean_abs_error": 717.3197139025574, "mean_abs_error_last_10": 252.9150319571504, "mean_abs_error_last_25": 329.1246779560391, "mean_abs_error_last_50": 448.23260268203956, "mean_pred_prob": 0.03370184863451868, "mean_pred_prob_last_10": 0.1681459655694198, "mean_pred_prob_last_25": 0.09245023862458765, "mean_pred_prob_last_50": 0.05645727085066028, "mean_token_accuracy": 0.8782653689384461, "step": 19220 }, { "epoch": 0.34184843475014665, "grad_norm": 1.3183759271547353, "learning_rate": 0.0001, "loss": 0.7877, "mean_abs_error": 1049.4431470685247, "mean_abs_error_last_10": 577.5685193293568, "mean_abs_error_last_25": 669.6259583837489, "mean_abs_error_last_50": 752.8114900596507, "mean_pred_prob": 0.031450379102898295, "mean_pred_prob_last_10": 0.13784883505722972, "mean_pred_prob_last_25": 0.08180377851240336, "mean_pred_prob_last_50": 0.0520235704141669, "mean_token_accuracy": 0.8709002733230591, "step": 19230 }, { "epoch": 0.3420262030469486, "grad_norm": 1.5129791719414927, "learning_rate": 0.0001, "loss": 0.8731, "mean_abs_error": 890.1634600209452, "mean_abs_error_last_10": 330.48631486923654, "mean_abs_error_last_25": 506.61261032874137, "mean_abs_error_last_50": 677.1370920683588, "mean_pred_prob": 0.03715075160434935, "mean_pred_prob_last_10": 0.1861078841960989, "mean_pred_prob_last_25": 0.10229067420004867, "mean_pred_prob_last_50": 0.06271409392356872, "mean_token_accuracy": 0.869911789894104, "step": 19240 }, { "epoch": 0.3422039713437506, "grad_norm": 1.209170716449921, "learning_rate": 0.0001, "loss": 0.7299, "mean_abs_error": 535.5993633577859, "mean_abs_error_last_10": 197.09283738673585, "mean_abs_error_last_25": 208.9532573969012, "mean_abs_error_last_50": 275.90342699944614, "mean_pred_prob": 0.031597783556208014, "mean_pred_prob_last_10": 0.14448820408433677, "mean_pred_prob_last_25": 0.08334511765278876, "mean_pred_prob_last_50": 0.05215337418485433, "mean_token_accuracy": 0.8804426550865173, "step": 19250 }, { "epoch": 0.3423817396405525, "grad_norm": 1.1758627079295167, "learning_rate": 0.0001, "loss": 0.8038, "mean_abs_error": 640.5690950538543, "mean_abs_error_last_10": 285.7713295580384, "mean_abs_error_last_25": 329.5260781030551, "mean_abs_error_last_50": 443.4533777255536, "mean_pred_prob": 0.03455258864269126, "mean_pred_prob_last_10": 0.17013837064732798, "mean_pred_prob_last_25": 0.09458427545614541, "mean_pred_prob_last_50": 0.05684841909387615, "mean_token_accuracy": 0.8705307483673096, "step": 19260 }, { "epoch": 0.34255950793735446, "grad_norm": 1.7379599231828324, "learning_rate": 0.0001, "loss": 0.9061, "mean_abs_error": 438.87428484695494, "mean_abs_error_last_10": 69.57117864222438, "mean_abs_error_last_25": 128.00274703948986, "mean_abs_error_last_50": 228.10046878958573, "mean_pred_prob": 0.03514564400538802, "mean_pred_prob_last_10": 0.17881734985858203, "mean_pred_prob_last_25": 0.0995156180113554, "mean_pred_prob_last_50": 0.060290708160027864, "mean_token_accuracy": 0.8773008763790131, "step": 19270 }, { "epoch": 0.3427372762341564, "grad_norm": 0.9725665900585235, "learning_rate": 0.0001, "loss": 0.6995, "mean_abs_error": 142.51502334898214, "mean_abs_error_last_10": 54.06555160505727, "mean_abs_error_last_25": 88.89355115057558, "mean_abs_error_last_50": 94.31748712699932, "mean_pred_prob": 0.057664558431133627, "mean_pred_prob_last_10": 0.27962684631347656, "mean_pred_prob_last_25": 0.15676430128514768, "mean_pred_prob_last_50": 0.09671097621321678, "mean_token_accuracy": 0.8821576297283172, "step": 19280 }, { "epoch": 0.34291504453095834, "grad_norm": 2.2918437099778113, "learning_rate": 0.0001, "loss": 0.8984, "mean_abs_error": 257.4410867158145, "mean_abs_error_last_10": 178.2681500678477, "mean_abs_error_last_25": 196.28242110199864, "mean_abs_error_last_50": 212.54253728879507, "mean_pred_prob": 0.03364731320179999, "mean_pred_prob_last_10": 0.1670622949488461, "mean_pred_prob_last_25": 0.09193194573745131, "mean_pred_prob_last_50": 0.05703064408153295, "mean_token_accuracy": 0.8714470326900482, "step": 19290 }, { "epoch": 0.3430928128277603, "grad_norm": 1.3480446263365153, "learning_rate": 0.0001, "loss": 0.805, "mean_abs_error": 257.13086071504915, "mean_abs_error_last_10": 53.7613628023315, "mean_abs_error_last_25": 96.68316761420861, "mean_abs_error_last_50": 156.59136281062837, "mean_pred_prob": 0.05079038105905056, "mean_pred_prob_last_10": 0.23018335178494453, "mean_pred_prob_last_25": 0.13521401342004538, "mean_pred_prob_last_50": 0.08530531860888005, "mean_token_accuracy": 0.8744656026363373, "step": 19300 }, { "epoch": 0.34327058112456227, "grad_norm": 1.222572419405048, "learning_rate": 0.0001, "loss": 0.7903, "mean_abs_error": 442.30056396364546, "mean_abs_error_last_10": 163.31706846591277, "mean_abs_error_last_25": 254.12714830836322, "mean_abs_error_last_50": 364.305021609992, "mean_pred_prob": 0.025707074720412493, "mean_pred_prob_last_10": 0.1384530019015074, "mean_pred_prob_last_25": 0.07390063917264342, "mean_pred_prob_last_50": 0.044450463727116585, "mean_token_accuracy": 0.8683073401451111, "step": 19310 }, { "epoch": 0.3434483494213642, "grad_norm": 2.5615097298338165, "learning_rate": 0.0001, "loss": 0.849, "mean_abs_error": 762.4148079652306, "mean_abs_error_last_10": 231.34848070133938, "mean_abs_error_last_25": 305.05078547831266, "mean_abs_error_last_50": 501.49264167376225, "mean_pred_prob": 0.03790936997102108, "mean_pred_prob_last_10": 0.18286264038761146, "mean_pred_prob_last_25": 0.10432322939741426, "mean_pred_prob_last_50": 0.06390501410351135, "mean_token_accuracy": 0.8756788492202758, "step": 19320 }, { "epoch": 0.34362611771816615, "grad_norm": 1.1400811594787843, "learning_rate": 0.0001, "loss": 0.7153, "mean_abs_error": 696.3342364440174, "mean_abs_error_last_10": 242.57501358222436, "mean_abs_error_last_25": 289.74904841106934, "mean_abs_error_last_50": 418.1362221126589, "mean_pred_prob": 0.02610139914904721, "mean_pred_prob_last_10": 0.14259402093011886, "mean_pred_prob_last_25": 0.07445703615667298, "mean_pred_prob_last_50": 0.044658111833268774, "mean_token_accuracy": 0.8815633535385132, "step": 19330 }, { "epoch": 0.3438038860149681, "grad_norm": 1.8482349656312933, "learning_rate": 0.0001, "loss": 0.8319, "mean_abs_error": 405.9801592481373, "mean_abs_error_last_10": 218.0697005710437, "mean_abs_error_last_25": 355.6159748352401, "mean_abs_error_last_50": 348.99144625304336, "mean_pred_prob": 0.03193130830768496, "mean_pred_prob_last_10": 0.17012668065726758, "mean_pred_prob_last_25": 0.09097814401611685, "mean_pred_prob_last_50": 0.0543464221060276, "mean_token_accuracy": 0.8729274153709412, "step": 19340 }, { "epoch": 0.34398165431177, "grad_norm": 1.2690985934045498, "learning_rate": 0.0001, "loss": 0.8015, "mean_abs_error": 703.579207948573, "mean_abs_error_last_10": 432.8007230114441, "mean_abs_error_last_25": 441.0634783990293, "mean_abs_error_last_50": 518.4264791883901, "mean_pred_prob": 0.03576666832377669, "mean_pred_prob_last_10": 0.1895108897879254, "mean_pred_prob_last_25": 0.10046564693911933, "mean_pred_prob_last_50": 0.06058887767139822, "mean_token_accuracy": 0.8706641912460327, "step": 19350 }, { "epoch": 0.34415942260857196, "grad_norm": 1.1631992441596095, "learning_rate": 0.0001, "loss": 0.7865, "mean_abs_error": 209.63349460530804, "mean_abs_error_last_10": 68.18499523437376, "mean_abs_error_last_25": 98.4926493259306, "mean_abs_error_last_50": 134.65654901623913, "mean_pred_prob": 0.026295892149209976, "mean_pred_prob_last_10": 0.14543399773538113, "mean_pred_prob_last_25": 0.07628185618668795, "mean_pred_prob_last_50": 0.04512869855388999, "mean_token_accuracy": 0.8699900805950165, "step": 19360 }, { "epoch": 0.34433719090537396, "grad_norm": 3.077748636834193, "learning_rate": 0.0001, "loss": 0.8089, "mean_abs_error": 250.53897842459574, "mean_abs_error_last_10": 106.61740088638214, "mean_abs_error_last_25": 108.07782449547169, "mean_abs_error_last_50": 137.04469165758553, "mean_pred_prob": 0.037312360713258386, "mean_pred_prob_last_10": 0.191056196577847, "mean_pred_prob_last_25": 0.10702437674626708, "mean_pred_prob_last_50": 0.06509399395436048, "mean_token_accuracy": 0.885128653049469, "step": 19370 }, { "epoch": 0.3445149592021759, "grad_norm": 3.3265389233297333, "learning_rate": 0.0001, "loss": 1.0395, "mean_abs_error": 1707.3369299359827, "mean_abs_error_last_10": 611.4790365138472, "mean_abs_error_last_25": 769.8269117898107, "mean_abs_error_last_50": 1038.3710712868958, "mean_pred_prob": 0.018857727089198306, "mean_pred_prob_last_10": 0.09733700850047171, "mean_pred_prob_last_25": 0.05250861314125359, "mean_pred_prob_last_50": 0.03214234507468063, "mean_token_accuracy": 0.8685325443744659, "step": 19380 }, { "epoch": 0.34469272749897784, "grad_norm": 0.759998585780978, "learning_rate": 0.0001, "loss": 0.9061, "mean_abs_error": 452.3982933274474, "mean_abs_error_last_10": 154.44352329792918, "mean_abs_error_last_25": 215.0950530065708, "mean_abs_error_last_50": 253.4847446073271, "mean_pred_prob": 0.02952458228683099, "mean_pred_prob_last_10": 0.14221099803689868, "mean_pred_prob_last_25": 0.07860549718607217, "mean_pred_prob_last_50": 0.04908024417236447, "mean_token_accuracy": 0.8712768733501435, "step": 19390 }, { "epoch": 0.3448704957957798, "grad_norm": 2.128465632551705, "learning_rate": 0.0001, "loss": 0.7072, "mean_abs_error": 203.44162597552136, "mean_abs_error_last_10": 51.92694366392285, "mean_abs_error_last_25": 87.22043972389375, "mean_abs_error_last_50": 127.57046205916538, "mean_pred_prob": 0.050382960890419784, "mean_pred_prob_last_10": 0.25094689819961785, "mean_pred_prob_last_25": 0.13875227151438593, "mean_pred_prob_last_50": 0.08413345031440259, "mean_token_accuracy": 0.8870726108551026, "step": 19400 }, { "epoch": 0.3450482640925817, "grad_norm": 0.8737097833774896, "learning_rate": 0.0001, "loss": 0.8296, "mean_abs_error": 179.4857821457231, "mean_abs_error_last_10": 38.96253243843055, "mean_abs_error_last_25": 90.04903146186616, "mean_abs_error_last_50": 133.25957530378048, "mean_pred_prob": 0.045463809324428436, "mean_pred_prob_last_10": 0.22842898964881897, "mean_pred_prob_last_25": 0.1301817934960127, "mean_pred_prob_last_50": 0.077771548461169, "mean_token_accuracy": 0.8806338906288147, "step": 19410 }, { "epoch": 0.34522603238938365, "grad_norm": 1.5268626589426972, "learning_rate": 0.0001, "loss": 0.8325, "mean_abs_error": 1308.4960363143214, "mean_abs_error_last_10": 627.2748484822853, "mean_abs_error_last_25": 714.0331342102934, "mean_abs_error_last_50": 933.4669806664015, "mean_pred_prob": 0.021918722796544898, "mean_pred_prob_last_10": 0.11583005108404905, "mean_pred_prob_last_25": 0.06260423113417346, "mean_pred_prob_last_50": 0.037532253153040075, "mean_token_accuracy": 0.870426344871521, "step": 19420 }, { "epoch": 0.34540380068618565, "grad_norm": 1.6364077704955258, "learning_rate": 0.0001, "loss": 0.754, "mean_abs_error": 598.1815228675007, "mean_abs_error_last_10": 268.34132363830844, "mean_abs_error_last_25": 305.882929751923, "mean_abs_error_last_50": 341.281559059004, "mean_pred_prob": 0.03830945823574439, "mean_pred_prob_last_10": 0.17336742558982224, "mean_pred_prob_last_25": 0.10059918605256826, "mean_pred_prob_last_50": 0.061909252172335984, "mean_token_accuracy": 0.8766123294830322, "step": 19430 }, { "epoch": 0.3455815689829876, "grad_norm": 1.5287884621089067, "learning_rate": 0.0001, "loss": 0.8286, "mean_abs_error": 541.6790174898833, "mean_abs_error_last_10": 226.62024575419045, "mean_abs_error_last_25": 312.9438523820877, "mean_abs_error_last_50": 348.94136483585453, "mean_pred_prob": 0.03276660722331144, "mean_pred_prob_last_10": 0.15288751516491175, "mean_pred_prob_last_25": 0.08809224039432592, "mean_pred_prob_last_50": 0.05491577393840998, "mean_token_accuracy": 0.8817319869995117, "step": 19440 }, { "epoch": 0.3457593372797895, "grad_norm": 1.5240120471909564, "learning_rate": 0.0001, "loss": 0.8555, "mean_abs_error": 874.9381183954569, "mean_abs_error_last_10": 317.7533513785555, "mean_abs_error_last_25": 424.93898496359253, "mean_abs_error_last_50": 560.7401026157996, "mean_pred_prob": 0.022661016552592627, "mean_pred_prob_last_10": 0.11676030241069384, "mean_pred_prob_last_25": 0.061812801630003376, "mean_pred_prob_last_50": 0.03837093692854978, "mean_token_accuracy": 0.8692181348800659, "step": 19450 }, { "epoch": 0.34593710557659146, "grad_norm": 1.3480425653453498, "learning_rate": 0.0001, "loss": 0.7239, "mean_abs_error": 164.926552984693, "mean_abs_error_last_10": 38.55807667066114, "mean_abs_error_last_25": 53.840556434902, "mean_abs_error_last_50": 101.41837636081635, "mean_pred_prob": 0.0393808557651937, "mean_pred_prob_last_10": 0.2016486581414938, "mean_pred_prob_last_25": 0.10934151839464903, "mean_pred_prob_last_50": 0.06673373281955719, "mean_token_accuracy": 0.8819046258926392, "step": 19460 }, { "epoch": 0.3461148738733934, "grad_norm": 1.1797588945095272, "learning_rate": 0.0001, "loss": 0.8042, "mean_abs_error": 117.7900372792737, "mean_abs_error_last_10": 33.52544623989784, "mean_abs_error_last_25": 63.28054615503523, "mean_abs_error_last_50": 89.69686231534361, "mean_pred_prob": 0.058294351166114214, "mean_pred_prob_last_10": 0.2794370099902153, "mean_pred_prob_last_25": 0.1592148993164301, "mean_pred_prob_last_50": 0.09810466561466455, "mean_token_accuracy": 0.8686121046543122, "step": 19470 }, { "epoch": 0.34629264217019534, "grad_norm": 2.1035289306208953, "learning_rate": 0.0001, "loss": 0.8532, "mean_abs_error": 553.4729032741559, "mean_abs_error_last_10": 136.8975638632309, "mean_abs_error_last_25": 162.10491237102022, "mean_abs_error_last_50": 266.87069156302016, "mean_pred_prob": 0.03521188467275351, "mean_pred_prob_last_10": 0.16248008962720634, "mean_pred_prob_last_25": 0.09415714628994465, "mean_pred_prob_last_50": 0.05822303588502109, "mean_token_accuracy": 0.8741804242134095, "step": 19480 }, { "epoch": 0.34647041046699734, "grad_norm": 1.468521292338833, "learning_rate": 0.0001, "loss": 0.7192, "mean_abs_error": 165.03818731242842, "mean_abs_error_last_10": 42.43227211744197, "mean_abs_error_last_25": 77.70608806043595, "mean_abs_error_last_50": 105.68349851312148, "mean_pred_prob": 0.041218712087720634, "mean_pred_prob_last_10": 0.2145101312547922, "mean_pred_prob_last_25": 0.11821964643895626, "mean_pred_prob_last_50": 0.07108944859355688, "mean_token_accuracy": 0.8755995213985444, "step": 19490 }, { "epoch": 0.3466481787637993, "grad_norm": 2.158410264531715, "learning_rate": 0.0001, "loss": 0.8119, "mean_abs_error": 261.040963022121, "mean_abs_error_last_10": 34.25621122640972, "mean_abs_error_last_25": 74.40948097840777, "mean_abs_error_last_50": 116.62626154467459, "mean_pred_prob": 0.04852355164475739, "mean_pred_prob_last_10": 0.23252039700746535, "mean_pred_prob_last_25": 0.1300771625712514, "mean_pred_prob_last_50": 0.08138831648975611, "mean_token_accuracy": 0.8699421644210815, "step": 19500 }, { "epoch": 0.3468259470606012, "grad_norm": 0.8556456827662283, "learning_rate": 0.0001, "loss": 0.8145, "mean_abs_error": 301.93315475162024, "mean_abs_error_last_10": 111.43890010726393, "mean_abs_error_last_25": 127.0919911799161, "mean_abs_error_last_50": 184.1178808320201, "mean_pred_prob": 0.0436601169873029, "mean_pred_prob_last_10": 0.20513291731476785, "mean_pred_prob_last_25": 0.1170634925365448, "mean_pred_prob_last_50": 0.07387731242924929, "mean_token_accuracy": 0.8760866105556488, "step": 19510 }, { "epoch": 0.34700371535740315, "grad_norm": 2.9642752099419223, "learning_rate": 0.0001, "loss": 0.812, "mean_abs_error": 505.06892295977707, "mean_abs_error_last_10": 245.18060193312857, "mean_abs_error_last_25": 280.3107382861179, "mean_abs_error_last_50": 355.1053550891141, "mean_pred_prob": 0.039959104638546704, "mean_pred_prob_last_10": 0.20330148227512837, "mean_pred_prob_last_25": 0.11290255123749375, "mean_pred_prob_last_50": 0.06830690535716713, "mean_token_accuracy": 0.8776135385036469, "step": 19520 }, { "epoch": 0.3471814836542051, "grad_norm": 2.1829125567566656, "learning_rate": 0.0001, "loss": 0.8267, "mean_abs_error": 754.4959160685817, "mean_abs_error_last_10": 318.4375432361518, "mean_abs_error_last_25": 426.8085534126738, "mean_abs_error_last_50": 530.7453588300846, "mean_pred_prob": 0.028787596564507113, "mean_pred_prob_last_10": 0.14582421872764825, "mean_pred_prob_last_25": 0.08098450547549874, "mean_pred_prob_last_50": 0.0485448447696399, "mean_token_accuracy": 0.8727614462375641, "step": 19530 }, { "epoch": 0.34735925195100703, "grad_norm": 1.8861128957624522, "learning_rate": 0.0001, "loss": 0.7865, "mean_abs_error": 445.791997418612, "mean_abs_error_last_10": 91.54687282800732, "mean_abs_error_last_25": 134.30892323367786, "mean_abs_error_last_50": 244.2180186580918, "mean_pred_prob": 0.061328787394450045, "mean_pred_prob_last_10": 0.2848774575162679, "mean_pred_prob_last_25": 0.16896262868540363, "mean_pred_prob_last_50": 0.10363004964310676, "mean_token_accuracy": 0.8767149269580841, "step": 19540 }, { "epoch": 0.347537020247809, "grad_norm": 1.265792833213859, "learning_rate": 0.0001, "loss": 0.7508, "mean_abs_error": 835.1414506301193, "mean_abs_error_last_10": 472.70462158051, "mean_abs_error_last_25": 611.1503292957017, "mean_abs_error_last_50": 692.0640098734491, "mean_pred_prob": 0.035114166533458044, "mean_pred_prob_last_10": 0.1738122505194042, "mean_pred_prob_last_25": 0.10072327504458371, "mean_pred_prob_last_50": 0.05973763280489948, "mean_token_accuracy": 0.884975129365921, "step": 19550 }, { "epoch": 0.34771478854461096, "grad_norm": 1.3501741151945164, "learning_rate": 0.0001, "loss": 0.7878, "mean_abs_error": 730.2094938462427, "mean_abs_error_last_10": 428.14597542815284, "mean_abs_error_last_25": 458.2810041552701, "mean_abs_error_last_50": 481.9076593881797, "mean_pred_prob": 0.04850061095203273, "mean_pred_prob_last_10": 0.21514026886434295, "mean_pred_prob_last_25": 0.1296240910713095, "mean_pred_prob_last_50": 0.08050904161063954, "mean_token_accuracy": 0.8704127490520477, "step": 19560 }, { "epoch": 0.3478925568414129, "grad_norm": 1.1339085564239018, "learning_rate": 0.0001, "loss": 0.8477, "mean_abs_error": 190.61816556676493, "mean_abs_error_last_10": 77.0257481416522, "mean_abs_error_last_25": 143.56237411315382, "mean_abs_error_last_50": 183.88887770066646, "mean_pred_prob": 0.04186383257620037, "mean_pred_prob_last_10": 0.20403814762830735, "mean_pred_prob_last_25": 0.11087447572499513, "mean_pred_prob_last_50": 0.06909103235229849, "mean_token_accuracy": 0.8713231384754181, "step": 19570 }, { "epoch": 0.34807032513821484, "grad_norm": 1.3192984961477183, "learning_rate": 0.0001, "loss": 0.903, "mean_abs_error": 966.0171967391482, "mean_abs_error_last_10": 322.92573769704376, "mean_abs_error_last_25": 362.8639377059648, "mean_abs_error_last_50": 589.1565047668486, "mean_pred_prob": 0.020387253249646166, "mean_pred_prob_last_10": 0.1098474294762127, "mean_pred_prob_last_25": 0.056822612596442924, "mean_pred_prob_last_50": 0.03453101222985424, "mean_token_accuracy": 0.875220775604248, "step": 19580 }, { "epoch": 0.3482480934350168, "grad_norm": 1.3424424990135913, "learning_rate": 0.0001, "loss": 0.7659, "mean_abs_error": 503.34269592270675, "mean_abs_error_last_10": 234.8262264537575, "mean_abs_error_last_25": 232.5972595167811, "mean_abs_error_last_50": 295.1139923453918, "mean_pred_prob": 0.03337706151069142, "mean_pred_prob_last_10": 0.14736559564480559, "mean_pred_prob_last_25": 0.08680733041837811, "mean_pred_prob_last_50": 0.05548721074592322, "mean_token_accuracy": 0.8610452592372895, "step": 19590 }, { "epoch": 0.3484258617318187, "grad_norm": 1.207648610550623, "learning_rate": 0.0001, "loss": 0.7769, "mean_abs_error": 111.5189944058455, "mean_abs_error_last_10": 23.289582688396727, "mean_abs_error_last_25": 41.4294081012289, "mean_abs_error_last_50": 64.67513463267895, "mean_pred_prob": 0.04670649748295545, "mean_pred_prob_last_10": 0.23454653695225716, "mean_pred_prob_last_25": 0.1306594680994749, "mean_pred_prob_last_50": 0.07925984840840102, "mean_token_accuracy": 0.8744618713855743, "step": 19600 }, { "epoch": 0.3486036300286207, "grad_norm": 1.559127593367047, "learning_rate": 0.0001, "loss": 0.8248, "mean_abs_error": 807.6222264441869, "mean_abs_error_last_10": 195.70860923416421, "mean_abs_error_last_25": 319.24338784754207, "mean_abs_error_last_50": 435.9295570625821, "mean_pred_prob": 0.025428824103437365, "mean_pred_prob_last_10": 0.14212634306168184, "mean_pred_prob_last_25": 0.07197960816556588, "mean_pred_prob_last_50": 0.04319480448612012, "mean_token_accuracy": 0.878375667333603, "step": 19610 }, { "epoch": 0.34878139832542265, "grad_norm": 1.9405102397515743, "learning_rate": 0.0001, "loss": 0.9718, "mean_abs_error": 499.41076843482125, "mean_abs_error_last_10": 338.8047463890001, "mean_abs_error_last_25": 327.64202863699575, "mean_abs_error_last_50": 308.5774210301095, "mean_pred_prob": 0.030104155000299216, "mean_pred_prob_last_10": 0.1510232573375106, "mean_pred_prob_last_25": 0.08413566609378904, "mean_pred_prob_last_50": 0.05127994826762006, "mean_token_accuracy": 0.8685251891613006, "step": 19620 }, { "epoch": 0.3489591666222246, "grad_norm": 1.5475822557194332, "learning_rate": 0.0001, "loss": 0.8718, "mean_abs_error": 232.3232035447764, "mean_abs_error_last_10": 111.62287190609852, "mean_abs_error_last_25": 179.28021155095607, "mean_abs_error_last_50": 189.5282875108737, "mean_pred_prob": 0.03697193134576082, "mean_pred_prob_last_10": 0.18426640816032885, "mean_pred_prob_last_25": 0.10019055111333727, "mean_pred_prob_last_50": 0.061944572068750856, "mean_token_accuracy": 0.8738524854183197, "step": 19630 }, { "epoch": 0.34913693491902653, "grad_norm": 1.9243867983426113, "learning_rate": 0.0001, "loss": 0.846, "mean_abs_error": 1088.0009691031582, "mean_abs_error_last_10": 435.5866839653778, "mean_abs_error_last_25": 504.51381079326495, "mean_abs_error_last_50": 705.0619415956455, "mean_pred_prob": 0.031098083007964305, "mean_pred_prob_last_10": 0.1508996068383567, "mean_pred_prob_last_25": 0.08626368587720208, "mean_pred_prob_last_50": 0.05191885084786918, "mean_token_accuracy": 0.8688587307929992, "step": 19640 }, { "epoch": 0.34931470321582847, "grad_norm": 1.4984055968511734, "learning_rate": 0.0001, "loss": 0.9277, "mean_abs_error": 582.7132056907705, "mean_abs_error_last_10": 177.0148710099623, "mean_abs_error_last_25": 190.59216255989236, "mean_abs_error_last_50": 334.53605130982174, "mean_pred_prob": 0.016669533582171426, "mean_pred_prob_last_10": 0.07634500288404525, "mean_pred_prob_last_25": 0.04502489303704351, "mean_pred_prob_last_50": 0.027692657383158802, "mean_token_accuracy": 0.865116810798645, "step": 19650 }, { "epoch": 0.34949247151263046, "grad_norm": 2.7289425117182744, "learning_rate": 0.0001, "loss": 0.7976, "mean_abs_error": 234.07986252105866, "mean_abs_error_last_10": 113.07700240960703, "mean_abs_error_last_25": 133.66968121983393, "mean_abs_error_last_50": 179.67563498533826, "mean_pred_prob": 0.046979217790067196, "mean_pred_prob_last_10": 0.2093762092292309, "mean_pred_prob_last_25": 0.1210237855091691, "mean_pred_prob_last_50": 0.07654599649831653, "mean_token_accuracy": 0.8734775841236114, "step": 19660 }, { "epoch": 0.3496702398094324, "grad_norm": 1.9839554377657398, "learning_rate": 0.0001, "loss": 0.871, "mean_abs_error": 353.93936713763424, "mean_abs_error_last_10": 94.5703006401567, "mean_abs_error_last_25": 110.91562009261085, "mean_abs_error_last_50": 174.14914507953614, "mean_pred_prob": 0.04055275064893067, "mean_pred_prob_last_10": 0.1811563890427351, "mean_pred_prob_last_25": 0.1087847768329084, "mean_pred_prob_last_50": 0.068121721688658, "mean_token_accuracy": 0.8739267349243164, "step": 19670 }, { "epoch": 0.34984800810623434, "grad_norm": 1.2968113853058543, "learning_rate": 0.0001, "loss": 0.9707, "mean_abs_error": 1021.9951342449182, "mean_abs_error_last_10": 525.300304583672, "mean_abs_error_last_25": 602.7055237869452, "mean_abs_error_last_50": 789.8630255960111, "mean_pred_prob": 0.03748630657355534, "mean_pred_prob_last_10": 0.18480360120011027, "mean_pred_prob_last_25": 0.1040398049808573, "mean_pred_prob_last_50": 0.06344467500166502, "mean_token_accuracy": 0.8661251187324523, "step": 19680 }, { "epoch": 0.3500257764030363, "grad_norm": 1.0000059281307798, "learning_rate": 0.0001, "loss": 0.8498, "mean_abs_error": 1054.984199804037, "mean_abs_error_last_10": 508.6102976813801, "mean_abs_error_last_25": 617.4603406786788, "mean_abs_error_last_50": 760.6679752023509, "mean_pred_prob": 0.02577106483222451, "mean_pred_prob_last_10": 0.13205781856086105, "mean_pred_prob_last_25": 0.07046260862844064, "mean_pred_prob_last_50": 0.043269354401854795, "mean_token_accuracy": 0.8691797971725463, "step": 19690 }, { "epoch": 0.3502035446998382, "grad_norm": 1.2882796522196078, "learning_rate": 0.0001, "loss": 0.79, "mean_abs_error": 692.1371213376298, "mean_abs_error_last_10": 192.81781723869736, "mean_abs_error_last_25": 251.1908394077098, "mean_abs_error_last_50": 400.50382309940426, "mean_pred_prob": 0.021475839894264936, "mean_pred_prob_last_10": 0.11478281911695376, "mean_pred_prob_last_25": 0.0602025980129838, "mean_pred_prob_last_50": 0.036606945330277085, "mean_token_accuracy": 0.8654849112033844, "step": 19700 }, { "epoch": 0.35038131299664016, "grad_norm": 1.038073506602992, "learning_rate": 0.0001, "loss": 0.8159, "mean_abs_error": 313.88162930184563, "mean_abs_error_last_10": 89.72152731018215, "mean_abs_error_last_25": 111.75923406772611, "mean_abs_error_last_50": 171.81741339790796, "mean_pred_prob": 0.0457314002327621, "mean_pred_prob_last_10": 0.21903893165290356, "mean_pred_prob_last_25": 0.12328199539333581, "mean_pred_prob_last_50": 0.07701218081638217, "mean_token_accuracy": 0.8700763285160065, "step": 19710 }, { "epoch": 0.35055908129344215, "grad_norm": 2.727933615467024, "learning_rate": 0.0001, "loss": 0.812, "mean_abs_error": 452.20836851082277, "mean_abs_error_last_10": 191.6434183187996, "mean_abs_error_last_25": 270.25945451445057, "mean_abs_error_last_50": 358.30008273596536, "mean_pred_prob": 0.029601785354316235, "mean_pred_prob_last_10": 0.15502975462004542, "mean_pred_prob_last_25": 0.08401846825145184, "mean_pred_prob_last_50": 0.05043653945904225, "mean_token_accuracy": 0.8707990646362305, "step": 19720 }, { "epoch": 0.3507368495902441, "grad_norm": 1.8773960383688129, "learning_rate": 0.0001, "loss": 0.8478, "mean_abs_error": 655.404974129219, "mean_abs_error_last_10": 296.2503206129426, "mean_abs_error_last_25": 374.9633158460812, "mean_abs_error_last_50": 419.78535865256043, "mean_pred_prob": 0.032180904503911735, "mean_pred_prob_last_10": 0.14588577123358845, "mean_pred_prob_last_25": 0.08523115208372474, "mean_pred_prob_last_50": 0.053613946051336826, "mean_token_accuracy": 0.8661850810050964, "step": 19730 }, { "epoch": 0.35091461788704603, "grad_norm": 1.6717561139146517, "learning_rate": 0.0001, "loss": 0.8427, "mean_abs_error": 879.1012290739212, "mean_abs_error_last_10": 341.73583402226444, "mean_abs_error_last_25": 389.5620196532348, "mean_abs_error_last_50": 540.5340213141435, "mean_pred_prob": 0.025402054752339608, "mean_pred_prob_last_10": 0.14206619909382426, "mean_pred_prob_last_25": 0.07475758907967248, "mean_pred_prob_last_50": 0.044096498930593954, "mean_token_accuracy": 0.8729907631874084, "step": 19740 }, { "epoch": 0.35109238618384797, "grad_norm": 1.170782702412127, "learning_rate": 0.0001, "loss": 0.8134, "mean_abs_error": 447.844968441868, "mean_abs_error_last_10": 68.7686840036356, "mean_abs_error_last_25": 121.35394733285516, "mean_abs_error_last_50": 259.3233062644411, "mean_pred_prob": 0.038336925412295386, "mean_pred_prob_last_10": 0.19717561479192228, "mean_pred_prob_last_25": 0.10708240587264299, "mean_pred_prob_last_50": 0.06472663444001228, "mean_token_accuracy": 0.877691102027893, "step": 19750 }, { "epoch": 0.3512701544806499, "grad_norm": 2.0552246498017865, "learning_rate": 0.0001, "loss": 0.8095, "mean_abs_error": 260.9382419526824, "mean_abs_error_last_10": 41.20276095745052, "mean_abs_error_last_25": 80.11257774972786, "mean_abs_error_last_50": 146.3831044644095, "mean_pred_prob": 0.032793646398931744, "mean_pred_prob_last_10": 0.1706839043647051, "mean_pred_prob_last_25": 0.09382394328713417, "mean_pred_prob_last_50": 0.056289375852793457, "mean_token_accuracy": 0.8751446604728699, "step": 19760 }, { "epoch": 0.35144792277745185, "grad_norm": 0.8490287363450891, "learning_rate": 0.0001, "loss": 0.775, "mean_abs_error": 896.5687640090437, "mean_abs_error_last_10": 499.74029044793076, "mean_abs_error_last_25": 581.0918996395427, "mean_abs_error_last_50": 683.7168533540639, "mean_pred_prob": 0.04542466662242077, "mean_pred_prob_last_10": 0.21419128381821792, "mean_pred_prob_last_25": 0.11875439019931947, "mean_pred_prob_last_50": 0.0735958064411534, "mean_token_accuracy": 0.8755644619464874, "step": 19770 }, { "epoch": 0.35162569107425384, "grad_norm": 1.837590959773285, "learning_rate": 0.0001, "loss": 0.7086, "mean_abs_error": 337.3585076376941, "mean_abs_error_last_10": 115.54421614237404, "mean_abs_error_last_25": 116.21494603300258, "mean_abs_error_last_50": 175.8550971320755, "mean_pred_prob": 0.051954551786184314, "mean_pred_prob_last_10": 0.23397213742136955, "mean_pred_prob_last_25": 0.13986428771167994, "mean_pred_prob_last_50": 0.08595120441168547, "mean_token_accuracy": 0.8778127133846283, "step": 19780 }, { "epoch": 0.3518034593710558, "grad_norm": 1.3776179388376086, "learning_rate": 0.0001, "loss": 0.7064, "mean_abs_error": 538.4022152776895, "mean_abs_error_last_10": 180.23023283799265, "mean_abs_error_last_25": 245.657463756597, "mean_abs_error_last_50": 434.06289409984055, "mean_pred_prob": 0.047734595858491956, "mean_pred_prob_last_10": 0.23944388257223181, "mean_pred_prob_last_25": 0.13331879481556824, "mean_pred_prob_last_50": 0.08047832351876423, "mean_token_accuracy": 0.8751761019229889, "step": 19790 }, { "epoch": 0.3519812276678577, "grad_norm": 1.7264228123990992, "learning_rate": 0.0001, "loss": 0.7971, "mean_abs_error": 180.42420799204177, "mean_abs_error_last_10": 73.78111954269752, "mean_abs_error_last_25": 150.05976569809633, "mean_abs_error_last_50": 147.4088000744154, "mean_pred_prob": 0.052690487913787366, "mean_pred_prob_last_10": 0.23431794233620168, "mean_pred_prob_last_25": 0.1427209885790944, "mean_pred_prob_last_50": 0.08857533913105727, "mean_token_accuracy": 0.8709024071693421, "step": 19800 }, { "epoch": 0.35215899596465966, "grad_norm": 1.1974870424826076, "learning_rate": 0.0001, "loss": 0.7807, "mean_abs_error": 273.36661605240886, "mean_abs_error_last_10": 46.87285877145541, "mean_abs_error_last_25": 67.27239948973337, "mean_abs_error_last_50": 120.56170656096222, "mean_pred_prob": 0.04351743930019438, "mean_pred_prob_last_10": 0.22651201561093331, "mean_pred_prob_last_25": 0.1253654483705759, "mean_pred_prob_last_50": 0.07466411152854562, "mean_token_accuracy": 0.8689274966716767, "step": 19810 }, { "epoch": 0.3523367642614616, "grad_norm": 1.5853417397974932, "learning_rate": 0.0001, "loss": 0.7682, "mean_abs_error": 781.5154478417796, "mean_abs_error_last_10": 300.65135784796996, "mean_abs_error_last_25": 373.60761837843756, "mean_abs_error_last_50": 443.44079730275126, "mean_pred_prob": 0.025236722588306292, "mean_pred_prob_last_10": 0.11973661188967526, "mean_pred_prob_last_25": 0.06751842364901676, "mean_pred_prob_last_50": 0.04215880489209667, "mean_token_accuracy": 0.8738775193691254, "step": 19820 }, { "epoch": 0.35251453255826354, "grad_norm": 0.9439354903101507, "learning_rate": 0.0001, "loss": 0.8071, "mean_abs_error": 692.2358452601447, "mean_abs_error_last_10": 308.1507392039121, "mean_abs_error_last_25": 370.98372656265025, "mean_abs_error_last_50": 438.42450445264967, "mean_pred_prob": 0.03616600563400425, "mean_pred_prob_last_10": 0.1736501064209733, "mean_pred_prob_last_25": 0.09593576264451258, "mean_pred_prob_last_50": 0.06003633252112195, "mean_token_accuracy": 0.8681935369968414, "step": 19830 }, { "epoch": 0.35269230085506553, "grad_norm": 1.486696648155336, "learning_rate": 0.0001, "loss": 0.7893, "mean_abs_error": 1153.2083512387512, "mean_abs_error_last_10": 744.8572074126116, "mean_abs_error_last_25": 840.6597035455676, "mean_abs_error_last_50": 941.75379501196, "mean_pred_prob": 0.03464255655562738, "mean_pred_prob_last_10": 0.17252037802973064, "mean_pred_prob_last_25": 0.09586752388568129, "mean_pred_prob_last_50": 0.05881415080075385, "mean_token_accuracy": 0.8708796203136444, "step": 19840 }, { "epoch": 0.35287006915186747, "grad_norm": 1.6216571910303426, "learning_rate": 0.0001, "loss": 0.7734, "mean_abs_error": 516.2917350966839, "mean_abs_error_last_10": 130.64250827386573, "mean_abs_error_last_25": 207.30960424523232, "mean_abs_error_last_50": 294.97633991563305, "mean_pred_prob": 0.03197972766938619, "mean_pred_prob_last_10": 0.1680371557828039, "mean_pred_prob_last_25": 0.09016072026570328, "mean_pred_prob_last_50": 0.05487387232715264, "mean_token_accuracy": 0.8676835238933563, "step": 19850 }, { "epoch": 0.3530478374486694, "grad_norm": 1.5815924974941054, "learning_rate": 0.0001, "loss": 0.7149, "mean_abs_error": 239.80593187504172, "mean_abs_error_last_10": 69.42156975499863, "mean_abs_error_last_25": 94.23507092150554, "mean_abs_error_last_50": 126.20759898031936, "mean_pred_prob": 0.041307353368029, "mean_pred_prob_last_10": 0.19945419803261757, "mean_pred_prob_last_25": 0.11092377221211791, "mean_pred_prob_last_50": 0.0684459582902491, "mean_token_accuracy": 0.8815883219242096, "step": 19860 }, { "epoch": 0.35322560574547135, "grad_norm": 1.075205077788303, "learning_rate": 0.0001, "loss": 0.7508, "mean_abs_error": 499.9588485302459, "mean_abs_error_last_10": 134.80248612426453, "mean_abs_error_last_25": 221.94711979612322, "mean_abs_error_last_50": 326.42348468684327, "mean_pred_prob": 0.04266118923260365, "mean_pred_prob_last_10": 0.21224087537266315, "mean_pred_prob_last_25": 0.12008256991393865, "mean_pred_prob_last_50": 0.07289574728929438, "mean_token_accuracy": 0.8742094278335572, "step": 19870 }, { "epoch": 0.3534033740422733, "grad_norm": 1.655076193608684, "learning_rate": 0.0001, "loss": 0.7996, "mean_abs_error": 376.7486094049317, "mean_abs_error_last_10": 193.6085815307693, "mean_abs_error_last_25": 227.45168857131938, "mean_abs_error_last_50": 287.8776675416458, "mean_pred_prob": 0.03664645096869208, "mean_pred_prob_last_10": 0.19343249325174838, "mean_pred_prob_last_25": 0.10533362085698172, "mean_pred_prob_last_50": 0.0634774899110198, "mean_token_accuracy": 0.875395530462265, "step": 19880 }, { "epoch": 0.3535811423390752, "grad_norm": 1.6144399878413103, "learning_rate": 0.0001, "loss": 0.9035, "mean_abs_error": 827.1351400235419, "mean_abs_error_last_10": 365.1952359906717, "mean_abs_error_last_25": 424.3587783765582, "mean_abs_error_last_50": 522.1145639514897, "mean_pred_prob": 0.035212201095419005, "mean_pred_prob_last_10": 0.17221314587513917, "mean_pred_prob_last_25": 0.0948806804837659, "mean_pred_prob_last_50": 0.05889934648294002, "mean_token_accuracy": 0.8702304303646088, "step": 19890 }, { "epoch": 0.3537589106358772, "grad_norm": 2.193964584235577, "learning_rate": 0.0001, "loss": 0.8609, "mean_abs_error": 454.2421327732883, "mean_abs_error_last_10": 153.5116946930475, "mean_abs_error_last_25": 162.46153578006385, "mean_abs_error_last_50": 234.2083464819951, "mean_pred_prob": 0.03751769150840119, "mean_pred_prob_last_10": 0.1977836306905374, "mean_pred_prob_last_25": 0.11024775740224868, "mean_pred_prob_last_50": 0.06442785825347527, "mean_token_accuracy": 0.8699782490730286, "step": 19900 }, { "epoch": 0.35393667893267916, "grad_norm": 1.5942078767553562, "learning_rate": 0.0001, "loss": 0.7755, "mean_abs_error": 878.6304205018909, "mean_abs_error_last_10": 204.40842851968185, "mean_abs_error_last_25": 369.7520747599456, "mean_abs_error_last_50": 567.2814898205775, "mean_pred_prob": 0.041967634827597064, "mean_pred_prob_last_10": 0.19613544237799943, "mean_pred_prob_last_25": 0.11382427950156852, "mean_pred_prob_last_50": 0.06999471128801815, "mean_token_accuracy": 0.8806790828704834, "step": 19910 }, { "epoch": 0.3541144472294811, "grad_norm": 0.9881985869218373, "learning_rate": 0.0001, "loss": 0.853, "mean_abs_error": 288.1273125963156, "mean_abs_error_last_10": 155.15653431229663, "mean_abs_error_last_25": 188.9222495808669, "mean_abs_error_last_50": 209.99148301536616, "mean_pred_prob": 0.04036235760431737, "mean_pred_prob_last_10": 0.2090058157220483, "mean_pred_prob_last_25": 0.11203195238485933, "mean_pred_prob_last_50": 0.06721527487970888, "mean_token_accuracy": 0.8598689436912537, "step": 19920 }, { "epoch": 0.35429221552628304, "grad_norm": 2.3120446548227207, "learning_rate": 0.0001, "loss": 0.8181, "mean_abs_error": 206.2635409313811, "mean_abs_error_last_10": 42.689558907464765, "mean_abs_error_last_25": 59.726403064492295, "mean_abs_error_last_50": 115.8149430139161, "mean_pred_prob": 0.03916104794479906, "mean_pred_prob_last_10": 0.19165225178003312, "mean_pred_prob_last_25": 0.10848282016813755, "mean_pred_prob_last_50": 0.06669151885434985, "mean_token_accuracy": 0.8696764767169952, "step": 19930 }, { "epoch": 0.354469983823085, "grad_norm": 1.180819179954005, "learning_rate": 0.0001, "loss": 0.7948, "mean_abs_error": 752.3054342154579, "mean_abs_error_last_10": 224.82043047247822, "mean_abs_error_last_25": 327.42516913572655, "mean_abs_error_last_50": 450.8354139668694, "mean_pred_prob": 0.01591541012166999, "mean_pred_prob_last_10": 0.09461805239552631, "mean_pred_prob_last_25": 0.047815559711307284, "mean_pred_prob_last_50": 0.027996761351823808, "mean_token_accuracy": 0.8764831483364105, "step": 19940 }, { "epoch": 0.3546477521198869, "grad_norm": 1.560834772475478, "learning_rate": 0.0001, "loss": 0.7719, "mean_abs_error": 265.0511700647602, "mean_abs_error_last_10": 126.12550129355427, "mean_abs_error_last_25": 174.51796111273873, "mean_abs_error_last_50": 199.88586613482187, "mean_pred_prob": 0.02821723874658346, "mean_pred_prob_last_10": 0.154053264670074, "mean_pred_prob_last_25": 0.08298739055171608, "mean_pred_prob_last_50": 0.048789120931178334, "mean_token_accuracy": 0.8731724858283997, "step": 19950 }, { "epoch": 0.3548255204166889, "grad_norm": 1.8102597221803791, "learning_rate": 0.0001, "loss": 0.94, "mean_abs_error": 495.36546041640014, "mean_abs_error_last_10": 105.07351546766097, "mean_abs_error_last_25": 170.79875805691245, "mean_abs_error_last_50": 332.7631977690236, "mean_pred_prob": 0.048407399794086814, "mean_pred_prob_last_10": 0.21392613053321838, "mean_pred_prob_last_25": 0.12959260474890472, "mean_pred_prob_last_50": 0.08093050490133465, "mean_token_accuracy": 0.8723914504051209, "step": 19960 }, { "epoch": 0.35500328871349085, "grad_norm": 4.874364793758953, "learning_rate": 0.0001, "loss": 0.8302, "mean_abs_error": 1166.1741675067074, "mean_abs_error_last_10": 728.1262443747639, "mean_abs_error_last_25": 818.9783187129858, "mean_abs_error_last_50": 944.0326160199918, "mean_pred_prob": 0.010971598290780094, "mean_pred_prob_last_10": 0.06866249705199153, "mean_pred_prob_last_25": 0.03327055351255694, "mean_pred_prob_last_50": 0.01909390620276099, "mean_token_accuracy": 0.8777611017227173, "step": 19970 }, { "epoch": 0.3551810570102928, "grad_norm": 1.2743060658489562, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 906.7660388377857, "mean_abs_error_last_10": 561.4958401955671, "mean_abs_error_last_25": 608.3594826459001, "mean_abs_error_last_50": 626.5008215919017, "mean_pred_prob": 0.015847412578295916, "mean_pred_prob_last_10": 0.08061439030570909, "mean_pred_prob_last_25": 0.044547294406220315, "mean_pred_prob_last_50": 0.026514639030210674, "mean_token_accuracy": 0.8725549161434174, "step": 19980 }, { "epoch": 0.35535882530709473, "grad_norm": 1.972660824782507, "learning_rate": 0.0001, "loss": 0.8078, "mean_abs_error": 144.4550425832715, "mean_abs_error_last_10": 67.50433376085049, "mean_abs_error_last_25": 75.66790576907611, "mean_abs_error_last_50": 106.16766184129588, "mean_pred_prob": 0.0503361975774169, "mean_pred_prob_last_10": 0.25150791816413404, "mean_pred_prob_last_25": 0.14196857269853352, "mean_pred_prob_last_50": 0.0857904358766973, "mean_token_accuracy": 0.8673911392688751, "step": 19990 }, { "epoch": 0.35553659360389667, "grad_norm": 1.7419985292985487, "learning_rate": 0.0001, "loss": 0.8066, "mean_abs_error": 1491.890506594703, "mean_abs_error_last_10": 837.6681513949645, "mean_abs_error_last_25": 1023.3304999023119, "mean_abs_error_last_50": 1216.307004717777, "mean_pred_prob": 0.025859739619772882, "mean_pred_prob_last_10": 0.1333850882045226, "mean_pred_prob_last_25": 0.07321662761969491, "mean_pred_prob_last_50": 0.04305928213289008, "mean_token_accuracy": 0.8670256972312927, "step": 20000 }, { "epoch": 0.3557143619006986, "grad_norm": 1.5209033403204602, "learning_rate": 0.0001, "loss": 0.7951, "mean_abs_error": 695.2105604431248, "mean_abs_error_last_10": 100.58449979315164, "mean_abs_error_last_25": 163.86846955517646, "mean_abs_error_last_50": 373.22193074508465, "mean_pred_prob": 0.023973899125121535, "mean_pred_prob_last_10": 0.13733399230986834, "mean_pred_prob_last_25": 0.07205753792077303, "mean_pred_prob_last_50": 0.04251733096316457, "mean_token_accuracy": 0.8810430467128754, "step": 20010 }, { "epoch": 0.3558921301975006, "grad_norm": 1.5939096045376182, "learning_rate": 0.0001, "loss": 0.8434, "mean_abs_error": 296.65783770257144, "mean_abs_error_last_10": 66.23558413656848, "mean_abs_error_last_25": 91.37387621374975, "mean_abs_error_last_50": 173.47223782861718, "mean_pred_prob": 0.031708904821425674, "mean_pred_prob_last_10": 0.17140644416213036, "mean_pred_prob_last_25": 0.09014684557914734, "mean_pred_prob_last_50": 0.05395577028393746, "mean_token_accuracy": 0.8654798567295074, "step": 20020 }, { "epoch": 0.35606989849430254, "grad_norm": 3.274085017928998, "learning_rate": 0.0001, "loss": 0.8604, "mean_abs_error": 1193.3434363038455, "mean_abs_error_last_10": 777.5183176291433, "mean_abs_error_last_25": 744.0496385311093, "mean_abs_error_last_50": 862.0774646069885, "mean_pred_prob": 0.027788549789693206, "mean_pred_prob_last_10": 0.14272980902751442, "mean_pred_prob_last_25": 0.07874930546386168, "mean_pred_prob_last_50": 0.04752573538280558, "mean_token_accuracy": 0.8717715799808502, "step": 20030 }, { "epoch": 0.3562476667911045, "grad_norm": 1.3134088154232821, "learning_rate": 0.0001, "loss": 0.7017, "mean_abs_error": 86.08065766329075, "mean_abs_error_last_10": 40.389902268472234, "mean_abs_error_last_25": 38.61852933926444, "mean_abs_error_last_50": 54.074754684596975, "mean_pred_prob": 0.05839893780648708, "mean_pred_prob_last_10": 0.26714483126997945, "mean_pred_prob_last_25": 0.1577917668968439, "mean_pred_prob_last_50": 0.09691849984228611, "mean_token_accuracy": 0.8696475148200988, "step": 20040 }, { "epoch": 0.3564254350879064, "grad_norm": 1.2561604424969095, "learning_rate": 0.0001, "loss": 0.8308, "mean_abs_error": 502.8424111397885, "mean_abs_error_last_10": 231.9999585560783, "mean_abs_error_last_25": 272.60764036001206, "mean_abs_error_last_50": 275.0750306423452, "mean_pred_prob": 0.032142157503403726, "mean_pred_prob_last_10": 0.15601155757904053, "mean_pred_prob_last_25": 0.09064312451519072, "mean_pred_prob_last_50": 0.05380248916335404, "mean_token_accuracy": 0.8783552050590515, "step": 20050 }, { "epoch": 0.35660320338470836, "grad_norm": 1.4898590529767661, "learning_rate": 0.0001, "loss": 0.8507, "mean_abs_error": 876.3450722389889, "mean_abs_error_last_10": 428.284942146744, "mean_abs_error_last_25": 456.9817090267803, "mean_abs_error_last_50": 606.1922900802958, "mean_pred_prob": 0.03521408137748949, "mean_pred_prob_last_10": 0.1759371419902891, "mean_pred_prob_last_25": 0.09812401535164099, "mean_pred_prob_last_50": 0.06004530379723292, "mean_token_accuracy": 0.8736858308315277, "step": 20060 }, { "epoch": 0.3567809716815103, "grad_norm": 2.519628437163065, "learning_rate": 0.0001, "loss": 0.7522, "mean_abs_error": 837.09538912013, "mean_abs_error_last_10": 197.12245026305828, "mean_abs_error_last_25": 318.4571370240675, "mean_abs_error_last_50": 494.05474339823525, "mean_pred_prob": 0.03647277844138443, "mean_pred_prob_last_10": 0.18974782061995937, "mean_pred_prob_last_25": 0.1044387623318471, "mean_pred_prob_last_50": 0.06260076142498291, "mean_token_accuracy": 0.880811995267868, "step": 20070 }, { "epoch": 0.3569587399783123, "grad_norm": 1.403201341465868, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 386.8192638072129, "mean_abs_error_last_10": 96.03661047400617, "mean_abs_error_last_25": 217.33894705653455, "mean_abs_error_last_50": 250.84558367833034, "mean_pred_prob": 0.04036833129357546, "mean_pred_prob_last_10": 0.1907969828695059, "mean_pred_prob_last_25": 0.10870282845571637, "mean_pred_prob_last_50": 0.06741630989126861, "mean_token_accuracy": 0.8727561831474304, "step": 20080 }, { "epoch": 0.35713650827511423, "grad_norm": 1.9411522995803454, "learning_rate": 0.0001, "loss": 0.786, "mean_abs_error": 257.3064601000653, "mean_abs_error_last_10": 102.25853649529768, "mean_abs_error_last_25": 122.81558926840674, "mean_abs_error_last_50": 137.71122094064225, "mean_pred_prob": 0.05768391927704215, "mean_pred_prob_last_10": 0.2630709562450647, "mean_pred_prob_last_25": 0.15535921324044466, "mean_pred_prob_last_50": 0.09683803515508771, "mean_token_accuracy": 0.8736123263835907, "step": 20090 }, { "epoch": 0.35731427657191617, "grad_norm": 1.1003537101621221, "learning_rate": 0.0001, "loss": 0.8836, "mean_abs_error": 203.24008163383286, "mean_abs_error_last_10": 53.577329738857486, "mean_abs_error_last_25": 79.7927207575724, "mean_abs_error_last_50": 127.57481823490484, "mean_pred_prob": 0.04297165777534247, "mean_pred_prob_last_10": 0.20710904896259308, "mean_pred_prob_last_25": 0.12019704449921846, "mean_pred_prob_last_50": 0.07339146956801415, "mean_token_accuracy": 0.8712428271770477, "step": 20100 }, { "epoch": 0.3574920448687181, "grad_norm": 2.0542384339016446, "learning_rate": 0.0001, "loss": 0.7966, "mean_abs_error": 1171.1466708321943, "mean_abs_error_last_10": 852.9082706785427, "mean_abs_error_last_25": 974.2508596677868, "mean_abs_error_last_50": 1022.860979948887, "mean_pred_prob": 0.027425916923675686, "mean_pred_prob_last_10": 0.14501518205506728, "mean_pred_prob_last_25": 0.07659315723722102, "mean_pred_prob_last_50": 0.04673116138001206, "mean_token_accuracy": 0.874455201625824, "step": 20110 }, { "epoch": 0.35766981316552005, "grad_norm": 1.3932237053512844, "learning_rate": 0.0001, "loss": 0.7401, "mean_abs_error": 224.2209496787688, "mean_abs_error_last_10": 39.90735229443047, "mean_abs_error_last_25": 57.529494333870915, "mean_abs_error_last_50": 115.19874781909417, "mean_pred_prob": 0.03320985701866448, "mean_pred_prob_last_10": 0.173866561986506, "mean_pred_prob_last_25": 0.09684391003102064, "mean_pred_prob_last_50": 0.05750804096460342, "mean_token_accuracy": 0.8805912077426911, "step": 20120 }, { "epoch": 0.357847581462322, "grad_norm": 1.1445324650666917, "learning_rate": 0.0001, "loss": 0.784, "mean_abs_error": 314.63684284456673, "mean_abs_error_last_10": 123.09785550871706, "mean_abs_error_last_25": 159.93040641453427, "mean_abs_error_last_50": 185.3132589507415, "mean_pred_prob": 0.036342878639698026, "mean_pred_prob_last_10": 0.1701432129368186, "mean_pred_prob_last_25": 0.09691853420808912, "mean_pred_prob_last_50": 0.060195654584094885, "mean_token_accuracy": 0.8609475910663604, "step": 20130 }, { "epoch": 0.358025349759124, "grad_norm": 3.108464859587828, "learning_rate": 0.0001, "loss": 0.8835, "mean_abs_error": 430.08088347929606, "mean_abs_error_last_10": 99.15357300546373, "mean_abs_error_last_25": 159.07125394369655, "mean_abs_error_last_50": 232.19375147833202, "mean_pred_prob": 0.045056055393069984, "mean_pred_prob_last_10": 0.20936311669647695, "mean_pred_prob_last_25": 0.12355672447010875, "mean_pred_prob_last_50": 0.07502765720710158, "mean_token_accuracy": 0.8683322370052338, "step": 20140 }, { "epoch": 0.3582031180559259, "grad_norm": 1.864242863989603, "learning_rate": 0.0001, "loss": 0.7372, "mean_abs_error": 397.21338179560104, "mean_abs_error_last_10": 148.25339795547129, "mean_abs_error_last_25": 189.16927414971855, "mean_abs_error_last_50": 272.6970605100663, "mean_pred_prob": 0.03136753791477531, "mean_pred_prob_last_10": 0.16910657230764628, "mean_pred_prob_last_25": 0.08959925444796682, "mean_pred_prob_last_50": 0.053419326152652505, "mean_token_accuracy": 0.8721322238445282, "step": 20150 }, { "epoch": 0.35838088635272786, "grad_norm": 2.4391997851432743, "learning_rate": 0.0001, "loss": 0.9719, "mean_abs_error": 255.99728921426745, "mean_abs_error_last_10": 109.282266478172, "mean_abs_error_last_25": 160.62287726562857, "mean_abs_error_last_50": 211.4790066442468, "mean_pred_prob": 0.03486767546273768, "mean_pred_prob_last_10": 0.17523575872182845, "mean_pred_prob_last_25": 0.09784430461004376, "mean_pred_prob_last_50": 0.05926843555644155, "mean_token_accuracy": 0.8622652292251587, "step": 20160 }, { "epoch": 0.3585586546495298, "grad_norm": 1.4571257193314908, "learning_rate": 0.0001, "loss": 0.8794, "mean_abs_error": 183.3090288434575, "mean_abs_error_last_10": 46.9552953806935, "mean_abs_error_last_25": 83.79141607803606, "mean_abs_error_last_50": 99.31950146175095, "mean_pred_prob": 0.035133489314466713, "mean_pred_prob_last_10": 0.18983736634254456, "mean_pred_prob_last_25": 0.09848958123475313, "mean_pred_prob_last_50": 0.059754449501633645, "mean_token_accuracy": 0.8661264777183533, "step": 20170 }, { "epoch": 0.35873642294633173, "grad_norm": 2.124309862675158, "learning_rate": 0.0001, "loss": 0.7414, "mean_abs_error": 238.74749864159975, "mean_abs_error_last_10": 68.01827153207708, "mean_abs_error_last_25": 86.45311570783656, "mean_abs_error_last_50": 119.8356824550747, "mean_pred_prob": 0.053162584925303236, "mean_pred_prob_last_10": 0.2747981080552563, "mean_pred_prob_last_25": 0.15040192698361352, "mean_pred_prob_last_50": 0.0908799885539338, "mean_token_accuracy": 0.8838793516159058, "step": 20180 }, { "epoch": 0.3589141912431337, "grad_norm": 1.7056483157742028, "learning_rate": 0.0001, "loss": 0.8043, "mean_abs_error": 935.1151449008437, "mean_abs_error_last_10": 304.33665945277414, "mean_abs_error_last_25": 340.53023053622127, "mean_abs_error_last_50": 595.7777810803306, "mean_pred_prob": 0.030127567541785538, "mean_pred_prob_last_10": 0.14055276720318943, "mean_pred_prob_last_25": 0.08214721526019275, "mean_pred_prob_last_50": 0.050656583113595846, "mean_token_accuracy": 0.8737870752811432, "step": 20190 }, { "epoch": 0.35909195953993567, "grad_norm": 1.2222155127864929, "learning_rate": 0.0001, "loss": 0.8972, "mean_abs_error": 1587.8120573844494, "mean_abs_error_last_10": 1090.9325615876717, "mean_abs_error_last_25": 1176.7828868158233, "mean_abs_error_last_50": 1300.0756391744892, "mean_pred_prob": 0.023197067856017384, "mean_pred_prob_last_10": 0.11803190160026134, "mean_pred_prob_last_25": 0.0635242568958347, "mean_pred_prob_last_50": 0.039129029951436675, "mean_token_accuracy": 0.8758903086185456, "step": 20200 }, { "epoch": 0.3592697278367376, "grad_norm": 1.4640808301945034, "learning_rate": 0.0001, "loss": 0.7865, "mean_abs_error": 773.220035271108, "mean_abs_error_last_10": 268.1839719631452, "mean_abs_error_last_25": 366.0822123778266, "mean_abs_error_last_50": 497.63119723204375, "mean_pred_prob": 0.03907611099421047, "mean_pred_prob_last_10": 0.19181007270235567, "mean_pred_prob_last_25": 0.1071056176093407, "mean_pred_prob_last_50": 0.06634174046921544, "mean_token_accuracy": 0.8686724483966828, "step": 20210 }, { "epoch": 0.35944749613353955, "grad_norm": 1.1118729581947204, "learning_rate": 0.0001, "loss": 0.8584, "mean_abs_error": 599.2016804401896, "mean_abs_error_last_10": 144.8629439590249, "mean_abs_error_last_25": 235.94735028073, "mean_abs_error_last_50": 384.7144871064388, "mean_pred_prob": 0.022172456665430218, "mean_pred_prob_last_10": 0.1247999447863549, "mean_pred_prob_last_25": 0.064772192761302, "mean_pred_prob_last_50": 0.03837243730667979, "mean_token_accuracy": 0.8689752459526062, "step": 20220 }, { "epoch": 0.3596252644303415, "grad_norm": 1.225953623104951, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 1154.792532717262, "mean_abs_error_last_10": 683.8108135125055, "mean_abs_error_last_25": 795.326478401172, "mean_abs_error_last_50": 902.1910210574003, "mean_pred_prob": 0.031471841865277385, "mean_pred_prob_last_10": 0.15158444966073148, "mean_pred_prob_last_25": 0.08640329839254264, "mean_pred_prob_last_50": 0.05385654317069566, "mean_token_accuracy": 0.8778860449790955, "step": 20230 }, { "epoch": 0.3598030327271434, "grad_norm": 2.3171286240920232, "learning_rate": 0.0001, "loss": 0.9012, "mean_abs_error": 2544.8165855002044, "mean_abs_error_last_10": 1735.7758882530538, "mean_abs_error_last_25": 1838.3416272204893, "mean_abs_error_last_50": 2016.9865782009279, "mean_pred_prob": 0.0149363515265577, "mean_pred_prob_last_10": 0.0740747706455295, "mean_pred_prob_last_25": 0.039824452198081416, "mean_pred_prob_last_50": 0.025052205993415556, "mean_token_accuracy": 0.8724314272403717, "step": 20240 }, { "epoch": 0.35998080102394536, "grad_norm": 1.632644674436217, "learning_rate": 0.0001, "loss": 0.8695, "mean_abs_error": 317.9893554749281, "mean_abs_error_last_10": 91.39209882921577, "mean_abs_error_last_25": 130.59692924829193, "mean_abs_error_last_50": 188.79678209103753, "mean_pred_prob": 0.04295142695773393, "mean_pred_prob_last_10": 0.20375063549727201, "mean_pred_prob_last_25": 0.11643543969839812, "mean_pred_prob_last_50": 0.07214467148296536, "mean_token_accuracy": 0.8629110991954804, "step": 20250 }, { "epoch": 0.36015856932074736, "grad_norm": 1.244401611606446, "learning_rate": 0.0001, "loss": 0.8708, "mean_abs_error": 731.285028194221, "mean_abs_error_last_10": 335.32713435181984, "mean_abs_error_last_25": 347.2880413503817, "mean_abs_error_last_50": 439.5471843991448, "mean_pred_prob": 0.03291722124558873, "mean_pred_prob_last_10": 0.15872554072411732, "mean_pred_prob_last_25": 0.09083689707331359, "mean_pred_prob_last_50": 0.05619111183914356, "mean_token_accuracy": 0.862561047077179, "step": 20260 }, { "epoch": 0.3603363376175493, "grad_norm": 1.610711401343483, "learning_rate": 0.0001, "loss": 0.8999, "mean_abs_error": 585.3637118443643, "mean_abs_error_last_10": 246.8298470266333, "mean_abs_error_last_25": 300.1518443573179, "mean_abs_error_last_50": 398.9731178400128, "mean_pred_prob": 0.05032084165140986, "mean_pred_prob_last_10": 0.2373688860971015, "mean_pred_prob_last_25": 0.13613863860664424, "mean_pred_prob_last_50": 0.08369427392899524, "mean_token_accuracy": 0.8692204058170319, "step": 20270 }, { "epoch": 0.36051410591435123, "grad_norm": 1.9587235736349702, "learning_rate": 0.0001, "loss": 0.745, "mean_abs_error": 508.655085370289, "mean_abs_error_last_10": 102.92608298794153, "mean_abs_error_last_25": 167.0918178630144, "mean_abs_error_last_50": 268.98645572819544, "mean_pred_prob": 0.04525285626295954, "mean_pred_prob_last_10": 0.2120736427605152, "mean_pred_prob_last_25": 0.12211947133764625, "mean_pred_prob_last_50": 0.07557850363664328, "mean_token_accuracy": 0.8830235540866852, "step": 20280 }, { "epoch": 0.3606918742111532, "grad_norm": 3.1195375289551546, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 633.0778944540797, "mean_abs_error_last_10": 143.79315339965024, "mean_abs_error_last_25": 223.17024954504555, "mean_abs_error_last_50": 351.06689030473103, "mean_pred_prob": 0.04021305434871465, "mean_pred_prob_last_10": 0.20794380152365194, "mean_pred_prob_last_25": 0.11222559653688222, "mean_pred_prob_last_50": 0.06739474888308905, "mean_token_accuracy": 0.8714169800281525, "step": 20290 }, { "epoch": 0.3608696425079551, "grad_norm": 1.3769449087621313, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 835.7817092939895, "mean_abs_error_last_10": 276.3554761289423, "mean_abs_error_last_25": 369.41666241982387, "mean_abs_error_last_50": 552.0410045193884, "mean_pred_prob": 0.052091558909160084, "mean_pred_prob_last_10": 0.24753786841174588, "mean_pred_prob_last_25": 0.14310789118753747, "mean_pred_prob_last_50": 0.08847838730434887, "mean_token_accuracy": 0.8720468819141388, "step": 20300 }, { "epoch": 0.3610474108047571, "grad_norm": 1.6241444324742775, "learning_rate": 0.0001, "loss": 0.8397, "mean_abs_error": 300.77252404307137, "mean_abs_error_last_10": 78.26431105914827, "mean_abs_error_last_25": 149.7606484854286, "mean_abs_error_last_50": 211.35637991555095, "mean_pred_prob": 0.041755758505314586, "mean_pred_prob_last_10": 0.19896924383938314, "mean_pred_prob_last_25": 0.11238776678219438, "mean_pred_prob_last_50": 0.06963090747594833, "mean_token_accuracy": 0.8713684737682342, "step": 20310 }, { "epoch": 0.36122517910155905, "grad_norm": 1.51885979830375, "learning_rate": 0.0001, "loss": 0.8154, "mean_abs_error": 530.005057592208, "mean_abs_error_last_10": 201.26620490452493, "mean_abs_error_last_25": 220.20192419157237, "mean_abs_error_last_50": 285.90603190528935, "mean_pred_prob": 0.03491814116714522, "mean_pred_prob_last_10": 0.15991063034161926, "mean_pred_prob_last_25": 0.09340965242590756, "mean_pred_prob_last_50": 0.057993122120387854, "mean_token_accuracy": 0.8756175577640534, "step": 20320 }, { "epoch": 0.361402947398361, "grad_norm": 9.335987552802749, "learning_rate": 0.0001, "loss": 0.8377, "mean_abs_error": 460.549316529063, "mean_abs_error_last_10": 258.8608410002896, "mean_abs_error_last_25": 313.72857452194637, "mean_abs_error_last_50": 330.6013250021974, "mean_pred_prob": 0.025004468695260584, "mean_pred_prob_last_10": 0.13932554014027118, "mean_pred_prob_last_25": 0.07373517518863082, "mean_pred_prob_last_50": 0.04341734577901661, "mean_token_accuracy": 0.8570919871330261, "step": 20330 }, { "epoch": 0.3615807156951629, "grad_norm": 1.16630082643915, "learning_rate": 0.0001, "loss": 0.846, "mean_abs_error": 718.7729131888149, "mean_abs_error_last_10": 289.2046289943597, "mean_abs_error_last_25": 405.71603938249143, "mean_abs_error_last_50": 570.0120976777167, "mean_pred_prob": 0.04094158921507187, "mean_pred_prob_last_10": 0.17896230021724477, "mean_pred_prob_last_25": 0.10818601421779021, "mean_pred_prob_last_50": 0.06825221057515592, "mean_token_accuracy": 0.8632021367549896, "step": 20340 }, { "epoch": 0.36175848399196486, "grad_norm": 1.873672836448615, "learning_rate": 0.0001, "loss": 0.8751, "mean_abs_error": 664.3407587213015, "mean_abs_error_last_10": 96.81853856146076, "mean_abs_error_last_25": 201.86628742430733, "mean_abs_error_last_50": 402.8580903647735, "mean_pred_prob": 0.03646797824185342, "mean_pred_prob_last_10": 0.16576106785796582, "mean_pred_prob_last_25": 0.09462566757574678, "mean_pred_prob_last_50": 0.060268389503471555, "mean_token_accuracy": 0.8613707482814789, "step": 20350 }, { "epoch": 0.3619362522887668, "grad_norm": 1.8652987617982728, "learning_rate": 0.0001, "loss": 0.7751, "mean_abs_error": 734.5556643568524, "mean_abs_error_last_10": 241.11937446370615, "mean_abs_error_last_25": 314.2851204960673, "mean_abs_error_last_50": 455.7458959559561, "mean_pred_prob": 0.031902950094081464, "mean_pred_prob_last_10": 0.15655229387339203, "mean_pred_prob_last_25": 0.08720590227749199, "mean_pred_prob_last_50": 0.05412589703919366, "mean_token_accuracy": 0.880362045764923, "step": 20360 }, { "epoch": 0.3621140205855688, "grad_norm": 1.2005966893399087, "learning_rate": 0.0001, "loss": 0.8685, "mean_abs_error": 953.5714434815202, "mean_abs_error_last_10": 501.6055911312562, "mean_abs_error_last_25": 601.1879454502289, "mean_abs_error_last_50": 660.6088354421657, "mean_pred_prob": 0.01992869942623656, "mean_pred_prob_last_10": 0.10252428647945636, "mean_pred_prob_last_25": 0.0539967987977434, "mean_pred_prob_last_50": 0.03311342718952801, "mean_token_accuracy": 0.8712468087673187, "step": 20370 }, { "epoch": 0.36229178888237074, "grad_norm": 0.9160883209916973, "learning_rate": 0.0001, "loss": 0.7675, "mean_abs_error": 923.8457924287198, "mean_abs_error_last_10": 591.578838595511, "mean_abs_error_last_25": 662.627568183532, "mean_abs_error_last_50": 731.8862518662393, "mean_pred_prob": 0.029721261640952436, "mean_pred_prob_last_10": 0.13182262282061857, "mean_pred_prob_last_25": 0.07719924571574666, "mean_pred_prob_last_50": 0.04849853083869675, "mean_token_accuracy": 0.8762546300888061, "step": 20380 }, { "epoch": 0.3624695571791727, "grad_norm": 1.6965683855554998, "learning_rate": 0.0001, "loss": 0.7987, "mean_abs_error": 615.4444879673479, "mean_abs_error_last_10": 211.7557721363941, "mean_abs_error_last_25": 316.47455102998066, "mean_abs_error_last_50": 408.30099152806054, "mean_pred_prob": 0.03654200751625467, "mean_pred_prob_last_10": 0.17864171984838323, "mean_pred_prob_last_25": 0.10402549558202737, "mean_pred_prob_last_50": 0.0626592553977389, "mean_token_accuracy": 0.8677019655704499, "step": 20390 }, { "epoch": 0.3626473254759746, "grad_norm": 1.4128358274986075, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 438.2127772288949, "mean_abs_error_last_10": 180.4358574384327, "mean_abs_error_last_25": 232.516036266194, "mean_abs_error_last_50": 270.1114695799442, "mean_pred_prob": 0.03281744219129905, "mean_pred_prob_last_10": 0.16900230711326003, "mean_pred_prob_last_25": 0.09219039115123451, "mean_pred_prob_last_50": 0.05598220301326364, "mean_token_accuracy": 0.880646574497223, "step": 20400 }, { "epoch": 0.36282509377277655, "grad_norm": 1.5758935280056885, "learning_rate": 0.0001, "loss": 0.7342, "mean_abs_error": 151.39436297335632, "mean_abs_error_last_10": 31.940364633225574, "mean_abs_error_last_25": 62.52973767403405, "mean_abs_error_last_50": 93.10385012912181, "mean_pred_prob": 0.04783004675991833, "mean_pred_prob_last_10": 0.2305448167026043, "mean_pred_prob_last_25": 0.129302679002285, "mean_pred_prob_last_50": 0.08072480428963899, "mean_token_accuracy": 0.8849444925785065, "step": 20410 }, { "epoch": 0.3630028620695785, "grad_norm": 1.4838274343816769, "learning_rate": 0.0001, "loss": 0.8416, "mean_abs_error": 1071.9865345458606, "mean_abs_error_last_10": 312.7846181676709, "mean_abs_error_last_25": 413.03726607837405, "mean_abs_error_last_50": 575.0030427923972, "mean_pred_prob": 0.023692628197022714, "mean_pred_prob_last_10": 0.12795222903368994, "mean_pred_prob_last_25": 0.0690547104459256, "mean_pred_prob_last_50": 0.040473585034487766, "mean_token_accuracy": 0.869438499212265, "step": 20420 }, { "epoch": 0.3631806303663805, "grad_norm": 1.667025918877656, "learning_rate": 0.0001, "loss": 0.7793, "mean_abs_error": 383.48955365739977, "mean_abs_error_last_10": 261.01705916408554, "mean_abs_error_last_25": 266.7684817390446, "mean_abs_error_last_50": 266.3568425387894, "mean_pred_prob": 0.02543583211954683, "mean_pred_prob_last_10": 0.11610120099503547, "mean_pred_prob_last_25": 0.0663301148917526, "mean_pred_prob_last_50": 0.04087361415149644, "mean_token_accuracy": 0.8772284328937531, "step": 20430 }, { "epoch": 0.3633583986631824, "grad_norm": 1.5587677292078836, "learning_rate": 0.0001, "loss": 0.7992, "mean_abs_error": 266.74444457947453, "mean_abs_error_last_10": 95.46155520271023, "mean_abs_error_last_25": 148.55158753301126, "mean_abs_error_last_50": 170.03318335831074, "mean_pred_prob": 0.02713723639026284, "mean_pred_prob_last_10": 0.1456984354183078, "mean_pred_prob_last_25": 0.07805735804140568, "mean_pred_prob_last_50": 0.04649194362573326, "mean_token_accuracy": 0.875922030210495, "step": 20440 }, { "epoch": 0.36353616695998436, "grad_norm": 1.5489991761757715, "learning_rate": 0.0001, "loss": 0.8085, "mean_abs_error": 609.168430689398, "mean_abs_error_last_10": 182.3074096010244, "mean_abs_error_last_25": 241.8148558951671, "mean_abs_error_last_50": 380.4838256881974, "mean_pred_prob": 0.0517733623215463, "mean_pred_prob_last_10": 0.2565938192536123, "mean_pred_prob_last_25": 0.14409778354456648, "mean_pred_prob_last_50": 0.0879543338145595, "mean_token_accuracy": 0.8817089200019836, "step": 20450 }, { "epoch": 0.3637139352567863, "grad_norm": 1.435208853867919, "learning_rate": 0.0001, "loss": 0.7845, "mean_abs_error": 456.93155428817374, "mean_abs_error_last_10": 263.1877871962123, "mean_abs_error_last_25": 281.58150893432463, "mean_abs_error_last_50": 315.3003043382447, "mean_pred_prob": 0.04907262691413052, "mean_pred_prob_last_10": 0.24224143851315602, "mean_pred_prob_last_25": 0.13846615036018192, "mean_pred_prob_last_50": 0.08371799201704562, "mean_token_accuracy": 0.8697437465190887, "step": 20460 }, { "epoch": 0.36389170355358824, "grad_norm": 1.3076493196301056, "learning_rate": 0.0001, "loss": 0.9631, "mean_abs_error": 890.6656243124582, "mean_abs_error_last_10": 367.5208078850159, "mean_abs_error_last_25": 435.2388755509057, "mean_abs_error_last_50": 619.9783228438416, "mean_pred_prob": 0.028571143076987937, "mean_pred_prob_last_10": 0.15288877432467415, "mean_pred_prob_last_25": 0.08213185105123558, "mean_pred_prob_last_50": 0.04896647472633049, "mean_token_accuracy": 0.8558753371238709, "step": 20470 }, { "epoch": 0.3640694718503902, "grad_norm": 1.2017987227697742, "learning_rate": 0.0001, "loss": 0.6882, "mean_abs_error": 661.207988933922, "mean_abs_error_last_10": 304.70309018995783, "mean_abs_error_last_25": 331.1620289483366, "mean_abs_error_last_50": 366.0386912878004, "mean_pred_prob": 0.029572593199554832, "mean_pred_prob_last_10": 0.14463957571424543, "mean_pred_prob_last_25": 0.0812369802268222, "mean_pred_prob_last_50": 0.049804520909674466, "mean_token_accuracy": 0.8690249681472778, "step": 20480 }, { "epoch": 0.3642472401471922, "grad_norm": 2.3672143442793105, "learning_rate": 0.0001, "loss": 0.8186, "mean_abs_error": 360.86186731565647, "mean_abs_error_last_10": 119.90340180494266, "mean_abs_error_last_25": 148.22661840422316, "mean_abs_error_last_50": 217.08545942293804, "mean_pred_prob": 0.038668125355616215, "mean_pred_prob_last_10": 0.18924725744873286, "mean_pred_prob_last_25": 0.10516050811856985, "mean_pred_prob_last_50": 0.06497520552948118, "mean_token_accuracy": 0.8709498047828674, "step": 20490 }, { "epoch": 0.3644250084439941, "grad_norm": 1.8643178387669532, "learning_rate": 0.0001, "loss": 0.7803, "mean_abs_error": 462.8895021441729, "mean_abs_error_last_10": 252.95350512161204, "mean_abs_error_last_25": 352.92350100344663, "mean_abs_error_last_50": 444.04260875126266, "mean_pred_prob": 0.025335133331827818, "mean_pred_prob_last_10": 0.12961319107562302, "mean_pred_prob_last_25": 0.07009194782003761, "mean_pred_prob_last_50": 0.04271733057685197, "mean_token_accuracy": 0.879252290725708, "step": 20500 }, { "epoch": 0.36460277674079605, "grad_norm": 1.2993665224286308, "learning_rate": 0.0001, "loss": 0.8093, "mean_abs_error": 863.6588050851271, "mean_abs_error_last_10": 353.44513852893067, "mean_abs_error_last_25": 418.9823536566097, "mean_abs_error_last_50": 600.8128514315084, "mean_pred_prob": 0.026021213072817774, "mean_pred_prob_last_10": 0.12594143303576857, "mean_pred_prob_last_25": 0.07136967153055593, "mean_pred_prob_last_50": 0.043692256725626065, "mean_token_accuracy": 0.8694261372089386, "step": 20510 }, { "epoch": 0.364780545037598, "grad_norm": 1.2645978190750784, "learning_rate": 0.0001, "loss": 0.8263, "mean_abs_error": 429.85535570642395, "mean_abs_error_last_10": 119.49592146236, "mean_abs_error_last_25": 132.88076010680993, "mean_abs_error_last_50": 236.74878963880874, "mean_pred_prob": 0.03218860304914415, "mean_pred_prob_last_10": 0.1508847050368786, "mean_pred_prob_last_25": 0.08617962710559368, "mean_pred_prob_last_50": 0.05405517322942614, "mean_token_accuracy": 0.8685651004314423, "step": 20520 }, { "epoch": 0.36495831333439993, "grad_norm": 4.08576455199023, "learning_rate": 0.0001, "loss": 0.8062, "mean_abs_error": 215.467206849633, "mean_abs_error_last_10": 58.1051457625842, "mean_abs_error_last_25": 102.96835350751515, "mean_abs_error_last_50": 153.09368031311493, "mean_pred_prob": 0.04509275932796299, "mean_pred_prob_last_10": 0.21791037321090698, "mean_pred_prob_last_25": 0.12246953630819916, "mean_pred_prob_last_50": 0.07514373920857906, "mean_token_accuracy": 0.8690460383892059, "step": 20530 }, { "epoch": 0.36513608163120187, "grad_norm": 1.7369062066834708, "learning_rate": 0.0001, "loss": 0.9577, "mean_abs_error": 187.95157822689114, "mean_abs_error_last_10": 23.01622130726334, "mean_abs_error_last_25": 68.80689616703792, "mean_abs_error_last_50": 122.96262063978853, "mean_pred_prob": 0.05223926389589906, "mean_pred_prob_last_10": 0.23916865587234498, "mean_pred_prob_last_25": 0.1373579151928425, "mean_pred_prob_last_50": 0.08655927758663892, "mean_token_accuracy": 0.8708144426345825, "step": 20540 }, { "epoch": 0.36531384992800386, "grad_norm": 2.430243524276294, "learning_rate": 0.0001, "loss": 0.9355, "mean_abs_error": 668.5543323597428, "mean_abs_error_last_10": 247.0184406099172, "mean_abs_error_last_25": 424.30047543217097, "mean_abs_error_last_50": 489.1251319483904, "mean_pred_prob": 0.02619283804669976, "mean_pred_prob_last_10": 0.14570606611669062, "mean_pred_prob_last_25": 0.07603885279968381, "mean_pred_prob_last_50": 0.045182976359501484, "mean_token_accuracy": 0.8743898332118988, "step": 20550 }, { "epoch": 0.3654916182248058, "grad_norm": 1.9269467576985126, "learning_rate": 0.0001, "loss": 0.7873, "mean_abs_error": 280.8799340444425, "mean_abs_error_last_10": 163.6104555180936, "mean_abs_error_last_25": 162.78592273090726, "mean_abs_error_last_50": 168.43964259411075, "mean_pred_prob": 0.03858672971837222, "mean_pred_prob_last_10": 0.19683519415557385, "mean_pred_prob_last_25": 0.10603036424145103, "mean_pred_prob_last_50": 0.064656514627859, "mean_token_accuracy": 0.8800005555152893, "step": 20560 }, { "epoch": 0.36566938652160774, "grad_norm": 2.81916873814415, "learning_rate": 0.0001, "loss": 0.8798, "mean_abs_error": 355.0447357072282, "mean_abs_error_last_10": 69.92076849696613, "mean_abs_error_last_25": 142.16757451771795, "mean_abs_error_last_50": 254.98961434468302, "mean_pred_prob": 0.041336849331855774, "mean_pred_prob_last_10": 0.1971980534493923, "mean_pred_prob_last_25": 0.1129941863939166, "mean_pred_prob_last_50": 0.06960116541013121, "mean_token_accuracy": 0.8737178266048431, "step": 20570 }, { "epoch": 0.3658471548184097, "grad_norm": 3.0882966967251, "learning_rate": 0.0001, "loss": 0.8735, "mean_abs_error": 695.4397417518796, "mean_abs_error_last_10": 123.0385287031568, "mean_abs_error_last_25": 183.93302776645075, "mean_abs_error_last_50": 309.5012866127933, "mean_pred_prob": 0.029411375743802638, "mean_pred_prob_last_10": 0.16045691643375903, "mean_pred_prob_last_25": 0.08570299157872796, "mean_pred_prob_last_50": 0.050606961757875976, "mean_token_accuracy": 0.8730454802513122, "step": 20580 }, { "epoch": 0.3660249231152116, "grad_norm": 1.5772921117104997, "learning_rate": 0.0001, "loss": 0.8227, "mean_abs_error": 796.073274370568, "mean_abs_error_last_10": 275.4612516606385, "mean_abs_error_last_25": 406.8520829791513, "mean_abs_error_last_50": 562.3167814898965, "mean_pred_prob": 0.030538042364059947, "mean_pred_prob_last_10": 0.1722300689900294, "mean_pred_prob_last_25": 0.08970581380417571, "mean_pred_prob_last_50": 0.052637154166586694, "mean_token_accuracy": 0.8769575715065002, "step": 20590 }, { "epoch": 0.36620269141201356, "grad_norm": 2.1991432389551515, "learning_rate": 0.0001, "loss": 0.8343, "mean_abs_error": 1159.5442841350855, "mean_abs_error_last_10": 586.6988897475027, "mean_abs_error_last_25": 655.8658233986531, "mean_abs_error_last_50": 821.9763157817515, "mean_pred_prob": 0.05240913280504174, "mean_pred_prob_last_10": 0.24288652319664833, "mean_pred_prob_last_25": 0.14415880598826333, "mean_pred_prob_last_50": 0.08910886417288566, "mean_token_accuracy": 0.8747866868972778, "step": 20600 }, { "epoch": 0.36638045970881555, "grad_norm": 1.1305141975658575, "learning_rate": 0.0001, "loss": 0.6524, "mean_abs_error": 838.5203707565612, "mean_abs_error_last_10": 494.1348482219777, "mean_abs_error_last_25": 558.7489615431734, "mean_abs_error_last_50": 641.2783177525503, "mean_pred_prob": 0.0498513703874778, "mean_pred_prob_last_10": 0.20700718360603787, "mean_pred_prob_last_25": 0.12439559389604256, "mean_pred_prob_last_50": 0.0809071121562738, "mean_token_accuracy": 0.8822075366973877, "step": 20610 }, { "epoch": 0.3665582280056175, "grad_norm": 1.9151968856458164, "learning_rate": 0.0001, "loss": 0.7766, "mean_abs_error": 201.5986802430328, "mean_abs_error_last_10": 114.62604224853305, "mean_abs_error_last_25": 93.1150375244586, "mean_abs_error_last_50": 111.58763303869424, "mean_pred_prob": 0.03558335802517831, "mean_pred_prob_last_10": 0.1826583757996559, "mean_pred_prob_last_25": 0.0987712349742651, "mean_pred_prob_last_50": 0.060683187656104566, "mean_token_accuracy": 0.8723383367061615, "step": 20620 }, { "epoch": 0.36673599630241943, "grad_norm": 2.1837136652257145, "learning_rate": 0.0001, "loss": 0.8088, "mean_abs_error": 873.8996178063326, "mean_abs_error_last_10": 477.1182348711944, "mean_abs_error_last_25": 544.1394772612659, "mean_abs_error_last_50": 669.7745126737733, "mean_pred_prob": 0.04250797917338787, "mean_pred_prob_last_10": 0.21834026370197535, "mean_pred_prob_last_25": 0.11974156469805167, "mean_pred_prob_last_50": 0.07204943969263695, "mean_token_accuracy": 0.8712177455425263, "step": 20630 }, { "epoch": 0.36691376459922137, "grad_norm": 1.5563973784966434, "learning_rate": 0.0001, "loss": 0.8087, "mean_abs_error": 1181.6902457929925, "mean_abs_error_last_10": 714.822033509494, "mean_abs_error_last_25": 800.1678442442942, "mean_abs_error_last_50": 953.4917207717447, "mean_pred_prob": 0.032362814451335, "mean_pred_prob_last_10": 0.15956290500762407, "mean_pred_prob_last_25": 0.09002965944964672, "mean_pred_prob_last_50": 0.05444918660214171, "mean_token_accuracy": 0.8771683752536774, "step": 20640 }, { "epoch": 0.3670915328960233, "grad_norm": 3.0588686213172926, "learning_rate": 0.0001, "loss": 0.8736, "mean_abs_error": 1313.5556504067922, "mean_abs_error_last_10": 777.0643527427487, "mean_abs_error_last_25": 881.258739079806, "mean_abs_error_last_50": 1009.5950842777487, "mean_pred_prob": 0.03245447349763708, "mean_pred_prob_last_10": 0.1503521773192915, "mean_pred_prob_last_25": 0.08613154862541705, "mean_pred_prob_last_50": 0.05409096739458619, "mean_token_accuracy": 0.8703018724918365, "step": 20650 }, { "epoch": 0.36726930119282525, "grad_norm": 1.0666443493057918, "learning_rate": 0.0001, "loss": 0.7966, "mean_abs_error": 625.62462278356, "mean_abs_error_last_10": 260.74900247929565, "mean_abs_error_last_25": 364.07838274675527, "mean_abs_error_last_50": 464.37017761224644, "mean_pred_prob": 0.04368352082092315, "mean_pred_prob_last_10": 0.20571067351847888, "mean_pred_prob_last_25": 0.11161703122779727, "mean_pred_prob_last_50": 0.07117897290736437, "mean_token_accuracy": 0.8720964074134827, "step": 20660 }, { "epoch": 0.36744706948962724, "grad_norm": 2.255194922168669, "learning_rate": 0.0001, "loss": 0.8615, "mean_abs_error": 329.7368125887202, "mean_abs_error_last_10": 177.1380303701879, "mean_abs_error_last_25": 235.38737937575866, "mean_abs_error_last_50": 271.47663103997877, "mean_pred_prob": 0.02859416245482862, "mean_pred_prob_last_10": 0.13019070122390985, "mean_pred_prob_last_25": 0.07437404952943324, "mean_pred_prob_last_50": 0.04641919881105423, "mean_token_accuracy": 0.8709878206253052, "step": 20670 }, { "epoch": 0.3676248377864292, "grad_norm": 0.8705074133418238, "learning_rate": 0.0001, "loss": 0.916, "mean_abs_error": 414.03672146298334, "mean_abs_error_last_10": 77.76510978829387, "mean_abs_error_last_25": 177.9526119918944, "mean_abs_error_last_50": 259.22729124732325, "mean_pred_prob": 0.03567525618709624, "mean_pred_prob_last_10": 0.17447883263230324, "mean_pred_prob_last_25": 0.09687574105337263, "mean_pred_prob_last_50": 0.060015497636049984, "mean_token_accuracy": 0.860787981748581, "step": 20680 }, { "epoch": 0.3678026060832311, "grad_norm": 2.120313195292596, "learning_rate": 0.0001, "loss": 0.8707, "mean_abs_error": 716.293889842881, "mean_abs_error_last_10": 361.46846391619323, "mean_abs_error_last_25": 404.3766326053539, "mean_abs_error_last_50": 492.0084355440643, "mean_pred_prob": 0.032495458360062915, "mean_pred_prob_last_10": 0.17723124035983345, "mean_pred_prob_last_25": 0.09357227345753927, "mean_pred_prob_last_50": 0.05645864657708444, "mean_token_accuracy": 0.8712742626667023, "step": 20690 }, { "epoch": 0.36798037438003306, "grad_norm": 2.1568413172772662, "learning_rate": 0.0001, "loss": 0.8153, "mean_abs_error": 301.05391950719496, "mean_abs_error_last_10": 53.823770830763145, "mean_abs_error_last_25": 103.99829807378053, "mean_abs_error_last_50": 157.39289381743583, "mean_pred_prob": 0.03370495941489935, "mean_pred_prob_last_10": 0.18307205382734537, "mean_pred_prob_last_25": 0.09733883747830988, "mean_pred_prob_last_50": 0.05791420596651733, "mean_token_accuracy": 0.8720989644527435, "step": 20700 }, { "epoch": 0.368158142676835, "grad_norm": 1.7873795819948182, "learning_rate": 0.0001, "loss": 0.7586, "mean_abs_error": 656.2910103334941, "mean_abs_error_last_10": 174.63763918757257, "mean_abs_error_last_25": 240.22895289548873, "mean_abs_error_last_50": 370.00534991781524, "mean_pred_prob": 0.04038556473387871, "mean_pred_prob_last_10": 0.1925836459733546, "mean_pred_prob_last_25": 0.11021733078523539, "mean_pred_prob_last_50": 0.06774819738348015, "mean_token_accuracy": 0.8760346531867981, "step": 20710 }, { "epoch": 0.36833591097363694, "grad_norm": 1.6977081075999065, "learning_rate": 0.0001, "loss": 0.7905, "mean_abs_error": 1004.8885884548047, "mean_abs_error_last_10": 679.4197281303103, "mean_abs_error_last_25": 678.8289334964827, "mean_abs_error_last_50": 780.4937762570759, "mean_pred_prob": 0.0275169530956191, "mean_pred_prob_last_10": 0.14848525122797582, "mean_pred_prob_last_25": 0.07762274946726393, "mean_pred_prob_last_50": 0.04651064436475281, "mean_token_accuracy": 0.883169162273407, "step": 20720 }, { "epoch": 0.36851367927043893, "grad_norm": 0.8209159803582997, "learning_rate": 0.0001, "loss": 0.7373, "mean_abs_error": 235.45440396487274, "mean_abs_error_last_10": 25.993582984571425, "mean_abs_error_last_25": 60.73248063507375, "mean_abs_error_last_50": 169.67633698204745, "mean_pred_prob": 0.054682857869192955, "mean_pred_prob_last_10": 0.2674506030976772, "mean_pred_prob_last_25": 0.15316078253090382, "mean_pred_prob_last_50": 0.09242027755826712, "mean_token_accuracy": 0.8767334282398224, "step": 20730 }, { "epoch": 0.36869144756724087, "grad_norm": 1.145434580073363, "learning_rate": 0.0001, "loss": 0.7347, "mean_abs_error": 151.93045457426052, "mean_abs_error_last_10": 30.255909298214554, "mean_abs_error_last_25": 47.90366715933735, "mean_abs_error_last_50": 74.64839020253648, "mean_pred_prob": 0.05601482130587101, "mean_pred_prob_last_10": 0.24317182078957558, "mean_pred_prob_last_25": 0.14821882471442221, "mean_pred_prob_last_50": 0.09069924354553223, "mean_token_accuracy": 0.8593495547771454, "step": 20740 }, { "epoch": 0.3688692158640428, "grad_norm": 1.9545070737051078, "learning_rate": 0.0001, "loss": 0.7359, "mean_abs_error": 354.1424209013995, "mean_abs_error_last_10": 127.84862956443126, "mean_abs_error_last_25": 185.27546038994234, "mean_abs_error_last_50": 247.40202282499337, "mean_pred_prob": 0.033171700313687325, "mean_pred_prob_last_10": 0.16986479014158248, "mean_pred_prob_last_25": 0.0929805476218462, "mean_pred_prob_last_50": 0.056077917898073794, "mean_token_accuracy": 0.8818339943885803, "step": 20750 }, { "epoch": 0.36904698416084475, "grad_norm": 1.7540541276816237, "learning_rate": 0.0001, "loss": 0.8252, "mean_abs_error": 297.12906369524904, "mean_abs_error_last_10": 123.07363552834597, "mean_abs_error_last_25": 136.44082492780527, "mean_abs_error_last_50": 184.15113850954413, "mean_pred_prob": 0.0330876755528152, "mean_pred_prob_last_10": 0.15398881286382676, "mean_pred_prob_last_25": 0.0881054736673832, "mean_pred_prob_last_50": 0.05529200723394752, "mean_token_accuracy": 0.8808652281761169, "step": 20760 }, { "epoch": 0.3692247524576467, "grad_norm": 1.1705877151798112, "learning_rate": 0.0001, "loss": 0.7762, "mean_abs_error": 482.4600399909467, "mean_abs_error_last_10": 180.53624717232086, "mean_abs_error_last_25": 239.44780170243308, "mean_abs_error_last_50": 329.66087869407227, "mean_pred_prob": 0.03138349442742765, "mean_pred_prob_last_10": 0.1621014297939837, "mean_pred_prob_last_25": 0.08859346224926412, "mean_pred_prob_last_50": 0.053849300183355805, "mean_token_accuracy": 0.8742718636989594, "step": 20770 }, { "epoch": 0.3694025207544486, "grad_norm": 1.0459114644960572, "learning_rate": 0.0001, "loss": 0.8462, "mean_abs_error": 564.660742566619, "mean_abs_error_last_10": 152.03617649386996, "mean_abs_error_last_25": 232.45761073349905, "mean_abs_error_last_50": 351.1202162381236, "mean_pred_prob": 0.04150888965814374, "mean_pred_prob_last_10": 0.19694275020156055, "mean_pred_prob_last_25": 0.10945085457060486, "mean_pred_prob_last_50": 0.0691717371926643, "mean_token_accuracy": 0.8667223751544952, "step": 20780 }, { "epoch": 0.3695802890512506, "grad_norm": 0.9939694562512983, "learning_rate": 0.0001, "loss": 0.8001, "mean_abs_error": 556.2869740827423, "mean_abs_error_last_10": 254.3177973449675, "mean_abs_error_last_25": 262.7638091931407, "mean_abs_error_last_50": 285.87053799581633, "mean_pred_prob": 0.0419779660878703, "mean_pred_prob_last_10": 0.21170358486706392, "mean_pred_prob_last_25": 0.11816427102312446, "mean_pred_prob_last_50": 0.07203580852365121, "mean_token_accuracy": 0.8785622596740723, "step": 20790 }, { "epoch": 0.36975805734805256, "grad_norm": 1.1249695643244129, "learning_rate": 0.0001, "loss": 0.7853, "mean_abs_error": 196.09385137874173, "mean_abs_error_last_10": 45.439577411613115, "mean_abs_error_last_25": 69.2595362051591, "mean_abs_error_last_50": 122.82180386004443, "mean_pred_prob": 0.04005284598097205, "mean_pred_prob_last_10": 0.18437629677355288, "mean_pred_prob_last_25": 0.1054461132735014, "mean_pred_prob_last_50": 0.06524490211158991, "mean_token_accuracy": 0.8717301607131958, "step": 20800 }, { "epoch": 0.3699358256448545, "grad_norm": 1.2366247798862053, "learning_rate": 0.0001, "loss": 0.9784, "mean_abs_error": 262.9101626723599, "mean_abs_error_last_10": 144.87393033604613, "mean_abs_error_last_25": 211.32125468897692, "mean_abs_error_last_50": 221.75664700454453, "mean_pred_prob": 0.03526209995616227, "mean_pred_prob_last_10": 0.18825294310227036, "mean_pred_prob_last_25": 0.10102230068296195, "mean_pred_prob_last_50": 0.060098031722009185, "mean_token_accuracy": 0.8728086352348328, "step": 20810 }, { "epoch": 0.37011359394165644, "grad_norm": 1.6758958852160184, "learning_rate": 0.0001, "loss": 0.8312, "mean_abs_error": 351.5888449015253, "mean_abs_error_last_10": 76.81088624260194, "mean_abs_error_last_25": 160.24429761075575, "mean_abs_error_last_50": 234.25219124458235, "mean_pred_prob": 0.037174317380413414, "mean_pred_prob_last_10": 0.17991997115314007, "mean_pred_prob_last_25": 0.10229619313031435, "mean_pred_prob_last_50": 0.06316513950005173, "mean_token_accuracy": 0.8718087494373321, "step": 20820 }, { "epoch": 0.3702913622384584, "grad_norm": 1.2932416254891328, "learning_rate": 0.0001, "loss": 0.7257, "mean_abs_error": 102.4538821520654, "mean_abs_error_last_10": 21.99672394410199, "mean_abs_error_last_25": 64.63816955612472, "mean_abs_error_last_50": 83.56134992636012, "mean_pred_prob": 0.04998396150767803, "mean_pred_prob_last_10": 0.23389712758362294, "mean_pred_prob_last_25": 0.13439046852290631, "mean_pred_prob_last_50": 0.08357129134237766, "mean_token_accuracy": 0.8908570766448974, "step": 20830 }, { "epoch": 0.3704691305352603, "grad_norm": 1.20992858229914, "learning_rate": 0.0001, "loss": 0.8801, "mean_abs_error": 474.00691851857135, "mean_abs_error_last_10": 82.85420579239721, "mean_abs_error_last_25": 133.43262630754444, "mean_abs_error_last_50": 235.81301859846113, "mean_pred_prob": 0.02375677917152643, "mean_pred_prob_last_10": 0.13299436010420324, "mean_pred_prob_last_25": 0.06985109392553568, "mean_pred_prob_last_50": 0.04083526143804193, "mean_token_accuracy": 0.8724743008613587, "step": 20840 }, { "epoch": 0.3706468988320623, "grad_norm": 1.4369092580880962, "learning_rate": 0.0001, "loss": 0.823, "mean_abs_error": 2255.5849198858073, "mean_abs_error_last_10": 1539.2101735164083, "mean_abs_error_last_25": 1639.3478137423765, "mean_abs_error_last_50": 1877.7554500378872, "mean_pred_prob": 0.022265676979441196, "mean_pred_prob_last_10": 0.10518799679848598, "mean_pred_prob_last_25": 0.05907477414511959, "mean_pred_prob_last_50": 0.03716572598277708, "mean_token_accuracy": 0.8794479668140411, "step": 20850 }, { "epoch": 0.37082466712886425, "grad_norm": 2.368401756418708, "learning_rate": 0.0001, "loss": 0.6936, "mean_abs_error": 481.0905549138787, "mean_abs_error_last_10": 230.1059300377839, "mean_abs_error_last_25": 197.9330608100692, "mean_abs_error_last_50": 288.62234773857034, "mean_pred_prob": 0.044178398721851406, "mean_pred_prob_last_10": 0.21482571273227222, "mean_pred_prob_last_25": 0.12278730151010678, "mean_pred_prob_last_50": 0.07483431420405395, "mean_token_accuracy": 0.8688794851303101, "step": 20860 }, { "epoch": 0.3710024354256662, "grad_norm": 1.6607202405655244, "learning_rate": 0.0001, "loss": 0.9145, "mean_abs_error": 453.3855729718627, "mean_abs_error_last_10": 255.90960448789707, "mean_abs_error_last_25": 362.5457223801623, "mean_abs_error_last_50": 352.0208995824072, "mean_pred_prob": 0.04687567768851295, "mean_pred_prob_last_10": 0.21506086834706367, "mean_pred_prob_last_25": 0.12439363426528871, "mean_pred_prob_last_50": 0.07791842536535114, "mean_token_accuracy": 0.8625323116779328, "step": 20870 }, { "epoch": 0.3711802037224681, "grad_norm": 1.5484557164985675, "learning_rate": 0.0001, "loss": 0.8202, "mean_abs_error": 789.1067838880806, "mean_abs_error_last_10": 374.6293450046536, "mean_abs_error_last_25": 533.6628027234635, "mean_abs_error_last_50": 608.720678459262, "mean_pred_prob": 0.023403876333031804, "mean_pred_prob_last_10": 0.1239789643092081, "mean_pred_prob_last_25": 0.06618918168824166, "mean_pred_prob_last_50": 0.03992233579047024, "mean_token_accuracy": 0.8795179009437561, "step": 20880 }, { "epoch": 0.37135797201927007, "grad_norm": 2.199240394066069, "learning_rate": 0.0001, "loss": 0.8149, "mean_abs_error": 280.8755895757814, "mean_abs_error_last_10": 56.61573981610705, "mean_abs_error_last_25": 96.80486136408993, "mean_abs_error_last_50": 171.50398745121043, "mean_pred_prob": 0.02739306171424687, "mean_pred_prob_last_10": 0.1365595579147339, "mean_pred_prob_last_25": 0.07351082973182202, "mean_pred_prob_last_50": 0.04613701440393925, "mean_token_accuracy": 0.8768019378185272, "step": 20890 }, { "epoch": 0.371535740316072, "grad_norm": 0.9263144771933687, "learning_rate": 0.0001, "loss": 0.8361, "mean_abs_error": 930.977899589532, "mean_abs_error_last_10": 472.7865230041346, "mean_abs_error_last_25": 577.380385349417, "mean_abs_error_last_50": 753.4821323828119, "mean_pred_prob": 0.03415233718114905, "mean_pred_prob_last_10": 0.17799784230883234, "mean_pred_prob_last_25": 0.09958000858314335, "mean_pred_prob_last_50": 0.05916966123186285, "mean_token_accuracy": 0.8720506310462952, "step": 20900 }, { "epoch": 0.371713508612874, "grad_norm": 1.1625968084136693, "learning_rate": 0.0001, "loss": 0.7182, "mean_abs_error": 328.85168877884877, "mean_abs_error_last_10": 132.1944115751042, "mean_abs_error_last_25": 168.08166459948038, "mean_abs_error_last_50": 205.33693917724457, "mean_pred_prob": 0.035795329697430135, "mean_pred_prob_last_10": 0.18841002508997917, "mean_pred_prob_last_25": 0.09992217449471355, "mean_pred_prob_last_50": 0.06059573916718364, "mean_token_accuracy": 0.8787810683250428, "step": 20910 }, { "epoch": 0.37189127690967594, "grad_norm": 1.5682173658828593, "learning_rate": 0.0001, "loss": 0.8818, "mean_abs_error": 867.7868764170696, "mean_abs_error_last_10": 285.9163463385182, "mean_abs_error_last_25": 304.2039197632724, "mean_abs_error_last_50": 403.48810004118104, "mean_pred_prob": 0.016505310172215105, "mean_pred_prob_last_10": 0.08655314005445688, "mean_pred_prob_last_25": 0.04632501821033656, "mean_pred_prob_last_50": 0.028208180738147348, "mean_token_accuracy": 0.880016702413559, "step": 20920 }, { "epoch": 0.3720690452064779, "grad_norm": 1.0814928330251636, "learning_rate": 0.0001, "loss": 0.7039, "mean_abs_error": 839.347003469857, "mean_abs_error_last_10": 491.43154595604284, "mean_abs_error_last_25": 538.5757603887786, "mean_abs_error_last_50": 643.5789355107207, "mean_pred_prob": 0.0405421433824813, "mean_pred_prob_last_10": 0.20482339324662463, "mean_pred_prob_last_25": 0.11243378262443002, "mean_pred_prob_last_50": 0.06858650435169693, "mean_token_accuracy": 0.8774840593338012, "step": 20930 }, { "epoch": 0.3722468135032798, "grad_norm": 2.111547573088516, "learning_rate": 0.0001, "loss": 0.8543, "mean_abs_error": 482.41867676525135, "mean_abs_error_last_10": 215.34174888993735, "mean_abs_error_last_25": 296.7142091151317, "mean_abs_error_last_50": 380.4676064101539, "mean_pred_prob": 0.02738559028948657, "mean_pred_prob_last_10": 0.14201373410178347, "mean_pred_prob_last_25": 0.07514533931971527, "mean_pred_prob_last_50": 0.04592223196523264, "mean_token_accuracy": 0.8673939883708954, "step": 20940 }, { "epoch": 0.37242458180008176, "grad_norm": 1.5456459098862252, "learning_rate": 0.0001, "loss": 0.8358, "mean_abs_error": 411.26029436223297, "mean_abs_error_last_10": 90.26812038851142, "mean_abs_error_last_25": 160.6574766888079, "mean_abs_error_last_50": 239.5287025381292, "mean_pred_prob": 0.03175229467451572, "mean_pred_prob_last_10": 0.1530880257487297, "mean_pred_prob_last_25": 0.08733670664951206, "mean_pred_prob_last_50": 0.05308959605172277, "mean_token_accuracy": 0.8766206741333008, "step": 20950 }, { "epoch": 0.3726023500968837, "grad_norm": 3.086981386646606, "learning_rate": 0.0001, "loss": 0.8031, "mean_abs_error": 506.9998625341151, "mean_abs_error_last_10": 313.98559516848945, "mean_abs_error_last_25": 322.6088130927385, "mean_abs_error_last_50": 318.69477590527987, "mean_pred_prob": 0.038937131268903614, "mean_pred_prob_last_10": 0.1986444635782391, "mean_pred_prob_last_25": 0.11066630806308239, "mean_pred_prob_last_50": 0.06657602870836854, "mean_token_accuracy": 0.8716272413730621, "step": 20960 }, { "epoch": 0.3727801183936857, "grad_norm": 1.1368752788330432, "learning_rate": 0.0001, "loss": 0.7778, "mean_abs_error": 410.0205804361407, "mean_abs_error_last_10": 117.08159142341034, "mean_abs_error_last_25": 191.56515567275179, "mean_abs_error_last_50": 227.62236758686927, "mean_pred_prob": 0.0310302231926471, "mean_pred_prob_last_10": 0.1628864137455821, "mean_pred_prob_last_25": 0.08695424068719149, "mean_pred_prob_last_50": 0.0531665607355535, "mean_token_accuracy": 0.8763815224170685, "step": 20970 }, { "epoch": 0.37295788669048763, "grad_norm": 0.913562559566624, "learning_rate": 0.0001, "loss": 0.8092, "mean_abs_error": 900.514827062301, "mean_abs_error_last_10": 510.26417706540803, "mean_abs_error_last_25": 575.8937011849513, "mean_abs_error_last_50": 661.6453840782476, "mean_pred_prob": 0.047034812178753785, "mean_pred_prob_last_10": 0.22155695633264258, "mean_pred_prob_last_25": 0.12895582740020473, "mean_pred_prob_last_50": 0.07938106932269876, "mean_token_accuracy": 0.8726398289203644, "step": 20980 }, { "epoch": 0.37313565498728957, "grad_norm": 2.938461771919527, "learning_rate": 0.0001, "loss": 0.8213, "mean_abs_error": 1126.3874102565337, "mean_abs_error_last_10": 655.9515735835175, "mean_abs_error_last_25": 745.5592359790643, "mean_abs_error_last_50": 855.60801319502, "mean_pred_prob": 0.022762036448693835, "mean_pred_prob_last_10": 0.12369892748974962, "mean_pred_prob_last_25": 0.06515852306765738, "mean_pred_prob_last_50": 0.03836952408018988, "mean_token_accuracy": 0.8745302736759186, "step": 20990 }, { "epoch": 0.3733134232840915, "grad_norm": 1.6360439662943316, "learning_rate": 0.0001, "loss": 0.8218, "mean_abs_error": 1285.4844884400802, "mean_abs_error_last_10": 597.8226406757525, "mean_abs_error_last_25": 717.2748968226255, "mean_abs_error_last_50": 937.5204683275472, "mean_pred_prob": 0.03452827744331444, "mean_pred_prob_last_10": 0.1647346714424202, "mean_pred_prob_last_25": 0.09216081527119968, "mean_pred_prob_last_50": 0.05724875345331384, "mean_token_accuracy": 0.8748133480548859, "step": 21000 }, { "epoch": 0.37349119158089344, "grad_norm": 1.6261823737412826, "learning_rate": 0.0001, "loss": 0.8395, "mean_abs_error": 1006.3001698626595, "mean_abs_error_last_10": 736.2783874312448, "mean_abs_error_last_25": 740.9075444827736, "mean_abs_error_last_50": 809.0560016132525, "mean_pred_prob": 0.03612837730761385, "mean_pred_prob_last_10": 0.19006653770338744, "mean_pred_prob_last_25": 0.1026246293913573, "mean_pred_prob_last_50": 0.06200409514713101, "mean_token_accuracy": 0.8786415874958038, "step": 21010 }, { "epoch": 0.37366895987769544, "grad_norm": 2.467968230888839, "learning_rate": 0.0001, "loss": 0.7868, "mean_abs_error": 270.5657573892395, "mean_abs_error_last_10": 76.07378624439232, "mean_abs_error_last_25": 110.71549243072647, "mean_abs_error_last_50": 166.8376611048449, "mean_pred_prob": 0.03495550542138517, "mean_pred_prob_last_10": 0.1813282260671258, "mean_pred_prob_last_25": 0.09914576634764671, "mean_pred_prob_last_50": 0.05996611760929227, "mean_token_accuracy": 0.8638407230377197, "step": 21020 }, { "epoch": 0.3738467281744974, "grad_norm": 0.8425916667705324, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 1110.0234456382127, "mean_abs_error_last_10": 374.04286885461045, "mean_abs_error_last_25": 473.4719765512674, "mean_abs_error_last_50": 684.334160785236, "mean_pred_prob": 0.022642163233831523, "mean_pred_prob_last_10": 0.11027694690274074, "mean_pred_prob_last_25": 0.06001812872709707, "mean_pred_prob_last_50": 0.0378222991945222, "mean_token_accuracy": 0.883837229013443, "step": 21030 }, { "epoch": 0.3740244964712993, "grad_norm": 0.756767809327653, "learning_rate": 0.0001, "loss": 0.6992, "mean_abs_error": 232.74932528310623, "mean_abs_error_last_10": 53.86160125102106, "mean_abs_error_last_25": 109.09431078267751, "mean_abs_error_last_50": 146.36627983097753, "mean_pred_prob": 0.041409887932240964, "mean_pred_prob_last_10": 0.22088487595319747, "mean_pred_prob_last_25": 0.11999769024550914, "mean_pred_prob_last_50": 0.07153205014765263, "mean_token_accuracy": 0.8767791450023651, "step": 21040 }, { "epoch": 0.37420226476810126, "grad_norm": 3.1466851568457845, "learning_rate": 0.0001, "loss": 0.8476, "mean_abs_error": 472.08119632983, "mean_abs_error_last_10": 201.7929861689475, "mean_abs_error_last_25": 229.83372779646893, "mean_abs_error_last_50": 280.3878822828018, "mean_pred_prob": 0.03157678730785847, "mean_pred_prob_last_10": 0.15903819166123867, "mean_pred_prob_last_25": 0.08954277168959379, "mean_pred_prob_last_50": 0.0535074170678854, "mean_token_accuracy": 0.8757174849510193, "step": 21050 }, { "epoch": 0.3743800330649032, "grad_norm": 1.202364214454642, "learning_rate": 0.0001, "loss": 0.7484, "mean_abs_error": 483.79222177307713, "mean_abs_error_last_10": 173.90412228542567, "mean_abs_error_last_25": 305.75733256923877, "mean_abs_error_last_50": 441.5662131628931, "mean_pred_prob": 0.03261532220058143, "mean_pred_prob_last_10": 0.16660703271627425, "mean_pred_prob_last_25": 0.09050353630445898, "mean_pred_prob_last_50": 0.05510809514671564, "mean_token_accuracy": 0.8783898651599884, "step": 21060 }, { "epoch": 0.37455780136170513, "grad_norm": 1.3675029506312584, "learning_rate": 0.0001, "loss": 0.7822, "mean_abs_error": 296.2799834700758, "mean_abs_error_last_10": 87.9633232498416, "mean_abs_error_last_25": 106.14117567403805, "mean_abs_error_last_50": 204.3883008600041, "mean_pred_prob": 0.044617005111649635, "mean_pred_prob_last_10": 0.22945859469473362, "mean_pred_prob_last_25": 0.126698091160506, "mean_pred_prob_last_50": 0.07647137409076095, "mean_token_accuracy": 0.8750845551490783, "step": 21070 }, { "epoch": 0.37473556965850713, "grad_norm": 1.7454063410847398, "learning_rate": 0.0001, "loss": 0.7352, "mean_abs_error": 737.290181572792, "mean_abs_error_last_10": 207.01877461699854, "mean_abs_error_last_25": 332.2000536327911, "mean_abs_error_last_50": 469.2979078671733, "mean_pred_prob": 0.03274194484110922, "mean_pred_prob_last_10": 0.1559818954439834, "mean_pred_prob_last_25": 0.08705203519202769, "mean_pred_prob_last_50": 0.053988045308506115, "mean_token_accuracy": 0.8730021238327026, "step": 21080 }, { "epoch": 0.37491333795530907, "grad_norm": 0.6927679915748041, "learning_rate": 0.0001, "loss": 0.8681, "mean_abs_error": 418.4994443556011, "mean_abs_error_last_10": 144.97412605442565, "mean_abs_error_last_25": 217.65617070293834, "mean_abs_error_last_50": 281.27431371073294, "mean_pred_prob": 0.02787924788426608, "mean_pred_prob_last_10": 0.15047710463404657, "mean_pred_prob_last_25": 0.08103267000988126, "mean_pred_prob_last_50": 0.04833432054147124, "mean_token_accuracy": 0.8724369823932647, "step": 21090 }, { "epoch": 0.375091106252111, "grad_norm": 0.8245239292314788, "learning_rate": 0.0001, "loss": 0.7836, "mean_abs_error": 93.66406194788127, "mean_abs_error_last_10": 18.554274912448783, "mean_abs_error_last_25": 37.08925765939158, "mean_abs_error_last_50": 46.88593772323605, "mean_pred_prob": 0.05951902661472559, "mean_pred_prob_last_10": 0.29538456425070764, "mean_pred_prob_last_25": 0.16350037306547166, "mean_pred_prob_last_50": 0.10071639027446508, "mean_token_accuracy": 0.8652261316776275, "step": 21100 }, { "epoch": 0.37526887454891295, "grad_norm": 1.8681002084044047, "learning_rate": 0.0001, "loss": 0.7537, "mean_abs_error": 405.2570028322491, "mean_abs_error_last_10": 131.69905544300246, "mean_abs_error_last_25": 138.56217111812379, "mean_abs_error_last_50": 242.6559662297821, "mean_pred_prob": 0.02999212561408058, "mean_pred_prob_last_10": 0.16021464278455824, "mean_pred_prob_last_25": 0.08614078504033387, "mean_pred_prob_last_50": 0.05133136468939483, "mean_token_accuracy": 0.8820868074893952, "step": 21110 }, { "epoch": 0.3754466428457149, "grad_norm": 2.666772617464959, "learning_rate": 0.0001, "loss": 0.7713, "mean_abs_error": 622.6338926239675, "mean_abs_error_last_10": 154.20971385908592, "mean_abs_error_last_25": 203.43794064953164, "mean_abs_error_last_50": 364.7303973884639, "mean_pred_prob": 0.04980218625278212, "mean_pred_prob_last_10": 0.2212013581651263, "mean_pred_prob_last_25": 0.13133334867889063, "mean_pred_prob_last_50": 0.08272003043093719, "mean_token_accuracy": 0.8778964102268219, "step": 21120 }, { "epoch": 0.3756244111425168, "grad_norm": 2.1044863244065586, "learning_rate": 0.0001, "loss": 0.8118, "mean_abs_error": 419.0779187637228, "mean_abs_error_last_10": 99.38328600398987, "mean_abs_error_last_25": 134.45558413908458, "mean_abs_error_last_50": 203.15470414309476, "mean_pred_prob": 0.044857795303687456, "mean_pred_prob_last_10": 0.2292075286852196, "mean_pred_prob_last_25": 0.12452507071429864, "mean_pred_prob_last_50": 0.07443403138313442, "mean_token_accuracy": 0.8762424468994141, "step": 21130 }, { "epoch": 0.3758021794393188, "grad_norm": 1.639806529099278, "learning_rate": 0.0001, "loss": 0.7527, "mean_abs_error": 403.1119395997154, "mean_abs_error_last_10": 79.45667548112151, "mean_abs_error_last_25": 117.0036456447518, "mean_abs_error_last_50": 219.56324323430735, "mean_pred_prob": 0.032316616456955674, "mean_pred_prob_last_10": 0.1665548339486122, "mean_pred_prob_last_25": 0.08860353324562312, "mean_pred_prob_last_50": 0.054533299058675766, "mean_token_accuracy": 0.8701645016670227, "step": 21140 }, { "epoch": 0.37597994773612076, "grad_norm": 2.1678057879946393, "learning_rate": 0.0001, "loss": 0.8598, "mean_abs_error": 850.5522876405794, "mean_abs_error_last_10": 179.40637577129067, "mean_abs_error_last_25": 279.8772044988183, "mean_abs_error_last_50": 535.5896594856144, "mean_pred_prob": 0.023807777412002906, "mean_pred_prob_last_10": 0.11359653547406197, "mean_pred_prob_last_25": 0.06538328900933266, "mean_pred_prob_last_50": 0.04014025426004082, "mean_token_accuracy": 0.8740826606750488, "step": 21150 }, { "epoch": 0.3761577160329227, "grad_norm": 2.0586877770023366, "learning_rate": 0.0001, "loss": 0.7927, "mean_abs_error": 414.49523889732563, "mean_abs_error_last_10": 152.17907298007714, "mean_abs_error_last_25": 192.35953550474886, "mean_abs_error_last_50": 274.80023550092835, "mean_pred_prob": 0.0390139882278163, "mean_pred_prob_last_10": 0.20828746159095318, "mean_pred_prob_last_25": 0.11414341027848422, "mean_pred_prob_last_50": 0.06749074283288792, "mean_token_accuracy": 0.869280469417572, "step": 21160 }, { "epoch": 0.37633548432972463, "grad_norm": 1.3083305877599822, "learning_rate": 0.0001, "loss": 0.7341, "mean_abs_error": 478.4510110159431, "mean_abs_error_last_10": 58.82697067322018, "mean_abs_error_last_25": 182.91449131089783, "mean_abs_error_last_50": 365.7200181176513, "mean_pred_prob": 0.0431226986926049, "mean_pred_prob_last_10": 0.2004721064120531, "mean_pred_prob_last_25": 0.11639838349074125, "mean_pred_prob_last_50": 0.07269605658948422, "mean_token_accuracy": 0.8808230757713318, "step": 21170 }, { "epoch": 0.3765132526265266, "grad_norm": 1.0732089764524202, "learning_rate": 0.0001, "loss": 0.7545, "mean_abs_error": 1209.9180562840324, "mean_abs_error_last_10": 605.0953568920438, "mean_abs_error_last_25": 744.8215113986842, "mean_abs_error_last_50": 884.3748284303283, "mean_pred_prob": 0.029952131719619503, "mean_pred_prob_last_10": 0.1490263515879633, "mean_pred_prob_last_25": 0.08400398136873263, "mean_pred_prob_last_50": 0.05172748266486451, "mean_token_accuracy": 0.8616735219955445, "step": 21180 }, { "epoch": 0.3766910209233285, "grad_norm": 1.756136586812219, "learning_rate": 0.0001, "loss": 0.8154, "mean_abs_error": 353.06953704051534, "mean_abs_error_last_10": 184.21673844840836, "mean_abs_error_last_25": 223.30857200770546, "mean_abs_error_last_50": 231.96719157479, "mean_pred_prob": 0.04543056655675173, "mean_pred_prob_last_10": 0.2308604871854186, "mean_pred_prob_last_25": 0.13064883374609054, "mean_pred_prob_last_50": 0.07858128743246198, "mean_token_accuracy": 0.8755818605422974, "step": 21190 }, { "epoch": 0.3768687892201305, "grad_norm": 1.5998724470801873, "learning_rate": 0.0001, "loss": 0.7078, "mean_abs_error": 215.91079458505524, "mean_abs_error_last_10": 77.5657598382464, "mean_abs_error_last_25": 90.38489491090174, "mean_abs_error_last_50": 121.75664031273236, "mean_pred_prob": 0.04742971200030297, "mean_pred_prob_last_10": 0.22884938679635525, "mean_pred_prob_last_25": 0.12926130183041096, "mean_pred_prob_last_50": 0.08044612924568355, "mean_token_accuracy": 0.883741271495819, "step": 21200 }, { "epoch": 0.37704655751693245, "grad_norm": 2.729646901533407, "learning_rate": 0.0001, "loss": 0.849, "mean_abs_error": 448.0582933135059, "mean_abs_error_last_10": 76.4306658544983, "mean_abs_error_last_25": 90.19299165243419, "mean_abs_error_last_50": 177.7661776892596, "mean_pred_prob": 0.054884774959646165, "mean_pred_prob_last_10": 0.25529156625270844, "mean_pred_prob_last_25": 0.14799269894137979, "mean_pred_prob_last_50": 0.09189957864582539, "mean_token_accuracy": 0.8714121639728546, "step": 21210 }, { "epoch": 0.3772243258137344, "grad_norm": 1.1973233801097316, "learning_rate": 0.0001, "loss": 0.7652, "mean_abs_error": 214.9498234859631, "mean_abs_error_last_10": 91.00561310082857, "mean_abs_error_last_25": 138.49328049249357, "mean_abs_error_last_50": 175.89288602153152, "mean_pred_prob": 0.04460315247997641, "mean_pred_prob_last_10": 0.23641178645193578, "mean_pred_prob_last_25": 0.12651749197393655, "mean_pred_prob_last_50": 0.07603015033528208, "mean_token_accuracy": 0.8808708250522613, "step": 21220 }, { "epoch": 0.3774020941105363, "grad_norm": 1.1497324289559816, "learning_rate": 0.0001, "loss": 0.8087, "mean_abs_error": 202.549923399428, "mean_abs_error_last_10": 70.73914315223723, "mean_abs_error_last_25": 92.60131646964172, "mean_abs_error_last_50": 141.42822594693217, "mean_pred_prob": 0.032926962710916996, "mean_pred_prob_last_10": 0.17706058472394942, "mean_pred_prob_last_25": 0.09467009454965591, "mean_pred_prob_last_50": 0.05693486835807562, "mean_token_accuracy": 0.8699803173542022, "step": 21230 }, { "epoch": 0.37757986240733826, "grad_norm": 1.304681697304813, "learning_rate": 0.0001, "loss": 0.8952, "mean_abs_error": 1138.2593175636437, "mean_abs_error_last_10": 608.5763197167952, "mean_abs_error_last_25": 659.8695743924807, "mean_abs_error_last_50": 797.2626512007522, "mean_pred_prob": 0.035758633790828755, "mean_pred_prob_last_10": 0.16616997157834704, "mean_pred_prob_last_25": 0.09563061922672204, "mean_pred_prob_last_50": 0.059239238980808295, "mean_token_accuracy": 0.8757417917251586, "step": 21240 }, { "epoch": 0.3777576307041402, "grad_norm": 2.1981849384932994, "learning_rate": 0.0001, "loss": 0.8627, "mean_abs_error": 99.93702266438073, "mean_abs_error_last_10": 35.65556467636256, "mean_abs_error_last_25": 52.148686624361666, "mean_abs_error_last_50": 64.38147817457585, "mean_pred_prob": 0.059329346567392346, "mean_pred_prob_last_10": 0.29278321862220763, "mean_pred_prob_last_25": 0.1624286353588104, "mean_pred_prob_last_50": 0.09935732875019312, "mean_token_accuracy": 0.8738751530647277, "step": 21250 }, { "epoch": 0.3779353990009422, "grad_norm": 3.3342290326609465, "learning_rate": 0.0001, "loss": 0.7894, "mean_abs_error": 208.77437907212894, "mean_abs_error_last_10": 69.16644792543575, "mean_abs_error_last_25": 82.8473609300934, "mean_abs_error_last_50": 130.89651110491417, "mean_pred_prob": 0.04498597492929548, "mean_pred_prob_last_10": 0.22237255033105613, "mean_pred_prob_last_25": 0.12233992274850607, "mean_pred_prob_last_50": 0.07497176923789084, "mean_token_accuracy": 0.8756974697113037, "step": 21260 }, { "epoch": 0.37811316729774413, "grad_norm": 1.3011454205328754, "learning_rate": 0.0001, "loss": 0.8684, "mean_abs_error": 1384.6141736146928, "mean_abs_error_last_10": 712.1873004585767, "mean_abs_error_last_25": 699.2574360984243, "mean_abs_error_last_50": 906.1129967613255, "mean_pred_prob": 0.02253381137852557, "mean_pred_prob_last_10": 0.11382960072078277, "mean_pred_prob_last_25": 0.06248642900318373, "mean_pred_prob_last_50": 0.03788885577523615, "mean_token_accuracy": 0.8675074338912964, "step": 21270 }, { "epoch": 0.3782909355945461, "grad_norm": 1.7679221228532496, "learning_rate": 0.0001, "loss": 0.7467, "mean_abs_error": 590.8174697803736, "mean_abs_error_last_10": 195.08330213746476, "mean_abs_error_last_25": 272.32111827712873, "mean_abs_error_last_50": 347.85558864109794, "mean_pred_prob": 0.03898724454047624, "mean_pred_prob_last_10": 0.19908238122588956, "mean_pred_prob_last_25": 0.11150312405079603, "mean_pred_prob_last_50": 0.06678151265368797, "mean_token_accuracy": 0.8672164976596832, "step": 21280 }, { "epoch": 0.378468703891348, "grad_norm": 1.6388139060873894, "learning_rate": 0.0001, "loss": 0.8159, "mean_abs_error": 967.0568718179193, "mean_abs_error_last_10": 216.1772819832769, "mean_abs_error_last_25": 343.98000720172723, "mean_abs_error_last_50": 591.1524793643373, "mean_pred_prob": 0.019679818977601828, "mean_pred_prob_last_10": 0.1073294674511999, "mean_pred_prob_last_25": 0.05534421728225425, "mean_pred_prob_last_50": 0.03320741155184805, "mean_token_accuracy": 0.8652178466320037, "step": 21290 }, { "epoch": 0.37864647218814995, "grad_norm": 1.5413067842094492, "learning_rate": 0.0001, "loss": 0.8036, "mean_abs_error": 938.3714268601516, "mean_abs_error_last_10": 461.307102465333, "mean_abs_error_last_25": 555.6936561153121, "mean_abs_error_last_50": 692.1832618740281, "mean_pred_prob": 0.036385649123985786, "mean_pred_prob_last_10": 0.18833585797692648, "mean_pred_prob_last_25": 0.10391210386005696, "mean_pred_prob_last_50": 0.061757525388384235, "mean_token_accuracy": 0.8686333417892456, "step": 21300 }, { "epoch": 0.3788242404849519, "grad_norm": 1.6975963341418152, "learning_rate": 0.0001, "loss": 0.7342, "mean_abs_error": 571.8010192102125, "mean_abs_error_last_10": 174.72511154122992, "mean_abs_error_last_25": 219.53245002896452, "mean_abs_error_last_50": 346.43762728583886, "mean_pred_prob": 0.04318876335164532, "mean_pred_prob_last_10": 0.21361922656651586, "mean_pred_prob_last_25": 0.12134624280733988, "mean_pred_prob_last_50": 0.07324405575054697, "mean_token_accuracy": 0.887444007396698, "step": 21310 }, { "epoch": 0.3790020087817539, "grad_norm": 6.907146760238726, "learning_rate": 0.0001, "loss": 0.8753, "mean_abs_error": 1477.0703329615985, "mean_abs_error_last_10": 494.8413720005028, "mean_abs_error_last_25": 789.7331440738667, "mean_abs_error_last_50": 1055.2296207306774, "mean_pred_prob": 0.021777623047819362, "mean_pred_prob_last_10": 0.10472918241866865, "mean_pred_prob_last_25": 0.05954853128641844, "mean_pred_prob_last_50": 0.03645178188453428, "mean_token_accuracy": 0.8648404717445374, "step": 21320 }, { "epoch": 0.3791797770785558, "grad_norm": 2.081916140621852, "learning_rate": 0.0001, "loss": 0.8799, "mean_abs_error": 523.7700418901885, "mean_abs_error_last_10": 167.9702867538399, "mean_abs_error_last_25": 309.05478767089255, "mean_abs_error_last_50": 332.736954551902, "mean_pred_prob": 0.027297224733047187, "mean_pred_prob_last_10": 0.14893222078680993, "mean_pred_prob_last_25": 0.07879906175658107, "mean_pred_prob_last_50": 0.04712300761602819, "mean_token_accuracy": 0.8770677506923675, "step": 21330 }, { "epoch": 0.37935754537535776, "grad_norm": 0.8227018826511437, "learning_rate": 0.0001, "loss": 0.7595, "mean_abs_error": 1363.8828505521521, "mean_abs_error_last_10": 846.8334449666602, "mean_abs_error_last_25": 854.8726522087057, "mean_abs_error_last_50": 1009.9756655814979, "mean_pred_prob": 0.02936146823776653, "mean_pred_prob_last_10": 0.15041532428876964, "mean_pred_prob_last_25": 0.0817719148733886, "mean_pred_prob_last_50": 0.04987039538682438, "mean_token_accuracy": 0.8688116073608398, "step": 21340 }, { "epoch": 0.3795353136721597, "grad_norm": 2.342600290076587, "learning_rate": 0.0001, "loss": 0.8173, "mean_abs_error": 879.0702309780914, "mean_abs_error_last_10": 400.9728648990925, "mean_abs_error_last_25": 490.45858199839734, "mean_abs_error_last_50": 647.513065217871, "mean_pred_prob": 0.04028689953702269, "mean_pred_prob_last_10": 0.20971534472482745, "mean_pred_prob_last_25": 0.11122236010269262, "mean_pred_prob_last_50": 0.06764262795622926, "mean_token_accuracy": 0.8756425678730011, "step": 21350 }, { "epoch": 0.37971308196896164, "grad_norm": 1.2275136105900701, "learning_rate": 0.0001, "loss": 0.8216, "mean_abs_error": 420.9213157564842, "mean_abs_error_last_10": 226.85669508966848, "mean_abs_error_last_25": 252.2001122445979, "mean_abs_error_last_50": 275.7662193260706, "mean_pred_prob": 0.03612395598902367, "mean_pred_prob_last_10": 0.16428520664339885, "mean_pred_prob_last_25": 0.09547117744223214, "mean_pred_prob_last_50": 0.05976532400236465, "mean_token_accuracy": 0.8621761083602906, "step": 21360 }, { "epoch": 0.3798908502657636, "grad_norm": 1.8129116450196054, "learning_rate": 0.0001, "loss": 0.7451, "mean_abs_error": 271.7868121885193, "mean_abs_error_last_10": 52.35565542918041, "mean_abs_error_last_25": 116.73365967026402, "mean_abs_error_last_50": 196.4909188495059, "mean_pred_prob": 0.045242903055623174, "mean_pred_prob_last_10": 0.22224691174924374, "mean_pred_prob_last_25": 0.12615815103054046, "mean_pred_prob_last_50": 0.0767422067001462, "mean_token_accuracy": 0.8745772063732147, "step": 21370 }, { "epoch": 0.3800686185625656, "grad_norm": 1.9195636495413375, "learning_rate": 0.0001, "loss": 0.9722, "mean_abs_error": 1448.1709235794715, "mean_abs_error_last_10": 482.9059513553813, "mean_abs_error_last_25": 602.0660426894838, "mean_abs_error_last_50": 911.8894129645403, "mean_pred_prob": 0.021589261759072543, "mean_pred_prob_last_10": 0.11072873408556916, "mean_pred_prob_last_25": 0.05940828078018967, "mean_pred_prob_last_50": 0.03635229965148028, "mean_token_accuracy": 0.8645800173282623, "step": 21380 }, { "epoch": 0.3802463868593675, "grad_norm": 1.0230126056360154, "learning_rate": 0.0001, "loss": 0.7755, "mean_abs_error": 288.6568092808494, "mean_abs_error_last_10": 71.77535501710932, "mean_abs_error_last_25": 109.17518993326196, "mean_abs_error_last_50": 166.55311366266034, "mean_pred_prob": 0.036831344058737156, "mean_pred_prob_last_10": 0.18254616484045982, "mean_pred_prob_last_25": 0.10112498253583908, "mean_pred_prob_last_50": 0.06228453433141112, "mean_token_accuracy": 0.8718704104423523, "step": 21390 }, { "epoch": 0.38042415515616945, "grad_norm": 2.3321212168735816, "learning_rate": 0.0001, "loss": 0.8244, "mean_abs_error": 479.708553067899, "mean_abs_error_last_10": 225.5654186976216, "mean_abs_error_last_25": 308.6584130809269, "mean_abs_error_last_50": 386.59420059992414, "mean_pred_prob": 0.03125677437055856, "mean_pred_prob_last_10": 0.1533384325914085, "mean_pred_prob_last_25": 0.08411861318163574, "mean_pred_prob_last_50": 0.05266304854303598, "mean_token_accuracy": 0.8664171636104584, "step": 21400 }, { "epoch": 0.3806019234529714, "grad_norm": 1.7765072969400448, "learning_rate": 0.0001, "loss": 0.7148, "mean_abs_error": 318.95707095005923, "mean_abs_error_last_10": 48.295403590128004, "mean_abs_error_last_25": 78.92800507057606, "mean_abs_error_last_50": 129.4943827020422, "mean_pred_prob": 0.05054230710957199, "mean_pred_prob_last_10": 0.24399779327213764, "mean_pred_prob_last_25": 0.13886779621243478, "mean_pred_prob_last_50": 0.08434043764136731, "mean_token_accuracy": 0.8808372437953949, "step": 21410 }, { "epoch": 0.38077969174977333, "grad_norm": 1.9827677014854117, "learning_rate": 0.0001, "loss": 0.8146, "mean_abs_error": 549.1688335841133, "mean_abs_error_last_10": 176.30601898893232, "mean_abs_error_last_25": 276.0367098490136, "mean_abs_error_last_50": 367.1716707022075, "mean_pred_prob": 0.03573305942118168, "mean_pred_prob_last_10": 0.17826664578169585, "mean_pred_prob_last_25": 0.10049914363771677, "mean_pred_prob_last_50": 0.06106112045235932, "mean_token_accuracy": 0.8781271755695343, "step": 21420 }, { "epoch": 0.38095746004657527, "grad_norm": 1.3969127207296173, "learning_rate": 0.0001, "loss": 0.9102, "mean_abs_error": 2484.9234472914313, "mean_abs_error_last_10": 1525.0747105057706, "mean_abs_error_last_25": 1696.6328206884925, "mean_abs_error_last_50": 1940.5940292542425, "mean_pred_prob": 0.031527070051379266, "mean_pred_prob_last_10": 0.16324363008170623, "mean_pred_prob_last_25": 0.08934868438882404, "mean_pred_prob_last_50": 0.053655985206569314, "mean_token_accuracy": 0.8620282769203186, "step": 21430 }, { "epoch": 0.38113522834337726, "grad_norm": 1.7042912445421492, "learning_rate": 0.0001, "loss": 0.7953, "mean_abs_error": 157.35174447908625, "mean_abs_error_last_10": 43.246646821322365, "mean_abs_error_last_25": 59.446315090788424, "mean_abs_error_last_50": 88.2301356612207, "mean_pred_prob": 0.04491241341456771, "mean_pred_prob_last_10": 0.22490940764546394, "mean_pred_prob_last_25": 0.12303139567375183, "mean_pred_prob_last_50": 0.07577105090022088, "mean_token_accuracy": 0.8759368896484375, "step": 21440 }, { "epoch": 0.3813129966401792, "grad_norm": 0.9596413788343531, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 456.552514516083, "mean_abs_error_last_10": 144.8096134253284, "mean_abs_error_last_25": 221.0067383574486, "mean_abs_error_last_50": 370.98366042252326, "mean_pred_prob": 0.02704423349350691, "mean_pred_prob_last_10": 0.14179594870656728, "mean_pred_prob_last_25": 0.0757534104399383, "mean_pred_prob_last_50": 0.045851240679621694, "mean_token_accuracy": 0.8765881896018982, "step": 21450 }, { "epoch": 0.38149076493698114, "grad_norm": 2.0429017704850136, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 1178.5902643529423, "mean_abs_error_last_10": 560.9856606136159, "mean_abs_error_last_25": 704.5096978339743, "mean_abs_error_last_50": 815.29031319579, "mean_pred_prob": 0.030650810623774304, "mean_pred_prob_last_10": 0.14192014954751359, "mean_pred_prob_last_25": 0.07912119577085833, "mean_pred_prob_last_50": 0.05010566907585599, "mean_token_accuracy": 0.8688432455062867, "step": 21460 }, { "epoch": 0.3816685332337831, "grad_norm": 1.1028326259326586, "learning_rate": 0.0001, "loss": 0.7695, "mean_abs_error": 765.7939666927272, "mean_abs_error_last_10": 220.2846074061118, "mean_abs_error_last_25": 296.2383713521449, "mean_abs_error_last_50": 493.58552725220625, "mean_pred_prob": 0.019831159320892765, "mean_pred_prob_last_10": 0.10106129262130707, "mean_pred_prob_last_25": 0.055181972798891366, "mean_pred_prob_last_50": 0.033754987630527464, "mean_token_accuracy": 0.8675633609294892, "step": 21470 }, { "epoch": 0.381846301530585, "grad_norm": 1.2662682639147727, "learning_rate": 0.0001, "loss": 0.8303, "mean_abs_error": 252.55328955490353, "mean_abs_error_last_10": 98.31798120011072, "mean_abs_error_last_25": 124.34400197310643, "mean_abs_error_last_50": 178.01571615609163, "mean_pred_prob": 0.05534082008525729, "mean_pred_prob_last_10": 0.2483729563653469, "mean_pred_prob_last_25": 0.1430245378986001, "mean_pred_prob_last_50": 0.09162973696365953, "mean_token_accuracy": 0.8785911202430725, "step": 21480 }, { "epoch": 0.38202406982738696, "grad_norm": 1.0420827916800988, "learning_rate": 0.0001, "loss": 0.7923, "mean_abs_error": 273.38068971059334, "mean_abs_error_last_10": 91.40358054786404, "mean_abs_error_last_25": 106.22759197606301, "mean_abs_error_last_50": 124.67396747082068, "mean_pred_prob": 0.04335699491202831, "mean_pred_prob_last_10": 0.20044630952179432, "mean_pred_prob_last_25": 0.11843252815306186, "mean_pred_prob_last_50": 0.07322121514007449, "mean_token_accuracy": 0.877143132686615, "step": 21490 }, { "epoch": 0.38220183812418895, "grad_norm": 1.5379847491768073, "learning_rate": 0.0001, "loss": 0.7725, "mean_abs_error": 243.96306793925373, "mean_abs_error_last_10": 42.64256916773583, "mean_abs_error_last_25": 72.55018029005151, "mean_abs_error_last_50": 125.96347874793734, "mean_pred_prob": 0.05699680456891656, "mean_pred_prob_last_10": 0.2727627906948328, "mean_pred_prob_last_25": 0.1503318589180708, "mean_pred_prob_last_50": 0.09457268929108978, "mean_token_accuracy": 0.8717399477958679, "step": 21500 }, { "epoch": 0.3823796064209909, "grad_norm": 1.0811507630262545, "learning_rate": 0.0001, "loss": 0.7672, "mean_abs_error": 401.56749781702035, "mean_abs_error_last_10": 113.67060095149495, "mean_abs_error_last_25": 274.4930853981707, "mean_abs_error_last_50": 383.36565116424623, "mean_pred_prob": 0.04050906202755868, "mean_pred_prob_last_10": 0.19884661883115767, "mean_pred_prob_last_25": 0.11287345858290791, "mean_pred_prob_last_50": 0.06897685532458127, "mean_token_accuracy": 0.8747185647487641, "step": 21510 }, { "epoch": 0.38255737471779283, "grad_norm": 1.275776795254311, "learning_rate": 0.0001, "loss": 0.8376, "mean_abs_error": 503.74477687702, "mean_abs_error_last_10": 177.84896462004022, "mean_abs_error_last_25": 194.08878545498388, "mean_abs_error_last_50": 258.9987940298305, "mean_pred_prob": 0.02709087636321783, "mean_pred_prob_last_10": 0.14142904058098793, "mean_pred_prob_last_25": 0.07362521905452013, "mean_pred_prob_last_50": 0.0454586548730731, "mean_token_accuracy": 0.8720307230949402, "step": 21520 }, { "epoch": 0.38273514301459477, "grad_norm": 1.9047522320481634, "learning_rate": 0.0001, "loss": 0.8544, "mean_abs_error": 297.31203293919145, "mean_abs_error_last_10": 116.76268772504122, "mean_abs_error_last_25": 144.84601425891933, "mean_abs_error_last_50": 204.4186503171212, "mean_pred_prob": 0.041670747892931105, "mean_pred_prob_last_10": 0.1427448119968176, "mean_pred_prob_last_25": 0.092893420252949, "mean_pred_prob_last_50": 0.06495330305770039, "mean_token_accuracy": 0.8648772716522217, "step": 21530 }, { "epoch": 0.3829129113113967, "grad_norm": 1.4335055844681874, "learning_rate": 0.0001, "loss": 0.7706, "mean_abs_error": 327.31490368855253, "mean_abs_error_last_10": 53.66128061462439, "mean_abs_error_last_25": 146.15045275867558, "mean_abs_error_last_50": 226.41743087497176, "mean_pred_prob": 0.04558187634684145, "mean_pred_prob_last_10": 0.23549053259193897, "mean_pred_prob_last_25": 0.12899162182584406, "mean_pred_prob_last_50": 0.07691910080611705, "mean_token_accuracy": 0.8827137887477875, "step": 21540 }, { "epoch": 0.38309067960819865, "grad_norm": 1.1948751860741131, "learning_rate": 0.0001, "loss": 0.7935, "mean_abs_error": 738.5589215427863, "mean_abs_error_last_10": 186.2437264446297, "mean_abs_error_last_25": 214.35813765420488, "mean_abs_error_last_50": 321.334636633516, "mean_pred_prob": 0.019898272462887688, "mean_pred_prob_last_10": 0.11390547885093838, "mean_pred_prob_last_25": 0.05735046735499054, "mean_pred_prob_last_50": 0.03385736766504124, "mean_token_accuracy": 0.8739705801010131, "step": 21550 }, { "epoch": 0.38326844790500064, "grad_norm": 1.616211307562333, "learning_rate": 0.0001, "loss": 0.837, "mean_abs_error": 159.42372333198426, "mean_abs_error_last_10": 32.51045405173969, "mean_abs_error_last_25": 55.22461663801384, "mean_abs_error_last_50": 89.57396458664837, "mean_pred_prob": 0.04960132925771177, "mean_pred_prob_last_10": 0.25510308146476746, "mean_pred_prob_last_25": 0.13918008115142583, "mean_pred_prob_last_50": 0.08401889521628618, "mean_token_accuracy": 0.8707712531089783, "step": 21560 }, { "epoch": 0.3834462162018026, "grad_norm": 1.2065950094612503, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 412.36108521344903, "mean_abs_error_last_10": 63.66518410510355, "mean_abs_error_last_25": 156.18934282172071, "mean_abs_error_last_50": 310.6304047780744, "mean_pred_prob": 0.0499936880543828, "mean_pred_prob_last_10": 0.23032471388578415, "mean_pred_prob_last_25": 0.13187490394338966, "mean_pred_prob_last_50": 0.08262223163619638, "mean_token_accuracy": 0.8799527943134308, "step": 21570 }, { "epoch": 0.3836239844986045, "grad_norm": 3.1994982018349054, "learning_rate": 0.0001, "loss": 0.8657, "mean_abs_error": 665.5409118579872, "mean_abs_error_last_10": 210.3765016279669, "mean_abs_error_last_25": 285.9268808340478, "mean_abs_error_last_50": 376.1830381561104, "mean_pred_prob": 0.05411150033469312, "mean_pred_prob_last_10": 0.2373645889456384, "mean_pred_prob_last_25": 0.1421814120199997, "mean_pred_prob_last_50": 0.08958508525392972, "mean_token_accuracy": 0.8576958298683166, "step": 21580 }, { "epoch": 0.38380175279540646, "grad_norm": 1.0704003596250848, "learning_rate": 0.0001, "loss": 0.8522, "mean_abs_error": 938.8094246546561, "mean_abs_error_last_10": 446.54099361514017, "mean_abs_error_last_25": 618.0068718149162, "mean_abs_error_last_50": 769.3554165979057, "mean_pred_prob": 0.02084194535855204, "mean_pred_prob_last_10": 0.09340370846912265, "mean_pred_prob_last_25": 0.055685661360621454, "mean_pred_prob_last_50": 0.03499286831356585, "mean_token_accuracy": 0.863944160938263, "step": 21590 }, { "epoch": 0.3839795210922084, "grad_norm": 1.8004521169249068, "learning_rate": 0.0001, "loss": 0.7684, "mean_abs_error": 445.4765713662353, "mean_abs_error_last_10": 97.80819649151279, "mean_abs_error_last_25": 144.63000967203826, "mean_abs_error_last_50": 273.42751893433035, "mean_pred_prob": 0.03732268218882382, "mean_pred_prob_last_10": 0.1744473526137881, "mean_pred_prob_last_25": 0.0997195182950236, "mean_pred_prob_last_50": 0.06250865143956616, "mean_token_accuracy": 0.8789802610874176, "step": 21600 }, { "epoch": 0.38415728938901034, "grad_norm": 2.0600905645014826, "learning_rate": 0.0001, "loss": 0.7777, "mean_abs_error": 171.35666045155, "mean_abs_error_last_10": 62.67574031231504, "mean_abs_error_last_25": 90.92343149413792, "mean_abs_error_last_50": 103.92919016406654, "mean_pred_prob": 0.045264460565522316, "mean_pred_prob_last_10": 0.22248995248228312, "mean_pred_prob_last_25": 0.12481382852420211, "mean_pred_prob_last_50": 0.0760033412836492, "mean_token_accuracy": 0.8764277219772338, "step": 21610 }, { "epoch": 0.38433505768581233, "grad_norm": 1.471209454211076, "learning_rate": 0.0001, "loss": 0.7546, "mean_abs_error": 641.0832058903422, "mean_abs_error_last_10": 260.7169387801479, "mean_abs_error_last_25": 421.28160028882104, "mean_abs_error_last_50": 417.7473519357056, "mean_pred_prob": 0.0205368893337436, "mean_pred_prob_last_10": 0.10380339808762074, "mean_pred_prob_last_25": 0.057289347099140284, "mean_pred_prob_last_50": 0.03497106563299894, "mean_token_accuracy": 0.8713491439819336, "step": 21620 }, { "epoch": 0.38451282598261427, "grad_norm": 4.278196161600165, "learning_rate": 0.0001, "loss": 0.8011, "mean_abs_error": 240.30126522257106, "mean_abs_error_last_10": 84.2025803402913, "mean_abs_error_last_25": 131.662444128191, "mean_abs_error_last_50": 172.12972941958415, "mean_pred_prob": 0.03873561602085829, "mean_pred_prob_last_10": 0.19738609995692968, "mean_pred_prob_last_25": 0.10739329615607858, "mean_pred_prob_last_50": 0.06540194838307797, "mean_token_accuracy": 0.8763117730617523, "step": 21630 }, { "epoch": 0.3846905942794162, "grad_norm": 1.651701390483455, "learning_rate": 0.0001, "loss": 0.8026, "mean_abs_error": 315.7791172649671, "mean_abs_error_last_10": 105.23742777845818, "mean_abs_error_last_25": 148.81827402554285, "mean_abs_error_last_50": 169.40243957218257, "mean_pred_prob": 0.029703633254393937, "mean_pred_prob_last_10": 0.15623389817774297, "mean_pred_prob_last_25": 0.08462199550122022, "mean_pred_prob_last_50": 0.050294738914817574, "mean_token_accuracy": 0.8778160691261292, "step": 21640 }, { "epoch": 0.38486836257621815, "grad_norm": 1.1198221336075294, "learning_rate": 0.0001, "loss": 0.745, "mean_abs_error": 442.17913535805064, "mean_abs_error_last_10": 143.01104732781408, "mean_abs_error_last_25": 163.89487214068347, "mean_abs_error_last_50": 219.4281726716512, "mean_pred_prob": 0.030241196136921646, "mean_pred_prob_last_10": 0.14920432362705469, "mean_pred_prob_last_25": 0.08242506359238178, "mean_pred_prob_last_50": 0.05078880076762289, "mean_token_accuracy": 0.8768546044826507, "step": 21650 }, { "epoch": 0.3850461308730201, "grad_norm": 2.821917395948384, "learning_rate": 0.0001, "loss": 0.8188, "mean_abs_error": 295.49222926320715, "mean_abs_error_last_10": 49.55632614862682, "mean_abs_error_last_25": 99.65886753235775, "mean_abs_error_last_50": 156.28526769921362, "mean_pred_prob": 0.034237783309072255, "mean_pred_prob_last_10": 0.18024394921958448, "mean_pred_prob_last_25": 0.09686162956058979, "mean_pred_prob_last_50": 0.05843701669946313, "mean_token_accuracy": 0.8810510873794556, "step": 21660 }, { "epoch": 0.3852238991698221, "grad_norm": 1.259272919500508, "learning_rate": 0.0001, "loss": 0.7238, "mean_abs_error": 91.6973147800582, "mean_abs_error_last_10": 13.52983959764216, "mean_abs_error_last_25": 53.62842288926405, "mean_abs_error_last_50": 74.32710490428647, "mean_pred_prob": 0.0434423292055726, "mean_pred_prob_last_10": 0.20458560883998872, "mean_pred_prob_last_25": 0.11643578670918941, "mean_pred_prob_last_50": 0.07266345042735338, "mean_token_accuracy": 0.8779450297355652, "step": 21670 }, { "epoch": 0.385401667466624, "grad_norm": 1.3222117306870034, "learning_rate": 0.0001, "loss": 0.8522, "mean_abs_error": 229.05337470231365, "mean_abs_error_last_10": 61.528949184585976, "mean_abs_error_last_25": 69.98598302754542, "mean_abs_error_last_50": 94.57489023936618, "mean_pred_prob": 0.056586812157183884, "mean_pred_prob_last_10": 0.268994003534317, "mean_pred_prob_last_25": 0.14832930620759727, "mean_pred_prob_last_50": 0.09460292663425207, "mean_token_accuracy": 0.8679147005081177, "step": 21680 }, { "epoch": 0.38557943576342596, "grad_norm": 1.7176290937812022, "learning_rate": 0.0001, "loss": 0.8242, "mean_abs_error": 942.0496737485261, "mean_abs_error_last_10": 315.78531590158457, "mean_abs_error_last_25": 490.33480958518703, "mean_abs_error_last_50": 621.5257477992544, "mean_pred_prob": 0.018476662813918666, "mean_pred_prob_last_10": 0.10296393316239119, "mean_pred_prob_last_25": 0.05360714796697721, "mean_pred_prob_last_50": 0.0319140801904723, "mean_token_accuracy": 0.8656870543956756, "step": 21690 }, { "epoch": 0.3857572040602279, "grad_norm": 1.8337974315638323, "learning_rate": 0.0001, "loss": 0.708, "mean_abs_error": 291.25824059033886, "mean_abs_error_last_10": 83.18259436097341, "mean_abs_error_last_25": 98.59462788721792, "mean_abs_error_last_50": 152.25083109985528, "mean_pred_prob": 0.05032176785171032, "mean_pred_prob_last_10": 0.2356417251750827, "mean_pred_prob_last_25": 0.13815274490043522, "mean_pred_prob_last_50": 0.08402879405766725, "mean_token_accuracy": 0.8812557458877563, "step": 21700 }, { "epoch": 0.38593497235702984, "grad_norm": 1.4358377874505033, "learning_rate": 0.0001, "loss": 1.0387, "mean_abs_error": 577.8626697440392, "mean_abs_error_last_10": 322.92713186026793, "mean_abs_error_last_25": 384.29679859137013, "mean_abs_error_last_50": 422.31530052560703, "mean_pred_prob": 0.027687352686189114, "mean_pred_prob_last_10": 0.1295356683433056, "mean_pred_prob_last_25": 0.07542001940310002, "mean_pred_prob_last_50": 0.04580275868065655, "mean_token_accuracy": 0.8643673479557037, "step": 21710 }, { "epoch": 0.3861127406538318, "grad_norm": 1.4248937252038743, "learning_rate": 0.0001, "loss": 0.7102, "mean_abs_error": 366.5962839745386, "mean_abs_error_last_10": 112.25617491372809, "mean_abs_error_last_25": 237.602156465697, "mean_abs_error_last_50": 370.1942653868406, "mean_pred_prob": 0.0398139403667301, "mean_pred_prob_last_10": 0.19633469581604004, "mean_pred_prob_last_25": 0.11019042879343033, "mean_pred_prob_last_50": 0.0670012479647994, "mean_token_accuracy": 0.8756683230400085, "step": 21720 }, { "epoch": 0.38629050895063377, "grad_norm": 1.3615266724366033, "learning_rate": 0.0001, "loss": 0.6718, "mean_abs_error": 62.57396294557799, "mean_abs_error_last_10": 9.637064448124471, "mean_abs_error_last_25": 23.706809797133857, "mean_abs_error_last_50": 37.18689183319852, "mean_pred_prob": 0.06007911106571555, "mean_pred_prob_last_10": 0.2780161887407303, "mean_pred_prob_last_25": 0.15856994837522506, "mean_pred_prob_last_50": 0.0990359777584672, "mean_token_accuracy": 0.8774998486042023, "step": 21730 }, { "epoch": 0.3864682772474357, "grad_norm": 1.1794829762239116, "learning_rate": 0.0001, "loss": 0.7201, "mean_abs_error": 981.5283340755843, "mean_abs_error_last_10": 615.8382837762949, "mean_abs_error_last_25": 647.8397472240277, "mean_abs_error_last_50": 722.9760514534826, "mean_pred_prob": 0.04487675724521978, "mean_pred_prob_last_10": 0.19238098872592674, "mean_pred_prob_last_25": 0.11840741307241842, "mean_pred_prob_last_50": 0.0741020989487879, "mean_token_accuracy": 0.8724208354949952, "step": 21740 }, { "epoch": 0.38664604554423765, "grad_norm": 2.0357988952114807, "learning_rate": 0.0001, "loss": 0.7643, "mean_abs_error": 372.21213422180995, "mean_abs_error_last_10": 104.80500491964094, "mean_abs_error_last_25": 145.91568295776727, "mean_abs_error_last_50": 202.68158917979048, "mean_pred_prob": 0.0381550817284733, "mean_pred_prob_last_10": 0.1939827786758542, "mean_pred_prob_last_25": 0.10868608485907316, "mean_pred_prob_last_50": 0.06596431359648705, "mean_token_accuracy": 0.8793901383876801, "step": 21750 }, { "epoch": 0.3868238138410396, "grad_norm": 2.5359690763722993, "learning_rate": 0.0001, "loss": 0.7471, "mean_abs_error": 766.912896227965, "mean_abs_error_last_10": 260.0371026519894, "mean_abs_error_last_25": 332.3565983743936, "mean_abs_error_last_50": 505.5983361088157, "mean_pred_prob": 0.04627213111380115, "mean_pred_prob_last_10": 0.1955252618296072, "mean_pred_prob_last_25": 0.11059443733538502, "mean_pred_prob_last_50": 0.07340123203466646, "mean_token_accuracy": 0.8737694978713989, "step": 21760 }, { "epoch": 0.3870015821378415, "grad_norm": 1.57147091823724, "learning_rate": 0.0001, "loss": 0.9091, "mean_abs_error": 376.8875364728763, "mean_abs_error_last_10": 235.75153162621405, "mean_abs_error_last_25": 220.6796017632011, "mean_abs_error_last_50": 279.58158364698664, "mean_pred_prob": 0.04147474700585008, "mean_pred_prob_last_10": 0.18690556306391953, "mean_pred_prob_last_25": 0.10947525699157268, "mean_pred_prob_last_50": 0.06812642910517752, "mean_token_accuracy": 0.8649550974369049, "step": 21770 }, { "epoch": 0.38717935043464347, "grad_norm": 1.3934430112884506, "learning_rate": 0.0001, "loss": 0.8298, "mean_abs_error": 405.99651243831846, "mean_abs_error_last_10": 104.6992138945321, "mean_abs_error_last_25": 138.35000052028025, "mean_abs_error_last_50": 195.36387914213435, "mean_pred_prob": 0.03646680675446987, "mean_pred_prob_last_10": 0.19069104117807, "mean_pred_prob_last_25": 0.10232154345139861, "mean_pred_prob_last_50": 0.06194592891260982, "mean_token_accuracy": 0.874019730091095, "step": 21780 }, { "epoch": 0.38735711873144546, "grad_norm": 1.8938302549264372, "learning_rate": 0.0001, "loss": 0.7442, "mean_abs_error": 677.8595702012404, "mean_abs_error_last_10": 413.0684301596237, "mean_abs_error_last_25": 474.42907893498784, "mean_abs_error_last_50": 529.9470071717064, "mean_pred_prob": 0.013933763513341546, "mean_pred_prob_last_10": 0.0824825834017247, "mean_pred_prob_last_25": 0.042245542479213326, "mean_pred_prob_last_50": 0.02400716980919242, "mean_token_accuracy": 0.8725325465202332, "step": 21790 }, { "epoch": 0.3875348870282474, "grad_norm": 2.6710853002985915, "learning_rate": 0.0001, "loss": 0.8095, "mean_abs_error": 671.7969216213844, "mean_abs_error_last_10": 131.51244885022183, "mean_abs_error_last_25": 250.26241417058867, "mean_abs_error_last_50": 370.01724112781136, "mean_pred_prob": 0.031549995776731524, "mean_pred_prob_last_10": 0.14017199608497322, "mean_pred_prob_last_25": 0.08382227467373013, "mean_pred_prob_last_50": 0.051881388179026544, "mean_token_accuracy": 0.8764360845088959, "step": 21800 }, { "epoch": 0.38771265532504934, "grad_norm": 1.0940446095266028, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 212.74276022794658, "mean_abs_error_last_10": 50.40796136498867, "mean_abs_error_last_25": 92.12559082908452, "mean_abs_error_last_50": 126.6550568181292, "mean_pred_prob": 0.04203644632361829, "mean_pred_prob_last_10": 0.2030955072492361, "mean_pred_prob_last_25": 0.11329814810305834, "mean_pred_prob_last_50": 0.06997144035995007, "mean_token_accuracy": 0.8724126100540162, "step": 21810 }, { "epoch": 0.3878904236218513, "grad_norm": 1.8972799287898703, "learning_rate": 0.0001, "loss": 0.8044, "mean_abs_error": 310.66242097880865, "mean_abs_error_last_10": 211.5919079087556, "mean_abs_error_last_25": 295.61954588553317, "mean_abs_error_last_50": 338.4543774106416, "mean_pred_prob": 0.04034705145750195, "mean_pred_prob_last_10": 0.19511861484497786, "mean_pred_prob_last_25": 0.11114674145355821, "mean_pred_prob_last_50": 0.06740811672061682, "mean_token_accuracy": 0.87153559923172, "step": 21820 }, { "epoch": 0.3880681919186532, "grad_norm": 2.246027791942993, "learning_rate": 0.0001, "loss": 0.6928, "mean_abs_error": 222.07345265320583, "mean_abs_error_last_10": 106.20964236365178, "mean_abs_error_last_25": 96.05448666183663, "mean_abs_error_last_50": 153.3187239306164, "mean_pred_prob": 0.04562187748961151, "mean_pred_prob_last_10": 0.22915874905884265, "mean_pred_prob_last_25": 0.1284265429712832, "mean_pred_prob_last_50": 0.07772923423908651, "mean_token_accuracy": 0.8691592812538147, "step": 21830 }, { "epoch": 0.38824596021545515, "grad_norm": 2.825047453858511, "learning_rate": 0.0001, "loss": 0.832, "mean_abs_error": 410.87654720813924, "mean_abs_error_last_10": 172.62800565785176, "mean_abs_error_last_25": 177.64843781003717, "mean_abs_error_last_50": 242.71287207597243, "mean_pred_prob": 0.03478647726587951, "mean_pred_prob_last_10": 0.1758122036466375, "mean_pred_prob_last_25": 0.09774541547521949, "mean_pred_prob_last_50": 0.06005879068979993, "mean_token_accuracy": 0.8763185381889343, "step": 21840 }, { "epoch": 0.38842372851225715, "grad_norm": 1.4007831880790693, "learning_rate": 0.0001, "loss": 0.9297, "mean_abs_error": 630.9469189980664, "mean_abs_error_last_10": 261.73594574078294, "mean_abs_error_last_25": 350.1760975300433, "mean_abs_error_last_50": 433.6189125077177, "mean_pred_prob": 0.02747677543084137, "mean_pred_prob_last_10": 0.1416293061687611, "mean_pred_prob_last_25": 0.07593597611412406, "mean_pred_prob_last_50": 0.04650600074674003, "mean_token_accuracy": 0.859731251001358, "step": 21850 }, { "epoch": 0.3886014968090591, "grad_norm": 1.8699643164036663, "learning_rate": 0.0001, "loss": 0.6575, "mean_abs_error": 850.0505864727451, "mean_abs_error_last_10": 527.132141703027, "mean_abs_error_last_25": 522.0327300819343, "mean_abs_error_last_50": 598.9176606804492, "mean_pred_prob": 0.03885779593547341, "mean_pred_prob_last_10": 0.18142414576432203, "mean_pred_prob_last_25": 0.10420352800865658, "mean_pred_prob_last_50": 0.0652142504346557, "mean_token_accuracy": 0.8840713441371918, "step": 21860 }, { "epoch": 0.388779265105861, "grad_norm": 2.0571782153804605, "learning_rate": 0.0001, "loss": 0.7445, "mean_abs_error": 888.9874210993885, "mean_abs_error_last_10": 397.45650858833136, "mean_abs_error_last_25": 464.72344616568887, "mean_abs_error_last_50": 611.5340105378484, "mean_pred_prob": 0.03151564242434688, "mean_pred_prob_last_10": 0.15947878825245426, "mean_pred_prob_last_25": 0.08821289410989266, "mean_pred_prob_last_50": 0.05370851185289212, "mean_token_accuracy": 0.8667143225669861, "step": 21870 }, { "epoch": 0.38895703340266297, "grad_norm": 2.3154960841202583, "learning_rate": 0.0001, "loss": 0.8982, "mean_abs_error": 429.59481483847975, "mean_abs_error_last_10": 126.14010607287128, "mean_abs_error_last_25": 228.1036963035894, "mean_abs_error_last_50": 280.5104538082918, "mean_pred_prob": 0.026839498616755008, "mean_pred_prob_last_10": 0.1399217838421464, "mean_pred_prob_last_25": 0.07740725222975016, "mean_pred_prob_last_50": 0.046496298909187314, "mean_token_accuracy": 0.8665607333183288, "step": 21880 }, { "epoch": 0.3891348016994649, "grad_norm": 2.8010588427700833, "learning_rate": 0.0001, "loss": 0.7077, "mean_abs_error": 343.67391067389974, "mean_abs_error_last_10": 232.5785374291344, "mean_abs_error_last_25": 191.87519814250533, "mean_abs_error_last_50": 210.6635924546885, "mean_pred_prob": 0.04159862698288634, "mean_pred_prob_last_10": 0.2097170816385187, "mean_pred_prob_last_25": 0.11544351213378831, "mean_pred_prob_last_50": 0.07114120842888952, "mean_token_accuracy": 0.8686647891998291, "step": 21890 }, { "epoch": 0.38931256999626684, "grad_norm": 1.8125536361871732, "learning_rate": 0.0001, "loss": 0.6935, "mean_abs_error": 290.51746963356254, "mean_abs_error_last_10": 178.02984963042093, "mean_abs_error_last_25": 178.08519138781574, "mean_abs_error_last_50": 193.77782502757697, "mean_pred_prob": 0.04154978904407471, "mean_pred_prob_last_10": 0.20948148164898156, "mean_pred_prob_last_25": 0.11749801244586706, "mean_pred_prob_last_50": 0.07137740924954414, "mean_token_accuracy": 0.8844773948192597, "step": 21900 }, { "epoch": 0.38949033829306884, "grad_norm": 3.1206006617893705, "learning_rate": 0.0001, "loss": 0.7854, "mean_abs_error": 566.6455395669594, "mean_abs_error_last_10": 134.56753155741242, "mean_abs_error_last_25": 225.19636415611336, "mean_abs_error_last_50": 334.24029595252694, "mean_pred_prob": 0.02770023312186822, "mean_pred_prob_last_10": 0.1347978963982314, "mean_pred_prob_last_25": 0.07483294429257512, "mean_pred_prob_last_50": 0.045938273123465476, "mean_token_accuracy": 0.8737937450408936, "step": 21910 }, { "epoch": 0.3896681065898708, "grad_norm": 2.7215202488101324, "learning_rate": 0.0001, "loss": 0.7853, "mean_abs_error": 226.6967277729159, "mean_abs_error_last_10": 151.09487159444183, "mean_abs_error_last_25": 178.54045407947416, "mean_abs_error_last_50": 183.5675727146512, "mean_pred_prob": 0.0286168796941638, "mean_pred_prob_last_10": 0.1485575396567583, "mean_pred_prob_last_25": 0.08067725393921137, "mean_pred_prob_last_50": 0.048981279134750366, "mean_token_accuracy": 0.8805816829204559, "step": 21920 }, { "epoch": 0.3898458748866727, "grad_norm": 4.19549279311886, "learning_rate": 0.0001, "loss": 0.7699, "mean_abs_error": 695.6657101922111, "mean_abs_error_last_10": 275.9900051315946, "mean_abs_error_last_25": 337.996886655027, "mean_abs_error_last_50": 453.15532178459455, "mean_pred_prob": 0.04014823150646407, "mean_pred_prob_last_10": 0.18936589443474078, "mean_pred_prob_last_25": 0.10826087687164546, "mean_pred_prob_last_50": 0.0669361275038682, "mean_token_accuracy": 0.8795011520385743, "step": 21930 }, { "epoch": 0.39002364318347466, "grad_norm": 1.3970500830957278, "learning_rate": 0.0001, "loss": 0.7966, "mean_abs_error": 653.6216381280666, "mean_abs_error_last_10": 173.10136324275229, "mean_abs_error_last_25": 290.9299237352498, "mean_abs_error_last_50": 418.3543592853419, "mean_pred_prob": 0.037445792031940074, "mean_pred_prob_last_10": 0.18913455265574158, "mean_pred_prob_last_25": 0.10300611788989045, "mean_pred_prob_last_50": 0.06269835529383272, "mean_token_accuracy": 0.8725109040737152, "step": 21940 }, { "epoch": 0.3902014114802766, "grad_norm": 1.5878880739087256, "learning_rate": 0.0001, "loss": 0.8391, "mean_abs_error": 537.1163859710114, "mean_abs_error_last_10": 290.51191488301254, "mean_abs_error_last_25": 337.4626666446328, "mean_abs_error_last_50": 395.48690266887604, "mean_pred_prob": 0.030710198427550496, "mean_pred_prob_last_10": 0.14956317096948624, "mean_pred_prob_last_25": 0.08357340786606074, "mean_pred_prob_last_50": 0.05168025596067309, "mean_token_accuracy": 0.8668832063674927, "step": 21950 }, { "epoch": 0.39037917977707853, "grad_norm": 1.9606754180211885, "learning_rate": 0.0001, "loss": 0.8736, "mean_abs_error": 886.5930135638167, "mean_abs_error_last_10": 267.23100987833016, "mean_abs_error_last_25": 471.5778347831436, "mean_abs_error_last_50": 614.9117601266588, "mean_pred_prob": 0.04555809337180108, "mean_pred_prob_last_10": 0.17350210258737206, "mean_pred_prob_last_25": 0.1087766692508012, "mean_pred_prob_last_50": 0.0717641263094265, "mean_token_accuracy": 0.8714292347431183, "step": 21960 }, { "epoch": 0.3905569480738805, "grad_norm": 1.0936165661452173, "learning_rate": 0.0001, "loss": 0.8313, "mean_abs_error": 329.55407115050036, "mean_abs_error_last_10": 123.94392726982922, "mean_abs_error_last_25": 137.9034619238922, "mean_abs_error_last_50": 168.07936933936222, "mean_pred_prob": 0.044800832960754634, "mean_pred_prob_last_10": 0.2200016789138317, "mean_pred_prob_last_25": 0.1213856072165072, "mean_pred_prob_last_50": 0.07479892196133733, "mean_token_accuracy": 0.8736252129077912, "step": 21970 }, { "epoch": 0.39073471637068247, "grad_norm": 1.8295402150187665, "learning_rate": 0.0001, "loss": 0.8111, "mean_abs_error": 989.1915293762562, "mean_abs_error_last_10": 625.0447860825946, "mean_abs_error_last_25": 608.8704390875305, "mean_abs_error_last_50": 688.8078129060807, "mean_pred_prob": 0.03977372939698398, "mean_pred_prob_last_10": 0.1552938210312277, "mean_pred_prob_last_25": 0.10110914357355796, "mean_pred_prob_last_50": 0.06489756505470723, "mean_token_accuracy": 0.8717760622501374, "step": 21980 }, { "epoch": 0.3909124846674844, "grad_norm": 1.79531757170945, "learning_rate": 0.0001, "loss": 0.8695, "mean_abs_error": 392.2468983864759, "mean_abs_error_last_10": 140.22750091540243, "mean_abs_error_last_25": 241.57833436182628, "mean_abs_error_last_50": 301.42911920108327, "mean_pred_prob": 0.03684401283971965, "mean_pred_prob_last_10": 0.1700299298390746, "mean_pred_prob_last_25": 0.09935084544122219, "mean_pred_prob_last_50": 0.06169820069335401, "mean_token_accuracy": 0.8787725865840912, "step": 21990 }, { "epoch": 0.39109025296428634, "grad_norm": 0.5920495548577417, "learning_rate": 0.0001, "loss": 0.7459, "mean_abs_error": 221.8276268697859, "mean_abs_error_last_10": 17.204286836051285, "mean_abs_error_last_25": 47.67155306509614, "mean_abs_error_last_50": 104.2642271384752, "mean_pred_prob": 0.05164156230166554, "mean_pred_prob_last_10": 0.2633043259382248, "mean_pred_prob_last_25": 0.14419740494340658, "mean_pred_prob_last_50": 0.08788712788373232, "mean_token_accuracy": 0.8692625164985657, "step": 22000 }, { "epoch": 0.3912680212610883, "grad_norm": 1.3778274436044344, "learning_rate": 0.0001, "loss": 0.878, "mean_abs_error": 923.4241402219056, "mean_abs_error_last_10": 537.3145705939919, "mean_abs_error_last_25": 599.5742892549154, "mean_abs_error_last_50": 742.618591407175, "mean_pred_prob": 0.03787745442095911, "mean_pred_prob_last_10": 0.19020244020503013, "mean_pred_prob_last_25": 0.10239740806282498, "mean_pred_prob_last_50": 0.06266829750675243, "mean_token_accuracy": 0.8741698801517487, "step": 22010 }, { "epoch": 0.3914457895578902, "grad_norm": 1.4313372015127426, "learning_rate": 0.0001, "loss": 0.8021, "mean_abs_error": 1062.580083830403, "mean_abs_error_last_10": 344.63459511490043, "mean_abs_error_last_25": 470.5115994847997, "mean_abs_error_last_50": 627.0336545177762, "mean_pred_prob": 0.039217660391295796, "mean_pred_prob_last_10": 0.1940743089071475, "mean_pred_prob_last_25": 0.10793281115184072, "mean_pred_prob_last_50": 0.06635725284577347, "mean_token_accuracy": 0.8750362277030945, "step": 22020 }, { "epoch": 0.3916235578546922, "grad_norm": 2.9733275065922133, "learning_rate": 0.0001, "loss": 0.9086, "mean_abs_error": 1247.581275003392, "mean_abs_error_last_10": 837.5293761650524, "mean_abs_error_last_25": 889.4405613463883, "mean_abs_error_last_50": 979.7497602256284, "mean_pred_prob": 0.03377317298873095, "mean_pred_prob_last_10": 0.1729323397827102, "mean_pred_prob_last_25": 0.09521158339484828, "mean_pred_prob_last_50": 0.05770249100169167, "mean_token_accuracy": 0.8662797033786773, "step": 22030 }, { "epoch": 0.39180132615149416, "grad_norm": 1.113289345224012, "learning_rate": 0.0001, "loss": 0.8556, "mean_abs_error": 237.977198335634, "mean_abs_error_last_10": 23.29936798580343, "mean_abs_error_last_25": 41.136339761635995, "mean_abs_error_last_50": 98.78371172443212, "mean_pred_prob": 0.04522370947524905, "mean_pred_prob_last_10": 0.23210925459861756, "mean_pred_prob_last_25": 0.13189707100391387, "mean_pred_prob_last_50": 0.07804067255929112, "mean_token_accuracy": 0.862292867898941, "step": 22040 }, { "epoch": 0.3919790944482961, "grad_norm": 1.6187566119694976, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 225.39700351638248, "mean_abs_error_last_10": 118.1878797506487, "mean_abs_error_last_25": 117.36768940829992, "mean_abs_error_last_50": 130.40289051075604, "mean_pred_prob": 0.049224722757935525, "mean_pred_prob_last_10": 0.23108918741345405, "mean_pred_prob_last_25": 0.13049977030605078, "mean_pred_prob_last_50": 0.08202257752418518, "mean_token_accuracy": 0.8712099730968476, "step": 22050 }, { "epoch": 0.39215686274509803, "grad_norm": 2.4185181054598237, "learning_rate": 0.0001, "loss": 0.9242, "mean_abs_error": 639.657648469204, "mean_abs_error_last_10": 307.96804912831345, "mean_abs_error_last_25": 381.8931244855173, "mean_abs_error_last_50": 471.8769130828694, "mean_pred_prob": 0.04049214137776289, "mean_pred_prob_last_10": 0.20955734452581964, "mean_pred_prob_last_25": 0.1153373061621096, "mean_pred_prob_last_50": 0.06825425815186463, "mean_token_accuracy": 0.8658730149269104, "step": 22060 }, { "epoch": 0.3923346310419, "grad_norm": 1.557945797640696, "learning_rate": 0.0001, "loss": 0.8718, "mean_abs_error": 153.85524708336328, "mean_abs_error_last_10": 34.95456149806097, "mean_abs_error_last_25": 53.228592369873255, "mean_abs_error_last_50": 101.68877230199915, "mean_pred_prob": 0.04435696434229612, "mean_pred_prob_last_10": 0.2280499368906021, "mean_pred_prob_last_25": 0.12191332820802928, "mean_pred_prob_last_50": 0.07389744790270925, "mean_token_accuracy": 0.8677342534065247, "step": 22070 }, { "epoch": 0.3925123993387019, "grad_norm": 1.4891480427761643, "learning_rate": 0.0001, "loss": 0.7775, "mean_abs_error": 134.3972870935592, "mean_abs_error_last_10": 37.51782417662309, "mean_abs_error_last_25": 51.139044011638454, "mean_abs_error_last_50": 71.38247367911053, "mean_pred_prob": 0.050436653476208446, "mean_pred_prob_last_10": 0.2558984443545341, "mean_pred_prob_last_25": 0.1407800331711769, "mean_pred_prob_last_50": 0.08667066674679517, "mean_token_accuracy": 0.8716497898101807, "step": 22080 }, { "epoch": 0.3926901676355039, "grad_norm": 1.750053343378316, "learning_rate": 0.0001, "loss": 0.7443, "mean_abs_error": 529.7067872813714, "mean_abs_error_last_10": 151.80297640500586, "mean_abs_error_last_25": 213.7280288608199, "mean_abs_error_last_50": 309.323851386872, "mean_pred_prob": 0.03739240455033723, "mean_pred_prob_last_10": 0.1796113431919366, "mean_pred_prob_last_25": 0.1018538971315138, "mean_pred_prob_last_50": 0.06349825173965655, "mean_token_accuracy": 0.8763672649860382, "step": 22090 }, { "epoch": 0.39286793593230585, "grad_norm": 1.0835472859503754, "learning_rate": 0.0001, "loss": 0.8323, "mean_abs_error": 1267.5079985776683, "mean_abs_error_last_10": 698.1536216286091, "mean_abs_error_last_25": 812.1955792947658, "mean_abs_error_last_50": 991.6833992335416, "mean_pred_prob": 0.018906434436212295, "mean_pred_prob_last_10": 0.11169248035876081, "mean_pred_prob_last_25": 0.056456226759473795, "mean_pred_prob_last_50": 0.03235697289783275, "mean_token_accuracy": 0.8731166601181031, "step": 22100 }, { "epoch": 0.3930457042291078, "grad_norm": 1.2287670952151062, "learning_rate": 0.0001, "loss": 0.7981, "mean_abs_error": 255.62362981256575, "mean_abs_error_last_10": 135.6981135837109, "mean_abs_error_last_25": 162.15876847229828, "mean_abs_error_last_50": 187.1233300372016, "mean_pred_prob": 0.03680337800178677, "mean_pred_prob_last_10": 0.18578860703855754, "mean_pred_prob_last_25": 0.1065347509458661, "mean_pred_prob_last_50": 0.06248440518975258, "mean_token_accuracy": 0.8822699427604676, "step": 22110 }, { "epoch": 0.3932234725259097, "grad_norm": 0.9801449944868535, "learning_rate": 0.0001, "loss": 0.741, "mean_abs_error": 401.4838340598974, "mean_abs_error_last_10": 232.15124623881084, "mean_abs_error_last_25": 285.04810993485114, "mean_abs_error_last_50": 329.41777262729966, "mean_pred_prob": 0.03957608020864427, "mean_pred_prob_last_10": 0.18279978223145008, "mean_pred_prob_last_25": 0.10557056590914726, "mean_pred_prob_last_50": 0.06570946532301605, "mean_token_accuracy": 0.8876899600028991, "step": 22120 }, { "epoch": 0.39340124082271166, "grad_norm": 2.0590495642172546, "learning_rate": 0.0001, "loss": 0.8747, "mean_abs_error": 316.5498229827792, "mean_abs_error_last_10": 81.90124268555371, "mean_abs_error_last_25": 111.19074593340238, "mean_abs_error_last_50": 199.1032463640466, "mean_pred_prob": 0.035810612700879575, "mean_pred_prob_last_10": 0.17814736235886813, "mean_pred_prob_last_25": 0.10234398925676942, "mean_pred_prob_last_50": 0.06142070991918445, "mean_token_accuracy": 0.8714546740055085, "step": 22130 }, { "epoch": 0.3935790091195136, "grad_norm": 1.4408744010740826, "learning_rate": 0.0001, "loss": 0.8356, "mean_abs_error": 436.3839347906202, "mean_abs_error_last_10": 89.99070317649611, "mean_abs_error_last_25": 122.89271545414014, "mean_abs_error_last_50": 213.33574405154687, "mean_pred_prob": 0.028805041685700417, "mean_pred_prob_last_10": 0.15093854069709778, "mean_pred_prob_last_25": 0.0828198385424912, "mean_pred_prob_last_50": 0.04908175626769662, "mean_token_accuracy": 0.8613804280757904, "step": 22140 }, { "epoch": 0.3937567774163156, "grad_norm": 3.7980109598855067, "learning_rate": 0.0001, "loss": 0.7904, "mean_abs_error": 570.3573673266844, "mean_abs_error_last_10": 238.59439713877947, "mean_abs_error_last_25": 339.6725053795904, "mean_abs_error_last_50": 461.26192117583804, "mean_pred_prob": 0.046213888161582874, "mean_pred_prob_last_10": 0.2250473740277812, "mean_pred_prob_last_25": 0.12846671607694587, "mean_pred_prob_last_50": 0.07821031290804967, "mean_token_accuracy": 0.8773742020130157, "step": 22150 }, { "epoch": 0.39393454571311753, "grad_norm": 1.2150563932669114, "learning_rate": 0.0001, "loss": 0.7677, "mean_abs_error": 605.5294008177378, "mean_abs_error_last_10": 190.976311444353, "mean_abs_error_last_25": 246.21775028383905, "mean_abs_error_last_50": 344.40885019405664, "mean_pred_prob": 0.029784794157603756, "mean_pred_prob_last_10": 0.14435336856404318, "mean_pred_prob_last_25": 0.07939378814771772, "mean_pred_prob_last_50": 0.04917082983301953, "mean_token_accuracy": 0.8679122030735016, "step": 22160 }, { "epoch": 0.3941123140099195, "grad_norm": 1.181988961383769, "learning_rate": 0.0001, "loss": 0.7679, "mean_abs_error": 575.925813994683, "mean_abs_error_last_10": 187.48429186473274, "mean_abs_error_last_25": 275.51690511754185, "mean_abs_error_last_50": 414.2778148446847, "mean_pred_prob": 0.031053459364920856, "mean_pred_prob_last_10": 0.18444527154788376, "mean_pred_prob_last_25": 0.09343697354197503, "mean_pred_prob_last_50": 0.053990566078573464, "mean_token_accuracy": 0.8729315280914307, "step": 22170 }, { "epoch": 0.3942900823067214, "grad_norm": 1.149665348404404, "learning_rate": 0.0001, "loss": 0.796, "mean_abs_error": 138.97019016881444, "mean_abs_error_last_10": 40.18864673139237, "mean_abs_error_last_25": 71.17393814261655, "mean_abs_error_last_50": 82.11504203412684, "mean_pred_prob": 0.044061545841395856, "mean_pred_prob_last_10": 0.22143888771533965, "mean_pred_prob_last_25": 0.12231981977820397, "mean_pred_prob_last_50": 0.07452918719500304, "mean_token_accuracy": 0.8756095349788666, "step": 22180 }, { "epoch": 0.39446785060352335, "grad_norm": 1.813300558414415, "learning_rate": 0.0001, "loss": 0.7916, "mean_abs_error": 418.97325957916473, "mean_abs_error_last_10": 121.93092440785763, "mean_abs_error_last_25": 168.5649222783313, "mean_abs_error_last_50": 256.4023527092375, "mean_pred_prob": 0.02489421973004937, "mean_pred_prob_last_10": 0.13086023237556219, "mean_pred_prob_last_25": 0.07120483377948403, "mean_pred_prob_last_50": 0.04200660856440663, "mean_token_accuracy": 0.8696511209011077, "step": 22190 }, { "epoch": 0.3946456189003253, "grad_norm": 1.026316246471997, "learning_rate": 0.0001, "loss": 0.8317, "mean_abs_error": 622.8194203674084, "mean_abs_error_last_10": 210.6157638140888, "mean_abs_error_last_25": 257.9599883599147, "mean_abs_error_last_50": 406.5553698154516, "mean_pred_prob": 0.04776215435995255, "mean_pred_prob_last_10": 0.23000909889815374, "mean_pred_prob_last_25": 0.12917815674445593, "mean_pred_prob_last_50": 0.08000452417763881, "mean_token_accuracy": 0.8767358541488648, "step": 22200 }, { "epoch": 0.3948233871971273, "grad_norm": 2.9045326084134775, "learning_rate": 0.0001, "loss": 0.9342, "mean_abs_error": 854.1623008999746, "mean_abs_error_last_10": 277.23394487278546, "mean_abs_error_last_25": 372.5164852900614, "mean_abs_error_last_50": 527.0187686843897, "mean_pred_prob": 0.021700298217183445, "mean_pred_prob_last_10": 0.11492143115028738, "mean_pred_prob_last_25": 0.06144442071672529, "mean_pred_prob_last_50": 0.03712385880353395, "mean_token_accuracy": 0.8736126661300659, "step": 22210 }, { "epoch": 0.3950011554939292, "grad_norm": 1.6697521191361182, "learning_rate": 0.0001, "loss": 0.8082, "mean_abs_error": 90.96136270701012, "mean_abs_error_last_10": 15.653328771820943, "mean_abs_error_last_25": 32.483145894274216, "mean_abs_error_last_50": 54.490882799566236, "mean_pred_prob": 0.04665278773754835, "mean_pred_prob_last_10": 0.24198732376098633, "mean_pred_prob_last_25": 0.13260262757539748, "mean_pred_prob_last_50": 0.0801339890807867, "mean_token_accuracy": 0.887414813041687, "step": 22220 }, { "epoch": 0.39517892379073116, "grad_norm": 1.4063178728558174, "learning_rate": 0.0001, "loss": 0.777, "mean_abs_error": 963.4566848166999, "mean_abs_error_last_10": 548.516037716147, "mean_abs_error_last_25": 580.1519662143154, "mean_abs_error_last_50": 708.9301629094819, "mean_pred_prob": 0.02720103137253318, "mean_pred_prob_last_10": 0.13852851551782805, "mean_pred_prob_last_25": 0.07640093621448614, "mean_pred_prob_last_50": 0.046415940931183286, "mean_token_accuracy": 0.877413135766983, "step": 22230 }, { "epoch": 0.3953566920875331, "grad_norm": 3.775994139168583, "learning_rate": 0.0001, "loss": 0.7436, "mean_abs_error": 261.44066902597393, "mean_abs_error_last_10": 90.46060681791765, "mean_abs_error_last_25": 170.12840143420655, "mean_abs_error_last_50": 215.2196674046989, "mean_pred_prob": 0.0473083047196269, "mean_pred_prob_last_10": 0.20729649513959886, "mean_pred_prob_last_25": 0.12464393703266978, "mean_pred_prob_last_50": 0.07813202682882547, "mean_token_accuracy": 0.8798463225364686, "step": 22240 }, { "epoch": 0.39553446038433504, "grad_norm": 2.7317902479959373, "learning_rate": 0.0001, "loss": 0.842, "mean_abs_error": 1067.486441099219, "mean_abs_error_last_10": 621.5958955151309, "mean_abs_error_last_25": 716.3552411205641, "mean_abs_error_last_50": 820.3290669572514, "mean_pred_prob": 0.021321928476390896, "mean_pred_prob_last_10": 0.1154407434296445, "mean_pred_prob_last_25": 0.06268057952402159, "mean_pred_prob_last_50": 0.037333917296200524, "mean_token_accuracy": 0.8756089091300965, "step": 22250 }, { "epoch": 0.395712228681137, "grad_norm": 2.1590258568744143, "learning_rate": 0.0001, "loss": 0.7788, "mean_abs_error": 797.9248269282076, "mean_abs_error_last_10": 195.16335243178057, "mean_abs_error_last_25": 267.04941371677074, "mean_abs_error_last_50": 421.67237987475016, "mean_pred_prob": 0.031013965798774735, "mean_pred_prob_last_10": 0.15249869726831095, "mean_pred_prob_last_25": 0.08226822626311332, "mean_pred_prob_last_50": 0.050498522841371594, "mean_token_accuracy": 0.8812777101993561, "step": 22260 }, { "epoch": 0.395889996977939, "grad_norm": 2.0494854139775107, "learning_rate": 0.0001, "loss": 0.8622, "mean_abs_error": 661.5326538916167, "mean_abs_error_last_10": 356.4330092697845, "mean_abs_error_last_25": 432.1826205855805, "mean_abs_error_last_50": 480.9613618684102, "mean_pred_prob": 0.029171536560170352, "mean_pred_prob_last_10": 0.1586943448986858, "mean_pred_prob_last_25": 0.0832367914263159, "mean_pred_prob_last_50": 0.05025086040841416, "mean_token_accuracy": 0.8607386648654938, "step": 22270 }, { "epoch": 0.3960677652747409, "grad_norm": 1.1427241424370873, "learning_rate": 0.0001, "loss": 0.8505, "mean_abs_error": 436.3587546923748, "mean_abs_error_last_10": 100.58172151889022, "mean_abs_error_last_25": 147.36256451495996, "mean_abs_error_last_50": 258.6465212772187, "mean_pred_prob": 0.052454091113759205, "mean_pred_prob_last_10": 0.23678391363937407, "mean_pred_prob_last_25": 0.1382248328649439, "mean_pred_prob_last_50": 0.0872956589621026, "mean_token_accuracy": 0.863217431306839, "step": 22280 }, { "epoch": 0.39624553357154285, "grad_norm": 1.9383668630754631, "learning_rate": 0.0001, "loss": 0.8573, "mean_abs_error": 280.4940800229406, "mean_abs_error_last_10": 146.08807024624787, "mean_abs_error_last_25": 160.24865461358962, "mean_abs_error_last_50": 219.10447369053063, "mean_pred_prob": 0.03901525116525591, "mean_pred_prob_last_10": 0.20037474129348992, "mean_pred_prob_last_25": 0.1087049612775445, "mean_pred_prob_last_50": 0.06502809179946781, "mean_token_accuracy": 0.8717356503009797, "step": 22290 }, { "epoch": 0.3964233018683448, "grad_norm": 1.5063248471372082, "learning_rate": 0.0001, "loss": 0.8011, "mean_abs_error": 205.17062848262063, "mean_abs_error_last_10": 73.83140561007684, "mean_abs_error_last_25": 91.3412564382445, "mean_abs_error_last_50": 131.4442274000397, "mean_pred_prob": 0.03765353374183178, "mean_pred_prob_last_10": 0.2104675266891718, "mean_pred_prob_last_25": 0.1106236157938838, "mean_pred_prob_last_50": 0.06526747336611152, "mean_token_accuracy": 0.8729483306407928, "step": 22300 }, { "epoch": 0.39660107016514673, "grad_norm": 1.4698880787837378, "learning_rate": 0.0001, "loss": 0.7463, "mean_abs_error": 407.3273901532843, "mean_abs_error_last_10": 101.12213805027537, "mean_abs_error_last_25": 137.16881075343989, "mean_abs_error_last_50": 222.6470884232855, "mean_pred_prob": 0.032781773328315465, "mean_pred_prob_last_10": 0.16613542328123004, "mean_pred_prob_last_25": 0.09151482596062124, "mean_pred_prob_last_50": 0.055611985258292404, "mean_token_accuracy": 0.8750198125839234, "step": 22310 }, { "epoch": 0.39677883846194867, "grad_norm": 1.9106172620354183, "learning_rate": 0.0001, "loss": 0.7653, "mean_abs_error": 307.46233922797376, "mean_abs_error_last_10": 67.45027330992579, "mean_abs_error_last_25": 139.10234734454895, "mean_abs_error_last_50": 229.73826923452853, "mean_pred_prob": 0.041185155604034664, "mean_pred_prob_last_10": 0.21904579922556877, "mean_pred_prob_last_25": 0.12130134729668499, "mean_pred_prob_last_50": 0.0713759466074407, "mean_token_accuracy": 0.8810236752033234, "step": 22320 }, { "epoch": 0.39695660675875066, "grad_norm": 1.1496676075340264, "learning_rate": 0.0001, "loss": 0.8052, "mean_abs_error": 285.3545589370062, "mean_abs_error_last_10": 131.79478236063952, "mean_abs_error_last_25": 196.68917536112423, "mean_abs_error_last_50": 240.7313135340525, "mean_pred_prob": 0.046292919153347614, "mean_pred_prob_last_10": 0.20684427507221698, "mean_pred_prob_last_25": 0.12051619067788125, "mean_pred_prob_last_50": 0.07549044890329241, "mean_token_accuracy": 0.8739374876022339, "step": 22330 }, { "epoch": 0.3971343750555526, "grad_norm": 1.355356696198477, "learning_rate": 0.0001, "loss": 0.7815, "mean_abs_error": 383.3087716713921, "mean_abs_error_last_10": 75.07315752479357, "mean_abs_error_last_25": 103.1966435536893, "mean_abs_error_last_50": 174.3655970394788, "mean_pred_prob": 0.04991924027563073, "mean_pred_prob_last_10": 0.2596693988190964, "mean_pred_prob_last_25": 0.1402987307170406, "mean_pred_prob_last_50": 0.08430575957754627, "mean_token_accuracy": 0.8771869480609894, "step": 22340 }, { "epoch": 0.39731214335235454, "grad_norm": 1.5986312520527157, "learning_rate": 0.0001, "loss": 0.7591, "mean_abs_error": 721.7542401179535, "mean_abs_error_last_10": 212.31614274685072, "mean_abs_error_last_25": 295.0433278824174, "mean_abs_error_last_50": 451.03856173459826, "mean_pred_prob": 0.028978032642044127, "mean_pred_prob_last_10": 0.15781681610969828, "mean_pred_prob_last_25": 0.08297778433770872, "mean_pred_prob_last_50": 0.04876308290986344, "mean_token_accuracy": 0.8702204346656799, "step": 22350 }, { "epoch": 0.3974899116491565, "grad_norm": 0.9466267861472009, "learning_rate": 0.0001, "loss": 0.7393, "mean_abs_error": 329.48364023485857, "mean_abs_error_last_10": 50.41704315777027, "mean_abs_error_last_25": 83.04228793639197, "mean_abs_error_last_50": 159.67403544220542, "mean_pred_prob": 0.040623797196894885, "mean_pred_prob_last_10": 0.21211634650826455, "mean_pred_prob_last_25": 0.1157500708475709, "mean_pred_prob_last_50": 0.06944666178897023, "mean_token_accuracy": 0.874972540140152, "step": 22360 }, { "epoch": 0.3976676799459584, "grad_norm": 1.4194907650209951, "learning_rate": 0.0001, "loss": 0.7134, "mean_abs_error": 1167.4840547761241, "mean_abs_error_last_10": 414.6059655203515, "mean_abs_error_last_25": 490.09870466413986, "mean_abs_error_last_50": 670.772170843495, "mean_pred_prob": 0.00790680700156372, "mean_pred_prob_last_10": 0.04575559154618532, "mean_pred_prob_last_25": 0.022911754727829248, "mean_pred_prob_last_50": 0.01359868426225148, "mean_token_accuracy": 0.8764076113700867, "step": 22370 }, { "epoch": 0.3978454482427604, "grad_norm": 1.3968832658958785, "learning_rate": 0.0001, "loss": 0.8917, "mean_abs_error": 480.2972598512744, "mean_abs_error_last_10": 210.22390683867465, "mean_abs_error_last_25": 272.43509165982863, "mean_abs_error_last_50": 333.5778331253582, "mean_pred_prob": 0.051959924632683396, "mean_pred_prob_last_10": 0.23922472819685936, "mean_pred_prob_last_25": 0.14103091601282358, "mean_pred_prob_last_50": 0.0855628866236657, "mean_token_accuracy": 0.8603994309902191, "step": 22380 }, { "epoch": 0.39802321653956235, "grad_norm": 0.6982344035576126, "learning_rate": 0.0001, "loss": 0.7361, "mean_abs_error": 108.62867502652105, "mean_abs_error_last_10": 31.564921555644396, "mean_abs_error_last_25": 74.87436017869317, "mean_abs_error_last_50": 78.71545726072517, "mean_pred_prob": 0.0529548198916018, "mean_pred_prob_last_10": 0.23820361196994783, "mean_pred_prob_last_25": 0.14056471027433873, "mean_pred_prob_last_50": 0.08746699392795562, "mean_token_accuracy": 0.8856449246406555, "step": 22390 }, { "epoch": 0.3982009848363643, "grad_norm": 1.341390044701943, "learning_rate": 0.0001, "loss": 0.7806, "mean_abs_error": 1117.8602240382445, "mean_abs_error_last_10": 366.5033093271931, "mean_abs_error_last_25": 492.39881044045876, "mean_abs_error_last_50": 713.4365945826112, "mean_pred_prob": 0.013670358021045104, "mean_pred_prob_last_10": 0.05868204587604851, "mean_pred_prob_last_25": 0.0324480410781689, "mean_pred_prob_last_50": 0.021649427647935225, "mean_token_accuracy": 0.8744902074337005, "step": 22400 }, { "epoch": 0.39837875313316623, "grad_norm": 1.2474044688503205, "learning_rate": 0.0001, "loss": 0.6618, "mean_abs_error": 1053.7212536976178, "mean_abs_error_last_10": 738.6783745800292, "mean_abs_error_last_25": 806.9111815039684, "mean_abs_error_last_50": 842.2725416659323, "mean_pred_prob": 0.033002074058458675, "mean_pred_prob_last_10": 0.163251228898298, "mean_pred_prob_last_25": 0.09261315944604576, "mean_pred_prob_last_50": 0.05555383630999131, "mean_token_accuracy": 0.8757998764514923, "step": 22410 }, { "epoch": 0.39855652142996817, "grad_norm": 1.4006169333693521, "learning_rate": 0.0001, "loss": 0.7136, "mean_abs_error": 278.58250930338363, "mean_abs_error_last_10": 86.59492114116163, "mean_abs_error_last_25": 154.93109611983635, "mean_abs_error_last_50": 218.63664886810722, "mean_pred_prob": 0.04494578735902906, "mean_pred_prob_last_10": 0.22445170395076275, "mean_pred_prob_last_25": 0.1251316166482866, "mean_pred_prob_last_50": 0.07677787458524107, "mean_token_accuracy": 0.874278062582016, "step": 22420 }, { "epoch": 0.3987342897267701, "grad_norm": 2.0197573081507927, "learning_rate": 0.0001, "loss": 0.8412, "mean_abs_error": 601.2059177436098, "mean_abs_error_last_10": 113.11030113309702, "mean_abs_error_last_25": 217.85362007153867, "mean_abs_error_last_50": 367.3964792664551, "mean_pred_prob": 0.022188427671790124, "mean_pred_prob_last_10": 0.12185310069471597, "mean_pred_prob_last_25": 0.06392825003713369, "mean_pred_prob_last_50": 0.03839587061665952, "mean_token_accuracy": 0.8655198693275452, "step": 22430 }, { "epoch": 0.3989120580235721, "grad_norm": 2.1473670544769385, "learning_rate": 0.0001, "loss": 0.8743, "mean_abs_error": 650.0054518539704, "mean_abs_error_last_10": 126.43467536026246, "mean_abs_error_last_25": 199.6552607038234, "mean_abs_error_last_50": 315.4305538955167, "mean_pred_prob": 0.0505420065484941, "mean_pred_prob_last_10": 0.21392431468702852, "mean_pred_prob_last_25": 0.13113421231973915, "mean_pred_prob_last_50": 0.08396441724617035, "mean_token_accuracy": 0.8639658868312836, "step": 22440 }, { "epoch": 0.39908982632037404, "grad_norm": 1.2229293992425794, "learning_rate": 0.0001, "loss": 0.7532, "mean_abs_error": 287.0760243436069, "mean_abs_error_last_10": 155.76139099250767, "mean_abs_error_last_25": 165.846900380127, "mean_abs_error_last_50": 193.3493924353089, "mean_pred_prob": 0.04189179013483226, "mean_pred_prob_last_10": 0.21008928697556256, "mean_pred_prob_last_25": 0.11550086988136173, "mean_pred_prob_last_50": 0.0712403510697186, "mean_token_accuracy": 0.869526743888855, "step": 22450 }, { "epoch": 0.399267594617176, "grad_norm": 2.0714259955550793, "learning_rate": 0.0001, "loss": 0.7525, "mean_abs_error": 549.5203395470448, "mean_abs_error_last_10": 108.15391394860899, "mean_abs_error_last_25": 162.88582778345614, "mean_abs_error_last_50": 292.2040053745334, "mean_pred_prob": 0.03862445703125559, "mean_pred_prob_last_10": 0.19771718623815104, "mean_pred_prob_last_25": 0.10917084006941877, "mean_pred_prob_last_50": 0.06624254053458571, "mean_token_accuracy": 0.8785590946674346, "step": 22460 }, { "epoch": 0.3994453629139779, "grad_norm": 1.339825703455623, "learning_rate": 0.0001, "loss": 0.6706, "mean_abs_error": 559.5359801454176, "mean_abs_error_last_10": 226.09801576353183, "mean_abs_error_last_25": 277.60638424869995, "mean_abs_error_last_50": 372.70092656819037, "mean_pred_prob": 0.025180945306783543, "mean_pred_prob_last_10": 0.13113768227631226, "mean_pred_prob_last_25": 0.06892980877310037, "mean_pred_prob_last_50": 0.042540396249387415, "mean_token_accuracy": 0.8704352855682373, "step": 22470 }, { "epoch": 0.39962313121077986, "grad_norm": 0.9245875110046837, "learning_rate": 0.0001, "loss": 0.861, "mean_abs_error": 183.194330966295, "mean_abs_error_last_10": 40.82445503146265, "mean_abs_error_last_25": 68.14229277635062, "mean_abs_error_last_50": 101.23500708167717, "mean_pred_prob": 0.04309436101466417, "mean_pred_prob_last_10": 0.22506623566150666, "mean_pred_prob_last_25": 0.1219558347016573, "mean_pred_prob_last_50": 0.0739397507160902, "mean_token_accuracy": 0.8680388331413269, "step": 22480 }, { "epoch": 0.3998008995075818, "grad_norm": 1.0525969484154245, "learning_rate": 0.0001, "loss": 0.7705, "mean_abs_error": 1497.358198677091, "mean_abs_error_last_10": 902.5316374355618, "mean_abs_error_last_25": 967.0835461358187, "mean_abs_error_last_50": 1152.0024695081215, "mean_pred_prob": 0.0397552947542863, "mean_pred_prob_last_10": 0.18998127542436122, "mean_pred_prob_last_25": 0.10404821645352058, "mean_pred_prob_last_50": 0.06521843560185517, "mean_token_accuracy": 0.8827136993408203, "step": 22490 }, { "epoch": 0.3999786678043838, "grad_norm": 1.134849668984926, "learning_rate": 0.0001, "loss": 0.7007, "mean_abs_error": 551.6973906049813, "mean_abs_error_last_10": 147.5805867136075, "mean_abs_error_last_25": 197.9277705593523, "mean_abs_error_last_50": 349.57358544146075, "mean_pred_prob": 0.04148339699022472, "mean_pred_prob_last_10": 0.21226461865007878, "mean_pred_prob_last_25": 0.11622344246134161, "mean_pred_prob_last_50": 0.0699431896675378, "mean_token_accuracy": 0.8737362861633301, "step": 22500 }, { "epoch": 0.40015643610118573, "grad_norm": 1.2631916160630536, "learning_rate": 0.0001, "loss": 0.7746, "mean_abs_error": 534.2086066231283, "mean_abs_error_last_10": 164.10830833517193, "mean_abs_error_last_25": 222.94602680696798, "mean_abs_error_last_50": 329.6562665909122, "mean_pred_prob": 0.04448963123140857, "mean_pred_prob_last_10": 0.2148111279355362, "mean_pred_prob_last_25": 0.11925965757109225, "mean_pred_prob_last_50": 0.07428614303935319, "mean_token_accuracy": 0.8810945928096772, "step": 22510 }, { "epoch": 0.40033420439798767, "grad_norm": 1.2594332141696725, "learning_rate": 0.0001, "loss": 0.8074, "mean_abs_error": 1302.495469708568, "mean_abs_error_last_10": 792.6071541323892, "mean_abs_error_last_25": 836.1866151585052, "mean_abs_error_last_50": 943.2655891904672, "mean_pred_prob": 0.03316916503827087, "mean_pred_prob_last_10": 0.1518646951415576, "mean_pred_prob_last_25": 0.08723715774249285, "mean_pred_prob_last_50": 0.05470618174294941, "mean_token_accuracy": 0.8776749432086944, "step": 22520 }, { "epoch": 0.4005119726947896, "grad_norm": 0.982874481278919, "learning_rate": 0.0001, "loss": 0.8405, "mean_abs_error": 413.00412876095714, "mean_abs_error_last_10": 84.09675163615366, "mean_abs_error_last_25": 125.05805350846954, "mean_abs_error_last_50": 214.3206948098031, "mean_pred_prob": 0.03246515733189881, "mean_pred_prob_last_10": 0.16187495607882738, "mean_pred_prob_last_25": 0.09035458201542497, "mean_pred_prob_last_50": 0.055256415624171495, "mean_token_accuracy": 0.8744914174079895, "step": 22530 }, { "epoch": 0.40068974099159155, "grad_norm": 1.8360514360052074, "learning_rate": 0.0001, "loss": 0.9077, "mean_abs_error": 289.2988639395411, "mean_abs_error_last_10": 69.97185130148907, "mean_abs_error_last_25": 130.13503901158361, "mean_abs_error_last_50": 179.9677635477588, "mean_pred_prob": 0.04049217710271478, "mean_pred_prob_last_10": 0.21108757313340903, "mean_pred_prob_last_25": 0.11127139376476407, "mean_pred_prob_last_50": 0.06804659627377987, "mean_token_accuracy": 0.8735814273357392, "step": 22540 }, { "epoch": 0.4008675092883935, "grad_norm": 1.4728218456734397, "learning_rate": 0.0001, "loss": 0.9099, "mean_abs_error": 550.6305097622076, "mean_abs_error_last_10": 172.99861269909943, "mean_abs_error_last_25": 215.83146145672217, "mean_abs_error_last_50": 295.5470221667554, "mean_pred_prob": 0.026348960853647442, "mean_pred_prob_last_10": 0.14626822562422603, "mean_pred_prob_last_25": 0.07622921518050134, "mean_pred_prob_last_50": 0.04508969079470262, "mean_token_accuracy": 0.861213105916977, "step": 22550 }, { "epoch": 0.4010452775851955, "grad_norm": 2.384779683458261, "learning_rate": 0.0001, "loss": 0.675, "mean_abs_error": 433.0664896059332, "mean_abs_error_last_10": 157.27050990438073, "mean_abs_error_last_25": 180.5982434057129, "mean_abs_error_last_50": 224.20833968575135, "mean_pred_prob": 0.031205297191627325, "mean_pred_prob_last_10": 0.15152343325316905, "mean_pred_prob_last_25": 0.08338308501988649, "mean_pred_prob_last_50": 0.05178816430270672, "mean_token_accuracy": 0.8696236610412598, "step": 22560 }, { "epoch": 0.4012230458819974, "grad_norm": 1.765441161311917, "learning_rate": 0.0001, "loss": 0.8574, "mean_abs_error": 873.7620539657855, "mean_abs_error_last_10": 426.56422789796807, "mean_abs_error_last_25": 514.6540628037103, "mean_abs_error_last_50": 643.1742805557383, "mean_pred_prob": 0.04571711163807777, "mean_pred_prob_last_10": 0.2151020401273854, "mean_pred_prob_last_25": 0.12442080069013173, "mean_pred_prob_last_50": 0.0767485051881522, "mean_token_accuracy": 0.8642046689987183, "step": 22570 }, { "epoch": 0.40140081417879936, "grad_norm": 1.4002162816685644, "learning_rate": 0.0001, "loss": 0.7233, "mean_abs_error": 1209.265185685779, "mean_abs_error_last_10": 551.777615917005, "mean_abs_error_last_25": 659.8624628947463, "mean_abs_error_last_50": 828.3697779732759, "mean_pred_prob": 0.03340468285023235, "mean_pred_prob_last_10": 0.16653431787271983, "mean_pred_prob_last_25": 0.09088860832271166, "mean_pred_prob_last_50": 0.05609864628931973, "mean_token_accuracy": 0.8742227733135224, "step": 22580 }, { "epoch": 0.4015785824756013, "grad_norm": 1.0005224742501624, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 284.3546134340535, "mean_abs_error_last_10": 102.22547774909143, "mean_abs_error_last_25": 136.2485604220752, "mean_abs_error_last_50": 208.01139437729063, "mean_pred_prob": 0.036055635567754506, "mean_pred_prob_last_10": 0.18197614066302775, "mean_pred_prob_last_25": 0.1003710892982781, "mean_pred_prob_last_50": 0.06135600847192109, "mean_token_accuracy": 0.8804805278778076, "step": 22590 }, { "epoch": 0.40175635077240324, "grad_norm": 1.5913810460227151, "learning_rate": 0.0001, "loss": 0.6978, "mean_abs_error": 350.75228029993525, "mean_abs_error_last_10": 52.51839809897079, "mean_abs_error_last_25": 98.57977797024932, "mean_abs_error_last_50": 182.51301698080252, "mean_pred_prob": 0.039242231752723455, "mean_pred_prob_last_10": 0.19804862719029187, "mean_pred_prob_last_25": 0.10623237527906895, "mean_pred_prob_last_50": 0.06601500995457173, "mean_token_accuracy": 0.8761402547359467, "step": 22600 }, { "epoch": 0.4019341190692052, "grad_norm": 1.3465011200190897, "learning_rate": 0.0001, "loss": 0.8844, "mean_abs_error": 958.3211990995894, "mean_abs_error_last_10": 413.5514530672634, "mean_abs_error_last_25": 546.8375906667764, "mean_abs_error_last_50": 662.6375502884223, "mean_pred_prob": 0.02625297638005577, "mean_pred_prob_last_10": 0.1396775549161248, "mean_pred_prob_last_25": 0.07548127124027815, "mean_pred_prob_last_50": 0.04544167853309773, "mean_token_accuracy": 0.8708739042282104, "step": 22610 }, { "epoch": 0.40211188736600717, "grad_norm": 0.9407584206667798, "learning_rate": 0.0001, "loss": 0.8224, "mean_abs_error": 355.4492428550956, "mean_abs_error_last_10": 185.95280692802453, "mean_abs_error_last_25": 202.09259999471084, "mean_abs_error_last_50": 245.2102240110392, "mean_pred_prob": 0.031031945976428686, "mean_pred_prob_last_10": 0.15215885918587446, "mean_pred_prob_last_25": 0.08623509183526039, "mean_pred_prob_last_50": 0.0532751580234617, "mean_token_accuracy": 0.8804454505443573, "step": 22620 }, { "epoch": 0.4022896556628091, "grad_norm": 1.4949839558334337, "learning_rate": 0.0001, "loss": 0.8697, "mean_abs_error": 251.87056060195036, "mean_abs_error_last_10": 46.68707660213276, "mean_abs_error_last_25": 70.76087757112609, "mean_abs_error_last_50": 135.85854606902413, "mean_pred_prob": 0.04083095306996256, "mean_pred_prob_last_10": 0.20912442784756421, "mean_pred_prob_last_25": 0.11380825834348798, "mean_pred_prob_last_50": 0.0685468222014606, "mean_token_accuracy": 0.8625832438468933, "step": 22630 }, { "epoch": 0.40246742395961105, "grad_norm": 2.005057982293833, "learning_rate": 0.0001, "loss": 0.8246, "mean_abs_error": 251.07311094485559, "mean_abs_error_last_10": 100.15346571387514, "mean_abs_error_last_25": 133.52317105087016, "mean_abs_error_last_50": 168.89788674649262, "mean_pred_prob": 0.047813096828758714, "mean_pred_prob_last_10": 0.23437125384807586, "mean_pred_prob_last_25": 0.13052986599504948, "mean_pred_prob_last_50": 0.08024635920301079, "mean_token_accuracy": 0.87393639087677, "step": 22640 }, { "epoch": 0.402645192256413, "grad_norm": 1.1026525571149244, "learning_rate": 0.0001, "loss": 0.7437, "mean_abs_error": 302.3301273862073, "mean_abs_error_last_10": 200.6479617863369, "mean_abs_error_last_25": 224.6773478013882, "mean_abs_error_last_50": 282.2783444197109, "mean_pred_prob": 0.03946570117259398, "mean_pred_prob_last_10": 0.18901200492400677, "mean_pred_prob_last_25": 0.10936982512939722, "mean_pred_prob_last_50": 0.06603249283507466, "mean_token_accuracy": 0.866168600320816, "step": 22650 }, { "epoch": 0.4028229605532149, "grad_norm": 1.0315607871452301, "learning_rate": 0.0001, "loss": 0.7743, "mean_abs_error": 789.4215544437205, "mean_abs_error_last_10": 137.71873150665766, "mean_abs_error_last_25": 230.39012664340208, "mean_abs_error_last_50": 413.819691201797, "mean_pred_prob": 0.028360495029482992, "mean_pred_prob_last_10": 0.14928037375211717, "mean_pred_prob_last_25": 0.08067506244406104, "mean_pred_prob_last_50": 0.0485693329712376, "mean_token_accuracy": 0.8803452670574188, "step": 22660 }, { "epoch": 0.40300072885001686, "grad_norm": 1.5907534282978073, "learning_rate": 0.0001, "loss": 0.7244, "mean_abs_error": 784.134818830385, "mean_abs_error_last_10": 508.25484302845206, "mean_abs_error_last_25": 519.2599804581845, "mean_abs_error_last_50": 595.6231425601984, "mean_pred_prob": 0.05931445268361131, "mean_pred_prob_last_10": 0.3020506079294137, "mean_pred_prob_last_25": 0.1670242458509165, "mean_pred_prob_last_50": 0.09934640932187903, "mean_token_accuracy": 0.8723830163478852, "step": 22670 }, { "epoch": 0.40317849714681886, "grad_norm": 1.0820791726497736, "learning_rate": 0.0001, "loss": 1.0137, "mean_abs_error": 1529.7089091433452, "mean_abs_error_last_10": 543.3492275129445, "mean_abs_error_last_25": 666.4554474712023, "mean_abs_error_last_50": 916.1967113652388, "mean_pred_prob": 0.024404928865260446, "mean_pred_prob_last_10": 0.11707376401755028, "mean_pred_prob_last_25": 0.0669015429331921, "mean_pred_prob_last_50": 0.041274388847523366, "mean_token_accuracy": 0.8813995838165283, "step": 22680 }, { "epoch": 0.4033562654436208, "grad_norm": 1.4340793903712246, "learning_rate": 0.0001, "loss": 0.8644, "mean_abs_error": 891.6121015167324, "mean_abs_error_last_10": 379.86927586967556, "mean_abs_error_last_25": 518.8452708305539, "mean_abs_error_last_50": 605.5304469156995, "mean_pred_prob": 0.02906340004992671, "mean_pred_prob_last_10": 0.1390424197772518, "mean_pred_prob_last_25": 0.07636998165107797, "mean_pred_prob_last_50": 0.048389693335047924, "mean_token_accuracy": 0.8745173215866089, "step": 22690 }, { "epoch": 0.40353403374042274, "grad_norm": 1.006758364411169, "learning_rate": 0.0001, "loss": 0.7707, "mean_abs_error": 982.2450635026886, "mean_abs_error_last_10": 410.66836069232596, "mean_abs_error_last_25": 516.6615810480367, "mean_abs_error_last_50": 681.83166648168, "mean_pred_prob": 0.02765227587369736, "mean_pred_prob_last_10": 0.13108630339847877, "mean_pred_prob_last_25": 0.07331429283658508, "mean_pred_prob_last_50": 0.04541201072570402, "mean_token_accuracy": 0.8668291568756104, "step": 22700 }, { "epoch": 0.4037118020372247, "grad_norm": 1.3254528827064247, "learning_rate": 0.0001, "loss": 0.8007, "mean_abs_error": 817.1900914565301, "mean_abs_error_last_10": 326.8133000662496, "mean_abs_error_last_25": 401.26310171919215, "mean_abs_error_last_50": 546.155832756933, "mean_pred_prob": 0.027313885354669765, "mean_pred_prob_last_10": 0.13866355592617766, "mean_pred_prob_last_25": 0.0765708934282884, "mean_pred_prob_last_50": 0.04594262217869982, "mean_token_accuracy": 0.8698496758937836, "step": 22710 }, { "epoch": 0.4038895703340266, "grad_norm": 1.520752575930945, "learning_rate": 0.0001, "loss": 0.8294, "mean_abs_error": 831.5969568637113, "mean_abs_error_last_10": 213.3710388286169, "mean_abs_error_last_25": 363.4810839823177, "mean_abs_error_last_50": 523.6987670838705, "mean_pred_prob": 0.041178879042854534, "mean_pred_prob_last_10": 0.2006009151111357, "mean_pred_prob_last_25": 0.11166289857937954, "mean_pred_prob_last_50": 0.07018403190886602, "mean_token_accuracy": 0.8630118012428284, "step": 22720 }, { "epoch": 0.40406733863082855, "grad_norm": 1.8411360780149961, "learning_rate": 0.0001, "loss": 0.8973, "mean_abs_error": 505.10475138778327, "mean_abs_error_last_10": 138.65712919595455, "mean_abs_error_last_25": 205.52454157040333, "mean_abs_error_last_50": 289.65661648555545, "mean_pred_prob": 0.02690755616640672, "mean_pred_prob_last_10": 0.15343109355308115, "mean_pred_prob_last_25": 0.07879808777943254, "mean_pred_prob_last_50": 0.04643574722576886, "mean_token_accuracy": 0.8727098345756531, "step": 22730 }, { "epoch": 0.40424510692763055, "grad_norm": 1.3069831275875679, "learning_rate": 0.0001, "loss": 0.7277, "mean_abs_error": 311.51394890639256, "mean_abs_error_last_10": 271.4679263293011, "mean_abs_error_last_25": 305.0845798213596, "mean_abs_error_last_50": 328.1607566210324, "mean_pred_prob": 0.0565821026917547, "mean_pred_prob_last_10": 0.2782256960868835, "mean_pred_prob_last_25": 0.1578926949761808, "mean_pred_prob_last_50": 0.09655421255156398, "mean_token_accuracy": 0.8767743825912475, "step": 22740 }, { "epoch": 0.4044228752244325, "grad_norm": 1.7878743045049672, "learning_rate": 0.0001, "loss": 0.794, "mean_abs_error": 386.5565542084927, "mean_abs_error_last_10": 48.68945348890098, "mean_abs_error_last_25": 100.84494921500436, "mean_abs_error_last_50": 184.25682529885495, "mean_pred_prob": 0.03875629184767604, "mean_pred_prob_last_10": 0.1908613208681345, "mean_pred_prob_last_25": 0.10854180175811053, "mean_pred_prob_last_50": 0.06611309694126248, "mean_token_accuracy": 0.8764659821987152, "step": 22750 }, { "epoch": 0.4046006435212344, "grad_norm": 1.9289724645224857, "learning_rate": 0.0001, "loss": 0.7962, "mean_abs_error": 676.5224594011285, "mean_abs_error_last_10": 286.22749652783943, "mean_abs_error_last_25": 356.8371346332584, "mean_abs_error_last_50": 428.1510672044542, "mean_pred_prob": 0.029199801688082515, "mean_pred_prob_last_10": 0.13575854285154493, "mean_pred_prob_last_25": 0.07770675783976913, "mean_pred_prob_last_50": 0.0481604466214776, "mean_token_accuracy": 0.8738449573516845, "step": 22760 }, { "epoch": 0.40477841181803637, "grad_norm": 2.2154686185132104, "learning_rate": 0.0001, "loss": 0.811, "mean_abs_error": 925.0084240568898, "mean_abs_error_last_10": 436.648477522159, "mean_abs_error_last_25": 517.3503515417282, "mean_abs_error_last_50": 642.2240924971553, "mean_pred_prob": 0.03630941735464148, "mean_pred_prob_last_10": 0.17346912805805914, "mean_pred_prob_last_25": 0.09800172646646388, "mean_pred_prob_last_50": 0.0611810440197587, "mean_token_accuracy": 0.8905090034008026, "step": 22770 }, { "epoch": 0.4049561801148383, "grad_norm": 1.7781138500121798, "learning_rate": 0.0001, "loss": 0.8466, "mean_abs_error": 262.32138926419117, "mean_abs_error_last_10": 48.889838650355834, "mean_abs_error_last_25": 75.61460027524447, "mean_abs_error_last_50": 162.14694306670262, "mean_pred_prob": 0.04755828948691487, "mean_pred_prob_last_10": 0.24412145279347897, "mean_pred_prob_last_25": 0.1343198860064149, "mean_pred_prob_last_50": 0.07997501101344824, "mean_token_accuracy": 0.8738631010055542, "step": 22780 }, { "epoch": 0.40513394841164024, "grad_norm": 2.017392906534095, "learning_rate": 0.0001, "loss": 0.743, "mean_abs_error": 249.31753070933672, "mean_abs_error_last_10": 152.3248481806915, "mean_abs_error_last_25": 149.01495283391014, "mean_abs_error_last_50": 166.26384047167392, "mean_pred_prob": 0.040383395837852734, "mean_pred_prob_last_10": 0.19687631527194754, "mean_pred_prob_last_25": 0.11039734845981002, "mean_pred_prob_last_50": 0.06825434091733769, "mean_token_accuracy": 0.8730891585350037, "step": 22790 }, { "epoch": 0.40531171670844224, "grad_norm": 1.942301077231868, "learning_rate": 0.0001, "loss": 0.7771, "mean_abs_error": 867.6401522964712, "mean_abs_error_last_10": 461.68237316046304, "mean_abs_error_last_25": 546.8484571760404, "mean_abs_error_last_50": 652.2283260696156, "mean_pred_prob": 0.04162087531440193, "mean_pred_prob_last_10": 0.2033497845986858, "mean_pred_prob_last_25": 0.11526571627473459, "mean_pred_prob_last_50": 0.07105295431538253, "mean_token_accuracy": 0.8728208720684052, "step": 22800 }, { "epoch": 0.4054894850052442, "grad_norm": 1.0412942533899026, "learning_rate": 0.0001, "loss": 0.7502, "mean_abs_error": 1058.6576852339265, "mean_abs_error_last_10": 414.0441707974428, "mean_abs_error_last_25": 549.9623744679902, "mean_abs_error_last_50": 769.2008182619459, "mean_pred_prob": 0.038733672964735885, "mean_pred_prob_last_10": 0.17495943172834813, "mean_pred_prob_last_25": 0.10283680229622405, "mean_pred_prob_last_50": 0.06436916339735035, "mean_token_accuracy": 0.8728298664093017, "step": 22810 }, { "epoch": 0.4056672533020461, "grad_norm": 1.7985620197351575, "learning_rate": 0.0001, "loss": 0.8191, "mean_abs_error": 278.82121028613733, "mean_abs_error_last_10": 101.61501394805283, "mean_abs_error_last_25": 183.6024001089985, "mean_abs_error_last_50": 203.46193756058977, "mean_pred_prob": 0.030901964032091202, "mean_pred_prob_last_10": 0.1622262205928564, "mean_pred_prob_last_25": 0.08675679340958595, "mean_pred_prob_last_50": 0.052291246596723795, "mean_token_accuracy": 0.8715655207633972, "step": 22820 }, { "epoch": 0.40584502159884805, "grad_norm": 1.459778587034917, "learning_rate": 0.0001, "loss": 0.787, "mean_abs_error": 508.19311842634835, "mean_abs_error_last_10": 119.21783160729133, "mean_abs_error_last_25": 191.25890059528513, "mean_abs_error_last_50": 284.89488316139375, "mean_pred_prob": 0.040707924167509194, "mean_pred_prob_last_10": 0.22457603046204894, "mean_pred_prob_last_25": 0.11985595506848767, "mean_pred_prob_last_50": 0.07078163840342314, "mean_token_accuracy": 0.8662684679031372, "step": 22830 }, { "epoch": 0.40602278989565, "grad_norm": 1.1475704183108162, "learning_rate": 0.0001, "loss": 0.8361, "mean_abs_error": 355.7653743670112, "mean_abs_error_last_10": 97.19978407883687, "mean_abs_error_last_25": 187.21271321121452, "mean_abs_error_last_50": 301.2867426311466, "mean_pred_prob": 0.0420057509560138, "mean_pred_prob_last_10": 0.19806498514954002, "mean_pred_prob_last_25": 0.11417184205492958, "mean_pred_prob_last_50": 0.07037053365493193, "mean_token_accuracy": 0.8686948597431183, "step": 22840 }, { "epoch": 0.40620055819245193, "grad_norm": 1.7367227270967294, "learning_rate": 0.0001, "loss": 0.7749, "mean_abs_error": 1146.3579770242861, "mean_abs_error_last_10": 522.4650189916744, "mean_abs_error_last_25": 634.4995900271903, "mean_abs_error_last_50": 792.9973422219895, "mean_pred_prob": 0.022863787708047312, "mean_pred_prob_last_10": 0.1128557374497177, "mean_pred_prob_last_25": 0.062075931922299786, "mean_pred_prob_last_50": 0.038506682685692795, "mean_token_accuracy": 0.8793274819850921, "step": 22850 }, { "epoch": 0.4063783264892539, "grad_norm": 1.897502678333924, "learning_rate": 0.0001, "loss": 0.8607, "mean_abs_error": 1184.7048584573836, "mean_abs_error_last_10": 500.9198865067025, "mean_abs_error_last_25": 558.237116545612, "mean_abs_error_last_50": 700.1714789855517, "mean_pred_prob": 0.023590513445378748, "mean_pred_prob_last_10": 0.10944793363451026, "mean_pred_prob_last_25": 0.06342660869995598, "mean_pred_prob_last_50": 0.039967295472160914, "mean_token_accuracy": 0.8663339793682099, "step": 22860 }, { "epoch": 0.40655609478605587, "grad_norm": 2.1037098419527256, "learning_rate": 0.0001, "loss": 0.9677, "mean_abs_error": 593.1256394965636, "mean_abs_error_last_10": 271.9217096884846, "mean_abs_error_last_25": 367.253141549773, "mean_abs_error_last_50": 435.9764528972199, "mean_pred_prob": 0.024174483446404337, "mean_pred_prob_last_10": 0.1144087073393166, "mean_pred_prob_last_25": 0.06335119865834712, "mean_pred_prob_last_50": 0.039884755457751456, "mean_token_accuracy": 0.8607653439044952, "step": 22870 }, { "epoch": 0.4067338630828578, "grad_norm": 1.0779929085811322, "learning_rate": 0.0001, "loss": 0.7805, "mean_abs_error": 704.9459812186332, "mean_abs_error_last_10": 313.41832574832955, "mean_abs_error_last_25": 448.4231024624697, "mean_abs_error_last_50": 532.5610687138044, "mean_pred_prob": 0.024263684276957066, "mean_pred_prob_last_10": 0.1169298104941845, "mean_pred_prob_last_25": 0.06602540683234111, "mean_pred_prob_last_50": 0.04078499014722183, "mean_token_accuracy": 0.870538330078125, "step": 22880 }, { "epoch": 0.40691163137965974, "grad_norm": 0.8903957092771854, "learning_rate": 0.0001, "loss": 0.7831, "mean_abs_error": 1216.1172095038323, "mean_abs_error_last_10": 719.0440595255175, "mean_abs_error_last_25": 801.6230613831474, "mean_abs_error_last_50": 977.9533975149167, "mean_pred_prob": 0.026489173099253093, "mean_pred_prob_last_10": 0.144475524907466, "mean_pred_prob_last_25": 0.0768938286419143, "mean_pred_prob_last_50": 0.04520918750640703, "mean_token_accuracy": 0.8738194406032562, "step": 22890 }, { "epoch": 0.4070893996764617, "grad_norm": 2.0757999804160163, "learning_rate": 0.0001, "loss": 0.8559, "mean_abs_error": 1220.0174111187857, "mean_abs_error_last_10": 793.8969825685476, "mean_abs_error_last_25": 885.3018958683954, "mean_abs_error_last_50": 958.7759960993187, "mean_pred_prob": 0.040296590283105616, "mean_pred_prob_last_10": 0.20885347410512622, "mean_pred_prob_last_25": 0.11517685060680379, "mean_pred_prob_last_50": 0.06845347993294126, "mean_token_accuracy": 0.8762272953987121, "step": 22900 }, { "epoch": 0.4072671679732636, "grad_norm": 1.5168024175683785, "learning_rate": 0.0001, "loss": 0.7495, "mean_abs_error": 574.333734480883, "mean_abs_error_last_10": 312.10989350217255, "mean_abs_error_last_25": 393.2766054787864, "mean_abs_error_last_50": 355.0776340468659, "mean_pred_prob": 0.039485009689815345, "mean_pred_prob_last_10": 0.18109304402023554, "mean_pred_prob_last_25": 0.10559916542842984, "mean_pred_prob_last_50": 0.0658053696155548, "mean_token_accuracy": 0.8771541774272918, "step": 22910 }, { "epoch": 0.4074449362700656, "grad_norm": 2.8486964343938097, "learning_rate": 0.0001, "loss": 0.7512, "mean_abs_error": 612.6674469293068, "mean_abs_error_last_10": 270.8168321969446, "mean_abs_error_last_25": 327.7768349510533, "mean_abs_error_last_50": 423.3160413809459, "mean_pred_prob": 0.037404651805991306, "mean_pred_prob_last_10": 0.18815125205437652, "mean_pred_prob_last_25": 0.10447678738855756, "mean_pred_prob_last_50": 0.06388544317451306, "mean_token_accuracy": 0.8779123365879059, "step": 22920 }, { "epoch": 0.40762270456686756, "grad_norm": 1.8615430206701116, "learning_rate": 0.0001, "loss": 0.7107, "mean_abs_error": 711.2916777614039, "mean_abs_error_last_10": 381.977266513229, "mean_abs_error_last_25": 433.49505572742447, "mean_abs_error_last_50": 490.8627525686208, "mean_pred_prob": 0.03703279227192979, "mean_pred_prob_last_10": 0.17959503243328073, "mean_pred_prob_last_25": 0.10166372227831744, "mean_pred_prob_last_50": 0.06220545129617676, "mean_token_accuracy": 0.8662180960178375, "step": 22930 }, { "epoch": 0.4078004728636695, "grad_norm": 1.700075883932308, "learning_rate": 0.0001, "loss": 0.8269, "mean_abs_error": 262.655764215604, "mean_abs_error_last_10": 48.02983450101279, "mean_abs_error_last_25": 74.87651590490967, "mean_abs_error_last_50": 125.10183749599484, "mean_pred_prob": 0.04605950592085719, "mean_pred_prob_last_10": 0.21931811422109604, "mean_pred_prob_last_25": 0.12669533621519805, "mean_pred_prob_last_50": 0.07762623513117432, "mean_token_accuracy": 0.8684986770153046, "step": 22940 }, { "epoch": 0.40797824116047143, "grad_norm": 1.967346509054278, "learning_rate": 0.0001, "loss": 0.7806, "mean_abs_error": 778.74278488301, "mean_abs_error_last_10": 415.16443295932606, "mean_abs_error_last_25": 511.9224109220089, "mean_abs_error_last_50": 590.1417195547795, "mean_pred_prob": 0.03740578279248439, "mean_pred_prob_last_10": 0.19425403054337947, "mean_pred_prob_last_25": 0.10538026905851439, "mean_pred_prob_last_50": 0.0631320638320176, "mean_token_accuracy": 0.8660043835639953, "step": 22950 }, { "epoch": 0.40815600945727337, "grad_norm": 1.629502527549533, "learning_rate": 0.0001, "loss": 0.8507, "mean_abs_error": 531.4194114543078, "mean_abs_error_last_10": 105.05684672048059, "mean_abs_error_last_25": 161.463883202473, "mean_abs_error_last_50": 313.7575666448362, "mean_pred_prob": 0.031642441148869695, "mean_pred_prob_last_10": 0.17340526580810547, "mean_pred_prob_last_25": 0.0931643077172339, "mean_pred_prob_last_50": 0.05494840070605278, "mean_token_accuracy": 0.8735022008419037, "step": 22960 }, { "epoch": 0.4083337777540753, "grad_norm": 1.3003889816498746, "learning_rate": 0.0001, "loss": 0.773, "mean_abs_error": 1470.0526716077818, "mean_abs_error_last_10": 668.9633833505702, "mean_abs_error_last_25": 813.7810136117324, "mean_abs_error_last_50": 1088.1656981063934, "mean_pred_prob": 0.03477881608705502, "mean_pred_prob_last_10": 0.17643774572061374, "mean_pred_prob_last_25": 0.09626599094481207, "mean_pred_prob_last_50": 0.05828886684612371, "mean_token_accuracy": 0.8741774141788483, "step": 22970 }, { "epoch": 0.4085115460508773, "grad_norm": 1.3251232541207043, "learning_rate": 0.0001, "loss": 0.7836, "mean_abs_error": 224.7114337828028, "mean_abs_error_last_10": 71.46243642966577, "mean_abs_error_last_25": 95.35919697085981, "mean_abs_error_last_50": 141.54185033630682, "mean_pred_prob": 0.051712321047671136, "mean_pred_prob_last_10": 0.23697379641234875, "mean_pred_prob_last_25": 0.1372741217724979, "mean_pred_prob_last_50": 0.08510630684904755, "mean_token_accuracy": 0.8805793881416321, "step": 22980 }, { "epoch": 0.40868931434767924, "grad_norm": 2.7749500450618925, "learning_rate": 0.0001, "loss": 0.7516, "mean_abs_error": 550.3689553667912, "mean_abs_error_last_10": 79.23117566933152, "mean_abs_error_last_25": 148.69935027858807, "mean_abs_error_last_50": 272.73967243708023, "mean_pred_prob": 0.030174912692746147, "mean_pred_prob_last_10": 0.1426957981195301, "mean_pred_prob_last_25": 0.08124995925463736, "mean_pred_prob_last_50": 0.050322839326690884, "mean_token_accuracy": 0.8706805527210235, "step": 22990 }, { "epoch": 0.4088670826444812, "grad_norm": 2.881966763500759, "learning_rate": 0.0001, "loss": 0.9219, "mean_abs_error": 312.1707744334236, "mean_abs_error_last_10": 131.07052254585537, "mean_abs_error_last_25": 139.8548722087151, "mean_abs_error_last_50": 211.28063754030228, "mean_pred_prob": 0.034509535133838656, "mean_pred_prob_last_10": 0.16905549447983503, "mean_pred_prob_last_25": 0.0909717750735581, "mean_pred_prob_last_50": 0.056936136027798054, "mean_token_accuracy": 0.8742436170578003, "step": 23000 }, { "epoch": 0.4090448509412831, "grad_norm": 1.598271155677555, "learning_rate": 0.0001, "loss": 0.7326, "mean_abs_error": 185.7956268496261, "mean_abs_error_last_10": 44.64837698926241, "mean_abs_error_last_25": 79.00476694094256, "mean_abs_error_last_50": 126.92836859742374, "mean_pred_prob": 0.04568139747716486, "mean_pred_prob_last_10": 0.2228340931236744, "mean_pred_prob_last_25": 0.12922140676528215, "mean_pred_prob_last_50": 0.07694151028990745, "mean_token_accuracy": 0.8824480891227722, "step": 23010 }, { "epoch": 0.40922261923808506, "grad_norm": 1.3229576691028937, "learning_rate": 0.0001, "loss": 0.7945, "mean_abs_error": 321.42013495973583, "mean_abs_error_last_10": 110.52889195736218, "mean_abs_error_last_25": 133.31307192397554, "mean_abs_error_last_50": 207.3711238077094, "mean_pred_prob": 0.041777614859165625, "mean_pred_prob_last_10": 0.2044694485492073, "mean_pred_prob_last_25": 0.11566625932464376, "mean_pred_prob_last_50": 0.07078166696010157, "mean_token_accuracy": 0.8737138986587525, "step": 23020 }, { "epoch": 0.40940038753488706, "grad_norm": 2.1248734432298937, "learning_rate": 0.0001, "loss": 0.7779, "mean_abs_error": 278.60131581109914, "mean_abs_error_last_10": 105.81880600842605, "mean_abs_error_last_25": 98.71553561289602, "mean_abs_error_last_50": 155.4873314163139, "mean_pred_prob": 0.04702779038343578, "mean_pred_prob_last_10": 0.23963375110179186, "mean_pred_prob_last_25": 0.12939695157110692, "mean_pred_prob_last_50": 0.07916552014648914, "mean_token_accuracy": 0.8776682138442993, "step": 23030 }, { "epoch": 0.409578155831689, "grad_norm": 1.5031211804889912, "learning_rate": 0.0001, "loss": 0.784, "mean_abs_error": 451.9048555990864, "mean_abs_error_last_10": 180.87227291055802, "mean_abs_error_last_25": 225.96034163023236, "mean_abs_error_last_50": 312.99807492841876, "mean_pred_prob": 0.03656759224832058, "mean_pred_prob_last_10": 0.17361218500882386, "mean_pred_prob_last_25": 0.09973059045150875, "mean_pred_prob_last_50": 0.061419208208099006, "mean_token_accuracy": 0.8708956301212311, "step": 23040 }, { "epoch": 0.40975592412849093, "grad_norm": 1.025274430202054, "learning_rate": 0.0001, "loss": 0.8448, "mean_abs_error": 293.73394911899425, "mean_abs_error_last_10": 111.34769583127668, "mean_abs_error_last_25": 252.79251688303216, "mean_abs_error_last_50": 291.7651392534375, "mean_pred_prob": 0.039930059341713786, "mean_pred_prob_last_10": 0.17551963534206153, "mean_pred_prob_last_25": 0.10324126332998276, "mean_pred_prob_last_50": 0.06570658860728144, "mean_token_accuracy": 0.8664335429668426, "step": 23050 }, { "epoch": 0.4099336924252929, "grad_norm": 2.5230969343131138, "learning_rate": 0.0001, "loss": 0.7875, "mean_abs_error": 333.68002208643287, "mean_abs_error_last_10": 129.57099043096133, "mean_abs_error_last_25": 136.1240143877665, "mean_abs_error_last_50": 182.22877167462076, "mean_pred_prob": 0.03267515822662972, "mean_pred_prob_last_10": 0.16397726705763488, "mean_pred_prob_last_25": 0.09039097947534173, "mean_pred_prob_last_50": 0.05472972270799801, "mean_token_accuracy": 0.8665337204933167, "step": 23060 }, { "epoch": 0.4101114607220948, "grad_norm": 0.9751087293383806, "learning_rate": 0.0001, "loss": 0.8009, "mean_abs_error": 655.5420887990647, "mean_abs_error_last_10": 169.31617620931314, "mean_abs_error_last_25": 234.17467155400215, "mean_abs_error_last_50": 368.1558011319058, "mean_pred_prob": 0.023733623168664054, "mean_pred_prob_last_10": 0.11909262413391844, "mean_pred_prob_last_25": 0.0674701934447512, "mean_pred_prob_last_50": 0.040420353802619505, "mean_token_accuracy": 0.8728368043899536, "step": 23070 }, { "epoch": 0.41028922901889675, "grad_norm": 1.8274072148241263, "learning_rate": 0.0001, "loss": 0.7261, "mean_abs_error": 979.3501491629792, "mean_abs_error_last_10": 565.207559753045, "mean_abs_error_last_25": 630.8456017570245, "mean_abs_error_last_50": 745.9159782664794, "mean_pred_prob": 0.04760964255256113, "mean_pred_prob_last_10": 0.22575354537111708, "mean_pred_prob_last_25": 0.12677525438630255, "mean_pred_prob_last_50": 0.0786080451493035, "mean_token_accuracy": 0.8717262208461761, "step": 23080 }, { "epoch": 0.41046699731569875, "grad_norm": 1.0514931248792918, "learning_rate": 0.0001, "loss": 0.7841, "mean_abs_error": 426.5839169243806, "mean_abs_error_last_10": 182.40014509417622, "mean_abs_error_last_25": 242.4270745400525, "mean_abs_error_last_50": 309.32870265582807, "mean_pred_prob": 0.027903649141080678, "mean_pred_prob_last_10": 0.14963235314935447, "mean_pred_prob_last_25": 0.07816717401146889, "mean_pred_prob_last_50": 0.04725896609015763, "mean_token_accuracy": 0.8659749031066895, "step": 23090 }, { "epoch": 0.4106447656125007, "grad_norm": 1.3566568867533073, "learning_rate": 0.0001, "loss": 0.8129, "mean_abs_error": 538.8210832068643, "mean_abs_error_last_10": 205.63579432592633, "mean_abs_error_last_25": 301.8713043492042, "mean_abs_error_last_50": 295.1391693186702, "mean_pred_prob": 0.0374577759183012, "mean_pred_prob_last_10": 0.17238945607095957, "mean_pred_prob_last_25": 0.09927191543392837, "mean_pred_prob_last_50": 0.0623229343444109, "mean_token_accuracy": 0.8723847031593323, "step": 23100 }, { "epoch": 0.4108225339093026, "grad_norm": 2.390665313576452, "learning_rate": 0.0001, "loss": 0.6817, "mean_abs_error": 398.69701575064164, "mean_abs_error_last_10": 213.14967832442477, "mean_abs_error_last_25": 225.46756386676347, "mean_abs_error_last_50": 275.5895298509011, "mean_pred_prob": 0.04064466630807147, "mean_pred_prob_last_10": 0.2060122740047518, "mean_pred_prob_last_25": 0.11080357427126727, "mean_pred_prob_last_50": 0.06818062735255807, "mean_token_accuracy": 0.878050708770752, "step": 23110 }, { "epoch": 0.41100030220610456, "grad_norm": 1.39649515530736, "learning_rate": 0.0001, "loss": 0.7081, "mean_abs_error": 558.0849852754499, "mean_abs_error_last_10": 174.36050892406598, "mean_abs_error_last_25": 226.91712033993605, "mean_abs_error_last_50": 371.93367224989214, "mean_pred_prob": 0.04724577055312693, "mean_pred_prob_last_10": 0.2349649765063077, "mean_pred_prob_last_25": 0.1291504634777084, "mean_pred_prob_last_50": 0.07962778517976403, "mean_token_accuracy": 0.8752619504928589, "step": 23120 }, { "epoch": 0.4111780705029065, "grad_norm": 1.2343031378247946, "learning_rate": 0.0001, "loss": 0.773, "mean_abs_error": 265.88684111937005, "mean_abs_error_last_10": 112.40964879115822, "mean_abs_error_last_25": 141.75259711290144, "mean_abs_error_last_50": 187.92171393073153, "mean_pred_prob": 0.05013267579488456, "mean_pred_prob_last_10": 0.22660333551466466, "mean_pred_prob_last_25": 0.13043020591139792, "mean_pred_prob_last_50": 0.08185232388786971, "mean_token_accuracy": 0.8752282083034515, "step": 23130 }, { "epoch": 0.41135583879970844, "grad_norm": 1.6462339478733121, "learning_rate": 0.0001, "loss": 0.7862, "mean_abs_error": 130.09654236141978, "mean_abs_error_last_10": 72.44745702825648, "mean_abs_error_last_25": 81.99353646394584, "mean_abs_error_last_50": 105.40165881422438, "mean_pred_prob": 0.0545586034655571, "mean_pred_prob_last_10": 0.26668384186923505, "mean_pred_prob_last_25": 0.14624924045056104, "mean_pred_prob_last_50": 0.09051232356578112, "mean_token_accuracy": 0.8790381729602814, "step": 23140 }, { "epoch": 0.41153360709651043, "grad_norm": 1.440124926689548, "learning_rate": 0.0001, "loss": 0.7021, "mean_abs_error": 1013.8674792092381, "mean_abs_error_last_10": 468.48858558624386, "mean_abs_error_last_25": 504.46021160821937, "mean_abs_error_last_50": 621.9371027072306, "mean_pred_prob": 0.04179668816796038, "mean_pred_prob_last_10": 0.19214231774094515, "mean_pred_prob_last_25": 0.11301714540459215, "mean_pred_prob_last_50": 0.07061816779896617, "mean_token_accuracy": 0.8728764533996582, "step": 23150 }, { "epoch": 0.4117113753933124, "grad_norm": 0.8904464441463287, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 450.88441294384063, "mean_abs_error_last_10": 150.10993054907584, "mean_abs_error_last_25": 145.96844457825154, "mean_abs_error_last_50": 220.60847929822125, "mean_pred_prob": 0.03938374019926414, "mean_pred_prob_last_10": 0.19315700335428118, "mean_pred_prob_last_25": 0.11104007741669193, "mean_pred_prob_last_50": 0.06784666213206947, "mean_token_accuracy": 0.8722355782985687, "step": 23160 }, { "epoch": 0.4118891436901143, "grad_norm": 1.4941219041723053, "learning_rate": 0.0001, "loss": 0.7635, "mean_abs_error": 463.3597168403177, "mean_abs_error_last_10": 119.39875754279001, "mean_abs_error_last_25": 143.66639679697184, "mean_abs_error_last_50": 224.49836600628788, "mean_pred_prob": 0.03850648745428771, "mean_pred_prob_last_10": 0.195838313549757, "mean_pred_prob_last_25": 0.11254704026505351, "mean_pred_prob_last_50": 0.06526608425192534, "mean_token_accuracy": 0.8725027024745942, "step": 23170 }, { "epoch": 0.41206691198691625, "grad_norm": 1.1415539174953315, "learning_rate": 0.0001, "loss": 0.7765, "mean_abs_error": 447.21133019872576, "mean_abs_error_last_10": 132.258349582124, "mean_abs_error_last_25": 171.2475712613049, "mean_abs_error_last_50": 317.5558324064642, "mean_pred_prob": 0.05623096760828048, "mean_pred_prob_last_10": 0.24690424408763648, "mean_pred_prob_last_25": 0.15098677994683385, "mean_pred_prob_last_50": 0.09177831392735243, "mean_token_accuracy": 0.8749763667583466, "step": 23180 }, { "epoch": 0.4122446802837182, "grad_norm": 1.4076179725208215, "learning_rate": 0.0001, "loss": 0.8427, "mean_abs_error": 681.0320102581156, "mean_abs_error_last_10": 139.21348559236102, "mean_abs_error_last_25": 168.02029362182537, "mean_abs_error_last_50": 310.0060036365418, "mean_pred_prob": 0.03411087226122618, "mean_pred_prob_last_10": 0.17662044474855065, "mean_pred_prob_last_25": 0.09514345384668559, "mean_pred_prob_last_50": 0.056785017950460315, "mean_token_accuracy": 0.8769804954528808, "step": 23190 }, { "epoch": 0.41242244858052013, "grad_norm": 1.2515125044802502, "learning_rate": 0.0001, "loss": 0.8642, "mean_abs_error": 302.333465022287, "mean_abs_error_last_10": 90.74285919954389, "mean_abs_error_last_25": 125.9724111638366, "mean_abs_error_last_50": 200.35571625968382, "mean_pred_prob": 0.04232944450341165, "mean_pred_prob_last_10": 0.2002571627497673, "mean_pred_prob_last_25": 0.11557001573964953, "mean_pred_prob_last_50": 0.07119076885282993, "mean_token_accuracy": 0.8697784960269928, "step": 23200 }, { "epoch": 0.4126002168773221, "grad_norm": 1.0784533779056673, "learning_rate": 0.0001, "loss": 0.7717, "mean_abs_error": 271.29895458293777, "mean_abs_error_last_10": 120.68318899460985, "mean_abs_error_last_25": 141.21145531355748, "mean_abs_error_last_50": 166.5269582822417, "mean_pred_prob": 0.047457205061800775, "mean_pred_prob_last_10": 0.23771110270172358, "mean_pred_prob_last_25": 0.1318830178119242, "mean_pred_prob_last_50": 0.08117605089209974, "mean_token_accuracy": 0.8756193578243255, "step": 23210 }, { "epoch": 0.41277798517412406, "grad_norm": 2.300535126551268, "learning_rate": 0.0001, "loss": 0.8213, "mean_abs_error": 437.3948501172622, "mean_abs_error_last_10": 88.94729742442965, "mean_abs_error_last_25": 129.10343302445477, "mean_abs_error_last_50": 210.34951346081607, "mean_pred_prob": 0.048805533349514006, "mean_pred_prob_last_10": 0.23471753392368555, "mean_pred_prob_last_25": 0.13155489023774863, "mean_pred_prob_last_50": 0.08057405650615693, "mean_token_accuracy": 0.8704477787017822, "step": 23220 }, { "epoch": 0.412955753470926, "grad_norm": 1.6540231250414013, "learning_rate": 0.0001, "loss": 0.7885, "mean_abs_error": 146.14879757811593, "mean_abs_error_last_10": 88.63239452571256, "mean_abs_error_last_25": 91.16608111744044, "mean_abs_error_last_50": 113.61527860188941, "mean_pred_prob": 0.05213654646649957, "mean_pred_prob_last_10": 0.2409839393571019, "mean_pred_prob_last_25": 0.13940904242917895, "mean_pred_prob_last_50": 0.08630316220223903, "mean_token_accuracy": 0.8653761506080627, "step": 23230 }, { "epoch": 0.41313352176772794, "grad_norm": 1.8406189203227163, "learning_rate": 0.0001, "loss": 0.8133, "mean_abs_error": 396.12670848245256, "mean_abs_error_last_10": 289.7904402050375, "mean_abs_error_last_25": 240.2044857002551, "mean_abs_error_last_50": 299.07691323860763, "mean_pred_prob": 0.033633155154529956, "mean_pred_prob_last_10": 0.17441616125870496, "mean_pred_prob_last_25": 0.09312129896134139, "mean_pred_prob_last_50": 0.0562201012740843, "mean_token_accuracy": 0.8737267971038818, "step": 23240 }, { "epoch": 0.4133112900645299, "grad_norm": 1.1219086349480443, "learning_rate": 0.0001, "loss": 0.8667, "mean_abs_error": 233.4805207682767, "mean_abs_error_last_10": 82.07594379972771, "mean_abs_error_last_25": 107.6065285234296, "mean_abs_error_last_50": 138.7799417043405, "mean_pred_prob": 0.034356242627836764, "mean_pred_prob_last_10": 0.17732969373464585, "mean_pred_prob_last_25": 0.09466223614290356, "mean_pred_prob_last_50": 0.057689416827633976, "mean_token_accuracy": 0.8761728584766388, "step": 23250 }, { "epoch": 0.4134890583613318, "grad_norm": 1.8540426514682768, "learning_rate": 0.0001, "loss": 0.69, "mean_abs_error": 176.60774979452205, "mean_abs_error_last_10": 48.77635941480287, "mean_abs_error_last_25": 57.956738330047095, "mean_abs_error_last_50": 104.03205301793847, "mean_pred_prob": 0.0419092817697674, "mean_pred_prob_last_10": 0.2142615906894207, "mean_pred_prob_last_25": 0.11720352489501237, "mean_pred_prob_last_50": 0.07153473012149333, "mean_token_accuracy": 0.8737027823925019, "step": 23260 }, { "epoch": 0.4136668266581338, "grad_norm": 1.5159251333087143, "learning_rate": 0.0001, "loss": 0.7657, "mean_abs_error": 1654.6684585698454, "mean_abs_error_last_10": 877.4219618062777, "mean_abs_error_last_25": 1006.532051647075, "mean_abs_error_last_50": 1177.9696708208692, "mean_pred_prob": 0.017967133523779922, "mean_pred_prob_last_10": 0.1009871009067865, "mean_pred_prob_last_25": 0.05292649858747609, "mean_pred_prob_last_50": 0.03077600524557056, "mean_token_accuracy": 0.8702822685241699, "step": 23270 }, { "epoch": 0.41384459495493575, "grad_norm": 1.5630537116843466, "learning_rate": 0.0001, "loss": 0.8004, "mean_abs_error": 162.04478877879268, "mean_abs_error_last_10": 60.68079335122093, "mean_abs_error_last_25": 71.44645220215732, "mean_abs_error_last_50": 88.25062163590505, "mean_pred_prob": 0.038804352004081014, "mean_pred_prob_last_10": 0.18811372425407172, "mean_pred_prob_last_25": 0.10466991988942027, "mean_pred_prob_last_50": 0.06495740627869964, "mean_token_accuracy": 0.8833114683628083, "step": 23280 }, { "epoch": 0.4140223632517377, "grad_norm": 0.9645904435454141, "learning_rate": 0.0001, "loss": 0.7614, "mean_abs_error": 382.433456393473, "mean_abs_error_last_10": 90.89414715267145, "mean_abs_error_last_25": 116.5331213426363, "mean_abs_error_last_50": 200.22084814424014, "mean_pred_prob": 0.0427843016339466, "mean_pred_prob_last_10": 0.20915999487042428, "mean_pred_prob_last_25": 0.11757634934037924, "mean_pred_prob_last_50": 0.0712875435128808, "mean_token_accuracy": 0.8740721762180328, "step": 23290 }, { "epoch": 0.41420013154853963, "grad_norm": 1.2379325093227056, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 715.1114346859331, "mean_abs_error_last_10": 390.2363535065947, "mean_abs_error_last_25": 356.74440576179296, "mean_abs_error_last_50": 474.46242906417393, "mean_pred_prob": 0.03010265563498251, "mean_pred_prob_last_10": 0.14807182338263375, "mean_pred_prob_last_25": 0.084117470739875, "mean_pred_prob_last_50": 0.05171182253398001, "mean_token_accuracy": 0.8721150398254395, "step": 23300 }, { "epoch": 0.41437789984534157, "grad_norm": 1.7500841417824275, "learning_rate": 0.0001, "loss": 0.7964, "mean_abs_error": 580.623676730525, "mean_abs_error_last_10": 197.87709963255878, "mean_abs_error_last_25": 236.11720171520219, "mean_abs_error_last_50": 327.7300656583046, "mean_pred_prob": 0.02724242864642292, "mean_pred_prob_last_10": 0.1481492469087243, "mean_pred_prob_last_25": 0.08271870268508792, "mean_pred_prob_last_50": 0.047410880541428925, "mean_token_accuracy": 0.8677585363388062, "step": 23310 }, { "epoch": 0.4145556681421435, "grad_norm": 2.1863233159095876, "learning_rate": 0.0001, "loss": 0.7761, "mean_abs_error": 560.1028032433562, "mean_abs_error_last_10": 275.6355223203393, "mean_abs_error_last_25": 355.88884390900455, "mean_abs_error_last_50": 430.2837890057337, "mean_pred_prob": 0.05008354982710443, "mean_pred_prob_last_10": 0.2459670993150212, "mean_pred_prob_last_25": 0.13782213997328654, "mean_pred_prob_last_50": 0.0834436027216725, "mean_token_accuracy": 0.8812273383140564, "step": 23320 }, { "epoch": 0.4147334364389455, "grad_norm": 1.7251733865085792, "learning_rate": 0.0001, "loss": 0.8884, "mean_abs_error": 1375.7395799426638, "mean_abs_error_last_10": 577.6889629600735, "mean_abs_error_last_25": 710.7188521459781, "mean_abs_error_last_50": 874.2326947961595, "mean_pred_prob": 0.017725521608372218, "mean_pred_prob_last_10": 0.09386146271717735, "mean_pred_prob_last_25": 0.04964993409230374, "mean_pred_prob_last_50": 0.03014083849848248, "mean_token_accuracy": 0.8681800603866577, "step": 23330 }, { "epoch": 0.41491120473574744, "grad_norm": 0.9751892038162053, "learning_rate": 0.0001, "loss": 0.7572, "mean_abs_error": 74.63667406013155, "mean_abs_error_last_10": 6.665113773057276, "mean_abs_error_last_25": 22.76752480218689, "mean_abs_error_last_50": 43.759079526422866, "mean_pred_prob": 0.06318369805812836, "mean_pred_prob_last_10": 0.30937096327543256, "mean_pred_prob_last_25": 0.17191390097141265, "mean_pred_prob_last_50": 0.10481882020831108, "mean_token_accuracy": 0.8771803438663482, "step": 23340 }, { "epoch": 0.4150889730325494, "grad_norm": 1.2488399997532647, "learning_rate": 0.0001, "loss": 0.7546, "mean_abs_error": 660.7581483155061, "mean_abs_error_last_10": 178.47370052938678, "mean_abs_error_last_25": 279.14592878395086, "mean_abs_error_last_50": 366.0843347398028, "mean_pred_prob": 0.020147744135465474, "mean_pred_prob_last_10": 0.10664024413563311, "mean_pred_prob_last_25": 0.05615503416629508, "mean_pred_prob_last_50": 0.034076135419309136, "mean_token_accuracy": 0.8669660210609436, "step": 23350 }, { "epoch": 0.4152667413293513, "grad_norm": 0.9884035516416587, "learning_rate": 0.0001, "loss": 0.8117, "mean_abs_error": 225.74620898918346, "mean_abs_error_last_10": 62.35776743773694, "mean_abs_error_last_25": 109.14322388698365, "mean_abs_error_last_50": 174.64679084005382, "mean_pred_prob": 0.042572270054370163, "mean_pred_prob_last_10": 0.21491769552230836, "mean_pred_prob_last_25": 0.11823526620864869, "mean_pred_prob_last_50": 0.07279546866193413, "mean_token_accuracy": 0.8765714108943939, "step": 23360 }, { "epoch": 0.41544450962615326, "grad_norm": 1.7014270611117337, "learning_rate": 0.0001, "loss": 0.8386, "mean_abs_error": 569.8257295763724, "mean_abs_error_last_10": 261.48988551588656, "mean_abs_error_last_25": 293.7579656420221, "mean_abs_error_last_50": 368.46643731147253, "mean_pred_prob": 0.023093844181858003, "mean_pred_prob_last_10": 0.12391300611197949, "mean_pred_prob_last_25": 0.06572435621637851, "mean_pred_prob_last_50": 0.03985897647216916, "mean_token_accuracy": 0.8652298629283905, "step": 23370 }, { "epoch": 0.4156222779229552, "grad_norm": 1.203956624974191, "learning_rate": 0.0001, "loss": 0.8098, "mean_abs_error": 449.72846572068954, "mean_abs_error_last_10": 226.85918270449218, "mean_abs_error_last_25": 278.07292818446524, "mean_abs_error_last_50": 310.60353568571696, "mean_pred_prob": 0.028865843336097895, "mean_pred_prob_last_10": 0.1521301593631506, "mean_pred_prob_last_25": 0.08030506651848554, "mean_pred_prob_last_50": 0.048757100198417905, "mean_token_accuracy": 0.8643232345581054, "step": 23380 }, { "epoch": 0.4158000462197572, "grad_norm": 1.6863959774270483, "learning_rate": 0.0001, "loss": 0.8214, "mean_abs_error": 1245.6781355140636, "mean_abs_error_last_10": 675.199049557909, "mean_abs_error_last_25": 763.5213131802561, "mean_abs_error_last_50": 897.2107139892798, "mean_pred_prob": 0.03352824036337552, "mean_pred_prob_last_10": 0.17568120057840134, "mean_pred_prob_last_25": 0.095707795463386, "mean_pred_prob_last_50": 0.057762776225717974, "mean_token_accuracy": 0.8731922328472137, "step": 23390 }, { "epoch": 0.41597781451655913, "grad_norm": 1.2767711475226988, "learning_rate": 0.0001, "loss": 0.8412, "mean_abs_error": 517.6008314863341, "mean_abs_error_last_10": 72.5875415584454, "mean_abs_error_last_25": 139.61682502969742, "mean_abs_error_last_50": 234.6600688207142, "mean_pred_prob": 0.03894358377438038, "mean_pred_prob_last_10": 0.18951286301016806, "mean_pred_prob_last_25": 0.10627638762816786, "mean_pred_prob_last_50": 0.06557292104698717, "mean_token_accuracy": 0.8706826448440552, "step": 23400 }, { "epoch": 0.41615558281336107, "grad_norm": 2.051630144575905, "learning_rate": 0.0001, "loss": 0.8858, "mean_abs_error": 988.9543882721885, "mean_abs_error_last_10": 505.9669623994033, "mean_abs_error_last_25": 565.9447843296739, "mean_abs_error_last_50": 749.7495915350777, "mean_pred_prob": 0.03473861421225592, "mean_pred_prob_last_10": 0.1775286280317232, "mean_pred_prob_last_25": 0.0968322506902041, "mean_pred_prob_last_50": 0.05815194813621929, "mean_token_accuracy": 0.870983898639679, "step": 23410 }, { "epoch": 0.416333351110163, "grad_norm": 1.4650869956488188, "learning_rate": 0.0001, "loss": 0.8341, "mean_abs_error": 462.6061591658944, "mean_abs_error_last_10": 133.13387968331895, "mean_abs_error_last_25": 150.90596654002618, "mean_abs_error_last_50": 228.7032210436501, "mean_pred_prob": 0.035478764976141976, "mean_pred_prob_last_10": 0.1838055135216564, "mean_pred_prob_last_25": 0.09911687199492007, "mean_pred_prob_last_50": 0.06026463023154065, "mean_token_accuracy": 0.857075959444046, "step": 23420 }, { "epoch": 0.41651111940696495, "grad_norm": 3.9947235051134786, "learning_rate": 0.0001, "loss": 0.8709, "mean_abs_error": 783.5921492576083, "mean_abs_error_last_10": 441.46172993420396, "mean_abs_error_last_25": 513.342265553971, "mean_abs_error_last_50": 596.934209245276, "mean_pred_prob": 0.04813511403335724, "mean_pred_prob_last_10": 0.22559222296986264, "mean_pred_prob_last_25": 0.13071789744426496, "mean_pred_prob_last_50": 0.08157236963306787, "mean_token_accuracy": 0.8679733455181122, "step": 23430 }, { "epoch": 0.4166888877037669, "grad_norm": 1.5452578990129124, "learning_rate": 0.0001, "loss": 0.7364, "mean_abs_error": 1904.6377797804714, "mean_abs_error_last_10": 954.8082494184542, "mean_abs_error_last_25": 1093.6388824306803, "mean_abs_error_last_50": 1383.8810944118522, "mean_pred_prob": 0.019699451363703702, "mean_pred_prob_last_10": 0.100780242869223, "mean_pred_prob_last_25": 0.05416556036507245, "mean_pred_prob_last_50": 0.033278867549961434, "mean_token_accuracy": 0.8674617648124695, "step": 23440 }, { "epoch": 0.4168666560005689, "grad_norm": 1.863578217902248, "learning_rate": 0.0001, "loss": 0.8841, "mean_abs_error": 427.92035890464103, "mean_abs_error_last_10": 218.66492311181773, "mean_abs_error_last_25": 248.8364700348688, "mean_abs_error_last_50": 368.5768691139996, "mean_pred_prob": 0.03543006109539419, "mean_pred_prob_last_10": 0.18714940566569566, "mean_pred_prob_last_25": 0.10219128299504518, "mean_pred_prob_last_50": 0.06078970222733915, "mean_token_accuracy": 0.8688018321990967, "step": 23450 }, { "epoch": 0.4170444242973708, "grad_norm": 1.0290639128986179, "learning_rate": 0.0001, "loss": 0.7196, "mean_abs_error": 178.71660549913662, "mean_abs_error_last_10": 28.362357187133632, "mean_abs_error_last_25": 59.429286620469114, "mean_abs_error_last_50": 98.58648454537281, "mean_pred_prob": 0.04760557846166193, "mean_pred_prob_last_10": 0.23421205319464206, "mean_pred_prob_last_25": 0.12865476943552495, "mean_pred_prob_last_50": 0.0792810034006834, "mean_token_accuracy": 0.8738795280456543, "step": 23460 }, { "epoch": 0.41722219259417276, "grad_norm": 3.2195471993192806, "learning_rate": 0.0001, "loss": 0.7572, "mean_abs_error": 1031.193999845775, "mean_abs_error_last_10": 554.2962816378689, "mean_abs_error_last_25": 642.8952000407438, "mean_abs_error_last_50": 758.2725423536871, "mean_pred_prob": 0.03547482894500718, "mean_pred_prob_last_10": 0.17386868967441843, "mean_pred_prob_last_25": 0.09610667186498176, "mean_pred_prob_last_50": 0.05966587248258293, "mean_token_accuracy": 0.880244380235672, "step": 23470 }, { "epoch": 0.4173999608909747, "grad_norm": 1.540927723404935, "learning_rate": 0.0001, "loss": 0.7556, "mean_abs_error": 266.3053317336934, "mean_abs_error_last_10": 114.08153429478732, "mean_abs_error_last_25": 164.38151835946564, "mean_abs_error_last_50": 188.41899531301144, "mean_pred_prob": 0.05896052573807538, "mean_pred_prob_last_10": 0.27277465984225274, "mean_pred_prob_last_25": 0.1574223625473678, "mean_pred_prob_last_50": 0.09905897490680218, "mean_token_accuracy": 0.8777383327484131, "step": 23480 }, { "epoch": 0.41757772918777664, "grad_norm": 0.7278296784130427, "learning_rate": 0.0001, "loss": 0.697, "mean_abs_error": 468.4903395424014, "mean_abs_error_last_10": 155.36221093312594, "mean_abs_error_last_25": 166.51222604629726, "mean_abs_error_last_50": 235.9240207788288, "mean_pred_prob": 0.021676200797082858, "mean_pred_prob_last_10": 0.1193078822339885, "mean_pred_prob_last_25": 0.06525623117340729, "mean_pred_prob_last_50": 0.03850487881572917, "mean_token_accuracy": 0.8774775743484498, "step": 23490 }, { "epoch": 0.4177554974845786, "grad_norm": 1.6873116473640957, "learning_rate": 0.0001, "loss": 0.7272, "mean_abs_error": 1162.028170314245, "mean_abs_error_last_10": 726.0711290824374, "mean_abs_error_last_25": 789.9863825895291, "mean_abs_error_last_50": 886.764523042389, "mean_pred_prob": 0.029571020483854227, "mean_pred_prob_last_10": 0.15159906448388938, "mean_pred_prob_last_25": 0.08369857106299605, "mean_pred_prob_last_50": 0.05024296881892951, "mean_token_accuracy": 0.8707047224044799, "step": 23500 }, { "epoch": 0.41793326578138057, "grad_norm": 1.824603637501211, "learning_rate": 0.0001, "loss": 0.8482, "mean_abs_error": 1379.1673426118716, "mean_abs_error_last_10": 572.2918589736917, "mean_abs_error_last_25": 717.5995926482005, "mean_abs_error_last_50": 934.78227259227, "mean_pred_prob": 0.03564395144057926, "mean_pred_prob_last_10": 0.1622407001850661, "mean_pred_prob_last_25": 0.0933297319104895, "mean_pred_prob_last_50": 0.05913447353232186, "mean_token_accuracy": 0.8598717391490937, "step": 23510 }, { "epoch": 0.4181110340781825, "grad_norm": 2.3506863610924484, "learning_rate": 0.0001, "loss": 0.6839, "mean_abs_error": 470.5203280464478, "mean_abs_error_last_10": 197.2226519644522, "mean_abs_error_last_25": 299.5683502319345, "mean_abs_error_last_50": 328.92981871742415, "mean_pred_prob": 0.02550654866499826, "mean_pred_prob_last_10": 0.13922891770489515, "mean_pred_prob_last_25": 0.07033560702111571, "mean_pred_prob_last_50": 0.04289633696898818, "mean_token_accuracy": 0.8864325821399689, "step": 23520 }, { "epoch": 0.41828880237498445, "grad_norm": 1.9709336063377965, "learning_rate": 0.0001, "loss": 0.7024, "mean_abs_error": 574.3172089479116, "mean_abs_error_last_10": 161.19745662014128, "mean_abs_error_last_25": 190.2641352326791, "mean_abs_error_last_50": 284.3304877504789, "mean_pred_prob": 0.02460064140614122, "mean_pred_prob_last_10": 0.12386900214478373, "mean_pred_prob_last_25": 0.06589818829670549, "mean_pred_prob_last_50": 0.04108561407774687, "mean_token_accuracy": 0.8764202117919921, "step": 23530 }, { "epoch": 0.4184665706717864, "grad_norm": 2.2315678813664257, "learning_rate": 0.0001, "loss": 0.8623, "mean_abs_error": 199.61222969681253, "mean_abs_error_last_10": 48.99982416784172, "mean_abs_error_last_25": 79.91768527557909, "mean_abs_error_last_50": 130.09209544245095, "mean_pred_prob": 0.04365439703688025, "mean_pred_prob_last_10": 0.21566395051777362, "mean_pred_prob_last_25": 0.11605894323438407, "mean_pred_prob_last_50": 0.07211099024862051, "mean_token_accuracy": 0.875247061252594, "step": 23540 }, { "epoch": 0.4186443389685883, "grad_norm": 1.5774800334344656, "learning_rate": 0.0001, "loss": 0.7689, "mean_abs_error": 219.3067480266265, "mean_abs_error_last_10": 65.52409943586566, "mean_abs_error_last_25": 94.62248385322803, "mean_abs_error_last_50": 157.38654258886748, "mean_pred_prob": 0.03824046226218343, "mean_pred_prob_last_10": 0.19974135607481003, "mean_pred_prob_last_25": 0.10812337696552277, "mean_pred_prob_last_50": 0.06489648101851345, "mean_token_accuracy": 0.8783164262771607, "step": 23550 }, { "epoch": 0.41882210726539026, "grad_norm": 0.9453101933282373, "learning_rate": 0.0001, "loss": 0.8513, "mean_abs_error": 757.4607221076976, "mean_abs_error_last_10": 418.94947700293733, "mean_abs_error_last_25": 491.26416741496286, "mean_abs_error_last_50": 547.4932700519968, "mean_pred_prob": 0.0386870579503011, "mean_pred_prob_last_10": 0.21061428314715158, "mean_pred_prob_last_25": 0.10476469757268206, "mean_pred_prob_last_50": 0.06351667980488855, "mean_token_accuracy": 0.8745596468448639, "step": 23560 }, { "epoch": 0.41899987556219226, "grad_norm": 1.9812260972278655, "learning_rate": 0.0001, "loss": 0.6751, "mean_abs_error": 231.85399106186733, "mean_abs_error_last_10": 110.77859734404441, "mean_abs_error_last_25": 119.72211456796285, "mean_abs_error_last_50": 156.70911182571237, "mean_pred_prob": 0.03197832037694752, "mean_pred_prob_last_10": 0.15670020766556264, "mean_pred_prob_last_25": 0.08656732216477395, "mean_pred_prob_last_50": 0.053242855798453094, "mean_token_accuracy": 0.8797272443771362, "step": 23570 }, { "epoch": 0.4191776438589942, "grad_norm": 1.2803511677412487, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 710.3710200165312, "mean_abs_error_last_10": 350.10696288698364, "mean_abs_error_last_25": 364.9892352389105, "mean_abs_error_last_50": 479.67244454393256, "mean_pred_prob": 0.04423503205471206, "mean_pred_prob_last_10": 0.2265650893910788, "mean_pred_prob_last_25": 0.12598982137860731, "mean_pred_prob_last_50": 0.07605704782181419, "mean_token_accuracy": 0.8723776638507843, "step": 23580 }, { "epoch": 0.41935541215579614, "grad_norm": 1.3430541957873576, "learning_rate": 0.0001, "loss": 0.7172, "mean_abs_error": 770.0630365380631, "mean_abs_error_last_10": 368.7567436681721, "mean_abs_error_last_25": 400.18316371584945, "mean_abs_error_last_50": 504.0254248841687, "mean_pred_prob": 0.0367816037760349, "mean_pred_prob_last_10": 0.1710564569977578, "mean_pred_prob_last_25": 0.10028779733693227, "mean_pred_prob_last_50": 0.06172573181684129, "mean_token_accuracy": 0.870776504278183, "step": 23590 }, { "epoch": 0.4195331804525981, "grad_norm": 1.7155556063217285, "learning_rate": 0.0001, "loss": 0.7582, "mean_abs_error": 411.66515957440214, "mean_abs_error_last_10": 143.65319681442188, "mean_abs_error_last_25": 225.0438367899886, "mean_abs_error_last_50": 308.44782887744657, "mean_pred_prob": 0.032876038225367665, "mean_pred_prob_last_10": 0.16862993128597736, "mean_pred_prob_last_25": 0.09392732921987772, "mean_pred_prob_last_50": 0.056141229905188085, "mean_token_accuracy": 0.8731438994407654, "step": 23600 }, { "epoch": 0.4197109487494, "grad_norm": 2.34247562301264, "learning_rate": 0.0001, "loss": 0.6981, "mean_abs_error": 385.551243177721, "mean_abs_error_last_10": 116.52008097893909, "mean_abs_error_last_25": 169.9343563783712, "mean_abs_error_last_50": 278.7764234795251, "mean_pred_prob": 0.04271196159534156, "mean_pred_prob_last_10": 0.21441732496023178, "mean_pred_prob_last_25": 0.11827387737575919, "mean_pred_prob_last_50": 0.07152259454596788, "mean_token_accuracy": 0.8701256930828094, "step": 23610 }, { "epoch": 0.41988871704620195, "grad_norm": 2.4789717963451317, "learning_rate": 0.0001, "loss": 0.7372, "mean_abs_error": 1175.814144976601, "mean_abs_error_last_10": 399.34267426487816, "mean_abs_error_last_25": 539.006205020162, "mean_abs_error_last_50": 742.8681802627538, "mean_pred_prob": 0.0346637000591727, "mean_pred_prob_last_10": 0.15933093607309273, "mean_pred_prob_last_25": 0.09376264376332984, "mean_pred_prob_last_50": 0.057899808225920425, "mean_token_accuracy": 0.865268224477768, "step": 23620 }, { "epoch": 0.42006648534300395, "grad_norm": 2.5388590688897326, "learning_rate": 0.0001, "loss": 0.8806, "mean_abs_error": 647.2269806964503, "mean_abs_error_last_10": 193.78624921228243, "mean_abs_error_last_25": 223.76100365290586, "mean_abs_error_last_50": 326.32354511831477, "mean_pred_prob": 0.039982812135713176, "mean_pred_prob_last_10": 0.18568473614286632, "mean_pred_prob_last_25": 0.11103544124634937, "mean_pred_prob_last_50": 0.06891936558531597, "mean_token_accuracy": 0.8603239595890045, "step": 23630 }, { "epoch": 0.4202442536398059, "grad_norm": 1.7752467008504436, "learning_rate": 0.0001, "loss": 0.8087, "mean_abs_error": 733.6673896840764, "mean_abs_error_last_10": 280.5814460862275, "mean_abs_error_last_25": 368.05874876053576, "mean_abs_error_last_50": 515.0850100308837, "mean_pred_prob": 0.029310543288011102, "mean_pred_prob_last_10": 0.16324556776089594, "mean_pred_prob_last_25": 0.08467313941800966, "mean_pred_prob_last_50": 0.04957709456793964, "mean_token_accuracy": 0.870462155342102, "step": 23640 }, { "epoch": 0.4204220219366078, "grad_norm": 1.1441127118811218, "learning_rate": 0.0001, "loss": 0.8208, "mean_abs_error": 553.2384773709653, "mean_abs_error_last_10": 143.04655492706578, "mean_abs_error_last_25": 200.12126528485175, "mean_abs_error_last_50": 307.3230060314182, "mean_pred_prob": 0.03405011267168447, "mean_pred_prob_last_10": 0.17177854282781482, "mean_pred_prob_last_25": 0.0969939025118947, "mean_pred_prob_last_50": 0.057412347733043136, "mean_token_accuracy": 0.8650770545005798, "step": 23650 }, { "epoch": 0.42059979023340976, "grad_norm": 4.57473541692004, "learning_rate": 0.0001, "loss": 0.8667, "mean_abs_error": 764.6236028400571, "mean_abs_error_last_10": 274.9913053486804, "mean_abs_error_last_25": 418.5948736224574, "mean_abs_error_last_50": 600.3890437413163, "mean_pred_prob": 0.035144177306210625, "mean_pred_prob_last_10": 0.1720649352762848, "mean_pred_prob_last_25": 0.09551698005525396, "mean_pred_prob_last_50": 0.05921592428348958, "mean_token_accuracy": 0.8667383432388306, "step": 23660 }, { "epoch": 0.4207775585302117, "grad_norm": 1.4764503699795128, "learning_rate": 0.0001, "loss": 0.7423, "mean_abs_error": 421.86213024840606, "mean_abs_error_last_10": 105.44913593683575, "mean_abs_error_last_25": 137.22460297368758, "mean_abs_error_last_50": 228.8564816464866, "mean_pred_prob": 0.03565798373892903, "mean_pred_prob_last_10": 0.1759923543781042, "mean_pred_prob_last_25": 0.10028660667594522, "mean_pred_prob_last_50": 0.06092098921071738, "mean_token_accuracy": 0.8787653744220734, "step": 23670 }, { "epoch": 0.42095532682701364, "grad_norm": 1.727630147687307, "learning_rate": 0.0001, "loss": 0.6946, "mean_abs_error": 311.3335782356576, "mean_abs_error_last_10": 137.02889758378532, "mean_abs_error_last_25": 142.14714572367197, "mean_abs_error_last_50": 180.05954202137175, "mean_pred_prob": 0.0550450511276722, "mean_pred_prob_last_10": 0.2595696359872818, "mean_pred_prob_last_25": 0.15266401842236518, "mean_pred_prob_last_50": 0.09343417650088667, "mean_token_accuracy": 0.8793492972850799, "step": 23680 }, { "epoch": 0.42113309512381564, "grad_norm": 2.510679998776855, "learning_rate": 0.0001, "loss": 0.8274, "mean_abs_error": 555.7129521090827, "mean_abs_error_last_10": 362.3877295500156, "mean_abs_error_last_25": 348.66623522702287, "mean_abs_error_last_50": 423.4067870748107, "mean_pred_prob": 0.04498299072729424, "mean_pred_prob_last_10": 0.19605966695817187, "mean_pred_prob_last_25": 0.11734481130260974, "mean_pred_prob_last_50": 0.07335531209828332, "mean_token_accuracy": 0.8727155864238739, "step": 23690 }, { "epoch": 0.4213108634206176, "grad_norm": 0.7860058089976004, "learning_rate": 0.0001, "loss": 0.8073, "mean_abs_error": 921.3561200568909, "mean_abs_error_last_10": 371.9597796026845, "mean_abs_error_last_25": 503.0714897423495, "mean_abs_error_last_50": 603.6691936068127, "mean_pred_prob": 0.00891585691133514, "mean_pred_prob_last_10": 0.05200067755067721, "mean_pred_prob_last_25": 0.025534405512735246, "mean_pred_prob_last_50": 0.015178697917144745, "mean_token_accuracy": 0.8606118142604828, "step": 23700 }, { "epoch": 0.4214886317174195, "grad_norm": 2.1061026435950074, "learning_rate": 0.0001, "loss": 0.8315, "mean_abs_error": 256.9074658605857, "mean_abs_error_last_10": 141.9708752320198, "mean_abs_error_last_25": 136.5736962694819, "mean_abs_error_last_50": 158.04374816986473, "mean_pred_prob": 0.05332314599072561, "mean_pred_prob_last_10": 0.23707067370414733, "mean_pred_prob_last_25": 0.14006418942008167, "mean_pred_prob_last_50": 0.08707617556210608, "mean_token_accuracy": 0.8626657247543335, "step": 23710 }, { "epoch": 0.42166640001422145, "grad_norm": 1.8120349845145338, "learning_rate": 0.0001, "loss": 0.7615, "mean_abs_error": 1042.5054621082477, "mean_abs_error_last_10": 386.6469973132505, "mean_abs_error_last_25": 433.5019796041655, "mean_abs_error_last_50": 598.1435027849286, "mean_pred_prob": 0.02934333815355785, "mean_pred_prob_last_10": 0.15863889745087362, "mean_pred_prob_last_25": 0.08712753179715946, "mean_pred_prob_last_50": 0.05137176064308733, "mean_token_accuracy": 0.8718111872673034, "step": 23720 }, { "epoch": 0.4218441683110234, "grad_norm": 1.2135678656308733, "learning_rate": 0.0001, "loss": 0.7927, "mean_abs_error": 597.8071910984477, "mean_abs_error_last_10": 158.86870466201194, "mean_abs_error_last_25": 220.02968836344766, "mean_abs_error_last_50": 301.99564773746584, "mean_pred_prob": 0.029019386251457037, "mean_pred_prob_last_10": 0.13486509397625923, "mean_pred_prob_last_25": 0.07832719730213285, "mean_pred_prob_last_50": 0.04847895745187998, "mean_token_accuracy": 0.8701323091983795, "step": 23730 }, { "epoch": 0.4220219366078254, "grad_norm": 2.301774084978804, "learning_rate": 0.0001, "loss": 0.689, "mean_abs_error": 224.4486230809594, "mean_abs_error_last_10": 156.32622469931863, "mean_abs_error_last_25": 151.79560798047933, "mean_abs_error_last_50": 192.18023131723248, "mean_pred_prob": 0.05738262052182108, "mean_pred_prob_last_10": 0.2618752678856254, "mean_pred_prob_last_25": 0.1513788069598377, "mean_pred_prob_last_50": 0.0935625835787505, "mean_token_accuracy": 0.886697655916214, "step": 23740 }, { "epoch": 0.4221997049046273, "grad_norm": 1.4472570743436453, "learning_rate": 0.0001, "loss": 0.9158, "mean_abs_error": 328.4864971563613, "mean_abs_error_last_10": 130.44407690595665, "mean_abs_error_last_25": 138.30678297771456, "mean_abs_error_last_50": 214.59926410419385, "mean_pred_prob": 0.03362167617306113, "mean_pred_prob_last_10": 0.16576219014823437, "mean_pred_prob_last_25": 0.08965371502563357, "mean_pred_prob_last_50": 0.05644067814573646, "mean_token_accuracy": 0.8649100840091706, "step": 23750 }, { "epoch": 0.42237747320142927, "grad_norm": 1.9481162906538634, "learning_rate": 0.0001, "loss": 0.7811, "mean_abs_error": 491.9618497986694, "mean_abs_error_last_10": 179.78282424021108, "mean_abs_error_last_25": 226.28299106146702, "mean_abs_error_last_50": 317.9804663323336, "mean_pred_prob": 0.019082696409896016, "mean_pred_prob_last_10": 0.10342190358787776, "mean_pred_prob_last_25": 0.053959323465824126, "mean_pred_prob_last_50": 0.03256698385812342, "mean_token_accuracy": 0.875260865688324, "step": 23760 }, { "epoch": 0.4225552414982312, "grad_norm": 1.996170785101355, "learning_rate": 0.0001, "loss": 0.7707, "mean_abs_error": 273.09807796372286, "mean_abs_error_last_10": 90.34189375091685, "mean_abs_error_last_25": 158.8332088881826, "mean_abs_error_last_50": 201.04003126799444, "mean_pred_prob": 0.029003632115200163, "mean_pred_prob_last_10": 0.15114229172468185, "mean_pred_prob_last_25": 0.08035953715443611, "mean_pred_prob_last_50": 0.04858652539551258, "mean_token_accuracy": 0.8778849124908448, "step": 23770 }, { "epoch": 0.42273300979503314, "grad_norm": 1.9474800092356905, "learning_rate": 0.0001, "loss": 0.9268, "mean_abs_error": 1020.9799085355633, "mean_abs_error_last_10": 505.1615025086571, "mean_abs_error_last_25": 583.0620965016441, "mean_abs_error_last_50": 717.1898297237183, "mean_pred_prob": 0.05589154852350475, "mean_pred_prob_last_10": 0.26195474846463185, "mean_pred_prob_last_25": 0.15311416366457706, "mean_pred_prob_last_50": 0.09751844915444963, "mean_token_accuracy": 0.8851526200771331, "step": 23780 }, { "epoch": 0.4229107780918351, "grad_norm": 0.8839393224457716, "learning_rate": 0.0001, "loss": 0.7629, "mean_abs_error": 1068.5425899959253, "mean_abs_error_last_10": 349.8797155132102, "mean_abs_error_last_25": 551.5588117555734, "mean_abs_error_last_50": 792.8373404291398, "mean_pred_prob": 0.02703962549858261, "mean_pred_prob_last_10": 0.1310628776322119, "mean_pred_prob_last_25": 0.07322206994285807, "mean_pred_prob_last_50": 0.04491052199737169, "mean_token_accuracy": 0.8735875368118287, "step": 23790 }, { "epoch": 0.4230885463886371, "grad_norm": 1.1662924271402775, "learning_rate": 0.0001, "loss": 0.8088, "mean_abs_error": 553.8837128106917, "mean_abs_error_last_10": 298.4712264791797, "mean_abs_error_last_25": 411.88933715892273, "mean_abs_error_last_50": 439.6978558581142, "mean_pred_prob": 0.046006091628805736, "mean_pred_prob_last_10": 0.2223283634404652, "mean_pred_prob_last_25": 0.12626492918352597, "mean_pred_prob_last_50": 0.07689410300226882, "mean_token_accuracy": 0.873475581407547, "step": 23800 }, { "epoch": 0.423266314685439, "grad_norm": 2.703691766386718, "learning_rate": 0.0001, "loss": 0.908, "mean_abs_error": 913.7090429470887, "mean_abs_error_last_10": 264.4189919088141, "mean_abs_error_last_25": 345.5074997033482, "mean_abs_error_last_50": 536.0358087602856, "mean_pred_prob": 0.030488553788745775, "mean_pred_prob_last_10": 0.1624361409456469, "mean_pred_prob_last_25": 0.09067705602501519, "mean_pred_prob_last_50": 0.05345641535823233, "mean_token_accuracy": 0.8579653978347779, "step": 23810 }, { "epoch": 0.42344408298224095, "grad_norm": 1.6970542367564276, "learning_rate": 0.0001, "loss": 0.7956, "mean_abs_error": 702.8448690660297, "mean_abs_error_last_10": 292.3063662115022, "mean_abs_error_last_25": 353.38350828120656, "mean_abs_error_last_50": 467.6278938231145, "mean_pred_prob": 0.02957144902902655, "mean_pred_prob_last_10": 0.15657272536191158, "mean_pred_prob_last_25": 0.08436189366038889, "mean_pred_prob_last_50": 0.05035580166731961, "mean_token_accuracy": 0.8701793253421783, "step": 23820 }, { "epoch": 0.4236218512790429, "grad_norm": 0.9880979863480583, "learning_rate": 0.0001, "loss": 0.8262, "mean_abs_error": 153.08527003072473, "mean_abs_error_last_10": 76.59014767848014, "mean_abs_error_last_25": 76.19948663643977, "mean_abs_error_last_50": 88.68511159318265, "mean_pred_prob": 0.03507873695343733, "mean_pred_prob_last_10": 0.17959940619766712, "mean_pred_prob_last_25": 0.09774148296564818, "mean_pred_prob_last_50": 0.059660769253969195, "mean_token_accuracy": 0.8766055107116699, "step": 23830 }, { "epoch": 0.42379961957584483, "grad_norm": 1.2216853266355723, "learning_rate": 0.0001, "loss": 0.7894, "mean_abs_error": 256.1850662333392, "mean_abs_error_last_10": 62.41550138435569, "mean_abs_error_last_25": 153.3980540453217, "mean_abs_error_last_50": 214.7153190554464, "mean_pred_prob": 0.0390183191280812, "mean_pred_prob_last_10": 0.20563571192324162, "mean_pred_prob_last_25": 0.11180238611996174, "mean_pred_prob_last_50": 0.0662943148985505, "mean_token_accuracy": 0.8731871962547302, "step": 23840 }, { "epoch": 0.42397738787264677, "grad_norm": 2.1506275135690447, "learning_rate": 0.0001, "loss": 0.7547, "mean_abs_error": 218.58817139875237, "mean_abs_error_last_10": 55.668772896181494, "mean_abs_error_last_25": 83.17479212425482, "mean_abs_error_last_50": 128.59969113141116, "mean_pred_prob": 0.04612744664773345, "mean_pred_prob_last_10": 0.2258881436660886, "mean_pred_prob_last_25": 0.13097968427464365, "mean_pred_prob_last_50": 0.07933109807781875, "mean_token_accuracy": 0.8738889932632447, "step": 23850 }, { "epoch": 0.42415515616944877, "grad_norm": 2.031304906095649, "learning_rate": 0.0001, "loss": 0.7216, "mean_abs_error": 154.59502439816504, "mean_abs_error_last_10": 13.497910290922865, "mean_abs_error_last_25": 41.034662341313876, "mean_abs_error_last_50": 76.69581957128851, "mean_pred_prob": 0.06157608721405268, "mean_pred_prob_last_10": 0.310714541003108, "mean_pred_prob_last_25": 0.1724505638703704, "mean_pred_prob_last_50": 0.10453890431672334, "mean_token_accuracy": 0.8788235604763031, "step": 23860 }, { "epoch": 0.4243329244662507, "grad_norm": 1.6713748964313762, "learning_rate": 0.0001, "loss": 0.8142, "mean_abs_error": 102.72761984646021, "mean_abs_error_last_10": 51.72317167810578, "mean_abs_error_last_25": 71.77829645795093, "mean_abs_error_last_50": 87.9309976171799, "mean_pred_prob": 0.04950976110994816, "mean_pred_prob_last_10": 0.2553581211715937, "mean_pred_prob_last_25": 0.1379183093085885, "mean_pred_prob_last_50": 0.083167012501508, "mean_token_accuracy": 0.8796683073043823, "step": 23870 }, { "epoch": 0.42451069276305264, "grad_norm": 1.3600723340760568, "learning_rate": 0.0001, "loss": 0.7634, "mean_abs_error": 352.61404247363845, "mean_abs_error_last_10": 114.30575136556908, "mean_abs_error_last_25": 159.76508484118102, "mean_abs_error_last_50": 297.1222966515034, "mean_pred_prob": 0.028881540102884174, "mean_pred_prob_last_10": 0.1639025840908289, "mean_pred_prob_last_25": 0.08579453900456428, "mean_pred_prob_last_50": 0.049524565879255536, "mean_token_accuracy": 0.8708237707614899, "step": 23880 }, { "epoch": 0.4246884610598546, "grad_norm": 2.099375618589153, "learning_rate": 0.0001, "loss": 0.7193, "mean_abs_error": 805.6085851717535, "mean_abs_error_last_10": 460.3427850383067, "mean_abs_error_last_25": 450.189760473853, "mean_abs_error_last_50": 559.5526481566494, "mean_pred_prob": 0.03510453149792738, "mean_pred_prob_last_10": 0.1522653609397821, "mean_pred_prob_last_25": 0.09063334201928228, "mean_pred_prob_last_50": 0.058639640983892606, "mean_token_accuracy": 0.8723773181438446, "step": 23890 }, { "epoch": 0.4248662293566565, "grad_norm": 0.8947580272430677, "learning_rate": 0.0001, "loss": 0.7404, "mean_abs_error": 544.5473847839077, "mean_abs_error_last_10": 240.0949572559785, "mean_abs_error_last_25": 291.2511207199011, "mean_abs_error_last_50": 364.4335700080885, "mean_pred_prob": 0.057093876483850184, "mean_pred_prob_last_10": 0.2537656063097529, "mean_pred_prob_last_25": 0.15130589791806415, "mean_pred_prob_last_50": 0.09475304160732775, "mean_token_accuracy": 0.8734489381313324, "step": 23900 }, { "epoch": 0.42504399765345846, "grad_norm": 2.456193340633778, "learning_rate": 0.0001, "loss": 0.8135, "mean_abs_error": 736.6927241741366, "mean_abs_error_last_10": 306.0202366053994, "mean_abs_error_last_25": 364.6632701445522, "mean_abs_error_last_50": 544.8485465898914, "mean_pred_prob": 0.030113717875792645, "mean_pred_prob_last_10": 0.1456079258758109, "mean_pred_prob_last_25": 0.08083544910768978, "mean_pred_prob_last_50": 0.050403318149619736, "mean_token_accuracy": 0.873255318403244, "step": 23910 }, { "epoch": 0.42522176595026046, "grad_norm": 2.1904901015995857, "learning_rate": 0.0001, "loss": 0.8432, "mean_abs_error": 532.118354607364, "mean_abs_error_last_10": 97.92976496028714, "mean_abs_error_last_25": 134.74029424335328, "mean_abs_error_last_50": 253.27313793208555, "mean_pred_prob": 0.03076866876799613, "mean_pred_prob_last_10": 0.16923578586429358, "mean_pred_prob_last_25": 0.08807075945660472, "mean_pred_prob_last_50": 0.05219860249198973, "mean_token_accuracy": 0.8779560267925263, "step": 23920 }, { "epoch": 0.4253995342470624, "grad_norm": 2.306858903839696, "learning_rate": 0.0001, "loss": 0.8345, "mean_abs_error": 564.6147312780404, "mean_abs_error_last_10": 177.71190830853845, "mean_abs_error_last_25": 282.7293093720578, "mean_abs_error_last_50": 376.5552671262714, "mean_pred_prob": 0.021729901223443447, "mean_pred_prob_last_10": 0.10621772259473801, "mean_pred_prob_last_25": 0.05742000425234437, "mean_pred_prob_last_50": 0.03592147445306182, "mean_token_accuracy": 0.8627377688884735, "step": 23930 }, { "epoch": 0.42557730254386433, "grad_norm": 2.028130252480514, "learning_rate": 0.0001, "loss": 0.7566, "mean_abs_error": 642.8281367886485, "mean_abs_error_last_10": 254.6077144127543, "mean_abs_error_last_25": 314.0373916207132, "mean_abs_error_last_50": 418.2468424957384, "mean_pred_prob": 0.025869779754430056, "mean_pred_prob_last_10": 0.13912710249423982, "mean_pred_prob_last_25": 0.07094317162409425, "mean_pred_prob_last_50": 0.043275655899196866, "mean_token_accuracy": 0.8770961940288544, "step": 23940 }, { "epoch": 0.42575507084066627, "grad_norm": 1.081660529228682, "learning_rate": 0.0001, "loss": 0.7357, "mean_abs_error": 312.2040834859588, "mean_abs_error_last_10": 175.8358702440882, "mean_abs_error_last_25": 165.7033792784676, "mean_abs_error_last_50": 206.82981526755447, "mean_pred_prob": 0.03485577467363328, "mean_pred_prob_last_10": 0.17860811352729797, "mean_pred_prob_last_25": 0.09402326475828886, "mean_pred_prob_last_50": 0.057235179003328085, "mean_token_accuracy": 0.8651530981063843, "step": 23950 }, { "epoch": 0.4259328391374682, "grad_norm": 0.9682803128657563, "learning_rate": 0.0001, "loss": 0.8532, "mean_abs_error": 446.42599768510775, "mean_abs_error_last_10": 155.45182968384773, "mean_abs_error_last_25": 178.07295613468744, "mean_abs_error_last_50": 293.71834972498925, "mean_pred_prob": 0.03848506650538184, "mean_pred_prob_last_10": 0.18532069402281195, "mean_pred_prob_last_25": 0.1058877975330688, "mean_pred_prob_last_50": 0.06441797621664591, "mean_token_accuracy": 0.8772720515727996, "step": 23960 }, { "epoch": 0.42611060743427015, "grad_norm": 1.4197316351107843, "learning_rate": 0.0001, "loss": 0.8981, "mean_abs_error": 337.77958941573723, "mean_abs_error_last_10": 70.21692874950999, "mean_abs_error_last_25": 118.40650061002205, "mean_abs_error_last_50": 193.10524736231778, "mean_pred_prob": 0.024623002856969833, "mean_pred_prob_last_10": 0.13921298198401927, "mean_pred_prob_last_25": 0.07038477761670947, "mean_pred_prob_last_50": 0.041944926232099534, "mean_token_accuracy": 0.8737363338470459, "step": 23970 }, { "epoch": 0.42628837573107214, "grad_norm": 1.7581707901219412, "learning_rate": 0.0001, "loss": 0.713, "mean_abs_error": 816.7644248899103, "mean_abs_error_last_10": 352.0822515690737, "mean_abs_error_last_25": 419.0078692129544, "mean_abs_error_last_50": 511.6437685609632, "mean_pred_prob": 0.025119070918299256, "mean_pred_prob_last_10": 0.11694582897471265, "mean_pred_prob_last_25": 0.06738642191630788, "mean_pred_prob_last_50": 0.04251567063038238, "mean_token_accuracy": 0.8760711312294006, "step": 23980 }, { "epoch": 0.4264661440278741, "grad_norm": 1.2740343919635386, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 975.3323136152572, "mean_abs_error_last_10": 483.5947432576207, "mean_abs_error_last_25": 657.5798817580506, "mean_abs_error_last_50": 755.9703137808517, "mean_pred_prob": 0.027010361800785178, "mean_pred_prob_last_10": 0.14511502702953294, "mean_pred_prob_last_25": 0.07635258406226057, "mean_pred_prob_last_50": 0.0456496112732566, "mean_token_accuracy": 0.8725746393203735, "step": 23990 }, { "epoch": 0.426643912324676, "grad_norm": 1.6918289071441441, "learning_rate": 0.0001, "loss": 0.8531, "mean_abs_error": 348.62632345829854, "mean_abs_error_last_10": 45.45844738407548, "mean_abs_error_last_25": 102.47004398149784, "mean_abs_error_last_50": 187.51556559684252, "mean_pred_prob": 0.04181331489235163, "mean_pred_prob_last_10": 0.20290153808891773, "mean_pred_prob_last_25": 0.11601105947047471, "mean_pred_prob_last_50": 0.07113716527819633, "mean_token_accuracy": 0.858604621887207, "step": 24000 }, { "epoch": 0.42682168062147796, "grad_norm": 1.9811450558177701, "learning_rate": 0.0001, "loss": 0.7296, "mean_abs_error": 1578.8790571573377, "mean_abs_error_last_10": 904.1757701933711, "mean_abs_error_last_25": 1007.0413445094013, "mean_abs_error_last_50": 1225.9889479191002, "mean_pred_prob": 0.03610820952890208, "mean_pred_prob_last_10": 0.1814593857328873, "mean_pred_prob_last_25": 0.10256794208544306, "mean_pred_prob_last_50": 0.062295981237548405, "mean_token_accuracy": 0.8730903565883636, "step": 24010 }, { "epoch": 0.4269994489182799, "grad_norm": 1.2250460086859718, "learning_rate": 0.0001, "loss": 0.7828, "mean_abs_error": 809.661594854643, "mean_abs_error_last_10": 339.9570268154766, "mean_abs_error_last_25": 407.7036884003354, "mean_abs_error_last_50": 540.5513862252911, "mean_pred_prob": 0.030727758137800265, "mean_pred_prob_last_10": 0.1624213765928289, "mean_pred_prob_last_25": 0.08815933298319578, "mean_pred_prob_last_50": 0.05299569973140024, "mean_token_accuracy": 0.8735167264938355, "step": 24020 }, { "epoch": 0.42717721721508184, "grad_norm": 1.6384905537769483, "learning_rate": 0.0001, "loss": 0.845, "mean_abs_error": 687.8382904742915, "mean_abs_error_last_10": 290.1814910980577, "mean_abs_error_last_25": 430.0287271222792, "mean_abs_error_last_50": 568.6115972074842, "mean_pred_prob": 0.023804464284330605, "mean_pred_prob_last_10": 0.12681609932333232, "mean_pred_prob_last_25": 0.06888961871154606, "mean_pred_prob_last_50": 0.041123735392466186, "mean_token_accuracy": 0.8721681952476501, "step": 24030 }, { "epoch": 0.42735498551188383, "grad_norm": 2.699428438393188, "learning_rate": 0.0001, "loss": 0.7428, "mean_abs_error": 554.3943444831447, "mean_abs_error_last_10": 111.72687310258725, "mean_abs_error_last_25": 152.10222016130996, "mean_abs_error_last_50": 280.02362962120117, "mean_pred_prob": 0.045847588148899374, "mean_pred_prob_last_10": 0.21956144492141902, "mean_pred_prob_last_25": 0.1236676275730133, "mean_pred_prob_last_50": 0.07711946077179163, "mean_token_accuracy": 0.8710427522659302, "step": 24040 }, { "epoch": 0.4275327538086858, "grad_norm": 1.8686501145387038, "learning_rate": 0.0001, "loss": 0.7953, "mean_abs_error": 318.3801663409406, "mean_abs_error_last_10": 89.89378170870545, "mean_abs_error_last_25": 143.87865921573402, "mean_abs_error_last_50": 205.15881560145763, "mean_pred_prob": 0.04839124158024788, "mean_pred_prob_last_10": 0.22765508703887463, "mean_pred_prob_last_25": 0.1333277921192348, "mean_pred_prob_last_50": 0.0811326414346695, "mean_token_accuracy": 0.870760315656662, "step": 24050 }, { "epoch": 0.4277105221054877, "grad_norm": 2.072797415869432, "learning_rate": 0.0001, "loss": 0.8019, "mean_abs_error": 602.004993371608, "mean_abs_error_last_10": 403.9517462294428, "mean_abs_error_last_25": 385.29469413924755, "mean_abs_error_last_50": 388.64720513794884, "mean_pred_prob": 0.02033772973809391, "mean_pred_prob_last_10": 0.10562571212649345, "mean_pred_prob_last_25": 0.05732886795885861, "mean_pred_prob_last_50": 0.03482583885779604, "mean_token_accuracy": 0.8618895530700683, "step": 24060 }, { "epoch": 0.42788829040228965, "grad_norm": 1.2805082195371222, "learning_rate": 0.0001, "loss": 0.9284, "mean_abs_error": 799.7815253423543, "mean_abs_error_last_10": 279.9719504932997, "mean_abs_error_last_25": 467.04515356724613, "mean_abs_error_last_50": 590.683076218898, "mean_pred_prob": 0.01473284347448498, "mean_pred_prob_last_10": 0.08081127200275659, "mean_pred_prob_last_25": 0.04371102536097169, "mean_pred_prob_last_50": 0.026181019935756923, "mean_token_accuracy": 0.8656615912914276, "step": 24070 }, { "epoch": 0.4280660586990916, "grad_norm": 1.08534558695711, "learning_rate": 0.0001, "loss": 0.7798, "mean_abs_error": 867.838890136389, "mean_abs_error_last_10": 536.4661518941901, "mean_abs_error_last_25": 570.5169060747252, "mean_abs_error_last_50": 669.2197119777938, "mean_pred_prob": 0.03611318323528394, "mean_pred_prob_last_10": 0.19819497120042798, "mean_pred_prob_last_25": 0.10470478579809424, "mean_pred_prob_last_50": 0.06218921904219314, "mean_token_accuracy": 0.8646558165550232, "step": 24080 }, { "epoch": 0.42824382699589353, "grad_norm": 1.8437246221571055, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 375.87358010584205, "mean_abs_error_last_10": 197.77836712362608, "mean_abs_error_last_25": 200.93836642041487, "mean_abs_error_last_50": 230.40374691149276, "mean_pred_prob": 0.039464104524813595, "mean_pred_prob_last_10": 0.1944012988358736, "mean_pred_prob_last_25": 0.10830120304599404, "mean_pred_prob_last_50": 0.06638108445331455, "mean_token_accuracy": 0.8770140111446381, "step": 24090 }, { "epoch": 0.4284215952926955, "grad_norm": 3.666273664701702, "learning_rate": 0.0001, "loss": 0.8515, "mean_abs_error": 1149.5968066213845, "mean_abs_error_last_10": 722.7723288264654, "mean_abs_error_last_25": 806.9981354784117, "mean_abs_error_last_50": 926.3130975341686, "mean_pred_prob": 0.02780191662313882, "mean_pred_prob_last_10": 0.13928552450815915, "mean_pred_prob_last_25": 0.07467326481128111, "mean_pred_prob_last_50": 0.045982195626129395, "mean_token_accuracy": 0.8702922105789185, "step": 24100 }, { "epoch": 0.42859936358949746, "grad_norm": 1.314906061025016, "learning_rate": 0.0001, "loss": 0.8296, "mean_abs_error": 297.761477343173, "mean_abs_error_last_10": 85.71228679231922, "mean_abs_error_last_25": 146.95161449816993, "mean_abs_error_last_50": 198.25561393053906, "mean_pred_prob": 0.03438908294774592, "mean_pred_prob_last_10": 0.17755376044660806, "mean_pred_prob_last_25": 0.09518668428063393, "mean_pred_prob_last_50": 0.05746396090835333, "mean_token_accuracy": 0.8712752163410187, "step": 24110 }, { "epoch": 0.4287771318862994, "grad_norm": 1.5592263105455317, "learning_rate": 0.0001, "loss": 0.6904, "mean_abs_error": 177.32112854439748, "mean_abs_error_last_10": 91.30667695685443, "mean_abs_error_last_25": 90.1432789389513, "mean_abs_error_last_50": 114.55491141337006, "mean_pred_prob": 0.04022782826796174, "mean_pred_prob_last_10": 0.19557978585362434, "mean_pred_prob_last_25": 0.10927471183240414, "mean_pred_prob_last_50": 0.06782069546170533, "mean_token_accuracy": 0.8868794739246368, "step": 24120 }, { "epoch": 0.42895490018310134, "grad_norm": 1.6136958770749468, "learning_rate": 0.0001, "loss": 0.8488, "mean_abs_error": 1326.927051930338, "mean_abs_error_last_10": 491.4468771634379, "mean_abs_error_last_25": 650.2758959046332, "mean_abs_error_last_50": 870.5407058429992, "mean_pred_prob": 0.0323363750561839, "mean_pred_prob_last_10": 0.1601670363510493, "mean_pred_prob_last_25": 0.0892238922562683, "mean_pred_prob_last_50": 0.05496870556671638, "mean_token_accuracy": 0.8749957025051117, "step": 24130 }, { "epoch": 0.4291326684799033, "grad_norm": 0.9520963278089031, "learning_rate": 0.0001, "loss": 0.8107, "mean_abs_error": 1844.7286638348476, "mean_abs_error_last_10": 1086.8850472923243, "mean_abs_error_last_25": 1174.8645036698063, "mean_abs_error_last_50": 1386.9001041055483, "mean_pred_prob": 0.018587005777226294, "mean_pred_prob_last_10": 0.0979997749265749, "mean_pred_prob_last_25": 0.05145513049210422, "mean_pred_prob_last_50": 0.030964203443727457, "mean_token_accuracy": 0.8659589886665344, "step": 24140 }, { "epoch": 0.4293104367767052, "grad_norm": 1.9447346863444817, "learning_rate": 0.0001, "loss": 0.8013, "mean_abs_error": 615.597567505945, "mean_abs_error_last_10": 145.08841710565008, "mean_abs_error_last_25": 280.9346732204825, "mean_abs_error_last_50": 403.6257233009975, "mean_pred_prob": 0.031587680085795, "mean_pred_prob_last_10": 0.169384554377757, "mean_pred_prob_last_25": 0.09068608331726864, "mean_pred_prob_last_50": 0.05410928185447119, "mean_token_accuracy": 0.8705695867538452, "step": 24150 }, { "epoch": 0.4294882050735072, "grad_norm": 1.4902918446742415, "learning_rate": 0.0001, "loss": 0.7389, "mean_abs_error": 828.1863930441417, "mean_abs_error_last_10": 343.8613071220449, "mean_abs_error_last_25": 393.78190741137854, "mean_abs_error_last_50": 491.589557240343, "mean_pred_prob": 0.03456893827533349, "mean_pred_prob_last_10": 0.15234038100461475, "mean_pred_prob_last_25": 0.09181612945976667, "mean_pred_prob_last_50": 0.05736018823226914, "mean_token_accuracy": 0.8671691536903381, "step": 24160 }, { "epoch": 0.42966597337030915, "grad_norm": 2.2365440018689378, "learning_rate": 0.0001, "loss": 0.7665, "mean_abs_error": 412.3893819861854, "mean_abs_error_last_10": 258.1565182742569, "mean_abs_error_last_25": 259.8378643324638, "mean_abs_error_last_50": 283.02104145874375, "mean_pred_prob": 0.03356793662533164, "mean_pred_prob_last_10": 0.17335920822806655, "mean_pred_prob_last_25": 0.09690026161260903, "mean_pred_prob_last_50": 0.05745221287943423, "mean_token_accuracy": 0.860177505016327, "step": 24170 }, { "epoch": 0.4298437416671111, "grad_norm": 2.5775823436324754, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 918.607279223166, "mean_abs_error_last_10": 376.5713906189011, "mean_abs_error_last_25": 501.8929323017005, "mean_abs_error_last_50": 610.1982196133375, "mean_pred_prob": 0.02226593907689676, "mean_pred_prob_last_10": 0.1214277719031088, "mean_pred_prob_last_25": 0.06470020822016523, "mean_pred_prob_last_50": 0.038496312056668106, "mean_token_accuracy": 0.8685594856739044, "step": 24180 }, { "epoch": 0.43002150996391303, "grad_norm": 1.7506721795917455, "learning_rate": 0.0001, "loss": 0.7997, "mean_abs_error": 580.5598139177962, "mean_abs_error_last_10": 87.57675355343653, "mean_abs_error_last_25": 233.62358700465106, "mean_abs_error_last_50": 395.36053511987177, "mean_pred_prob": 0.040411689202301204, "mean_pred_prob_last_10": 0.1907478662673384, "mean_pred_prob_last_25": 0.10863956408575177, "mean_pred_prob_last_50": 0.06631629185285419, "mean_token_accuracy": 0.8684302151203156, "step": 24190 }, { "epoch": 0.43019927826071497, "grad_norm": 3.0054487590124066, "learning_rate": 0.0001, "loss": 0.8678, "mean_abs_error": 575.845423896937, "mean_abs_error_last_10": 123.3508758145651, "mean_abs_error_last_25": 161.3946794020839, "mean_abs_error_last_50": 266.34706318215547, "mean_pred_prob": 0.029597369022667408, "mean_pred_prob_last_10": 0.16668451223522424, "mean_pred_prob_last_25": 0.08565236041322351, "mean_pred_prob_last_50": 0.05020508654415608, "mean_token_accuracy": 0.8759596943855286, "step": 24200 }, { "epoch": 0.4303770465575169, "grad_norm": 1.1890692979371982, "learning_rate": 0.0001, "loss": 0.8244, "mean_abs_error": 389.3577526369946, "mean_abs_error_last_10": 101.88179835563136, "mean_abs_error_last_25": 143.86350944845466, "mean_abs_error_last_50": 216.06640794474583, "mean_pred_prob": 0.027106501488015057, "mean_pred_prob_last_10": 0.14103717198595406, "mean_pred_prob_last_25": 0.07543703634291887, "mean_pred_prob_last_50": 0.04578657876700163, "mean_token_accuracy": 0.86965012550354, "step": 24210 }, { "epoch": 0.4305548148543189, "grad_norm": 1.1359399878116387, "learning_rate": 0.0001, "loss": 0.8266, "mean_abs_error": 295.4944224672082, "mean_abs_error_last_10": 109.56794831544566, "mean_abs_error_last_25": 135.86672639772908, "mean_abs_error_last_50": 206.83903746963043, "mean_pred_prob": 0.03962610275484622, "mean_pred_prob_last_10": 0.19389947447925807, "mean_pred_prob_last_25": 0.11008934527635575, "mean_pred_prob_last_50": 0.06673658955842257, "mean_token_accuracy": 0.8717778325080872, "step": 24220 }, { "epoch": 0.43073258315112084, "grad_norm": 1.2453049435872583, "learning_rate": 0.0001, "loss": 0.7906, "mean_abs_error": 192.67609611428867, "mean_abs_error_last_10": 37.93206877574945, "mean_abs_error_last_25": 54.40307449952936, "mean_abs_error_last_50": 121.16909842357704, "mean_pred_prob": 0.058089503552764654, "mean_pred_prob_last_10": 0.25497030541300775, "mean_pred_prob_last_25": 0.1464350663125515, "mean_pred_prob_last_50": 0.09343230165541172, "mean_token_accuracy": 0.8712473750114441, "step": 24230 }, { "epoch": 0.4309103514479228, "grad_norm": 1.0128409722488176, "learning_rate": 0.0001, "loss": 0.8294, "mean_abs_error": 1430.083029741143, "mean_abs_error_last_10": 746.3373354038542, "mean_abs_error_last_25": 927.5442599429367, "mean_abs_error_last_50": 1071.80854458992, "mean_pred_prob": 0.012148748079198413, "mean_pred_prob_last_10": 0.06367997478519101, "mean_pred_prob_last_25": 0.033874239391298036, "mean_pred_prob_last_50": 0.02033111214695964, "mean_token_accuracy": 0.8749928832054138, "step": 24240 }, { "epoch": 0.4310881197447247, "grad_norm": 1.69913433329553, "learning_rate": 0.0001, "loss": 0.7967, "mean_abs_error": 995.922710205478, "mean_abs_error_last_10": 468.711231839584, "mean_abs_error_last_25": 564.7894752577571, "mean_abs_error_last_50": 692.4844039329682, "mean_pred_prob": 0.027385116426739842, "mean_pred_prob_last_10": 0.1321222126338398, "mean_pred_prob_last_25": 0.07478468333429192, "mean_pred_prob_last_50": 0.04502522239636164, "mean_token_accuracy": 0.8677034795284271, "step": 24250 }, { "epoch": 0.43126588804152666, "grad_norm": 1.7585614956519076, "learning_rate": 0.0001, "loss": 0.6842, "mean_abs_error": 163.8232649520653, "mean_abs_error_last_10": 128.1552517560154, "mean_abs_error_last_25": 118.84147887972222, "mean_abs_error_last_50": 110.44137191394994, "mean_pred_prob": 0.058103846735320985, "mean_pred_prob_last_10": 0.2652375053614378, "mean_pred_prob_last_25": 0.15799041660502552, "mean_pred_prob_last_50": 0.09722936982288957, "mean_token_accuracy": 0.8780400872230529, "step": 24260 }, { "epoch": 0.4314436563383286, "grad_norm": 3.37671036630292, "learning_rate": 0.0001, "loss": 0.7751, "mean_abs_error": 309.14082102044244, "mean_abs_error_last_10": 83.59805130908089, "mean_abs_error_last_25": 141.3946328050867, "mean_abs_error_last_50": 237.32275279806814, "mean_pred_prob": 0.03889270210638642, "mean_pred_prob_last_10": 0.2005625195801258, "mean_pred_prob_last_25": 0.11133758630603552, "mean_pred_prob_last_50": 0.06763981627300382, "mean_token_accuracy": 0.8759657800197601, "step": 24270 }, { "epoch": 0.4316214246351306, "grad_norm": 3.044277909297175, "learning_rate": 0.0001, "loss": 0.8069, "mean_abs_error": 1126.0778334953457, "mean_abs_error_last_10": 260.0700512840361, "mean_abs_error_last_25": 424.8574972732722, "mean_abs_error_last_50": 708.3078262192769, "mean_pred_prob": 0.02204654352972284, "mean_pred_prob_last_10": 0.11732190941693262, "mean_pred_prob_last_25": 0.060341140953823924, "mean_pred_prob_last_50": 0.036618738959077746, "mean_token_accuracy": 0.8723149061203003, "step": 24280 }, { "epoch": 0.43179919293193253, "grad_norm": 2.0762336738032463, "learning_rate": 0.0001, "loss": 0.7081, "mean_abs_error": 799.4945820136375, "mean_abs_error_last_10": 296.0734882844467, "mean_abs_error_last_25": 510.08755993310916, "mean_abs_error_last_50": 662.9019083812358, "mean_pred_prob": 0.03208683285629377, "mean_pred_prob_last_10": 0.16561668429058046, "mean_pred_prob_last_25": 0.08924227856332437, "mean_pred_prob_last_50": 0.054502433212473986, "mean_token_accuracy": 0.8708488881587982, "step": 24290 }, { "epoch": 0.43197696122873447, "grad_norm": 1.9635983169632165, "learning_rate": 0.0001, "loss": 0.7796, "mean_abs_error": 942.2403151843184, "mean_abs_error_last_10": 369.774884263279, "mean_abs_error_last_25": 444.6394795175253, "mean_abs_error_last_50": 636.0381588718517, "mean_pred_prob": 0.03845526337681804, "mean_pred_prob_last_10": 0.17625329520669766, "mean_pred_prob_last_25": 0.10338410229305736, "mean_pred_prob_last_50": 0.0641861233394593, "mean_token_accuracy": 0.8715436398983002, "step": 24300 }, { "epoch": 0.4321547295255364, "grad_norm": 1.4799385421119733, "learning_rate": 0.0001, "loss": 0.7606, "mean_abs_error": 298.49651202671663, "mean_abs_error_last_10": 66.63827647926156, "mean_abs_error_last_25": 138.35693815016748, "mean_abs_error_last_50": 230.77715138935386, "mean_pred_prob": 0.04758996292948723, "mean_pred_prob_last_10": 0.23474114742130042, "mean_pred_prob_last_25": 0.13175038974732162, "mean_pred_prob_last_50": 0.08035866031423211, "mean_token_accuracy": 0.8702318549156189, "step": 24310 }, { "epoch": 0.43233249782233835, "grad_norm": 1.6169622255981675, "learning_rate": 0.0001, "loss": 0.8837, "mean_abs_error": 219.64285813829105, "mean_abs_error_last_10": 74.40579279652522, "mean_abs_error_last_25": 108.36104002437212, "mean_abs_error_last_50": 160.3550690351987, "mean_pred_prob": 0.02952768886461854, "mean_pred_prob_last_10": 0.15630252212285994, "mean_pred_prob_last_25": 0.0862789573147893, "mean_pred_prob_last_50": 0.05094403000548482, "mean_token_accuracy": 0.8682738304138183, "step": 24320 }, { "epoch": 0.4325102661191403, "grad_norm": 1.1213867139188818, "learning_rate": 0.0001, "loss": 0.6297, "mean_abs_error": 87.67713815603977, "mean_abs_error_last_10": 23.96568805292449, "mean_abs_error_last_25": 34.5780513524177, "mean_abs_error_last_50": 61.894306223782266, "mean_pred_prob": 0.060683285119012, "mean_pred_prob_last_10": 0.26650087870657446, "mean_pred_prob_last_25": 0.15703832283616065, "mean_pred_prob_last_50": 0.09946959121152758, "mean_token_accuracy": 0.8904488921165467, "step": 24330 }, { "epoch": 0.4326880344159423, "grad_norm": 0.798776575250199, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 280.8348074285305, "mean_abs_error_last_10": 68.54150540135602, "mean_abs_error_last_25": 90.75854360344665, "mean_abs_error_last_50": 169.68441205307067, "mean_pred_prob": 0.03552785287611186, "mean_pred_prob_last_10": 0.1776114733889699, "mean_pred_prob_last_25": 0.10083838868886233, "mean_pred_prob_last_50": 0.060463123861700295, "mean_token_accuracy": 0.8830227971076965, "step": 24340 }, { "epoch": 0.4328658027127442, "grad_norm": 2.292373182879875, "learning_rate": 0.0001, "loss": 0.6823, "mean_abs_error": 1547.4360284939924, "mean_abs_error_last_10": 1064.6482396199174, "mean_abs_error_last_25": 1092.0469755859403, "mean_abs_error_last_50": 1190.7556506928493, "mean_pred_prob": 0.03502365408930928, "mean_pred_prob_last_10": 0.1708446310614818, "mean_pred_prob_last_25": 0.09647671340644592, "mean_pred_prob_last_50": 0.058910061612550635, "mean_token_accuracy": 0.8731376230716705, "step": 24350 }, { "epoch": 0.43304357100954616, "grad_norm": 1.0609868322517522, "learning_rate": 0.0001, "loss": 0.7056, "mean_abs_error": 523.9063131128158, "mean_abs_error_last_10": 165.9785834879135, "mean_abs_error_last_25": 230.05083219834947, "mean_abs_error_last_50": 319.0858316495255, "mean_pred_prob": 0.0388091970293317, "mean_pred_prob_last_10": 0.1798584915464744, "mean_pred_prob_last_25": 0.10216276926221327, "mean_pred_prob_last_50": 0.0639393495337572, "mean_token_accuracy": 0.8719787359237671, "step": 24360 }, { "epoch": 0.4332213393063481, "grad_norm": 0.9722697958391704, "learning_rate": 0.0001, "loss": 0.7818, "mean_abs_error": 227.1595244762284, "mean_abs_error_last_10": 37.23699970321222, "mean_abs_error_last_25": 74.2577131911282, "mean_abs_error_last_50": 142.95580290524313, "mean_pred_prob": 0.03813149882480502, "mean_pred_prob_last_10": 0.1999405100941658, "mean_pred_prob_last_25": 0.10968126393854619, "mean_pred_prob_last_50": 0.06529775615781545, "mean_token_accuracy": 0.866493684053421, "step": 24370 }, { "epoch": 0.43339910760315004, "grad_norm": 1.3254133505885342, "learning_rate": 0.0001, "loss": 0.7393, "mean_abs_error": 547.6816981995481, "mean_abs_error_last_10": 154.85591053618023, "mean_abs_error_last_25": 178.5766494363162, "mean_abs_error_last_50": 307.38188476895175, "mean_pred_prob": 0.03495646923547611, "mean_pred_prob_last_10": 0.16690215214621276, "mean_pred_prob_last_25": 0.09628265615319834, "mean_pred_prob_last_50": 0.059539063356351105, "mean_token_accuracy": 0.8729160368442536, "step": 24380 }, { "epoch": 0.43357687589995203, "grad_norm": 1.2060455359056332, "learning_rate": 0.0001, "loss": 0.6848, "mean_abs_error": 569.1589073446446, "mean_abs_error_last_10": 171.44795109582626, "mean_abs_error_last_25": 218.86457947437253, "mean_abs_error_last_50": 351.92828976989165, "mean_pred_prob": 0.029041062825126574, "mean_pred_prob_last_10": 0.1589005974587053, "mean_pred_prob_last_25": 0.08278425192693248, "mean_pred_prob_last_50": 0.04919032329926267, "mean_token_accuracy": 0.8829955637454987, "step": 24390 }, { "epoch": 0.43375464419675397, "grad_norm": 1.8312884683343036, "learning_rate": 0.0001, "loss": 0.8464, "mean_abs_error": 460.8606955106591, "mean_abs_error_last_10": 149.75602454283438, "mean_abs_error_last_25": 259.35259616635375, "mean_abs_error_last_50": 375.1904743638266, "mean_pred_prob": 0.04744910584995523, "mean_pred_prob_last_10": 0.23656012159772216, "mean_pred_prob_last_25": 0.12465499995741994, "mean_pred_prob_last_50": 0.07764316701795906, "mean_token_accuracy": 0.87271289229393, "step": 24400 }, { "epoch": 0.4339324124935559, "grad_norm": 1.0597966229407743, "learning_rate": 0.0001, "loss": 0.8421, "mean_abs_error": 394.70118343962486, "mean_abs_error_last_10": 118.75083210614245, "mean_abs_error_last_25": 143.40632841048665, "mean_abs_error_last_50": 184.84539851947986, "mean_pred_prob": 0.038743268535472455, "mean_pred_prob_last_10": 0.19604718275368213, "mean_pred_prob_last_25": 0.11077022505924106, "mean_pred_prob_last_50": 0.06676819333806634, "mean_token_accuracy": 0.8690267384052277, "step": 24410 }, { "epoch": 0.43411018079035785, "grad_norm": 1.7014933266947097, "learning_rate": 0.0001, "loss": 0.8438, "mean_abs_error": 329.227879774948, "mean_abs_error_last_10": 124.26631337442896, "mean_abs_error_last_25": 155.7618664717232, "mean_abs_error_last_50": 205.23477259580508, "mean_pred_prob": 0.0399947109632194, "mean_pred_prob_last_10": 0.19506187904626132, "mean_pred_prob_last_25": 0.10868408968672157, "mean_pred_prob_last_50": 0.06723753199912608, "mean_token_accuracy": 0.8755825340747834, "step": 24420 }, { "epoch": 0.4342879490871598, "grad_norm": 2.5712650043007566, "learning_rate": 0.0001, "loss": 0.7756, "mean_abs_error": 609.74589357415, "mean_abs_error_last_10": 399.57793783471664, "mean_abs_error_last_25": 406.2890101711711, "mean_abs_error_last_50": 452.98847117813983, "mean_pred_prob": 0.03328323416644707, "mean_pred_prob_last_10": 0.1709968408336863, "mean_pred_prob_last_25": 0.0986475236946717, "mean_pred_prob_last_50": 0.0574862263048999, "mean_token_accuracy": 0.8715142667293548, "step": 24430 }, { "epoch": 0.4344657173839617, "grad_norm": 2.1986320026328103, "learning_rate": 0.0001, "loss": 0.8607, "mean_abs_error": 740.1287827778475, "mean_abs_error_last_10": 395.96629764202464, "mean_abs_error_last_25": 445.64133605403975, "mean_abs_error_last_50": 553.7464647094482, "mean_pred_prob": 0.028830068226670846, "mean_pred_prob_last_10": 0.14114844832802192, "mean_pred_prob_last_25": 0.07770739699481055, "mean_pred_prob_last_50": 0.0482005871279398, "mean_token_accuracy": 0.8693968892097473, "step": 24440 }, { "epoch": 0.4346434856807637, "grad_norm": 1.8507761370557145, "learning_rate": 0.0001, "loss": 0.7494, "mean_abs_error": 427.3111401760986, "mean_abs_error_last_10": 102.48452852350792, "mean_abs_error_last_25": 146.0796377446823, "mean_abs_error_last_50": 233.94776957830564, "mean_pred_prob": 0.02685510003939271, "mean_pred_prob_last_10": 0.143802822008729, "mean_pred_prob_last_25": 0.07750857416540384, "mean_pred_prob_last_50": 0.04614679310470819, "mean_token_accuracy": 0.8658148586750031, "step": 24450 }, { "epoch": 0.43482125397756566, "grad_norm": 2.991501523429242, "learning_rate": 0.0001, "loss": 0.6808, "mean_abs_error": 335.7343905892396, "mean_abs_error_last_10": 120.05811705485159, "mean_abs_error_last_25": 145.3450811116511, "mean_abs_error_last_50": 214.7958724005267, "mean_pred_prob": 0.04561341218650341, "mean_pred_prob_last_10": 0.203142149746418, "mean_pred_prob_last_25": 0.12092294916510582, "mean_pred_prob_last_50": 0.07546452321112156, "mean_token_accuracy": 0.8811684608459472, "step": 24460 }, { "epoch": 0.4349990222743676, "grad_norm": 1.994082296557397, "learning_rate": 0.0001, "loss": 0.8564, "mean_abs_error": 380.3321827449944, "mean_abs_error_last_10": 141.3621125874204, "mean_abs_error_last_25": 160.52312575258998, "mean_abs_error_last_50": 191.1344210835884, "mean_pred_prob": 0.054340016114292664, "mean_pred_prob_last_10": 0.24513501782203093, "mean_pred_prob_last_25": 0.1465921897557564, "mean_pred_prob_last_50": 0.09031042154529131, "mean_token_accuracy": 0.8699127793312073, "step": 24470 }, { "epoch": 0.43517679057116954, "grad_norm": 2.401551568083264, "learning_rate": 0.0001, "loss": 0.8995, "mean_abs_error": 1093.4995325043978, "mean_abs_error_last_10": 750.0504675945551, "mean_abs_error_last_25": 850.9283476761348, "mean_abs_error_last_50": 958.1225052454799, "mean_pred_prob": 0.04198986068513477, "mean_pred_prob_last_10": 0.21324390502413734, "mean_pred_prob_last_25": 0.11584954628051491, "mean_pred_prob_last_50": 0.07083435187159921, "mean_token_accuracy": 0.8708072364330292, "step": 24480 }, { "epoch": 0.4353545588679715, "grad_norm": 2.2788857290659266, "learning_rate": 0.0001, "loss": 0.7987, "mean_abs_error": 492.14091749447954, "mean_abs_error_last_10": 243.75941507025578, "mean_abs_error_last_25": 245.17218866035483, "mean_abs_error_last_50": 316.064978809425, "mean_pred_prob": 0.03025804616045207, "mean_pred_prob_last_10": 0.16717286817729474, "mean_pred_prob_last_25": 0.08876680936664343, "mean_pred_prob_last_50": 0.051745519135147335, "mean_token_accuracy": 0.888816100358963, "step": 24490 }, { "epoch": 0.4355323271647734, "grad_norm": 1.0184089020293223, "learning_rate": 0.0001, "loss": 0.765, "mean_abs_error": 840.7623344974397, "mean_abs_error_last_10": 376.2655752182682, "mean_abs_error_last_25": 447.951651726645, "mean_abs_error_last_50": 601.1431458797243, "mean_pred_prob": 0.04385897577740252, "mean_pred_prob_last_10": 0.21400646161346232, "mean_pred_prob_last_25": 0.1196606156474445, "mean_pred_prob_last_50": 0.07352835718775168, "mean_token_accuracy": 0.8712898015975952, "step": 24500 }, { "epoch": 0.4357100954615754, "grad_norm": 3.001110731133009, "learning_rate": 0.0001, "loss": 0.8385, "mean_abs_error": 485.505461479817, "mean_abs_error_last_10": 125.45283386735363, "mean_abs_error_last_25": 146.8389961255105, "mean_abs_error_last_50": 286.44788501036135, "mean_pred_prob": 0.041362149070482704, "mean_pred_prob_last_10": 0.20814727196702734, "mean_pred_prob_last_25": 0.11881720026722178, "mean_pred_prob_last_50": 0.0713258255389519, "mean_token_accuracy": 0.8642643094062805, "step": 24510 }, { "epoch": 0.43588786375837735, "grad_norm": 0.9319718498527142, "learning_rate": 0.0001, "loss": 0.8019, "mean_abs_error": 869.2989440371719, "mean_abs_error_last_10": 473.65330816992935, "mean_abs_error_last_25": 498.6631803439788, "mean_abs_error_last_50": 611.7361812276713, "mean_pred_prob": 0.03279007443343289, "mean_pred_prob_last_10": 0.15209679240651894, "mean_pred_prob_last_25": 0.08832580006856006, "mean_pred_prob_last_50": 0.05489964270964265, "mean_token_accuracy": 0.8706310808658599, "step": 24520 }, { "epoch": 0.4360656320551793, "grad_norm": 1.4651881129997169, "learning_rate": 0.0001, "loss": 0.7957, "mean_abs_error": 323.5282156631854, "mean_abs_error_last_10": 79.01088146929868, "mean_abs_error_last_25": 116.44343528653985, "mean_abs_error_last_50": 196.54864979022858, "mean_pred_prob": 0.04509450511541217, "mean_pred_prob_last_10": 0.216839172039181, "mean_pred_prob_last_25": 0.12270467339549214, "mean_pred_prob_last_50": 0.07549476486165077, "mean_token_accuracy": 0.8692014873027801, "step": 24530 }, { "epoch": 0.4362434003519812, "grad_norm": 2.5173967023480652, "learning_rate": 0.0001, "loss": 0.7901, "mean_abs_error": 467.60166002659014, "mean_abs_error_last_10": 120.73367729484221, "mean_abs_error_last_25": 248.55789401985922, "mean_abs_error_last_50": 359.6192211646658, "mean_pred_prob": 0.047332833369728176, "mean_pred_prob_last_10": 0.21259220184292643, "mean_pred_prob_last_25": 0.12524210206465797, "mean_pred_prob_last_50": 0.07802566204918548, "mean_token_accuracy": 0.8778691589832306, "step": 24540 }, { "epoch": 0.43642116864878316, "grad_norm": 1.782520219289854, "learning_rate": 0.0001, "loss": 0.8426, "mean_abs_error": 620.8288623031774, "mean_abs_error_last_10": 262.7457882932871, "mean_abs_error_last_25": 290.515566621497, "mean_abs_error_last_50": 437.8165178797145, "mean_pred_prob": 0.018276178638916462, "mean_pred_prob_last_10": 0.10002843409311027, "mean_pred_prob_last_25": 0.05316831221571192, "mean_pred_prob_last_50": 0.03123128038132563, "mean_token_accuracy": 0.8677375316619873, "step": 24550 }, { "epoch": 0.4365989369455851, "grad_norm": 1.0006371985073537, "learning_rate": 0.0001, "loss": 0.8151, "mean_abs_error": 164.51998108176576, "mean_abs_error_last_10": 74.20995670751138, "mean_abs_error_last_25": 94.19304059626293, "mean_abs_error_last_50": 126.15676173852981, "mean_pred_prob": 0.03884064140729606, "mean_pred_prob_last_10": 0.17678828425705434, "mean_pred_prob_last_25": 0.10187997631728649, "mean_pred_prob_last_50": 0.06348419059067964, "mean_token_accuracy": 0.8693343281745911, "step": 24560 }, { "epoch": 0.4367767052423871, "grad_norm": 1.050489323873987, "learning_rate": 0.0001, "loss": 0.7519, "mean_abs_error": 795.1229300536814, "mean_abs_error_last_10": 375.99421231464225, "mean_abs_error_last_25": 422.1058840700222, "mean_abs_error_last_50": 564.0999032921866, "mean_pred_prob": 0.029923400378902443, "mean_pred_prob_last_10": 0.13882839038851671, "mean_pred_prob_last_25": 0.07922900478006341, "mean_pred_prob_last_50": 0.04934452037268784, "mean_token_accuracy": 0.8657815515995025, "step": 24570 }, { "epoch": 0.43695447353918904, "grad_norm": 2.3265450973804205, "learning_rate": 0.0001, "loss": 0.7798, "mean_abs_error": 97.12228167369716, "mean_abs_error_last_10": 24.465172305093414, "mean_abs_error_last_25": 48.948395439398055, "mean_abs_error_last_50": 70.90909681558051, "mean_pred_prob": 0.06043595718219876, "mean_pred_prob_last_10": 0.29491555169224737, "mean_pred_prob_last_25": 0.16481233723461627, "mean_pred_prob_last_50": 0.10223914748057723, "mean_token_accuracy": 0.8790363371372223, "step": 24580 }, { "epoch": 0.437132241835991, "grad_norm": 2.9222397030266265, "learning_rate": 0.0001, "loss": 0.7814, "mean_abs_error": 351.63726473646176, "mean_abs_error_last_10": 47.47895579706502, "mean_abs_error_last_25": 85.47441618818746, "mean_abs_error_last_50": 166.79053299406732, "mean_pred_prob": 0.02832902721129358, "mean_pred_prob_last_10": 0.15214393958449363, "mean_pred_prob_last_25": 0.08409996926784516, "mean_pred_prob_last_50": 0.049308438505977395, "mean_token_accuracy": 0.8838298976421356, "step": 24590 }, { "epoch": 0.4373100101327929, "grad_norm": 1.2356879251061306, "learning_rate": 0.0001, "loss": 0.8019, "mean_abs_error": 316.9830872784972, "mean_abs_error_last_10": 165.8454102407121, "mean_abs_error_last_25": 164.5005021988025, "mean_abs_error_last_50": 204.89522417552453, "mean_pred_prob": 0.037852605991065505, "mean_pred_prob_last_10": 0.17465137857943774, "mean_pred_prob_last_25": 0.10375647060573101, "mean_pred_prob_last_50": 0.06448056111112237, "mean_token_accuracy": 0.874577921628952, "step": 24600 }, { "epoch": 0.43748777842959485, "grad_norm": 1.157965550411057, "learning_rate": 0.0001, "loss": 0.7747, "mean_abs_error": 147.0309895433848, "mean_abs_error_last_10": 57.90651032139853, "mean_abs_error_last_25": 72.8581079502307, "mean_abs_error_last_50": 97.07572619771288, "mean_pred_prob": 0.04550989712588489, "mean_pred_prob_last_10": 0.23361623026430606, "mean_pred_prob_last_25": 0.1264376886188984, "mean_pred_prob_last_50": 0.07746091969311238, "mean_token_accuracy": 0.870782446861267, "step": 24610 }, { "epoch": 0.4376655467263968, "grad_norm": 1.1977308048646726, "learning_rate": 0.0001, "loss": 0.6919, "mean_abs_error": 259.50870614038564, "mean_abs_error_last_10": 19.739258494079536, "mean_abs_error_last_25": 57.816841113210444, "mean_abs_error_last_50": 125.3855397176751, "mean_pred_prob": 0.04180570454336703, "mean_pred_prob_last_10": 0.21248369589447974, "mean_pred_prob_last_25": 0.11870371587574483, "mean_pred_prob_last_50": 0.07102494910359383, "mean_token_accuracy": 0.8702456891536713, "step": 24620 }, { "epoch": 0.4378433150231988, "grad_norm": 1.2055078775150259, "learning_rate": 0.0001, "loss": 0.7825, "mean_abs_error": 324.63219235046415, "mean_abs_error_last_10": 137.66918046875674, "mean_abs_error_last_25": 182.19492128811947, "mean_abs_error_last_50": 284.2491597588243, "mean_pred_prob": 0.038032881333492696, "mean_pred_prob_last_10": 0.18350213458761572, "mean_pred_prob_last_25": 0.10225175628438592, "mean_pred_prob_last_50": 0.06329284715466202, "mean_token_accuracy": 0.8693164348602295, "step": 24630 }, { "epoch": 0.4380210833200007, "grad_norm": 1.1532226969470276, "learning_rate": 0.0001, "loss": 0.6866, "mean_abs_error": 149.2972273994769, "mean_abs_error_last_10": 26.176368829291597, "mean_abs_error_last_25": 50.959982378573855, "mean_abs_error_last_50": 85.50921559209226, "mean_pred_prob": 0.05690958043560386, "mean_pred_prob_last_10": 0.27816074937582014, "mean_pred_prob_last_25": 0.1536364084109664, "mean_pred_prob_last_50": 0.09483012389391661, "mean_token_accuracy": 0.8699406266212464, "step": 24640 }, { "epoch": 0.43819885161680266, "grad_norm": 1.1730688414872126, "learning_rate": 0.0001, "loss": 0.7596, "mean_abs_error": 187.68195570057173, "mean_abs_error_last_10": 49.62197307122649, "mean_abs_error_last_25": 69.4454869864362, "mean_abs_error_last_50": 100.11533416131935, "mean_pred_prob": 0.03490868560038507, "mean_pred_prob_last_10": 0.18094415478408338, "mean_pred_prob_last_25": 0.09624274298548699, "mean_pred_prob_last_50": 0.058633662015199664, "mean_token_accuracy": 0.8707710206508636, "step": 24650 }, { "epoch": 0.4383766199136046, "grad_norm": 1.6678473066424373, "learning_rate": 0.0001, "loss": 0.8152, "mean_abs_error": 764.207153359883, "mean_abs_error_last_10": 474.678034873892, "mean_abs_error_last_25": 530.4487712933386, "mean_abs_error_last_50": 639.7036511613209, "mean_pred_prob": 0.023747390299104155, "mean_pred_prob_last_10": 0.12208611464593559, "mean_pred_prob_last_25": 0.0677257773349993, "mean_pred_prob_last_50": 0.03975913878530264, "mean_token_accuracy": 0.8634759962558747, "step": 24660 }, { "epoch": 0.43855438821040654, "grad_norm": 2.1124966207766844, "learning_rate": 0.0001, "loss": 0.6622, "mean_abs_error": 971.6448461133723, "mean_abs_error_last_10": 374.82296993649703, "mean_abs_error_last_25": 492.77055885503313, "mean_abs_error_last_50": 698.343800916971, "mean_pred_prob": 0.030419728349079377, "mean_pred_prob_last_10": 0.14734183682594448, "mean_pred_prob_last_25": 0.08182762696524151, "mean_pred_prob_last_50": 0.0513355746603338, "mean_token_accuracy": 0.8777786433696747, "step": 24670 }, { "epoch": 0.4387321565072085, "grad_norm": 1.568762983759965, "learning_rate": 0.0001, "loss": 0.7811, "mean_abs_error": 423.1799816809893, "mean_abs_error_last_10": 59.856116084171575, "mean_abs_error_last_25": 118.98803220113783, "mean_abs_error_last_50": 229.1667743721519, "mean_pred_prob": 0.028100566705688834, "mean_pred_prob_last_10": 0.1458479266613722, "mean_pred_prob_last_25": 0.07910898048430681, "mean_pred_prob_last_50": 0.047427271399646996, "mean_token_accuracy": 0.8709474921226501, "step": 24680 }, { "epoch": 0.4389099248040105, "grad_norm": 1.4443501641517875, "learning_rate": 0.0001, "loss": 0.8304, "mean_abs_error": 609.5760642930321, "mean_abs_error_last_10": 168.7686593490451, "mean_abs_error_last_25": 243.8880442388096, "mean_abs_error_last_50": 321.15530146773074, "mean_pred_prob": 0.028070510330144315, "mean_pred_prob_last_10": 0.14469813390169292, "mean_pred_prob_last_25": 0.07763332232134416, "mean_pred_prob_last_50": 0.047507130715530366, "mean_token_accuracy": 0.8697534620761871, "step": 24690 }, { "epoch": 0.4390876931008124, "grad_norm": 1.378794932415927, "learning_rate": 0.0001, "loss": 0.8243, "mean_abs_error": 973.9102740133858, "mean_abs_error_last_10": 496.5797118691272, "mean_abs_error_last_25": 525.8604266268426, "mean_abs_error_last_50": 672.1238122355196, "mean_pred_prob": 0.03787761273852084, "mean_pred_prob_last_10": 0.1844734433980193, "mean_pred_prob_last_25": 0.10764561950927601, "mean_pred_prob_last_50": 0.06492449821380433, "mean_token_accuracy": 0.8779324769973755, "step": 24700 }, { "epoch": 0.43926546139761435, "grad_norm": 1.0463801717148846, "learning_rate": 0.0001, "loss": 0.697, "mean_abs_error": 501.2613859259103, "mean_abs_error_last_10": 82.2483668549976, "mean_abs_error_last_25": 113.65472122140413, "mean_abs_error_last_50": 236.61499956492293, "mean_pred_prob": 0.027149089006707074, "mean_pred_prob_last_10": 0.14935692818835378, "mean_pred_prob_last_25": 0.07867216290906072, "mean_pred_prob_last_50": 0.04653461035341024, "mean_token_accuracy": 0.8818603813648224, "step": 24710 }, { "epoch": 0.4394432296944163, "grad_norm": 1.3016691189683203, "learning_rate": 0.0001, "loss": 0.8422, "mean_abs_error": 681.9029044252252, "mean_abs_error_last_10": 260.34783353115836, "mean_abs_error_last_25": 239.76850202791883, "mean_abs_error_last_50": 362.3270972098565, "mean_pred_prob": 0.0210540333064273, "mean_pred_prob_last_10": 0.11697359457612037, "mean_pred_prob_last_25": 0.06317312987521291, "mean_pred_prob_last_50": 0.03704279507510364, "mean_token_accuracy": 0.8724995732307435, "step": 24720 }, { "epoch": 0.43962099799121823, "grad_norm": 1.4305322158852554, "learning_rate": 0.0001, "loss": 0.7173, "mean_abs_error": 657.7623143465605, "mean_abs_error_last_10": 339.4750020197794, "mean_abs_error_last_25": 401.6084958009304, "mean_abs_error_last_50": 483.47680668841275, "mean_pred_prob": 0.051916205455199815, "mean_pred_prob_last_10": 0.2465318358037621, "mean_pred_prob_last_25": 0.1420109262515325, "mean_pred_prob_last_50": 0.08842651032900903, "mean_token_accuracy": 0.8812264800071716, "step": 24730 }, { "epoch": 0.43979876628802017, "grad_norm": 1.0778147660893864, "learning_rate": 0.0001, "loss": 0.7964, "mean_abs_error": 846.7179233016543, "mean_abs_error_last_10": 214.28779075847228, "mean_abs_error_last_25": 252.8706078972074, "mean_abs_error_last_50": 482.94304130346836, "mean_pred_prob": 0.029213067312957718, "mean_pred_prob_last_10": 0.14522185327950865, "mean_pred_prob_last_25": 0.08152471585199236, "mean_pred_prob_last_50": 0.0499194789736066, "mean_token_accuracy": 0.8719278216362, "step": 24740 }, { "epoch": 0.43997653458482217, "grad_norm": 2.7662107480260065, "learning_rate": 0.0001, "loss": 0.7412, "mean_abs_error": 1245.2763200826387, "mean_abs_error_last_10": 483.18904015792975, "mean_abs_error_last_25": 659.5711533284311, "mean_abs_error_last_50": 860.4668081786106, "mean_pred_prob": 0.028888460584857965, "mean_pred_prob_last_10": 0.14214458384958562, "mean_pred_prob_last_25": 0.07966717993549537, "mean_pred_prob_last_50": 0.04842133250785992, "mean_token_accuracy": 0.8778417825698852, "step": 24750 }, { "epoch": 0.4401543028816241, "grad_norm": 1.7556804634321794, "learning_rate": 0.0001, "loss": 0.7195, "mean_abs_error": 145.27878834588145, "mean_abs_error_last_10": 105.47621730259792, "mean_abs_error_last_25": 103.88799660857869, "mean_abs_error_last_50": 108.12859077401637, "mean_pred_prob": 0.0673807048238814, "mean_pred_prob_last_10": 0.3051944676786661, "mean_pred_prob_last_25": 0.17948705554008484, "mean_pred_prob_last_50": 0.11210512556135654, "mean_token_accuracy": 0.8681845605373383, "step": 24760 }, { "epoch": 0.44033207117842604, "grad_norm": 1.347962571211748, "learning_rate": 0.0001, "loss": 0.8126, "mean_abs_error": 1316.4441274373996, "mean_abs_error_last_10": 496.91885322323435, "mean_abs_error_last_25": 606.289577076557, "mean_abs_error_last_50": 826.5445278308798, "mean_pred_prob": 0.028147992210870144, "mean_pred_prob_last_10": 0.15269324399414472, "mean_pred_prob_last_25": 0.0814188248332357, "mean_pred_prob_last_50": 0.0491091973381117, "mean_token_accuracy": 0.8707551062107086, "step": 24770 }, { "epoch": 0.440509839475228, "grad_norm": 1.1990736372056363, "learning_rate": 0.0001, "loss": 0.7376, "mean_abs_error": 391.29568709257643, "mean_abs_error_last_10": 79.7527897798, "mean_abs_error_last_25": 135.65435452851995, "mean_abs_error_last_50": 261.7996102032264, "mean_pred_prob": 0.03434514490654692, "mean_pred_prob_last_10": 0.16044408833840862, "mean_pred_prob_last_25": 0.09548791863489896, "mean_pred_prob_last_50": 0.05809777129907161, "mean_token_accuracy": 0.8757867693901062, "step": 24780 }, { "epoch": 0.4406876077720299, "grad_norm": 2.8262454280066707, "learning_rate": 0.0001, "loss": 0.7534, "mean_abs_error": 786.8557088469472, "mean_abs_error_last_10": 192.5668442408558, "mean_abs_error_last_25": 246.01019093667483, "mean_abs_error_last_50": 397.3137304768748, "mean_pred_prob": 0.034242252045078206, "mean_pred_prob_last_10": 0.16754098767414688, "mean_pred_prob_last_25": 0.0955750718829222, "mean_pred_prob_last_50": 0.05896681309677661, "mean_token_accuracy": 0.8715869665145874, "step": 24790 }, { "epoch": 0.44086537606883186, "grad_norm": 1.5818161833308109, "learning_rate": 0.0001, "loss": 0.8043, "mean_abs_error": 967.5141700785986, "mean_abs_error_last_10": 534.2563489103968, "mean_abs_error_last_25": 561.6410817392672, "mean_abs_error_last_50": 667.0665553181134, "mean_pred_prob": 0.045548259004135616, "mean_pred_prob_last_10": 0.20664628217346034, "mean_pred_prob_last_25": 0.12368008289777208, "mean_pred_prob_last_50": 0.07598531853873283, "mean_token_accuracy": 0.8729816019535065, "step": 24800 }, { "epoch": 0.44104314436563385, "grad_norm": 1.705835646595248, "learning_rate": 0.0001, "loss": 0.7505, "mean_abs_error": 994.5297919323187, "mean_abs_error_last_10": 511.4551722157993, "mean_abs_error_last_25": 638.7820532595357, "mean_abs_error_last_50": 804.0212935043817, "mean_pred_prob": 0.03610105089610442, "mean_pred_prob_last_10": 0.17911738866823726, "mean_pred_prob_last_25": 0.10165008103649598, "mean_pred_prob_last_50": 0.06200780989602208, "mean_token_accuracy": 0.8732902765274048, "step": 24810 }, { "epoch": 0.4412209126624358, "grad_norm": 2.0355460563521675, "learning_rate": 0.0001, "loss": 0.8362, "mean_abs_error": 229.07915099441038, "mean_abs_error_last_10": 47.38468428686241, "mean_abs_error_last_25": 72.97649818275647, "mean_abs_error_last_50": 139.42771286197404, "mean_pred_prob": 0.040540046570822594, "mean_pred_prob_last_10": 0.2038043923676014, "mean_pred_prob_last_25": 0.11286364663392305, "mean_pred_prob_last_50": 0.06872914927080273, "mean_token_accuracy": 0.8637424230575561, "step": 24820 }, { "epoch": 0.44139868095923773, "grad_norm": 1.4903839431848538, "learning_rate": 0.0001, "loss": 0.7334, "mean_abs_error": 690.517740546895, "mean_abs_error_last_10": 310.7103938850666, "mean_abs_error_last_25": 391.3393693224383, "mean_abs_error_last_50": 506.5294846563937, "mean_pred_prob": 0.023709245637292042, "mean_pred_prob_last_10": 0.12271790843224153, "mean_pred_prob_last_25": 0.06527786081423984, "mean_pred_prob_last_50": 0.03999605691060424, "mean_token_accuracy": 0.8735016763210297, "step": 24830 }, { "epoch": 0.44157644925603967, "grad_norm": 1.9446495295168535, "learning_rate": 0.0001, "loss": 0.6843, "mean_abs_error": 488.26940810565185, "mean_abs_error_last_10": 111.14574154148197, "mean_abs_error_last_25": 168.34582955718076, "mean_abs_error_last_50": 269.16174601149925, "mean_pred_prob": 0.051734638056950645, "mean_pred_prob_last_10": 0.22684443491743878, "mean_pred_prob_last_25": 0.1365312007139437, "mean_pred_prob_last_50": 0.08538190929684789, "mean_token_accuracy": 0.8761994779109955, "step": 24840 }, { "epoch": 0.4417542175528416, "grad_norm": 1.4486107527454195, "learning_rate": 0.0001, "loss": 0.7862, "mean_abs_error": 1761.0278188505242, "mean_abs_error_last_10": 878.1070362231849, "mean_abs_error_last_25": 967.059499156341, "mean_abs_error_last_50": 1238.8557338131884, "mean_pred_prob": 0.033185947711172045, "mean_pred_prob_last_10": 0.16627876357815694, "mean_pred_prob_last_25": 0.09252361702383496, "mean_pred_prob_last_50": 0.05611077121429844, "mean_token_accuracy": 0.8655652642250061, "step": 24850 }, { "epoch": 0.44193198584964355, "grad_norm": 1.452132317571474, "learning_rate": 0.0001, "loss": 0.8549, "mean_abs_error": 318.37981493214227, "mean_abs_error_last_10": 155.37401553964966, "mean_abs_error_last_25": 164.74151331022495, "mean_abs_error_last_50": 214.00660734646718, "mean_pred_prob": 0.03300456218421459, "mean_pred_prob_last_10": 0.17450275998562575, "mean_pred_prob_last_25": 0.09325620280578732, "mean_pred_prob_last_50": 0.05543652004562318, "mean_token_accuracy": 0.8806252419948578, "step": 24860 }, { "epoch": 0.44210975414644554, "grad_norm": 0.8519670036565123, "learning_rate": 0.0001, "loss": 0.7829, "mean_abs_error": 288.8249113470322, "mean_abs_error_last_10": 98.43450440098528, "mean_abs_error_last_25": 126.43585343212365, "mean_abs_error_last_50": 195.5648140296675, "mean_pred_prob": 0.03814577932935208, "mean_pred_prob_last_10": 0.1862957626581192, "mean_pred_prob_last_25": 0.10559994587674737, "mean_pred_prob_last_50": 0.06448029414750636, "mean_token_accuracy": 0.8831174552440644, "step": 24870 }, { "epoch": 0.4422875224432475, "grad_norm": 2.6841265730851607, "learning_rate": 0.0001, "loss": 0.7157, "mean_abs_error": 613.8753099673005, "mean_abs_error_last_10": 320.93270179758815, "mean_abs_error_last_25": 370.09114750149996, "mean_abs_error_last_50": 457.7757472171899, "mean_pred_prob": 0.052372167020803315, "mean_pred_prob_last_10": 0.2628580474061891, "mean_pred_prob_last_25": 0.14475290618720466, "mean_pred_prob_last_50": 0.08822629861242604, "mean_token_accuracy": 0.8827349722385407, "step": 24880 }, { "epoch": 0.4424652907400494, "grad_norm": 1.8619182730951047, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 98.9031927940907, "mean_abs_error_last_10": 12.534116946922278, "mean_abs_error_last_25": 29.833454542953262, "mean_abs_error_last_50": 53.964027116687895, "mean_pred_prob": 0.04839974893257022, "mean_pred_prob_last_10": 0.23366137370467185, "mean_pred_prob_last_25": 0.1326925378292799, "mean_pred_prob_last_50": 0.0817509263753891, "mean_token_accuracy": 0.8683359980583191, "step": 24890 }, { "epoch": 0.44264305903685136, "grad_norm": 3.57004579677706, "learning_rate": 0.0001, "loss": 0.8144, "mean_abs_error": 402.42800046955495, "mean_abs_error_last_10": 196.68862685042416, "mean_abs_error_last_25": 190.2289048393454, "mean_abs_error_last_50": 251.31031971895518, "mean_pred_prob": 0.046327380259754136, "mean_pred_prob_last_10": 0.21783001967705787, "mean_pred_prob_last_25": 0.12577066532103345, "mean_pred_prob_last_50": 0.07762052807956935, "mean_token_accuracy": 0.8773411333560943, "step": 24900 }, { "epoch": 0.4428208273336533, "grad_norm": 2.229489402727557, "learning_rate": 0.0001, "loss": 0.6954, "mean_abs_error": 228.57176794258734, "mean_abs_error_last_10": 31.695334476518205, "mean_abs_error_last_25": 87.93926680916464, "mean_abs_error_last_50": 111.68081705518948, "mean_pred_prob": 0.04947487153112888, "mean_pred_prob_last_10": 0.22923934683203698, "mean_pred_prob_last_25": 0.13023822251707315, "mean_pred_prob_last_50": 0.08246099036186934, "mean_token_accuracy": 0.8780246913433075, "step": 24910 }, { "epoch": 0.44299859563045524, "grad_norm": 1.8465860181312737, "learning_rate": 0.0001, "loss": 0.8077, "mean_abs_error": 292.63366032844664, "mean_abs_error_last_10": 145.57278689678606, "mean_abs_error_last_25": 181.81707832967646, "mean_abs_error_last_50": 198.20549854428677, "mean_pred_prob": 0.048646916868165135, "mean_pred_prob_last_10": 0.24392439536750316, "mean_pred_prob_last_25": 0.1352601457387209, "mean_pred_prob_last_50": 0.08239840702153742, "mean_token_accuracy": 0.8712671756744385, "step": 24920 }, { "epoch": 0.44317636392725723, "grad_norm": 1.6067120943229094, "learning_rate": 0.0001, "loss": 0.8064, "mean_abs_error": 1037.6553597167117, "mean_abs_error_last_10": 477.17866682901905, "mean_abs_error_last_25": 530.7539653837667, "mean_abs_error_last_50": 651.8398473995921, "mean_pred_prob": 0.028094734193291514, "mean_pred_prob_last_10": 0.12006434233044275, "mean_pred_prob_last_25": 0.07236169421812519, "mean_pred_prob_last_50": 0.045336570485960695, "mean_token_accuracy": 0.8723389446735382, "step": 24930 }, { "epoch": 0.44335413222405917, "grad_norm": 1.2930073381125233, "learning_rate": 0.0001, "loss": 0.6831, "mean_abs_error": 861.5447560984369, "mean_abs_error_last_10": 345.3376059354045, "mean_abs_error_last_25": 421.9096362880881, "mean_abs_error_last_50": 560.2643170998272, "mean_pred_prob": 0.05016532419831492, "mean_pred_prob_last_10": 0.23011467990581877, "mean_pred_prob_last_25": 0.13147232078481466, "mean_pred_prob_last_50": 0.08276439971814398, "mean_token_accuracy": 0.8676537096500396, "step": 24940 }, { "epoch": 0.4435319005208611, "grad_norm": 1.5414146088880976, "learning_rate": 0.0001, "loss": 0.8798, "mean_abs_error": 816.9501755173337, "mean_abs_error_last_10": 400.7775694160233, "mean_abs_error_last_25": 499.6575704168625, "mean_abs_error_last_50": 619.5048529695102, "mean_pred_prob": 0.04104025223350618, "mean_pred_prob_last_10": 0.18873941607307643, "mean_pred_prob_last_25": 0.10890097818337381, "mean_pred_prob_last_50": 0.06786939594021532, "mean_token_accuracy": 0.8711057305335999, "step": 24950 }, { "epoch": 0.44370966881766305, "grad_norm": 2.467751040687679, "learning_rate": 0.0001, "loss": 0.9558, "mean_abs_error": 289.2467218157007, "mean_abs_error_last_10": 83.75238832538405, "mean_abs_error_last_25": 89.74675063020234, "mean_abs_error_last_50": 199.20681928016492, "mean_pred_prob": 0.04957665530964732, "mean_pred_prob_last_10": 0.2510302674025297, "mean_pred_prob_last_25": 0.1441839661449194, "mean_pred_prob_last_50": 0.0854857163503766, "mean_token_accuracy": 0.8724304378032685, "step": 24960 }, { "epoch": 0.443887437114465, "grad_norm": 1.0048435800791538, "learning_rate": 0.0001, "loss": 0.7719, "mean_abs_error": 590.7565111099907, "mean_abs_error_last_10": 169.7735997264411, "mean_abs_error_last_25": 201.54008070940702, "mean_abs_error_last_50": 340.634482615643, "mean_pred_prob": 0.04276110628270544, "mean_pred_prob_last_10": 0.20444190287962555, "mean_pred_prob_last_25": 0.12054522960679606, "mean_pred_prob_last_50": 0.07298910167301073, "mean_token_accuracy": 0.8695617914199829, "step": 24970 }, { "epoch": 0.44406520541126693, "grad_norm": 1.7866334228616843, "learning_rate": 0.0001, "loss": 0.7931, "mean_abs_error": 1276.525488998947, "mean_abs_error_last_10": 780.1519300661756, "mean_abs_error_last_25": 821.4118903832347, "mean_abs_error_last_50": 942.0323561073626, "mean_pred_prob": 0.04832608131691814, "mean_pred_prob_last_10": 0.22011379515897717, "mean_pred_prob_last_25": 0.12911327130714198, "mean_pred_prob_last_50": 0.08012608716453543, "mean_token_accuracy": 0.8697487711906433, "step": 24980 }, { "epoch": 0.4442429737080689, "grad_norm": 2.2127456080081522, "learning_rate": 0.0001, "loss": 0.9274, "mean_abs_error": 419.95685421973224, "mean_abs_error_last_10": 152.54457591457583, "mean_abs_error_last_25": 189.25588092199436, "mean_abs_error_last_50": 258.2112853920158, "mean_pred_prob": 0.040612079296261074, "mean_pred_prob_last_10": 0.21148397624492646, "mean_pred_prob_last_25": 0.11442949343472719, "mean_pred_prob_last_50": 0.06955579933710396, "mean_token_accuracy": 0.8727945566177369, "step": 24990 }, { "epoch": 0.44442074200487086, "grad_norm": 1.0696778337810553, "learning_rate": 0.0001, "loss": 0.8246, "mean_abs_error": 1321.6470326585375, "mean_abs_error_last_10": 486.415062402073, "mean_abs_error_last_25": 585.8863349493232, "mean_abs_error_last_50": 807.6252831842875, "mean_pred_prob": 0.02218158553587273, "mean_pred_prob_last_10": 0.11538783079013229, "mean_pred_prob_last_25": 0.06452871659421362, "mean_pred_prob_last_50": 0.03830780395073816, "mean_token_accuracy": 0.8707023382186889, "step": 25000 }, { "epoch": 0.4445985103016728, "grad_norm": 1.536248031606648, "learning_rate": 0.0001, "loss": 0.6809, "mean_abs_error": 303.57070643495933, "mean_abs_error_last_10": 63.03524530727755, "mean_abs_error_last_25": 90.2764935845171, "mean_abs_error_last_50": 158.42974769628765, "mean_pred_prob": 0.036822191625833514, "mean_pred_prob_last_10": 0.2059483051300049, "mean_pred_prob_last_25": 0.10714491624385118, "mean_pred_prob_last_50": 0.06321483757346869, "mean_token_accuracy": 0.8748287677764892, "step": 25010 }, { "epoch": 0.44477627859847474, "grad_norm": 1.158817756718246, "learning_rate": 0.0001, "loss": 0.7492, "mean_abs_error": 283.8438929415858, "mean_abs_error_last_10": 83.01703122086587, "mean_abs_error_last_25": 116.09806158745512, "mean_abs_error_last_50": 177.5609031236293, "mean_pred_prob": 0.0416861824458465, "mean_pred_prob_last_10": 0.198615138605237, "mean_pred_prob_last_25": 0.11527504390105606, "mean_pred_prob_last_50": 0.0707038490101695, "mean_token_accuracy": 0.880205100774765, "step": 25020 }, { "epoch": 0.4449540468952767, "grad_norm": 1.9940853231043847, "learning_rate": 0.0001, "loss": 0.8152, "mean_abs_error": 261.895277571855, "mean_abs_error_last_10": 114.34650549203738, "mean_abs_error_last_25": 138.66567631354116, "mean_abs_error_last_50": 198.71885429001637, "mean_pred_prob": 0.03810483873821795, "mean_pred_prob_last_10": 0.19479708559811115, "mean_pred_prob_last_25": 0.10674955900758505, "mean_pred_prob_last_50": 0.06411000406369567, "mean_token_accuracy": 0.8727831542491913, "step": 25030 }, { "epoch": 0.4451318151920786, "grad_norm": 1.2999149785031925, "learning_rate": 0.0001, "loss": 0.7439, "mean_abs_error": 815.1806189622016, "mean_abs_error_last_10": 184.76742209328853, "mean_abs_error_last_25": 285.0841557534949, "mean_abs_error_last_50": 465.52132672382174, "mean_pred_prob": 0.040651467483257875, "mean_pred_prob_last_10": 0.19925076173385606, "mean_pred_prob_last_25": 0.11234559318982065, "mean_pred_prob_last_50": 0.06866737643722445, "mean_token_accuracy": 0.8781587839126587, "step": 25040 }, { "epoch": 0.4453095834888806, "grad_norm": 0.9036707511914409, "learning_rate": 0.0001, "loss": 0.6515, "mean_abs_error": 228.26731421148097, "mean_abs_error_last_10": 56.92941132471358, "mean_abs_error_last_25": 83.64605023310715, "mean_abs_error_last_50": 138.84050254286564, "mean_pred_prob": 0.04265695679932833, "mean_pred_prob_last_10": 0.18757064677774907, "mean_pred_prob_last_25": 0.10892506670206785, "mean_pred_prob_last_50": 0.06916234754025936, "mean_token_accuracy": 0.8779588282108307, "step": 25050 }, { "epoch": 0.44548735178568255, "grad_norm": 1.6263788887420092, "learning_rate": 0.0001, "loss": 0.8621, "mean_abs_error": 1026.3573899966398, "mean_abs_error_last_10": 615.5287341453987, "mean_abs_error_last_25": 652.2376833108149, "mean_abs_error_last_50": 795.2269242220232, "mean_pred_prob": 0.031535656929190733, "mean_pred_prob_last_10": 0.17536901732819388, "mean_pred_prob_last_25": 0.09390307811700041, "mean_pred_prob_last_50": 0.05477735901076812, "mean_token_accuracy": 0.8787413537502289, "step": 25060 }, { "epoch": 0.4456651200824845, "grad_norm": 1.7359608617985893, "learning_rate": 0.0001, "loss": 0.785, "mean_abs_error": 1312.6912096204678, "mean_abs_error_last_10": 827.3439784471402, "mean_abs_error_last_25": 912.6187768137279, "mean_abs_error_last_50": 1060.499854939851, "mean_pred_prob": 0.036092138468666234, "mean_pred_prob_last_10": 0.1723632168592303, "mean_pred_prob_last_25": 0.09690374626952689, "mean_pred_prob_last_50": 0.05944997479673475, "mean_token_accuracy": 0.8692715287208557, "step": 25070 }, { "epoch": 0.44584288837928643, "grad_norm": 1.876668187976231, "learning_rate": 0.0001, "loss": 0.7444, "mean_abs_error": 1203.6543270513425, "mean_abs_error_last_10": 462.50801395433336, "mean_abs_error_last_25": 628.222030367499, "mean_abs_error_last_50": 764.1672722881154, "mean_pred_prob": 0.02067612345854286, "mean_pred_prob_last_10": 0.1066661326913163, "mean_pred_prob_last_25": 0.06089071262395009, "mean_pred_prob_last_50": 0.03542303730500862, "mean_token_accuracy": 0.8678562819957734, "step": 25080 }, { "epoch": 0.44602065667608837, "grad_norm": 1.6118455505486822, "learning_rate": 0.0001, "loss": 0.6796, "mean_abs_error": 209.05714366290894, "mean_abs_error_last_10": 83.77237389184918, "mean_abs_error_last_25": 110.02007179171599, "mean_abs_error_last_50": 143.48835490464143, "mean_pred_prob": 0.041081803198903796, "mean_pred_prob_last_10": 0.20581078715622425, "mean_pred_prob_last_25": 0.11702045127749443, "mean_pred_prob_last_50": 0.0707577264867723, "mean_token_accuracy": 0.8765123903751373, "step": 25090 }, { "epoch": 0.44619842497289036, "grad_norm": 1.9301002239794547, "learning_rate": 0.0001, "loss": 0.7449, "mean_abs_error": 288.19861283921466, "mean_abs_error_last_10": 34.69426529458907, "mean_abs_error_last_25": 58.96128908704382, "mean_abs_error_last_50": 118.07692172205384, "mean_pred_prob": 0.05588006768375635, "mean_pred_prob_last_10": 0.24985675625503062, "mean_pred_prob_last_25": 0.15208316445350648, "mean_pred_prob_last_50": 0.09516863534227013, "mean_token_accuracy": 0.8709718763828278, "step": 25100 }, { "epoch": 0.4463761932696923, "grad_norm": 2.472470693327289, "learning_rate": 0.0001, "loss": 0.8203, "mean_abs_error": 1158.873997965849, "mean_abs_error_last_10": 541.2443191641825, "mean_abs_error_last_25": 620.1370360254448, "mean_abs_error_last_50": 795.0053083412366, "mean_pred_prob": 0.04060911198612303, "mean_pred_prob_last_10": 0.18259312584414147, "mean_pred_prob_last_25": 0.10615840641548857, "mean_pred_prob_last_50": 0.06667554387822747, "mean_token_accuracy": 0.8785027742385865, "step": 25110 }, { "epoch": 0.44655396156649424, "grad_norm": 1.364095186852675, "learning_rate": 0.0001, "loss": 0.729, "mean_abs_error": 117.77923164027479, "mean_abs_error_last_10": 28.262006889179666, "mean_abs_error_last_25": 43.47778709875415, "mean_abs_error_last_50": 68.60800435807079, "mean_pred_prob": 0.042353205382823944, "mean_pred_prob_last_10": 0.22771501280367373, "mean_pred_prob_last_25": 0.12425308115780354, "mean_pred_prob_last_50": 0.07323181126266717, "mean_token_accuracy": 0.8733776807785034, "step": 25120 }, { "epoch": 0.4467317298632962, "grad_norm": 2.6365603846791146, "learning_rate": 0.0001, "loss": 0.7919, "mean_abs_error": 285.40992123917397, "mean_abs_error_last_10": 171.914414806861, "mean_abs_error_last_25": 150.20380698348123, "mean_abs_error_last_50": 196.65324292972565, "mean_pred_prob": 0.04160765493288636, "mean_pred_prob_last_10": 0.19805174134671688, "mean_pred_prob_last_25": 0.11408956330269575, "mean_pred_prob_last_50": 0.07048673839308321, "mean_token_accuracy": 0.8702625811100007, "step": 25130 }, { "epoch": 0.4469094981600981, "grad_norm": 1.8869590848131281, "learning_rate": 0.0001, "loss": 0.7628, "mean_abs_error": 391.49021764482967, "mean_abs_error_last_10": 166.57820418258575, "mean_abs_error_last_25": 177.16886479161036, "mean_abs_error_last_50": 212.93117266213198, "mean_pred_prob": 0.025836818758398293, "mean_pred_prob_last_10": 0.1309467988088727, "mean_pred_prob_last_25": 0.07233041478320956, "mean_pred_prob_last_50": 0.04369858140125871, "mean_token_accuracy": 0.8768525838851928, "step": 25140 }, { "epoch": 0.44708726645690006, "grad_norm": 1.183198664960285, "learning_rate": 0.0001, "loss": 0.72, "mean_abs_error": 234.57407570574574, "mean_abs_error_last_10": 71.3143264413675, "mean_abs_error_last_25": 111.71907721519524, "mean_abs_error_last_50": 166.98045724825437, "mean_pred_prob": 0.0319390628952533, "mean_pred_prob_last_10": 0.17201018519699574, "mean_pred_prob_last_25": 0.08987970892339944, "mean_pred_prob_last_50": 0.05396935734897852, "mean_token_accuracy": 0.8734710514545441, "step": 25150 }, { "epoch": 0.44726503475370205, "grad_norm": 1.7641699881224078, "learning_rate": 0.0001, "loss": 0.7517, "mean_abs_error": 414.64321766054246, "mean_abs_error_last_10": 160.30380554132267, "mean_abs_error_last_25": 188.922242979754, "mean_abs_error_last_50": 244.40681996461262, "mean_pred_prob": 0.03573516042088158, "mean_pred_prob_last_10": 0.16058103734394535, "mean_pred_prob_last_25": 0.09388323647435755, "mean_pred_prob_last_50": 0.058773092192132025, "mean_token_accuracy": 0.8637688517570495, "step": 25160 }, { "epoch": 0.447442803050504, "grad_norm": 1.3591767793642062, "learning_rate": 0.0001, "loss": 0.7454, "mean_abs_error": 212.88521141463457, "mean_abs_error_last_10": 84.20149706458099, "mean_abs_error_last_25": 123.17979134670108, "mean_abs_error_last_50": 153.94135827549044, "mean_pred_prob": 0.04208442931994796, "mean_pred_prob_last_10": 0.21426290944218634, "mean_pred_prob_last_25": 0.11617109719663858, "mean_pred_prob_last_50": 0.06948224548250437, "mean_token_accuracy": 0.8700044810771942, "step": 25170 }, { "epoch": 0.44762057134730593, "grad_norm": 1.14511481013555, "learning_rate": 0.0001, "loss": 0.7588, "mean_abs_error": 1087.8496804100832, "mean_abs_error_last_10": 508.48648320981346, "mean_abs_error_last_25": 550.6808118833883, "mean_abs_error_last_50": 728.8183167578607, "mean_pred_prob": 0.03418575219984632, "mean_pred_prob_last_10": 0.16818372679990717, "mean_pred_prob_last_25": 0.09590596412890591, "mean_pred_prob_last_50": 0.05775526776560582, "mean_token_accuracy": 0.8709015905857086, "step": 25180 }, { "epoch": 0.44779833964410787, "grad_norm": 1.1246158479280584, "learning_rate": 0.0001, "loss": 0.9209, "mean_abs_error": 671.6103102861905, "mean_abs_error_last_10": 125.09240007001782, "mean_abs_error_last_25": 176.17590274086055, "mean_abs_error_last_50": 342.1589350114544, "mean_pred_prob": 0.036615834577241914, "mean_pred_prob_last_10": 0.18512205535080284, "mean_pred_prob_last_25": 0.10275055015226826, "mean_pred_prob_last_50": 0.062229531747289, "mean_token_accuracy": 0.8651061952114105, "step": 25190 }, { "epoch": 0.4479761079409098, "grad_norm": 2.3108727650115557, "learning_rate": 0.0001, "loss": 0.7844, "mean_abs_error": 213.84928949257187, "mean_abs_error_last_10": 91.15323571912835, "mean_abs_error_last_25": 92.77171605090746, "mean_abs_error_last_50": 120.97964195793948, "mean_pred_prob": 0.037462909473106265, "mean_pred_prob_last_10": 0.19471207931637763, "mean_pred_prob_last_25": 0.10484179146587849, "mean_pred_prob_last_50": 0.06273059360682964, "mean_token_accuracy": 0.8732879519462585, "step": 25200 }, { "epoch": 0.44815387623771175, "grad_norm": 0.9891442307583974, "learning_rate": 0.0001, "loss": 0.79, "mean_abs_error": 334.5076253929682, "mean_abs_error_last_10": 79.65709771128596, "mean_abs_error_last_25": 119.19426998166982, "mean_abs_error_last_50": 210.0684109339658, "mean_pred_prob": 0.034703594725579025, "mean_pred_prob_last_10": 0.18931678794324397, "mean_pred_prob_last_25": 0.1023320073261857, "mean_pred_prob_last_50": 0.06037094565108418, "mean_token_accuracy": 0.8734497129917145, "step": 25210 }, { "epoch": 0.44833164453451374, "grad_norm": 2.833266556191575, "learning_rate": 0.0001, "loss": 0.9669, "mean_abs_error": 875.2802855288552, "mean_abs_error_last_10": 470.15639421576117, "mean_abs_error_last_25": 578.9844416493282, "mean_abs_error_last_50": 684.7483525734285, "mean_pred_prob": 0.03898405953805195, "mean_pred_prob_last_10": 0.1965561326855095, "mean_pred_prob_last_25": 0.11173206531093456, "mean_pred_prob_last_50": 0.06707613285107072, "mean_token_accuracy": 0.8660319685935974, "step": 25220 }, { "epoch": 0.4485094128313157, "grad_norm": 1.2239915765261342, "learning_rate": 0.0001, "loss": 0.6975, "mean_abs_error": 864.2578737285888, "mean_abs_error_last_10": 398.95174928694115, "mean_abs_error_last_25": 455.14755581770277, "mean_abs_error_last_50": 550.6451840493522, "mean_pred_prob": 0.016836047085234895, "mean_pred_prob_last_10": 0.09205707695218734, "mean_pred_prob_last_25": 0.04734168343711644, "mean_pred_prob_last_50": 0.028932007891125978, "mean_token_accuracy": 0.8663255631923675, "step": 25230 }, { "epoch": 0.4486871811281176, "grad_norm": 1.2255690193317732, "learning_rate": 0.0001, "loss": 0.7143, "mean_abs_error": 335.56270105096104, "mean_abs_error_last_10": 153.4824579836931, "mean_abs_error_last_25": 184.8598289536923, "mean_abs_error_last_50": 211.98217791509896, "mean_pred_prob": 0.0386456839623861, "mean_pred_prob_last_10": 0.1736377980094403, "mean_pred_prob_last_25": 0.10323086080607027, "mean_pred_prob_last_50": 0.06417059281375259, "mean_token_accuracy": 0.8818370223045349, "step": 25240 }, { "epoch": 0.44886494942491956, "grad_norm": 0.9674788467911019, "learning_rate": 0.0001, "loss": 0.7338, "mean_abs_error": 864.7086803623577, "mean_abs_error_last_10": 202.23342662329594, "mean_abs_error_last_25": 329.7714751631954, "mean_abs_error_last_50": 505.1529297925625, "mean_pred_prob": 0.03360968854976818, "mean_pred_prob_last_10": 0.16581482402980327, "mean_pred_prob_last_25": 0.09599981773062609, "mean_pred_prob_last_50": 0.05697213878156617, "mean_token_accuracy": 0.8758626222610474, "step": 25250 }, { "epoch": 0.4490427177217215, "grad_norm": 1.153403694814061, "learning_rate": 0.0001, "loss": 0.8424, "mean_abs_error": 314.3137277284824, "mean_abs_error_last_10": 78.46553723889124, "mean_abs_error_last_25": 102.27807415096328, "mean_abs_error_last_50": 161.3580688371665, "mean_pred_prob": 0.03389739193953574, "mean_pred_prob_last_10": 0.17192201856523753, "mean_pred_prob_last_25": 0.09298656079918147, "mean_pred_prob_last_50": 0.056616001995280385, "mean_token_accuracy": 0.8690725862979889, "step": 25260 }, { "epoch": 0.44922048601852343, "grad_norm": 1.0090134469081482, "learning_rate": 0.0001, "loss": 0.8709, "mean_abs_error": 315.0246731653942, "mean_abs_error_last_10": 156.32609138810417, "mean_abs_error_last_25": 162.1910930361118, "mean_abs_error_last_50": 221.749881607478, "mean_pred_prob": 0.04255972849205136, "mean_pred_prob_last_10": 0.20573733262717725, "mean_pred_prob_last_25": 0.118169279769063, "mean_pred_prob_last_50": 0.07205657195299864, "mean_token_accuracy": 0.869562953710556, "step": 25270 }, { "epoch": 0.44939825431532543, "grad_norm": 1.2123564712239208, "learning_rate": 0.0001, "loss": 0.7643, "mean_abs_error": 355.3542405015854, "mean_abs_error_last_10": 80.56718785912184, "mean_abs_error_last_25": 126.81245520478042, "mean_abs_error_last_50": 193.57457307435823, "mean_pred_prob": 0.035923552326858045, "mean_pred_prob_last_10": 0.1781059205532074, "mean_pred_prob_last_25": 0.09711293252184987, "mean_pred_prob_last_50": 0.05962966321967542, "mean_token_accuracy": 0.874667489528656, "step": 25280 }, { "epoch": 0.44957602261212737, "grad_norm": 0.9174902380075214, "learning_rate": 0.0001, "loss": 0.7955, "mean_abs_error": 653.1817638143792, "mean_abs_error_last_10": 225.15636179961842, "mean_abs_error_last_25": 227.73149515961555, "mean_abs_error_last_50": 346.149696320074, "mean_pred_prob": 0.03867152197053656, "mean_pred_prob_last_10": 0.19236130386125297, "mean_pred_prob_last_25": 0.10466386291664094, "mean_pred_prob_last_50": 0.06388860389124602, "mean_token_accuracy": 0.8706487238407135, "step": 25290 }, { "epoch": 0.4497537909089293, "grad_norm": 1.3380526456512463, "learning_rate": 0.0001, "loss": 0.7555, "mean_abs_error": 250.82065341520448, "mean_abs_error_last_10": 60.396359571101605, "mean_abs_error_last_25": 108.28334960228224, "mean_abs_error_last_50": 175.95934048541685, "mean_pred_prob": 0.046052097575739026, "mean_pred_prob_last_10": 0.23071364760398866, "mean_pred_prob_last_25": 0.12757863998413085, "mean_pred_prob_last_50": 0.07734335688874125, "mean_token_accuracy": 0.8788107216358185, "step": 25300 }, { "epoch": 0.44993155920573125, "grad_norm": 1.403476318135201, "learning_rate": 0.0001, "loss": 0.8718, "mean_abs_error": 355.2317757081655, "mean_abs_error_last_10": 184.6466427090064, "mean_abs_error_last_25": 224.03733110917196, "mean_abs_error_last_50": 246.4345530415986, "mean_pred_prob": 0.02659372889902443, "mean_pred_prob_last_10": 0.14545001275837421, "mean_pred_prob_last_25": 0.07717004269361497, "mean_pred_prob_last_50": 0.04534241398796439, "mean_token_accuracy": 0.8675140619277955, "step": 25310 }, { "epoch": 0.4501093275025332, "grad_norm": 1.2941711235690185, "learning_rate": 0.0001, "loss": 0.7726, "mean_abs_error": 1675.6156511397726, "mean_abs_error_last_10": 746.098499719559, "mean_abs_error_last_25": 828.5103307372681, "mean_abs_error_last_50": 1055.7052034039966, "mean_pred_prob": 0.033468260709196326, "mean_pred_prob_last_10": 0.15874822397599928, "mean_pred_prob_last_25": 0.08753057308204007, "mean_pred_prob_last_50": 0.05526753001031466, "mean_token_accuracy": 0.8664167702198029, "step": 25320 }, { "epoch": 0.4502870957993351, "grad_norm": 3.087751924797266, "learning_rate": 0.0001, "loss": 0.7776, "mean_abs_error": 217.65874125327565, "mean_abs_error_last_10": 44.47379813083681, "mean_abs_error_last_25": 63.03224337953194, "mean_abs_error_last_50": 109.18642202630699, "mean_pred_prob": 0.050089096650481224, "mean_pred_prob_last_10": 0.23605770487338304, "mean_pred_prob_last_25": 0.13515634052455425, "mean_pred_prob_last_50": 0.0827550639398396, "mean_token_accuracy": 0.8857820808887482, "step": 25330 }, { "epoch": 0.4504648640961371, "grad_norm": 0.8032194204896298, "learning_rate": 0.0001, "loss": 0.7544, "mean_abs_error": 507.04162691278015, "mean_abs_error_last_10": 216.85842751280103, "mean_abs_error_last_25": 202.167771606293, "mean_abs_error_last_50": 273.74944379633433, "mean_pred_prob": 0.03194375874008983, "mean_pred_prob_last_10": 0.15295249737100675, "mean_pred_prob_last_25": 0.08569816849194467, "mean_pred_prob_last_50": 0.053232153225690124, "mean_token_accuracy": 0.8737509608268738, "step": 25340 }, { "epoch": 0.45064263239293906, "grad_norm": 1.1781680408489297, "learning_rate": 0.0001, "loss": 0.722, "mean_abs_error": 263.9479936664405, "mean_abs_error_last_10": 63.86203991688882, "mean_abs_error_last_25": 109.77035691363207, "mean_abs_error_last_50": 182.7790610155745, "mean_pred_prob": 0.04205800569616258, "mean_pred_prob_last_10": 0.19479480627924203, "mean_pred_prob_last_25": 0.11127981711179018, "mean_pred_prob_last_50": 0.06981408782303333, "mean_token_accuracy": 0.8799973726272583, "step": 25350 }, { "epoch": 0.450820400689741, "grad_norm": 2.414076845292201, "learning_rate": 0.0001, "loss": 0.7984, "mean_abs_error": 313.8951944078697, "mean_abs_error_last_10": 85.7702422691697, "mean_abs_error_last_25": 110.67894451272466, "mean_abs_error_last_50": 190.49470938481915, "mean_pred_prob": 0.039556148229166865, "mean_pred_prob_last_10": 0.1799789022654295, "mean_pred_prob_last_25": 0.10494778892025351, "mean_pred_prob_last_50": 0.06501611871644855, "mean_token_accuracy": 0.8764083743095398, "step": 25360 }, { "epoch": 0.45099816898654294, "grad_norm": 1.7506510213653015, "learning_rate": 0.0001, "loss": 0.6401, "mean_abs_error": 553.3855221270625, "mean_abs_error_last_10": 152.6741909341112, "mean_abs_error_last_25": 257.99639042776795, "mean_abs_error_last_50": 330.31585502013553, "mean_pred_prob": 0.0353998061211314, "mean_pred_prob_last_10": 0.18117368834791706, "mean_pred_prob_last_25": 0.09797313986928202, "mean_pred_prob_last_50": 0.05948122456320561, "mean_token_accuracy": 0.8864789187908173, "step": 25370 }, { "epoch": 0.4511759372833449, "grad_norm": 1.2115116220115578, "learning_rate": 0.0001, "loss": 0.8142, "mean_abs_error": 542.3576739214136, "mean_abs_error_last_10": 130.28103968880106, "mean_abs_error_last_25": 168.85382856526994, "mean_abs_error_last_50": 294.41478026920583, "mean_pred_prob": 0.0332171703223139, "mean_pred_prob_last_10": 0.16977828606031836, "mean_pred_prob_last_25": 0.09193767316173762, "mean_pred_prob_last_50": 0.0558263789745979, "mean_token_accuracy": 0.887037843465805, "step": 25380 }, { "epoch": 0.4513537055801468, "grad_norm": 1.1156641092379345, "learning_rate": 0.0001, "loss": 0.6329, "mean_abs_error": 303.341439781403, "mean_abs_error_last_10": 93.76782174975578, "mean_abs_error_last_25": 152.77985739330705, "mean_abs_error_last_50": 214.6292470229554, "mean_pred_prob": 0.03271659435704351, "mean_pred_prob_last_10": 0.17082208767533302, "mean_pred_prob_last_25": 0.09372628694400191, "mean_pred_prob_last_50": 0.055562407989054916, "mean_token_accuracy": 0.8836990594863892, "step": 25390 }, { "epoch": 0.4515314738769488, "grad_norm": 2.9369434129574867, "learning_rate": 0.0001, "loss": 0.8409, "mean_abs_error": 402.3007466715527, "mean_abs_error_last_10": 242.69390110597837, "mean_abs_error_last_25": 230.34305084488363, "mean_abs_error_last_50": 253.92139829937486, "mean_pred_prob": 0.03706320861238055, "mean_pred_prob_last_10": 0.195279450295493, "mean_pred_prob_last_25": 0.1024136182735674, "mean_pred_prob_last_50": 0.06276532098418101, "mean_token_accuracy": 0.8730865538120269, "step": 25400 }, { "epoch": 0.45170924217375075, "grad_norm": 1.0144255658001256, "learning_rate": 0.0001, "loss": 0.6401, "mean_abs_error": 378.10313282101293, "mean_abs_error_last_10": 94.38513978092378, "mean_abs_error_last_25": 147.35307970524588, "mean_abs_error_last_50": 226.83997132611103, "mean_pred_prob": 0.029369056597352027, "mean_pred_prob_last_10": 0.16329109854996204, "mean_pred_prob_last_25": 0.08603501562029123, "mean_pred_prob_last_50": 0.05052874982357025, "mean_token_accuracy": 0.8811288595199585, "step": 25410 }, { "epoch": 0.4518870104705527, "grad_norm": 0.9649697436617716, "learning_rate": 0.0001, "loss": 0.6785, "mean_abs_error": 99.42090044998562, "mean_abs_error_last_10": 23.060529331563973, "mean_abs_error_last_25": 37.085480184200506, "mean_abs_error_last_50": 52.91384236653628, "mean_pred_prob": 0.06273304000496864, "mean_pred_prob_last_10": 0.29549340419471265, "mean_pred_prob_last_25": 0.16698068268597127, "mean_pred_prob_last_50": 0.10384821705520153, "mean_token_accuracy": 0.8821167886257172, "step": 25420 }, { "epoch": 0.4520647787673546, "grad_norm": 0.9410460485024019, "learning_rate": 0.0001, "loss": 0.6688, "mean_abs_error": 488.251851645259, "mean_abs_error_last_10": 185.9997459629997, "mean_abs_error_last_25": 277.9775217144355, "mean_abs_error_last_50": 313.2899728411835, "mean_pred_prob": 0.029811088531278073, "mean_pred_prob_last_10": 0.15217050762148573, "mean_pred_prob_last_25": 0.08346285209991038, "mean_pred_prob_last_50": 0.05069719572784379, "mean_token_accuracy": 0.879855352640152, "step": 25430 }, { "epoch": 0.45224254706415656, "grad_norm": 1.0479131654991178, "learning_rate": 0.0001, "loss": 0.8211, "mean_abs_error": 490.57489037891963, "mean_abs_error_last_10": 301.0977639921208, "mean_abs_error_last_25": 326.10727331619745, "mean_abs_error_last_50": 401.2381383158569, "mean_pred_prob": 0.049630977842025456, "mean_pred_prob_last_10": 0.2195606780005619, "mean_pred_prob_last_25": 0.12893601614050568, "mean_pred_prob_last_50": 0.08225626474013552, "mean_token_accuracy": 0.8643516540527344, "step": 25440 }, { "epoch": 0.4524203153609585, "grad_norm": 1.5951816983221112, "learning_rate": 0.0001, "loss": 0.8349, "mean_abs_error": 207.6556275430324, "mean_abs_error_last_10": 135.9136254143611, "mean_abs_error_last_25": 129.40353318900605, "mean_abs_error_last_50": 134.99496650231566, "mean_pred_prob": 0.052906247845385224, "mean_pred_prob_last_10": 0.25746287966612724, "mean_pred_prob_last_25": 0.14360572067089378, "mean_pred_prob_last_50": 0.08799337185919284, "mean_token_accuracy": 0.8708842873573304, "step": 25450 }, { "epoch": 0.4525980836577605, "grad_norm": 1.6330863624073575, "learning_rate": 0.0001, "loss": 0.6914, "mean_abs_error": 1050.2265925936752, "mean_abs_error_last_10": 498.4433353491846, "mean_abs_error_last_25": 624.3650749006454, "mean_abs_error_last_50": 749.9307735767013, "mean_pred_prob": 0.04893028988444712, "mean_pred_prob_last_10": 0.24316936622199137, "mean_pred_prob_last_25": 0.1351076217222726, "mean_pred_prob_last_50": 0.08103764312400016, "mean_token_accuracy": 0.8694196522235871, "step": 25460 }, { "epoch": 0.45277585195456244, "grad_norm": 2.1579504229623048, "learning_rate": 0.0001, "loss": 0.7196, "mean_abs_error": 284.63430030300003, "mean_abs_error_last_10": 115.18179257944458, "mean_abs_error_last_25": 217.04375629599116, "mean_abs_error_last_50": 234.57520626303898, "mean_pred_prob": 0.05719464058056474, "mean_pred_prob_last_10": 0.2795033693313599, "mean_pred_prob_last_25": 0.1593225572258234, "mean_pred_prob_last_50": 0.0981498694512993, "mean_token_accuracy": 0.8780098617076874, "step": 25470 }, { "epoch": 0.4529536202513644, "grad_norm": 1.2233955295487007, "learning_rate": 0.0001, "loss": 0.8483, "mean_abs_error": 869.1317665178962, "mean_abs_error_last_10": 271.43043918871774, "mean_abs_error_last_25": 381.46431522757007, "mean_abs_error_last_50": 528.9414866980628, "mean_pred_prob": 0.02384581619990058, "mean_pred_prob_last_10": 0.1415059144375846, "mean_pred_prob_last_25": 0.07031899940920994, "mean_pred_prob_last_50": 0.0409450403531082, "mean_token_accuracy": 0.8714038252830505, "step": 25480 }, { "epoch": 0.4531313885481663, "grad_norm": 2.2638574943637058, "learning_rate": 0.0001, "loss": 0.8145, "mean_abs_error": 1352.0732574867768, "mean_abs_error_last_10": 549.9876985171052, "mean_abs_error_last_25": 658.1025105947525, "mean_abs_error_last_50": 861.1165720187233, "mean_pred_prob": 0.02869439024507301, "mean_pred_prob_last_10": 0.15091446493170224, "mean_pred_prob_last_25": 0.08123104523692745, "mean_pred_prob_last_50": 0.04813555154541973, "mean_token_accuracy": 0.8675524294376373, "step": 25490 }, { "epoch": 0.45330915684496825, "grad_norm": 2.354960598619748, "learning_rate": 0.0001, "loss": 0.7924, "mean_abs_error": 323.793349866279, "mean_abs_error_last_10": 161.56378760082254, "mean_abs_error_last_25": 199.51123229419093, "mean_abs_error_last_50": 247.54482725533776, "mean_pred_prob": 0.037166264234110714, "mean_pred_prob_last_10": 0.19906597398221493, "mean_pred_prob_last_25": 0.10917634442448616, "mean_pred_prob_last_50": 0.0634708520025015, "mean_token_accuracy": 0.8716414988040924, "step": 25500 }, { "epoch": 0.4534869251417702, "grad_norm": 1.7485093152468876, "learning_rate": 0.0001, "loss": 0.9177, "mean_abs_error": 147.41311721377247, "mean_abs_error_last_10": 23.556917127461073, "mean_abs_error_last_25": 41.32589861850437, "mean_abs_error_last_50": 85.15296372905905, "mean_pred_prob": 0.056223833747208116, "mean_pred_prob_last_10": 0.2612060509622097, "mean_pred_prob_last_25": 0.1533262912184, "mean_pred_prob_last_50": 0.09476295933127403, "mean_token_accuracy": 0.8669009923934936, "step": 25510 }, { "epoch": 0.4536646934385722, "grad_norm": 1.8792583986414606, "learning_rate": 0.0001, "loss": 0.8641, "mean_abs_error": 524.0405528411408, "mean_abs_error_last_10": 121.69464698284841, "mean_abs_error_last_25": 151.4032011752623, "mean_abs_error_last_50": 276.85345006256756, "mean_pred_prob": 0.024007794074714185, "mean_pred_prob_last_10": 0.12604942824691534, "mean_pred_prob_last_25": 0.06776007525622844, "mean_pred_prob_last_50": 0.0409856709651649, "mean_token_accuracy": 0.8731602668762207, "step": 25520 }, { "epoch": 0.4538424617353741, "grad_norm": 2.5148859442729057, "learning_rate": 0.0001, "loss": 0.7549, "mean_abs_error": 339.95091512999045, "mean_abs_error_last_10": 125.42879539358701, "mean_abs_error_last_25": 152.25667295662205, "mean_abs_error_last_50": 219.93462236277463, "mean_pred_prob": 0.025298254028894008, "mean_pred_prob_last_10": 0.1366963366046548, "mean_pred_prob_last_25": 0.07136230794712901, "mean_pred_prob_last_50": 0.0432986309286207, "mean_token_accuracy": 0.8769789457321167, "step": 25530 }, { "epoch": 0.45402023003217606, "grad_norm": 1.848920056200125, "learning_rate": 0.0001, "loss": 0.8415, "mean_abs_error": 263.72203570644535, "mean_abs_error_last_10": 89.13902565218388, "mean_abs_error_last_25": 157.17354073180994, "mean_abs_error_last_50": 232.4145979579044, "mean_pred_prob": 0.043498342484235765, "mean_pred_prob_last_10": 0.2013195987790823, "mean_pred_prob_last_25": 0.11745000034570693, "mean_pred_prob_last_50": 0.07251565884798765, "mean_token_accuracy": 0.8672114968299866, "step": 25540 }, { "epoch": 0.454197998328978, "grad_norm": 1.4155386280284286, "learning_rate": 0.0001, "loss": 0.8184, "mean_abs_error": 480.54405992002484, "mean_abs_error_last_10": 233.88633745514917, "mean_abs_error_last_25": 261.3876312382393, "mean_abs_error_last_50": 328.83492004307925, "mean_pred_prob": 0.04241648614406586, "mean_pred_prob_last_10": 0.20722948871552943, "mean_pred_prob_last_25": 0.11564281340688468, "mean_pred_prob_last_50": 0.071081129508093, "mean_token_accuracy": 0.8779441893100739, "step": 25550 }, { "epoch": 0.45437576662577994, "grad_norm": 1.6273897131706434, "learning_rate": 0.0001, "loss": 0.7165, "mean_abs_error": 453.51186705522207, "mean_abs_error_last_10": 144.01519321704467, "mean_abs_error_last_25": 210.06588243459873, "mean_abs_error_last_50": 318.63169699607874, "mean_pred_prob": 0.026983465481316672, "mean_pred_prob_last_10": 0.14297135489759966, "mean_pred_prob_last_25": 0.07479996794136241, "mean_pred_prob_last_50": 0.045723886892665176, "mean_token_accuracy": 0.8705442786216736, "step": 25560 }, { "epoch": 0.4545535349225819, "grad_norm": 0.8396011416845337, "learning_rate": 0.0001, "loss": 0.7786, "mean_abs_error": 362.70091640715026, "mean_abs_error_last_10": 173.03992440755707, "mean_abs_error_last_25": 138.85359148818412, "mean_abs_error_last_50": 160.1934046564619, "mean_pred_prob": 0.05005096064414829, "mean_pred_prob_last_10": 0.24253860835451632, "mean_pred_prob_last_25": 0.13367235094774516, "mean_pred_prob_last_50": 0.08338343047071248, "mean_token_accuracy": 0.866790521144867, "step": 25570 }, { "epoch": 0.4547313032193839, "grad_norm": 1.1302783016156852, "learning_rate": 0.0001, "loss": 0.7082, "mean_abs_error": 349.50394616804755, "mean_abs_error_last_10": 83.03326448448658, "mean_abs_error_last_25": 108.23801168498487, "mean_abs_error_last_50": 172.5141995870259, "mean_pred_prob": 0.026384666562080383, "mean_pred_prob_last_10": 0.14487269446253775, "mean_pred_prob_last_25": 0.07636795984581113, "mean_pred_prob_last_50": 0.04485141213517636, "mean_token_accuracy": 0.8816450595855713, "step": 25580 }, { "epoch": 0.4549090715161858, "grad_norm": 1.4657715651015484, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 368.58365493929676, "mean_abs_error_last_10": 130.88265117610462, "mean_abs_error_last_25": 260.1176050619168, "mean_abs_error_last_50": 330.6196002423015, "mean_pred_prob": 0.04485955538693816, "mean_pred_prob_last_10": 0.23296263851225377, "mean_pred_prob_last_25": 0.12843761555850505, "mean_pred_prob_last_50": 0.07622493109665811, "mean_token_accuracy": 0.8855522930622101, "step": 25590 }, { "epoch": 0.45508683981298775, "grad_norm": 2.589885095765919, "learning_rate": 0.0001, "loss": 0.7616, "mean_abs_error": 1049.9329883368428, "mean_abs_error_last_10": 475.8788190362796, "mean_abs_error_last_25": 577.0673297947221, "mean_abs_error_last_50": 774.6916797764343, "mean_pred_prob": 0.03848263159597991, "mean_pred_prob_last_10": 0.18658356787927916, "mean_pred_prob_last_25": 0.10127734911220614, "mean_pred_prob_last_50": 0.06365191260410938, "mean_token_accuracy": 0.8705628335475921, "step": 25600 }, { "epoch": 0.4552646081097897, "grad_norm": 1.8937729191593422, "learning_rate": 0.0001, "loss": 0.7812, "mean_abs_error": 102.37554249442418, "mean_abs_error_last_10": 18.145509953492045, "mean_abs_error_last_25": 33.38711747963247, "mean_abs_error_last_50": 65.33869428342278, "mean_pred_prob": 0.05910104280337691, "mean_pred_prob_last_10": 0.2996870048344135, "mean_pred_prob_last_25": 0.1644229620695114, "mean_pred_prob_last_50": 0.0989644156768918, "mean_token_accuracy": 0.8748626410961151, "step": 25610 }, { "epoch": 0.45544237640659163, "grad_norm": 1.0831648413857808, "learning_rate": 0.0001, "loss": 0.9322, "mean_abs_error": 558.6732693253109, "mean_abs_error_last_10": 185.64284835891556, "mean_abs_error_last_25": 369.96476833890983, "mean_abs_error_last_50": 441.3101665439377, "mean_pred_prob": 0.030752888554707168, "mean_pred_prob_last_10": 0.14528755564242601, "mean_pred_prob_last_25": 0.0829843851737678, "mean_pred_prob_last_50": 0.05120076914317906, "mean_token_accuracy": 0.8704098165035248, "step": 25620 }, { "epoch": 0.45562014470339357, "grad_norm": 1.5099113023575768, "learning_rate": 0.0001, "loss": 0.7625, "mean_abs_error": 298.0558370261788, "mean_abs_error_last_10": 78.84374617561693, "mean_abs_error_last_25": 80.58795778654795, "mean_abs_error_last_50": 124.93120085538646, "mean_pred_prob": 0.058397945982869716, "mean_pred_prob_last_10": 0.2826567355194129, "mean_pred_prob_last_25": 0.16296545191435144, "mean_pred_prob_last_50": 0.09982569597195834, "mean_token_accuracy": 0.8748613119125366, "step": 25630 }, { "epoch": 0.45579791300019556, "grad_norm": 1.68271005141747, "learning_rate": 0.0001, "loss": 0.759, "mean_abs_error": 161.4281837653984, "mean_abs_error_last_10": 50.54200808148515, "mean_abs_error_last_25": 161.1626844072215, "mean_abs_error_last_50": 159.16225283703517, "mean_pred_prob": 0.05267379311844707, "mean_pred_prob_last_10": 0.25480547081679106, "mean_pred_prob_last_25": 0.1434481312520802, "mean_pred_prob_last_50": 0.08771092183887959, "mean_token_accuracy": 0.8825305044651032, "step": 25640 }, { "epoch": 0.4559756812969975, "grad_norm": 1.6885446807654563, "learning_rate": 0.0001, "loss": 0.8128, "mean_abs_error": 956.3240408236367, "mean_abs_error_last_10": 440.61408808268664, "mean_abs_error_last_25": 539.9400541023283, "mean_abs_error_last_50": 709.0849928973198, "mean_pred_prob": 0.028924465732416138, "mean_pred_prob_last_10": 0.15549185385752934, "mean_pred_prob_last_25": 0.08400636090373155, "mean_pred_prob_last_50": 0.048990564414998515, "mean_token_accuracy": 0.8736427247524261, "step": 25650 }, { "epoch": 0.45615344959379944, "grad_norm": 1.939947778378777, "learning_rate": 0.0001, "loss": 0.7779, "mean_abs_error": 89.6897610702498, "mean_abs_error_last_10": 42.91448860293351, "mean_abs_error_last_25": 52.26078631390398, "mean_abs_error_last_50": 53.86718110586047, "mean_pred_prob": 0.07519808155484498, "mean_pred_prob_last_10": 0.33464598543941976, "mean_pred_prob_last_25": 0.19788630977272986, "mean_pred_prob_last_50": 0.12486878568306566, "mean_token_accuracy": 0.8749847888946534, "step": 25660 }, { "epoch": 0.4563312178906014, "grad_norm": 1.8248949539386274, "learning_rate": 0.0001, "loss": 0.8102, "mean_abs_error": 862.7142398408585, "mean_abs_error_last_10": 182.4655307825434, "mean_abs_error_last_25": 282.98364777449166, "mean_abs_error_last_50": 486.3211135965183, "mean_pred_prob": 0.02309812660096213, "mean_pred_prob_last_10": 0.11419979342026636, "mean_pred_prob_last_25": 0.06377559811808169, "mean_pred_prob_last_50": 0.03911260459572077, "mean_token_accuracy": 0.8734156608581543, "step": 25670 }, { "epoch": 0.4565089861874033, "grad_norm": 1.3180245131773332, "learning_rate": 0.0001, "loss": 0.6717, "mean_abs_error": 571.4627559263564, "mean_abs_error_last_10": 232.4613548514482, "mean_abs_error_last_25": 285.19064606022573, "mean_abs_error_last_50": 394.88634999025066, "mean_pred_prob": 0.07235129970358685, "mean_pred_prob_last_10": 0.3188639673346188, "mean_pred_prob_last_25": 0.18632539853570051, "mean_pred_prob_last_50": 0.11815194235823583, "mean_token_accuracy": 0.8756582200527191, "step": 25680 }, { "epoch": 0.45668675448420526, "grad_norm": 1.130038124991128, "learning_rate": 0.0001, "loss": 0.924, "mean_abs_error": 756.8538710099851, "mean_abs_error_last_10": 275.408360613494, "mean_abs_error_last_25": 319.00161423820333, "mean_abs_error_last_50": 473.10865473149806, "mean_pred_prob": 0.026412703352980314, "mean_pred_prob_last_10": 0.14772355441236867, "mean_pred_prob_last_25": 0.08079475540434941, "mean_pred_prob_last_50": 0.04724824147997424, "mean_token_accuracy": 0.8702439904212952, "step": 25690 }, { "epoch": 0.45686452278100725, "grad_norm": 2.5322676581563424, "learning_rate": 0.0001, "loss": 0.8391, "mean_abs_error": 1432.398562566556, "mean_abs_error_last_10": 1045.055101341974, "mean_abs_error_last_25": 1169.7850570811968, "mean_abs_error_last_50": 1294.8391771122028, "mean_pred_prob": 0.008623468615405727, "mean_pred_prob_last_10": 0.0531561348507239, "mean_pred_prob_last_25": 0.025738790899777087, "mean_pred_prob_last_50": 0.014871365365252132, "mean_token_accuracy": 0.8695063471794129, "step": 25700 }, { "epoch": 0.4570422910778092, "grad_norm": 1.1722409623991885, "learning_rate": 0.0001, "loss": 0.7285, "mean_abs_error": 221.80506598190527, "mean_abs_error_last_10": 47.07062936189614, "mean_abs_error_last_25": 79.81376791284632, "mean_abs_error_last_50": 150.6648290367927, "mean_pred_prob": 0.036978024151176216, "mean_pred_prob_last_10": 0.19091339372098445, "mean_pred_prob_last_25": 0.0992624593898654, "mean_pred_prob_last_50": 0.06057577347382903, "mean_token_accuracy": 0.8845114469528198, "step": 25710 }, { "epoch": 0.45722005937461113, "grad_norm": 1.5371970778020412, "learning_rate": 0.0001, "loss": 0.8884, "mean_abs_error": 2023.8951793857661, "mean_abs_error_last_10": 1175.213772875619, "mean_abs_error_last_25": 1312.6010896733446, "mean_abs_error_last_50": 1541.4588938727984, "mean_pred_prob": 0.040431393047038, "mean_pred_prob_last_10": 0.20493889858480543, "mean_pred_prob_last_25": 0.11898011953162495, "mean_pred_prob_last_50": 0.07061210500251036, "mean_token_accuracy": 0.8733866453170777, "step": 25720 }, { "epoch": 0.45739782767141307, "grad_norm": 1.448560803204634, "learning_rate": 0.0001, "loss": 0.8269, "mean_abs_error": 530.7633183417541, "mean_abs_error_last_10": 490.07853078658064, "mean_abs_error_last_25": 412.39092335523003, "mean_abs_error_last_50": 442.5589413959498, "mean_pred_prob": 0.022536845062859356, "mean_pred_prob_last_10": 0.11360732782632113, "mean_pred_prob_last_25": 0.06367862964980305, "mean_pred_prob_last_50": 0.03884352520108223, "mean_token_accuracy": 0.8670879125595092, "step": 25730 }, { "epoch": 0.457575595968215, "grad_norm": 6.2828221483806495, "learning_rate": 0.0001, "loss": 0.7855, "mean_abs_error": 219.63740821426882, "mean_abs_error_last_10": 70.85112849790146, "mean_abs_error_last_25": 100.74292662494864, "mean_abs_error_last_50": 149.11478332067878, "mean_pred_prob": 0.03423519409261644, "mean_pred_prob_last_10": 0.18881422616541385, "mean_pred_prob_last_25": 0.09742945414036512, "mean_pred_prob_last_50": 0.0574868103954941, "mean_token_accuracy": 0.8728519082069397, "step": 25740 }, { "epoch": 0.457753364265017, "grad_norm": 2.0909712284202397, "learning_rate": 0.0001, "loss": 0.8274, "mean_abs_error": 340.50224038233733, "mean_abs_error_last_10": 71.73425831748241, "mean_abs_error_last_25": 101.25228941925918, "mean_abs_error_last_50": 190.21733491130948, "mean_pred_prob": 0.05071025921497494, "mean_pred_prob_last_10": 0.2444048622623086, "mean_pred_prob_last_25": 0.1394579650834203, "mean_pred_prob_last_50": 0.08554089735262096, "mean_token_accuracy": 0.8626639008522033, "step": 25750 }, { "epoch": 0.45793113256181894, "grad_norm": 2.7287890184507697, "learning_rate": 0.0001, "loss": 0.7188, "mean_abs_error": 792.6848589987945, "mean_abs_error_last_10": 297.41045054140375, "mean_abs_error_last_25": 331.0444734045863, "mean_abs_error_last_50": 484.5315805667482, "mean_pred_prob": 0.02621514569618739, "mean_pred_prob_last_10": 0.1335822931607254, "mean_pred_prob_last_25": 0.07492528287693859, "mean_pred_prob_last_50": 0.04497971788514406, "mean_token_accuracy": 0.8793201625347138, "step": 25760 }, { "epoch": 0.4581089008586209, "grad_norm": 1.3461618811751166, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 796.4597366285205, "mean_abs_error_last_10": 400.7949673232294, "mean_abs_error_last_25": 476.0479382110649, "mean_abs_error_last_50": 568.8601962529987, "mean_pred_prob": 0.0546463223232422, "mean_pred_prob_last_10": 0.2448010401800275, "mean_pred_prob_last_25": 0.14295278837089426, "mean_pred_prob_last_50": 0.09085929339635186, "mean_token_accuracy": 0.8830817222595215, "step": 25770 }, { "epoch": 0.4582866691554228, "grad_norm": 1.6971484026887553, "learning_rate": 0.0001, "loss": 0.7709, "mean_abs_error": 252.63253128612268, "mean_abs_error_last_10": 117.69514035685884, "mean_abs_error_last_25": 195.47197599897464, "mean_abs_error_last_50": 209.29220456068384, "mean_pred_prob": 0.04303363556973636, "mean_pred_prob_last_10": 0.21764396615326403, "mean_pred_prob_last_25": 0.11810443382710219, "mean_pred_prob_last_50": 0.0713244209997356, "mean_token_accuracy": 0.8721536934375763, "step": 25780 }, { "epoch": 0.45846443745222476, "grad_norm": 1.6738847609649121, "learning_rate": 0.0001, "loss": 0.8734, "mean_abs_error": 868.6822760109453, "mean_abs_error_last_10": 419.46182156679714, "mean_abs_error_last_25": 517.8843431858376, "mean_abs_error_last_50": 654.6985809683204, "mean_pred_prob": 0.04693474429805065, "mean_pred_prob_last_10": 0.23726534630404786, "mean_pred_prob_last_25": 0.13511030455119907, "mean_pred_prob_last_50": 0.08058044626377522, "mean_token_accuracy": 0.8757330775260925, "step": 25790 }, { "epoch": 0.4586422057490267, "grad_norm": 1.2798367614112192, "learning_rate": 0.0001, "loss": 0.8713, "mean_abs_error": 403.5521037884051, "mean_abs_error_last_10": 149.07694872343023, "mean_abs_error_last_25": 164.91656309499356, "mean_abs_error_last_50": 326.267030356293, "mean_pred_prob": 0.04371117665432393, "mean_pred_prob_last_10": 0.21650599017739297, "mean_pred_prob_last_25": 0.120795682631433, "mean_pred_prob_last_50": 0.07403047280386091, "mean_token_accuracy": 0.8612005650997162, "step": 25800 }, { "epoch": 0.4588199740458287, "grad_norm": 2.24788444451881, "learning_rate": 0.0001, "loss": 0.799, "mean_abs_error": 400.6549678210329, "mean_abs_error_last_10": 104.73065697642626, "mean_abs_error_last_25": 133.62446949814836, "mean_abs_error_last_50": 197.62098279711367, "mean_pred_prob": 0.04564874991774559, "mean_pred_prob_last_10": 0.20263652671128513, "mean_pred_prob_last_25": 0.11511356178671121, "mean_pred_prob_last_50": 0.07418935797177255, "mean_token_accuracy": 0.8614151537418365, "step": 25810 }, { "epoch": 0.45899774234263063, "grad_norm": 1.4618217644209737, "learning_rate": 0.0001, "loss": 0.8314, "mean_abs_error": 194.92990698190104, "mean_abs_error_last_10": 109.31189645292667, "mean_abs_error_last_25": 146.84159254870792, "mean_abs_error_last_50": 176.07245590710153, "mean_pred_prob": 0.04685655781067908, "mean_pred_prob_last_10": 0.23942616935819389, "mean_pred_prob_last_25": 0.1258369419723749, "mean_pred_prob_last_50": 0.07633440815843642, "mean_token_accuracy": 0.876255351305008, "step": 25820 }, { "epoch": 0.45917551063943257, "grad_norm": 2.2760520302521523, "learning_rate": 0.0001, "loss": 0.7884, "mean_abs_error": 880.3221899265442, "mean_abs_error_last_10": 354.48723581308366, "mean_abs_error_last_25": 468.4046956775643, "mean_abs_error_last_50": 651.8123437555835, "mean_pred_prob": 0.028926467327983117, "mean_pred_prob_last_10": 0.1403058390249498, "mean_pred_prob_last_25": 0.07864645157242194, "mean_pred_prob_last_50": 0.04776004669838585, "mean_token_accuracy": 0.8705365598201752, "step": 25830 }, { "epoch": 0.4593532789362345, "grad_norm": 1.6771509264062618, "learning_rate": 0.0001, "loss": 0.798, "mean_abs_error": 535.4013669014328, "mean_abs_error_last_10": 232.15782186959913, "mean_abs_error_last_25": 298.99402023510333, "mean_abs_error_last_50": 329.8333114669071, "mean_pred_prob": 0.04886249077972025, "mean_pred_prob_last_10": 0.2227359302341938, "mean_pred_prob_last_25": 0.12662436603568494, "mean_pred_prob_last_50": 0.08004805013188161, "mean_token_accuracy": 0.8671450853347779, "step": 25840 }, { "epoch": 0.45953104723303645, "grad_norm": 1.5750487313318453, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 248.99560351974483, "mean_abs_error_last_10": 56.27782028023644, "mean_abs_error_last_25": 83.00870033381058, "mean_abs_error_last_50": 136.7941487414306, "mean_pred_prob": 0.038263937435112895, "mean_pred_prob_last_10": 0.2063542276620865, "mean_pred_prob_last_25": 0.10834193900227547, "mean_pred_prob_last_50": 0.06505663758143783, "mean_token_accuracy": 0.8742014467716217, "step": 25850 }, { "epoch": 0.4597088155298384, "grad_norm": 1.550808212750395, "learning_rate": 0.0001, "loss": 0.7422, "mean_abs_error": 845.0308888356118, "mean_abs_error_last_10": 315.8011913146928, "mean_abs_error_last_25": 384.85984120314066, "mean_abs_error_last_50": 500.4596952861658, "mean_pred_prob": 0.037311642424901946, "mean_pred_prob_last_10": 0.18172338338335975, "mean_pred_prob_last_25": 0.1017797323409468, "mean_pred_prob_last_50": 0.06215806701802649, "mean_token_accuracy": 0.8773394167423249, "step": 25860 }, { "epoch": 0.4598865838266404, "grad_norm": 1.008502616070336, "learning_rate": 0.0001, "loss": 0.7907, "mean_abs_error": 289.77442853160824, "mean_abs_error_last_10": 26.39544640934603, "mean_abs_error_last_25": 49.233746660917646, "mean_abs_error_last_50": 134.85472466551232, "mean_pred_prob": 0.04176923790946603, "mean_pred_prob_last_10": 0.22682506069540978, "mean_pred_prob_last_25": 0.12240582536906004, "mean_pred_prob_last_50": 0.0722123458981514, "mean_token_accuracy": 0.872555959224701, "step": 25870 }, { "epoch": 0.4600643521234423, "grad_norm": 1.0316598793531826, "learning_rate": 0.0001, "loss": 0.7858, "mean_abs_error": 501.0846732475514, "mean_abs_error_last_10": 291.732678940366, "mean_abs_error_last_25": 366.4899245344358, "mean_abs_error_last_50": 388.58745809777986, "mean_pred_prob": 0.03151878658682108, "mean_pred_prob_last_10": 0.15786211984232068, "mean_pred_prob_last_25": 0.08474835325032473, "mean_pred_prob_last_50": 0.052133937971666454, "mean_token_accuracy": 0.8662637233734131, "step": 25880 }, { "epoch": 0.46024212042024426, "grad_norm": 1.1987121978819795, "learning_rate": 0.0001, "loss": 0.8456, "mean_abs_error": 238.5306558046243, "mean_abs_error_last_10": 140.94918805487504, "mean_abs_error_last_25": 151.45668329007356, "mean_abs_error_last_50": 194.8564226420641, "mean_pred_prob": 0.038599888957105574, "mean_pred_prob_last_10": 0.19028603229671717, "mean_pred_prob_last_25": 0.10324511788785458, "mean_pred_prob_last_50": 0.06418137392029166, "mean_token_accuracy": 0.8771242797374725, "step": 25890 }, { "epoch": 0.4604198887170462, "grad_norm": 1.5601316258378886, "learning_rate": 0.0001, "loss": 0.7912, "mean_abs_error": 1270.2178771510519, "mean_abs_error_last_10": 418.47901005465474, "mean_abs_error_last_25": 586.5626163475159, "mean_abs_error_last_50": 919.3198560147588, "mean_pred_prob": 0.02018916299857665, "mean_pred_prob_last_10": 0.10362312404322438, "mean_pred_prob_last_25": 0.056584449938964096, "mean_pred_prob_last_50": 0.03417453525471501, "mean_token_accuracy": 0.8778315544128418, "step": 25900 }, { "epoch": 0.46059765701384814, "grad_norm": 3.290719154341592, "learning_rate": 0.0001, "loss": 0.823, "mean_abs_error": 622.6869189337352, "mean_abs_error_last_10": 250.17820538613933, "mean_abs_error_last_25": 239.1627804828097, "mean_abs_error_last_50": 300.24212310558414, "mean_pred_prob": 0.0350631543144118, "mean_pred_prob_last_10": 0.18091639131307602, "mean_pred_prob_last_25": 0.09857018367620185, "mean_pred_prob_last_50": 0.06051386158214882, "mean_token_accuracy": 0.8749177515506744, "step": 25910 }, { "epoch": 0.4607754253106501, "grad_norm": 1.3555266302510567, "learning_rate": 0.0001, "loss": 0.8474, "mean_abs_error": 531.5334688846883, "mean_abs_error_last_10": 144.0057989005008, "mean_abs_error_last_25": 176.40763547266084, "mean_abs_error_last_50": 267.98109621646114, "mean_pred_prob": 0.030626833799760788, "mean_pred_prob_last_10": 0.1543947291560471, "mean_pred_prob_last_25": 0.09188661760417745, "mean_pred_prob_last_50": 0.05343435250688344, "mean_token_accuracy": 0.8704466938972473, "step": 25920 }, { "epoch": 0.46095319360745207, "grad_norm": 1.422114738918505, "learning_rate": 0.0001, "loss": 0.8011, "mean_abs_error": 2390.4333977435517, "mean_abs_error_last_10": 1174.0735397501792, "mean_abs_error_last_25": 1531.3974200720866, "mean_abs_error_last_50": 1811.2483818776695, "mean_pred_prob": 0.034127693936170544, "mean_pred_prob_last_10": 0.15131296128674876, "mean_pred_prob_last_25": 0.0895997881423682, "mean_pred_prob_last_50": 0.057161160961550196, "mean_token_accuracy": 0.8663152992725373, "step": 25930 }, { "epoch": 0.461130961904254, "grad_norm": 2.289609533790372, "learning_rate": 0.0001, "loss": 0.8607, "mean_abs_error": 1319.520434117359, "mean_abs_error_last_10": 595.8428242181375, "mean_abs_error_last_25": 705.4531850361809, "mean_abs_error_last_50": 944.9006814636468, "mean_pred_prob": 0.02053886486683041, "mean_pred_prob_last_10": 0.1077362590120174, "mean_pred_prob_last_25": 0.057285368177690546, "mean_pred_prob_last_50": 0.035008004093833735, "mean_token_accuracy": 0.8685321271419525, "step": 25940 }, { "epoch": 0.46130873020105595, "grad_norm": 1.7620018758364488, "learning_rate": 0.0001, "loss": 0.8724, "mean_abs_error": 834.125437364087, "mean_abs_error_last_10": 379.9790152381829, "mean_abs_error_last_25": 443.58108436764985, "mean_abs_error_last_50": 574.1101015497127, "mean_pred_prob": 0.04285067767923465, "mean_pred_prob_last_10": 0.20762210033717565, "mean_pred_prob_last_25": 0.11573268445208669, "mean_pred_prob_last_50": 0.07184017084800871, "mean_token_accuracy": 0.8669342696666718, "step": 25950 }, { "epoch": 0.4614864984978579, "grad_norm": 1.585256636971639, "learning_rate": 0.0001, "loss": 0.78, "mean_abs_error": 616.6236123353434, "mean_abs_error_last_10": 166.23139919894456, "mean_abs_error_last_25": 238.80034607248498, "mean_abs_error_last_50": 401.44112671894726, "mean_pred_prob": 0.03233771824161522, "mean_pred_prob_last_10": 0.16023160750046372, "mean_pred_prob_last_25": 0.08727787469979376, "mean_pred_prob_last_50": 0.05371163544477895, "mean_token_accuracy": 0.8734753549098968, "step": 25960 }, { "epoch": 0.4616642667946598, "grad_norm": 1.87129916428314, "learning_rate": 0.0001, "loss": 0.8116, "mean_abs_error": 1367.1188945424665, "mean_abs_error_last_10": 654.3095237465562, "mean_abs_error_last_25": 900.1805904997746, "mean_abs_error_last_50": 1098.1376811162286, "mean_pred_prob": 0.04069748597976286, "mean_pred_prob_last_10": 0.1717611406056676, "mean_pred_prob_last_25": 0.10294758113159333, "mean_pred_prob_last_50": 0.06645738756633364, "mean_token_accuracy": 0.8718662798404694, "step": 25970 }, { "epoch": 0.46184203509146177, "grad_norm": 2.14474866884786, "learning_rate": 0.0001, "loss": 0.6383, "mean_abs_error": 124.88210752914638, "mean_abs_error_last_10": 32.078182488926345, "mean_abs_error_last_25": 50.265564839030745, "mean_abs_error_last_50": 63.755033087325145, "mean_pred_prob": 0.05904723256826401, "mean_pred_prob_last_10": 0.2838634166866541, "mean_pred_prob_last_25": 0.15788020892068744, "mean_pred_prob_last_50": 0.09742759922519326, "mean_token_accuracy": 0.884809535741806, "step": 25980 }, { "epoch": 0.46201980338826376, "grad_norm": 1.5077296520366819, "learning_rate": 0.0001, "loss": 0.7068, "mean_abs_error": 650.0251523282571, "mean_abs_error_last_10": 386.04271337877384, "mean_abs_error_last_25": 442.1309440276288, "mean_abs_error_last_50": 511.6476292972842, "mean_pred_prob": 0.013483471865765751, "mean_pred_prob_last_10": 0.0801669280976057, "mean_pred_prob_last_25": 0.03942964700981975, "mean_pred_prob_last_50": 0.023111317493021487, "mean_token_accuracy": 0.8745878875255585, "step": 25990 }, { "epoch": 0.4621975716850657, "grad_norm": 1.6732645729128812, "learning_rate": 0.0001, "loss": 0.8296, "mean_abs_error": 563.584449465347, "mean_abs_error_last_10": 238.20874208818879, "mean_abs_error_last_25": 333.5438769732695, "mean_abs_error_last_50": 451.1430765816923, "mean_pred_prob": 0.018317683646455408, "mean_pred_prob_last_10": 0.09145466722548008, "mean_pred_prob_last_25": 0.049651551991701126, "mean_pred_prob_last_50": 0.03036680384539068, "mean_token_accuracy": 0.8651305735111237, "step": 26000 }, { "epoch": 0.46237533998186764, "grad_norm": 1.2245382711137343, "learning_rate": 0.0001, "loss": 0.9646, "mean_abs_error": 248.72499208244722, "mean_abs_error_last_10": 160.20695886920166, "mean_abs_error_last_25": 162.91118782088816, "mean_abs_error_last_50": 191.97942263859304, "mean_pred_prob": 0.03347640852443874, "mean_pred_prob_last_10": 0.17035450115799905, "mean_pred_prob_last_25": 0.0893281196244061, "mean_pred_prob_last_50": 0.055156820872798565, "mean_token_accuracy": 0.8646864235401154, "step": 26010 }, { "epoch": 0.4625531082786696, "grad_norm": 2.648126779172405, "learning_rate": 0.0001, "loss": 0.7545, "mean_abs_error": 537.8827431260677, "mean_abs_error_last_10": 324.87869422044554, "mean_abs_error_last_25": 327.6573288398512, "mean_abs_error_last_50": 394.59691944918063, "mean_pred_prob": 0.04512568073405419, "mean_pred_prob_last_10": 0.22065895283012651, "mean_pred_prob_last_25": 0.12297220625332557, "mean_pred_prob_last_50": 0.07568782685848419, "mean_token_accuracy": 0.8773816823959351, "step": 26020 }, { "epoch": 0.4627308765754715, "grad_norm": 2.080769023763766, "learning_rate": 0.0001, "loss": 0.6849, "mean_abs_error": 412.6508295491825, "mean_abs_error_last_10": 128.8610901847943, "mean_abs_error_last_25": 172.23083597261274, "mean_abs_error_last_50": 260.03711254842256, "mean_pred_prob": 0.043270262499572706, "mean_pred_prob_last_10": 0.23223550572292878, "mean_pred_prob_last_25": 0.12485587349510752, "mean_pred_prob_last_50": 0.07454862025915646, "mean_token_accuracy": 0.8781784653663636, "step": 26030 }, { "epoch": 0.46290864487227346, "grad_norm": 2.8952268338453004, "learning_rate": 0.0001, "loss": 0.8693, "mean_abs_error": 1250.01220758682, "mean_abs_error_last_10": 491.1683233322231, "mean_abs_error_last_25": 490.0908401408743, "mean_abs_error_last_50": 696.4878232687423, "mean_pred_prob": 0.017576311124139467, "mean_pred_prob_last_10": 0.09524668605299666, "mean_pred_prob_last_25": 0.04897305594640784, "mean_pred_prob_last_50": 0.028888979548355564, "mean_token_accuracy": 0.866573566198349, "step": 26040 }, { "epoch": 0.46308641316907545, "grad_norm": 1.6571948996224257, "learning_rate": 0.0001, "loss": 1.0151, "mean_abs_error": 559.5943617261435, "mean_abs_error_last_10": 237.92351686001498, "mean_abs_error_last_25": 301.12850289483015, "mean_abs_error_last_50": 402.6773884331726, "mean_pred_prob": 0.03830341639404651, "mean_pred_prob_last_10": 0.19671897051739506, "mean_pred_prob_last_25": 0.10577005809172987, "mean_pred_prob_last_50": 0.06407461216440424, "mean_token_accuracy": 0.8712512731552124, "step": 26050 }, { "epoch": 0.4632641814658774, "grad_norm": 1.7904893675770182, "learning_rate": 0.0001, "loss": 0.8278, "mean_abs_error": 1674.9237534087206, "mean_abs_error_last_10": 993.7684653335937, "mean_abs_error_last_25": 954.9149623111509, "mean_abs_error_last_50": 1123.0763031021422, "mean_pred_prob": 0.018841361252998467, "mean_pred_prob_last_10": 0.09560274687974016, "mean_pred_prob_last_25": 0.051788344582018905, "mean_pred_prob_last_50": 0.03160260366857983, "mean_token_accuracy": 0.8658212304115296, "step": 26060 }, { "epoch": 0.46344194976267933, "grad_norm": 1.2363999970828912, "learning_rate": 0.0001, "loss": 0.8077, "mean_abs_error": 298.13778441647594, "mean_abs_error_last_10": 191.4849656920521, "mean_abs_error_last_25": 245.03125132252845, "mean_abs_error_last_50": 257.28106705249047, "mean_pred_prob": 0.027830500225536525, "mean_pred_prob_last_10": 0.12604955900460482, "mean_pred_prob_last_25": 0.07120642736554146, "mean_pred_prob_last_50": 0.0447636691853404, "mean_token_accuracy": 0.867827194929123, "step": 26070 }, { "epoch": 0.46361971805948127, "grad_norm": 1.137148109560872, "learning_rate": 0.0001, "loss": 0.7447, "mean_abs_error": 568.1408852725465, "mean_abs_error_last_10": 116.30242455764194, "mean_abs_error_last_25": 234.44538466693407, "mean_abs_error_last_50": 366.1057674717602, "mean_pred_prob": 0.02767363387392834, "mean_pred_prob_last_10": 0.13077059788629414, "mean_pred_prob_last_25": 0.07060925015248358, "mean_pred_prob_last_50": 0.0445626060012728, "mean_token_accuracy": 0.8664466321468354, "step": 26080 }, { "epoch": 0.4637974863562832, "grad_norm": 2.390327943981704, "learning_rate": 0.0001, "loss": 0.7402, "mean_abs_error": 390.63185587273915, "mean_abs_error_last_10": 248.21707289258106, "mean_abs_error_last_25": 290.87805076272815, "mean_abs_error_last_50": 290.67399801891963, "mean_pred_prob": 0.03735756349051371, "mean_pred_prob_last_10": 0.18308180095627904, "mean_pred_prob_last_25": 0.10237397877499461, "mean_pred_prob_last_50": 0.06177986433031037, "mean_token_accuracy": 0.8705342829227447, "step": 26090 }, { "epoch": 0.46397525465308515, "grad_norm": 1.4920601408830734, "learning_rate": 0.0001, "loss": 0.7292, "mean_abs_error": 867.6845260412938, "mean_abs_error_last_10": 491.97338590239895, "mean_abs_error_last_25": 583.7063364788471, "mean_abs_error_last_50": 675.1954743489064, "mean_pred_prob": 0.037051268521463496, "mean_pred_prob_last_10": 0.1669135618605651, "mean_pred_prob_last_25": 0.09967676957603544, "mean_pred_prob_last_50": 0.06217868651438039, "mean_token_accuracy": 0.8748473286628723, "step": 26100 }, { "epoch": 0.46415302294988714, "grad_norm": 1.087936811811299, "learning_rate": 0.0001, "loss": 0.7851, "mean_abs_error": 295.2461841450007, "mean_abs_error_last_10": 81.11189071358042, "mean_abs_error_last_25": 106.03733972121651, "mean_abs_error_last_50": 175.765870446272, "mean_pred_prob": 0.03778819977305829, "mean_pred_prob_last_10": 0.19353682417422532, "mean_pred_prob_last_25": 0.104653865005821, "mean_pred_prob_last_50": 0.06283213156275451, "mean_token_accuracy": 0.8666405737400055, "step": 26110 }, { "epoch": 0.4643307912466891, "grad_norm": 2.1729575622788184, "learning_rate": 0.0001, "loss": 0.8089, "mean_abs_error": 291.85316255562634, "mean_abs_error_last_10": 151.95922635500546, "mean_abs_error_last_25": 197.79215287153602, "mean_abs_error_last_50": 211.60314747626407, "mean_pred_prob": 0.03660791467409581, "mean_pred_prob_last_10": 0.18625299390405417, "mean_pred_prob_last_25": 0.10165543332695962, "mean_pred_prob_last_50": 0.06218764306977391, "mean_token_accuracy": 0.8678768396377563, "step": 26120 }, { "epoch": 0.464508559543491, "grad_norm": 2.631281417622155, "learning_rate": 0.0001, "loss": 0.6837, "mean_abs_error": 791.2933908147088, "mean_abs_error_last_10": 262.9550631522082, "mean_abs_error_last_25": 329.0234067007528, "mean_abs_error_last_50": 448.5542578424447, "mean_pred_prob": 0.048967227051616644, "mean_pred_prob_last_10": 0.22346058844123035, "mean_pred_prob_last_25": 0.1315722258470487, "mean_pred_prob_last_50": 0.08181906881509349, "mean_token_accuracy": 0.8769092261791229, "step": 26130 }, { "epoch": 0.46468632784029296, "grad_norm": 1.318904084723387, "learning_rate": 0.0001, "loss": 0.9025, "mean_abs_error": 1968.7568417692678, "mean_abs_error_last_10": 1144.1365284044718, "mean_abs_error_last_25": 1258.2951834971886, "mean_abs_error_last_50": 1498.9483452706959, "mean_pred_prob": 0.01818502159876516, "mean_pred_prob_last_10": 0.095590421257657, "mean_pred_prob_last_25": 0.05116597015730804, "mean_pred_prob_last_50": 0.030704936299298425, "mean_token_accuracy": 0.8675463974475861, "step": 26140 }, { "epoch": 0.4648640961370949, "grad_norm": 0.8407958893377228, "learning_rate": 0.0001, "loss": 0.8315, "mean_abs_error": 615.782643889479, "mean_abs_error_last_10": 175.05370968230164, "mean_abs_error_last_25": 279.9646316798713, "mean_abs_error_last_50": 439.98521409033003, "mean_pred_prob": 0.028611124178860338, "mean_pred_prob_last_10": 0.14955091120209546, "mean_pred_prob_last_25": 0.08086798915173858, "mean_pred_prob_last_50": 0.04863498019403778, "mean_token_accuracy": 0.8596560776233673, "step": 26150 }, { "epoch": 0.46504186443389683, "grad_norm": 0.8518310458514633, "learning_rate": 0.0001, "loss": 0.7468, "mean_abs_error": 189.2624594407483, "mean_abs_error_last_10": 46.22855190845776, "mean_abs_error_last_25": 58.575359075890425, "mean_abs_error_last_50": 94.16513210355075, "mean_pred_prob": 0.04586648317053914, "mean_pred_prob_last_10": 0.20948068164288997, "mean_pred_prob_last_25": 0.12061796467751265, "mean_pred_prob_last_50": 0.07623016126453877, "mean_token_accuracy": 0.8852458238601685, "step": 26160 }, { "epoch": 0.46521963273069883, "grad_norm": 3.5594340086817766, "learning_rate": 0.0001, "loss": 0.761, "mean_abs_error": 1354.6583596312696, "mean_abs_error_last_10": 735.1507143823591, "mean_abs_error_last_25": 846.7959290641651, "mean_abs_error_last_50": 1026.2207538854561, "mean_pred_prob": 0.044566309438596366, "mean_pred_prob_last_10": 0.21638257503072963, "mean_pred_prob_last_25": 0.12273605501977727, "mean_pred_prob_last_50": 0.0748470910191827, "mean_token_accuracy": 0.8670527040958405, "step": 26170 }, { "epoch": 0.46539740102750077, "grad_norm": 2.400634935231208, "learning_rate": 0.0001, "loss": 0.7685, "mean_abs_error": 290.21003160968604, "mean_abs_error_last_10": 177.16277535465048, "mean_abs_error_last_25": 187.55551383501555, "mean_abs_error_last_50": 190.33332463070363, "mean_pred_prob": 0.047483927058056, "mean_pred_prob_last_10": 0.227635645121336, "mean_pred_prob_last_25": 0.12551967995241284, "mean_pred_prob_last_50": 0.07881014570593833, "mean_token_accuracy": 0.8734512805938721, "step": 26180 }, { "epoch": 0.4655751693243027, "grad_norm": 1.5095400316239602, "learning_rate": 0.0001, "loss": 0.7463, "mean_abs_error": 822.6640849415378, "mean_abs_error_last_10": 367.1473434383025, "mean_abs_error_last_25": 473.85983155533876, "mean_abs_error_last_50": 645.0303138170608, "mean_pred_prob": 0.041139693997683933, "mean_pred_prob_last_10": 0.19735382963554002, "mean_pred_prob_last_25": 0.10973651618114673, "mean_pred_prob_last_50": 0.06853010281920432, "mean_token_accuracy": 0.8818429708480835, "step": 26190 }, { "epoch": 0.46575293762110465, "grad_norm": 3.3309818005998793, "learning_rate": 0.0001, "loss": 0.7437, "mean_abs_error": 780.3526310982508, "mean_abs_error_last_10": 185.71442669911298, "mean_abs_error_last_25": 311.9394854126425, "mean_abs_error_last_50": 489.2147351706241, "mean_pred_prob": 0.029335407429607586, "mean_pred_prob_last_10": 0.14642097753239797, "mean_pred_prob_last_25": 0.08034470529528334, "mean_pred_prob_last_50": 0.048861594765912744, "mean_token_accuracy": 0.8630036056041718, "step": 26200 }, { "epoch": 0.4659307059179066, "grad_norm": 2.2909379660833498, "learning_rate": 0.0001, "loss": 0.7231, "mean_abs_error": 282.1884717408586, "mean_abs_error_last_10": 158.2177657270705, "mean_abs_error_last_25": 230.71071423936093, "mean_abs_error_last_50": 212.78132775813938, "mean_pred_prob": 0.03683684126008302, "mean_pred_prob_last_10": 0.2082206167280674, "mean_pred_prob_last_25": 0.11019850373268128, "mean_pred_prob_last_50": 0.0645239747595042, "mean_token_accuracy": 0.8726611018180848, "step": 26210 }, { "epoch": 0.4661084742147085, "grad_norm": 2.493984847463436, "learning_rate": 0.0001, "loss": 0.8432, "mean_abs_error": 1456.8529110759207, "mean_abs_error_last_10": 782.135208725778, "mean_abs_error_last_25": 887.7474058577834, "mean_abs_error_last_50": 1057.4110186626153, "mean_pred_prob": 0.01764928729608073, "mean_pred_prob_last_10": 0.09796909822907765, "mean_pred_prob_last_25": 0.04951848719574627, "mean_pred_prob_last_50": 0.029523259554844118, "mean_token_accuracy": 0.8679537296295166, "step": 26220 }, { "epoch": 0.4662862425115105, "grad_norm": 1.0676096680408294, "learning_rate": 0.0001, "loss": 0.8236, "mean_abs_error": 781.0369125405061, "mean_abs_error_last_10": 334.9972845817564, "mean_abs_error_last_25": 387.20911004652095, "mean_abs_error_last_50": 527.9373445287164, "mean_pred_prob": 0.053878891875501725, "mean_pred_prob_last_10": 0.23612094687414356, "mean_pred_prob_last_25": 0.13792704063816927, "mean_pred_prob_last_50": 0.0870817481627455, "mean_token_accuracy": 0.8588743269443512, "step": 26230 }, { "epoch": 0.46646401080831246, "grad_norm": 1.3153506263594497, "learning_rate": 0.0001, "loss": 0.7468, "mean_abs_error": 287.2102600848528, "mean_abs_error_last_10": 111.8991063739442, "mean_abs_error_last_25": 179.35153044512782, "mean_abs_error_last_50": 210.22831336045084, "mean_pred_prob": 0.043076741485856475, "mean_pred_prob_last_10": 0.21988831097260117, "mean_pred_prob_last_25": 0.11869733144994825, "mean_pred_prob_last_50": 0.07263769165147096, "mean_token_accuracy": 0.875569862127304, "step": 26240 }, { "epoch": 0.4666417791051144, "grad_norm": 0.9942213508526415, "learning_rate": 0.0001, "loss": 0.6965, "mean_abs_error": 238.69631307262233, "mean_abs_error_last_10": 37.79445637089166, "mean_abs_error_last_25": 114.82107173005917, "mean_abs_error_last_50": 145.0635522234556, "mean_pred_prob": 0.06060377666726709, "mean_pred_prob_last_10": 0.23891563005745411, "mean_pred_prob_last_25": 0.1543758587911725, "mean_pred_prob_last_50": 0.0978621649555862, "mean_token_accuracy": 0.8832648813724517, "step": 26250 }, { "epoch": 0.46681954740191633, "grad_norm": 1.3206295567445445, "learning_rate": 0.0001, "loss": 0.7783, "mean_abs_error": 208.6359252126436, "mean_abs_error_last_10": 69.95409975129647, "mean_abs_error_last_25": 75.44589281934331, "mean_abs_error_last_50": 114.80514015095116, "mean_pred_prob": 0.0459651401732117, "mean_pred_prob_last_10": 0.21843310445547104, "mean_pred_prob_last_25": 0.12608490511775017, "mean_pred_prob_last_50": 0.07776235211640596, "mean_token_accuracy": 0.8727248311042786, "step": 26260 }, { "epoch": 0.4669973156987183, "grad_norm": 1.2390302710025334, "learning_rate": 0.0001, "loss": 0.7693, "mean_abs_error": 100.5393314068951, "mean_abs_error_last_10": 13.636656360122862, "mean_abs_error_last_25": 25.525896618668632, "mean_abs_error_last_50": 49.81064392146272, "mean_pred_prob": 0.06662194039672613, "mean_pred_prob_last_10": 0.31204778850078585, "mean_pred_prob_last_25": 0.18513414822518826, "mean_pred_prob_last_50": 0.11299531552940607, "mean_token_accuracy": 0.8782989203929901, "step": 26270 }, { "epoch": 0.4671750839955202, "grad_norm": 1.5378098401639344, "learning_rate": 0.0001, "loss": 0.69, "mean_abs_error": 626.6429371110951, "mean_abs_error_last_10": 248.72105168183685, "mean_abs_error_last_25": 260.83837931294755, "mean_abs_error_last_50": 366.10565491760735, "mean_pred_prob": 0.027627382933860644, "mean_pred_prob_last_10": 0.14849042303394527, "mean_pred_prob_last_25": 0.07664386835531331, "mean_pred_prob_last_50": 0.047710058017401025, "mean_token_accuracy": 0.879205048084259, "step": 26280 }, { "epoch": 0.4673528522923222, "grad_norm": 1.0844842948807698, "learning_rate": 0.0001, "loss": 0.8131, "mean_abs_error": 531.2863508820582, "mean_abs_error_last_10": 237.30696300292766, "mean_abs_error_last_25": 264.357841419798, "mean_abs_error_last_50": 367.7096677082605, "mean_pred_prob": 0.03702974568586796, "mean_pred_prob_last_10": 0.1791172847035341, "mean_pred_prob_last_25": 0.10207691692048684, "mean_pred_prob_last_50": 0.06244114955770783, "mean_token_accuracy": 0.867564034461975, "step": 26290 }, { "epoch": 0.46753062058912415, "grad_norm": 1.9313640391647715, "learning_rate": 0.0001, "loss": 0.8817, "mean_abs_error": 999.851813088141, "mean_abs_error_last_10": 423.15291269791226, "mean_abs_error_last_25": 596.5090344364769, "mean_abs_error_last_50": 734.8343995204033, "mean_pred_prob": 0.015563341410597786, "mean_pred_prob_last_10": 0.09293790793744847, "mean_pred_prob_last_25": 0.04619979853741825, "mean_pred_prob_last_50": 0.026800348120741547, "mean_token_accuracy": 0.8643340528011322, "step": 26300 }, { "epoch": 0.4677083888859261, "grad_norm": 1.1832271908927738, "learning_rate": 0.0001, "loss": 0.9163, "mean_abs_error": 582.6045588306083, "mean_abs_error_last_10": 234.71364309932156, "mean_abs_error_last_25": 306.8983926961013, "mean_abs_error_last_50": 423.66337157587367, "mean_pred_prob": 0.017617002001497895, "mean_pred_prob_last_10": 0.09869768847711384, "mean_pred_prob_last_25": 0.05117859252495691, "mean_pred_prob_last_50": 0.03033333799103275, "mean_token_accuracy": 0.8728001415729523, "step": 26310 }, { "epoch": 0.467886157182728, "grad_norm": 1.1797000231956045, "learning_rate": 0.0001, "loss": 0.7833, "mean_abs_error": 992.272245147585, "mean_abs_error_last_10": 377.6354358976004, "mean_abs_error_last_25": 432.9242302971112, "mean_abs_error_last_50": 630.3840738992743, "mean_pred_prob": 0.03904362098837737, "mean_pred_prob_last_10": 0.19278125278069638, "mean_pred_prob_last_25": 0.10594830276386347, "mean_pred_prob_last_50": 0.06447021780186332, "mean_token_accuracy": 0.8684523165225982, "step": 26320 }, { "epoch": 0.46806392547952996, "grad_norm": 0.944926946164192, "learning_rate": 0.0001, "loss": 0.8466, "mean_abs_error": 267.6326672434083, "mean_abs_error_last_10": 55.930708070138486, "mean_abs_error_last_25": 83.79821351009541, "mean_abs_error_last_50": 186.06581697395725, "mean_pred_prob": 0.05102030076086521, "mean_pred_prob_last_10": 0.2539638590067625, "mean_pred_prob_last_25": 0.1439304741099477, "mean_pred_prob_last_50": 0.08674261942505837, "mean_token_accuracy": 0.873583847284317, "step": 26330 }, { "epoch": 0.4682416937763319, "grad_norm": 1.3841778373541336, "learning_rate": 0.0001, "loss": 0.7574, "mean_abs_error": 591.956659201003, "mean_abs_error_last_10": 123.97412181369182, "mean_abs_error_last_25": 163.26811920330044, "mean_abs_error_last_50": 324.12744302534156, "mean_pred_prob": 0.025799415935762227, "mean_pred_prob_last_10": 0.13245953489094972, "mean_pred_prob_last_25": 0.07280130507424473, "mean_pred_prob_last_50": 0.043307809112593534, "mean_token_accuracy": 0.8721960186958313, "step": 26340 }, { "epoch": 0.4684194620731339, "grad_norm": 3.101596899309637, "learning_rate": 0.0001, "loss": 0.7355, "mean_abs_error": 756.013186016255, "mean_abs_error_last_10": 411.94005249222363, "mean_abs_error_last_25": 418.92831284007104, "mean_abs_error_last_50": 502.2129330709321, "mean_pred_prob": 0.04262921984191052, "mean_pred_prob_last_10": 0.1951388194138417, "mean_pred_prob_last_25": 0.11260362025932409, "mean_pred_prob_last_50": 0.07023528923746199, "mean_token_accuracy": 0.8749839663505554, "step": 26350 }, { "epoch": 0.46859723036993584, "grad_norm": 0.8646182921035631, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 1166.993749174778, "mean_abs_error_last_10": 648.3817375245674, "mean_abs_error_last_25": 787.1119270080363, "mean_abs_error_last_50": 900.933933554062, "mean_pred_prob": 0.02340822138794465, "mean_pred_prob_last_10": 0.1303951001056703, "mean_pred_prob_last_25": 0.06801040595746599, "mean_pred_prob_last_50": 0.04037744438246591, "mean_token_accuracy": 0.8608652770519256, "step": 26360 }, { "epoch": 0.4687749986667378, "grad_norm": 1.109616584616582, "learning_rate": 0.0001, "loss": 0.6189, "mean_abs_error": 1493.6210293597974, "mean_abs_error_last_10": 398.4986459211761, "mean_abs_error_last_25": 569.1651748488059, "mean_abs_error_last_50": 893.8921723330972, "mean_pred_prob": 0.021334067429415883, "mean_pred_prob_last_10": 0.11170759074157104, "mean_pred_prob_last_25": 0.060056687134783716, "mean_pred_prob_last_50": 0.03582375274272635, "mean_token_accuracy": 0.884828382730484, "step": 26370 }, { "epoch": 0.4689527669635397, "grad_norm": 1.6530059959895507, "learning_rate": 0.0001, "loss": 0.8295, "mean_abs_error": 277.3730535838825, "mean_abs_error_last_10": 67.44685428946815, "mean_abs_error_last_25": 84.28408929912516, "mean_abs_error_last_50": 145.32247549974244, "mean_pred_prob": 0.03824316626414657, "mean_pred_prob_last_10": 0.1976981706917286, "mean_pred_prob_last_25": 0.10107731567695737, "mean_pred_prob_last_50": 0.061501822201535106, "mean_token_accuracy": 0.8680337846279145, "step": 26380 }, { "epoch": 0.46913053526034165, "grad_norm": 1.5963416204787562, "learning_rate": 0.0001, "loss": 0.8143, "mean_abs_error": 690.0566620987026, "mean_abs_error_last_10": 294.8372595435892, "mean_abs_error_last_25": 333.69742376058343, "mean_abs_error_last_50": 452.3386039001595, "mean_pred_prob": 0.031418004090664906, "mean_pred_prob_last_10": 0.1791199490253348, "mean_pred_prob_last_25": 0.09448985580238514, "mean_pred_prob_last_50": 0.05476292543462478, "mean_token_accuracy": 0.8710807859897614, "step": 26390 }, { "epoch": 0.4693083035571436, "grad_norm": 1.28121623024828, "learning_rate": 0.0001, "loss": 0.6639, "mean_abs_error": 661.8039084210784, "mean_abs_error_last_10": 260.4751001319811, "mean_abs_error_last_25": 346.38083822875325, "mean_abs_error_last_50": 420.1610704280955, "mean_pred_prob": 0.03125924224150367, "mean_pred_prob_last_10": 0.14949724366888403, "mean_pred_prob_last_25": 0.08260142345679924, "mean_pred_prob_last_50": 0.051611779897939414, "mean_token_accuracy": 0.8693563222885132, "step": 26400 }, { "epoch": 0.4694860718539456, "grad_norm": 1.7412824717653896, "learning_rate": 0.0001, "loss": 0.8567, "mean_abs_error": 415.7980383166443, "mean_abs_error_last_10": 103.17951867076552, "mean_abs_error_last_25": 146.49118407720078, "mean_abs_error_last_50": 234.71186486362086, "mean_pred_prob": 0.0414344932185486, "mean_pred_prob_last_10": 0.20665431089000777, "mean_pred_prob_last_25": 0.11111430580494926, "mean_pred_prob_last_50": 0.06907075026538223, "mean_token_accuracy": 0.8763996601104737, "step": 26410 }, { "epoch": 0.4696638401507475, "grad_norm": 1.7952977761137787, "learning_rate": 0.0001, "loss": 0.7284, "mean_abs_error": 746.082708338429, "mean_abs_error_last_10": 291.79387649214317, "mean_abs_error_last_25": 344.2457932041185, "mean_abs_error_last_50": 479.6627665429766, "mean_pred_prob": 0.029755917360307648, "mean_pred_prob_last_10": 0.1341819405206479, "mean_pred_prob_last_25": 0.07661413118476049, "mean_pred_prob_last_50": 0.04878592718741857, "mean_token_accuracy": 0.8766875445842743, "step": 26420 }, { "epoch": 0.46984160844754946, "grad_norm": 1.979278334724962, "learning_rate": 0.0001, "loss": 0.7472, "mean_abs_error": 264.0053773474657, "mean_abs_error_last_10": 66.09894678802834, "mean_abs_error_last_25": 121.07725543662075, "mean_abs_error_last_50": 168.8733337831109, "mean_pred_prob": 0.043949639145284894, "mean_pred_prob_last_10": 0.200281573086977, "mean_pred_prob_last_25": 0.11362664457410573, "mean_pred_prob_last_50": 0.07239343635737897, "mean_token_accuracy": 0.8733612477779389, "step": 26430 }, { "epoch": 0.4700193767443514, "grad_norm": 0.8298555958913358, "learning_rate": 0.0001, "loss": 0.7093, "mean_abs_error": 189.7442554366548, "mean_abs_error_last_10": 69.88669335681223, "mean_abs_error_last_25": 83.10089479591645, "mean_abs_error_last_50": 108.4673235821386, "mean_pred_prob": 0.05173857714980841, "mean_pred_prob_last_10": 0.2430382791906595, "mean_pred_prob_last_25": 0.14115063641220332, "mean_pred_prob_last_50": 0.08712530732154847, "mean_token_accuracy": 0.8800803124904633, "step": 26440 }, { "epoch": 0.47019714504115334, "grad_norm": 2.1277453066051812, "learning_rate": 0.0001, "loss": 0.8212, "mean_abs_error": 1245.9254070183465, "mean_abs_error_last_10": 765.5059604145197, "mean_abs_error_last_25": 849.470530896711, "mean_abs_error_last_50": 999.6515599625861, "mean_pred_prob": 0.03346092096762732, "mean_pred_prob_last_10": 0.15918178514693865, "mean_pred_prob_last_25": 0.09049844812252558, "mean_pred_prob_last_50": 0.05614117810764583, "mean_token_accuracy": 0.8769463896751404, "step": 26450 }, { "epoch": 0.47037491333795534, "grad_norm": 1.2668728141327452, "learning_rate": 0.0001, "loss": 0.7451, "mean_abs_error": 114.26429386179962, "mean_abs_error_last_10": 26.178242989963298, "mean_abs_error_last_25": 41.756982746098764, "mean_abs_error_last_50": 66.04803790564732, "mean_pred_prob": 0.053530057054013015, "mean_pred_prob_last_10": 0.2836767606437206, "mean_pred_prob_last_25": 0.1547701995819807, "mean_pred_prob_last_50": 0.0930010223761201, "mean_token_accuracy": 0.8778634369373322, "step": 26460 }, { "epoch": 0.4705526816347573, "grad_norm": 1.481525797074716, "learning_rate": 0.0001, "loss": 0.7813, "mean_abs_error": 1359.6675482265255, "mean_abs_error_last_10": 749.210925498207, "mean_abs_error_last_25": 830.2470687467345, "mean_abs_error_last_50": 980.7457589955441, "mean_pred_prob": 0.024164950755948666, "mean_pred_prob_last_10": 0.12792755792761454, "mean_pred_prob_last_25": 0.06959712004463654, "mean_pred_prob_last_50": 0.041460913632181476, "mean_token_accuracy": 0.8706027328968048, "step": 26470 }, { "epoch": 0.4707304499315592, "grad_norm": 1.6749262966927907, "learning_rate": 0.0001, "loss": 0.7547, "mean_abs_error": 244.9528737614187, "mean_abs_error_last_10": 73.16651740246014, "mean_abs_error_last_25": 113.1533582295713, "mean_abs_error_last_50": 177.29290849139434, "mean_pred_prob": 0.0626352405641228, "mean_pred_prob_last_10": 0.2755643635988235, "mean_pred_prob_last_25": 0.16554897893220186, "mean_pred_prob_last_50": 0.10270932046696543, "mean_token_accuracy": 0.8727596044540405, "step": 26480 }, { "epoch": 0.47090821822836115, "grad_norm": 1.779076742374962, "learning_rate": 0.0001, "loss": 0.7253, "mean_abs_error": 549.0930774633722, "mean_abs_error_last_10": 211.44101497558208, "mean_abs_error_last_25": 261.48929868144893, "mean_abs_error_last_50": 322.3110928657252, "mean_pred_prob": 0.02948904451332055, "mean_pred_prob_last_10": 0.15589130701264367, "mean_pred_prob_last_25": 0.084900296991691, "mean_pred_prob_last_50": 0.050967347429832444, "mean_token_accuracy": 0.8706844508647918, "step": 26490 }, { "epoch": 0.4710859865251631, "grad_norm": 1.3650591596967396, "learning_rate": 0.0001, "loss": 0.8041, "mean_abs_error": 227.7503435150138, "mean_abs_error_last_10": 98.44940453877757, "mean_abs_error_last_25": 131.40415296558695, "mean_abs_error_last_50": 178.9955006336441, "mean_pred_prob": 0.05970921700354666, "mean_pred_prob_last_10": 0.26839490607380867, "mean_pred_prob_last_25": 0.16214548209682106, "mean_pred_prob_last_50": 0.10064045023173093, "mean_token_accuracy": 0.8684606730937958, "step": 26500 }, { "epoch": 0.47126375482196503, "grad_norm": 1.2343259381623979, "learning_rate": 0.0001, "loss": 0.7206, "mean_abs_error": 530.9586625301466, "mean_abs_error_last_10": 158.00049358120083, "mean_abs_error_last_25": 232.4892051723924, "mean_abs_error_last_50": 336.40504886671386, "mean_pred_prob": 0.030637259728973732, "mean_pred_prob_last_10": 0.1605178705183789, "mean_pred_prob_last_25": 0.08716546524665318, "mean_pred_prob_last_50": 0.05241722704959102, "mean_token_accuracy": 0.8675938189029694, "step": 26510 }, { "epoch": 0.471441523118767, "grad_norm": 2.255483379801555, "learning_rate": 0.0001, "loss": 0.7637, "mean_abs_error": 120.61092302199559, "mean_abs_error_last_10": 30.77804583980779, "mean_abs_error_last_25": 57.03671238747571, "mean_abs_error_last_50": 79.76147703136218, "mean_pred_prob": 0.05550549728795886, "mean_pred_prob_last_10": 0.26382071897387505, "mean_pred_prob_last_25": 0.14974562786519527, "mean_pred_prob_last_50": 0.0923878476023674, "mean_token_accuracy": 0.8659635663032532, "step": 26520 }, { "epoch": 0.47161929141556896, "grad_norm": 2.283086831685497, "learning_rate": 0.0001, "loss": 0.7548, "mean_abs_error": 499.4359224992976, "mean_abs_error_last_10": 142.58849209515517, "mean_abs_error_last_25": 224.33881340835788, "mean_abs_error_last_50": 336.81172099557915, "mean_pred_prob": 0.06557745181489735, "mean_pred_prob_last_10": 0.3030953160021454, "mean_pred_prob_last_25": 0.1803114460024517, "mean_pred_prob_last_50": 0.11061776625574567, "mean_token_accuracy": 0.8736365497112274, "step": 26530 }, { "epoch": 0.4717970597123709, "grad_norm": 1.7100646520643035, "learning_rate": 0.0001, "loss": 0.76, "mean_abs_error": 1048.1298865214223, "mean_abs_error_last_10": 441.4427602434177, "mean_abs_error_last_25": 563.5257728864236, "mean_abs_error_last_50": 778.9234506885844, "mean_pred_prob": 0.039084352293866684, "mean_pred_prob_last_10": 0.18435699516558088, "mean_pred_prob_last_25": 0.10399559687939472, "mean_pred_prob_last_50": 0.0646776372101158, "mean_token_accuracy": 0.873412013053894, "step": 26540 }, { "epoch": 0.47197482800917284, "grad_norm": 1.533282591215158, "learning_rate": 0.0001, "loss": 0.8624, "mean_abs_error": 287.1118478652196, "mean_abs_error_last_10": 155.93221078526022, "mean_abs_error_last_25": 149.44945136884536, "mean_abs_error_last_50": 192.3245019171085, "mean_pred_prob": 0.05064615621231496, "mean_pred_prob_last_10": 0.24077741554938256, "mean_pred_prob_last_25": 0.139393976284191, "mean_pred_prob_last_50": 0.08478003774071112, "mean_token_accuracy": 0.8696069300174714, "step": 26550 }, { "epoch": 0.4721525963059748, "grad_norm": 1.223160765262998, "learning_rate": 0.0001, "loss": 0.6981, "mean_abs_error": 246.85716184467907, "mean_abs_error_last_10": 175.38204952982636, "mean_abs_error_last_25": 263.2248682393603, "mean_abs_error_last_50": 252.82309122957304, "mean_pred_prob": 0.037374525074847044, "mean_pred_prob_last_10": 0.1928621746599674, "mean_pred_prob_last_25": 0.10385765684768558, "mean_pred_prob_last_50": 0.06324180448427796, "mean_token_accuracy": 0.877165412902832, "step": 26560 }, { "epoch": 0.4723303646027767, "grad_norm": 1.2578938437103053, "learning_rate": 0.0001, "loss": 0.8015, "mean_abs_error": 259.000017376648, "mean_abs_error_last_10": 68.8223687594699, "mean_abs_error_last_25": 74.52816192211745, "mean_abs_error_last_50": 114.1609490114229, "mean_pred_prob": 0.04879408224951476, "mean_pred_prob_last_10": 0.22580408342182637, "mean_pred_prob_last_25": 0.13341617630794644, "mean_pred_prob_last_50": 0.08178023686632514, "mean_token_accuracy": 0.8664313971996307, "step": 26570 }, { "epoch": 0.4725081328995787, "grad_norm": 3.5957881864985883, "learning_rate": 0.0001, "loss": 0.8131, "mean_abs_error": 681.7814470800355, "mean_abs_error_last_10": 318.11377166603626, "mean_abs_error_last_25": 397.7402966994344, "mean_abs_error_last_50": 511.5820139236601, "mean_pred_prob": 0.02611588689906057, "mean_pred_prob_last_10": 0.14619522301363758, "mean_pred_prob_last_25": 0.07761529501876793, "mean_pred_prob_last_50": 0.04507339100819081, "mean_token_accuracy": 0.8742624402046204, "step": 26580 }, { "epoch": 0.47268590119638065, "grad_norm": 2.4147865443815153, "learning_rate": 0.0001, "loss": 0.812, "mean_abs_error": 130.03783070639855, "mean_abs_error_last_10": 88.93561600209962, "mean_abs_error_last_25": 89.6458727989864, "mean_abs_error_last_50": 100.03511804539927, "mean_pred_prob": 0.045019431179389356, "mean_pred_prob_last_10": 0.2279921744018793, "mean_pred_prob_last_25": 0.12351707983762025, "mean_pred_prob_last_50": 0.07533516902476549, "mean_token_accuracy": 0.8613084077835083, "step": 26590 }, { "epoch": 0.4728636694931826, "grad_norm": 2.59063711550807, "learning_rate": 0.0001, "loss": 0.8387, "mean_abs_error": 920.8266340885451, "mean_abs_error_last_10": 486.74531674222607, "mean_abs_error_last_25": 540.631101038851, "mean_abs_error_last_50": 638.9610125562274, "mean_pred_prob": 0.04382154305931181, "mean_pred_prob_last_10": 0.20785639139940032, "mean_pred_prob_last_25": 0.11742481076798868, "mean_pred_prob_last_50": 0.07227016823890153, "mean_token_accuracy": 0.8879217684268952, "step": 26600 }, { "epoch": 0.47304143778998453, "grad_norm": 1.4322051546671426, "learning_rate": 0.0001, "loss": 0.6816, "mean_abs_error": 299.97663812677564, "mean_abs_error_last_10": 128.31301118566728, "mean_abs_error_last_25": 161.97688822521917, "mean_abs_error_last_50": 192.14499046495607, "mean_pred_prob": 0.04399365852586925, "mean_pred_prob_last_10": 0.20128856953233482, "mean_pred_prob_last_25": 0.11744213085621595, "mean_pred_prob_last_50": 0.07360663148574531, "mean_token_accuracy": 0.8751517117023468, "step": 26610 }, { "epoch": 0.47321920608678647, "grad_norm": 0.9763629817239748, "learning_rate": 0.0001, "loss": 0.7487, "mean_abs_error": 744.3750586418502, "mean_abs_error_last_10": 330.52601790157166, "mean_abs_error_last_25": 387.4177002069584, "mean_abs_error_last_50": 497.6604407508668, "mean_pred_prob": 0.039136329089524226, "mean_pred_prob_last_10": 0.1831528947863262, "mean_pred_prob_last_25": 0.10615087005426176, "mean_pred_prob_last_50": 0.065934222121723, "mean_token_accuracy": 0.8683833241462707, "step": 26620 }, { "epoch": 0.4733969743835884, "grad_norm": 1.4589665375713259, "learning_rate": 0.0001, "loss": 0.8164, "mean_abs_error": 707.8411514425475, "mean_abs_error_last_10": 158.67762685787926, "mean_abs_error_last_25": 222.1005698262194, "mean_abs_error_last_50": 397.17259621146917, "mean_pred_prob": 0.03280200496665202, "mean_pred_prob_last_10": 0.17966822426533327, "mean_pred_prob_last_25": 0.09537766582798213, "mean_pred_prob_last_50": 0.05553215977270156, "mean_token_accuracy": 0.8710585355758667, "step": 26630 }, { "epoch": 0.4735747426803904, "grad_norm": 0.9593340934352063, "learning_rate": 0.0001, "loss": 0.6829, "mean_abs_error": 239.81907293671188, "mean_abs_error_last_10": 220.2221464085858, "mean_abs_error_last_25": 288.5372578601878, "mean_abs_error_last_50": 265.3847116239867, "mean_pred_prob": 0.04696145080961287, "mean_pred_prob_last_10": 0.24742561336606742, "mean_pred_prob_last_25": 0.12955677900463342, "mean_pred_prob_last_50": 0.07969735199585556, "mean_token_accuracy": 0.8790133357048034, "step": 26640 }, { "epoch": 0.47375251097719234, "grad_norm": 1.1748626711573842, "learning_rate": 0.0001, "loss": 0.7524, "mean_abs_error": 141.33370249485512, "mean_abs_error_last_10": 26.10830001154876, "mean_abs_error_last_25": 51.57679408187586, "mean_abs_error_last_50": 89.49074086396206, "mean_pred_prob": 0.06089092628099024, "mean_pred_prob_last_10": 0.28370135501027105, "mean_pred_prob_last_25": 0.15361693929880857, "mean_pred_prob_last_50": 0.09800740433856844, "mean_token_accuracy": 0.8740029811859131, "step": 26650 }, { "epoch": 0.4739302792739943, "grad_norm": 1.5647415381589551, "learning_rate": 0.0001, "loss": 0.7189, "mean_abs_error": 613.7601006662469, "mean_abs_error_last_10": 136.464019137815, "mean_abs_error_last_25": 156.99908265713697, "mean_abs_error_last_50": 263.0410475254543, "mean_pred_prob": 0.030124990525655447, "mean_pred_prob_last_10": 0.15515267197042704, "mean_pred_prob_last_25": 0.08381216246634722, "mean_pred_prob_last_50": 0.05128765911795199, "mean_token_accuracy": 0.8729426085948944, "step": 26660 }, { "epoch": 0.4741080475707962, "grad_norm": 2.843799409038232, "learning_rate": 0.0001, "loss": 0.7164, "mean_abs_error": 872.1454773342405, "mean_abs_error_last_10": 297.6924823864382, "mean_abs_error_last_25": 444.8630493969481, "mean_abs_error_last_50": 570.5131540686402, "mean_pred_prob": 0.03463280840951484, "mean_pred_prob_last_10": 0.1573404833499808, "mean_pred_prob_last_25": 0.0933872577676084, "mean_pred_prob_last_50": 0.058200762438355015, "mean_token_accuracy": 0.8775970101356506, "step": 26670 }, { "epoch": 0.47428581586759816, "grad_norm": 1.6571272573064604, "learning_rate": 0.0001, "loss": 0.7956, "mean_abs_error": 345.8869884943031, "mean_abs_error_last_10": 224.4541972112961, "mean_abs_error_last_25": 225.9313554433059, "mean_abs_error_last_50": 244.22115010067324, "mean_pred_prob": 0.04431579262018204, "mean_pred_prob_last_10": 0.21449954530689866, "mean_pred_prob_last_25": 0.12042944051790982, "mean_pred_prob_last_50": 0.07478339137742296, "mean_token_accuracy": 0.8691492915153504, "step": 26680 }, { "epoch": 0.4744635841644001, "grad_norm": 2.245464818558926, "learning_rate": 0.0001, "loss": 0.7094, "mean_abs_error": 535.8447267145714, "mean_abs_error_last_10": 160.89637081619048, "mean_abs_error_last_25": 204.53022269703547, "mean_abs_error_last_50": 316.17024886682213, "mean_pred_prob": 0.03309002029709518, "mean_pred_prob_last_10": 0.16009769048541783, "mean_pred_prob_last_25": 0.08999243900179862, "mean_pred_prob_last_50": 0.055484468769282105, "mean_token_accuracy": 0.8646587789058685, "step": 26690 }, { "epoch": 0.4746413524612021, "grad_norm": 1.2523044908016376, "learning_rate": 0.0001, "loss": 0.6728, "mean_abs_error": 308.69541579023337, "mean_abs_error_last_10": 166.9172395509204, "mean_abs_error_last_25": 154.76751856000695, "mean_abs_error_last_50": 203.7711532548007, "mean_pred_prob": 0.030439787544310093, "mean_pred_prob_last_10": 0.1461604258045554, "mean_pred_prob_last_25": 0.08213452417403459, "mean_pred_prob_last_50": 0.05070554153062403, "mean_token_accuracy": 0.8832308351993561, "step": 26700 }, { "epoch": 0.47481912075800403, "grad_norm": 1.0893572387441253, "learning_rate": 0.0001, "loss": 0.6533, "mean_abs_error": 540.3288104505543, "mean_abs_error_last_10": 164.3446999819285, "mean_abs_error_last_25": 249.40449856046516, "mean_abs_error_last_50": 351.6125241269491, "mean_pred_prob": 0.04995717274723575, "mean_pred_prob_last_10": 0.24129612988326699, "mean_pred_prob_last_25": 0.13549183348659427, "mean_pred_prob_last_50": 0.08402806571684777, "mean_token_accuracy": 0.8795343399047851, "step": 26710 }, { "epoch": 0.47499688905480597, "grad_norm": 1.6611619978393377, "learning_rate": 0.0001, "loss": 0.6857, "mean_abs_error": 782.3094816440508, "mean_abs_error_last_10": 457.3673933562187, "mean_abs_error_last_25": 483.35573095207354, "mean_abs_error_last_50": 570.376313610396, "mean_pred_prob": 0.043243338997126554, "mean_pred_prob_last_10": 0.2184290838689776, "mean_pred_prob_last_25": 0.1217741521919379, "mean_pred_prob_last_50": 0.07347548637771979, "mean_token_accuracy": 0.8820594906806946, "step": 26720 }, { "epoch": 0.4751746573516079, "grad_norm": 1.0105349318791172, "learning_rate": 0.0001, "loss": 0.65, "mean_abs_error": 352.29881535200605, "mean_abs_error_last_10": 136.07244909000104, "mean_abs_error_last_25": 179.80691506564546, "mean_abs_error_last_50": 217.3939396798327, "mean_pred_prob": 0.029861048207385464, "mean_pred_prob_last_10": 0.1526605678605847, "mean_pred_prob_last_25": 0.08146237846231089, "mean_pred_prob_last_50": 0.05026386281242594, "mean_token_accuracy": 0.8802177548408509, "step": 26730 }, { "epoch": 0.47535242564840985, "grad_norm": 2.393345833591952, "learning_rate": 0.0001, "loss": 0.7607, "mean_abs_error": 342.4304995542648, "mean_abs_error_last_10": 91.87646974797977, "mean_abs_error_last_25": 162.65481077650233, "mean_abs_error_last_50": 234.14569440091773, "mean_pred_prob": 0.031743338285014036, "mean_pred_prob_last_10": 0.17480500414967537, "mean_pred_prob_last_25": 0.0908951205201447, "mean_pred_prob_last_50": 0.053824020503088835, "mean_token_accuracy": 0.8702291369438171, "step": 26740 }, { "epoch": 0.4755301939452118, "grad_norm": 1.2140336604401754, "learning_rate": 0.0001, "loss": 0.7459, "mean_abs_error": 216.53600252818782, "mean_abs_error_last_10": 53.70997717610027, "mean_abs_error_last_25": 95.33547068508707, "mean_abs_error_last_50": 127.44124850706666, "mean_pred_prob": 0.05133465905673802, "mean_pred_prob_last_10": 0.23234672024846076, "mean_pred_prob_last_25": 0.13582641314715146, "mean_pred_prob_last_50": 0.08547130161896348, "mean_token_accuracy": 0.8722403168678283, "step": 26750 }, { "epoch": 0.4757079622420138, "grad_norm": 0.9818346613834107, "learning_rate": 0.0001, "loss": 0.7739, "mean_abs_error": 324.3698926607445, "mean_abs_error_last_10": 117.28503164737542, "mean_abs_error_last_25": 169.1390510679353, "mean_abs_error_last_50": 210.19963898814262, "mean_pred_prob": 0.02995755630545318, "mean_pred_prob_last_10": 0.13776532392948865, "mean_pred_prob_last_25": 0.0794211464934051, "mean_pred_prob_last_50": 0.04951022434979677, "mean_token_accuracy": 0.8644422292709351, "step": 26760 }, { "epoch": 0.4758857305388157, "grad_norm": 1.1169540207934787, "learning_rate": 0.0001, "loss": 0.7392, "mean_abs_error": 424.93912924299394, "mean_abs_error_last_10": 114.88773184579956, "mean_abs_error_last_25": 145.3275476952328, "mean_abs_error_last_50": 237.6113895024841, "mean_pred_prob": 0.045572329545393586, "mean_pred_prob_last_10": 0.23804016653448343, "mean_pred_prob_last_25": 0.13164028953760862, "mean_pred_prob_last_50": 0.07801996795460582, "mean_token_accuracy": 0.8644766569137573, "step": 26770 }, { "epoch": 0.47606349883561766, "grad_norm": 2.2156895642726346, "learning_rate": 0.0001, "loss": 0.8373, "mean_abs_error": 1409.932562558711, "mean_abs_error_last_10": 758.7104469347757, "mean_abs_error_last_25": 937.7221302581924, "mean_abs_error_last_50": 1104.7806166645782, "mean_pred_prob": 0.02012108964845538, "mean_pred_prob_last_10": 0.10739755549584515, "mean_pred_prob_last_25": 0.05916583623620682, "mean_pred_prob_last_50": 0.034774357083369976, "mean_token_accuracy": 0.8688163340091706, "step": 26780 }, { "epoch": 0.4762412671324196, "grad_norm": 2.295028608518373, "learning_rate": 0.0001, "loss": 0.8941, "mean_abs_error": 570.8455345055091, "mean_abs_error_last_10": 84.47283812479795, "mean_abs_error_last_25": 151.8061352398898, "mean_abs_error_last_50": 271.3673110351867, "mean_pred_prob": 0.034955368004739286, "mean_pred_prob_last_10": 0.18007728103548287, "mean_pred_prob_last_25": 0.09773571435362101, "mean_pred_prob_last_50": 0.059285628609359264, "mean_token_accuracy": 0.8644549727439881, "step": 26790 }, { "epoch": 0.47641903542922154, "grad_norm": 1.8400485347674322, "learning_rate": 0.0001, "loss": 0.7916, "mean_abs_error": 608.0066545214544, "mean_abs_error_last_10": 219.94471650616393, "mean_abs_error_last_25": 315.25550198368023, "mean_abs_error_last_50": 417.9698576669759, "mean_pred_prob": 0.051265199296176435, "mean_pred_prob_last_10": 0.2536368195724208, "mean_pred_prob_last_25": 0.1402202137571294, "mean_pred_prob_last_50": 0.08550511814828496, "mean_token_accuracy": 0.8809881925582885, "step": 26800 }, { "epoch": 0.4765968037260235, "grad_norm": 1.660348857400978, "learning_rate": 0.0001, "loss": 0.885, "mean_abs_error": 592.4396839058379, "mean_abs_error_last_10": 287.09863978087975, "mean_abs_error_last_25": 313.8739836852483, "mean_abs_error_last_50": 383.92539952042745, "mean_pred_prob": 0.03826787973521277, "mean_pred_prob_last_10": 0.17859715222730302, "mean_pred_prob_last_25": 0.10146514688385651, "mean_pred_prob_last_50": 0.06345386986504309, "mean_token_accuracy": 0.8702094078063964, "step": 26810 }, { "epoch": 0.47677457202282547, "grad_norm": 1.0251376590047283, "learning_rate": 0.0001, "loss": 0.7945, "mean_abs_error": 1126.2101905913692, "mean_abs_error_last_10": 567.9532227858083, "mean_abs_error_last_25": 687.0953376338883, "mean_abs_error_last_50": 866.2945762213897, "mean_pred_prob": 0.026371434023894837, "mean_pred_prob_last_10": 0.12263350999855901, "mean_pred_prob_last_25": 0.06837161151925102, "mean_pred_prob_last_50": 0.04342304860183503, "mean_token_accuracy": 0.8821183443069458, "step": 26820 }, { "epoch": 0.4769523403196274, "grad_norm": 1.0755335476890815, "learning_rate": 0.0001, "loss": 0.6947, "mean_abs_error": 1382.9341954629078, "mean_abs_error_last_10": 598.5852967277881, "mean_abs_error_last_25": 740.4766430792284, "mean_abs_error_last_50": 975.2285671485163, "mean_pred_prob": 0.02764876735309372, "mean_pred_prob_last_10": 0.12566964967991226, "mean_pred_prob_last_25": 0.07273767208098433, "mean_pred_prob_last_50": 0.045861129323020576, "mean_token_accuracy": 0.8782580316066741, "step": 26830 }, { "epoch": 0.47713010861642935, "grad_norm": 1.4633002189839035, "learning_rate": 0.0001, "loss": 0.7272, "mean_abs_error": 278.8292124036158, "mean_abs_error_last_10": 108.3239091576038, "mean_abs_error_last_25": 130.74419095562126, "mean_abs_error_last_50": 179.7332235658949, "mean_pred_prob": 0.040186098869889976, "mean_pred_prob_last_10": 0.1999830264598131, "mean_pred_prob_last_25": 0.11013414841145278, "mean_pred_prob_last_50": 0.06771117383614182, "mean_token_accuracy": 0.8805695414543152, "step": 26840 }, { "epoch": 0.4773078769132313, "grad_norm": 2.0641071978952206, "learning_rate": 0.0001, "loss": 0.7797, "mean_abs_error": 965.6948071908591, "mean_abs_error_last_10": 281.68079697186465, "mean_abs_error_last_25": 349.35197420983303, "mean_abs_error_last_50": 517.5521465276368, "mean_pred_prob": 0.030056835373397917, "mean_pred_prob_last_10": 0.15051283495267853, "mean_pred_prob_last_25": 0.07968667487148196, "mean_pred_prob_last_50": 0.04940097913495265, "mean_token_accuracy": 0.8686817646026611, "step": 26850 }, { "epoch": 0.4774856452100332, "grad_norm": 1.1465915462521308, "learning_rate": 0.0001, "loss": 0.6447, "mean_abs_error": 159.27587531243125, "mean_abs_error_last_10": 25.339471598304762, "mean_abs_error_last_25": 49.93810917748971, "mean_abs_error_last_50": 83.97082321739182, "mean_pred_prob": 0.053295342205092314, "mean_pred_prob_last_10": 0.27252480685710906, "mean_pred_prob_last_25": 0.1493886575102806, "mean_pred_prob_last_50": 0.09068197896704078, "mean_token_accuracy": 0.8889629721641541, "step": 26860 }, { "epoch": 0.47766341350683517, "grad_norm": 1.1956443525407712, "learning_rate": 0.0001, "loss": 0.6835, "mean_abs_error": 586.7364471457182, "mean_abs_error_last_10": 291.0332769970522, "mean_abs_error_last_25": 360.49982066109027, "mean_abs_error_last_50": 446.95627300667684, "mean_pred_prob": 0.04863801399769727, "mean_pred_prob_last_10": 0.2103777185198851, "mean_pred_prob_last_25": 0.12637027312302962, "mean_pred_prob_last_50": 0.08038049266324379, "mean_token_accuracy": 0.8780975878238678, "step": 26870 }, { "epoch": 0.47784118180363716, "grad_norm": 5.043512007251779, "learning_rate": 0.0001, "loss": 1.2622, "mean_abs_error": 500.65790055670004, "mean_abs_error_last_10": 124.14483548571029, "mean_abs_error_last_25": 161.66155333628822, "mean_abs_error_last_50": 231.3246557939995, "mean_pred_prob": 0.03437968431389891, "mean_pred_prob_last_10": 0.18149711682926864, "mean_pred_prob_last_25": 0.09757758461637422, "mean_pred_prob_last_50": 0.05863564638420939, "mean_token_accuracy": 0.8758970618247985, "step": 26880 }, { "epoch": 0.4780189501004391, "grad_norm": 1.8155757955110732, "learning_rate": 0.0001, "loss": 0.7609, "mean_abs_error": 1386.001843011648, "mean_abs_error_last_10": 795.5250885961048, "mean_abs_error_last_25": 864.2644158299848, "mean_abs_error_last_50": 992.493448395347, "mean_pred_prob": 0.038953009145188844, "mean_pred_prob_last_10": 0.18064539393526502, "mean_pred_prob_last_25": 0.10576499868329847, "mean_pred_prob_last_50": 0.0665132622641977, "mean_token_accuracy": 0.8686839044094086, "step": 26890 }, { "epoch": 0.47819671839724104, "grad_norm": 2.2112660118815373, "learning_rate": 0.0001, "loss": 0.7114, "mean_abs_error": 160.95298077070262, "mean_abs_error_last_10": 111.99696125583284, "mean_abs_error_last_25": 117.43350065022594, "mean_abs_error_last_50": 120.26867705163974, "mean_pred_prob": 0.0440714071970433, "mean_pred_prob_last_10": 0.2084257945418358, "mean_pred_prob_last_25": 0.11770002553239464, "mean_pred_prob_last_50": 0.07321383883245289, "mean_token_accuracy": 0.8788025081157684, "step": 26900 }, { "epoch": 0.478374486694043, "grad_norm": 1.0468045944327107, "learning_rate": 0.0001, "loss": 0.714, "mean_abs_error": 669.2874838899954, "mean_abs_error_last_10": 211.6377511937256, "mean_abs_error_last_25": 307.53819705613853, "mean_abs_error_last_50": 428.9011216373477, "mean_pred_prob": 0.03463072715385351, "mean_pred_prob_last_10": 0.15943620256148278, "mean_pred_prob_last_25": 0.09153381045325659, "mean_pred_prob_last_50": 0.05759607508662157, "mean_token_accuracy": 0.8800638437271118, "step": 26910 }, { "epoch": 0.4785522549908449, "grad_norm": 2.063289039173032, "learning_rate": 0.0001, "loss": 0.7876, "mean_abs_error": 743.3367364797745, "mean_abs_error_last_10": 281.5145519151546, "mean_abs_error_last_25": 339.56506869091766, "mean_abs_error_last_50": 397.417332749107, "mean_pred_prob": 0.03488040710799396, "mean_pred_prob_last_10": 0.16665293148253113, "mean_pred_prob_last_25": 0.09400460972683504, "mean_pred_prob_last_50": 0.05850609273184091, "mean_token_accuracy": 0.8738790214061737, "step": 26920 }, { "epoch": 0.47873002328764686, "grad_norm": 1.4877379816794472, "learning_rate": 0.0001, "loss": 0.8693, "mean_abs_error": 1151.0777804413851, "mean_abs_error_last_10": 687.2976433335832, "mean_abs_error_last_25": 852.2402630286451, "mean_abs_error_last_50": 990.7855736660691, "mean_pred_prob": 0.018623665549966972, "mean_pred_prob_last_10": 0.11577740205830196, "mean_pred_prob_last_25": 0.05595937122270698, "mean_pred_prob_last_50": 0.03251792871451471, "mean_token_accuracy": 0.8709072232246399, "step": 26930 }, { "epoch": 0.47890779158444885, "grad_norm": 1.0581961936270352, "learning_rate": 0.0001, "loss": 0.6608, "mean_abs_error": 234.0419707641077, "mean_abs_error_last_10": 148.01968829102753, "mean_abs_error_last_25": 182.04147872030603, "mean_abs_error_last_50": 192.3191192189904, "mean_pred_prob": 0.04878855610731989, "mean_pred_prob_last_10": 0.2509133025072515, "mean_pred_prob_last_25": 0.13680649949237705, "mean_pred_prob_last_50": 0.08156639002263547, "mean_token_accuracy": 0.8838493406772614, "step": 26940 }, { "epoch": 0.4790855598812508, "grad_norm": 1.5640148798543678, "learning_rate": 0.0001, "loss": 0.8513, "mean_abs_error": 729.1929131374543, "mean_abs_error_last_10": 288.7355155512426, "mean_abs_error_last_25": 385.16051425380084, "mean_abs_error_last_50": 508.4447629584041, "mean_pred_prob": 0.028876499168109148, "mean_pred_prob_last_10": 0.15348388045094907, "mean_pred_prob_last_25": 0.08271926412708126, "mean_pred_prob_last_50": 0.04913228466757573, "mean_token_accuracy": 0.8636266767978669, "step": 26950 }, { "epoch": 0.4792633281780527, "grad_norm": 1.3218733096499866, "learning_rate": 0.0001, "loss": 0.7503, "mean_abs_error": 288.84581188397823, "mean_abs_error_last_10": 100.41834702859235, "mean_abs_error_last_25": 117.36027042197802, "mean_abs_error_last_50": 182.70983592917105, "mean_pred_prob": 0.02884085555560887, "mean_pred_prob_last_10": 0.15246928930282594, "mean_pred_prob_last_25": 0.08066817987710237, "mean_pred_prob_last_50": 0.04892055997624993, "mean_token_accuracy": 0.8732468128204346, "step": 26960 }, { "epoch": 0.47944109647485467, "grad_norm": 0.7324307132178183, "learning_rate": 0.0001, "loss": 0.755, "mean_abs_error": 309.6959203044982, "mean_abs_error_last_10": 106.9600073834569, "mean_abs_error_last_25": 152.81081235752612, "mean_abs_error_last_50": 188.29390074992622, "mean_pred_prob": 0.039943357929587364, "mean_pred_prob_last_10": 0.18141431994736196, "mean_pred_prob_last_25": 0.10564194694161415, "mean_pred_prob_last_50": 0.06553867114707827, "mean_token_accuracy": 0.8689801752567291, "step": 26970 }, { "epoch": 0.4796188647716566, "grad_norm": 1.1650066024961383, "learning_rate": 0.0001, "loss": 0.6447, "mean_abs_error": 722.9145795489252, "mean_abs_error_last_10": 289.46877885217583, "mean_abs_error_last_25": 316.6783553230865, "mean_abs_error_last_50": 415.38943425876494, "mean_pred_prob": 0.03605874871718697, "mean_pred_prob_last_10": 0.1695991518208757, "mean_pred_prob_last_25": 0.09536848624702543, "mean_pred_prob_last_50": 0.060143119818530974, "mean_token_accuracy": 0.8882929682731628, "step": 26980 }, { "epoch": 0.47979663306845854, "grad_norm": 1.501232085459037, "learning_rate": 0.0001, "loss": 0.6713, "mean_abs_error": 650.9271583206557, "mean_abs_error_last_10": 198.8283397291479, "mean_abs_error_last_25": 228.80298746667486, "mean_abs_error_last_50": 334.2053839226611, "mean_pred_prob": 0.03770868626888842, "mean_pred_prob_last_10": 0.20188631289638578, "mean_pred_prob_last_25": 0.10615169956581667, "mean_pred_prob_last_50": 0.06393102826550603, "mean_token_accuracy": 0.8770217776298523, "step": 26990 }, { "epoch": 0.47997440136526054, "grad_norm": 1.6098123489661578, "learning_rate": 0.0001, "loss": 0.6329, "mean_abs_error": 295.3632641873337, "mean_abs_error_last_10": 93.96869335713681, "mean_abs_error_last_25": 202.04267951554837, "mean_abs_error_last_50": 265.7660175803661, "mean_pred_prob": 0.042099223006516695, "mean_pred_prob_last_10": 0.19532254990190268, "mean_pred_prob_last_25": 0.11383065516129136, "mean_pred_prob_last_50": 0.07060437919571996, "mean_token_accuracy": 0.8905728459358215, "step": 27000 }, { "epoch": 0.4801521696620625, "grad_norm": 1.800073168671239, "learning_rate": 0.0001, "loss": 0.7823, "mean_abs_error": 740.5388505384785, "mean_abs_error_last_10": 169.46361391562138, "mean_abs_error_last_25": 388.93176328102777, "mean_abs_error_last_50": 577.4177494522612, "mean_pred_prob": 0.02645739186555147, "mean_pred_prob_last_10": 0.14933860152959824, "mean_pred_prob_last_25": 0.07549070967361331, "mean_pred_prob_last_50": 0.04540476864203811, "mean_token_accuracy": 0.8609293222427368, "step": 27010 }, { "epoch": 0.4803299379588644, "grad_norm": 1.570518006196276, "learning_rate": 0.0001, "loss": 0.7284, "mean_abs_error": 385.54112231647855, "mean_abs_error_last_10": 181.1715086184196, "mean_abs_error_last_25": 180.53971107496756, "mean_abs_error_last_50": 199.9665994153885, "mean_pred_prob": 0.05296842181123793, "mean_pred_prob_last_10": 0.2331342525780201, "mean_pred_prob_last_25": 0.13171685142442585, "mean_pred_prob_last_50": 0.08459883504547179, "mean_token_accuracy": 0.8715181708335876, "step": 27020 }, { "epoch": 0.48050770625566636, "grad_norm": 1.5337317015119656, "learning_rate": 0.0001, "loss": 0.7895, "mean_abs_error": 977.2992949918996, "mean_abs_error_last_10": 594.6129175230433, "mean_abs_error_last_25": 691.6437114743295, "mean_abs_error_last_50": 812.8131158664968, "mean_pred_prob": 0.034643587860045955, "mean_pred_prob_last_10": 0.16775191923952662, "mean_pred_prob_last_25": 0.0952835823118221, "mean_pred_prob_last_50": 0.05830519120208919, "mean_token_accuracy": 0.8728525698184967, "step": 27030 }, { "epoch": 0.4806854745524683, "grad_norm": 1.2222170634270437, "learning_rate": 0.0001, "loss": 0.7585, "mean_abs_error": 679.7897985527275, "mean_abs_error_last_10": 121.10490123222428, "mean_abs_error_last_25": 205.39287981833778, "mean_abs_error_last_50": 352.8359713864457, "mean_pred_prob": 0.023448215151438488, "mean_pred_prob_last_10": 0.12460499524604529, "mean_pred_prob_last_25": 0.06725425974000246, "mean_pred_prob_last_50": 0.04025544126052409, "mean_token_accuracy": 0.8767422497272491, "step": 27040 }, { "epoch": 0.48086324284927023, "grad_norm": 1.5190855565633863, "learning_rate": 0.0001, "loss": 0.737, "mean_abs_error": 221.42114451206103, "mean_abs_error_last_10": 113.40110238322015, "mean_abs_error_last_25": 179.37780976798274, "mean_abs_error_last_50": 179.3167929893418, "mean_pred_prob": 0.045546545460820195, "mean_pred_prob_last_10": 0.23175334129482508, "mean_pred_prob_last_25": 0.12408962659537792, "mean_pred_prob_last_50": 0.07629211358726025, "mean_token_accuracy": 0.87626713514328, "step": 27050 }, { "epoch": 0.48104101114607223, "grad_norm": 1.2637328194921669, "learning_rate": 0.0001, "loss": 0.9023, "mean_abs_error": 732.749268792001, "mean_abs_error_last_10": 310.8647594496955, "mean_abs_error_last_25": 416.3919251068349, "mean_abs_error_last_50": 495.88501368415336, "mean_pred_prob": 0.03781023157062009, "mean_pred_prob_last_10": 0.2003992203564849, "mean_pred_prob_last_25": 0.10612425993313082, "mean_pred_prob_last_50": 0.06382802928274031, "mean_token_accuracy": 0.8689472436904907, "step": 27060 }, { "epoch": 0.48121877944287417, "grad_norm": 1.9314734217464968, "learning_rate": 0.0001, "loss": 0.7078, "mean_abs_error": 355.2442094872375, "mean_abs_error_last_10": 51.51082335476481, "mean_abs_error_last_25": 81.64616973960787, "mean_abs_error_last_50": 207.70525779220856, "mean_pred_prob": 0.04426896390505135, "mean_pred_prob_last_10": 0.21223966218531132, "mean_pred_prob_last_25": 0.12072909250855446, "mean_pred_prob_last_50": 0.07319470904767514, "mean_token_accuracy": 0.8716757237911225, "step": 27070 }, { "epoch": 0.4813965477396761, "grad_norm": 2.424597701463565, "learning_rate": 0.0001, "loss": 0.8502, "mean_abs_error": 470.2144093395752, "mean_abs_error_last_10": 273.21268374899415, "mean_abs_error_last_25": 258.5313455369963, "mean_abs_error_last_50": 296.6562515186882, "mean_pred_prob": 0.04509086072794162, "mean_pred_prob_last_10": 0.22968154156114906, "mean_pred_prob_last_25": 0.12919206061633304, "mean_pred_prob_last_50": 0.07751763416454197, "mean_token_accuracy": 0.8707748293876648, "step": 27080 }, { "epoch": 0.48157431603647805, "grad_norm": 2.086171509295581, "learning_rate": 0.0001, "loss": 0.7715, "mean_abs_error": 536.9630185244306, "mean_abs_error_last_10": 246.14388239548293, "mean_abs_error_last_25": 271.66135259212797, "mean_abs_error_last_50": 356.97664595456973, "mean_pred_prob": 0.044001736812060696, "mean_pred_prob_last_10": 0.18932568526361138, "mean_pred_prob_last_25": 0.11026008011540397, "mean_pred_prob_last_50": 0.07021381338126957, "mean_token_accuracy": 0.875655061006546, "step": 27090 }, { "epoch": 0.48175208433328, "grad_norm": 1.3763876525220353, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 605.4323241473633, "mean_abs_error_last_10": 96.59058608264397, "mean_abs_error_last_25": 157.06638783325053, "mean_abs_error_last_50": 297.3507144417545, "mean_pred_prob": 0.04068736999761313, "mean_pred_prob_last_10": 0.19447793730068952, "mean_pred_prob_last_25": 0.11138004155363887, "mean_pred_prob_last_50": 0.06787690377095715, "mean_token_accuracy": 0.8749545753002167, "step": 27100 }, { "epoch": 0.481929852630082, "grad_norm": 1.5324367063317335, "learning_rate": 0.0001, "loss": 0.786, "mean_abs_error": 753.3876465520636, "mean_abs_error_last_10": 407.0080761939804, "mean_abs_error_last_25": 399.99596715966885, "mean_abs_error_last_50": 702.5103787228443, "mean_pred_prob": 0.030777097475947812, "mean_pred_prob_last_10": 0.1726676184218377, "mean_pred_prob_last_25": 0.08944309576181694, "mean_pred_prob_last_50": 0.052485431614331904, "mean_token_accuracy": 0.8744912266731262, "step": 27110 }, { "epoch": 0.4821076209268839, "grad_norm": 2.917140756719731, "learning_rate": 0.0001, "loss": 0.8065, "mean_abs_error": 515.8914638488367, "mean_abs_error_last_10": 199.83184527399197, "mean_abs_error_last_25": 222.02144617504382, "mean_abs_error_last_50": 287.85927151291173, "mean_pred_prob": 0.03287554401904345, "mean_pred_prob_last_10": 0.16475568395107984, "mean_pred_prob_last_25": 0.09246836369857192, "mean_pred_prob_last_50": 0.05610830876976251, "mean_token_accuracy": 0.8705001056194306, "step": 27120 }, { "epoch": 0.48228538922368586, "grad_norm": 1.665446155998693, "learning_rate": 0.0001, "loss": 0.7813, "mean_abs_error": 996.8932226290535, "mean_abs_error_last_10": 326.7094475820995, "mean_abs_error_last_25": 449.4753483896552, "mean_abs_error_last_50": 643.2082340729679, "mean_pred_prob": 0.03431818345852662, "mean_pred_prob_last_10": 0.18261209470801987, "mean_pred_prob_last_25": 0.09809783330420033, "mean_pred_prob_last_50": 0.0590304886281956, "mean_token_accuracy": 0.8714954078197479, "step": 27130 }, { "epoch": 0.4824631575204878, "grad_norm": 2.1698216840466995, "learning_rate": 0.0001, "loss": 0.8024, "mean_abs_error": 308.3844227017245, "mean_abs_error_last_10": 88.85022445768594, "mean_abs_error_last_25": 208.7780372174061, "mean_abs_error_last_50": 251.28646810096967, "mean_pred_prob": 0.061430505267344414, "mean_pred_prob_last_10": 0.2530229017138481, "mean_pred_prob_last_25": 0.15542854070663453, "mean_pred_prob_last_50": 0.09873024420812726, "mean_token_accuracy": 0.8690689504146576, "step": 27140 }, { "epoch": 0.48264092581728973, "grad_norm": 1.1615546739404226, "learning_rate": 0.0001, "loss": 0.6274, "mean_abs_error": 487.36215726041036, "mean_abs_error_last_10": 128.34804272580146, "mean_abs_error_last_25": 165.8469172955765, "mean_abs_error_last_50": 241.69776359047023, "mean_pred_prob": 0.01804740387015045, "mean_pred_prob_last_10": 0.10391744822263718, "mean_pred_prob_last_25": 0.05331695694476366, "mean_pred_prob_last_50": 0.031379205640405416, "mean_token_accuracy": 0.8762307286262512, "step": 27150 }, { "epoch": 0.4828186941140917, "grad_norm": 1.7818175675941756, "learning_rate": 0.0001, "loss": 0.8706, "mean_abs_error": 537.7340244704767, "mean_abs_error_last_10": 105.03461108692504, "mean_abs_error_last_25": 190.69543034633736, "mean_abs_error_last_50": 354.0134210689606, "mean_pred_prob": 0.02616423354484141, "mean_pred_prob_last_10": 0.14197541028261185, "mean_pred_prob_last_25": 0.0775697004981339, "mean_pred_prob_last_50": 0.04564309143461287, "mean_token_accuracy": 0.874872761964798, "step": 27160 }, { "epoch": 0.48299646241089367, "grad_norm": 1.0090901382731108, "learning_rate": 0.0001, "loss": 0.6783, "mean_abs_error": 205.06868448784604, "mean_abs_error_last_10": 89.97770981435825, "mean_abs_error_last_25": 103.27639085326237, "mean_abs_error_last_50": 115.88206993901932, "mean_pred_prob": 0.03079616669565439, "mean_pred_prob_last_10": 0.16122216433286668, "mean_pred_prob_last_25": 0.08779391143471002, "mean_pred_prob_last_50": 0.05227179042994976, "mean_token_accuracy": 0.8833321511745453, "step": 27170 }, { "epoch": 0.4831742307076956, "grad_norm": 3.5853935464521802, "learning_rate": 0.0001, "loss": 0.9248, "mean_abs_error": 1328.2643642890584, "mean_abs_error_last_10": 907.8984411136922, "mean_abs_error_last_25": 1001.4052874202176, "mean_abs_error_last_50": 1134.123380507733, "mean_pred_prob": 0.027471555255760904, "mean_pred_prob_last_10": 0.13500804367940872, "mean_pred_prob_last_25": 0.07360094203759218, "mean_pred_prob_last_50": 0.04535584625846241, "mean_token_accuracy": 0.8643081903457641, "step": 27180 }, { "epoch": 0.48335199900449755, "grad_norm": 2.64696838664778, "learning_rate": 0.0001, "loss": 0.7521, "mean_abs_error": 263.1572447759339, "mean_abs_error_last_10": 63.0176019983006, "mean_abs_error_last_25": 130.59129476278503, "mean_abs_error_last_50": 166.58571521219537, "mean_pred_prob": 0.04666894399560988, "mean_pred_prob_last_10": 0.2215973362326622, "mean_pred_prob_last_25": 0.12324383463710546, "mean_pred_prob_last_50": 0.07661407692357898, "mean_token_accuracy": 0.8796586573123932, "step": 27190 }, { "epoch": 0.4835297673012995, "grad_norm": 1.6946472814744364, "learning_rate": 0.0001, "loss": 0.7699, "mean_abs_error": 1181.5091064903859, "mean_abs_error_last_10": 552.1563892897872, "mean_abs_error_last_25": 641.3246446437287, "mean_abs_error_last_50": 784.8918601904797, "mean_pred_prob": 0.05048993275268003, "mean_pred_prob_last_10": 0.24362742666271514, "mean_pred_prob_last_25": 0.1359870936779771, "mean_pred_prob_last_50": 0.084075029779342, "mean_token_accuracy": 0.8675263285636902, "step": 27200 }, { "epoch": 0.4837075355981014, "grad_norm": 1.0924089493843254, "learning_rate": 0.0001, "loss": 0.8082, "mean_abs_error": 592.1325482928099, "mean_abs_error_last_10": 189.41055935911177, "mean_abs_error_last_25": 243.3308428099608, "mean_abs_error_last_50": 320.5816424598237, "mean_pred_prob": 0.027089910372160374, "mean_pred_prob_last_10": 0.12347594015300274, "mean_pred_prob_last_25": 0.06973479557782411, "mean_pred_prob_last_50": 0.044270028453320266, "mean_token_accuracy": 0.8813570439815521, "step": 27210 }, { "epoch": 0.48388530389490336, "grad_norm": 2.0061413527439633, "learning_rate": 0.0001, "loss": 0.7229, "mean_abs_error": 1098.7670502102235, "mean_abs_error_last_10": 590.3540858395417, "mean_abs_error_last_25": 617.2472118877309, "mean_abs_error_last_50": 702.4600001305096, "mean_pred_prob": 0.026904196315445005, "mean_pred_prob_last_10": 0.13678587450413032, "mean_pred_prob_last_25": 0.07698038849630393, "mean_pred_prob_last_50": 0.046054070070385936, "mean_token_accuracy": 0.8633784532546998, "step": 27220 }, { "epoch": 0.48406307219170536, "grad_norm": 1.3642465398351118, "learning_rate": 0.0001, "loss": 0.6685, "mean_abs_error": 656.7019679303428, "mean_abs_error_last_10": 183.5531310607083, "mean_abs_error_last_25": 202.59802426396456, "mean_abs_error_last_50": 340.5302400878014, "mean_pred_prob": 0.03387801258359104, "mean_pred_prob_last_10": 0.1605301447212696, "mean_pred_prob_last_25": 0.0919258731417358, "mean_pred_prob_last_50": 0.05669195638038218, "mean_token_accuracy": 0.8710405826568604, "step": 27230 }, { "epoch": 0.4842408404885073, "grad_norm": 1.1980911442638822, "learning_rate": 0.0001, "loss": 0.7922, "mean_abs_error": 1539.89538133335, "mean_abs_error_last_10": 671.6081700429522, "mean_abs_error_last_25": 815.1768157066366, "mean_abs_error_last_50": 1109.3661174457848, "mean_pred_prob": 0.05045895378352725, "mean_pred_prob_last_10": 0.22500735601934138, "mean_pred_prob_last_25": 0.13447930919210194, "mean_pred_prob_last_50": 0.08343883676279802, "mean_token_accuracy": 0.8645274877548218, "step": 27240 }, { "epoch": 0.48441860878530923, "grad_norm": 1.5110061976893259, "learning_rate": 0.0001, "loss": 0.6964, "mean_abs_error": 461.58875458027944, "mean_abs_error_last_10": 254.94097720450273, "mean_abs_error_last_25": 334.2417147599984, "mean_abs_error_last_50": 410.9630476235517, "mean_pred_prob": 0.03463516470510512, "mean_pred_prob_last_10": 0.17610254026949407, "mean_pred_prob_last_25": 0.09619863345287741, "mean_pred_prob_last_50": 0.05782516109757126, "mean_token_accuracy": 0.8762535095214844, "step": 27250 }, { "epoch": 0.4845963770821112, "grad_norm": 1.311743773749261, "learning_rate": 0.0001, "loss": 0.7004, "mean_abs_error": 287.66482899433737, "mean_abs_error_last_10": 61.288080539107014, "mean_abs_error_last_25": 103.98793692388271, "mean_abs_error_last_50": 171.09670979670892, "mean_pred_prob": 0.040249138278886674, "mean_pred_prob_last_10": 0.20710078701376916, "mean_pred_prob_last_25": 0.11023283898830413, "mean_pred_prob_last_50": 0.06746454695239663, "mean_token_accuracy": 0.8729682445526123, "step": 27260 }, { "epoch": 0.4847741453789131, "grad_norm": 3.2563586788910546, "learning_rate": 0.0001, "loss": 0.8639, "mean_abs_error": 652.070824894802, "mean_abs_error_last_10": 398.19441624561216, "mean_abs_error_last_25": 436.22612953490295, "mean_abs_error_last_50": 477.3218749542858, "mean_pred_prob": 0.03293170613178518, "mean_pred_prob_last_10": 0.1720760362688452, "mean_pred_prob_last_25": 0.09616880081302952, "mean_pred_prob_last_50": 0.057238194084493446, "mean_token_accuracy": 0.8750689685344696, "step": 27270 }, { "epoch": 0.48495191367571505, "grad_norm": 1.2891800090844914, "learning_rate": 0.0001, "loss": 0.944, "mean_abs_error": 240.5108491402052, "mean_abs_error_last_10": 65.5427943272995, "mean_abs_error_last_25": 77.77536508938539, "mean_abs_error_last_50": 143.2568314267451, "mean_pred_prob": 0.05159393954090774, "mean_pred_prob_last_10": 0.24843791425228118, "mean_pred_prob_last_25": 0.14929529512301087, "mean_pred_prob_last_50": 0.08931348798796535, "mean_token_accuracy": 0.8748648226261139, "step": 27280 }, { "epoch": 0.48512968197251705, "grad_norm": 1.8255378303245615, "learning_rate": 0.0001, "loss": 0.7871, "mean_abs_error": 837.7027108413301, "mean_abs_error_last_10": 210.25583323904775, "mean_abs_error_last_25": 299.65570281019905, "mean_abs_error_last_50": 468.0007855105654, "mean_pred_prob": 0.03786782749230042, "mean_pred_prob_last_10": 0.17594910408370196, "mean_pred_prob_last_25": 0.10451403700280934, "mean_pred_prob_last_50": 0.06305584537331015, "mean_token_accuracy": 0.8730930387973785, "step": 27290 }, { "epoch": 0.485307450269319, "grad_norm": 1.1050725075917793, "learning_rate": 0.0001, "loss": 0.6957, "mean_abs_error": 169.8238594590249, "mean_abs_error_last_10": 56.70920830350129, "mean_abs_error_last_25": 77.83424982242317, "mean_abs_error_last_50": 119.39581936058948, "mean_pred_prob": 0.05634823115542531, "mean_pred_prob_last_10": 0.2676183141767979, "mean_pred_prob_last_25": 0.15790447760373355, "mean_pred_prob_last_50": 0.09625756433233619, "mean_token_accuracy": 0.8689898014068603, "step": 27300 }, { "epoch": 0.4854852185661209, "grad_norm": 1.3783860459187713, "learning_rate": 0.0001, "loss": 0.8585, "mean_abs_error": 482.68523935924577, "mean_abs_error_last_10": 112.61037422585284, "mean_abs_error_last_25": 153.41971163813534, "mean_abs_error_last_50": 242.3525100215505, "mean_pred_prob": 0.04361446276307106, "mean_pred_prob_last_10": 0.213916107325349, "mean_pred_prob_last_25": 0.11916073185857386, "mean_pred_prob_last_50": 0.07297521377913654, "mean_token_accuracy": 0.8722039103507996, "step": 27310 }, { "epoch": 0.48566298686292286, "grad_norm": 1.65571471149986, "learning_rate": 0.0001, "loss": 0.6546, "mean_abs_error": 234.854135139445, "mean_abs_error_last_10": 85.08916804710519, "mean_abs_error_last_25": 114.72028931532425, "mean_abs_error_last_50": 167.50560380676794, "mean_pred_prob": 0.042490617278963326, "mean_pred_prob_last_10": 0.20794630236923695, "mean_pred_prob_last_25": 0.11677987230941653, "mean_pred_prob_last_50": 0.07073396029882133, "mean_token_accuracy": 0.8769596934318542, "step": 27320 }, { "epoch": 0.4858407551597248, "grad_norm": 2.9407018765279265, "learning_rate": 0.0001, "loss": 0.7267, "mean_abs_error": 212.65972711131812, "mean_abs_error_last_10": 142.73678254464133, "mean_abs_error_last_25": 144.26882504255633, "mean_abs_error_last_50": 158.5942928875654, "mean_pred_prob": 0.03679129513911903, "mean_pred_prob_last_10": 0.1746873389929533, "mean_pred_prob_last_25": 0.10037923771888017, "mean_pred_prob_last_50": 0.061950696259737016, "mean_token_accuracy": 0.8760949850082398, "step": 27330 }, { "epoch": 0.48601852345652674, "grad_norm": 2.4933173239373807, "learning_rate": 0.0001, "loss": 0.7761, "mean_abs_error": 218.37459202232054, "mean_abs_error_last_10": 99.01050839716798, "mean_abs_error_last_25": 118.72132953895309, "mean_abs_error_last_50": 131.45795852200837, "mean_pred_prob": 0.04203988774679601, "mean_pred_prob_last_10": 0.20849675573408605, "mean_pred_prob_last_25": 0.11823881678283214, "mean_pred_prob_last_50": 0.07181634120643139, "mean_token_accuracy": 0.8596165180206299, "step": 27340 }, { "epoch": 0.48619629175332874, "grad_norm": 1.9801049374244732, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 662.2730787612576, "mean_abs_error_last_10": 162.71073460596932, "mean_abs_error_last_25": 429.3360927481173, "mean_abs_error_last_50": 478.1894198900521, "mean_pred_prob": 0.02811978159006685, "mean_pred_prob_last_10": 0.1460442764684558, "mean_pred_prob_last_25": 0.0774027232080698, "mean_pred_prob_last_50": 0.04721743841655553, "mean_token_accuracy": 0.877424156665802, "step": 27350 }, { "epoch": 0.4863740600501307, "grad_norm": 1.6221758073885624, "learning_rate": 0.0001, "loss": 0.7333, "mean_abs_error": 305.1929654693673, "mean_abs_error_last_10": 111.5462209828271, "mean_abs_error_last_25": 177.14628992362992, "mean_abs_error_last_50": 201.25272282167015, "mean_pred_prob": 0.05296428555157036, "mean_pred_prob_last_10": 0.21811134221497924, "mean_pred_prob_last_25": 0.13200499840313568, "mean_pred_prob_last_50": 0.0861426048912108, "mean_token_accuracy": 0.8734404802322387, "step": 27360 }, { "epoch": 0.4865518283469326, "grad_norm": 1.8758278213893804, "learning_rate": 0.0001, "loss": 0.8339, "mean_abs_error": 380.5919881588537, "mean_abs_error_last_10": 120.78268106720527, "mean_abs_error_last_25": 158.71168410113063, "mean_abs_error_last_50": 219.6300783798889, "mean_pred_prob": 0.0475191039906349, "mean_pred_prob_last_10": 0.2323136368766427, "mean_pred_prob_last_25": 0.13234069745521992, "mean_pred_prob_last_50": 0.08032772768056021, "mean_token_accuracy": 0.8710181772708893, "step": 27370 }, { "epoch": 0.48672959664373455, "grad_norm": 1.8773375245906838, "learning_rate": 0.0001, "loss": 0.7572, "mean_abs_error": 1031.41755294589, "mean_abs_error_last_10": 359.8528706655854, "mean_abs_error_last_25": 442.1812045282803, "mean_abs_error_last_50": 555.8325927496874, "mean_pred_prob": 0.03149627960519865, "mean_pred_prob_last_10": 0.15777190822991544, "mean_pred_prob_last_25": 0.08635251794476062, "mean_pred_prob_last_50": 0.05344039362971671, "mean_token_accuracy": 0.868835723400116, "step": 27380 }, { "epoch": 0.4869073649405365, "grad_norm": 1.8693594103215394, "learning_rate": 0.0001, "loss": 0.8141, "mean_abs_error": 513.833167712091, "mean_abs_error_last_10": 221.14885443770223, "mean_abs_error_last_25": 308.7487780964467, "mean_abs_error_last_50": 374.7726521624822, "mean_pred_prob": 0.04160973426187411, "mean_pred_prob_last_10": 0.18550216768635436, "mean_pred_prob_last_25": 0.11080654672696255, "mean_pred_prob_last_50": 0.06888974875328131, "mean_token_accuracy": 0.8731107234954834, "step": 27390 }, { "epoch": 0.48708513323733843, "grad_norm": 1.9867675155605269, "learning_rate": 0.0001, "loss": 0.8451, "mean_abs_error": 529.0797687710508, "mean_abs_error_last_10": 333.11934274798733, "mean_abs_error_last_25": 296.92235682542696, "mean_abs_error_last_50": 349.8773961753253, "mean_pred_prob": 0.02398724724771455, "mean_pred_prob_last_10": 0.11076337308622897, "mean_pred_prob_last_25": 0.06310791978612543, "mean_pred_prob_last_50": 0.03978370788972825, "mean_token_accuracy": 0.8613498985767365, "step": 27400 }, { "epoch": 0.4872629015341404, "grad_norm": 1.664702832698146, "learning_rate": 0.0001, "loss": 0.7461, "mean_abs_error": 464.2938794269029, "mean_abs_error_last_10": 237.5543887540877, "mean_abs_error_last_25": 283.7350311207814, "mean_abs_error_last_50": 301.6575079380191, "mean_pred_prob": 0.03706576731055975, "mean_pred_prob_last_10": 0.17529492620378734, "mean_pred_prob_last_25": 0.0925202064216137, "mean_pred_prob_last_50": 0.058621923346072434, "mean_token_accuracy": 0.8666884362697601, "step": 27410 }, { "epoch": 0.48744066983094236, "grad_norm": 1.2397058982008637, "learning_rate": 0.0001, "loss": 0.8188, "mean_abs_error": 349.5734682322225, "mean_abs_error_last_10": 122.63142152750228, "mean_abs_error_last_25": 164.05450654534224, "mean_abs_error_last_50": 254.1608279144137, "mean_pred_prob": 0.026893889578059316, "mean_pred_prob_last_10": 0.1395590053871274, "mean_pred_prob_last_25": 0.07576804542914033, "mean_pred_prob_last_50": 0.04576076553203166, "mean_token_accuracy": 0.8771149396896363, "step": 27420 }, { "epoch": 0.4876184381277443, "grad_norm": 1.1986836068705755, "learning_rate": 0.0001, "loss": 0.845, "mean_abs_error": 408.8758329730587, "mean_abs_error_last_10": 127.76505646733594, "mean_abs_error_last_25": 191.74780769081053, "mean_abs_error_last_50": 266.20293828673925, "mean_pred_prob": 0.04373017373145558, "mean_pred_prob_last_10": 0.21641314869048073, "mean_pred_prob_last_25": 0.117165023740381, "mean_pred_prob_last_50": 0.07246790998615324, "mean_token_accuracy": 0.8639617800712586, "step": 27430 }, { "epoch": 0.48779620642454624, "grad_norm": 1.1991325885600206, "learning_rate": 0.0001, "loss": 0.9421, "mean_abs_error": 253.81113888058084, "mean_abs_error_last_10": 78.70860157838419, "mean_abs_error_last_25": 99.57231373107086, "mean_abs_error_last_50": 153.50841372566865, "mean_pred_prob": 0.03954985234886408, "mean_pred_prob_last_10": 0.19461134895682336, "mean_pred_prob_last_25": 0.10882830005139113, "mean_pred_prob_last_50": 0.06654302915558219, "mean_token_accuracy": 0.8693852901458741, "step": 27440 }, { "epoch": 0.4879739747213482, "grad_norm": 0.8926891814746013, "learning_rate": 0.0001, "loss": 0.7129, "mean_abs_error": 979.6960620177808, "mean_abs_error_last_10": 525.5145131648446, "mean_abs_error_last_25": 634.079912595141, "mean_abs_error_last_50": 720.0204553784195, "mean_pred_prob": 0.026717559769167565, "mean_pred_prob_last_10": 0.14299790523946285, "mean_pred_prob_last_25": 0.07924454151361715, "mean_pred_prob_last_50": 0.046592171626980414, "mean_token_accuracy": 0.8760653138160706, "step": 27450 }, { "epoch": 0.4881517430181501, "grad_norm": 1.9985733923900721, "learning_rate": 0.0001, "loss": 0.7661, "mean_abs_error": 533.5185797783231, "mean_abs_error_last_10": 227.42171906339587, "mean_abs_error_last_25": 235.9932609892109, "mean_abs_error_last_50": 289.2413345953502, "mean_pred_prob": 0.020835378067567946, "mean_pred_prob_last_10": 0.11782247030641883, "mean_pred_prob_last_25": 0.06004075054079294, "mean_pred_prob_last_50": 0.03536024171626195, "mean_token_accuracy": 0.8819698393344879, "step": 27460 }, { "epoch": 0.4883295113149521, "grad_norm": 2.4529687476753925, "learning_rate": 0.0001, "loss": 0.793, "mean_abs_error": 570.6496839976037, "mean_abs_error_last_10": 129.8605304727892, "mean_abs_error_last_25": 334.538689564153, "mean_abs_error_last_50": 470.47027023579085, "mean_pred_prob": 0.02315923317801207, "mean_pred_prob_last_10": 0.1286363245919347, "mean_pred_prob_last_25": 0.0649931906722486, "mean_pred_prob_last_50": 0.03877099044620991, "mean_token_accuracy": 0.8640794396400452, "step": 27470 }, { "epoch": 0.48850727961175405, "grad_norm": 1.9671738528815235, "learning_rate": 0.0001, "loss": 0.7595, "mean_abs_error": 96.94386631690746, "mean_abs_error_last_10": 40.686807872593064, "mean_abs_error_last_25": 54.675315108770505, "mean_abs_error_last_50": 67.26405605046519, "mean_pred_prob": 0.05835921373218298, "mean_pred_prob_last_10": 0.285646603256464, "mean_pred_prob_last_25": 0.16344806384295224, "mean_pred_prob_last_50": 0.09846327025443316, "mean_token_accuracy": 0.8647372305393219, "step": 27480 }, { "epoch": 0.488685047908556, "grad_norm": 1.2529215185046896, "learning_rate": 0.0001, "loss": 0.715, "mean_abs_error": 101.78653659408761, "mean_abs_error_last_10": 33.87430742312499, "mean_abs_error_last_25": 53.18620179540649, "mean_abs_error_last_50": 70.60295289378732, "mean_pred_prob": 0.06093705128878355, "mean_pred_prob_last_10": 0.2809339564293623, "mean_pred_prob_last_25": 0.15318014118820428, "mean_pred_prob_last_50": 0.09756549429148435, "mean_token_accuracy": 0.8828345000743866, "step": 27490 }, { "epoch": 0.48886281620535793, "grad_norm": 1.3618740844814723, "learning_rate": 0.0001, "loss": 0.7551, "mean_abs_error": 1174.146116307808, "mean_abs_error_last_10": 581.4810974091668, "mean_abs_error_last_25": 682.2624040062107, "mean_abs_error_last_50": 860.3521679524498, "mean_pred_prob": 0.06454686894430779, "mean_pred_prob_last_10": 0.273413525451906, "mean_pred_prob_last_25": 0.1642004433000693, "mean_pred_prob_last_50": 0.10379785133700352, "mean_token_accuracy": 0.8663171410560608, "step": 27500 }, { "epoch": 0.48904058450215987, "grad_norm": 1.667689592534617, "learning_rate": 0.0001, "loss": 0.7619, "mean_abs_error": 223.04466338580772, "mean_abs_error_last_10": 35.900574326514416, "mean_abs_error_last_25": 91.17459746283049, "mean_abs_error_last_50": 175.84153236785158, "mean_pred_prob": 0.048542801290750504, "mean_pred_prob_last_10": 0.22777837850153446, "mean_pred_prob_last_25": 0.1288132479414344, "mean_pred_prob_last_50": 0.08026258088648319, "mean_token_accuracy": 0.8687343418598175, "step": 27510 }, { "epoch": 0.4892183527989618, "grad_norm": 1.319847947138216, "learning_rate": 0.0001, "loss": 0.7906, "mean_abs_error": 1341.9409249805879, "mean_abs_error_last_10": 608.1346101712386, "mean_abs_error_last_25": 651.6846999103597, "mean_abs_error_last_50": 856.7224630488756, "mean_pred_prob": 0.01331654804525897, "mean_pred_prob_last_10": 0.07926083938800729, "mean_pred_prob_last_25": 0.040720238591893575, "mean_pred_prob_last_50": 0.02326297796680592, "mean_token_accuracy": 0.8677259743213653, "step": 27520 }, { "epoch": 0.4893961210957638, "grad_norm": 1.1182159222544434, "learning_rate": 0.0001, "loss": 0.815, "mean_abs_error": 1074.6193540697398, "mean_abs_error_last_10": 602.2108487571726, "mean_abs_error_last_25": 629.360830847684, "mean_abs_error_last_50": 796.7472864303995, "mean_pred_prob": 0.03877332243573619, "mean_pred_prob_last_10": 0.20358547692885623, "mean_pred_prob_last_25": 0.11088969416450709, "mean_pred_prob_last_50": 0.06649856611329596, "mean_token_accuracy": 0.8734529912471771, "step": 27530 }, { "epoch": 0.48957388939256574, "grad_norm": 1.423177763893846, "learning_rate": 0.0001, "loss": 0.8412, "mean_abs_error": 216.11069099943916, "mean_abs_error_last_10": 86.2555333144785, "mean_abs_error_last_25": 117.7902948944948, "mean_abs_error_last_50": 155.4792324891565, "mean_pred_prob": 0.04138994216918945, "mean_pred_prob_last_10": 0.21071486175060272, "mean_pred_prob_last_25": 0.11569615285843611, "mean_pred_prob_last_50": 0.06969826109707355, "mean_token_accuracy": 0.8725154161453247, "step": 27540 }, { "epoch": 0.4897516576893677, "grad_norm": 1.6760609476411583, "learning_rate": 0.0001, "loss": 0.8473, "mean_abs_error": 155.60010207076078, "mean_abs_error_last_10": 67.84942562444375, "mean_abs_error_last_25": 80.99828307136669, "mean_abs_error_last_50": 123.45477687179626, "mean_pred_prob": 0.04234459409490228, "mean_pred_prob_last_10": 0.21389747075736523, "mean_pred_prob_last_25": 0.11630111280828714, "mean_pred_prob_last_50": 0.07063923161476851, "mean_token_accuracy": 0.8738908767700195, "step": 27550 }, { "epoch": 0.4899294259861696, "grad_norm": 1.1542042960061025, "learning_rate": 0.0001, "loss": 0.806, "mean_abs_error": 860.3222027247105, "mean_abs_error_last_10": 372.85082532513695, "mean_abs_error_last_25": 465.8203513282515, "mean_abs_error_last_50": 586.3534908860527, "mean_pred_prob": 0.04104122432181612, "mean_pred_prob_last_10": 0.19357176245248411, "mean_pred_prob_last_25": 0.10802789338340517, "mean_pred_prob_last_50": 0.06847392480121925, "mean_token_accuracy": 0.8727589964866638, "step": 27560 }, { "epoch": 0.49010719428297156, "grad_norm": 2.454026295715888, "learning_rate": 0.0001, "loss": 0.8791, "mean_abs_error": 276.46484912458646, "mean_abs_error_last_10": 161.48933174156446, "mean_abs_error_last_25": 153.3741652295985, "mean_abs_error_last_50": 160.6270354224269, "mean_pred_prob": 0.04685862341430038, "mean_pred_prob_last_10": 0.22872104719281197, "mean_pred_prob_last_25": 0.12888444494456053, "mean_pred_prob_last_50": 0.07868777508847416, "mean_token_accuracy": 0.8678605735301972, "step": 27570 }, { "epoch": 0.4902849625797735, "grad_norm": 1.3724638452693272, "learning_rate": 0.0001, "loss": 0.7668, "mean_abs_error": 180.0517080127267, "mean_abs_error_last_10": 53.90961790658686, "mean_abs_error_last_25": 86.89279373544036, "mean_abs_error_last_50": 121.94723428711862, "mean_pred_prob": 0.03189388094469905, "mean_pred_prob_last_10": 0.1633495081216097, "mean_pred_prob_last_25": 0.08704805988818407, "mean_pred_prob_last_50": 0.05374131444841623, "mean_token_accuracy": 0.871826809644699, "step": 27580 }, { "epoch": 0.4904627308765755, "grad_norm": 1.5157305155075338, "learning_rate": 0.0001, "loss": 0.771, "mean_abs_error": 418.01455058731733, "mean_abs_error_last_10": 145.977932854666, "mean_abs_error_last_25": 186.10710254706524, "mean_abs_error_last_50": 288.40839509912155, "mean_pred_prob": 0.021744298445992172, "mean_pred_prob_last_10": 0.11473431512713432, "mean_pred_prob_last_25": 0.06108526857569814, "mean_pred_prob_last_50": 0.03683268604800105, "mean_token_accuracy": 0.8701841175556183, "step": 27590 }, { "epoch": 0.49064049917337743, "grad_norm": 2.0979819018173305, "learning_rate": 0.0001, "loss": 0.8036, "mean_abs_error": 163.487398629599, "mean_abs_error_last_10": 45.98075021482201, "mean_abs_error_last_25": 73.93126845631562, "mean_abs_error_last_50": 105.92555482701786, "mean_pred_prob": 0.05086289923638106, "mean_pred_prob_last_10": 0.2472103714942932, "mean_pred_prob_last_25": 0.1408525349572301, "mean_pred_prob_last_50": 0.08625019993633032, "mean_token_accuracy": 0.8680519938468934, "step": 27600 }, { "epoch": 0.49081826747017937, "grad_norm": 1.037388057701121, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 962.8297625770283, "mean_abs_error_last_10": 511.259999845742, "mean_abs_error_last_25": 600.7284731525881, "mean_abs_error_last_50": 712.3084981059841, "mean_pred_prob": 0.028775146682164632, "mean_pred_prob_last_10": 0.14681527003704103, "mean_pred_prob_last_25": 0.08105157935351599, "mean_pred_prob_last_50": 0.048898215347435325, "mean_token_accuracy": 0.8737205147743226, "step": 27610 }, { "epoch": 0.4909960357669813, "grad_norm": 1.1308013134414034, "learning_rate": 0.0001, "loss": 0.7683, "mean_abs_error": 286.27859107437996, "mean_abs_error_last_10": 80.42365464820192, "mean_abs_error_last_25": 90.03911479761375, "mean_abs_error_last_50": 146.87898382049286, "mean_pred_prob": 0.027782967081293464, "mean_pred_prob_last_10": 0.15357586666941642, "mean_pred_prob_last_25": 0.0787921192124486, "mean_pred_prob_last_50": 0.04755912525579333, "mean_token_accuracy": 0.8705261826515198, "step": 27620 }, { "epoch": 0.49117380406378325, "grad_norm": 2.465365449475145, "learning_rate": 0.0001, "loss": 0.699, "mean_abs_error": 123.02468950516361, "mean_abs_error_last_10": 31.513118818972607, "mean_abs_error_last_25": 79.13999159208242, "mean_abs_error_last_50": 86.97246095009369, "mean_pred_prob": 0.04778702277690172, "mean_pred_prob_last_10": 0.21391279771924018, "mean_pred_prob_last_25": 0.1250953983515501, "mean_pred_prob_last_50": 0.07903308942914009, "mean_token_accuracy": 0.8743866503238678, "step": 27630 }, { "epoch": 0.4913515723605852, "grad_norm": 2.2850101662446396, "learning_rate": 0.0001, "loss": 0.7637, "mean_abs_error": 1472.5586116206714, "mean_abs_error_last_10": 786.9843306413857, "mean_abs_error_last_25": 928.8711713636816, "mean_abs_error_last_50": 1156.207005050935, "mean_pred_prob": 0.032065964203502516, "mean_pred_prob_last_10": 0.15783398755593225, "mean_pred_prob_last_25": 0.08801213817787357, "mean_pred_prob_last_50": 0.05311548092286102, "mean_token_accuracy": 0.8742304265499115, "step": 27640 }, { "epoch": 0.4915293406573872, "grad_norm": 0.9102487474453413, "learning_rate": 0.0001, "loss": 0.7482, "mean_abs_error": 368.31541138159156, "mean_abs_error_last_10": 82.30466400836447, "mean_abs_error_last_25": 133.72672700311097, "mean_abs_error_last_50": 175.31231767616427, "mean_pred_prob": 0.026265671057626606, "mean_pred_prob_last_10": 0.1304654572159052, "mean_pred_prob_last_25": 0.0713858488947153, "mean_pred_prob_last_50": 0.04476153133437037, "mean_token_accuracy": 0.8740793824195862, "step": 27650 }, { "epoch": 0.4917071089541891, "grad_norm": 1.443763971363003, "learning_rate": 0.0001, "loss": 0.7863, "mean_abs_error": 849.8182868336611, "mean_abs_error_last_10": 192.26004307862337, "mean_abs_error_last_25": 296.3520344913104, "mean_abs_error_last_50": 485.1544642043403, "mean_pred_prob": 0.04253992505837232, "mean_pred_prob_last_10": 0.19110828986158596, "mean_pred_prob_last_25": 0.11114010679884814, "mean_pred_prob_last_50": 0.07081967484555207, "mean_token_accuracy": 0.8802192032337188, "step": 27660 }, { "epoch": 0.49188487725099106, "grad_norm": 2.4308215357574423, "learning_rate": 0.0001, "loss": 0.7944, "mean_abs_error": 692.6694343314327, "mean_abs_error_last_10": 316.0920056694312, "mean_abs_error_last_25": 394.2817353906941, "mean_abs_error_last_50": 521.8996605684081, "mean_pred_prob": 0.041046265955083074, "mean_pred_prob_last_10": 0.20288570751436055, "mean_pred_prob_last_25": 0.11584480023011565, "mean_pred_prob_last_50": 0.07009772929595784, "mean_token_accuracy": 0.8690858900547027, "step": 27670 }, { "epoch": 0.492062645547793, "grad_norm": 1.2517782586904045, "learning_rate": 0.0001, "loss": 0.647, "mean_abs_error": 170.3076339995314, "mean_abs_error_last_10": 67.90153991297706, "mean_abs_error_last_25": 68.77683811825707, "mean_abs_error_last_50": 90.97581916324069, "mean_pred_prob": 0.05033931653015315, "mean_pred_prob_last_10": 0.24561448730528354, "mean_pred_prob_last_25": 0.13712008409202098, "mean_pred_prob_last_50": 0.08396130045875907, "mean_token_accuracy": 0.8789664804935455, "step": 27680 }, { "epoch": 0.49224041384459494, "grad_norm": 1.2501868919233827, "learning_rate": 0.0001, "loss": 0.7763, "mean_abs_error": 686.9659950738612, "mean_abs_error_last_10": 362.12283392526183, "mean_abs_error_last_25": 373.946345649568, "mean_abs_error_last_50": 505.7878340383387, "mean_pred_prob": 0.04521587751514744, "mean_pred_prob_last_10": 0.21755316473427228, "mean_pred_prob_last_25": 0.12261108384409454, "mean_pred_prob_last_50": 0.07609106160816737, "mean_token_accuracy": 0.8631934583187103, "step": 27690 }, { "epoch": 0.4924181821413969, "grad_norm": 1.7441103941343572, "learning_rate": 0.0001, "loss": 0.8159, "mean_abs_error": 606.6035209448818, "mean_abs_error_last_10": 142.98196963703361, "mean_abs_error_last_25": 189.90191874540528, "mean_abs_error_last_50": 282.66501320242617, "mean_pred_prob": 0.02412196627119556, "mean_pred_prob_last_10": 0.1098746666451916, "mean_pred_prob_last_25": 0.06090972241945565, "mean_pred_prob_last_50": 0.039102000650018454, "mean_token_accuracy": 0.865010005235672, "step": 27700 }, { "epoch": 0.49259595043819887, "grad_norm": 1.6534862858623627, "learning_rate": 0.0001, "loss": 0.7178, "mean_abs_error": 362.03069855269143, "mean_abs_error_last_10": 124.56514681841392, "mean_abs_error_last_25": 162.45028077851663, "mean_abs_error_last_50": 200.33366242649024, "mean_pred_prob": 0.03687965155113489, "mean_pred_prob_last_10": 0.19661035817116498, "mean_pred_prob_last_25": 0.10926356469281018, "mean_pred_prob_last_50": 0.0647992511279881, "mean_token_accuracy": 0.8733442723751068, "step": 27710 }, { "epoch": 0.4927737187350008, "grad_norm": 1.0783715282308342, "learning_rate": 0.0001, "loss": 0.7851, "mean_abs_error": 454.58203215214206, "mean_abs_error_last_10": 196.22628611506383, "mean_abs_error_last_25": 179.3909836317303, "mean_abs_error_last_50": 272.13425566139415, "mean_pred_prob": 0.036203797440975904, "mean_pred_prob_last_10": 0.17850044798105955, "mean_pred_prob_last_25": 0.10010827220976352, "mean_pred_prob_last_50": 0.06116462294012308, "mean_token_accuracy": 0.8720869898796082, "step": 27720 }, { "epoch": 0.49295148703180275, "grad_norm": 1.6435124191232133, "learning_rate": 0.0001, "loss": 0.7779, "mean_abs_error": 1092.8078218612832, "mean_abs_error_last_10": 399.23385713708274, "mean_abs_error_last_25": 517.1879744878732, "mean_abs_error_last_50": 695.2391380853587, "mean_pred_prob": 0.03579910565604223, "mean_pred_prob_last_10": 0.18434804253047332, "mean_pred_prob_last_25": 0.09786200434318744, "mean_pred_prob_last_50": 0.05917219989350997, "mean_token_accuracy": 0.8771387159824371, "step": 27730 }, { "epoch": 0.4931292553286047, "grad_norm": 1.6279782271733767, "learning_rate": 0.0001, "loss": 0.8271, "mean_abs_error": 1324.06563080253, "mean_abs_error_last_10": 803.0857234382446, "mean_abs_error_last_25": 906.753726040393, "mean_abs_error_last_50": 1070.77372068248, "mean_pred_prob": 0.03138340027071536, "mean_pred_prob_last_10": 0.15443561222491553, "mean_pred_prob_last_25": 0.08759548388043185, "mean_pred_prob_last_50": 0.053599465114530176, "mean_token_accuracy": 0.8723067104816437, "step": 27740 }, { "epoch": 0.4933070236254066, "grad_norm": 2.6140888713600554, "learning_rate": 0.0001, "loss": 0.7941, "mean_abs_error": 421.6162321748619, "mean_abs_error_last_10": 171.74346147232652, "mean_abs_error_last_25": 206.45868547673052, "mean_abs_error_last_50": 273.70487327753295, "mean_pred_prob": 0.044669457734562455, "mean_pred_prob_last_10": 0.23506003498332576, "mean_pred_prob_last_25": 0.122319989441894, "mean_pred_prob_last_50": 0.07505121977301314, "mean_token_accuracy": 0.8767761886119843, "step": 27750 }, { "epoch": 0.4934847919222086, "grad_norm": 0.9839890110235199, "learning_rate": 0.0001, "loss": 0.8493, "mean_abs_error": 288.3733966654577, "mean_abs_error_last_10": 106.62103665247852, "mean_abs_error_last_25": 125.01135123830757, "mean_abs_error_last_50": 147.27414850431774, "mean_pred_prob": 0.04395298620220274, "mean_pred_prob_last_10": 0.21778079196810723, "mean_pred_prob_last_25": 0.12245346661657094, "mean_pred_prob_last_50": 0.07411294626072049, "mean_token_accuracy": 0.8691534698009491, "step": 27760 }, { "epoch": 0.49366256021901056, "grad_norm": 1.887597621270556, "learning_rate": 0.0001, "loss": 0.7619, "mean_abs_error": 905.5440738416889, "mean_abs_error_last_10": 472.90090859906223, "mean_abs_error_last_25": 528.1996581380553, "mean_abs_error_last_50": 637.0751457389977, "mean_pred_prob": 0.04252477021655068, "mean_pred_prob_last_10": 0.1993490204855334, "mean_pred_prob_last_25": 0.11028353128349408, "mean_pred_prob_last_50": 0.06927944918861613, "mean_token_accuracy": 0.86934494972229, "step": 27770 }, { "epoch": 0.4938403285158125, "grad_norm": 1.2458052049336952, "learning_rate": 0.0001, "loss": 0.7494, "mean_abs_error": 232.94206115521047, "mean_abs_error_last_10": 108.16541639815992, "mean_abs_error_last_25": 146.19176421389847, "mean_abs_error_last_50": 164.10144461519553, "mean_pred_prob": 0.047776213416364045, "mean_pred_prob_last_10": 0.2376327390782535, "mean_pred_prob_last_25": 0.13393145016161725, "mean_pred_prob_last_50": 0.08151160628767684, "mean_token_accuracy": 0.8717563092708588, "step": 27780 }, { "epoch": 0.49401809681261444, "grad_norm": 1.1837818268778557, "learning_rate": 0.0001, "loss": 0.7847, "mean_abs_error": 1054.408445556284, "mean_abs_error_last_10": 553.6848431047194, "mean_abs_error_last_25": 601.6916594635, "mean_abs_error_last_50": 729.3131872131275, "mean_pred_prob": 0.049395515728974715, "mean_pred_prob_last_10": 0.23350901762314608, "mean_pred_prob_last_25": 0.1301907439745264, "mean_pred_prob_last_50": 0.08150754247326404, "mean_token_accuracy": 0.8766048610210418, "step": 27790 }, { "epoch": 0.4941958651094164, "grad_norm": 1.088722772273168, "learning_rate": 0.0001, "loss": 0.7597, "mean_abs_error": 381.9150317489399, "mean_abs_error_last_10": 154.7671740546388, "mean_abs_error_last_25": 174.97822347167386, "mean_abs_error_last_50": 221.34866891239653, "mean_pred_prob": 0.030056407558731735, "mean_pred_prob_last_10": 0.14672910049557686, "mean_pred_prob_last_25": 0.08199649406597018, "mean_pred_prob_last_50": 0.05014941478148103, "mean_token_accuracy": 0.868233060836792, "step": 27800 }, { "epoch": 0.4943736334062183, "grad_norm": 4.400860491292725, "learning_rate": 0.0001, "loss": 0.7474, "mean_abs_error": 403.81028185376186, "mean_abs_error_last_10": 137.02118637090655, "mean_abs_error_last_25": 239.26238772191442, "mean_abs_error_last_50": 307.2625203803977, "mean_pred_prob": 0.027849157759919762, "mean_pred_prob_last_10": 0.13787956777960061, "mean_pred_prob_last_25": 0.07615093355998397, "mean_pred_prob_last_50": 0.04675867035984993, "mean_token_accuracy": 0.8794460952281952, "step": 27810 }, { "epoch": 0.4945514017030203, "grad_norm": 2.0895213710103437, "learning_rate": 0.0001, "loss": 0.849, "mean_abs_error": 468.63016514962067, "mean_abs_error_last_10": 101.25393773178146, "mean_abs_error_last_25": 134.26635875486338, "mean_abs_error_last_50": 227.79497630705495, "mean_pred_prob": 0.033640696690417825, "mean_pred_prob_last_10": 0.17473098114132882, "mean_pred_prob_last_25": 0.09430476240813732, "mean_pred_prob_last_50": 0.05716084516607225, "mean_token_accuracy": 0.8698600769042969, "step": 27820 }, { "epoch": 0.49472916999982225, "grad_norm": 1.5858842264023185, "learning_rate": 0.0001, "loss": 0.6882, "mean_abs_error": 161.05696813204943, "mean_abs_error_last_10": 81.94396077114972, "mean_abs_error_last_25": 86.75690975285339, "mean_abs_error_last_50": 120.28366211972066, "mean_pred_prob": 0.04945876197889447, "mean_pred_prob_last_10": 0.24324616882950068, "mean_pred_prob_last_25": 0.13357729520648717, "mean_pred_prob_last_50": 0.08090111445635557, "mean_token_accuracy": 0.8801419019699097, "step": 27830 }, { "epoch": 0.4949069382966242, "grad_norm": 1.0971648356631627, "learning_rate": 0.0001, "loss": 0.6791, "mean_abs_error": 414.3015737789907, "mean_abs_error_last_10": 221.05627379693652, "mean_abs_error_last_25": 189.86271770419447, "mean_abs_error_last_50": 220.55553146066754, "mean_pred_prob": 0.03212260247673839, "mean_pred_prob_last_10": 0.15616178347263485, "mean_pred_prob_last_25": 0.08798297452740371, "mean_pred_prob_last_50": 0.054085264273453504, "mean_token_accuracy": 0.881171703338623, "step": 27840 }, { "epoch": 0.4950847065934261, "grad_norm": 2.8961096087573472, "learning_rate": 0.0001, "loss": 0.7583, "mean_abs_error": 1043.370929055361, "mean_abs_error_last_10": 832.6349077811171, "mean_abs_error_last_25": 802.2601819476578, "mean_abs_error_last_50": 859.6328086050177, "mean_pred_prob": 0.05263452178041916, "mean_pred_prob_last_10": 0.25083801948931067, "mean_pred_prob_last_25": 0.14257355364970864, "mean_pred_prob_last_50": 0.08753978096792707, "mean_token_accuracy": 0.8722512543201446, "step": 27850 }, { "epoch": 0.49526247489022807, "grad_norm": 1.1198754866193257, "learning_rate": 0.0001, "loss": 0.8407, "mean_abs_error": 489.1494041403553, "mean_abs_error_last_10": 140.78229666849796, "mean_abs_error_last_25": 179.3206834437108, "mean_abs_error_last_50": 275.83051173853835, "mean_pred_prob": 0.028735605301335454, "mean_pred_prob_last_10": 0.13098192270845174, "mean_pred_prob_last_25": 0.07567309085279703, "mean_pred_prob_last_50": 0.04737334381788969, "mean_token_accuracy": 0.8715530812740326, "step": 27860 }, { "epoch": 0.49544024318703, "grad_norm": 1.175275168792805, "learning_rate": 0.0001, "loss": 0.8537, "mean_abs_error": 306.3571018900487, "mean_abs_error_last_10": 116.6310138988797, "mean_abs_error_last_25": 198.78926578040222, "mean_abs_error_last_50": 230.82173806274164, "mean_pred_prob": 0.027857492817565797, "mean_pred_prob_last_10": 0.14491232689470052, "mean_pred_prob_last_25": 0.07775436295196414, "mean_pred_prob_last_50": 0.04705081391148269, "mean_token_accuracy": 0.8599210679531097, "step": 27870 }, { "epoch": 0.495618011483832, "grad_norm": 1.5411947617439024, "learning_rate": 0.0001, "loss": 0.7522, "mean_abs_error": 921.1024931891327, "mean_abs_error_last_10": 510.65220416152425, "mean_abs_error_last_25": 573.4600646285883, "mean_abs_error_last_50": 675.7553745061082, "mean_pred_prob": 0.025336554905516097, "mean_pred_prob_last_10": 0.13985242106718943, "mean_pred_prob_last_25": 0.07345383004867471, "mean_pred_prob_last_50": 0.0437986733566504, "mean_token_accuracy": 0.8693397283554077, "step": 27880 }, { "epoch": 0.49579577978063394, "grad_norm": 1.2408345846839264, "learning_rate": 0.0001, "loss": 0.7279, "mean_abs_error": 308.6241605549113, "mean_abs_error_last_10": 59.031104288590214, "mean_abs_error_last_25": 89.70305329225232, "mean_abs_error_last_50": 157.74354133529533, "mean_pred_prob": 0.034028047462925315, "mean_pred_prob_last_10": 0.16333045810461044, "mean_pred_prob_last_25": 0.09068933762609958, "mean_pred_prob_last_50": 0.056403875537216666, "mean_token_accuracy": 0.8676224410533905, "step": 27890 }, { "epoch": 0.4959735480774359, "grad_norm": 0.7715986203442461, "learning_rate": 0.0001, "loss": 0.733, "mean_abs_error": 346.6466659879537, "mean_abs_error_last_10": 148.1465255554159, "mean_abs_error_last_25": 253.09798186863932, "mean_abs_error_last_50": 294.6130146904443, "mean_pred_prob": 0.03232300046365708, "mean_pred_prob_last_10": 0.16639798283576965, "mean_pred_prob_last_25": 0.0908804229926318, "mean_pred_prob_last_50": 0.0552547771949321, "mean_token_accuracy": 0.8691883444786072, "step": 27900 }, { "epoch": 0.4961513163742378, "grad_norm": 2.365971544022191, "learning_rate": 0.0001, "loss": 0.8058, "mean_abs_error": 147.68998953299996, "mean_abs_error_last_10": 32.13470069657588, "mean_abs_error_last_25": 66.48084731841058, "mean_abs_error_last_50": 89.76952531638227, "mean_pred_prob": 0.039331646636128426, "mean_pred_prob_last_10": 0.19366145245730876, "mean_pred_prob_last_25": 0.1052139038220048, "mean_pred_prob_last_50": 0.06536410246044397, "mean_token_accuracy": 0.8688435912132263, "step": 27910 }, { "epoch": 0.49632908467103976, "grad_norm": 1.4048626505075836, "learning_rate": 0.0001, "loss": 0.7342, "mean_abs_error": 262.7490106720803, "mean_abs_error_last_10": 154.7038899339735, "mean_abs_error_last_25": 206.76965670371078, "mean_abs_error_last_50": 184.03200297780103, "mean_pred_prob": 0.05261978928465396, "mean_pred_prob_last_10": 0.25791149847209455, "mean_pred_prob_last_25": 0.14146443242207168, "mean_pred_prob_last_50": 0.08736625546589494, "mean_token_accuracy": 0.8762225925922393, "step": 27920 }, { "epoch": 0.4965068529678417, "grad_norm": 3.1464270527712297, "learning_rate": 0.0001, "loss": 0.7542, "mean_abs_error": 917.7852719237187, "mean_abs_error_last_10": 487.93013375696717, "mean_abs_error_last_25": 541.8617347163256, "mean_abs_error_last_50": 705.2835947216047, "mean_pred_prob": 0.04636182974500116, "mean_pred_prob_last_10": 0.21183665151474998, "mean_pred_prob_last_25": 0.12196858130628244, "mean_pred_prob_last_50": 0.07566889095178339, "mean_token_accuracy": 0.8721214413642884, "step": 27930 }, { "epoch": 0.4966846212646437, "grad_norm": 1.7269975650925404, "learning_rate": 0.0001, "loss": 0.8646, "mean_abs_error": 362.0922669021518, "mean_abs_error_last_10": 213.34424884338569, "mean_abs_error_last_25": 185.8211134939465, "mean_abs_error_last_50": 236.80641063374, "mean_pred_prob": 0.05102504827082157, "mean_pred_prob_last_10": 0.1859225034713745, "mean_pred_prob_last_25": 0.11789654875174164, "mean_pred_prob_last_50": 0.08080623489804566, "mean_token_accuracy": 0.8714042901992798, "step": 27940 }, { "epoch": 0.4968623895614456, "grad_norm": 1.6242855958030042, "learning_rate": 0.0001, "loss": 0.7357, "mean_abs_error": 129.14532908986655, "mean_abs_error_last_10": 53.67576409876214, "mean_abs_error_last_25": 96.32685417246162, "mean_abs_error_last_50": 122.5997993519691, "mean_pred_prob": 0.048935569217428566, "mean_pred_prob_last_10": 0.226319370418787, "mean_pred_prob_last_25": 0.13112878799438477, "mean_pred_prob_last_50": 0.08306900644674897, "mean_token_accuracy": 0.8666463077068329, "step": 27950 }, { "epoch": 0.49704015785824757, "grad_norm": 1.7681702101896108, "learning_rate": 0.0001, "loss": 0.6794, "mean_abs_error": 355.9262240082031, "mean_abs_error_last_10": 111.95288889543151, "mean_abs_error_last_25": 167.74237878745697, "mean_abs_error_last_50": 269.9164628587374, "mean_pred_prob": 0.04223558912053704, "mean_pred_prob_last_10": 0.19902192344889044, "mean_pred_prob_last_25": 0.11079715313389897, "mean_pred_prob_last_50": 0.06953227850608527, "mean_token_accuracy": 0.8776442527770996, "step": 27960 }, { "epoch": 0.4972179261550495, "grad_norm": 1.5049987320086997, "learning_rate": 0.0001, "loss": 0.8519, "mean_abs_error": 1527.33182593501, "mean_abs_error_last_10": 765.1407936811872, "mean_abs_error_last_25": 863.1671527846966, "mean_abs_error_last_50": 1133.480380250307, "mean_pred_prob": 0.013727100114920177, "mean_pred_prob_last_10": 0.07465346819371917, "mean_pred_prob_last_25": 0.0394502503448166, "mean_pred_prob_last_50": 0.02353888808283955, "mean_token_accuracy": 0.8678438663482666, "step": 27970 }, { "epoch": 0.49739569445185144, "grad_norm": 2.744808276712105, "learning_rate": 0.0001, "loss": 0.7491, "mean_abs_error": 106.28655348821985, "mean_abs_error_last_10": 53.69593386606833, "mean_abs_error_last_25": 52.63232406504515, "mean_abs_error_last_50": 66.48024859708684, "mean_pred_prob": 0.05472072940319776, "mean_pred_prob_last_10": 0.2759605247527361, "mean_pred_prob_last_25": 0.15362367909401656, "mean_pred_prob_last_50": 0.09280692543834448, "mean_token_accuracy": 0.8668600857257843, "step": 27980 }, { "epoch": 0.4975734627486534, "grad_norm": 1.8179658233595484, "learning_rate": 0.0001, "loss": 0.7232, "mean_abs_error": 439.504924493394, "mean_abs_error_last_10": 135.8393353246273, "mean_abs_error_last_25": 183.35726105385083, "mean_abs_error_last_50": 283.2657126300743, "mean_pred_prob": 0.046621408520149996, "mean_pred_prob_last_10": 0.22567227280233054, "mean_pred_prob_last_25": 0.1281658305437304, "mean_pred_prob_last_50": 0.07848071769694798, "mean_token_accuracy": 0.8638694703578949, "step": 27990 }, { "epoch": 0.4977512310454554, "grad_norm": 1.955288294547067, "learning_rate": 0.0001, "loss": 0.8023, "mean_abs_error": 680.6911846922621, "mean_abs_error_last_10": 233.49288872283068, "mean_abs_error_last_25": 283.5689620263805, "mean_abs_error_last_50": 386.0116729728086, "mean_pred_prob": 0.024923629441764204, "mean_pred_prob_last_10": 0.12677453068317845, "mean_pred_prob_last_25": 0.06795982314506546, "mean_pred_prob_last_50": 0.041152329568285496, "mean_token_accuracy": 0.8682774603366852, "step": 28000 }, { "epoch": 0.4979289993422573, "grad_norm": 4.551812283205833, "learning_rate": 0.0001, "loss": 0.7926, "mean_abs_error": 219.79798055094088, "mean_abs_error_last_10": 70.99902877722369, "mean_abs_error_last_25": 96.62301924988273, "mean_abs_error_last_50": 117.08221606211723, "mean_pred_prob": 0.04070041780360043, "mean_pred_prob_last_10": 0.2030756738036871, "mean_pred_prob_last_25": 0.11044714041054249, "mean_pred_prob_last_50": 0.06771950609982014, "mean_token_accuracy": 0.8771225273609161, "step": 28010 }, { "epoch": 0.49810676763905926, "grad_norm": 1.7545457422687134, "learning_rate": 0.0001, "loss": 0.8634, "mean_abs_error": 493.69911469708387, "mean_abs_error_last_10": 100.05252349644661, "mean_abs_error_last_25": 174.34386983264477, "mean_abs_error_last_50": 272.19861597827105, "mean_pred_prob": 0.05638891518465243, "mean_pred_prob_last_10": 0.2824289201293141, "mean_pred_prob_last_25": 0.15691151978680865, "mean_pred_prob_last_50": 0.09604136790148914, "mean_token_accuracy": 0.8675015807151795, "step": 28020 }, { "epoch": 0.4982845359358612, "grad_norm": 2.432145284153575, "learning_rate": 0.0001, "loss": 0.7254, "mean_abs_error": 360.26778860957245, "mean_abs_error_last_10": 163.47566853108307, "mean_abs_error_last_25": 150.05668222899735, "mean_abs_error_last_50": 222.4407206264901, "mean_pred_prob": 0.031802722439169887, "mean_pred_prob_last_10": 0.1603221006691456, "mean_pred_prob_last_25": 0.08829087680205702, "mean_pred_prob_last_50": 0.05384033150039613, "mean_token_accuracy": 0.873433256149292, "step": 28030 }, { "epoch": 0.49846230423266313, "grad_norm": 2.851141867009704, "learning_rate": 0.0001, "loss": 0.8378, "mean_abs_error": 144.56565799222182, "mean_abs_error_last_10": 49.41858284895452, "mean_abs_error_last_25": 63.57636658638434, "mean_abs_error_last_50": 79.56729308854987, "mean_pred_prob": 0.05058258110657334, "mean_pred_prob_last_10": 0.22833045665174723, "mean_pred_prob_last_25": 0.13411514041945338, "mean_pred_prob_last_50": 0.08482825690880418, "mean_token_accuracy": 0.8678063035011292, "step": 28040 }, { "epoch": 0.4986400725294651, "grad_norm": 1.3179721788175167, "learning_rate": 0.0001, "loss": 0.6913, "mean_abs_error": 349.5721040688184, "mean_abs_error_last_10": 219.14044967873352, "mean_abs_error_last_25": 294.9507835507628, "mean_abs_error_last_50": 363.98918890466604, "mean_pred_prob": 0.048401327163446695, "mean_pred_prob_last_10": 0.24742354068439454, "mean_pred_prob_last_25": 0.13630270994035526, "mean_pred_prob_last_50": 0.08171148110413924, "mean_token_accuracy": 0.8703364610671998, "step": 28050 }, { "epoch": 0.49881784082626707, "grad_norm": 2.030481794943703, "learning_rate": 0.0001, "loss": 0.7783, "mean_abs_error": 721.7126866164788, "mean_abs_error_last_10": 461.85539876839556, "mean_abs_error_last_25": 447.7101785302408, "mean_abs_error_last_50": 494.42866794455813, "mean_pred_prob": 0.021419405471533537, "mean_pred_prob_last_10": 0.11551275620295201, "mean_pred_prob_last_25": 0.06144120401004329, "mean_pred_prob_last_50": 0.03642918802797794, "mean_token_accuracy": 0.8722730696201324, "step": 28060 }, { "epoch": 0.498995609123069, "grad_norm": 1.2816790395359734, "learning_rate": 0.0001, "loss": 0.7134, "mean_abs_error": 519.1574886679539, "mean_abs_error_last_10": 154.03784561698265, "mean_abs_error_last_25": 200.22302761974538, "mean_abs_error_last_50": 288.32733686252595, "mean_pred_prob": 0.04498354507959448, "mean_pred_prob_last_10": 0.20460072734858842, "mean_pred_prob_last_25": 0.12213506489060819, "mean_pred_prob_last_50": 0.07405077489092945, "mean_token_accuracy": 0.8875348389148712, "step": 28070 }, { "epoch": 0.49917337741987095, "grad_norm": 1.8545530112415543, "learning_rate": 0.0001, "loss": 0.756, "mean_abs_error": 466.9094094233571, "mean_abs_error_last_10": 70.23335192922677, "mean_abs_error_last_25": 131.75871623172924, "mean_abs_error_last_50": 261.41271731974086, "mean_pred_prob": 0.04193246480426751, "mean_pred_prob_last_10": 0.20371843290049582, "mean_pred_prob_last_25": 0.11630937420995906, "mean_pred_prob_last_50": 0.07081811733078211, "mean_token_accuracy": 0.8688255429267884, "step": 28080 }, { "epoch": 0.4993511457166729, "grad_norm": 1.8143104856240948, "learning_rate": 0.0001, "loss": 0.745, "mean_abs_error": 1145.1753581315118, "mean_abs_error_last_10": 419.97318119061373, "mean_abs_error_last_25": 573.9576464487571, "mean_abs_error_last_50": 781.1068688790506, "mean_pred_prob": 0.02264769592147786, "mean_pred_prob_last_10": 0.11297966894926503, "mean_pred_prob_last_25": 0.06145080712158233, "mean_pred_prob_last_50": 0.037847162334946914, "mean_token_accuracy": 0.8675996899604798, "step": 28090 }, { "epoch": 0.4995289140134748, "grad_norm": 1.2876462481825204, "learning_rate": 0.0001, "loss": 0.7198, "mean_abs_error": 468.69419662673715, "mean_abs_error_last_10": 129.27652571518252, "mean_abs_error_last_25": 137.18171916184113, "mean_abs_error_last_50": 268.917879746179, "mean_pred_prob": 0.03983769334154204, "mean_pred_prob_last_10": 0.19692992499331013, "mean_pred_prob_last_25": 0.11007835960481316, "mean_pred_prob_last_50": 0.06775000998750329, "mean_token_accuracy": 0.8669935882091522, "step": 28100 }, { "epoch": 0.49970668231027676, "grad_norm": 2.827642425364458, "learning_rate": 0.0001, "loss": 0.8443, "mean_abs_error": 489.22881792999885, "mean_abs_error_last_10": 86.72298015772446, "mean_abs_error_last_25": 141.44324678085374, "mean_abs_error_last_50": 239.1586030406173, "mean_pred_prob": 0.03264752135146409, "mean_pred_prob_last_10": 0.17243497110903264, "mean_pred_prob_last_25": 0.09350430136546492, "mean_pred_prob_last_50": 0.05572246410883963, "mean_token_accuracy": 0.8713579952716828, "step": 28110 }, { "epoch": 0.49988445060707876, "grad_norm": 1.274583033060069, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 406.0167923126655, "mean_abs_error_last_10": 208.50616888991763, "mean_abs_error_last_25": 179.7986246485045, "mean_abs_error_last_50": 267.3429046903978, "mean_pred_prob": 0.05188343772897497, "mean_pred_prob_last_10": 0.24948225284460931, "mean_pred_prob_last_25": 0.14096370562911034, "mean_pred_prob_last_50": 0.08693936918862163, "mean_token_accuracy": 0.8759344756603241, "step": 28120 }, { "epoch": 0.5000622189038807, "grad_norm": 1.2358695301868903, "learning_rate": 0.0001, "loss": 0.8074, "mean_abs_error": 913.400320033699, "mean_abs_error_last_10": 293.86003988512783, "mean_abs_error_last_25": 383.1953697731202, "mean_abs_error_last_50": 575.5878498328792, "mean_pred_prob": 0.034838210145244373, "mean_pred_prob_last_10": 0.16088596818735823, "mean_pred_prob_last_25": 0.0941198127809912, "mean_pred_prob_last_50": 0.058601366967195645, "mean_token_accuracy": 0.8701533198356628, "step": 28130 }, { "epoch": 0.5002399872006826, "grad_norm": 2.9233680966047277, "learning_rate": 0.0001, "loss": 0.7858, "mean_abs_error": 697.314595295011, "mean_abs_error_last_10": 295.5445003717253, "mean_abs_error_last_25": 368.2866643973254, "mean_abs_error_last_50": 509.57803798611275, "mean_pred_prob": 0.043777546964702194, "mean_pred_prob_last_10": 0.21551509402925148, "mean_pred_prob_last_25": 0.11922514015459455, "mean_pred_prob_last_50": 0.07311441240017302, "mean_token_accuracy": 0.8680476307868957, "step": 28140 }, { "epoch": 0.5004177554974846, "grad_norm": 1.849087689146295, "learning_rate": 0.0001, "loss": 0.8149, "mean_abs_error": 946.0824864751197, "mean_abs_error_last_10": 385.6150052581014, "mean_abs_error_last_25": 508.88653257199513, "mean_abs_error_last_50": 674.3652247006074, "mean_pred_prob": 0.031289530964568255, "mean_pred_prob_last_10": 0.15892092012800277, "mean_pred_prob_last_25": 0.08723016747971997, "mean_pred_prob_last_50": 0.05307965979445726, "mean_token_accuracy": 0.8742341756820678, "step": 28150 }, { "epoch": 0.5005955237942865, "grad_norm": 1.3811616057971812, "learning_rate": 0.0001, "loss": 0.6707, "mean_abs_error": 336.56564219288117, "mean_abs_error_last_10": 67.10120190435872, "mean_abs_error_last_25": 92.68373526091048, "mean_abs_error_last_50": 184.92007261406758, "mean_pred_prob": 0.03551974734291434, "mean_pred_prob_last_10": 0.17107404954731464, "mean_pred_prob_last_25": 0.09872569348663092, "mean_pred_prob_last_50": 0.0600654823705554, "mean_token_accuracy": 0.8875207304954529, "step": 28160 }, { "epoch": 0.5007732920910885, "grad_norm": 1.1245462528653363, "learning_rate": 0.0001, "loss": 0.75, "mean_abs_error": 308.01910026661966, "mean_abs_error_last_10": 94.04530647105186, "mean_abs_error_last_25": 165.2544045367692, "mean_abs_error_last_50": 208.03206858413273, "mean_pred_prob": 0.04407711583189666, "mean_pred_prob_last_10": 0.2115684948861599, "mean_pred_prob_last_25": 0.11890769954770804, "mean_pred_prob_last_50": 0.07216776609420776, "mean_token_accuracy": 0.867666494846344, "step": 28170 }, { "epoch": 0.5009510603878904, "grad_norm": 1.7936279522347947, "learning_rate": 0.0001, "loss": 0.6589, "mean_abs_error": 418.88423313444747, "mean_abs_error_last_10": 83.99346114822615, "mean_abs_error_last_25": 104.74835135786138, "mean_abs_error_last_50": 217.3708461628114, "mean_pred_prob": 0.04407863086089492, "mean_pred_prob_last_10": 0.20351523759309204, "mean_pred_prob_last_25": 0.11269406765932218, "mean_pred_prob_last_50": 0.07308699475834146, "mean_token_accuracy": 0.8726889491081238, "step": 28180 }, { "epoch": 0.5011288286846923, "grad_norm": 1.2580837639407965, "learning_rate": 0.0001, "loss": 0.6595, "mean_abs_error": 165.06111829093953, "mean_abs_error_last_10": 75.6012338155291, "mean_abs_error_last_25": 93.7581060098091, "mean_abs_error_last_50": 103.2140024231804, "mean_pred_prob": 0.0597987902816385, "mean_pred_prob_last_10": 0.24969730228185655, "mean_pred_prob_last_25": 0.15169737711548806, "mean_pred_prob_last_50": 0.09792163139209151, "mean_token_accuracy": 0.8784935474395752, "step": 28190 }, { "epoch": 0.5013065969814943, "grad_norm": 2.5400453017693874, "learning_rate": 0.0001, "loss": 0.8086, "mean_abs_error": 446.038206864194, "mean_abs_error_last_10": 80.41187269839023, "mean_abs_error_last_25": 165.74848016894902, "mean_abs_error_last_50": 252.42367524379893, "mean_pred_prob": 0.043595583108253774, "mean_pred_prob_last_10": 0.20318852197378873, "mean_pred_prob_last_25": 0.12102629970759153, "mean_pred_prob_last_50": 0.07442618680652231, "mean_token_accuracy": 0.8687286376953125, "step": 28200 }, { "epoch": 0.5014843652782963, "grad_norm": 1.6345612863850931, "learning_rate": 0.0001, "loss": 0.7961, "mean_abs_error": 482.38975131452116, "mean_abs_error_last_10": 221.50956327431322, "mean_abs_error_last_25": 310.0048336753081, "mean_abs_error_last_50": 421.18978296429424, "mean_pred_prob": 0.03362458769697696, "mean_pred_prob_last_10": 0.1758162356680259, "mean_pred_prob_last_25": 0.09527762953657656, "mean_pred_prob_last_50": 0.05737035096390173, "mean_token_accuracy": 0.8734778344631196, "step": 28210 }, { "epoch": 0.5016621335750983, "grad_norm": 1.456920850770258, "learning_rate": 0.0001, "loss": 0.6847, "mean_abs_error": 347.18083360633625, "mean_abs_error_last_10": 169.07525687256646, "mean_abs_error_last_25": 168.1016775813195, "mean_abs_error_last_50": 236.9458730744469, "mean_pred_prob": 0.033917422592639926, "mean_pred_prob_last_10": 0.17042362415231765, "mean_pred_prob_last_25": 0.09448533928953111, "mean_pred_prob_last_50": 0.0571002964861691, "mean_token_accuracy": 0.8829315721988678, "step": 28220 }, { "epoch": 0.5018399018719002, "grad_norm": 1.9655872750221486, "learning_rate": 0.0001, "loss": 0.6732, "mean_abs_error": 566.0319910916809, "mean_abs_error_last_10": 212.58178508610746, "mean_abs_error_last_25": 291.5785246765366, "mean_abs_error_last_50": 376.1951842615981, "mean_pred_prob": 0.03055325207533315, "mean_pred_prob_last_10": 0.15555665544234215, "mean_pred_prob_last_25": 0.08434536394197494, "mean_pred_prob_last_50": 0.0516825353493914, "mean_token_accuracy": 0.8803495168685913, "step": 28230 }, { "epoch": 0.5020176701687021, "grad_norm": 3.030131709009814, "learning_rate": 0.0001, "loss": 0.6471, "mean_abs_error": 198.18372157107228, "mean_abs_error_last_10": 45.37919461965088, "mean_abs_error_last_25": 137.11802577716645, "mean_abs_error_last_50": 209.6787348884332, "mean_pred_prob": 0.05103613650426268, "mean_pred_prob_last_10": 0.24678806103765966, "mean_pred_prob_last_25": 0.13916228115558624, "mean_pred_prob_last_50": 0.08537246733903885, "mean_token_accuracy": 0.890699303150177, "step": 28240 }, { "epoch": 0.5021954384655041, "grad_norm": 2.263436244100687, "learning_rate": 0.0001, "loss": 0.8148, "mean_abs_error": 861.0944668742341, "mean_abs_error_last_10": 427.47794839023345, "mean_abs_error_last_25": 536.9693012987857, "mean_abs_error_last_50": 673.8424816380084, "mean_pred_prob": 0.044331363125820644, "mean_pred_prob_last_10": 0.20662000081501902, "mean_pred_prob_last_25": 0.11755845283623785, "mean_pred_prob_last_50": 0.07413467150763609, "mean_token_accuracy": 0.8710006892681121, "step": 28250 }, { "epoch": 0.502373206762306, "grad_norm": 1.1325629856336277, "learning_rate": 0.0001, "loss": 0.7787, "mean_abs_error": 510.18773155701217, "mean_abs_error_last_10": 131.08269597016323, "mean_abs_error_last_25": 195.7136803975924, "mean_abs_error_last_50": 313.36893247105235, "mean_pred_prob": 0.034544948232360186, "mean_pred_prob_last_10": 0.18220257498323916, "mean_pred_prob_last_25": 0.09826659876853228, "mean_pred_prob_last_50": 0.05863639796152711, "mean_token_accuracy": 0.8695578396320343, "step": 28260 }, { "epoch": 0.502550975059108, "grad_norm": 1.3250709563996648, "learning_rate": 0.0001, "loss": 0.7271, "mean_abs_error": 178.79627722798105, "mean_abs_error_last_10": 38.675448156455616, "mean_abs_error_last_25": 57.87233167037995, "mean_abs_error_last_50": 107.45165579782943, "mean_pred_prob": 0.0660446876194328, "mean_pred_prob_last_10": 0.32528402768075465, "mean_pred_prob_last_25": 0.1854480099864304, "mean_pred_prob_last_50": 0.11092225117608905, "mean_token_accuracy": 0.8675043880939484, "step": 28270 }, { "epoch": 0.5027287433559099, "grad_norm": 1.6878709588566834, "learning_rate": 0.0001, "loss": 0.7591, "mean_abs_error": 341.97416628606965, "mean_abs_error_last_10": 83.9810215858051, "mean_abs_error_last_25": 138.9923838901463, "mean_abs_error_last_50": 198.87054111070387, "mean_pred_prob": 0.057696379104163496, "mean_pred_prob_last_10": 0.27635049873497336, "mean_pred_prob_last_25": 0.15785018145106733, "mean_pred_prob_last_50": 0.09631611076183617, "mean_token_accuracy": 0.8682983815670013, "step": 28280 }, { "epoch": 0.5029065116527118, "grad_norm": 1.2699714880310982, "learning_rate": 0.0001, "loss": 0.7079, "mean_abs_error": 212.5181343805401, "mean_abs_error_last_10": 118.61500964727728, "mean_abs_error_last_25": 143.35771563172813, "mean_abs_error_last_50": 178.13316545155433, "mean_pred_prob": 0.050562936603091656, "mean_pred_prob_last_10": 0.2459704212844372, "mean_pred_prob_last_25": 0.13829548005014658, "mean_pred_prob_last_50": 0.085395870776847, "mean_token_accuracy": 0.8781984508037567, "step": 28290 }, { "epoch": 0.5030842799495138, "grad_norm": 1.2428742480196509, "learning_rate": 0.0001, "loss": 0.7576, "mean_abs_error": 290.64591543439445, "mean_abs_error_last_10": 107.72512893001112, "mean_abs_error_last_25": 106.66580002146614, "mean_abs_error_last_50": 184.19762893136118, "mean_pred_prob": 0.024358957028016447, "mean_pred_prob_last_10": 0.11724059544503689, "mean_pred_prob_last_25": 0.06537069082260132, "mean_pred_prob_last_50": 0.040447847079485656, "mean_token_accuracy": 0.8705113887786865, "step": 28300 }, { "epoch": 0.5032620482463157, "grad_norm": 0.8393905669522576, "learning_rate": 0.0001, "loss": 0.8267, "mean_abs_error": 1179.234318028152, "mean_abs_error_last_10": 637.4330508985652, "mean_abs_error_last_25": 720.7070940268142, "mean_abs_error_last_50": 836.1707619754121, "mean_pred_prob": 0.025531556908390485, "mean_pred_prob_last_10": 0.13020645148353652, "mean_pred_prob_last_25": 0.07303515393577982, "mean_pred_prob_last_50": 0.04396294217585819, "mean_token_accuracy": 0.8790045499801635, "step": 28310 }, { "epoch": 0.5034398165431176, "grad_norm": 2.2000940261773017, "learning_rate": 0.0001, "loss": 0.7737, "mean_abs_error": 234.18317733016843, "mean_abs_error_last_10": 43.09215321130817, "mean_abs_error_last_25": 68.7042126233826, "mean_abs_error_last_50": 156.25818783728934, "mean_pred_prob": 0.04496035547927022, "mean_pred_prob_last_10": 0.2054460182785988, "mean_pred_prob_last_25": 0.11985422503203154, "mean_pred_prob_last_50": 0.07454057186841964, "mean_token_accuracy": 0.862975662946701, "step": 28320 }, { "epoch": 0.5036175848399197, "grad_norm": 1.0420201709893897, "learning_rate": 0.0001, "loss": 0.6442, "mean_abs_error": 208.68261214336275, "mean_abs_error_last_10": 138.81918211830776, "mean_abs_error_last_25": 115.69139167701833, "mean_abs_error_last_50": 137.42740996063205, "mean_pred_prob": 0.06219327999278903, "mean_pred_prob_last_10": 0.28977579083293675, "mean_pred_prob_last_25": 0.16665504621341826, "mean_pred_prob_last_50": 0.10189323453232646, "mean_token_accuracy": 0.8718260884284973, "step": 28330 }, { "epoch": 0.5037953531367216, "grad_norm": 2.0641384774507734, "learning_rate": 0.0001, "loss": 0.6899, "mean_abs_error": 565.1448105865227, "mean_abs_error_last_10": 281.31145513911827, "mean_abs_error_last_25": 254.91709090010687, "mean_abs_error_last_50": 358.34973199920466, "mean_pred_prob": 0.045566213189158586, "mean_pred_prob_last_10": 0.17312978783156724, "mean_pred_prob_last_25": 0.10578363004606217, "mean_pred_prob_last_50": 0.07092205256922171, "mean_token_accuracy": 0.8787753164768219, "step": 28340 }, { "epoch": 0.5039731214335236, "grad_norm": 2.5948408465497437, "learning_rate": 0.0001, "loss": 0.6991, "mean_abs_error": 486.5846869305992, "mean_abs_error_last_10": 141.18436056710783, "mean_abs_error_last_25": 140.22534404995702, "mean_abs_error_last_50": 271.1260643509889, "mean_pred_prob": 0.04290972214075737, "mean_pred_prob_last_10": 0.19651225861161947, "mean_pred_prob_last_25": 0.11409396810922771, "mean_pred_prob_last_50": 0.07116675976430997, "mean_token_accuracy": 0.8826643228530884, "step": 28350 }, { "epoch": 0.5041508897303255, "grad_norm": 1.3646178797736348, "learning_rate": 0.0001, "loss": 0.7442, "mean_abs_error": 840.2392728914128, "mean_abs_error_last_10": 378.81872320760357, "mean_abs_error_last_25": 451.04942473836024, "mean_abs_error_last_50": 632.0763725989162, "mean_pred_prob": 0.04624623687413987, "mean_pred_prob_last_10": 0.2347352767304983, "mean_pred_prob_last_25": 0.1289357865258353, "mean_pred_prob_last_50": 0.07820470859587658, "mean_token_accuracy": 0.8736792504787445, "step": 28360 }, { "epoch": 0.5043286580271275, "grad_norm": 1.760514019332896, "learning_rate": 0.0001, "loss": 0.6313, "mean_abs_error": 714.5994533107535, "mean_abs_error_last_10": 429.88131152242795, "mean_abs_error_last_25": 453.5439985836054, "mean_abs_error_last_50": 518.1972934055889, "mean_pred_prob": 0.04732083976123249, "mean_pred_prob_last_10": 0.23787792825023643, "mean_pred_prob_last_25": 0.13317689255054574, "mean_pred_prob_last_50": 0.08088049392972607, "mean_token_accuracy": 0.8867992579936981, "step": 28370 }, { "epoch": 0.5045064263239294, "grad_norm": 1.4909435687464416, "learning_rate": 0.0001, "loss": 0.776, "mean_abs_error": 851.477821430302, "mean_abs_error_last_10": 403.8492927201974, "mean_abs_error_last_25": 445.6989360296319, "mean_abs_error_last_50": 586.4474905341967, "mean_pred_prob": 0.03190665896399878, "mean_pred_prob_last_10": 0.16182375642820263, "mean_pred_prob_last_25": 0.08788257866399363, "mean_pred_prob_last_50": 0.053904014034196736, "mean_token_accuracy": 0.8780135750770569, "step": 28380 }, { "epoch": 0.5046841946207313, "grad_norm": 1.9540836042352934, "learning_rate": 0.0001, "loss": 0.769, "mean_abs_error": 544.5671668242909, "mean_abs_error_last_10": 201.65674189137312, "mean_abs_error_last_25": 307.321360888843, "mean_abs_error_last_50": 457.48446860879886, "mean_pred_prob": 0.028911988134495914, "mean_pred_prob_last_10": 0.15070727933198214, "mean_pred_prob_last_25": 0.08315718984231353, "mean_pred_prob_last_50": 0.04966543572954833, "mean_token_accuracy": 0.8684872984886169, "step": 28390 }, { "epoch": 0.5048619629175333, "grad_norm": 1.4785579604961292, "learning_rate": 0.0001, "loss": 0.7496, "mean_abs_error": 343.4818078184899, "mean_abs_error_last_10": 68.45429924266264, "mean_abs_error_last_25": 107.50881894843333, "mean_abs_error_last_50": 193.65003753872634, "mean_pred_prob": 0.04427277110517025, "mean_pred_prob_last_10": 0.20946699995547532, "mean_pred_prob_last_25": 0.12096804163884371, "mean_pred_prob_last_50": 0.07415259697008877, "mean_token_accuracy": 0.872558057308197, "step": 28400 }, { "epoch": 0.5050397312143352, "grad_norm": 1.593898826242172, "learning_rate": 0.0001, "loss": 0.7227, "mean_abs_error": 113.40618857992936, "mean_abs_error_last_10": 36.69990640779954, "mean_abs_error_last_25": 70.41626284393521, "mean_abs_error_last_50": 99.819228867879, "mean_pred_prob": 0.052063044626265764, "mean_pred_prob_last_10": 0.25520804822444915, "mean_pred_prob_last_25": 0.14328421801328659, "mean_pred_prob_last_50": 0.08768588714301587, "mean_token_accuracy": 0.8753440380096436, "step": 28410 }, { "epoch": 0.5052174995111371, "grad_norm": 1.2130325732169613, "learning_rate": 0.0001, "loss": 0.8397, "mean_abs_error": 232.8357793027339, "mean_abs_error_last_10": 59.84103478029969, "mean_abs_error_last_25": 91.16797052913543, "mean_abs_error_last_50": 132.8219786718736, "mean_pred_prob": 0.04260038961656391, "mean_pred_prob_last_10": 0.2146447364240885, "mean_pred_prob_last_25": 0.11630404219031334, "mean_pred_prob_last_50": 0.07155286269262433, "mean_token_accuracy": 0.8721212029457093, "step": 28420 }, { "epoch": 0.5053952678079391, "grad_norm": 1.7483932323282594, "learning_rate": 0.0001, "loss": 0.6109, "mean_abs_error": 148.58794880944743, "mean_abs_error_last_10": 67.35298582991327, "mean_abs_error_last_25": 110.12233238097886, "mean_abs_error_last_50": 105.94982427941834, "mean_pred_prob": 0.05086616980843246, "mean_pred_prob_last_10": 0.24413612764328718, "mean_pred_prob_last_25": 0.14189402796328068, "mean_pred_prob_last_50": 0.08578300727531314, "mean_token_accuracy": 0.890209287405014, "step": 28430 }, { "epoch": 0.5055730361047411, "grad_norm": 1.2548063341064408, "learning_rate": 0.0001, "loss": 0.6975, "mean_abs_error": 395.36791575859456, "mean_abs_error_last_10": 64.21446730417689, "mean_abs_error_last_25": 103.75878274941638, "mean_abs_error_last_50": 197.389798278806, "mean_pred_prob": 0.05171874607913196, "mean_pred_prob_last_10": 0.24451198736205698, "mean_pred_prob_last_25": 0.14207319379784167, "mean_pred_prob_last_50": 0.0867887534899637, "mean_token_accuracy": 0.8765899896621704, "step": 28440 }, { "epoch": 0.5057508044015431, "grad_norm": 0.9255794134773795, "learning_rate": 0.0001, "loss": 0.7091, "mean_abs_error": 873.6437241960897, "mean_abs_error_last_10": 239.71612577657683, "mean_abs_error_last_25": 309.19324264413456, "mean_abs_error_last_50": 448.25109463465833, "mean_pred_prob": 0.03754163450794294, "mean_pred_prob_last_10": 0.16596035736147313, "mean_pred_prob_last_25": 0.09942928490345367, "mean_pred_prob_last_50": 0.06174171055899933, "mean_token_accuracy": 0.8705553948879242, "step": 28450 }, { "epoch": 0.505928572698345, "grad_norm": 1.8684775736383916, "learning_rate": 0.0001, "loss": 0.7758, "mean_abs_error": 460.5368070242701, "mean_abs_error_last_10": 152.980188090217, "mean_abs_error_last_25": 237.6567064509852, "mean_abs_error_last_50": 357.14502982532457, "mean_pred_prob": 0.036381346924463286, "mean_pred_prob_last_10": 0.189995510969311, "mean_pred_prob_last_25": 0.10006523111369461, "mean_pred_prob_last_50": 0.06131248236633837, "mean_token_accuracy": 0.8727798521518707, "step": 28460 }, { "epoch": 0.506106340995147, "grad_norm": 1.7080774628136641, "learning_rate": 0.0001, "loss": 0.8459, "mean_abs_error": 318.49664334122537, "mean_abs_error_last_10": 141.73841531075317, "mean_abs_error_last_25": 136.21975521343106, "mean_abs_error_last_50": 176.3892446286951, "mean_pred_prob": 0.05225235614925623, "mean_pred_prob_last_10": 0.23119136397726833, "mean_pred_prob_last_25": 0.14105560993775726, "mean_pred_prob_last_50": 0.0867707911063917, "mean_token_accuracy": 0.8740389466285705, "step": 28470 }, { "epoch": 0.5062841092919489, "grad_norm": 1.1993370419686251, "learning_rate": 0.0001, "loss": 0.8423, "mean_abs_error": 1116.846350731137, "mean_abs_error_last_10": 430.91522672708305, "mean_abs_error_last_25": 523.6431687461663, "mean_abs_error_last_50": 696.3438550851703, "mean_pred_prob": 0.030251580849289895, "mean_pred_prob_last_10": 0.1501519650075352, "mean_pred_prob_last_25": 0.08436804143129847, "mean_pred_prob_last_50": 0.05039879397663753, "mean_token_accuracy": 0.8691127598285675, "step": 28480 }, { "epoch": 0.5064618775887508, "grad_norm": 1.4360839612609146, "learning_rate": 0.0001, "loss": 0.7771, "mean_abs_error": 435.3718338603527, "mean_abs_error_last_10": 388.3041621351898, "mean_abs_error_last_25": 422.4612789958451, "mean_abs_error_last_50": 423.68252236566514, "mean_pred_prob": 0.03831394188455306, "mean_pred_prob_last_10": 0.19703319645486772, "mean_pred_prob_last_25": 0.10767251153010875, "mean_pred_prob_last_50": 0.06452907372731716, "mean_token_accuracy": 0.8736389994621276, "step": 28490 }, { "epoch": 0.5066396458855528, "grad_norm": 2.732774471108593, "learning_rate": 0.0001, "loss": 0.862, "mean_abs_error": 387.17207514296183, "mean_abs_error_last_10": 197.3565395245022, "mean_abs_error_last_25": 228.1495083762383, "mean_abs_error_last_50": 267.99769852513407, "mean_pred_prob": 0.03236028996761888, "mean_pred_prob_last_10": 0.1582110883668065, "mean_pred_prob_last_25": 0.08400877714157104, "mean_pred_prob_last_50": 0.052564049558714035, "mean_token_accuracy": 0.8692990660667419, "step": 28500 }, { "epoch": 0.5068174141823547, "grad_norm": 1.6358689621231406, "learning_rate": 0.0001, "loss": 0.7761, "mean_abs_error": 741.7461865896323, "mean_abs_error_last_10": 437.9540772808033, "mean_abs_error_last_25": 536.5671758157613, "mean_abs_error_last_50": 594.7368578506218, "mean_pred_prob": 0.03797118502843659, "mean_pred_prob_last_10": 0.17704800134524704, "mean_pred_prob_last_25": 0.1015748537145555, "mean_pred_prob_last_50": 0.06350930804619566, "mean_token_accuracy": 0.8747001647949219, "step": 28510 }, { "epoch": 0.5069951824791566, "grad_norm": 2.2106707818092177, "learning_rate": 0.0001, "loss": 0.7424, "mean_abs_error": 217.31465045031945, "mean_abs_error_last_10": 46.70159711517423, "mean_abs_error_last_25": 122.36970027052534, "mean_abs_error_last_50": 151.45282356182037, "mean_pred_prob": 0.0555934323463589, "mean_pred_prob_last_10": 0.25953199043869973, "mean_pred_prob_last_25": 0.1498981605283916, "mean_pred_prob_last_50": 0.0924810797907412, "mean_token_accuracy": 0.8766565978527069, "step": 28520 }, { "epoch": 0.5071729507759586, "grad_norm": 2.4647435795979744, "learning_rate": 0.0001, "loss": 0.7251, "mean_abs_error": 467.4199132266255, "mean_abs_error_last_10": 329.3145990474594, "mean_abs_error_last_25": 318.3843269270734, "mean_abs_error_last_50": 336.2497902502031, "mean_pred_prob": 0.04710339538869448, "mean_pred_prob_last_10": 0.22577754819067195, "mean_pred_prob_last_25": 0.1301873484160751, "mean_pred_prob_last_50": 0.07908643375849352, "mean_token_accuracy": 0.8815706014633179, "step": 28530 }, { "epoch": 0.5073507190727605, "grad_norm": 0.657834058346078, "learning_rate": 0.0001, "loss": 0.8087, "mean_abs_error": 379.8709648909096, "mean_abs_error_last_10": 128.48387172699364, "mean_abs_error_last_25": 227.46421517635957, "mean_abs_error_last_50": 345.5861340702621, "mean_pred_prob": 0.02526465281844139, "mean_pred_prob_last_10": 0.12878627851605415, "mean_pred_prob_last_25": 0.06904043257236481, "mean_pred_prob_last_50": 0.041724046040326354, "mean_token_accuracy": 0.8706001281738281, "step": 28540 }, { "epoch": 0.5075284873695625, "grad_norm": 4.181922401680994, "learning_rate": 0.0001, "loss": 0.8913, "mean_abs_error": 1661.4431886161478, "mean_abs_error_last_10": 782.6409603270092, "mean_abs_error_last_25": 938.8679615668982, "mean_abs_error_last_50": 1184.0020470552975, "mean_pred_prob": 0.025820539507549256, "mean_pred_prob_last_10": 0.1373463634343352, "mean_pred_prob_last_25": 0.0740623578094528, "mean_pred_prob_last_50": 0.04403179326909594, "mean_token_accuracy": 0.8742922127246857, "step": 28550 }, { "epoch": 0.5077062556663645, "grad_norm": 1.4414285639316244, "learning_rate": 0.0001, "loss": 0.8048, "mean_abs_error": 295.55437188242615, "mean_abs_error_last_10": 101.02865686734656, "mean_abs_error_last_25": 133.42906812304713, "mean_abs_error_last_50": 192.8632739248686, "mean_pred_prob": 0.04671398539794609, "mean_pred_prob_last_10": 0.2361351957079023, "mean_pred_prob_last_25": 0.12923571143765003, "mean_pred_prob_last_50": 0.0784999012365006, "mean_token_accuracy": 0.8703373551368714, "step": 28560 }, { "epoch": 0.5078840239631665, "grad_norm": 0.9884007708136386, "learning_rate": 0.0001, "loss": 0.7657, "mean_abs_error": 441.6382819195229, "mean_abs_error_last_10": 187.82080506292795, "mean_abs_error_last_25": 187.5317948569086, "mean_abs_error_last_50": 234.07829846799828, "mean_pred_prob": 0.042686495603993534, "mean_pred_prob_last_10": 0.18854683436220512, "mean_pred_prob_last_25": 0.11547019860008731, "mean_pred_prob_last_50": 0.07091907694702967, "mean_token_accuracy": 0.8636652767658234, "step": 28570 }, { "epoch": 0.5080617922599684, "grad_norm": 1.1966317751074023, "learning_rate": 0.0001, "loss": 0.7489, "mean_abs_error": 184.7547571773087, "mean_abs_error_last_10": 46.45327448991642, "mean_abs_error_last_25": 74.09685970430999, "mean_abs_error_last_50": 126.21298252597762, "mean_pred_prob": 0.03668021508492529, "mean_pred_prob_last_10": 0.19391719363629817, "mean_pred_prob_last_25": 0.10462188776582479, "mean_pred_prob_last_50": 0.06272116117179394, "mean_token_accuracy": 0.8665999591350555, "step": 28580 }, { "epoch": 0.5082395605567703, "grad_norm": 2.483309835893339, "learning_rate": 0.0001, "loss": 0.906, "mean_abs_error": 594.7653204425199, "mean_abs_error_last_10": 234.7860589209507, "mean_abs_error_last_25": 265.09338494160704, "mean_abs_error_last_50": 362.48646659023933, "mean_pred_prob": 0.03342430640477687, "mean_pred_prob_last_10": 0.1746913313632831, "mean_pred_prob_last_25": 0.09584176094504074, "mean_pred_prob_last_50": 0.05702039678581059, "mean_token_accuracy": 0.8751627504825592, "step": 28590 }, { "epoch": 0.5084173288535723, "grad_norm": 1.5709970095311534, "learning_rate": 0.0001, "loss": 0.9629, "mean_abs_error": 151.08599181015256, "mean_abs_error_last_10": 42.77089140988777, "mean_abs_error_last_25": 51.50750260988737, "mean_abs_error_last_50": 81.74459671812062, "mean_pred_prob": 0.045577087067067625, "mean_pred_prob_last_10": 0.22368122935295104, "mean_pred_prob_last_25": 0.12660930082201957, "mean_pred_prob_last_50": 0.07709219884127379, "mean_token_accuracy": 0.8719697296619415, "step": 28600 }, { "epoch": 0.5085950971503742, "grad_norm": 1.0851478180896137, "learning_rate": 0.0001, "loss": 0.6972, "mean_abs_error": 371.8762830769837, "mean_abs_error_last_10": 131.28853988630055, "mean_abs_error_last_25": 154.81162227538448, "mean_abs_error_last_50": 203.74957914783408, "mean_pred_prob": 0.03899580086581409, "mean_pred_prob_last_10": 0.20544959269464017, "mean_pred_prob_last_25": 0.1087782352231443, "mean_pred_prob_last_50": 0.06595224579796195, "mean_token_accuracy": 0.8818092584609986, "step": 28610 }, { "epoch": 0.5087728654471761, "grad_norm": 1.4517087458764533, "learning_rate": 0.0001, "loss": 0.8087, "mean_abs_error": 510.87426593188513, "mean_abs_error_last_10": 213.3062563979769, "mean_abs_error_last_25": 211.34896003719723, "mean_abs_error_last_50": 292.7672086066695, "mean_pred_prob": 0.04624608090380207, "mean_pred_prob_last_10": 0.22304715572390704, "mean_pred_prob_last_25": 0.12487409152090549, "mean_pred_prob_last_50": 0.07710498650558292, "mean_token_accuracy": 0.881513524055481, "step": 28620 }, { "epoch": 0.5089506337439781, "grad_norm": 1.4855046167456998, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 429.4779883750688, "mean_abs_error_last_10": 73.60379446238646, "mean_abs_error_last_25": 193.12148360097984, "mean_abs_error_last_50": 388.25937175144213, "mean_pred_prob": 0.041235353937372564, "mean_pred_prob_last_10": 0.21395458579063414, "mean_pred_prob_last_25": 0.11762808877974748, "mean_pred_prob_last_50": 0.07070993045344949, "mean_token_accuracy": 0.8816164374351502, "step": 28630 }, { "epoch": 0.50912840204078, "grad_norm": 2.016309645558538, "learning_rate": 0.0001, "loss": 0.7788, "mean_abs_error": 326.25798562713715, "mean_abs_error_last_10": 97.80336436617796, "mean_abs_error_last_25": 113.5940807146455, "mean_abs_error_last_50": 164.829585061825, "mean_pred_prob": 0.030366109684109688, "mean_pred_prob_last_10": 0.15186338014900685, "mean_pred_prob_last_25": 0.08881202414631843, "mean_pred_prob_last_50": 0.05165067603811622, "mean_token_accuracy": 0.8679663360118866, "step": 28640 }, { "epoch": 0.509306170337582, "grad_norm": 1.082401574164336, "learning_rate": 0.0001, "loss": 0.6925, "mean_abs_error": 370.42529105834967, "mean_abs_error_last_10": 283.12636187198325, "mean_abs_error_last_25": 373.7126209501979, "mean_abs_error_last_50": 333.453636196312, "mean_pred_prob": 0.051299168705008925, "mean_pred_prob_last_10": 0.23326470041647554, "mean_pred_prob_last_25": 0.13610471016727388, "mean_pred_prob_last_50": 0.08582771408837289, "mean_token_accuracy": 0.8727491617202758, "step": 28650 }, { "epoch": 0.5094839386343839, "grad_norm": 1.0247012521806176, "learning_rate": 0.0001, "loss": 0.6722, "mean_abs_error": 675.8210827964949, "mean_abs_error_last_10": 171.8366294258629, "mean_abs_error_last_25": 217.40796133520303, "mean_abs_error_last_50": 354.4212399916, "mean_pred_prob": 0.02905527055845596, "mean_pred_prob_last_10": 0.14339746707119047, "mean_pred_prob_last_25": 0.08063599865417928, "mean_pred_prob_last_50": 0.049601554090622814, "mean_token_accuracy": 0.8833361029624939, "step": 28660 }, { "epoch": 0.5096617069311858, "grad_norm": 1.4574266728167395, "learning_rate": 0.0001, "loss": 0.7322, "mean_abs_error": 284.21105937060133, "mean_abs_error_last_10": 41.47805864559584, "mean_abs_error_last_25": 81.71229458077464, "mean_abs_error_last_50": 163.55082792869803, "mean_pred_prob": 0.0358640612103045, "mean_pred_prob_last_10": 0.18374403715133666, "mean_pred_prob_last_25": 0.10002783369272947, "mean_pred_prob_last_50": 0.06064811013638973, "mean_token_accuracy": 0.8777111649513245, "step": 28670 }, { "epoch": 0.5098394752279879, "grad_norm": 2.4531639573426585, "learning_rate": 0.0001, "loss": 0.8683, "mean_abs_error": 1018.3890318259643, "mean_abs_error_last_10": 254.24323235498915, "mean_abs_error_last_25": 342.98350491195527, "mean_abs_error_last_50": 549.5743378043535, "mean_pred_prob": 0.026747058346518315, "mean_pred_prob_last_10": 0.1467367658042349, "mean_pred_prob_last_25": 0.07572357764001936, "mean_pred_prob_last_50": 0.04514525480335578, "mean_token_accuracy": 0.8683404684066772, "step": 28680 }, { "epoch": 0.5100172435247898, "grad_norm": 1.9346999098397781, "learning_rate": 0.0001, "loss": 0.7088, "mean_abs_error": 81.82865690304345, "mean_abs_error_last_10": 58.63082065476285, "mean_abs_error_last_25": 72.91645979770341, "mean_abs_error_last_50": 66.62252498592478, "mean_pred_prob": 0.0739586773328483, "mean_pred_prob_last_10": 0.3210327126085758, "mean_pred_prob_last_25": 0.1888467289507389, "mean_pred_prob_last_50": 0.1208034910261631, "mean_token_accuracy": 0.8810029268264771, "step": 28690 }, { "epoch": 0.5101950118215918, "grad_norm": 2.350729435148853, "learning_rate": 0.0001, "loss": 0.8332, "mean_abs_error": 554.1229939168825, "mean_abs_error_last_10": 282.5927714903675, "mean_abs_error_last_25": 344.2988709728154, "mean_abs_error_last_50": 407.684534952955, "mean_pred_prob": 0.03020176976569928, "mean_pred_prob_last_10": 0.14844277859665453, "mean_pred_prob_last_25": 0.0830343423760496, "mean_pred_prob_last_50": 0.050801789434626696, "mean_token_accuracy": 0.8735908210277558, "step": 28700 }, { "epoch": 0.5103727801183937, "grad_norm": 2.8654540627390475, "learning_rate": 0.0001, "loss": 0.8673, "mean_abs_error": 297.8724824545346, "mean_abs_error_last_10": 105.02547329930205, "mean_abs_error_last_25": 173.83088227948446, "mean_abs_error_last_50": 243.9626066703871, "mean_pred_prob": 0.041071649780496954, "mean_pred_prob_last_10": 0.21394964065402747, "mean_pred_prob_last_25": 0.11546708839014172, "mean_pred_prob_last_50": 0.06889013643376529, "mean_token_accuracy": 0.8645703434944153, "step": 28710 }, { "epoch": 0.5105505484151956, "grad_norm": 1.7641061255509303, "learning_rate": 0.0001, "loss": 0.6777, "mean_abs_error": 185.2520849999902, "mean_abs_error_last_10": 42.07871974608468, "mean_abs_error_last_25": 61.346252249777514, "mean_abs_error_last_50": 97.52762492796161, "mean_pred_prob": 0.04224668694660068, "mean_pred_prob_last_10": 0.20233197920024396, "mean_pred_prob_last_25": 0.11272451933473349, "mean_pred_prob_last_50": 0.069782954454422, "mean_token_accuracy": 0.8778145790100098, "step": 28720 }, { "epoch": 0.5107283167119976, "grad_norm": 1.1772559713041264, "learning_rate": 0.0001, "loss": 0.7159, "mean_abs_error": 296.76616301702086, "mean_abs_error_last_10": 91.46506741575996, "mean_abs_error_last_25": 92.06263871240812, "mean_abs_error_last_50": 147.97293982281647, "mean_pred_prob": 0.05665655340999365, "mean_pred_prob_last_10": 0.25400550849735737, "mean_pred_prob_last_25": 0.15257339775562287, "mean_pred_prob_last_50": 0.09466686928644777, "mean_token_accuracy": 0.8795072615146637, "step": 28730 }, { "epoch": 0.5109060850087995, "grad_norm": 1.9920746132490887, "learning_rate": 0.0001, "loss": 0.7156, "mean_abs_error": 787.6805841720359, "mean_abs_error_last_10": 283.80359932743295, "mean_abs_error_last_25": 412.40482118067877, "mean_abs_error_last_50": 566.8295789810196, "mean_pred_prob": 0.023815515043679625, "mean_pred_prob_last_10": 0.11724098174599931, "mean_pred_prob_last_25": 0.062958737910958, "mean_pred_prob_last_50": 0.03903873281087726, "mean_token_accuracy": 0.8763804733753204, "step": 28740 }, { "epoch": 0.5110838533056015, "grad_norm": 2.201924150181111, "learning_rate": 0.0001, "loss": 0.754, "mean_abs_error": 456.326842107287, "mean_abs_error_last_10": 153.33282594722422, "mean_abs_error_last_25": 148.14082477010905, "mean_abs_error_last_50": 218.65574041995245, "mean_pred_prob": 0.04276622717734426, "mean_pred_prob_last_10": 0.20509249921888112, "mean_pred_prob_last_25": 0.11716687858570367, "mean_pred_prob_last_50": 0.07342835699673742, "mean_token_accuracy": 0.8738799929618836, "step": 28750 }, { "epoch": 0.5112616216024034, "grad_norm": 0.9694083019650487, "learning_rate": 0.0001, "loss": 0.7448, "mean_abs_error": 626.7358002593114, "mean_abs_error_last_10": 310.9972120068195, "mean_abs_error_last_25": 359.647657987385, "mean_abs_error_last_50": 458.8039473409377, "mean_pred_prob": 0.04398458561918232, "mean_pred_prob_last_10": 0.21827997877262534, "mean_pred_prob_last_25": 0.12201683249440975, "mean_pred_prob_last_50": 0.07435823479900136, "mean_token_accuracy": 0.875916314125061, "step": 28760 }, { "epoch": 0.5114393898992053, "grad_norm": 1.8194020059550051, "learning_rate": 0.0001, "loss": 0.8071, "mean_abs_error": 621.2819505777245, "mean_abs_error_last_10": 256.47413065446165, "mean_abs_error_last_25": 291.9672143916998, "mean_abs_error_last_50": 444.050019558485, "mean_pred_prob": 0.02407351993024349, "mean_pred_prob_last_10": 0.12891827952116727, "mean_pred_prob_last_25": 0.06951352907344699, "mean_pred_prob_last_50": 0.040953197749331594, "mean_token_accuracy": 0.8654258131980896, "step": 28770 }, { "epoch": 0.5116171581960073, "grad_norm": 1.5649326308427969, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 832.8945584754301, "mean_abs_error_last_10": 208.78365782872487, "mean_abs_error_last_25": 241.81580582478415, "mean_abs_error_last_50": 451.5753204210438, "mean_pred_prob": 0.05099902637302876, "mean_pred_prob_last_10": 0.22557205113116652, "mean_pred_prob_last_25": 0.13355382388690487, "mean_pred_prob_last_50": 0.08395316363312304, "mean_token_accuracy": 0.8747463524341583, "step": 28780 }, { "epoch": 0.5117949264928092, "grad_norm": 1.4983728180325704, "learning_rate": 0.0001, "loss": 0.7513, "mean_abs_error": 265.55144250639006, "mean_abs_error_last_10": 97.33495414713157, "mean_abs_error_last_25": 97.38344061188427, "mean_abs_error_last_50": 171.5692947050215, "mean_pred_prob": 0.05037427538773045, "mean_pred_prob_last_10": 0.24956079153344035, "mean_pred_prob_last_25": 0.14029816356487573, "mean_pred_prob_last_50": 0.08555212332867086, "mean_token_accuracy": 0.8816155850887298, "step": 28790 }, { "epoch": 0.5119726947896113, "grad_norm": 1.3300086352131746, "learning_rate": 0.0001, "loss": 0.7156, "mean_abs_error": 663.394789643605, "mean_abs_error_last_10": 170.64196324404008, "mean_abs_error_last_25": 321.4268922515881, "mean_abs_error_last_50": 487.38803681994557, "mean_pred_prob": 0.030545159010216594, "mean_pred_prob_last_10": 0.151442939392291, "mean_pred_prob_last_25": 0.0810372335021384, "mean_pred_prob_last_50": 0.050401597004383805, "mean_token_accuracy": 0.8729714751243591, "step": 28800 }, { "epoch": 0.5121504630864132, "grad_norm": 0.9902665581735494, "learning_rate": 0.0001, "loss": 0.7065, "mean_abs_error": 528.1209998483391, "mean_abs_error_last_10": 224.4919815646105, "mean_abs_error_last_25": 264.9008531042077, "mean_abs_error_last_50": 276.39628664698523, "mean_pred_prob": 0.034638778009684755, "mean_pred_prob_last_10": 0.1852141500916332, "mean_pred_prob_last_25": 0.09842281707096845, "mean_pred_prob_last_50": 0.05891429820330814, "mean_token_accuracy": 0.8715011358261109, "step": 28810 }, { "epoch": 0.5123282313832151, "grad_norm": 1.3980013789108663, "learning_rate": 0.0001, "loss": 0.6983, "mean_abs_error": 196.6832268651936, "mean_abs_error_last_10": 95.64547070578388, "mean_abs_error_last_25": 109.76856803600954, "mean_abs_error_last_50": 107.87415485303896, "mean_pred_prob": 0.06234409629832953, "mean_pred_prob_last_10": 0.27708045234903694, "mean_pred_prob_last_25": 0.1605900566559285, "mean_pred_prob_last_50": 0.10095251849852502, "mean_token_accuracy": 0.8715827345848084, "step": 28820 }, { "epoch": 0.5125059996800171, "grad_norm": 2.2467483899563963, "learning_rate": 0.0001, "loss": 0.7499, "mean_abs_error": 819.0567338566412, "mean_abs_error_last_10": 194.28328820129553, "mean_abs_error_last_25": 246.69124522791353, "mean_abs_error_last_50": 441.6747073519344, "mean_pred_prob": 0.0249496195174288, "mean_pred_prob_last_10": 0.1382811774907168, "mean_pred_prob_last_25": 0.07330380519852042, "mean_pred_prob_last_50": 0.042731394089059904, "mean_token_accuracy": 0.8665369927883149, "step": 28830 }, { "epoch": 0.512683767976819, "grad_norm": 1.3926420246449416, "learning_rate": 0.0001, "loss": 0.679, "mean_abs_error": 403.6319596669247, "mean_abs_error_last_10": 192.15452500641592, "mean_abs_error_last_25": 211.2829548366533, "mean_abs_error_last_50": 299.84753488659726, "mean_pred_prob": 0.048321737378137185, "mean_pred_prob_last_10": 0.24112501075142062, "mean_pred_prob_last_25": 0.13073609155253507, "mean_pred_prob_last_50": 0.07968302157241851, "mean_token_accuracy": 0.8787336826324463, "step": 28840 }, { "epoch": 0.512861536273621, "grad_norm": 1.2692267721260364, "learning_rate": 0.0001, "loss": 0.7326, "mean_abs_error": 784.2887573787964, "mean_abs_error_last_10": 490.45881793915044, "mean_abs_error_last_25": 569.5575124900671, "mean_abs_error_last_50": 647.5910256073663, "mean_pred_prob": 0.04823237179225544, "mean_pred_prob_last_10": 0.23818665996659547, "mean_pred_prob_last_25": 0.13371148718724726, "mean_pred_prob_last_50": 0.08193692153145093, "mean_token_accuracy": 0.8727495431900024, "step": 28850 }, { "epoch": 0.5130393045704229, "grad_norm": 1.3242094991708155, "learning_rate": 0.0001, "loss": 0.755, "mean_abs_error": 268.77127725590924, "mean_abs_error_last_10": 76.10455987095546, "mean_abs_error_last_25": 89.2263535489871, "mean_abs_error_last_50": 141.00651689123097, "mean_pred_prob": 0.0554921010741964, "mean_pred_prob_last_10": 0.2653542529791594, "mean_pred_prob_last_25": 0.15317231556400657, "mean_pred_prob_last_50": 0.0944188771303743, "mean_token_accuracy": 0.8737327635288239, "step": 28860 }, { "epoch": 0.5132170728672248, "grad_norm": 1.7417156507111053, "learning_rate": 0.0001, "loss": 0.7598, "mean_abs_error": 439.8107277413782, "mean_abs_error_last_10": 141.9435881069497, "mean_abs_error_last_25": 170.52541698972064, "mean_abs_error_last_50": 218.31021283700665, "mean_pred_prob": 0.039407974039204416, "mean_pred_prob_last_10": 0.2024905490456149, "mean_pred_prob_last_25": 0.11022086995653808, "mean_pred_prob_last_50": 0.06652095386525617, "mean_token_accuracy": 0.8658661305904388, "step": 28870 }, { "epoch": 0.5133948411640268, "grad_norm": 1.6637847280228528, "learning_rate": 0.0001, "loss": 0.6968, "mean_abs_error": 411.05515523433934, "mean_abs_error_last_10": 107.4814511189963, "mean_abs_error_last_25": 179.75371130702257, "mean_abs_error_last_50": 280.5493090557834, "mean_pred_prob": 0.029710707394406198, "mean_pred_prob_last_10": 0.14599330574274064, "mean_pred_prob_last_25": 0.0821992240846157, "mean_pred_prob_last_50": 0.04948415020480752, "mean_token_accuracy": 0.8775086581707001, "step": 28880 }, { "epoch": 0.5135726094608287, "grad_norm": 1.052807082825438, "learning_rate": 0.0001, "loss": 0.7384, "mean_abs_error": 245.32836499318265, "mean_abs_error_last_10": 118.35805061769784, "mean_abs_error_last_25": 141.32367066136376, "mean_abs_error_last_50": 180.74497088111167, "mean_pred_prob": 0.041841762757394464, "mean_pred_prob_last_10": 0.1979687417857349, "mean_pred_prob_last_25": 0.11194091928191482, "mean_pred_prob_last_50": 0.06944757762830704, "mean_token_accuracy": 0.8749408185482025, "step": 28890 }, { "epoch": 0.5137503777576307, "grad_norm": 2.462934742005201, "learning_rate": 0.0001, "loss": 0.7546, "mean_abs_error": 257.22417305148235, "mean_abs_error_last_10": 72.73456341305693, "mean_abs_error_last_25": 89.32352541818122, "mean_abs_error_last_50": 159.51743361034502, "mean_pred_prob": 0.038657837500795725, "mean_pred_prob_last_10": 0.2033870590850711, "mean_pred_prob_last_25": 0.10941475061699749, "mean_pred_prob_last_50": 0.06622393042780458, "mean_token_accuracy": 0.8752181768417359, "step": 28900 }, { "epoch": 0.5139281460544326, "grad_norm": 2.9395955686250153, "learning_rate": 0.0001, "loss": 0.8105, "mean_abs_error": 139.64525314463032, "mean_abs_error_last_10": 42.809635745720286, "mean_abs_error_last_25": 71.72901739837891, "mean_abs_error_last_50": 108.43359841805315, "mean_pred_prob": 0.04341684682294726, "mean_pred_prob_last_10": 0.21373601406812667, "mean_pred_prob_last_25": 0.12041876632720232, "mean_pred_prob_last_50": 0.07296911180019379, "mean_token_accuracy": 0.8764136016368866, "step": 28910 }, { "epoch": 0.5141059143512346, "grad_norm": 1.3582267714487497, "learning_rate": 0.0001, "loss": 0.6462, "mean_abs_error": 115.203728839793, "mean_abs_error_last_10": 25.82688150755738, "mean_abs_error_last_25": 52.97608476040743, "mean_abs_error_last_50": 81.99189139110132, "mean_pred_prob": 0.05971492184326053, "mean_pred_prob_last_10": 0.27792490646243095, "mean_pred_prob_last_25": 0.16103766150772572, "mean_pred_prob_last_50": 0.09839769024401904, "mean_token_accuracy": 0.8913367569446564, "step": 28920 }, { "epoch": 0.5142836826480366, "grad_norm": 1.002510589915048, "learning_rate": 0.0001, "loss": 0.7151, "mean_abs_error": 414.3946100094986, "mean_abs_error_last_10": 161.1781654491623, "mean_abs_error_last_25": 216.5318382463385, "mean_abs_error_last_50": 258.3678987347217, "mean_pred_prob": 0.03832676685415208, "mean_pred_prob_last_10": 0.2067675095051527, "mean_pred_prob_last_25": 0.11138185430318118, "mean_pred_prob_last_50": 0.06546427528373897, "mean_token_accuracy": 0.8645403563976288, "step": 28930 }, { "epoch": 0.5144614509448385, "grad_norm": 1.01681985409879, "learning_rate": 0.0001, "loss": 0.7814, "mean_abs_error": 1121.037477059343, "mean_abs_error_last_10": 439.04568924814737, "mean_abs_error_last_25": 569.4966708369379, "mean_abs_error_last_50": 796.0025793251763, "mean_pred_prob": 0.034834998412407, "mean_pred_prob_last_10": 0.1577773940283805, "mean_pred_prob_last_25": 0.08994337616604753, "mean_pred_prob_last_50": 0.056524203429580665, "mean_token_accuracy": 0.8733380794525146, "step": 28940 }, { "epoch": 0.5146392192416405, "grad_norm": 1.20418215381094, "learning_rate": 0.0001, "loss": 0.7612, "mean_abs_error": 516.5303210571137, "mean_abs_error_last_10": 131.93725462015377, "mean_abs_error_last_25": 199.49001895982866, "mean_abs_error_last_50": 279.02400473511955, "mean_pred_prob": 0.04751849558670074, "mean_pred_prob_last_10": 0.2200768407434225, "mean_pred_prob_last_25": 0.1261479386128485, "mean_pred_prob_last_50": 0.08007552484050393, "mean_token_accuracy": 0.8803705990314483, "step": 28950 }, { "epoch": 0.5148169875384424, "grad_norm": 1.0497039466133418, "learning_rate": 0.0001, "loss": 0.8293, "mean_abs_error": 814.6805278395705, "mean_abs_error_last_10": 299.8801037620985, "mean_abs_error_last_25": 365.29713833547817, "mean_abs_error_last_50": 500.6096380632704, "mean_pred_prob": 0.038671591578167866, "mean_pred_prob_last_10": 0.16868192638212348, "mean_pred_prob_last_25": 0.09656707412796095, "mean_pred_prob_last_50": 0.0620461689599324, "mean_token_accuracy": 0.8787840604782104, "step": 28960 }, { "epoch": 0.5149947558352443, "grad_norm": 1.2766983103526832, "learning_rate": 0.0001, "loss": 0.7118, "mean_abs_error": 517.4586100935187, "mean_abs_error_last_10": 137.69234239583352, "mean_abs_error_last_25": 224.34912908110664, "mean_abs_error_last_50": 272.8415699076071, "mean_pred_prob": 0.03265606157947332, "mean_pred_prob_last_10": 0.17193466182798148, "mean_pred_prob_last_25": 0.09355955421924592, "mean_pred_prob_last_50": 0.056462996173650026, "mean_token_accuracy": 0.87129927277565, "step": 28970 }, { "epoch": 0.5151725241320463, "grad_norm": 1.737332285842719, "learning_rate": 0.0001, "loss": 0.6844, "mean_abs_error": 389.46769006132706, "mean_abs_error_last_10": 110.27832666837969, "mean_abs_error_last_25": 194.07339728340412, "mean_abs_error_last_50": 229.7167257104408, "mean_pred_prob": 0.043133249040693045, "mean_pred_prob_last_10": 0.2277885181363672, "mean_pred_prob_last_25": 0.12231490556150675, "mean_pred_prob_last_50": 0.07358724758960307, "mean_token_accuracy": 0.8688474416732788, "step": 28980 }, { "epoch": 0.5153502924288482, "grad_norm": 3.637617648696406, "learning_rate": 0.0001, "loss": 1.0721, "mean_abs_error": 1179.2395939606306, "mean_abs_error_last_10": 912.7026328027687, "mean_abs_error_last_25": 914.5760340691111, "mean_abs_error_last_50": 949.5339014771201, "mean_pred_prob": 0.03880396481254138, "mean_pred_prob_last_10": 0.17129889663192444, "mean_pred_prob_last_25": 0.10190903264447115, "mean_pred_prob_last_50": 0.06468433853879105, "mean_token_accuracy": 0.8718784332275391, "step": 28990 }, { "epoch": 0.5155280607256502, "grad_norm": 1.2141663420644997, "learning_rate": 0.0001, "loss": 0.7714, "mean_abs_error": 187.8585494594127, "mean_abs_error_last_10": 64.11981243744107, "mean_abs_error_last_25": 67.35720645930124, "mean_abs_error_last_50": 118.19093960056614, "mean_pred_prob": 0.04032578989863396, "mean_pred_prob_last_10": 0.19990784972906112, "mean_pred_prob_last_25": 0.11024993453174829, "mean_pred_prob_last_50": 0.06748964749276638, "mean_token_accuracy": 0.8718989014625549, "step": 29000 }, { "epoch": 0.5157058290224521, "grad_norm": 1.2366269867572166, "learning_rate": 0.0001, "loss": 0.7088, "mean_abs_error": 552.6508778021746, "mean_abs_error_last_10": 121.20534591641069, "mean_abs_error_last_25": 141.01593511397647, "mean_abs_error_last_50": 231.12483800182522, "mean_pred_prob": 0.038182677247095856, "mean_pred_prob_last_10": 0.18334443434141576, "mean_pred_prob_last_25": 0.10446715208236128, "mean_pred_prob_last_50": 0.0645824956940487, "mean_token_accuracy": 0.8830779373645783, "step": 29010 }, { "epoch": 0.515883597319254, "grad_norm": 1.4444579976174183, "learning_rate": 0.0001, "loss": 0.6583, "mean_abs_error": 963.4507878800723, "mean_abs_error_last_10": 496.70455899135504, "mean_abs_error_last_25": 574.2007568811078, "mean_abs_error_last_50": 698.8664591067092, "mean_pred_prob": 0.05220914334931877, "mean_pred_prob_last_10": 0.24865379291586578, "mean_pred_prob_last_25": 0.14497949496726506, "mean_pred_prob_last_50": 0.08821903374919202, "mean_token_accuracy": 0.8772828042507171, "step": 29020 }, { "epoch": 0.516061365616056, "grad_norm": 0.9485909215503441, "learning_rate": 0.0001, "loss": 0.6955, "mean_abs_error": 346.618497049888, "mean_abs_error_last_10": 92.18978327147649, "mean_abs_error_last_25": 131.23131598251277, "mean_abs_error_last_50": 211.01809918589706, "mean_pred_prob": 0.03421452273614704, "mean_pred_prob_last_10": 0.16812221743166447, "mean_pred_prob_last_25": 0.09233891870826483, "mean_pred_prob_last_50": 0.05754816355183721, "mean_token_accuracy": 0.8778190910816193, "step": 29030 }, { "epoch": 0.516239133912858, "grad_norm": 1.2852258269048114, "learning_rate": 0.0001, "loss": 0.8703, "mean_abs_error": 250.19226781166375, "mean_abs_error_last_10": 29.89848228445252, "mean_abs_error_last_25": 79.18424163611684, "mean_abs_error_last_50": 135.62477219499408, "mean_pred_prob": 0.04729332714341581, "mean_pred_prob_last_10": 0.24334742017090322, "mean_pred_prob_last_25": 0.13022113498300314, "mean_pred_prob_last_50": 0.08010414158925414, "mean_token_accuracy": 0.8746953070163727, "step": 29040 }, { "epoch": 0.51641690220966, "grad_norm": 1.0867978560719018, "learning_rate": 0.0001, "loss": 0.834, "mean_abs_error": 994.8190836332303, "mean_abs_error_last_10": 431.4116355116504, "mean_abs_error_last_25": 569.9429434680435, "mean_abs_error_last_50": 744.7763035902152, "mean_pred_prob": 0.037065398044069296, "mean_pred_prob_last_10": 0.1781743637577165, "mean_pred_prob_last_25": 0.10355388611496892, "mean_pred_prob_last_50": 0.0626471369410865, "mean_token_accuracy": 0.864356392621994, "step": 29050 }, { "epoch": 0.5165946705064619, "grad_norm": 1.7449806838923596, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 157.46689321716423, "mean_abs_error_last_10": 30.257355668541862, "mean_abs_error_last_25": 52.39297688097339, "mean_abs_error_last_50": 96.67043899630875, "mean_pred_prob": 0.0483964070212096, "mean_pred_prob_last_10": 0.23075426705181598, "mean_pred_prob_last_25": 0.13107890151441098, "mean_pred_prob_last_50": 0.08139875205233693, "mean_token_accuracy": 0.8794250428676605, "step": 29060 }, { "epoch": 0.5167724388032638, "grad_norm": 1.3715613694942004, "learning_rate": 0.0001, "loss": 0.7835, "mean_abs_error": 690.2683862583458, "mean_abs_error_last_10": 208.38216832374155, "mean_abs_error_last_25": 312.7621219538179, "mean_abs_error_last_50": 485.47779455895954, "mean_pred_prob": 0.042511536926031115, "mean_pred_prob_last_10": 0.21363047477789224, "mean_pred_prob_last_25": 0.11768563268706203, "mean_pred_prob_last_50": 0.07060644570156001, "mean_token_accuracy": 0.8804095625877381, "step": 29070 }, { "epoch": 0.5169502071000658, "grad_norm": 1.9935139272532894, "learning_rate": 0.0001, "loss": 0.6271, "mean_abs_error": 174.94637747283386, "mean_abs_error_last_10": 109.2052560011617, "mean_abs_error_last_25": 116.10783075695981, "mean_abs_error_last_50": 128.5846557293211, "mean_pred_prob": 0.05227413563989103, "mean_pred_prob_last_10": 0.23078694120049476, "mean_pred_prob_last_25": 0.13549990234896542, "mean_pred_prob_last_50": 0.08650636682286858, "mean_token_accuracy": 0.8852194309234619, "step": 29080 }, { "epoch": 0.5171279753968677, "grad_norm": 2.6390267254530952, "learning_rate": 0.0001, "loss": 0.7238, "mean_abs_error": 739.9795392592388, "mean_abs_error_last_10": 198.51539735677875, "mean_abs_error_last_25": 266.21191253621805, "mean_abs_error_last_50": 408.86983628498126, "mean_pred_prob": 0.037482433835975826, "mean_pred_prob_last_10": 0.20021679410128854, "mean_pred_prob_last_25": 0.1100527476752177, "mean_pred_prob_last_50": 0.06532257357612252, "mean_token_accuracy": 0.8761380076408386, "step": 29090 }, { "epoch": 0.5173057436936697, "grad_norm": 2.703441403862892, "learning_rate": 0.0001, "loss": 0.8558, "mean_abs_error": 677.2527426364329, "mean_abs_error_last_10": 172.50874008050064, "mean_abs_error_last_25": 286.07360739730564, "mean_abs_error_last_50": 381.31352790809797, "mean_pred_prob": 0.0480523360951338, "mean_pred_prob_last_10": 0.21647195261903107, "mean_pred_prob_last_25": 0.12679642776492983, "mean_pred_prob_last_50": 0.07979533021571114, "mean_token_accuracy": 0.8709697484970093, "step": 29100 }, { "epoch": 0.5174835119904716, "grad_norm": 2.111437250528509, "learning_rate": 0.0001, "loss": 0.6605, "mean_abs_error": 138.27631793849417, "mean_abs_error_last_10": 25.015672878842864, "mean_abs_error_last_25": 35.670377887688346, "mean_abs_error_last_50": 64.59574940003543, "mean_pred_prob": 0.06534328060224652, "mean_pred_prob_last_10": 0.29379480630159377, "mean_pred_prob_last_25": 0.16651337333023547, "mean_pred_prob_last_50": 0.10663199350237847, "mean_token_accuracy": 0.8722636342048645, "step": 29110 }, { "epoch": 0.5176612802872735, "grad_norm": 1.2120296186860555, "learning_rate": 0.0001, "loss": 0.6884, "mean_abs_error": 601.8161893707718, "mean_abs_error_last_10": 96.73870511945061, "mean_abs_error_last_25": 132.60168245667123, "mean_abs_error_last_50": 288.6054151488961, "mean_pred_prob": 0.046901024848921224, "mean_pred_prob_last_10": 0.21528503079898656, "mean_pred_prob_last_25": 0.12869028393179177, "mean_pred_prob_last_50": 0.07963066993979737, "mean_token_accuracy": 0.8804178059101104, "step": 29120 }, { "epoch": 0.5178390485840755, "grad_norm": 3.208789209995972, "learning_rate": 0.0001, "loss": 0.7486, "mean_abs_error": 413.52178753208284, "mean_abs_error_last_10": 132.65788602330352, "mean_abs_error_last_25": 188.5308657952415, "mean_abs_error_last_50": 309.00791347379317, "mean_pred_prob": 0.03960780682973564, "mean_pred_prob_last_10": 0.19615471735596657, "mean_pred_prob_last_25": 0.10913008851930499, "mean_pred_prob_last_50": 0.06614464870654047, "mean_token_accuracy": 0.8742195427417755, "step": 29130 }, { "epoch": 0.5180168168808774, "grad_norm": 0.7961654129449885, "learning_rate": 0.0001, "loss": 0.637, "mean_abs_error": 429.5023521513646, "mean_abs_error_last_10": 143.31133505221288, "mean_abs_error_last_25": 173.40779042677707, "mean_abs_error_last_50": 236.47743811175465, "mean_pred_prob": 0.03923899508663453, "mean_pred_prob_last_10": 0.19666338523384183, "mean_pred_prob_last_25": 0.10861903910990804, "mean_pred_prob_last_50": 0.0666814198717475, "mean_token_accuracy": 0.8735457062721252, "step": 29140 }, { "epoch": 0.5181945851776795, "grad_norm": 1.3559657851071538, "learning_rate": 0.0001, "loss": 0.7414, "mean_abs_error": 261.2454505561497, "mean_abs_error_last_10": 98.5071803118322, "mean_abs_error_last_25": 122.37741243345235, "mean_abs_error_last_50": 193.06143051424985, "mean_pred_prob": 0.04543575961142778, "mean_pred_prob_last_10": 0.20810743123292924, "mean_pred_prob_last_25": 0.1215131824836135, "mean_pred_prob_last_50": 0.07503684647381306, "mean_token_accuracy": 0.8734558165073395, "step": 29150 }, { "epoch": 0.5183723534744814, "grad_norm": 1.702511441488875, "learning_rate": 0.0001, "loss": 0.7161, "mean_abs_error": 407.9610584919578, "mean_abs_error_last_10": 55.22420367773051, "mean_abs_error_last_25": 102.88040729045402, "mean_abs_error_last_50": 194.21933171950033, "mean_pred_prob": 0.040989747177809475, "mean_pred_prob_last_10": 0.199523987993598, "mean_pred_prob_last_25": 0.11013212613761425, "mean_pred_prob_last_50": 0.0682341798208654, "mean_token_accuracy": 0.878021878004074, "step": 29160 }, { "epoch": 0.5185501217712833, "grad_norm": 1.0315818022672743, "learning_rate": 0.0001, "loss": 0.692, "mean_abs_error": 218.78170492102763, "mean_abs_error_last_10": 85.44830706700732, "mean_abs_error_last_25": 104.75318238445473, "mean_abs_error_last_50": 156.70680624954542, "mean_pred_prob": 0.03296184255741537, "mean_pred_prob_last_10": 0.14834588691592215, "mean_pred_prob_last_25": 0.08546785013750195, "mean_pred_prob_last_50": 0.05328014958649874, "mean_token_accuracy": 0.8676959693431854, "step": 29170 }, { "epoch": 0.5187278900680853, "grad_norm": 2.9863358263530277, "learning_rate": 0.0001, "loss": 0.7643, "mean_abs_error": 296.3243471222806, "mean_abs_error_last_10": 228.49877616784357, "mean_abs_error_last_25": 245.8550150352624, "mean_abs_error_last_50": 243.726667527981, "mean_pred_prob": 0.04436547226505354, "mean_pred_prob_last_10": 0.21894232304766775, "mean_pred_prob_last_25": 0.1220895056729205, "mean_pred_prob_last_50": 0.07325561821926385, "mean_token_accuracy": 0.8684371709823608, "step": 29180 }, { "epoch": 0.5189056583648872, "grad_norm": 3.7807392159982793, "learning_rate": 0.0001, "loss": 0.7833, "mean_abs_error": 528.579442875953, "mean_abs_error_last_10": 204.7608716906857, "mean_abs_error_last_25": 422.27571325873896, "mean_abs_error_last_50": 526.2842889377496, "mean_pred_prob": 0.024822206841781734, "mean_pred_prob_last_10": 0.12073476128280163, "mean_pred_prob_last_25": 0.06690845955163241, "mean_pred_prob_last_50": 0.041432145051658154, "mean_token_accuracy": 0.8712833464145661, "step": 29190 }, { "epoch": 0.5190834266616892, "grad_norm": 1.1995144254815362, "learning_rate": 0.0001, "loss": 0.7836, "mean_abs_error": 463.40462503119744, "mean_abs_error_last_10": 163.7516828054899, "mean_abs_error_last_25": 193.55575678947275, "mean_abs_error_last_50": 288.2190713666796, "mean_pred_prob": 0.03753000713768415, "mean_pred_prob_last_10": 0.19056251104921101, "mean_pred_prob_last_25": 0.10442586676217616, "mean_pred_prob_last_50": 0.06321098156040535, "mean_token_accuracy": 0.8674968898296356, "step": 29200 }, { "epoch": 0.5192611949584911, "grad_norm": 1.461731562867139, "learning_rate": 0.0001, "loss": 0.7666, "mean_abs_error": 577.3921472274272, "mean_abs_error_last_10": 225.5842852105856, "mean_abs_error_last_25": 236.76611481247136, "mean_abs_error_last_50": 376.15350869299175, "mean_pred_prob": 0.04739352450706065, "mean_pred_prob_last_10": 0.20865621123230085, "mean_pred_prob_last_25": 0.12782453945837915, "mean_pred_prob_last_50": 0.07886127277160995, "mean_token_accuracy": 0.8684279084205627, "step": 29210 }, { "epoch": 0.519438963255293, "grad_norm": 1.6114833846799883, "learning_rate": 0.0001, "loss": 0.8122, "mean_abs_error": 1450.0874735010773, "mean_abs_error_last_10": 646.1880686195288, "mean_abs_error_last_25": 779.8245473012965, "mean_abs_error_last_50": 984.152264861034, "mean_pred_prob": 0.01997259239506093, "mean_pred_prob_last_10": 0.10330954751407262, "mean_pred_prob_last_25": 0.056850093774846754, "mean_pred_prob_last_50": 0.03424850950977998, "mean_token_accuracy": 0.8696889400482177, "step": 29220 }, { "epoch": 0.519616731552095, "grad_norm": 1.1484499463197717, "learning_rate": 0.0001, "loss": 0.8378, "mean_abs_error": 466.06508732112627, "mean_abs_error_last_10": 128.7659960819707, "mean_abs_error_last_25": 165.63072799255605, "mean_abs_error_last_50": 260.02096408776333, "mean_pred_prob": 0.05554722232045606, "mean_pred_prob_last_10": 0.2399982392322272, "mean_pred_prob_last_25": 0.14139166662935168, "mean_pred_prob_last_50": 0.08960763333598151, "mean_token_accuracy": 0.8735553741455078, "step": 29230 }, { "epoch": 0.5197944998488969, "grad_norm": 1.869086842508536, "learning_rate": 0.0001, "loss": 0.8073, "mean_abs_error": 270.41610025328225, "mean_abs_error_last_10": 110.7143111910485, "mean_abs_error_last_25": 258.5784568025141, "mean_abs_error_last_50": 275.8163221231593, "mean_pred_prob": 0.031103632366284727, "mean_pred_prob_last_10": 0.16607145592570305, "mean_pred_prob_last_25": 0.08646905766800046, "mean_pred_prob_last_50": 0.052560210414230826, "mean_token_accuracy": 0.8639090836048127, "step": 29240 }, { "epoch": 0.5199722681456989, "grad_norm": 2.3806901787424173, "learning_rate": 0.0001, "loss": 0.7121, "mean_abs_error": 877.5238938143171, "mean_abs_error_last_10": 362.0602891714578, "mean_abs_error_last_25": 403.1674471530725, "mean_abs_error_last_50": 591.4657098959148, "mean_pred_prob": 0.03192666673276108, "mean_pred_prob_last_10": 0.16728872118401342, "mean_pred_prob_last_25": 0.0897198988503078, "mean_pred_prob_last_50": 0.05434905592992436, "mean_token_accuracy": 0.8666667222976685, "step": 29250 }, { "epoch": 0.5201500364425008, "grad_norm": 1.7686511480465559, "learning_rate": 0.0001, "loss": 0.7763, "mean_abs_error": 357.6410550767791, "mean_abs_error_last_10": 90.2663880020967, "mean_abs_error_last_25": 142.74709622180882, "mean_abs_error_last_50": 207.81300482980828, "mean_pred_prob": 0.04169254244770855, "mean_pred_prob_last_10": 0.2020530069246888, "mean_pred_prob_last_25": 0.113604904897511, "mean_pred_prob_last_50": 0.07123005837202072, "mean_token_accuracy": 0.8700198590755462, "step": 29260 }, { "epoch": 0.5203278047393028, "grad_norm": 0.8219795130724606, "learning_rate": 0.0001, "loss": 0.6852, "mean_abs_error": 247.53316334836086, "mean_abs_error_last_10": 91.85293070299552, "mean_abs_error_last_25": 124.24974593396288, "mean_abs_error_last_50": 170.44994492535486, "mean_pred_prob": 0.03582142191007733, "mean_pred_prob_last_10": 0.17481023222208023, "mean_pred_prob_last_25": 0.0994809776544571, "mean_pred_prob_last_50": 0.06010704236105084, "mean_token_accuracy": 0.8802647829055786, "step": 29270 }, { "epoch": 0.5205055730361048, "grad_norm": 1.953826567138138, "learning_rate": 0.0001, "loss": 0.7967, "mean_abs_error": 937.0439362388322, "mean_abs_error_last_10": 279.44806320557666, "mean_abs_error_last_25": 404.4040415210653, "mean_abs_error_last_50": 567.593244268797, "mean_pred_prob": 0.03355559119372629, "mean_pred_prob_last_10": 0.16682821666472591, "mean_pred_prob_last_25": 0.09469005094142631, "mean_pred_prob_last_50": 0.05761395562440157, "mean_token_accuracy": 0.87194904088974, "step": 29280 }, { "epoch": 0.5206833413329067, "grad_norm": 1.5353519321347082, "learning_rate": 0.0001, "loss": 0.6213, "mean_abs_error": 303.27818858393346, "mean_abs_error_last_10": 54.5680978935025, "mean_abs_error_last_25": 84.21096747623626, "mean_abs_error_last_50": 147.5255249748647, "mean_pred_prob": 0.04964686054736376, "mean_pred_prob_last_10": 0.24952418627217413, "mean_pred_prob_last_25": 0.13827452466357498, "mean_pred_prob_last_50": 0.08450709243770689, "mean_token_accuracy": 0.8778783023357392, "step": 29290 }, { "epoch": 0.5208611096297087, "grad_norm": 1.7723311587400288, "learning_rate": 0.0001, "loss": 0.7502, "mean_abs_error": 339.63872552234915, "mean_abs_error_last_10": 89.98843017138537, "mean_abs_error_last_25": 132.71616932058265, "mean_abs_error_last_50": 193.40477687466372, "mean_pred_prob": 0.03970159732270986, "mean_pred_prob_last_10": 0.17653071088716388, "mean_pred_prob_last_25": 0.10894344486296177, "mean_pred_prob_last_50": 0.06684679528698326, "mean_token_accuracy": 0.8727069854736328, "step": 29300 }, { "epoch": 0.5210388779265106, "grad_norm": 0.8983947776688003, "learning_rate": 0.0001, "loss": 0.7851, "mean_abs_error": 296.3257015904032, "mean_abs_error_last_10": 103.9964647181622, "mean_abs_error_last_25": 122.24679116619916, "mean_abs_error_last_50": 175.3446590568488, "mean_pred_prob": 0.04165985002182424, "mean_pred_prob_last_10": 0.20805079955607653, "mean_pred_prob_last_25": 0.11604165062308311, "mean_pred_prob_last_50": 0.07054181061685086, "mean_token_accuracy": 0.8706709742546082, "step": 29310 }, { "epoch": 0.5212166462233125, "grad_norm": 1.905842110186438, "learning_rate": 0.0001, "loss": 0.8031, "mean_abs_error": 750.5183131826692, "mean_abs_error_last_10": 354.9608135845256, "mean_abs_error_last_25": 435.50548020570625, "mean_abs_error_last_50": 545.1333088674068, "mean_pred_prob": 0.04111621330666822, "mean_pred_prob_last_10": 0.1962194903870113, "mean_pred_prob_last_25": 0.11134357430855743, "mean_pred_prob_last_50": 0.0689859511126997, "mean_token_accuracy": 0.8632609784603119, "step": 29320 }, { "epoch": 0.5213944145201145, "grad_norm": 1.261196654901192, "learning_rate": 0.0001, "loss": 0.7928, "mean_abs_error": 425.2779227038169, "mean_abs_error_last_10": 85.45850910559496, "mean_abs_error_last_25": 221.05233236903777, "mean_abs_error_last_50": 325.4741623595421, "mean_pred_prob": 0.035829424392431974, "mean_pred_prob_last_10": 0.1758868047967553, "mean_pred_prob_last_25": 0.09655463993549347, "mean_pred_prob_last_50": 0.059752852329984306, "mean_token_accuracy": 0.8695498704910278, "step": 29330 }, { "epoch": 0.5215721828169164, "grad_norm": 2.668513508305649, "learning_rate": 0.0001, "loss": 0.6857, "mean_abs_error": 172.47869684527302, "mean_abs_error_last_10": 107.88427213995783, "mean_abs_error_last_25": 99.97492548061322, "mean_abs_error_last_50": 124.299394465312, "mean_pred_prob": 0.0576778482994996, "mean_pred_prob_last_10": 0.24591375645250083, "mean_pred_prob_last_25": 0.14595199909526854, "mean_pred_prob_last_50": 0.09446183937834576, "mean_token_accuracy": 0.8734721422195435, "step": 29340 }, { "epoch": 0.5217499511137184, "grad_norm": 1.5223808929519542, "learning_rate": 0.0001, "loss": 0.8929, "mean_abs_error": 348.55704270185817, "mean_abs_error_last_10": 184.44503072244353, "mean_abs_error_last_25": 261.9802442542231, "mean_abs_error_last_50": 272.1205289116052, "mean_pred_prob": 0.031638061022385956, "mean_pred_prob_last_10": 0.18546686619520186, "mean_pred_prob_last_25": 0.09305681907571853, "mean_pred_prob_last_50": 0.05449957605451346, "mean_token_accuracy": 0.8657494723796845, "step": 29350 }, { "epoch": 0.5219277194105203, "grad_norm": 1.3251379299423776, "learning_rate": 0.0001, "loss": 0.7924, "mean_abs_error": 338.67157677942026, "mean_abs_error_last_10": 71.43920859283158, "mean_abs_error_last_25": 108.52475276225309, "mean_abs_error_last_50": 155.83029092915442, "mean_pred_prob": 0.0424101916141808, "mean_pred_prob_last_10": 0.22190259769558907, "mean_pred_prob_last_25": 0.11914886217564344, "mean_pred_prob_last_50": 0.07200622810050845, "mean_token_accuracy": 0.8728759288787842, "step": 29360 }, { "epoch": 0.5221054877073222, "grad_norm": 2.2139480438289683, "learning_rate": 0.0001, "loss": 0.7779, "mean_abs_error": 264.40910677450836, "mean_abs_error_last_10": 142.7335700456636, "mean_abs_error_last_25": 143.5377485229295, "mean_abs_error_last_50": 185.78954389607546, "mean_pred_prob": 0.04160187888192013, "mean_pred_prob_last_10": 0.19705953809898347, "mean_pred_prob_last_25": 0.10951764467172324, "mean_pred_prob_last_50": 0.06804493948584422, "mean_token_accuracy": 0.8741303086280823, "step": 29370 }, { "epoch": 0.5222832560041242, "grad_norm": 2.221850197108806, "learning_rate": 0.0001, "loss": 0.893, "mean_abs_error": 892.4885518601911, "mean_abs_error_last_10": 254.05056674542539, "mean_abs_error_last_25": 381.50551099668394, "mean_abs_error_last_50": 553.6220898877027, "mean_pred_prob": 0.03317662838380784, "mean_pred_prob_last_10": 0.1817554028588347, "mean_pred_prob_last_25": 0.09539816953474656, "mean_pred_prob_last_50": 0.05723560654441826, "mean_token_accuracy": 0.874374943971634, "step": 29380 }, { "epoch": 0.5224610243009262, "grad_norm": 1.1300524494044846, "learning_rate": 0.0001, "loss": 0.7389, "mean_abs_error": 280.7144880526944, "mean_abs_error_last_10": 109.85020242875564, "mean_abs_error_last_25": 103.80420936152352, "mean_abs_error_last_50": 143.76116150097704, "mean_pred_prob": 0.04411371680907905, "mean_pred_prob_last_10": 0.19664053628221154, "mean_pred_prob_last_25": 0.11277314189355821, "mean_pred_prob_last_50": 0.07214670311659574, "mean_token_accuracy": 0.8867773950099945, "step": 29390 }, { "epoch": 0.5226387925977282, "grad_norm": 1.2605281815878584, "learning_rate": 0.0001, "loss": 0.7092, "mean_abs_error": 457.48278388848865, "mean_abs_error_last_10": 125.13370752735561, "mean_abs_error_last_25": 195.89199852880762, "mean_abs_error_last_50": 288.3870266348987, "mean_pred_prob": 0.03182776633184403, "mean_pred_prob_last_10": 0.16149226874113082, "mean_pred_prob_last_25": 0.09005317436531186, "mean_pred_prob_last_50": 0.05359933921135962, "mean_token_accuracy": 0.882824832201004, "step": 29400 }, { "epoch": 0.5228165608945301, "grad_norm": 8.93507804039384, "learning_rate": 0.0001, "loss": 0.7736, "mean_abs_error": 165.22353598763456, "mean_abs_error_last_10": 38.907226243012055, "mean_abs_error_last_25": 82.25900326958161, "mean_abs_error_last_50": 121.22754458668665, "mean_pred_prob": 0.048401635186746714, "mean_pred_prob_last_10": 0.2243578888475895, "mean_pred_prob_last_25": 0.13186926264315843, "mean_pred_prob_last_50": 0.08097038846462964, "mean_token_accuracy": 0.8774818241596222, "step": 29410 }, { "epoch": 0.522994329191332, "grad_norm": 2.236417693288753, "learning_rate": 0.0001, "loss": 0.7962, "mean_abs_error": 870.7145049417046, "mean_abs_error_last_10": 286.9911180586976, "mean_abs_error_last_25": 389.3051438758239, "mean_abs_error_last_50": 560.0233252337631, "mean_pred_prob": 0.029660558089381082, "mean_pred_prob_last_10": 0.16864356460864655, "mean_pred_prob_last_25": 0.09016831226763315, "mean_pred_prob_last_50": 0.051825537646072915, "mean_token_accuracy": 0.8774057149887085, "step": 29420 }, { "epoch": 0.523172097488134, "grad_norm": 2.1809063634435204, "learning_rate": 0.0001, "loss": 0.8451, "mean_abs_error": 966.7239171666054, "mean_abs_error_last_10": 460.2760378174497, "mean_abs_error_last_25": 550.0376122446648, "mean_abs_error_last_50": 683.7742026900837, "mean_pred_prob": 0.03439922898978694, "mean_pred_prob_last_10": 0.15390710057981777, "mean_pred_prob_last_25": 0.09088271773653105, "mean_pred_prob_last_50": 0.057327745296061036, "mean_token_accuracy": 0.8702758491039276, "step": 29430 }, { "epoch": 0.5233498657849359, "grad_norm": 1.218885887574423, "learning_rate": 0.0001, "loss": 0.8197, "mean_abs_error": 516.9496219585997, "mean_abs_error_last_10": 147.41915466348192, "mean_abs_error_last_25": 161.0719538245766, "mean_abs_error_last_50": 304.60165606122825, "mean_pred_prob": 0.03768408418400213, "mean_pred_prob_last_10": 0.1887052103993483, "mean_pred_prob_last_25": 0.10597010956844315, "mean_pred_prob_last_50": 0.06486398262204603, "mean_token_accuracy": 0.8783650636672974, "step": 29440 }, { "epoch": 0.5235276340817379, "grad_norm": 1.5919602004419076, "learning_rate": 0.0001, "loss": 0.7861, "mean_abs_error": 231.7384658454418, "mean_abs_error_last_10": 66.71995124123143, "mean_abs_error_last_25": 83.98651997875507, "mean_abs_error_last_50": 123.20359808859303, "mean_pred_prob": 0.03755000350065529, "mean_pred_prob_last_10": 0.1929351344704628, "mean_pred_prob_last_25": 0.10388737078756094, "mean_pred_prob_last_50": 0.06321355365216733, "mean_token_accuracy": 0.8811992943286896, "step": 29450 }, { "epoch": 0.5237054023785398, "grad_norm": 1.336725535157975, "learning_rate": 0.0001, "loss": 0.7221, "mean_abs_error": 801.8410225316736, "mean_abs_error_last_10": 516.1556105347482, "mean_abs_error_last_25": 560.1901530508492, "mean_abs_error_last_50": 618.8106245856078, "mean_pred_prob": 0.05007346679631155, "mean_pred_prob_last_10": 0.23584985932102426, "mean_pred_prob_last_25": 0.13689219946099912, "mean_pred_prob_last_50": 0.08418854310002644, "mean_token_accuracy": 0.8761286079883576, "step": 29460 }, { "epoch": 0.5238831706753417, "grad_norm": 1.2748590447291113, "learning_rate": 0.0001, "loss": 0.7111, "mean_abs_error": 995.9321107484745, "mean_abs_error_last_10": 422.8535879888679, "mean_abs_error_last_25": 503.8911992526064, "mean_abs_error_last_50": 689.6982107855978, "mean_pred_prob": 0.022184151227702387, "mean_pred_prob_last_10": 0.11104131081374362, "mean_pred_prob_last_25": 0.062302545079728586, "mean_pred_prob_last_50": 0.037587607296882194, "mean_token_accuracy": 0.8769199430942536, "step": 29470 }, { "epoch": 0.5240609389721437, "grad_norm": 0.8225375771991138, "learning_rate": 0.0001, "loss": 0.874, "mean_abs_error": 460.206927462615, "mean_abs_error_last_10": 140.12408572175048, "mean_abs_error_last_25": 205.0759309611839, "mean_abs_error_last_50": 298.9136276694143, "mean_pred_prob": 0.039853394101373854, "mean_pred_prob_last_10": 0.18116048630326986, "mean_pred_prob_last_25": 0.10467661479488015, "mean_pred_prob_last_50": 0.06590930265374481, "mean_token_accuracy": 0.878156054019928, "step": 29480 }, { "epoch": 0.5242387072689456, "grad_norm": 1.5145787134794308, "learning_rate": 0.0001, "loss": 0.8384, "mean_abs_error": 635.8680109075136, "mean_abs_error_last_10": 245.50400632531347, "mean_abs_error_last_25": 296.97139471627986, "mean_abs_error_last_50": 397.379796806587, "mean_pred_prob": 0.028977574477903546, "mean_pred_prob_last_10": 0.14099577642045916, "mean_pred_prob_last_25": 0.07983513347571716, "mean_pred_prob_last_50": 0.048726578697096554, "mean_token_accuracy": 0.8678508579730988, "step": 29490 }, { "epoch": 0.5244164755657476, "grad_norm": 1.5516213735702806, "learning_rate": 0.0001, "loss": 0.6486, "mean_abs_error": 406.8355995040317, "mean_abs_error_last_10": 177.7347788904525, "mean_abs_error_last_25": 229.2486190248109, "mean_abs_error_last_50": 278.68079957312966, "mean_pred_prob": 0.04499948574230075, "mean_pred_prob_last_10": 0.19557677544653415, "mean_pred_prob_last_25": 0.1181930061429739, "mean_pred_prob_last_50": 0.074944356828928, "mean_token_accuracy": 0.8725388586521149, "step": 29500 }, { "epoch": 0.5245942438625496, "grad_norm": 1.0996136727290204, "learning_rate": 0.0001, "loss": 0.7189, "mean_abs_error": 1037.4934989063056, "mean_abs_error_last_10": 539.1991975755521, "mean_abs_error_last_25": 619.06768840024, "mean_abs_error_last_50": 709.5687024541253, "mean_pred_prob": 0.04181085525924573, "mean_pred_prob_last_10": 0.20220409309549722, "mean_pred_prob_last_25": 0.11049501677625813, "mean_pred_prob_last_50": 0.06981716674636118, "mean_token_accuracy": 0.8779679715633393, "step": 29510 }, { "epoch": 0.5247720121593515, "grad_norm": 2.2304517964228157, "learning_rate": 0.0001, "loss": 0.7961, "mean_abs_error": 1699.1891892162507, "mean_abs_error_last_10": 986.8497832574645, "mean_abs_error_last_25": 1087.3025572264064, "mean_abs_error_last_50": 1263.8739864305844, "mean_pred_prob": 0.03763142949756002, "mean_pred_prob_last_10": 0.1772742410117644, "mean_pred_prob_last_25": 0.10368479136304813, "mean_pred_prob_last_50": 0.06308023773599417, "mean_token_accuracy": 0.8706844985485077, "step": 29520 }, { "epoch": 0.5249497804561535, "grad_norm": 1.8555492463050534, "learning_rate": 0.0001, "loss": 0.6833, "mean_abs_error": 311.45566223031426, "mean_abs_error_last_10": 79.52355743557192, "mean_abs_error_last_25": 131.37965786126705, "mean_abs_error_last_50": 193.65627344153856, "mean_pred_prob": 0.04918327145278454, "mean_pred_prob_last_10": 0.2393469216302037, "mean_pred_prob_last_25": 0.13502338668331504, "mean_pred_prob_last_50": 0.0829505663830787, "mean_token_accuracy": 0.8683187544345856, "step": 29530 }, { "epoch": 0.5251275487529554, "grad_norm": 2.0213016955261245, "learning_rate": 0.0001, "loss": 0.6959, "mean_abs_error": 1114.4967134681617, "mean_abs_error_last_10": 630.7300693614519, "mean_abs_error_last_25": 687.7575955235445, "mean_abs_error_last_50": 843.9045326597876, "mean_pred_prob": 0.0348783660607296, "mean_pred_prob_last_10": 0.17858748879225458, "mean_pred_prob_last_25": 0.09834804574638838, "mean_pred_prob_last_50": 0.059174212845391594, "mean_token_accuracy": 0.8778862535953522, "step": 29540 }, { "epoch": 0.5253053170497574, "grad_norm": 1.4951887068248677, "learning_rate": 0.0001, "loss": 0.6885, "mean_abs_error": 156.15999976541315, "mean_abs_error_last_10": 21.484949786014433, "mean_abs_error_last_25": 41.17308432584051, "mean_abs_error_last_50": 91.77693542285311, "mean_pred_prob": 0.053896459424868225, "mean_pred_prob_last_10": 0.2661279052495956, "mean_pred_prob_last_25": 0.15038646385073662, "mean_pred_prob_last_50": 0.0916954107582569, "mean_token_accuracy": 0.8757132947444916, "step": 29550 }, { "epoch": 0.5254830853465593, "grad_norm": 1.9125087536422283, "learning_rate": 0.0001, "loss": 0.767, "mean_abs_error": 354.6944681308181, "mean_abs_error_last_10": 174.4257285380808, "mean_abs_error_last_25": 189.25813428858493, "mean_abs_error_last_50": 250.14519156119044, "mean_pred_prob": 0.030378363886848093, "mean_pred_prob_last_10": 0.1530758548527956, "mean_pred_prob_last_25": 0.08324463805183768, "mean_pred_prob_last_50": 0.05110724233090878, "mean_token_accuracy": 0.8701436281204223, "step": 29560 }, { "epoch": 0.5256608536433612, "grad_norm": 1.6041477952164267, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 321.25684141138424, "mean_abs_error_last_10": 171.23802022200445, "mean_abs_error_last_25": 137.52380790933415, "mean_abs_error_last_50": 172.16762388990568, "mean_pred_prob": 0.037397466809488834, "mean_pred_prob_last_10": 0.17796240001916885, "mean_pred_prob_last_25": 0.10163408406078815, "mean_pred_prob_last_50": 0.062803426804021, "mean_token_accuracy": 0.8783029675483703, "step": 29570 }, { "epoch": 0.5258386219401632, "grad_norm": 1.3700592506644782, "learning_rate": 0.0001, "loss": 0.7894, "mean_abs_error": 735.4275860466875, "mean_abs_error_last_10": 213.17031774926136, "mean_abs_error_last_25": 276.757743583815, "mean_abs_error_last_50": 418.72837082986155, "mean_pred_prob": 0.039807227108394724, "mean_pred_prob_last_10": 0.20094879075186328, "mean_pred_prob_last_25": 0.1041975200176239, "mean_pred_prob_last_50": 0.06575603071250953, "mean_token_accuracy": 0.8709764122962952, "step": 29580 }, { "epoch": 0.5260163902369651, "grad_norm": 1.2795895816375915, "learning_rate": 0.0001, "loss": 0.7315, "mean_abs_error": 339.26811835117985, "mean_abs_error_last_10": 88.9664283129886, "mean_abs_error_last_25": 141.21446107335154, "mean_abs_error_last_50": 195.0332744030544, "mean_pred_prob": 0.05158677897416055, "mean_pred_prob_last_10": 0.22252963911741971, "mean_pred_prob_last_25": 0.1339334562420845, "mean_pred_prob_last_50": 0.0844406433403492, "mean_token_accuracy": 0.8727628290653229, "step": 29590 }, { "epoch": 0.526194158533767, "grad_norm": 1.0595043699602706, "learning_rate": 0.0001, "loss": 0.8543, "mean_abs_error": 722.2046956722364, "mean_abs_error_last_10": 218.87266067207324, "mean_abs_error_last_25": 257.45604052185155, "mean_abs_error_last_50": 360.1848596333745, "mean_pred_prob": 0.023393281304743142, "mean_pred_prob_last_10": 0.11308239395730198, "mean_pred_prob_last_25": 0.06095501432428137, "mean_pred_prob_last_50": 0.03750858447747305, "mean_token_accuracy": 0.8689591526985169, "step": 29600 }, { "epoch": 0.526371926830569, "grad_norm": 1.8424861905005843, "learning_rate": 0.0001, "loss": 0.7442, "mean_abs_error": 1425.2591133847, "mean_abs_error_last_10": 889.8927702849209, "mean_abs_error_last_25": 945.6322043181083, "mean_abs_error_last_50": 1127.1950685681668, "mean_pred_prob": 0.041916895911708704, "mean_pred_prob_last_10": 0.21445026242145104, "mean_pred_prob_last_25": 0.11990893300971947, "mean_pred_prob_last_50": 0.07072033839358483, "mean_token_accuracy": 0.8623041152954102, "step": 29610 }, { "epoch": 0.5265496951273709, "grad_norm": 1.1577005113772345, "learning_rate": 0.0001, "loss": 0.6008, "mean_abs_error": 1084.9868353142172, "mean_abs_error_last_10": 373.29296310079184, "mean_abs_error_last_25": 490.37285782025447, "mean_abs_error_last_50": 708.3093519609034, "mean_pred_prob": 0.037130909875850195, "mean_pred_prob_last_10": 0.1880055986926891, "mean_pred_prob_last_25": 0.10264831183012575, "mean_pred_prob_last_50": 0.06253260348457843, "mean_token_accuracy": 0.8806853413581848, "step": 29620 }, { "epoch": 0.526727463424173, "grad_norm": 2.777665777744431, "learning_rate": 0.0001, "loss": 0.7927, "mean_abs_error": 448.44787838477015, "mean_abs_error_last_10": 211.74819767542107, "mean_abs_error_last_25": 197.3183070004892, "mean_abs_error_last_50": 327.4508673210388, "mean_pred_prob": 0.0401709241559729, "mean_pred_prob_last_10": 0.21280452106148004, "mean_pred_prob_last_25": 0.11519902236759663, "mean_pred_prob_last_50": 0.06792703103274107, "mean_token_accuracy": 0.8644300699234009, "step": 29630 }, { "epoch": 0.5269052317209749, "grad_norm": 1.5236152116464248, "learning_rate": 0.0001, "loss": 0.6503, "mean_abs_error": 583.5390276270391, "mean_abs_error_last_10": 250.24309237376252, "mean_abs_error_last_25": 356.1293360604701, "mean_abs_error_last_50": 413.4946509086174, "mean_pred_prob": 0.05892147643608041, "mean_pred_prob_last_10": 0.2479508884542156, "mean_pred_prob_last_25": 0.15093346121138893, "mean_pred_prob_last_50": 0.09663054347620345, "mean_token_accuracy": 0.8776101171970367, "step": 29640 }, { "epoch": 0.5270830000177769, "grad_norm": 2.1898426032254648, "learning_rate": 0.0001, "loss": 0.9886, "mean_abs_error": 329.10115666306285, "mean_abs_error_last_10": 217.47160135413816, "mean_abs_error_last_25": 215.93114181721648, "mean_abs_error_last_50": 261.2803705414175, "mean_pred_prob": 0.04704927932471037, "mean_pred_prob_last_10": 0.23804773464798928, "mean_pred_prob_last_25": 0.13585535436868668, "mean_pred_prob_last_50": 0.08069317024201154, "mean_token_accuracy": 0.8715458691120148, "step": 29650 }, { "epoch": 0.5272607683145788, "grad_norm": 0.882827282567363, "learning_rate": 0.0001, "loss": 0.7438, "mean_abs_error": 276.7965945874047, "mean_abs_error_last_10": 42.3809143106076, "mean_abs_error_last_25": 68.16223088856877, "mean_abs_error_last_50": 128.76525876347094, "mean_pred_prob": 0.050475581409409644, "mean_pred_prob_last_10": 0.22931280620396138, "mean_pred_prob_last_25": 0.13226510845124723, "mean_pred_prob_last_50": 0.08329486651346088, "mean_token_accuracy": 0.8771392405033112, "step": 29660 }, { "epoch": 0.5274385366113807, "grad_norm": 1.676051620545573, "learning_rate": 0.0001, "loss": 0.8084, "mean_abs_error": 787.1717303560695, "mean_abs_error_last_10": 215.18324941626037, "mean_abs_error_last_25": 318.8226062352132, "mean_abs_error_last_50": 489.34494416221696, "mean_pred_prob": 0.02229066765285097, "mean_pred_prob_last_10": 0.11892531258054077, "mean_pred_prob_last_25": 0.06451661320170388, "mean_pred_prob_last_50": 0.038523622538195926, "mean_token_accuracy": 0.8719017624855041, "step": 29670 }, { "epoch": 0.5276163049081827, "grad_norm": 1.1626765804262607, "learning_rate": 0.0001, "loss": 0.6422, "mean_abs_error": 337.3841785149613, "mean_abs_error_last_10": 181.4192627481525, "mean_abs_error_last_25": 187.38564075912709, "mean_abs_error_last_50": 235.71245360110333, "mean_pred_prob": 0.040523912361823025, "mean_pred_prob_last_10": 0.1999580891802907, "mean_pred_prob_last_25": 0.11068701045587659, "mean_pred_prob_last_50": 0.06779906558804213, "mean_token_accuracy": 0.8797889173030853, "step": 29680 }, { "epoch": 0.5277940732049846, "grad_norm": 1.193832933936066, "learning_rate": 0.0001, "loss": 0.8623, "mean_abs_error": 987.403942343068, "mean_abs_error_last_10": 391.7970868972055, "mean_abs_error_last_25": 494.0915056536871, "mean_abs_error_last_50": 655.3324246432578, "mean_pred_prob": 0.015755548430024646, "mean_pred_prob_last_10": 0.09306139003310818, "mean_pred_prob_last_25": 0.04482085694617126, "mean_pred_prob_last_50": 0.026495528759551235, "mean_token_accuracy": 0.868660855293274, "step": 29690 }, { "epoch": 0.5279718415017866, "grad_norm": 0.960625665380627, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 525.7838653689271, "mean_abs_error_last_10": 282.82470511329205, "mean_abs_error_last_25": 329.19216719636904, "mean_abs_error_last_50": 383.00840951263183, "mean_pred_prob": 0.02623343386221677, "mean_pred_prob_last_10": 0.1421116237528622, "mean_pred_prob_last_25": 0.07373530208133161, "mean_pred_prob_last_50": 0.04372057996224612, "mean_token_accuracy": 0.8687725961208344, "step": 29700 }, { "epoch": 0.5281496097985885, "grad_norm": 1.4373529363289408, "learning_rate": 0.0001, "loss": 0.7979, "mean_abs_error": 436.4867363203718, "mean_abs_error_last_10": 113.97248119922737, "mean_abs_error_last_25": 117.32373526616603, "mean_abs_error_last_50": 245.8969136607327, "mean_pred_prob": 0.0398976574302651, "mean_pred_prob_last_10": 0.21421023588627577, "mean_pred_prob_last_25": 0.11465201660757884, "mean_pred_prob_last_50": 0.06820517611922697, "mean_token_accuracy": 0.8793503701686859, "step": 29710 }, { "epoch": 0.5283273780953904, "grad_norm": 2.6177669122395653, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 637.6558356183593, "mean_abs_error_last_10": 304.63836425340116, "mean_abs_error_last_25": 290.0581534364534, "mean_abs_error_last_50": 507.4911077339615, "mean_pred_prob": 0.025222662603482604, "mean_pred_prob_last_10": 0.13743147831410168, "mean_pred_prob_last_25": 0.0735598017461598, "mean_pred_prob_last_50": 0.04325531292706728, "mean_token_accuracy": 0.8733343780040741, "step": 29720 }, { "epoch": 0.5285051463921924, "grad_norm": 1.2521904637058752, "learning_rate": 0.0001, "loss": 0.7784, "mean_abs_error": 268.0301521668177, "mean_abs_error_last_10": 94.22032048919, "mean_abs_error_last_25": 154.2183905280594, "mean_abs_error_last_50": 176.5593734565262, "mean_pred_prob": 0.027144402870908378, "mean_pred_prob_last_10": 0.14087993167340757, "mean_pred_prob_last_25": 0.07398219238966704, "mean_pred_prob_last_50": 0.04554551541805267, "mean_token_accuracy": 0.8705914676189422, "step": 29730 }, { "epoch": 0.5286829146889943, "grad_norm": 1.0313993089226123, "learning_rate": 0.0001, "loss": 0.6101, "mean_abs_error": 184.14244185048008, "mean_abs_error_last_10": 34.28762850637204, "mean_abs_error_last_25": 77.62269335146456, "mean_abs_error_last_50": 96.63569870769383, "mean_pred_prob": 0.05688489133026451, "mean_pred_prob_last_10": 0.26779546197503806, "mean_pred_prob_last_25": 0.1518164805136621, "mean_pred_prob_last_50": 0.09436188698746265, "mean_token_accuracy": 0.8844596207141876, "step": 29740 }, { "epoch": 0.5288606829857964, "grad_norm": 1.3999526831367388, "learning_rate": 0.0001, "loss": 0.7837, "mean_abs_error": 432.7531783935753, "mean_abs_error_last_10": 128.36487041583214, "mean_abs_error_last_25": 199.1217278284226, "mean_abs_error_last_50": 226.28816887780744, "mean_pred_prob": 0.036621628247667105, "mean_pred_prob_last_10": 0.18352294962387533, "mean_pred_prob_last_25": 0.10061957999132573, "mean_pred_prob_last_50": 0.06192173968302086, "mean_token_accuracy": 0.873794949054718, "step": 29750 }, { "epoch": 0.5290384512825983, "grad_norm": 1.3190692343860975, "learning_rate": 0.0001, "loss": 0.5909, "mean_abs_error": 594.5965856605447, "mean_abs_error_last_10": 208.37922519111916, "mean_abs_error_last_25": 360.58035705949584, "mean_abs_error_last_50": 464.1237369086868, "mean_pred_prob": 0.04316873920615762, "mean_pred_prob_last_10": 0.20365650261519477, "mean_pred_prob_last_25": 0.1153871092305053, "mean_pred_prob_last_50": 0.07205452887574211, "mean_token_accuracy": 0.8872340142726898, "step": 29760 }, { "epoch": 0.5292162195794002, "grad_norm": 0.9993362567748688, "learning_rate": 0.0001, "loss": 0.625, "mean_abs_error": 197.95922873946247, "mean_abs_error_last_10": 67.35884989349543, "mean_abs_error_last_25": 106.05984155250879, "mean_abs_error_last_50": 118.03918259223337, "mean_pred_prob": 0.045765879284590485, "mean_pred_prob_last_10": 0.22318385317921638, "mean_pred_prob_last_25": 0.12548332177102567, "mean_pred_prob_last_50": 0.07750959284603595, "mean_token_accuracy": 0.8816876292228699, "step": 29770 }, { "epoch": 0.5293939878762022, "grad_norm": 1.5779185435415832, "learning_rate": 0.0001, "loss": 0.7438, "mean_abs_error": 670.0281198767069, "mean_abs_error_last_10": 178.1255391538876, "mean_abs_error_last_25": 200.03826600714737, "mean_abs_error_last_50": 347.109089552769, "mean_pred_prob": 0.03474011195357889, "mean_pred_prob_last_10": 0.17088745185174048, "mean_pred_prob_last_25": 0.09370984854176641, "mean_pred_prob_last_50": 0.05794955090386793, "mean_token_accuracy": 0.8759045600891113, "step": 29780 }, { "epoch": 0.5295717561730041, "grad_norm": 1.1533548699386043, "learning_rate": 0.0001, "loss": 0.6214, "mean_abs_error": 162.13365213973276, "mean_abs_error_last_10": 22.59431257213317, "mean_abs_error_last_25": 70.3196053058401, "mean_abs_error_last_50": 100.15684666902487, "mean_pred_prob": 0.048248065449297425, "mean_pred_prob_last_10": 0.2364139549434185, "mean_pred_prob_last_25": 0.12795036286115646, "mean_pred_prob_last_50": 0.08046822063624859, "mean_token_accuracy": 0.8848749935626984, "step": 29790 }, { "epoch": 0.529749524469806, "grad_norm": 1.1621575193365206, "learning_rate": 0.0001, "loss": 0.8473, "mean_abs_error": 414.79118262081386, "mean_abs_error_last_10": 161.12390361682256, "mean_abs_error_last_25": 230.1931117672463, "mean_abs_error_last_50": 299.1770169801798, "mean_pred_prob": 0.030073978076688945, "mean_pred_prob_last_10": 0.14684435091912745, "mean_pred_prob_last_25": 0.08128330362960696, "mean_pred_prob_last_50": 0.04981573428958654, "mean_token_accuracy": 0.8652371168136597, "step": 29800 }, { "epoch": 0.529927292766608, "grad_norm": 1.605030566761541, "learning_rate": 0.0001, "loss": 0.7329, "mean_abs_error": 80.65710665710714, "mean_abs_error_last_10": 27.00562530739227, "mean_abs_error_last_25": 47.67499951739529, "mean_abs_error_last_50": 55.918434947821844, "mean_pred_prob": 0.05954935066401958, "mean_pred_prob_last_10": 0.29096133783459666, "mean_pred_prob_last_25": 0.16517286896705627, "mean_pred_prob_last_50": 0.10080101899802685, "mean_token_accuracy": 0.8699352622032166, "step": 29810 }, { "epoch": 0.5301050610634099, "grad_norm": 1.2467523894338537, "learning_rate": 0.0001, "loss": 0.8093, "mean_abs_error": 933.9217338950266, "mean_abs_error_last_10": 597.5178144248993, "mean_abs_error_last_25": 672.6118084164382, "mean_abs_error_last_50": 753.2466204347689, "mean_pred_prob": 0.05794723865474225, "mean_pred_prob_last_10": 0.3014840205796645, "mean_pred_prob_last_25": 0.1637080475658877, "mean_pred_prob_last_50": 0.09780001169419847, "mean_token_accuracy": 0.8735703527927399, "step": 29820 }, { "epoch": 0.5302828293602119, "grad_norm": 1.945624269557665, "learning_rate": 0.0001, "loss": 1.1517, "mean_abs_error": 396.91981636321185, "mean_abs_error_last_10": 118.50831041966092, "mean_abs_error_last_25": 116.81011050017347, "mean_abs_error_last_50": 265.6887621601205, "mean_pred_prob": 0.04264488634653389, "mean_pred_prob_last_10": 0.21693760622292757, "mean_pred_prob_last_25": 0.1339439295232296, "mean_pred_prob_last_50": 0.07610182990320027, "mean_token_accuracy": 0.8682982146739959, "step": 29830 }, { "epoch": 0.5304605976570138, "grad_norm": 0.9775731158082319, "learning_rate": 0.0001, "loss": 0.695, "mean_abs_error": 296.788435079767, "mean_abs_error_last_10": 145.0091081684155, "mean_abs_error_last_25": 197.8000608225064, "mean_abs_error_last_50": 199.2178971921631, "mean_pred_prob": 0.046103927574586125, "mean_pred_prob_last_10": 0.22649356848560273, "mean_pred_prob_last_25": 0.12765521267428995, "mean_pred_prob_last_50": 0.07880838739220053, "mean_token_accuracy": 0.878972864151001, "step": 29840 }, { "epoch": 0.5306383659538157, "grad_norm": 0.910322682898736, "learning_rate": 0.0001, "loss": 0.7578, "mean_abs_error": 1312.9067578666704, "mean_abs_error_last_10": 782.6784565095666, "mean_abs_error_last_25": 924.4543817110322, "mean_abs_error_last_50": 1040.8236167045927, "mean_pred_prob": 0.018033428287890275, "mean_pred_prob_last_10": 0.09849398255464621, "mean_pred_prob_last_25": 0.051247728161979464, "mean_pred_prob_last_50": 0.030826327129034325, "mean_token_accuracy": 0.8765980064868927, "step": 29850 }, { "epoch": 0.5308161342506178, "grad_norm": 3.1892885657777374, "learning_rate": 0.0001, "loss": 0.892, "mean_abs_error": 1898.4348774156417, "mean_abs_error_last_10": 1047.1432981789715, "mean_abs_error_last_25": 1158.2773433814637, "mean_abs_error_last_50": 1405.6743234465498, "mean_pred_prob": 0.02433763030785485, "mean_pred_prob_last_10": 0.12823202615691115, "mean_pred_prob_last_25": 0.06962754804699216, "mean_pred_prob_last_50": 0.041252036270452666, "mean_token_accuracy": 0.85858194231987, "step": 29860 }, { "epoch": 0.5309939025474197, "grad_norm": 1.8601145869731597, "learning_rate": 0.0001, "loss": 0.7366, "mean_abs_error": 147.3882077563987, "mean_abs_error_last_10": 44.01191940838115, "mean_abs_error_last_25": 68.74698235396932, "mean_abs_error_last_50": 87.00406484471742, "mean_pred_prob": 0.07527439640834928, "mean_pred_prob_last_10": 0.32695933505892755, "mean_pred_prob_last_25": 0.198545016720891, "mean_pred_prob_last_50": 0.1239812958985567, "mean_token_accuracy": 0.8794799029827118, "step": 29870 }, { "epoch": 0.5311716708442217, "grad_norm": 1.187511999308435, "learning_rate": 0.0001, "loss": 0.8339, "mean_abs_error": 440.8128619262173, "mean_abs_error_last_10": 87.14020920012827, "mean_abs_error_last_25": 174.6798448492197, "mean_abs_error_last_50": 269.46940186808064, "mean_pred_prob": 0.03837911018054001, "mean_pred_prob_last_10": 0.18235949696972967, "mean_pred_prob_last_25": 0.10621576072880998, "mean_pred_prob_last_50": 0.06434829104691744, "mean_token_accuracy": 0.8727271080017089, "step": 29880 }, { "epoch": 0.5313494391410236, "grad_norm": 1.0838709111557114, "learning_rate": 0.0001, "loss": 0.8295, "mean_abs_error": 483.73143833999666, "mean_abs_error_last_10": 180.00804467565086, "mean_abs_error_last_25": 205.8772551062888, "mean_abs_error_last_50": 253.73544297174598, "mean_pred_prob": 0.03998276675119996, "mean_pred_prob_last_10": 0.18949589971452951, "mean_pred_prob_last_25": 0.10915761385113001, "mean_pred_prob_last_50": 0.06665508123114705, "mean_token_accuracy": 0.8686878204345703, "step": 29890 }, { "epoch": 0.5315272074378256, "grad_norm": 1.3566137213258265, "learning_rate": 0.0001, "loss": 0.7957, "mean_abs_error": 544.1603934701137, "mean_abs_error_last_10": 390.4675943677078, "mean_abs_error_last_25": 405.6354592032095, "mean_abs_error_last_50": 364.56280149132215, "mean_pred_prob": 0.029389294353313743, "mean_pred_prob_last_10": 0.14062021411955355, "mean_pred_prob_last_25": 0.07856483506038785, "mean_pred_prob_last_50": 0.04906404130160809, "mean_token_accuracy": 0.8665006160736084, "step": 29900 }, { "epoch": 0.5317049757346275, "grad_norm": 1.3732804676830255, "learning_rate": 0.0001, "loss": 0.7745, "mean_abs_error": 284.60846700272515, "mean_abs_error_last_10": 83.6930219834903, "mean_abs_error_last_25": 150.89440094925703, "mean_abs_error_last_50": 211.64394000333215, "mean_pred_prob": 0.0401427888777107, "mean_pred_prob_last_10": 0.19049197398126125, "mean_pred_prob_last_25": 0.10971310809254646, "mean_pred_prob_last_50": 0.06694583659991622, "mean_token_accuracy": 0.8658517956733703, "step": 29910 }, { "epoch": 0.5318827440314294, "grad_norm": 3.282042228048764, "learning_rate": 0.0001, "loss": 0.7279, "mean_abs_error": 563.8700444749063, "mean_abs_error_last_10": 162.67396446717785, "mean_abs_error_last_25": 220.38539207026616, "mean_abs_error_last_50": 345.58912183525297, "mean_pred_prob": 0.033587654563598336, "mean_pred_prob_last_10": 0.17625415010843426, "mean_pred_prob_last_25": 0.09365743420785293, "mean_pred_prob_last_50": 0.056197587691713125, "mean_token_accuracy": 0.8786968111991882, "step": 29920 }, { "epoch": 0.5320605123282314, "grad_norm": 1.8483567288646254, "learning_rate": 0.0001, "loss": 0.8129, "mean_abs_error": 553.3880376917404, "mean_abs_error_last_10": 428.5212106396254, "mean_abs_error_last_25": 465.9306941452739, "mean_abs_error_last_50": 457.28572622483443, "mean_pred_prob": 0.04537387777527328, "mean_pred_prob_last_10": 0.2214464947261149, "mean_pred_prob_last_25": 0.12385676077683457, "mean_pred_prob_last_50": 0.07581302569597029, "mean_token_accuracy": 0.8731564044952392, "step": 29930 }, { "epoch": 0.5322382806250333, "grad_norm": 1.9870023119124518, "learning_rate": 0.0001, "loss": 0.712, "mean_abs_error": 573.178940431457, "mean_abs_error_last_10": 325.83006195822594, "mean_abs_error_last_25": 380.3836228322089, "mean_abs_error_last_50": 439.3669094512544, "mean_pred_prob": 0.04131152127229143, "mean_pred_prob_last_10": 0.22090288657345808, "mean_pred_prob_last_25": 0.1183960657857824, "mean_pred_prob_last_50": 0.0702760814805515, "mean_token_accuracy": 0.8796173930168152, "step": 29940 }, { "epoch": 0.5324160489218352, "grad_norm": 1.130478564001409, "learning_rate": 0.0001, "loss": 0.76, "mean_abs_error": 550.3022947501817, "mean_abs_error_last_10": 421.0569993542366, "mean_abs_error_last_25": 627.508739808057, "mean_abs_error_last_50": 544.5322121410388, "mean_pred_prob": 0.03358151789288968, "mean_pred_prob_last_10": 0.1846183100133203, "mean_pred_prob_last_25": 0.09656795331975446, "mean_pred_prob_last_50": 0.05751108075492084, "mean_token_accuracy": 0.8701359331607819, "step": 29950 }, { "epoch": 0.5325938172186372, "grad_norm": 1.2328116178464346, "learning_rate": 0.0001, "loss": 0.6149, "mean_abs_error": 163.3922410380299, "mean_abs_error_last_10": 38.452048257781044, "mean_abs_error_last_25": 57.24389936451772, "mean_abs_error_last_50": 92.8816714540166, "mean_pred_prob": 0.04572408851236105, "mean_pred_prob_last_10": 0.24815065339207648, "mean_pred_prob_last_25": 0.13116652630269526, "mean_pred_prob_last_50": 0.07758191786706448, "mean_token_accuracy": 0.881924307346344, "step": 29960 }, { "epoch": 0.5327715855154391, "grad_norm": 1.5806069764759845, "learning_rate": 0.0001, "loss": 0.7297, "mean_abs_error": 535.8170747191639, "mean_abs_error_last_10": 189.7100579465818, "mean_abs_error_last_25": 182.16667635430983, "mean_abs_error_last_50": 279.3180062153625, "mean_pred_prob": 0.037599622807465496, "mean_pred_prob_last_10": 0.19231891948729754, "mean_pred_prob_last_25": 0.10531329344958067, "mean_pred_prob_last_50": 0.06336734122596681, "mean_token_accuracy": 0.8703816533088684, "step": 29970 }, { "epoch": 0.5329493538122412, "grad_norm": 1.3299527610540054, "learning_rate": 0.0001, "loss": 0.7654, "mean_abs_error": 544.3754312132611, "mean_abs_error_last_10": 202.8383942189368, "mean_abs_error_last_25": 259.59926212434794, "mean_abs_error_last_50": 359.5680614484601, "mean_pred_prob": 0.025142797804437577, "mean_pred_prob_last_10": 0.13238462191075087, "mean_pred_prob_last_25": 0.07144384812563657, "mean_pred_prob_last_50": 0.04299990017898381, "mean_token_accuracy": 0.8742704868316651, "step": 29980 }, { "epoch": 0.5331271221090431, "grad_norm": 1.281720091145215, "learning_rate": 0.0001, "loss": 0.6369, "mean_abs_error": 153.6672521795797, "mean_abs_error_last_10": 62.35577985714978, "mean_abs_error_last_25": 88.22473499616024, "mean_abs_error_last_50": 119.64511203919082, "mean_pred_prob": 0.04333857763558626, "mean_pred_prob_last_10": 0.20059024095535277, "mean_pred_prob_last_25": 0.11412704288959503, "mean_pred_prob_last_50": 0.07105878293514252, "mean_token_accuracy": 0.8752126634120941, "step": 29990 }, { "epoch": 0.5333048904058451, "grad_norm": 2.2203905522398393, "learning_rate": 0.0001, "loss": 0.7109, "mean_abs_error": 227.23973537947285, "mean_abs_error_last_10": 61.11120306879013, "mean_abs_error_last_25": 96.73579816890212, "mean_abs_error_last_50": 118.69106107807802, "mean_pred_prob": 0.0352622639387846, "mean_pred_prob_last_10": 0.18103223741054536, "mean_pred_prob_last_25": 0.0968353847041726, "mean_pred_prob_last_50": 0.058813543524593115, "mean_token_accuracy": 0.8733931183815002, "step": 30000 }, { "epoch": 0.533482658702647, "grad_norm": 3.6487902110334853, "learning_rate": 0.0001, "loss": 0.8602, "mean_abs_error": 825.9195899968588, "mean_abs_error_last_10": 345.8224251377534, "mean_abs_error_last_25": 533.72149882178, "mean_abs_error_last_50": 703.3901997326685, "mean_pred_prob": 0.02619041004218161, "mean_pred_prob_last_10": 0.12837050524540244, "mean_pred_prob_last_25": 0.07025726165156812, "mean_pred_prob_last_50": 0.04320927183143795, "mean_token_accuracy": 0.8655388295650482, "step": 30010 }, { "epoch": 0.5336604269994489, "grad_norm": 2.390550109886001, "learning_rate": 0.0001, "loss": 0.636, "mean_abs_error": 361.65436636198206, "mean_abs_error_last_10": 100.64730956503804, "mean_abs_error_last_25": 116.45337934279819, "mean_abs_error_last_50": 209.79232521539956, "mean_pred_prob": 0.04467120619956404, "mean_pred_prob_last_10": 0.2108964802697301, "mean_pred_prob_last_25": 0.12367900991812349, "mean_pred_prob_last_50": 0.07605338101275265, "mean_token_accuracy": 0.873120391368866, "step": 30020 }, { "epoch": 0.5338381952962509, "grad_norm": 1.4161138723806017, "learning_rate": 0.0001, "loss": 0.7003, "mean_abs_error": 155.26203588709302, "mean_abs_error_last_10": 57.55778900662183, "mean_abs_error_last_25": 81.94947880408584, "mean_abs_error_last_50": 124.60305962481873, "mean_pred_prob": 0.05864850147627294, "mean_pred_prob_last_10": 0.2707937242463231, "mean_pred_prob_last_25": 0.1575038072653115, "mean_pred_prob_last_50": 0.09648263775743544, "mean_token_accuracy": 0.8778778731822967, "step": 30030 }, { "epoch": 0.5340159635930528, "grad_norm": 1.4072741868758487, "learning_rate": 0.0001, "loss": 0.7824, "mean_abs_error": 172.33295851238654, "mean_abs_error_last_10": 134.4407052278023, "mean_abs_error_last_25": 114.96259778985875, "mean_abs_error_last_50": 124.8076238381846, "mean_pred_prob": 0.05583390747196972, "mean_pred_prob_last_10": 0.2850365534424782, "mean_pred_prob_last_25": 0.15846586879342794, "mean_pred_prob_last_50": 0.0954657610040158, "mean_token_accuracy": 0.8718409836292267, "step": 30040 }, { "epoch": 0.5341937318898548, "grad_norm": 1.4814581365216866, "learning_rate": 0.0001, "loss": 0.7855, "mean_abs_error": 231.16186079161443, "mean_abs_error_last_10": 81.41948664731679, "mean_abs_error_last_25": 138.75247813448962, "mean_abs_error_last_50": 168.45899208104896, "mean_pred_prob": 0.03889790649991483, "mean_pred_prob_last_10": 0.19798432532697915, "mean_pred_prob_last_25": 0.1089876415207982, "mean_pred_prob_last_50": 0.06575152813456953, "mean_token_accuracy": 0.8702279329299927, "step": 30050 }, { "epoch": 0.5343715001866567, "grad_norm": 1.391386315339071, "learning_rate": 0.0001, "loss": 0.7054, "mean_abs_error": 498.4310303446081, "mean_abs_error_last_10": 174.6197833867422, "mean_abs_error_last_25": 262.17961529463884, "mean_abs_error_last_50": 357.8897933851346, "mean_pred_prob": 0.05999995920865331, "mean_pred_prob_last_10": 0.27815677133621647, "mean_pred_prob_last_25": 0.16058604468707927, "mean_pred_prob_last_50": 0.10072578670224175, "mean_token_accuracy": 0.8726583421230316, "step": 30060 }, { "epoch": 0.5345492684834586, "grad_norm": 1.287737497709047, "learning_rate": 0.0001, "loss": 0.7109, "mean_abs_error": 163.8086264312466, "mean_abs_error_last_10": 50.220974257603146, "mean_abs_error_last_25": 79.22679370843197, "mean_abs_error_last_50": 81.42526511047791, "mean_pred_prob": 0.045413976348936555, "mean_pred_prob_last_10": 0.19552754126489164, "mean_pred_prob_last_25": 0.11811390239745378, "mean_pred_prob_last_50": 0.07543876972049475, "mean_token_accuracy": 0.8728344440460205, "step": 30070 }, { "epoch": 0.5347270367802606, "grad_norm": 1.6112754188618548, "learning_rate": 0.0001, "loss": 0.6952, "mean_abs_error": 638.5045615314559, "mean_abs_error_last_10": 291.03921117796006, "mean_abs_error_last_25": 298.98882186284925, "mean_abs_error_last_50": 447.2220654130401, "mean_pred_prob": 0.03372496152878739, "mean_pred_prob_last_10": 0.17248497642576693, "mean_pred_prob_last_25": 0.09346517909434623, "mean_pred_prob_last_50": 0.05708560417988338, "mean_token_accuracy": 0.8798671007156372, "step": 30080 }, { "epoch": 0.5349048050770625, "grad_norm": 1.83844595972259, "learning_rate": 0.0001, "loss": 0.6356, "mean_abs_error": 278.6414903235353, "mean_abs_error_last_10": 48.105184354855595, "mean_abs_error_last_25": 82.50405969965595, "mean_abs_error_last_50": 135.75233247600937, "mean_pred_prob": 0.03852914050221443, "mean_pred_prob_last_10": 0.18612084444612265, "mean_pred_prob_last_25": 0.10557813309133053, "mean_pred_prob_last_50": 0.06474993107840418, "mean_token_accuracy": 0.8799134492874146, "step": 30090 }, { "epoch": 0.5350825733738646, "grad_norm": 2.0027132501194314, "learning_rate": 0.0001, "loss": 0.7226, "mean_abs_error": 537.9048272628366, "mean_abs_error_last_10": 183.89400762033839, "mean_abs_error_last_25": 221.75334188856885, "mean_abs_error_last_50": 358.018564458506, "mean_pred_prob": 0.026402154006063938, "mean_pred_prob_last_10": 0.13816347755491734, "mean_pred_prob_last_25": 0.07479573655873537, "mean_pred_prob_last_50": 0.04497748469002545, "mean_token_accuracy": 0.8783189356327057, "step": 30100 }, { "epoch": 0.5352603416706665, "grad_norm": 1.4217136200155647, "learning_rate": 0.0001, "loss": 0.6664, "mean_abs_error": 255.1219047346038, "mean_abs_error_last_10": 62.00815125327257, "mean_abs_error_last_25": 86.39660594986677, "mean_abs_error_last_50": 129.7970996989745, "mean_pred_prob": 0.050664297747425736, "mean_pred_prob_last_10": 0.25378942731767895, "mean_pred_prob_last_25": 0.13899277830496432, "mean_pred_prob_last_50": 0.08537748558446764, "mean_token_accuracy": 0.8663742423057557, "step": 30110 }, { "epoch": 0.5354381099674684, "grad_norm": 1.2993648088258547, "learning_rate": 0.0001, "loss": 0.6837, "mean_abs_error": 233.5306687892815, "mean_abs_error_last_10": 72.46677128636726, "mean_abs_error_last_25": 89.37699913638485, "mean_abs_error_last_50": 127.47257940007435, "mean_pred_prob": 0.04347936012782157, "mean_pred_prob_last_10": 0.2144606739282608, "mean_pred_prob_last_25": 0.12069774903357029, "mean_pred_prob_last_50": 0.07416938841342927, "mean_token_accuracy": 0.8785801351070404, "step": 30120 }, { "epoch": 0.5356158782642704, "grad_norm": 3.0728323944504634, "learning_rate": 0.0001, "loss": 0.8867, "mean_abs_error": 429.3758117827638, "mean_abs_error_last_10": 150.09060166146065, "mean_abs_error_last_25": 238.9171719718184, "mean_abs_error_last_50": 339.2943322426844, "mean_pred_prob": 0.026536129345186055, "mean_pred_prob_last_10": 0.14046120047569274, "mean_pred_prob_last_25": 0.07505672862753272, "mean_pred_prob_last_50": 0.04524623639881611, "mean_token_accuracy": 0.876353120803833, "step": 30130 }, { "epoch": 0.5357936465610723, "grad_norm": 1.8661357999612238, "learning_rate": 0.0001, "loss": 0.6301, "mean_abs_error": 410.2972945195391, "mean_abs_error_last_10": 135.3561482168851, "mean_abs_error_last_25": 200.6323377167942, "mean_abs_error_last_50": 275.48545491203436, "mean_pred_prob": 0.042050643294351174, "mean_pred_prob_last_10": 0.20093975578201934, "mean_pred_prob_last_25": 0.11289948128396646, "mean_pred_prob_last_50": 0.06967203266685829, "mean_token_accuracy": 0.8767833948135376, "step": 30140 }, { "epoch": 0.5359714148578743, "grad_norm": 1.5637027369227987, "learning_rate": 0.0001, "loss": 0.654, "mean_abs_error": 260.77466398650324, "mean_abs_error_last_10": 143.25858145072115, "mean_abs_error_last_25": 180.127166139676, "mean_abs_error_last_50": 176.6896007803375, "mean_pred_prob": 0.035495950188487765, "mean_pred_prob_last_10": 0.18069581128656864, "mean_pred_prob_last_25": 0.09596623284742237, "mean_pred_prob_last_50": 0.059780165832489726, "mean_token_accuracy": 0.8815979659557343, "step": 30150 }, { "epoch": 0.5361491831546762, "grad_norm": 1.4111859911601021, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 572.8694857439172, "mean_abs_error_last_10": 270.78472814965164, "mean_abs_error_last_25": 445.0043324982627, "mean_abs_error_last_50": 500.4450196313346, "mean_pred_prob": 0.021170352515764535, "mean_pred_prob_last_10": 0.11175400579813868, "mean_pred_prob_last_25": 0.05949613054981455, "mean_pred_prob_last_50": 0.03571120214182884, "mean_token_accuracy": 0.8723805785179138, "step": 30160 }, { "epoch": 0.5363269514514781, "grad_norm": 1.909757069339257, "learning_rate": 0.0001, "loss": 0.7913, "mean_abs_error": 289.1897231812717, "mean_abs_error_last_10": 115.47648845340439, "mean_abs_error_last_25": 172.19085741201553, "mean_abs_error_last_50": 200.2707213839751, "mean_pred_prob": 0.029382798122242093, "mean_pred_prob_last_10": 0.15975253097712994, "mean_pred_prob_last_25": 0.0849350668489933, "mean_pred_prob_last_50": 0.05085362773388624, "mean_token_accuracy": 0.8710972607135773, "step": 30170 }, { "epoch": 0.5365047197482801, "grad_norm": 1.8545118068589115, "learning_rate": 0.0001, "loss": 0.7452, "mean_abs_error": 1424.7715236589065, "mean_abs_error_last_10": 798.9272386462275, "mean_abs_error_last_25": 871.6963997593093, "mean_abs_error_last_50": 1025.014080677145, "mean_pred_prob": 0.031187402077193838, "mean_pred_prob_last_10": 0.14768700463173445, "mean_pred_prob_last_25": 0.08360518296831287, "mean_pred_prob_last_50": 0.05212322704319376, "mean_token_accuracy": 0.8704873263835907, "step": 30180 }, { "epoch": 0.536682488045082, "grad_norm": 3.072183615447985, "learning_rate": 0.0001, "loss": 0.767, "mean_abs_error": 293.88757049844475, "mean_abs_error_last_10": 49.0431814919155, "mean_abs_error_last_25": 65.6069483427093, "mean_abs_error_last_50": 140.11516655963572, "mean_pred_prob": 0.045925404597073796, "mean_pred_prob_last_10": 0.22556118499487637, "mean_pred_prob_last_25": 0.12573995012789965, "mean_pred_prob_last_50": 0.07759541003033518, "mean_token_accuracy": 0.8705217719078064, "step": 30190 }, { "epoch": 0.536860256341884, "grad_norm": 2.0033756037834, "learning_rate": 0.0001, "loss": 0.8025, "mean_abs_error": 1104.4723974003505, "mean_abs_error_last_10": 455.2574642463742, "mean_abs_error_last_25": 687.4687241596789, "mean_abs_error_last_50": 834.921436087249, "mean_pred_prob": 0.02467577094794251, "mean_pred_prob_last_10": 0.13373537344741634, "mean_pred_prob_last_25": 0.06922013483126647, "mean_pred_prob_last_50": 0.041937335446709766, "mean_token_accuracy": 0.8667084157466889, "step": 30200 }, { "epoch": 0.5370380246386859, "grad_norm": 1.3705531523895733, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 521.3750780115415, "mean_abs_error_last_10": 171.00248542246837, "mean_abs_error_last_25": 286.47750113949394, "mean_abs_error_last_50": 373.06172285693503, "mean_pred_prob": 0.031209139147540553, "mean_pred_prob_last_10": 0.16078434092924, "mean_pred_prob_last_25": 0.08864390210947022, "mean_pred_prob_last_50": 0.05333759238710627, "mean_token_accuracy": 0.8775892674922943, "step": 30210 }, { "epoch": 0.5372157929354879, "grad_norm": 1.1391404024356875, "learning_rate": 0.0001, "loss": 0.6422, "mean_abs_error": 127.03272567220782, "mean_abs_error_last_10": 22.005444149705376, "mean_abs_error_last_25": 57.073470162444856, "mean_abs_error_last_50": 92.72484682243537, "mean_pred_prob": 0.06363700153306126, "mean_pred_prob_last_10": 0.30154736824333667, "mean_pred_prob_last_25": 0.17643937654793262, "mean_pred_prob_last_50": 0.10816583968698978, "mean_token_accuracy": 0.885134881734848, "step": 30220 }, { "epoch": 0.5373935612322899, "grad_norm": 1.5247357360936877, "learning_rate": 0.0001, "loss": 0.8123, "mean_abs_error": 738.0749844620093, "mean_abs_error_last_10": 336.48301812505247, "mean_abs_error_last_25": 418.21379671015995, "mean_abs_error_last_50": 537.9728293980058, "mean_pred_prob": 0.02578509392333217, "mean_pred_prob_last_10": 0.12558946121134795, "mean_pred_prob_last_25": 0.06753540364443325, "mean_pred_prob_last_50": 0.04230666221992578, "mean_token_accuracy": 0.8673544287681579, "step": 30230 }, { "epoch": 0.5375713295290918, "grad_norm": 1.1334858149393525, "learning_rate": 0.0001, "loss": 0.7735, "mean_abs_error": 628.5399288403007, "mean_abs_error_last_10": 321.6032947161742, "mean_abs_error_last_25": 367.0231564581146, "mean_abs_error_last_50": 475.25265937521783, "mean_pred_prob": 0.02655043532140553, "mean_pred_prob_last_10": 0.13286700206808746, "mean_pred_prob_last_25": 0.07402719656238332, "mean_pred_prob_last_50": 0.04500722091179341, "mean_token_accuracy": 0.8792090952396393, "step": 30240 }, { "epoch": 0.5377490978258938, "grad_norm": 1.2976374882829111, "learning_rate": 0.0001, "loss": 0.7228, "mean_abs_error": 539.6999157445222, "mean_abs_error_last_10": 116.32292819000239, "mean_abs_error_last_25": 193.02746924017208, "mean_abs_error_last_50": 291.29099891886256, "mean_pred_prob": 0.045576429372886196, "mean_pred_prob_last_10": 0.22620213138870895, "mean_pred_prob_last_25": 0.1244737730245106, "mean_pred_prob_last_50": 0.07635513023124077, "mean_token_accuracy": 0.8791631698608399, "step": 30250 }, { "epoch": 0.5379268661226957, "grad_norm": 2.1529932903340154, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 552.0385324445831, "mean_abs_error_last_10": 158.88253618905293, "mean_abs_error_last_25": 193.8813107474195, "mean_abs_error_last_50": 308.24414494435445, "mean_pred_prob": 0.034378886071499436, "mean_pred_prob_last_10": 0.17444506778847427, "mean_pred_prob_last_25": 0.09602759721456096, "mean_pred_prob_last_50": 0.05717920891474933, "mean_token_accuracy": 0.8884863376617431, "step": 30260 }, { "epoch": 0.5381046344194976, "grad_norm": 1.2826214939233298, "learning_rate": 0.0001, "loss": 0.697, "mean_abs_error": 338.93059488251856, "mean_abs_error_last_10": 114.6045599226334, "mean_abs_error_last_25": 147.77850522296157, "mean_abs_error_last_50": 221.13419877692453, "mean_pred_prob": 0.04477226133458316, "mean_pred_prob_last_10": 0.2146235376596451, "mean_pred_prob_last_25": 0.12183935670182108, "mean_pred_prob_last_50": 0.07573514762334525, "mean_token_accuracy": 0.8693027794361115, "step": 30270 }, { "epoch": 0.5382824027162996, "grad_norm": 1.0730925343469797, "learning_rate": 0.0001, "loss": 0.6777, "mean_abs_error": 213.9105648399155, "mean_abs_error_last_10": 97.58588024844258, "mean_abs_error_last_25": 106.73254090721987, "mean_abs_error_last_50": 139.2999417682529, "mean_pred_prob": 0.04082203689031303, "mean_pred_prob_last_10": 0.19354104567319155, "mean_pred_prob_last_25": 0.11106412392109632, "mean_pred_prob_last_50": 0.06948257610201836, "mean_token_accuracy": 0.8692655801773072, "step": 30280 }, { "epoch": 0.5384601710131015, "grad_norm": 2.154853034966809, "learning_rate": 0.0001, "loss": 0.8198, "mean_abs_error": 330.85254229169607, "mean_abs_error_last_10": 61.03628575099232, "mean_abs_error_last_25": 103.52764694279813, "mean_abs_error_last_50": 187.19732314503713, "mean_pred_prob": 0.03611455536447465, "mean_pred_prob_last_10": 0.16924017183482648, "mean_pred_prob_last_25": 0.10084235724061727, "mean_pred_prob_last_50": 0.06107460232451558, "mean_token_accuracy": 0.8741660237312316, "step": 30290 }, { "epoch": 0.5386379393099034, "grad_norm": 1.5055766525672791, "learning_rate": 0.0001, "loss": 0.7814, "mean_abs_error": 436.58245184783874, "mean_abs_error_last_10": 103.03202278187064, "mean_abs_error_last_25": 128.97927579636195, "mean_abs_error_last_50": 223.14055443625807, "mean_pred_prob": 0.042559275031089784, "mean_pred_prob_last_10": 0.2123921292833984, "mean_pred_prob_last_25": 0.11908908062614501, "mean_pred_prob_last_50": 0.07121050460264086, "mean_token_accuracy": 0.8663586854934693, "step": 30300 }, { "epoch": 0.5388157076067054, "grad_norm": 1.5793166660675293, "learning_rate": 0.0001, "loss": 0.7129, "mean_abs_error": 438.5393398931431, "mean_abs_error_last_10": 241.1443651946022, "mean_abs_error_last_25": 261.53893360188687, "mean_abs_error_last_50": 310.06977370834926, "mean_pred_prob": 0.046861318359151485, "mean_pred_prob_last_10": 0.21711126207374037, "mean_pred_prob_last_25": 0.12657960931537673, "mean_pred_prob_last_50": 0.07894833418540656, "mean_token_accuracy": 0.865899407863617, "step": 30310 }, { "epoch": 0.5389934759035073, "grad_norm": 1.0770143475243685, "learning_rate": 0.0001, "loss": 0.8029, "mean_abs_error": 189.90036012409092, "mean_abs_error_last_10": 36.2868088054846, "mean_abs_error_last_25": 64.1323623387926, "mean_abs_error_last_50": 103.81288021873861, "mean_pred_prob": 0.04602605700492859, "mean_pred_prob_last_10": 0.2219301402568817, "mean_pred_prob_last_25": 0.12300954591482878, "mean_pred_prob_last_50": 0.076564528234303, "mean_token_accuracy": 0.8719684898853302, "step": 30320 }, { "epoch": 0.5391712442003093, "grad_norm": 1.2387541428824749, "learning_rate": 0.0001, "loss": 0.6632, "mean_abs_error": 734.7469740537547, "mean_abs_error_last_10": 167.28649073606198, "mean_abs_error_last_25": 256.9890959110777, "mean_abs_error_last_50": 445.4708975008102, "mean_pred_prob": 0.041198307496961205, "mean_pred_prob_last_10": 0.19851397138554602, "mean_pred_prob_last_25": 0.11368898503715172, "mean_pred_prob_last_50": 0.06889149328926578, "mean_token_accuracy": 0.8711780667304992, "step": 30330 }, { "epoch": 0.5393490124971113, "grad_norm": 1.408563194623745, "learning_rate": 0.0001, "loss": 0.8144, "mean_abs_error": 379.5549289928959, "mean_abs_error_last_10": 83.78691023523538, "mean_abs_error_last_25": 98.82499764069505, "mean_abs_error_last_50": 154.72641173143876, "mean_pred_prob": 0.0492798340972513, "mean_pred_prob_last_10": 0.2415800852701068, "mean_pred_prob_last_25": 0.13297723233699799, "mean_pred_prob_last_50": 0.08213160503655673, "mean_token_accuracy": 0.8712827920913696, "step": 30340 }, { "epoch": 0.5395267807939133, "grad_norm": 2.749077290361951, "learning_rate": 0.0001, "loss": 0.7599, "mean_abs_error": 481.07097627182276, "mean_abs_error_last_10": 146.68808028119452, "mean_abs_error_last_25": 231.80106701541231, "mean_abs_error_last_50": 348.8384784270396, "mean_pred_prob": 0.033916262746788564, "mean_pred_prob_last_10": 0.1830429369583726, "mean_pred_prob_last_25": 0.09490606748731807, "mean_pred_prob_last_50": 0.05759695530869067, "mean_token_accuracy": 0.8763463079929352, "step": 30350 }, { "epoch": 0.5397045490907152, "grad_norm": 2.152495993819918, "learning_rate": 0.0001, "loss": 0.6771, "mean_abs_error": 560.0127247184065, "mean_abs_error_last_10": 230.4072852553396, "mean_abs_error_last_25": 257.85177015927627, "mean_abs_error_last_50": 346.4205622766302, "mean_pred_prob": 0.03492762092500925, "mean_pred_prob_last_10": 0.17356544341892005, "mean_pred_prob_last_25": 0.09767635148018598, "mean_pred_prob_last_50": 0.05924847787246108, "mean_token_accuracy": 0.8737639248371124, "step": 30360 }, { "epoch": 0.5398823173875171, "grad_norm": 3.2052009970597295, "learning_rate": 0.0001, "loss": 0.818, "mean_abs_error": 186.32262405630627, "mean_abs_error_last_10": 71.85214022953633, "mean_abs_error_last_25": 81.19152492448981, "mean_abs_error_last_50": 151.0019168082421, "mean_pred_prob": 0.04719023103825748, "mean_pred_prob_last_10": 0.2573738362640142, "mean_pred_prob_last_25": 0.13900011368095874, "mean_pred_prob_last_50": 0.08104385016486049, "mean_token_accuracy": 0.8760190606117249, "step": 30370 }, { "epoch": 0.5400600856843191, "grad_norm": 2.35392266493441, "learning_rate": 0.0001, "loss": 0.7409, "mean_abs_error": 950.9295962161343, "mean_abs_error_last_10": 444.9025487154133, "mean_abs_error_last_25": 533.4702564960061, "mean_abs_error_last_50": 673.3754233019865, "mean_pred_prob": 0.019655919895740226, "mean_pred_prob_last_10": 0.10088721191859804, "mean_pred_prob_last_25": 0.05370793925249018, "mean_pred_prob_last_50": 0.03329566298634745, "mean_token_accuracy": 0.875525850057602, "step": 30380 }, { "epoch": 0.540237853981121, "grad_norm": 1.007439863374206, "learning_rate": 0.0001, "loss": 0.6376, "mean_abs_error": 466.6111593889524, "mean_abs_error_last_10": 101.17436201158553, "mean_abs_error_last_25": 154.76757262766878, "mean_abs_error_last_50": 274.8338024766939, "mean_pred_prob": 0.03728820540709421, "mean_pred_prob_last_10": 0.1850404867203906, "mean_pred_prob_last_25": 0.10519088089931757, "mean_pred_prob_last_50": 0.0631769779487513, "mean_token_accuracy": 0.876605772972107, "step": 30390 }, { "epoch": 0.540415622277923, "grad_norm": 1.687673163499594, "learning_rate": 0.0001, "loss": 0.8229, "mean_abs_error": 347.5334392906039, "mean_abs_error_last_10": 88.01203522872275, "mean_abs_error_last_25": 153.12161824981075, "mean_abs_error_last_50": 210.73584756273402, "mean_pred_prob": 0.03076231840532273, "mean_pred_prob_last_10": 0.15842538606375456, "mean_pred_prob_last_25": 0.08580125207081438, "mean_pred_prob_last_50": 0.051619417313486335, "mean_token_accuracy": 0.8609386801719665, "step": 30400 }, { "epoch": 0.5405933905747249, "grad_norm": 1.4004255966027683, "learning_rate": 0.0001, "loss": 0.637, "mean_abs_error": 669.5771936808314, "mean_abs_error_last_10": 282.0778604494179, "mean_abs_error_last_25": 307.8998091390246, "mean_abs_error_last_50": 425.1574491347029, "mean_pred_prob": 0.02809195914014708, "mean_pred_prob_last_10": 0.1288501278089825, "mean_pred_prob_last_25": 0.07271011494449339, "mean_pred_prob_last_50": 0.04524939191178419, "mean_token_accuracy": 0.8773478865623474, "step": 30410 }, { "epoch": 0.5407711588715268, "grad_norm": 1.3948364524114454, "learning_rate": 0.0001, "loss": 0.8031, "mean_abs_error": 275.1625237935748, "mean_abs_error_last_10": 132.9940778467323, "mean_abs_error_last_25": 159.77876208838617, "mean_abs_error_last_50": 204.81617152183458, "mean_pred_prob": 0.031717833038419484, "mean_pred_prob_last_10": 0.15954062305390834, "mean_pred_prob_last_25": 0.08771442556753754, "mean_pred_prob_last_50": 0.05297611188143492, "mean_token_accuracy": 0.86728156208992, "step": 30420 }, { "epoch": 0.5409489271683288, "grad_norm": 1.7685889577541816, "learning_rate": 0.0001, "loss": 0.8344, "mean_abs_error": 863.9930990212113, "mean_abs_error_last_10": 367.6602093069965, "mean_abs_error_last_25": 417.2580072171612, "mean_abs_error_last_50": 550.8010452011778, "mean_pred_prob": 0.0402546127239475, "mean_pred_prob_last_10": 0.1798362532397732, "mean_pred_prob_last_25": 0.105428682861384, "mean_pred_prob_last_50": 0.06627023015462327, "mean_token_accuracy": 0.871067875623703, "step": 30430 }, { "epoch": 0.5411266954651307, "grad_norm": 1.2235601395448517, "learning_rate": 0.0001, "loss": 0.6428, "mean_abs_error": 520.5140006279823, "mean_abs_error_last_10": 243.85728988588954, "mean_abs_error_last_25": 301.09470319164814, "mean_abs_error_last_50": 377.512961825115, "mean_pred_prob": 0.041378508578054606, "mean_pred_prob_last_10": 0.2057602177374065, "mean_pred_prob_last_25": 0.11542969425208867, "mean_pred_prob_last_50": 0.07025966695509851, "mean_token_accuracy": 0.8683583676815033, "step": 30440 }, { "epoch": 0.5413044637619326, "grad_norm": 1.5411645401919343, "learning_rate": 0.0001, "loss": 0.7576, "mean_abs_error": 525.4823386498734, "mean_abs_error_last_10": 249.94155143217077, "mean_abs_error_last_25": 267.52518385769133, "mean_abs_error_last_50": 318.35141190147925, "mean_pred_prob": 0.034019213332794604, "mean_pred_prob_last_10": 0.17459138249978423, "mean_pred_prob_last_25": 0.0957512088236399, "mean_pred_prob_last_50": 0.05758046942064539, "mean_token_accuracy": 0.8662905335426331, "step": 30450 }, { "epoch": 0.5414822320587347, "grad_norm": 1.2952452250379791, "learning_rate": 0.0001, "loss": 0.77, "mean_abs_error": 1413.3102328792543, "mean_abs_error_last_10": 958.5296519025275, "mean_abs_error_last_25": 1052.1273172997164, "mean_abs_error_last_50": 1174.4903743548593, "mean_pred_prob": 0.017989534945081688, "mean_pred_prob_last_10": 0.09370083185895055, "mean_pred_prob_last_25": 0.049774672796775124, "mean_pred_prob_last_50": 0.03032325566964573, "mean_token_accuracy": 0.866401869058609, "step": 30460 }, { "epoch": 0.5416600003555366, "grad_norm": 1.1544408863898832, "learning_rate": 0.0001, "loss": 0.6684, "mean_abs_error": 441.8556003310085, "mean_abs_error_last_10": 126.72706962737884, "mean_abs_error_last_25": 192.09508224381986, "mean_abs_error_last_50": 253.67282009882132, "mean_pred_prob": 0.03862962755956687, "mean_pred_prob_last_10": 0.1925715979654342, "mean_pred_prob_last_25": 0.10425568668870255, "mean_pred_prob_last_50": 0.0647191203723196, "mean_token_accuracy": 0.8724156558513642, "step": 30470 }, { "epoch": 0.5418377686523386, "grad_norm": 2.4532894116950383, "learning_rate": 0.0001, "loss": 0.6674, "mean_abs_error": 394.4733555943888, "mean_abs_error_last_10": 233.36839728370347, "mean_abs_error_last_25": 285.47696749744875, "mean_abs_error_last_50": 251.27781269651808, "mean_pred_prob": 0.061105387739371506, "mean_pred_prob_last_10": 0.2655669407919049, "mean_pred_prob_last_25": 0.1557551616569981, "mean_pred_prob_last_50": 0.1000472960062325, "mean_token_accuracy": 0.8823635876178741, "step": 30480 }, { "epoch": 0.5420155369491405, "grad_norm": 1.1560492399889548, "learning_rate": 0.0001, "loss": 0.7167, "mean_abs_error": 107.85127738537497, "mean_abs_error_last_10": 29.337034311005304, "mean_abs_error_last_25": 42.10796893325976, "mean_abs_error_last_50": 68.17747565678015, "mean_pred_prob": 0.04923743363469839, "mean_pred_prob_last_10": 0.24482799582183362, "mean_pred_prob_last_25": 0.1379902048036456, "mean_pred_prob_last_50": 0.0832961618900299, "mean_token_accuracy": 0.885513174533844, "step": 30490 }, { "epoch": 0.5421933052459424, "grad_norm": 2.292624766387444, "learning_rate": 0.0001, "loss": 0.7049, "mean_abs_error": 514.4917791509085, "mean_abs_error_last_10": 105.47546236555836, "mean_abs_error_last_25": 146.28402973483338, "mean_abs_error_last_50": 238.42639975926264, "mean_pred_prob": 0.02828951507108286, "mean_pred_prob_last_10": 0.14479293012991548, "mean_pred_prob_last_25": 0.07687809949275107, "mean_pred_prob_last_50": 0.04679489203263074, "mean_token_accuracy": 0.8767470717430115, "step": 30500 }, { "epoch": 0.5423710735427444, "grad_norm": 1.6375892205632565, "learning_rate": 0.0001, "loss": 0.8193, "mean_abs_error": 859.5446582885494, "mean_abs_error_last_10": 423.18998800958315, "mean_abs_error_last_25": 477.5761317891396, "mean_abs_error_last_50": 642.8614374545181, "mean_pred_prob": 0.03772111375728855, "mean_pred_prob_last_10": 0.15808010355685836, "mean_pred_prob_last_25": 0.09328871015750337, "mean_pred_prob_last_50": 0.06043471330194734, "mean_token_accuracy": 0.8746559798717499, "step": 30510 }, { "epoch": 0.5425488418395463, "grad_norm": 1.2973825225446711, "learning_rate": 0.0001, "loss": 0.807, "mean_abs_error": 308.61003589655934, "mean_abs_error_last_10": 84.34532956113819, "mean_abs_error_last_25": 147.27980317197563, "mean_abs_error_last_50": 243.12453098258302, "mean_pred_prob": 0.04356665764935315, "mean_pred_prob_last_10": 0.2303142687305808, "mean_pred_prob_last_25": 0.12081394484266639, "mean_pred_prob_last_50": 0.07309716767631472, "mean_token_accuracy": 0.8626934885978699, "step": 30520 }, { "epoch": 0.5427266101363483, "grad_norm": 3.749320005546148, "learning_rate": 0.0001, "loss": 0.8383, "mean_abs_error": 656.1260868684261, "mean_abs_error_last_10": 149.9748915874431, "mean_abs_error_last_25": 224.9936846732059, "mean_abs_error_last_50": 377.51780990830514, "mean_pred_prob": 0.023024425935000183, "mean_pred_prob_last_10": 0.11582620656117797, "mean_pred_prob_last_25": 0.06275526415556669, "mean_pred_prob_last_50": 0.03845972018316388, "mean_token_accuracy": 0.8680748462677002, "step": 30530 }, { "epoch": 0.5429043784331502, "grad_norm": 1.337943637483132, "learning_rate": 0.0001, "loss": 0.7405, "mean_abs_error": 172.6037786827354, "mean_abs_error_last_10": 114.47732542919107, "mean_abs_error_last_25": 113.33512680667704, "mean_abs_error_last_50": 143.08389398579615, "mean_pred_prob": 0.05606080982834101, "mean_pred_prob_last_10": 0.27158462125808003, "mean_pred_prob_last_25": 0.14788788529112934, "mean_pred_prob_last_50": 0.09198561599478125, "mean_token_accuracy": 0.8801075279712677, "step": 30540 }, { "epoch": 0.5430821467299521, "grad_norm": 2.320938852721986, "learning_rate": 0.0001, "loss": 0.7677, "mean_abs_error": 792.3496146844072, "mean_abs_error_last_10": 475.04804574716565, "mean_abs_error_last_25": 517.9429682979224, "mean_abs_error_last_50": 578.8728509346676, "mean_pred_prob": 0.04539502017141785, "mean_pred_prob_last_10": 0.21763705605408176, "mean_pred_prob_last_25": 0.1231708359438926, "mean_pred_prob_last_50": 0.07633625509915873, "mean_token_accuracy": 0.8690402328968048, "step": 30550 }, { "epoch": 0.5432599150267541, "grad_norm": 2.516424895753085, "learning_rate": 0.0001, "loss": 0.8058, "mean_abs_error": 665.635007128518, "mean_abs_error_last_10": 239.93326301674506, "mean_abs_error_last_25": 284.2793344376216, "mean_abs_error_last_50": 436.01071901724737, "mean_pred_prob": 0.0335785863280762, "mean_pred_prob_last_10": 0.1673650617711246, "mean_pred_prob_last_25": 0.0939938960247673, "mean_pred_prob_last_50": 0.05632134224870242, "mean_token_accuracy": 0.8727368474006653, "step": 30560 }, { "epoch": 0.5434376833235561, "grad_norm": 1.0923110612034583, "learning_rate": 0.0001, "loss": 0.7198, "mean_abs_error": 416.44565054203105, "mean_abs_error_last_10": 82.88765615027489, "mean_abs_error_last_25": 188.65940610443477, "mean_abs_error_last_50": 351.23799284724726, "mean_pred_prob": 0.03645418919622898, "mean_pred_prob_last_10": 0.19643979873508216, "mean_pred_prob_last_25": 0.1080246651545167, "mean_pred_prob_last_50": 0.06349557675421239, "mean_token_accuracy": 0.881703782081604, "step": 30570 }, { "epoch": 0.5436154516203581, "grad_norm": 1.180200166535108, "learning_rate": 0.0001, "loss": 0.7305, "mean_abs_error": 1019.6129010502221, "mean_abs_error_last_10": 511.9933754288742, "mean_abs_error_last_25": 571.9843952394727, "mean_abs_error_last_50": 701.390684888674, "mean_pred_prob": 0.026123959952383303, "mean_pred_prob_last_10": 0.12100510780583136, "mean_pred_prob_last_25": 0.06790357023128309, "mean_pred_prob_last_50": 0.043036778646637686, "mean_token_accuracy": 0.866270101070404, "step": 30580 }, { "epoch": 0.54379321991716, "grad_norm": 1.5519245316961179, "learning_rate": 0.0001, "loss": 0.7197, "mean_abs_error": 403.3742490395471, "mean_abs_error_last_10": 108.23879845027459, "mean_abs_error_last_25": 167.15661323890515, "mean_abs_error_last_50": 206.72183674417846, "mean_pred_prob": 0.04143973807804287, "mean_pred_prob_last_10": 0.18453093972057105, "mean_pred_prob_last_25": 0.10950038405135273, "mean_pred_prob_last_50": 0.06837453246116638, "mean_token_accuracy": 0.8773913204669952, "step": 30590 }, { "epoch": 0.543970988213962, "grad_norm": 1.331411418630511, "learning_rate": 0.0001, "loss": 0.6627, "mean_abs_error": 342.44802833126516, "mean_abs_error_last_10": 45.95843118058432, "mean_abs_error_last_25": 112.1638480705644, "mean_abs_error_last_50": 219.96898701429927, "mean_pred_prob": 0.044627186562865975, "mean_pred_prob_last_10": 0.21357168070971966, "mean_pred_prob_last_25": 0.11603712067008018, "mean_pred_prob_last_50": 0.07312212204560638, "mean_token_accuracy": 0.8855270326137543, "step": 30600 }, { "epoch": 0.5441487565107639, "grad_norm": 1.3805116357924327, "learning_rate": 0.0001, "loss": 0.7358, "mean_abs_error": 186.31582527200422, "mean_abs_error_last_10": 42.78088992622095, "mean_abs_error_last_25": 66.33673006743092, "mean_abs_error_last_50": 109.30697034653747, "mean_pred_prob": 0.04640766298398376, "mean_pred_prob_last_10": 0.23838694728910922, "mean_pred_prob_last_25": 0.12971608210355043, "mean_pred_prob_last_50": 0.07790221553295851, "mean_token_accuracy": 0.8774003744125366, "step": 30610 }, { "epoch": 0.5443265248075658, "grad_norm": 2.1582868101316044, "learning_rate": 0.0001, "loss": 0.723, "mean_abs_error": 528.3019851490791, "mean_abs_error_last_10": 211.71077775207306, "mean_abs_error_last_25": 339.4547343056351, "mean_abs_error_last_50": 436.4296306034418, "mean_pred_prob": 0.03920675173867494, "mean_pred_prob_last_10": 0.1871375573799014, "mean_pred_prob_last_25": 0.1103276981972158, "mean_pred_prob_last_50": 0.06576589467003942, "mean_token_accuracy": 0.8816485702991486, "step": 30620 }, { "epoch": 0.5445042931043678, "grad_norm": 1.304169937180685, "learning_rate": 0.0001, "loss": 0.8538, "mean_abs_error": 2087.395005977265, "mean_abs_error_last_10": 1021.4025100701622, "mean_abs_error_last_25": 1198.8404568239512, "mean_abs_error_last_50": 1472.935545156555, "mean_pred_prob": 0.01390296155004762, "mean_pred_prob_last_10": 0.061484892992302774, "mean_pred_prob_last_25": 0.03494696088891942, "mean_pred_prob_last_50": 0.022156351161538624, "mean_token_accuracy": 0.8728156507015228, "step": 30630 }, { "epoch": 0.5446820614011697, "grad_norm": 1.4052647625609118, "learning_rate": 0.0001, "loss": 0.7152, "mean_abs_error": 1081.9289735612388, "mean_abs_error_last_10": 530.4047143598079, "mean_abs_error_last_25": 589.5067077480337, "mean_abs_error_last_50": 731.634880237921, "mean_pred_prob": 0.0384883593826089, "mean_pred_prob_last_10": 0.19807135078881402, "mean_pred_prob_last_25": 0.11107519404031337, "mean_pred_prob_last_50": 0.06733827671705513, "mean_token_accuracy": 0.8798171281814575, "step": 30640 }, { "epoch": 0.5448598296979716, "grad_norm": 1.506285074568579, "learning_rate": 0.0001, "loss": 0.7084, "mean_abs_error": 428.9204354146999, "mean_abs_error_last_10": 129.291953315739, "mean_abs_error_last_25": 207.71033014251134, "mean_abs_error_last_50": 292.4660709885121, "mean_pred_prob": 0.037354924925602974, "mean_pred_prob_last_10": 0.1673251798376441, "mean_pred_prob_last_25": 0.09966860990971327, "mean_pred_prob_last_50": 0.061899546068161726, "mean_token_accuracy": 0.8682030498981476, "step": 30650 }, { "epoch": 0.5450375979947736, "grad_norm": 1.0591515610028586, "learning_rate": 0.0001, "loss": 0.9761, "mean_abs_error": 680.1715312882201, "mean_abs_error_last_10": 288.37323174944635, "mean_abs_error_last_25": 361.83534164405654, "mean_abs_error_last_50": 415.32748203878384, "mean_pred_prob": 0.026739403349347414, "mean_pred_prob_last_10": 0.13498780155787243, "mean_pred_prob_last_25": 0.07495827047387138, "mean_pred_prob_last_50": 0.045687373448163274, "mean_token_accuracy": 0.8765649914741516, "step": 30660 }, { "epoch": 0.5452153662915755, "grad_norm": 2.091037294747395, "learning_rate": 0.0001, "loss": 0.6871, "mean_abs_error": 217.43587328233474, "mean_abs_error_last_10": 70.84047964605861, "mean_abs_error_last_25": 101.1675892278196, "mean_abs_error_last_50": 135.101724441434, "mean_pred_prob": 0.04738939837552607, "mean_pred_prob_last_10": 0.2052731357514858, "mean_pred_prob_last_25": 0.1225929407402873, "mean_pred_prob_last_50": 0.07827934119850397, "mean_token_accuracy": 0.876520311832428, "step": 30670 }, { "epoch": 0.5453931345883775, "grad_norm": 1.9305580420662038, "learning_rate": 0.0001, "loss": 0.7955, "mean_abs_error": 1014.8932632297067, "mean_abs_error_last_10": 550.5372686133933, "mean_abs_error_last_25": 598.9727906865812, "mean_abs_error_last_50": 714.2294363848617, "mean_pred_prob": 0.018541987059870736, "mean_pred_prob_last_10": 0.09980622507282533, "mean_pred_prob_last_25": 0.05028073691646569, "mean_pred_prob_last_50": 0.030487709026783705, "mean_token_accuracy": 0.8737285494804382, "step": 30680 }, { "epoch": 0.5455709028851795, "grad_norm": 1.2253774176940386, "learning_rate": 0.0001, "loss": 0.7741, "mean_abs_error": 1053.5822672531417, "mean_abs_error_last_10": 752.1912824014519, "mean_abs_error_last_25": 820.6950040499248, "mean_abs_error_last_50": 874.9335840039737, "mean_pred_prob": 0.04612286583724199, "mean_pred_prob_last_10": 0.2083040601137327, "mean_pred_prob_last_25": 0.12090274122310803, "mean_pred_prob_last_50": 0.07597867956355912, "mean_token_accuracy": 0.8755173087120056, "step": 30690 }, { "epoch": 0.5457486711819814, "grad_norm": 3.4006129817722353, "learning_rate": 0.0001, "loss": 0.8893, "mean_abs_error": 510.19948301589375, "mean_abs_error_last_10": 181.3726205713252, "mean_abs_error_last_25": 237.71636952848652, "mean_abs_error_last_50": 311.51566839536065, "mean_pred_prob": 0.040161425538826734, "mean_pred_prob_last_10": 0.1966891669027973, "mean_pred_prob_last_25": 0.11194098031264729, "mean_pred_prob_last_50": 0.06832994956639596, "mean_token_accuracy": 0.8710705220699311, "step": 30700 }, { "epoch": 0.5459264394787834, "grad_norm": 0.9945341320425259, "learning_rate": 0.0001, "loss": 0.7186, "mean_abs_error": 578.4199085018024, "mean_abs_error_last_10": 224.4271400145836, "mean_abs_error_last_25": 234.86133852410453, "mean_abs_error_last_50": 306.48179049390876, "mean_pred_prob": 0.028954455093480647, "mean_pred_prob_last_10": 0.15510722529143095, "mean_pred_prob_last_25": 0.08170366971753537, "mean_pred_prob_last_50": 0.04889077187981457, "mean_token_accuracy": 0.870596069097519, "step": 30710 }, { "epoch": 0.5461042077755853, "grad_norm": 2.00767497055479, "learning_rate": 0.0001, "loss": 0.7601, "mean_abs_error": 604.0476250798831, "mean_abs_error_last_10": 164.9718394158984, "mean_abs_error_last_25": 237.58070800702785, "mean_abs_error_last_50": 363.0065773283441, "mean_pred_prob": 0.039671251631807534, "mean_pred_prob_last_10": 0.17865289211040364, "mean_pred_prob_last_25": 0.10499895407119766, "mean_pred_prob_last_50": 0.06519497013650835, "mean_token_accuracy": 0.8761860847473144, "step": 30720 }, { "epoch": 0.5462819760723873, "grad_norm": 1.9317715706660592, "learning_rate": 0.0001, "loss": 0.8332, "mean_abs_error": 574.4873940176315, "mean_abs_error_last_10": 157.0029180262631, "mean_abs_error_last_25": 287.9108391965807, "mean_abs_error_last_50": 443.6854584914248, "mean_pred_prob": 0.03343678460805677, "mean_pred_prob_last_10": 0.16994439265690744, "mean_pred_prob_last_25": 0.0932928105816245, "mean_pred_prob_last_50": 0.05616853976389393, "mean_token_accuracy": 0.863953047990799, "step": 30730 }, { "epoch": 0.5464597443691892, "grad_norm": 1.823434113102533, "learning_rate": 0.0001, "loss": 0.7366, "mean_abs_error": 465.13202014595515, "mean_abs_error_last_10": 136.35587922305896, "mean_abs_error_last_25": 181.67268271517045, "mean_abs_error_last_50": 244.27230973945808, "mean_pred_prob": 0.047265159385278824, "mean_pred_prob_last_10": 0.22433040749747307, "mean_pred_prob_last_25": 0.1303972277441062, "mean_pred_prob_last_50": 0.08049252278869971, "mean_token_accuracy": 0.8752392888069153, "step": 30740 }, { "epoch": 0.5466375126659911, "grad_norm": 1.1309120596369029, "learning_rate": 0.0001, "loss": 0.7412, "mean_abs_error": 431.29061615616257, "mean_abs_error_last_10": 74.55523436394952, "mean_abs_error_last_25": 97.80975191235667, "mean_abs_error_last_50": 210.9092158801515, "mean_pred_prob": 0.054583324072882536, "mean_pred_prob_last_10": 0.2536057036370039, "mean_pred_prob_last_25": 0.1487705866806209, "mean_pred_prob_last_50": 0.09140806570649147, "mean_token_accuracy": 0.8732897996902466, "step": 30750 }, { "epoch": 0.5468152809627931, "grad_norm": 1.775780859765762, "learning_rate": 0.0001, "loss": 0.6275, "mean_abs_error": 127.34564824702468, "mean_abs_error_last_10": 32.43066825248145, "mean_abs_error_last_25": 43.01467988327353, "mean_abs_error_last_50": 82.9982190181581, "mean_pred_prob": 0.06621250072494149, "mean_pred_prob_last_10": 0.2901521731168032, "mean_pred_prob_last_25": 0.17125704688951374, "mean_pred_prob_last_50": 0.1088313028216362, "mean_token_accuracy": 0.8829164922237396, "step": 30760 }, { "epoch": 0.546993049259595, "grad_norm": 2.683861616627248, "learning_rate": 0.0001, "loss": 0.7575, "mean_abs_error": 701.7205306489857, "mean_abs_error_last_10": 156.83806922554996, "mean_abs_error_last_25": 217.80884777889224, "mean_abs_error_last_50": 357.02892123773626, "mean_pred_prob": 0.03152537203859538, "mean_pred_prob_last_10": 0.15271390427369624, "mean_pred_prob_last_25": 0.08598064223770052, "mean_pred_prob_last_50": 0.05365020213648677, "mean_token_accuracy": 0.8687226891517639, "step": 30770 }, { "epoch": 0.547170817556397, "grad_norm": 1.3624461409209274, "learning_rate": 0.0001, "loss": 0.7572, "mean_abs_error": 350.84482809546154, "mean_abs_error_last_10": 113.60383849268155, "mean_abs_error_last_25": 264.28219939873594, "mean_abs_error_last_50": 342.9620803813529, "mean_pred_prob": 0.03382169618271291, "mean_pred_prob_last_10": 0.17681796997785568, "mean_pred_prob_last_25": 0.09466254394501447, "mean_pred_prob_last_50": 0.05776397101581097, "mean_token_accuracy": 0.8682435989379883, "step": 30780 }, { "epoch": 0.5473485858531989, "grad_norm": 2.2490985661033007, "learning_rate": 0.0001, "loss": 0.6531, "mean_abs_error": 634.8568918106383, "mean_abs_error_last_10": 112.55143605484523, "mean_abs_error_last_25": 230.606038521865, "mean_abs_error_last_50": 362.9641400223571, "mean_pred_prob": 0.038736203033477065, "mean_pred_prob_last_10": 0.19645702473353593, "mean_pred_prob_last_25": 0.10850442355731502, "mean_pred_prob_last_50": 0.06627141422359273, "mean_token_accuracy": 0.890486741065979, "step": 30790 }, { "epoch": 0.5475263541500008, "grad_norm": 2.6885256735268164, "learning_rate": 0.0001, "loss": 0.7901, "mean_abs_error": 94.73068185087334, "mean_abs_error_last_10": 44.82252501079357, "mean_abs_error_last_25": 56.73347956595423, "mean_abs_error_last_50": 66.11740846426608, "mean_pred_prob": 0.055209831614047286, "mean_pred_prob_last_10": 0.2470542810857296, "mean_pred_prob_last_25": 0.14193647876381874, "mean_pred_prob_last_50": 0.09045191816985607, "mean_token_accuracy": 0.8686547875404358, "step": 30800 }, { "epoch": 0.5477041224468029, "grad_norm": 1.375337595509708, "learning_rate": 0.0001, "loss": 0.6642, "mean_abs_error": 1073.3539437750173, "mean_abs_error_last_10": 722.8203653560257, "mean_abs_error_last_25": 793.0748529347954, "mean_abs_error_last_50": 891.6543492032712, "mean_pred_prob": 0.03835561343148584, "mean_pred_prob_last_10": 0.1836611027669278, "mean_pred_prob_last_25": 0.10561805615143385, "mean_pred_prob_last_50": 0.06533778592856834, "mean_token_accuracy": 0.8808438658714295, "step": 30810 }, { "epoch": 0.5478818907436048, "grad_norm": 2.0013764762497597, "learning_rate": 0.0001, "loss": 0.8992, "mean_abs_error": 62.270925031616, "mean_abs_error_last_10": 14.674411884605437, "mean_abs_error_last_25": 27.566393457725063, "mean_abs_error_last_50": 36.26232581310734, "mean_pred_prob": 0.06189831178635359, "mean_pred_prob_last_10": 0.26943406015634536, "mean_pred_prob_last_25": 0.16017184183001518, "mean_pred_prob_last_50": 0.10202635452151299, "mean_token_accuracy": 0.8734724044799804, "step": 30820 }, { "epoch": 0.5480596590404068, "grad_norm": 1.516736526561528, "learning_rate": 0.0001, "loss": 0.7281, "mean_abs_error": 341.6502252711788, "mean_abs_error_last_10": 41.382111072164264, "mean_abs_error_last_25": 86.27395185194896, "mean_abs_error_last_50": 164.67051714071812, "mean_pred_prob": 0.05275946599431336, "mean_pred_prob_last_10": 0.2639431029558182, "mean_pred_prob_last_25": 0.14382746983319522, "mean_pred_prob_last_50": 0.08894869396463037, "mean_token_accuracy": 0.8675603568553925, "step": 30830 }, { "epoch": 0.5482374273372087, "grad_norm": 1.1928278249433193, "learning_rate": 0.0001, "loss": 0.7651, "mean_abs_error": 767.1067691921172, "mean_abs_error_last_10": 287.7500609300522, "mean_abs_error_last_25": 313.5845133991118, "mean_abs_error_last_50": 455.848651755192, "mean_pred_prob": 0.03130782170628663, "mean_pred_prob_last_10": 0.15784137726295738, "mean_pred_prob_last_25": 0.08719906784244813, "mean_pred_prob_last_50": 0.05268115713261068, "mean_token_accuracy": 0.879810881614685, "step": 30840 }, { "epoch": 0.5484151956340106, "grad_norm": 1.3002209708206753, "learning_rate": 0.0001, "loss": 0.7254, "mean_abs_error": 213.86540941033928, "mean_abs_error_last_10": 34.445673992242185, "mean_abs_error_last_25": 49.63969435186467, "mean_abs_error_last_50": 82.11090263921271, "mean_pred_prob": 0.047569905314594506, "mean_pred_prob_last_10": 0.22351247053593398, "mean_pred_prob_last_25": 0.1285318342037499, "mean_pred_prob_last_50": 0.07914433730766177, "mean_token_accuracy": 0.8789462506771087, "step": 30850 }, { "epoch": 0.5485929639308126, "grad_norm": 1.783249854108571, "learning_rate": 0.0001, "loss": 0.7217, "mean_abs_error": 859.2408306556179, "mean_abs_error_last_10": 467.0382369033341, "mean_abs_error_last_25": 562.7559540418404, "mean_abs_error_last_50": 666.465785315504, "mean_pred_prob": 0.03701620391802862, "mean_pred_prob_last_10": 0.18186459417047446, "mean_pred_prob_last_25": 0.10502743889810517, "mean_pred_prob_last_50": 0.06336627254204359, "mean_token_accuracy": 0.882403039932251, "step": 30860 }, { "epoch": 0.5487707322276145, "grad_norm": 2.152945258290862, "learning_rate": 0.0001, "loss": 0.7547, "mean_abs_error": 158.3027774149776, "mean_abs_error_last_10": 31.524193184060856, "mean_abs_error_last_25": 53.15664197467133, "mean_abs_error_last_50": 88.95119907129278, "mean_pred_prob": 0.05229120282456279, "mean_pred_prob_last_10": 0.26376389861106875, "mean_pred_prob_last_25": 0.14231912083923817, "mean_pred_prob_last_50": 0.08677899772301316, "mean_token_accuracy": 0.8680346310138702, "step": 30870 }, { "epoch": 0.5489485005244165, "grad_norm": 4.04977356081287, "learning_rate": 0.0001, "loss": 0.8947, "mean_abs_error": 601.2144117145236, "mean_abs_error_last_10": 330.791809252452, "mean_abs_error_last_25": 275.8205388040029, "mean_abs_error_last_50": 394.47953815516945, "mean_pred_prob": 0.030250084155704828, "mean_pred_prob_last_10": 0.16114879191154613, "mean_pred_prob_last_25": 0.08737371544120834, "mean_pred_prob_last_50": 0.05259591360809281, "mean_token_accuracy": 0.8630622923374176, "step": 30880 }, { "epoch": 0.5491262688212184, "grad_norm": 1.4709773433065694, "learning_rate": 0.0001, "loss": 0.7957, "mean_abs_error": 451.70738249578346, "mean_abs_error_last_10": 137.20138264281073, "mean_abs_error_last_25": 157.60056886198086, "mean_abs_error_last_50": 228.71271548747217, "mean_pred_prob": 0.027051551826298238, "mean_pred_prob_last_10": 0.13688416332006453, "mean_pred_prob_last_25": 0.07400521291419863, "mean_pred_prob_last_50": 0.045385025627911094, "mean_token_accuracy": 0.8696207046508789, "step": 30890 }, { "epoch": 0.5493040371180203, "grad_norm": 8.149741141717346, "learning_rate": 0.0001, "loss": 0.7896, "mean_abs_error": 437.3134990802861, "mean_abs_error_last_10": 107.30094547603025, "mean_abs_error_last_25": 171.58602814315947, "mean_abs_error_last_50": 279.37158271958623, "mean_pred_prob": 0.041529602190712465, "mean_pred_prob_last_10": 0.20982504251878709, "mean_pred_prob_last_25": 0.12245844807475806, "mean_pred_prob_last_50": 0.07296942120883614, "mean_token_accuracy": 0.8778264105319977, "step": 30900 }, { "epoch": 0.5494818054148223, "grad_norm": 1.015218300437544, "learning_rate": 0.0001, "loss": 0.8577, "mean_abs_error": 278.9037959297726, "mean_abs_error_last_10": 64.48912867402228, "mean_abs_error_last_25": 174.60449867963598, "mean_abs_error_last_50": 177.36237782661834, "mean_pred_prob": 0.04863839549943805, "mean_pred_prob_last_10": 0.22821597550064326, "mean_pred_prob_last_25": 0.1310864725150168, "mean_pred_prob_last_50": 0.08194255428388715, "mean_token_accuracy": 0.865077018737793, "step": 30910 }, { "epoch": 0.5496595737116242, "grad_norm": 1.3535555195127729, "learning_rate": 0.0001, "loss": 0.7713, "mean_abs_error": 257.3794777830643, "mean_abs_error_last_10": 77.07739509555506, "mean_abs_error_last_25": 144.85941220219496, "mean_abs_error_last_50": 174.40128357391796, "mean_pred_prob": 0.03075940813869238, "mean_pred_prob_last_10": 0.1531426254659891, "mean_pred_prob_last_25": 0.08379859570413828, "mean_pred_prob_last_50": 0.05132751166820526, "mean_token_accuracy": 0.8722059786319732, "step": 30920 }, { "epoch": 0.5498373420084263, "grad_norm": 1.1339322931241471, "learning_rate": 0.0001, "loss": 0.7807, "mean_abs_error": 707.888093186432, "mean_abs_error_last_10": 284.5690456243553, "mean_abs_error_last_25": 347.21714980523, "mean_abs_error_last_50": 447.69071570823945, "mean_pred_prob": 0.03942713318392634, "mean_pred_prob_last_10": 0.13870394565165042, "mean_pred_prob_last_25": 0.09415541063062846, "mean_pred_prob_last_50": 0.06159852161072195, "mean_token_accuracy": 0.8723829209804534, "step": 30930 }, { "epoch": 0.5500151103052282, "grad_norm": 2.237184142792162, "learning_rate": 0.0001, "loss": 0.8272, "mean_abs_error": 903.6893576301106, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 659.0056761004414, "mean_pred_prob": 0.0581236729194643, "mean_pred_prob_last_10": 0.1680227934732102, "mean_pred_prob_last_25": 0.11536584237182979, "mean_pred_prob_last_50": 0.08079502309556119, "mean_token_accuracy": 0.8632865786552429, "step": 30940 }, { "epoch": 0.5501928786020301, "grad_norm": 1.0452686705103904, "learning_rate": 0.0001, "loss": 0.7399, "mean_abs_error": 562.5990651571772, "mean_abs_error_last_10": 148.54035155362595, "mean_abs_error_last_25": 246.4765245850951, "mean_abs_error_last_50": 331.7429502306642, "mean_pred_prob": 0.0424765411473345, "mean_pred_prob_last_10": 0.1990491326781921, "mean_pred_prob_last_25": 0.11116017527529039, "mean_pred_prob_last_50": 0.06934837402077391, "mean_token_accuracy": 0.885675573348999, "step": 30950 }, { "epoch": 0.5503706468988321, "grad_norm": 1.150789655041771, "learning_rate": 0.0001, "loss": 0.745, "mean_abs_error": 979.2326142828458, "mean_abs_error_last_10": 517.2127058978652, "mean_abs_error_last_25": 590.5274378155652, "mean_abs_error_last_50": 744.9450571370385, "mean_pred_prob": 0.03289261523023015, "mean_pred_prob_last_10": 0.17452314861584456, "mean_pred_prob_last_25": 0.09564621958706994, "mean_pred_prob_last_50": 0.05654819625051459, "mean_token_accuracy": 0.8710103392601013, "step": 30960 }, { "epoch": 0.550548415195634, "grad_norm": 2.0698549050795036, "learning_rate": 0.0001, "loss": 0.737, "mean_abs_error": 276.0822856154492, "mean_abs_error_last_10": 132.59584070043522, "mean_abs_error_last_25": 127.71926759412285, "mean_abs_error_last_50": 158.55960056835352, "mean_pred_prob": 0.05195257943123579, "mean_pred_prob_last_10": 0.25573001969605685, "mean_pred_prob_last_25": 0.14288015123456715, "mean_pred_prob_last_50": 0.08797459863126278, "mean_token_accuracy": 0.8741533696651459, "step": 30970 }, { "epoch": 0.550726183492436, "grad_norm": 1.2302238514868393, "learning_rate": 0.0001, "loss": 0.6805, "mean_abs_error": 435.5149311259646, "mean_abs_error_last_10": 149.5194166835494, "mean_abs_error_last_25": 171.78580671971386, "mean_abs_error_last_50": 287.88191257683417, "mean_pred_prob": 0.03343603352550417, "mean_pred_prob_last_10": 0.17639070954173802, "mean_pred_prob_last_25": 0.09608749472536146, "mean_pred_prob_last_50": 0.05709022842347622, "mean_token_accuracy": 0.8709520041942597, "step": 30980 }, { "epoch": 0.5509039517892379, "grad_norm": 1.2272041051463956, "learning_rate": 0.0001, "loss": 0.6536, "mean_abs_error": 406.6640607816256, "mean_abs_error_last_10": 236.75437181132233, "mean_abs_error_last_25": 223.6913210469198, "mean_abs_error_last_50": 245.98302158591613, "mean_pred_prob": 0.03758422867394984, "mean_pred_prob_last_10": 0.17793565876781942, "mean_pred_prob_last_25": 0.09961352637037635, "mean_pred_prob_last_50": 0.0624986672308296, "mean_token_accuracy": 0.872587776184082, "step": 30990 }, { "epoch": 0.5510817200860398, "grad_norm": 1.1536018435811635, "learning_rate": 0.0001, "loss": 0.6305, "mean_abs_error": 129.18432889490757, "mean_abs_error_last_10": 36.292320155365644, "mean_abs_error_last_25": 61.733924005104505, "mean_abs_error_last_50": 79.81199892992339, "mean_pred_prob": 0.05364115564152598, "mean_pred_prob_last_10": 0.24944821521639823, "mean_pred_prob_last_25": 0.14689883701503276, "mean_pred_prob_last_50": 0.09069202989339828, "mean_token_accuracy": 0.8771138250827789, "step": 31000 }, { "epoch": 0.5512594883828418, "grad_norm": 1.8940837049735948, "learning_rate": 0.0001, "loss": 0.7505, "mean_abs_error": 1097.8076902605012, "mean_abs_error_last_10": 547.3024303976285, "mean_abs_error_last_25": 676.8202617532597, "mean_abs_error_last_50": 814.0867680600745, "mean_pred_prob": 0.03458693434658926, "mean_pred_prob_last_10": 0.18273290407378226, "mean_pred_prob_last_25": 0.10133376536832657, "mean_pred_prob_last_50": 0.059918569239380305, "mean_token_accuracy": 0.8756227433681488, "step": 31010 }, { "epoch": 0.5514372566796437, "grad_norm": 1.6306333518714817, "learning_rate": 0.0001, "loss": 0.7513, "mean_abs_error": 1247.3794236185283, "mean_abs_error_last_10": 731.9499735758825, "mean_abs_error_last_25": 813.4240242657756, "mean_abs_error_last_50": 941.2500122157423, "mean_pred_prob": 0.032154226279817524, "mean_pred_prob_last_10": 0.1741795702939271, "mean_pred_prob_last_25": 0.09264748718851479, "mean_pred_prob_last_50": 0.05430043275118805, "mean_token_accuracy": 0.8759924948215485, "step": 31020 }, { "epoch": 0.5516150249764457, "grad_norm": 1.4224194360485614, "learning_rate": 0.0001, "loss": 0.8594, "mean_abs_error": 796.4957281520876, "mean_abs_error_last_10": 183.27333196819131, "mean_abs_error_last_25": 277.08307183057184, "mean_abs_error_last_50": 426.60823269621886, "mean_pred_prob": 0.04746461670147255, "mean_pred_prob_last_10": 0.1999057954060845, "mean_pred_prob_last_25": 0.12532650298671796, "mean_pred_prob_last_50": 0.08224227172322571, "mean_token_accuracy": 0.871419894695282, "step": 31030 }, { "epoch": 0.5517927932732476, "grad_norm": 1.9843565163048593, "learning_rate": 0.0001, "loss": 0.8306, "mean_abs_error": 445.85325479668364, "mean_abs_error_last_10": 137.93326601954703, "mean_abs_error_last_25": 251.5394444100797, "mean_abs_error_last_50": 357.09837926198884, "mean_pred_prob": 0.03593876580707729, "mean_pred_prob_last_10": 0.18606728911399842, "mean_pred_prob_last_25": 0.10177356395870447, "mean_pred_prob_last_50": 0.06019300604239106, "mean_token_accuracy": 0.8757787108421325, "step": 31040 }, { "epoch": 0.5519705615700496, "grad_norm": 1.5634632312934682, "learning_rate": 0.0001, "loss": 0.8312, "mean_abs_error": 1010.664666340356, "mean_abs_error_last_10": 658.1378846094001, "mean_abs_error_last_25": 718.986048156966, "mean_abs_error_last_50": 738.2754305137689, "mean_pred_prob": 0.017878849350381643, "mean_pred_prob_last_10": 0.09793159375549294, "mean_pred_prob_last_25": 0.05068304458400234, "mean_pred_prob_last_50": 0.030421478551579638, "mean_token_accuracy": 0.8632364571094513, "step": 31050 }, { "epoch": 0.5521483298668516, "grad_norm": 1.0036471421144115, "learning_rate": 0.0001, "loss": 0.7669, "mean_abs_error": 310.9273396868808, "mean_abs_error_last_10": 67.21146301106768, "mean_abs_error_last_25": 96.32093846598679, "mean_abs_error_last_50": 146.59163752887835, "mean_pred_prob": 0.05240732349921018, "mean_pred_prob_last_10": 0.26688279677182436, "mean_pred_prob_last_25": 0.14623770350590348, "mean_pred_prob_last_50": 0.08867781474255025, "mean_token_accuracy": 0.8714130163192749, "step": 31060 }, { "epoch": 0.5523260981636535, "grad_norm": 1.1185098387347607, "learning_rate": 0.0001, "loss": 0.8175, "mean_abs_error": 480.4876825594799, "mean_abs_error_last_10": 367.41025893125766, "mean_abs_error_last_25": 363.4801763318708, "mean_abs_error_last_50": 368.5833340394519, "mean_pred_prob": 0.03719424023292959, "mean_pred_prob_last_10": 0.16937843505293132, "mean_pred_prob_last_25": 0.09977082186378539, "mean_pred_prob_last_50": 0.061424364987760785, "mean_token_accuracy": 0.8716633021831512, "step": 31070 }, { "epoch": 0.5525038664604555, "grad_norm": 3.0787720314319316, "learning_rate": 0.0001, "loss": 0.7993, "mean_abs_error": 227.95125282236222, "mean_abs_error_last_10": 73.2490426642885, "mean_abs_error_last_25": 134.80461966614172, "mean_abs_error_last_50": 177.0124242255453, "mean_pred_prob": 0.047362450323998925, "mean_pred_prob_last_10": 0.2517387829720974, "mean_pred_prob_last_25": 0.13236347455531358, "mean_pred_prob_last_50": 0.07832164755091071, "mean_token_accuracy": 0.8742690086364746, "step": 31080 }, { "epoch": 0.5526816347572574, "grad_norm": 1.6421716230892645, "learning_rate": 0.0001, "loss": 0.6861, "mean_abs_error": 652.4290100280148, "mean_abs_error_last_10": 128.56114578793174, "mean_abs_error_last_25": 164.0881572259303, "mean_abs_error_last_50": 329.5798813565819, "mean_pred_prob": 0.025615118828136475, "mean_pred_prob_last_10": 0.1324530249228701, "mean_pred_prob_last_25": 0.07270069235237316, "mean_pred_prob_last_50": 0.0442118230741471, "mean_token_accuracy": 0.8828414738178253, "step": 31090 }, { "epoch": 0.5528594030540593, "grad_norm": 2.232383166168116, "learning_rate": 0.0001, "loss": 0.9801, "mean_abs_error": 482.20292672477433, "mean_abs_error_last_10": 198.1793027961019, "mean_abs_error_last_25": 276.813426041139, "mean_abs_error_last_50": 378.7458763242628, "mean_pred_prob": 0.028431984642520548, "mean_pred_prob_last_10": 0.14917154256254433, "mean_pred_prob_last_25": 0.08204711079597474, "mean_pred_prob_last_50": 0.04812370091676712, "mean_token_accuracy": 0.8619608163833619, "step": 31100 }, { "epoch": 0.5530371713508613, "grad_norm": 1.9460084061486704, "learning_rate": 0.0001, "loss": 0.8877, "mean_abs_error": 517.0429186896488, "mean_abs_error_last_10": 219.64884642376393, "mean_abs_error_last_25": 243.56566041043334, "mean_abs_error_last_50": 330.88473766832124, "mean_pred_prob": 0.055707739092758855, "mean_pred_prob_last_10": 0.2611449373187497, "mean_pred_prob_last_25": 0.14710568350856193, "mean_pred_prob_last_50": 0.09207562938099727, "mean_token_accuracy": 0.8576910257339477, "step": 31110 }, { "epoch": 0.5532149396476632, "grad_norm": 2.3178395197949944, "learning_rate": 0.0001, "loss": 0.75, "mean_abs_error": 549.2740999894602, "mean_abs_error_last_10": 154.91540422021245, "mean_abs_error_last_25": 214.914402925703, "mean_abs_error_last_50": 296.6360976827352, "mean_pred_prob": 0.04690813955967314, "mean_pred_prob_last_10": 0.21081679075723514, "mean_pred_prob_last_25": 0.12257088182959705, "mean_pred_prob_last_50": 0.07745908923097886, "mean_token_accuracy": 0.8834276616573333, "step": 31120 }, { "epoch": 0.5533927079444652, "grad_norm": 1.3376511119532752, "learning_rate": 0.0001, "loss": 0.6578, "mean_abs_error": 426.42229622505175, "mean_abs_error_last_10": 87.240472528212, "mean_abs_error_last_25": 111.7175060393284, "mean_abs_error_last_50": 205.5992408665883, "mean_pred_prob": 0.0411840358749032, "mean_pred_prob_last_10": 0.1983098279684782, "mean_pred_prob_last_25": 0.11098965378478169, "mean_pred_prob_last_50": 0.06910172468051315, "mean_token_accuracy": 0.8760299265384675, "step": 31130 }, { "epoch": 0.5535704762412671, "grad_norm": 2.0696335633281007, "learning_rate": 0.0001, "loss": 0.7836, "mean_abs_error": 360.58394033554003, "mean_abs_error_last_10": 35.89636384724392, "mean_abs_error_last_25": 60.68645534492299, "mean_abs_error_last_50": 141.13510368779026, "mean_pred_prob": 0.047742689540609715, "mean_pred_prob_last_10": 0.2362165842205286, "mean_pred_prob_last_25": 0.13153586108237506, "mean_pred_prob_last_50": 0.08041811604052782, "mean_token_accuracy": 0.8813385307788849, "step": 31140 }, { "epoch": 0.553748244538069, "grad_norm": 1.7972594399961372, "learning_rate": 0.0001, "loss": 0.7796, "mean_abs_error": 430.5516160425017, "mean_abs_error_last_10": 190.40400457117607, "mean_abs_error_last_25": 206.82075396799593, "mean_abs_error_last_50": 299.1719974727269, "mean_pred_prob": 0.03306711697950959, "mean_pred_prob_last_10": 0.17155086756683885, "mean_pred_prob_last_25": 0.09422582641709595, "mean_pred_prob_last_50": 0.05470707931090146, "mean_token_accuracy": 0.8717517256736755, "step": 31150 }, { "epoch": 0.5539260128348711, "grad_norm": 3.191317729570609, "learning_rate": 0.0001, "loss": 0.7843, "mean_abs_error": 580.2559033320315, "mean_abs_error_last_10": 258.600800517313, "mean_abs_error_last_25": 233.98757555083048, "mean_abs_error_last_50": 351.81343576136317, "mean_pred_prob": 0.03316238124971278, "mean_pred_prob_last_10": 0.16077798911137506, "mean_pred_prob_last_25": 0.0866858376772143, "mean_pred_prob_last_50": 0.05429644618998282, "mean_token_accuracy": 0.8766614437103272, "step": 31160 }, { "epoch": 0.554103781131673, "grad_norm": 2.009629710372219, "learning_rate": 0.0001, "loss": 0.8377, "mean_abs_error": 1143.6263307928562, "mean_abs_error_last_10": 440.5246490914521, "mean_abs_error_last_25": 616.3843786282489, "mean_abs_error_last_50": 783.4118003354199, "mean_pred_prob": 0.03207799053925555, "mean_pred_prob_last_10": 0.15015410433406942, "mean_pred_prob_last_25": 0.08674474550061859, "mean_pred_prob_last_50": 0.0531109418196138, "mean_token_accuracy": 0.8700123071670532, "step": 31170 }, { "epoch": 0.554281549428475, "grad_norm": 2.267305804220606, "learning_rate": 0.0001, "loss": 0.7241, "mean_abs_error": 1316.9898637029416, "mean_abs_error_last_10": 621.6959023664286, "mean_abs_error_last_25": 712.3801081052775, "mean_abs_error_last_50": 907.3172642197773, "mean_pred_prob": 0.029925047235155943, "mean_pred_prob_last_10": 0.1615818257792853, "mean_pred_prob_last_25": 0.08639789519802435, "mean_pred_prob_last_50": 0.051304281697957775, "mean_token_accuracy": 0.8768088817596436, "step": 31180 }, { "epoch": 0.5544593177252769, "grad_norm": 1.8229083186530612, "learning_rate": 0.0001, "loss": 0.6993, "mean_abs_error": 237.45501635676678, "mean_abs_error_last_10": 87.77317294799587, "mean_abs_error_last_25": 150.31037311167645, "mean_abs_error_last_50": 161.60343688873394, "mean_pred_prob": 0.03991177123971283, "mean_pred_prob_last_10": 0.182315469160676, "mean_pred_prob_last_25": 0.1005513509735465, "mean_pred_prob_last_50": 0.06433263551443816, "mean_token_accuracy": 0.8727321743965148, "step": 31190 }, { "epoch": 0.5546370860220788, "grad_norm": 2.625767503421458, "learning_rate": 0.0001, "loss": 0.6929, "mean_abs_error": 697.5124247860044, "mean_abs_error_last_10": 356.3086837353264, "mean_abs_error_last_25": 369.28748680188465, "mean_abs_error_last_50": 454.58357647437333, "mean_pred_prob": 0.03757518086349591, "mean_pred_prob_last_10": 0.19271907600341365, "mean_pred_prob_last_25": 0.10795531710609793, "mean_pred_prob_last_50": 0.06454662883188575, "mean_token_accuracy": 0.8621998310089112, "step": 31200 }, { "epoch": 0.5548148543188808, "grad_norm": 1.2992114966641413, "learning_rate": 0.0001, "loss": 0.5522, "mean_abs_error": 275.1722864298766, "mean_abs_error_last_10": 145.34147460751703, "mean_abs_error_last_25": 161.71893774300375, "mean_abs_error_last_50": 220.76389151932418, "mean_pred_prob": 0.04075427339412272, "mean_pred_prob_last_10": 0.20258101485669613, "mean_pred_prob_last_25": 0.11606578445062041, "mean_pred_prob_last_50": 0.06997451521456241, "mean_token_accuracy": 0.8852911174297333, "step": 31210 }, { "epoch": 0.5549926226156827, "grad_norm": 1.0004827293254905, "learning_rate": 0.0001, "loss": 0.7741, "mean_abs_error": 169.1324181688504, "mean_abs_error_last_10": 48.19602458209962, "mean_abs_error_last_25": 69.00554611043835, "mean_abs_error_last_50": 92.03102684676443, "mean_pred_prob": 0.055151462694630025, "mean_pred_prob_last_10": 0.22779437936842442, "mean_pred_prob_last_25": 0.13412298252806068, "mean_pred_prob_last_50": 0.08901349026709796, "mean_token_accuracy": 0.8691562354564667, "step": 31220 }, { "epoch": 0.5551703909124847, "grad_norm": 2.429344813464442, "learning_rate": 0.0001, "loss": 0.8028, "mean_abs_error": 367.14129802258736, "mean_abs_error_last_10": 86.02395794411854, "mean_abs_error_last_25": 149.1826178926965, "mean_abs_error_last_50": 265.7807311668901, "mean_pred_prob": 0.027120286459103228, "mean_pred_prob_last_10": 0.1407946314662695, "mean_pred_prob_last_25": 0.07637666277587414, "mean_pred_prob_last_50": 0.04604491507634521, "mean_token_accuracy": 0.8810093462467193, "step": 31230 }, { "epoch": 0.5553481592092866, "grad_norm": 1.5832983954949031, "learning_rate": 0.0001, "loss": 0.6918, "mean_abs_error": 1314.5199176303145, "mean_abs_error_last_10": 639.2802462886391, "mean_abs_error_last_25": 722.8193874629386, "mean_abs_error_last_50": 948.2769182363797, "mean_pred_prob": 0.03979394618945662, "mean_pred_prob_last_10": 0.20256378139310982, "mean_pred_prob_last_25": 0.11550505829800387, "mean_pred_prob_last_50": 0.06803355472220574, "mean_token_accuracy": 0.8705329656600952, "step": 31240 }, { "epoch": 0.5555259275060885, "grad_norm": 1.4425982053709392, "learning_rate": 0.0001, "loss": 0.8364, "mean_abs_error": 605.5164937353536, "mean_abs_error_last_10": 109.77379263643661, "mean_abs_error_last_25": 155.4370650181762, "mean_abs_error_last_50": 272.06418547582524, "mean_pred_prob": 0.029023136221803726, "mean_pred_prob_last_10": 0.14953890666365624, "mean_pred_prob_last_25": 0.08159630033187568, "mean_pred_prob_last_50": 0.04923378871753812, "mean_token_accuracy": 0.8736388921737671, "step": 31250 }, { "epoch": 0.5557036958028905, "grad_norm": 1.6246385126195313, "learning_rate": 0.0001, "loss": 0.9099, "mean_abs_error": 484.09452578531045, "mean_abs_error_last_10": 142.56215963160355, "mean_abs_error_last_25": 184.8604712237871, "mean_abs_error_last_50": 308.1019786314253, "mean_pred_prob": 0.03184959664940834, "mean_pred_prob_last_10": 0.16250123566715047, "mean_pred_prob_last_25": 0.08928973044967278, "mean_pred_prob_last_50": 0.05413811300531961, "mean_token_accuracy": 0.864179116487503, "step": 31260 }, { "epoch": 0.5558814640996924, "grad_norm": 2.7707548528802315, "learning_rate": 0.0001, "loss": 0.7184, "mean_abs_error": 587.4402276176247, "mean_abs_error_last_10": 304.5322684893289, "mean_abs_error_last_25": 326.0935835229573, "mean_abs_error_last_50": 366.8783370346104, "mean_pred_prob": 0.02629703809507191, "mean_pred_prob_last_10": 0.1402939778752625, "mean_pred_prob_last_25": 0.0737647857517004, "mean_pred_prob_last_50": 0.04468116802163422, "mean_token_accuracy": 0.8718752920627594, "step": 31270 }, { "epoch": 0.5560592323964945, "grad_norm": 2.759053794682244, "learning_rate": 0.0001, "loss": 0.7887, "mean_abs_error": 970.3354390687584, "mean_abs_error_last_10": 310.7130235742934, "mean_abs_error_last_25": 399.4287762989852, "mean_abs_error_last_50": 595.3156839519942, "mean_pred_prob": 0.0303104111837456, "mean_pred_prob_last_10": 0.1685506286274176, "mean_pred_prob_last_25": 0.086881254252512, "mean_pred_prob_last_50": 0.051632786891423164, "mean_token_accuracy": 0.8745922029018403, "step": 31280 }, { "epoch": 0.5562370006932964, "grad_norm": 1.2718156241025327, "learning_rate": 0.0001, "loss": 0.6791, "mean_abs_error": 741.8172439233913, "mean_abs_error_last_10": 324.9084386636135, "mean_abs_error_last_25": 384.1788256428897, "mean_abs_error_last_50": 494.9963073096005, "mean_pred_prob": 0.03907594270422123, "mean_pred_prob_last_10": 0.20075591310160235, "mean_pred_prob_last_25": 0.1093584425514564, "mean_pred_prob_last_50": 0.06730030378093943, "mean_token_accuracy": 0.8785698413848877, "step": 31290 }, { "epoch": 0.5564147689900983, "grad_norm": 1.2221586410491667, "learning_rate": 0.0001, "loss": 0.7073, "mean_abs_error": 377.24304457271876, "mean_abs_error_last_10": 94.86673249677663, "mean_abs_error_last_25": 119.22021219509247, "mean_abs_error_last_50": 226.52726212480076, "mean_pred_prob": 0.05095634026220068, "mean_pred_prob_last_10": 0.2254870452452451, "mean_pred_prob_last_25": 0.13267414063448085, "mean_pred_prob_last_50": 0.08452533432282508, "mean_token_accuracy": 0.8698045134544372, "step": 31300 }, { "epoch": 0.5565925372869003, "grad_norm": 2.3890435194859587, "learning_rate": 0.0001, "loss": 0.7946, "mean_abs_error": 1105.8892620041956, "mean_abs_error_last_10": 670.929986939829, "mean_abs_error_last_25": 743.0412787860794, "mean_abs_error_last_50": 842.176219991418, "mean_pred_prob": 0.030418430686404464, "mean_pred_prob_last_10": 0.17853757079574278, "mean_pred_prob_last_25": 0.09171942505054176, "mean_pred_prob_last_50": 0.05297816886595683, "mean_token_accuracy": 0.872698825597763, "step": 31310 }, { "epoch": 0.5567703055837022, "grad_norm": 1.8957830096735733, "learning_rate": 0.0001, "loss": 0.7398, "mean_abs_error": 146.74409225968355, "mean_abs_error_last_10": 38.63404676254986, "mean_abs_error_last_25": 47.81331875872041, "mean_abs_error_last_50": 78.88874235042091, "mean_pred_prob": 0.05014152717776597, "mean_pred_prob_last_10": 0.2175894036889076, "mean_pred_prob_last_25": 0.12842655833810568, "mean_pred_prob_last_50": 0.08179147765040398, "mean_token_accuracy": 0.8584957957267761, "step": 31320 }, { "epoch": 0.5569480738805042, "grad_norm": 1.6152515383587935, "learning_rate": 0.0001, "loss": 0.7854, "mean_abs_error": 512.2372784115377, "mean_abs_error_last_10": 102.94052486973729, "mean_abs_error_last_25": 199.2481075511401, "mean_abs_error_last_50": 279.7669822415729, "mean_pred_prob": 0.02988335774280131, "mean_pred_prob_last_10": 0.1635555699467659, "mean_pred_prob_last_25": 0.08619017228484153, "mean_pred_prob_last_50": 0.05097471391782164, "mean_token_accuracy": 0.8792542338371276, "step": 31330 }, { "epoch": 0.5571258421773061, "grad_norm": 1.0432815731524099, "learning_rate": 0.0001, "loss": 0.6216, "mean_abs_error": 77.3287214907539, "mean_abs_error_last_10": 15.185344195266065, "mean_abs_error_last_25": 30.573997296148786, "mean_abs_error_last_50": 48.57708865339248, "mean_pred_prob": 0.05783800445497036, "mean_pred_prob_last_10": 0.2825986683368683, "mean_pred_prob_last_25": 0.16122098602354526, "mean_pred_prob_last_50": 0.09793308898806571, "mean_token_accuracy": 0.877196478843689, "step": 31340 }, { "epoch": 0.557303610474108, "grad_norm": 1.5359454237665118, "learning_rate": 0.0001, "loss": 0.7738, "mean_abs_error": 330.73001850237114, "mean_abs_error_last_10": 247.6393124984185, "mean_abs_error_last_25": 226.75664135064972, "mean_abs_error_last_50": 232.4138322621173, "mean_pred_prob": 0.029681119800079613, "mean_pred_prob_last_10": 0.13893345430260523, "mean_pred_prob_last_25": 0.07606732279527932, "mean_pred_prob_last_50": 0.04887598627246916, "mean_token_accuracy": 0.8783431351184845, "step": 31350 }, { "epoch": 0.55748137877091, "grad_norm": 1.4625212921142625, "learning_rate": 0.0001, "loss": 0.6796, "mean_abs_error": 223.87039252987591, "mean_abs_error_last_10": 49.599295674087685, "mean_abs_error_last_25": 103.0210373905066, "mean_abs_error_last_50": 135.19043820178828, "mean_pred_prob": 0.04440346583724022, "mean_pred_prob_last_10": 0.22387958317995071, "mean_pred_prob_last_25": 0.12179591711610556, "mean_pred_prob_last_50": 0.07456496264785528, "mean_token_accuracy": 0.8687199413776397, "step": 31360 }, { "epoch": 0.5576591470677119, "grad_norm": 1.8900539868718207, "learning_rate": 0.0001, "loss": 0.8146, "mean_abs_error": 187.2218740420929, "mean_abs_error_last_10": 40.4976018520707, "mean_abs_error_last_25": 63.5527795753848, "mean_abs_error_last_50": 120.60969987317799, "mean_pred_prob": 0.03183020791038871, "mean_pred_prob_last_10": 0.15498869344592095, "mean_pred_prob_last_25": 0.0851178865879774, "mean_pred_prob_last_50": 0.0528815608471632, "mean_token_accuracy": 0.875061982870102, "step": 31370 }, { "epoch": 0.5578369153645139, "grad_norm": 0.8778823723842784, "learning_rate": 0.0001, "loss": 0.7668, "mean_abs_error": 885.9432009756217, "mean_abs_error_last_10": 546.5117112207521, "mean_abs_error_last_25": 650.0492753205388, "mean_abs_error_last_50": 720.3713283707373, "mean_pred_prob": 0.04044699328078423, "mean_pred_prob_last_10": 0.19946337492146995, "mean_pred_prob_last_25": 0.11196285379410256, "mean_pred_prob_last_50": 0.06842414439452113, "mean_token_accuracy": 0.8694418966770172, "step": 31380 }, { "epoch": 0.5580146836613158, "grad_norm": 1.1801661946289899, "learning_rate": 0.0001, "loss": 0.8152, "mean_abs_error": 1282.9635378477867, "mean_abs_error_last_10": 718.9189960453905, "mean_abs_error_last_25": 1033.6841099486653, "mean_abs_error_last_50": 1068.3494570637904, "mean_pred_prob": 0.020339541626162826, "mean_pred_prob_last_10": 0.10599215341790114, "mean_pred_prob_last_25": 0.05723228164133616, "mean_pred_prob_last_50": 0.03449765002878848, "mean_token_accuracy": 0.8674045085906983, "step": 31390 }, { "epoch": 0.5581924519581178, "grad_norm": 1.5835905353033486, "learning_rate": 0.0001, "loss": 0.8777, "mean_abs_error": 466.19384325891497, "mean_abs_error_last_10": 130.60894473230408, "mean_abs_error_last_25": 181.55127424055132, "mean_abs_error_last_50": 281.41313062999285, "mean_pred_prob": 0.02809662141953595, "mean_pred_prob_last_10": 0.14428973657777533, "mean_pred_prob_last_25": 0.0789095870568417, "mean_pred_prob_last_50": 0.04702743528760038, "mean_token_accuracy": 0.8765154719352722, "step": 31400 }, { "epoch": 0.5583702202549198, "grad_norm": 1.6467865329756155, "learning_rate": 0.0001, "loss": 0.7377, "mean_abs_error": 844.2570205826809, "mean_abs_error_last_10": 421.2884559841158, "mean_abs_error_last_25": 512.2795229366462, "mean_abs_error_last_50": 586.7684417470424, "mean_pred_prob": 0.03639034431835171, "mean_pred_prob_last_10": 0.14924350071814843, "mean_pred_prob_last_25": 0.09046726372325793, "mean_pred_prob_last_50": 0.05754510588012636, "mean_token_accuracy": 0.8742930114269256, "step": 31410 }, { "epoch": 0.5585479885517217, "grad_norm": 1.4917756922084644, "learning_rate": 0.0001, "loss": 0.8563, "mean_abs_error": 292.60985979721306, "mean_abs_error_last_10": 100.44188781815087, "mean_abs_error_last_25": 112.42836788435966, "mean_abs_error_last_50": 136.271144586707, "mean_pred_prob": 0.03999506530817598, "mean_pred_prob_last_10": 0.20687538012862206, "mean_pred_prob_last_25": 0.11362891141325235, "mean_pred_prob_last_50": 0.06796030979603529, "mean_token_accuracy": 0.8759986877441406, "step": 31420 }, { "epoch": 0.5587257568485237, "grad_norm": 1.5080515638344674, "learning_rate": 0.0001, "loss": 0.6896, "mean_abs_error": 303.7885247644423, "mean_abs_error_last_10": 115.79491855092697, "mean_abs_error_last_25": 127.98985837698126, "mean_abs_error_last_50": 194.06832395625165, "mean_pred_prob": 0.04096902837045491, "mean_pred_prob_last_10": 0.2055858217179775, "mean_pred_prob_last_25": 0.10987791232764721, "mean_pred_prob_last_50": 0.06819864017888903, "mean_token_accuracy": 0.8701709508895874, "step": 31430 }, { "epoch": 0.5589035251453256, "grad_norm": 2.077918755136614, "learning_rate": 0.0001, "loss": 0.752, "mean_abs_error": 663.6163159605932, "mean_abs_error_last_10": 276.2043391039047, "mean_abs_error_last_25": 280.49098281690164, "mean_abs_error_last_50": 363.65747240653894, "mean_pred_prob": 0.038005337526556104, "mean_pred_prob_last_10": 0.18360481783747673, "mean_pred_prob_last_25": 0.10238074644003063, "mean_pred_prob_last_50": 0.06346363474149257, "mean_token_accuracy": 0.8717457056045532, "step": 31440 }, { "epoch": 0.5590812934421275, "grad_norm": 1.948030583085992, "learning_rate": 0.0001, "loss": 0.7812, "mean_abs_error": 283.53659017648084, "mean_abs_error_last_10": 198.40611251661966, "mean_abs_error_last_25": 162.13637148600554, "mean_abs_error_last_50": 162.57280113804978, "mean_pred_prob": 0.0478618782479316, "mean_pred_prob_last_10": 0.22629972472786902, "mean_pred_prob_last_25": 0.12894386877305805, "mean_pred_prob_last_50": 0.07983991159126162, "mean_token_accuracy": 0.8664554834365845, "step": 31450 }, { "epoch": 0.5592590617389295, "grad_norm": 1.8061923610044883, "learning_rate": 0.0001, "loss": 0.749, "mean_abs_error": 640.428322522732, "mean_abs_error_last_10": 302.00410007722917, "mean_abs_error_last_25": 347.6545730836648, "mean_abs_error_last_50": 430.1194029419441, "mean_pred_prob": 0.03280974139051977, "mean_pred_prob_last_10": 0.15928427256876604, "mean_pred_prob_last_25": 0.08974545750534162, "mean_pred_prob_last_50": 0.055276307847816494, "mean_token_accuracy": 0.8693052113056183, "step": 31460 }, { "epoch": 0.5594368300357314, "grad_norm": 2.051749837645256, "learning_rate": 0.0001, "loss": 0.7124, "mean_abs_error": 283.5478227230418, "mean_abs_error_last_10": 70.69367721633417, "mean_abs_error_last_25": 93.46761828127795, "mean_abs_error_last_50": 134.50380659743664, "mean_pred_prob": 0.043420321750454606, "mean_pred_prob_last_10": 0.20537084373645484, "mean_pred_prob_last_25": 0.11557641273830085, "mean_pred_prob_last_50": 0.07143676056293771, "mean_token_accuracy": 0.8741164863109588, "step": 31470 }, { "epoch": 0.5596145983325334, "grad_norm": 1.4493452387815027, "learning_rate": 0.0001, "loss": 0.686, "mean_abs_error": 491.7008531601672, "mean_abs_error_last_10": 69.21998671490245, "mean_abs_error_last_25": 141.65355565515944, "mean_abs_error_last_50": 253.87672124885995, "mean_pred_prob": 0.032311357790604234, "mean_pred_prob_last_10": 0.1574196085333824, "mean_pred_prob_last_25": 0.08660287745296955, "mean_pred_prob_last_50": 0.05331817651167512, "mean_token_accuracy": 0.8832844197750092, "step": 31480 }, { "epoch": 0.5597923666293353, "grad_norm": 0.9620506572522483, "learning_rate": 0.0001, "loss": 0.648, "mean_abs_error": 901.0707026189435, "mean_abs_error_last_10": 426.72137003579, "mean_abs_error_last_25": 529.2365896698946, "mean_abs_error_last_50": 660.3469282054363, "mean_pred_prob": 0.038568477843364236, "mean_pred_prob_last_10": 0.18237522999115754, "mean_pred_prob_last_25": 0.10596903036057484, "mean_pred_prob_last_50": 0.06446764682186767, "mean_token_accuracy": 0.8813917934894562, "step": 31490 }, { "epoch": 0.5599701349261372, "grad_norm": 1.298883064019744, "learning_rate": 0.0001, "loss": 0.7754, "mean_abs_error": 350.7348521551609, "mean_abs_error_last_10": 61.079035929919506, "mean_abs_error_last_25": 117.43647022413461, "mean_abs_error_last_50": 184.457363450579, "mean_pred_prob": 0.03489886065945029, "mean_pred_prob_last_10": 0.19457921162247657, "mean_pred_prob_last_25": 0.10204883515834809, "mean_pred_prob_last_50": 0.06050722133368254, "mean_token_accuracy": 0.8794383227825164, "step": 31500 }, { "epoch": 0.5601479032229392, "grad_norm": 1.3134065912684578, "learning_rate": 0.0001, "loss": 0.6543, "mean_abs_error": 129.20774611906927, "mean_abs_error_last_10": 41.5526899914491, "mean_abs_error_last_25": 51.49254998309366, "mean_abs_error_last_50": 77.85143224744782, "mean_pred_prob": 0.05767970443703234, "mean_pred_prob_last_10": 0.2586538437753916, "mean_pred_prob_last_25": 0.14968306869268416, "mean_pred_prob_last_50": 0.09470507455989718, "mean_token_accuracy": 0.8727178752422333, "step": 31510 }, { "epoch": 0.5603256715197412, "grad_norm": 1.4187011569799055, "learning_rate": 0.0001, "loss": 0.72, "mean_abs_error": 550.1511540131183, "mean_abs_error_last_10": 165.61911693468912, "mean_abs_error_last_25": 196.31013575355198, "mean_abs_error_last_50": 260.7625940474576, "mean_pred_prob": 0.03643903917400167, "mean_pred_prob_last_10": 0.18005141781177372, "mean_pred_prob_last_25": 0.10041339248418808, "mean_pred_prob_last_50": 0.06093178564915434, "mean_token_accuracy": 0.8648828268051147, "step": 31520 }, { "epoch": 0.5605034398165432, "grad_norm": 2.127998878384511, "learning_rate": 0.0001, "loss": 0.7461, "mean_abs_error": 557.117165211477, "mean_abs_error_last_10": 296.081666026992, "mean_abs_error_last_25": 332.32110272215857, "mean_abs_error_last_50": 355.85140998287113, "mean_pred_prob": 0.030539226601831615, "mean_pred_prob_last_10": 0.14680768456310034, "mean_pred_prob_last_25": 0.08333997102454305, "mean_pred_prob_last_50": 0.05169645771384239, "mean_token_accuracy": 0.8737479746341705, "step": 31530 }, { "epoch": 0.5606812081133451, "grad_norm": 1.755464445824011, "learning_rate": 0.0001, "loss": 0.7692, "mean_abs_error": 738.7899946991204, "mean_abs_error_last_10": 371.4733760982473, "mean_abs_error_last_25": 427.11597074583943, "mean_abs_error_last_50": 503.3622925432286, "mean_pred_prob": 0.03324086146312766, "mean_pred_prob_last_10": 0.16151296261814424, "mean_pred_prob_last_25": 0.09026826572953724, "mean_pred_prob_last_50": 0.05505041942815296, "mean_token_accuracy": 0.8649769306182862, "step": 31540 }, { "epoch": 0.560858976410147, "grad_norm": 1.858925240039075, "learning_rate": 0.0001, "loss": 0.7694, "mean_abs_error": 235.5997639789611, "mean_abs_error_last_10": 67.50447725394258, "mean_abs_error_last_25": 77.72443092651213, "mean_abs_error_last_50": 119.44639722656723, "mean_pred_prob": 0.06023454810492694, "mean_pred_prob_last_10": 0.24182129576802253, "mean_pred_prob_last_25": 0.1507791569456458, "mean_pred_prob_last_50": 0.09559719553217291, "mean_token_accuracy": 0.8663119792938232, "step": 31550 }, { "epoch": 0.561036744706949, "grad_norm": 2.4158335424443695, "learning_rate": 0.0001, "loss": 0.9576, "mean_abs_error": 212.4079601807663, "mean_abs_error_last_10": 101.71460947148361, "mean_abs_error_last_25": 80.0271643487603, "mean_abs_error_last_50": 135.2756182032135, "mean_pred_prob": 0.037048089946620164, "mean_pred_prob_last_10": 0.18448572494089605, "mean_pred_prob_last_25": 0.10288634970784187, "mean_pred_prob_last_50": 0.06259288457222283, "mean_token_accuracy": 0.8819522023200989, "step": 31560 }, { "epoch": 0.5612145130037509, "grad_norm": 1.4282430392943792, "learning_rate": 0.0001, "loss": 0.7188, "mean_abs_error": 274.6018588731477, "mean_abs_error_last_10": 106.91404849911677, "mean_abs_error_last_25": 142.37992563648226, "mean_abs_error_last_50": 182.01059178664332, "mean_pred_prob": 0.0344550343696028, "mean_pred_prob_last_10": 0.15732339695096015, "mean_pred_prob_last_25": 0.08684803731739521, "mean_pred_prob_last_50": 0.055393666867166756, "mean_token_accuracy": 0.876551377773285, "step": 31570 }, { "epoch": 0.5613922813005529, "grad_norm": 1.7284428715309346, "learning_rate": 0.0001, "loss": 0.815, "mean_abs_error": 459.39601008685156, "mean_abs_error_last_10": 154.33149056493357, "mean_abs_error_last_25": 168.74485758351426, "mean_abs_error_last_50": 261.51436146777036, "mean_pred_prob": 0.035669119883095846, "mean_pred_prob_last_10": 0.1833120884373784, "mean_pred_prob_last_25": 0.10216651088558137, "mean_pred_prob_last_50": 0.06070184063864872, "mean_token_accuracy": 0.8712482273578643, "step": 31580 }, { "epoch": 0.5615700495973548, "grad_norm": 1.9682315335204073, "learning_rate": 0.0001, "loss": 0.7476, "mean_abs_error": 730.0707228551493, "mean_abs_error_last_10": 168.66969718840667, "mean_abs_error_last_25": 283.79325381274094, "mean_abs_error_last_50": 393.95380505098296, "mean_pred_prob": 0.03784408589999657, "mean_pred_prob_last_10": 0.17949140643468126, "mean_pred_prob_last_25": 0.10406793286092579, "mean_pred_prob_last_50": 0.06425063082715496, "mean_token_accuracy": 0.8728355526924133, "step": 31590 }, { "epoch": 0.5617478178941567, "grad_norm": 1.4060593130183174, "learning_rate": 0.0001, "loss": 0.6743, "mean_abs_error": 192.0063752702289, "mean_abs_error_last_10": 122.1653719465284, "mean_abs_error_last_25": 110.82828899845008, "mean_abs_error_last_50": 123.25080971929711, "mean_pred_prob": 0.04805947681888938, "mean_pred_prob_last_10": 0.2338104672729969, "mean_pred_prob_last_25": 0.12942458279430866, "mean_pred_prob_last_50": 0.08058459702879191, "mean_token_accuracy": 0.8870393335819244, "step": 31600 }, { "epoch": 0.5619255861909587, "grad_norm": 2.4374779210424533, "learning_rate": 0.0001, "loss": 0.8099, "mean_abs_error": 836.590897977401, "mean_abs_error_last_10": 222.58264544514768, "mean_abs_error_last_25": 367.05980104325397, "mean_abs_error_last_50": 564.4775706220347, "mean_pred_prob": 0.014505153649952262, "mean_pred_prob_last_10": 0.07121353537077084, "mean_pred_prob_last_25": 0.037639191246125844, "mean_pred_prob_last_50": 0.02363228499307297, "mean_token_accuracy": 0.8652683556079864, "step": 31610 }, { "epoch": 0.5621033544877606, "grad_norm": 1.2421142854701466, "learning_rate": 0.0001, "loss": 0.6237, "mean_abs_error": 313.3046439196122, "mean_abs_error_last_10": 118.69942037960985, "mean_abs_error_last_25": 138.63223929942353, "mean_abs_error_last_50": 225.31563574284255, "mean_pred_prob": 0.04999561733566225, "mean_pred_prob_last_10": 0.24136226214468479, "mean_pred_prob_last_25": 0.13818410150706767, "mean_pred_prob_last_50": 0.0841306938789785, "mean_token_accuracy": 0.8827644765377045, "step": 31620 }, { "epoch": 0.5622811227845625, "grad_norm": 1.1242877930009578, "learning_rate": 0.0001, "loss": 1.0153, "mean_abs_error": 1527.2058648802306, "mean_abs_error_last_10": 599.5778201960696, "mean_abs_error_last_25": 704.6112999758708, "mean_abs_error_last_50": 945.0996863992289, "mean_pred_prob": 0.024393050906655844, "mean_pred_prob_last_10": 0.12100385005614953, "mean_pred_prob_last_25": 0.06904437469202093, "mean_pred_prob_last_50": 0.04174491723970277, "mean_token_accuracy": 0.8676275372505188, "step": 31630 }, { "epoch": 0.5624588910813646, "grad_norm": 0.9184411215987224, "learning_rate": 0.0001, "loss": 0.6983, "mean_abs_error": 486.27465668471507, "mean_abs_error_last_10": 158.51985855519337, "mean_abs_error_last_25": 237.22744869334656, "mean_abs_error_last_50": 335.6940200263113, "mean_pred_prob": 0.036846823222003874, "mean_pred_prob_last_10": 0.18831495763733982, "mean_pred_prob_last_25": 0.10139617514796555, "mean_pred_prob_last_50": 0.061319833947345616, "mean_token_accuracy": 0.8589888274669647, "step": 31640 }, { "epoch": 0.5626366593781665, "grad_norm": 2.277123904515968, "learning_rate": 0.0001, "loss": 0.7699, "mean_abs_error": 395.5361287440729, "mean_abs_error_last_10": 221.53624260349858, "mean_abs_error_last_25": 273.7165837314879, "mean_abs_error_last_50": 308.3134392331016, "mean_pred_prob": 0.041479041101410985, "mean_pred_prob_last_10": 0.1748600346967578, "mean_pred_prob_last_25": 0.10746766533702612, "mean_pred_prob_last_50": 0.06794310552068054, "mean_token_accuracy": 0.8781428039073944, "step": 31650 }, { "epoch": 0.5628144276749685, "grad_norm": 1.2844066696720724, "learning_rate": 0.0001, "loss": 0.7018, "mean_abs_error": 755.9692758884439, "mean_abs_error_last_10": 340.27387373475347, "mean_abs_error_last_25": 347.4707924753105, "mean_abs_error_last_50": 513.9402342454576, "mean_pred_prob": 0.022739307663869113, "mean_pred_prob_last_10": 0.11508777053095401, "mean_pred_prob_last_25": 0.0638139582471922, "mean_pred_prob_last_50": 0.038553753029555084, "mean_token_accuracy": 0.8622909426689148, "step": 31660 }, { "epoch": 0.5629921959717704, "grad_norm": 1.221966360443029, "learning_rate": 0.0001, "loss": 0.7048, "mean_abs_error": 321.70458213417174, "mean_abs_error_last_10": 40.329669257107795, "mean_abs_error_last_25": 76.13307486877099, "mean_abs_error_last_50": 168.08128156662593, "mean_pred_prob": 0.046036084461957213, "mean_pred_prob_last_10": 0.22848839815706015, "mean_pred_prob_last_25": 0.12529764706268906, "mean_pred_prob_last_50": 0.07655931380577385, "mean_token_accuracy": 0.8708621740341187, "step": 31670 }, { "epoch": 0.5631699642685724, "grad_norm": 1.4630526770223429, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 827.831727014235, "mean_abs_error_last_10": 527.5300117329198, "mean_abs_error_last_25": 535.550572027644, "mean_abs_error_last_50": 623.8863522532174, "mean_pred_prob": 0.02657305358152371, "mean_pred_prob_last_10": 0.139473121901392, "mean_pred_prob_last_25": 0.07497878876747563, "mean_pred_prob_last_50": 0.04505519687081687, "mean_token_accuracy": 0.8743894279003144, "step": 31680 }, { "epoch": 0.5633477325653743, "grad_norm": 1.1061269380088017, "learning_rate": 0.0001, "loss": 0.8144, "mean_abs_error": 1134.0286678283924, "mean_abs_error_last_10": 543.0181593870195, "mean_abs_error_last_25": 605.3050286435057, "mean_abs_error_last_50": 831.9067312372639, "mean_pred_prob": 0.019569379321183078, "mean_pred_prob_last_10": 0.09717015327478293, "mean_pred_prob_last_25": 0.05339520812267438, "mean_pred_prob_last_50": 0.03273706928885076, "mean_token_accuracy": 0.8666198134422303, "step": 31690 }, { "epoch": 0.5635255008621762, "grad_norm": 1.5998105571349586, "learning_rate": 0.0001, "loss": 0.8541, "mean_abs_error": 371.6705257855839, "mean_abs_error_last_10": 212.20914305866413, "mean_abs_error_last_25": 231.768263546956, "mean_abs_error_last_50": 249.12435948306248, "mean_pred_prob": 0.05121006685076281, "mean_pred_prob_last_10": 0.2320341597078368, "mean_pred_prob_last_25": 0.13467722955392675, "mean_pred_prob_last_50": 0.08441022319020704, "mean_token_accuracy": 0.8711960256099701, "step": 31700 }, { "epoch": 0.5637032691589782, "grad_norm": 1.5203852076299966, "learning_rate": 0.0001, "loss": 0.7708, "mean_abs_error": 235.81035184712465, "mean_abs_error_last_10": 91.45216812759074, "mean_abs_error_last_25": 120.88092235272227, "mean_abs_error_last_50": 155.4690987365728, "mean_pred_prob": 0.030434251157566906, "mean_pred_prob_last_10": 0.14803242087364196, "mean_pred_prob_last_25": 0.08079503066837787, "mean_pred_prob_last_50": 0.05045259920880198, "mean_token_accuracy": 0.876561313867569, "step": 31710 }, { "epoch": 0.5638810374557801, "grad_norm": 1.2074415462739954, "learning_rate": 0.0001, "loss": 0.8235, "mean_abs_error": 122.70601733928115, "mean_abs_error_last_10": 16.418441523087186, "mean_abs_error_last_25": 30.827549969318436, "mean_abs_error_last_50": 54.91267625728104, "mean_pred_prob": 0.058909548446536064, "mean_pred_prob_last_10": 0.28265659846365454, "mean_pred_prob_last_25": 0.16166497189551593, "mean_pred_prob_last_50": 0.09950197860598564, "mean_token_accuracy": 0.8711443722248078, "step": 31720 }, { "epoch": 0.564058805752582, "grad_norm": 1.0463169820117935, "learning_rate": 0.0001, "loss": 0.7742, "mean_abs_error": 242.23267231919777, "mean_abs_error_last_10": 74.8542775433797, "mean_abs_error_last_25": 93.95188039597284, "mean_abs_error_last_50": 141.3505850998471, "mean_pred_prob": 0.03609569552354515, "mean_pred_prob_last_10": 0.17394792847335339, "mean_pred_prob_last_25": 0.09774986132979394, "mean_pred_prob_last_50": 0.0604116961825639, "mean_token_accuracy": 0.8671764314174653, "step": 31730 }, { "epoch": 0.564236574049384, "grad_norm": 0.9654241825888092, "learning_rate": 0.0001, "loss": 0.6347, "mean_abs_error": 236.2128175919448, "mean_abs_error_last_10": 65.96061751900028, "mean_abs_error_last_25": 121.02244264119622, "mean_abs_error_last_50": 168.3872596923025, "mean_pred_prob": 0.04016631282866001, "mean_pred_prob_last_10": 0.1982365507632494, "mean_pred_prob_last_25": 0.10896813459694385, "mean_pred_prob_last_50": 0.0669992495328188, "mean_token_accuracy": 0.8913573443889617, "step": 31740 }, { "epoch": 0.5644143423461859, "grad_norm": 2.078307744323155, "learning_rate": 0.0001, "loss": 0.7302, "mean_abs_error": 850.3333763794229, "mean_abs_error_last_10": 291.22858270963195, "mean_abs_error_last_25": 329.3079385208759, "mean_abs_error_last_50": 486.60238421179326, "mean_pred_prob": 0.035476527223363516, "mean_pred_prob_last_10": 0.1861684938077815, "mean_pred_prob_last_25": 0.09681488596252166, "mean_pred_prob_last_50": 0.05938105543027632, "mean_token_accuracy": 0.8827856481075287, "step": 31750 }, { "epoch": 0.564592110642988, "grad_norm": 1.2402009790673227, "learning_rate": 0.0001, "loss": 0.7329, "mean_abs_error": 993.8155081633388, "mean_abs_error_last_10": 564.9030619303402, "mean_abs_error_last_25": 657.0550359605438, "mean_abs_error_last_50": 752.9069978308468, "mean_pred_prob": 0.03985798920766683, "mean_pred_prob_last_10": 0.21527971483301372, "mean_pred_prob_last_25": 0.11478510407323483, "mean_pred_prob_last_50": 0.06790199650276918, "mean_token_accuracy": 0.8797866880893708, "step": 31760 }, { "epoch": 0.5647698789397899, "grad_norm": 2.5584631307212318, "learning_rate": 0.0001, "loss": 0.7843, "mean_abs_error": 1053.0190580524686, "mean_abs_error_last_10": 588.4679386170299, "mean_abs_error_last_25": 667.1309018294294, "mean_abs_error_last_50": 805.5771556962432, "mean_pred_prob": 0.037979702254233416, "mean_pred_prob_last_10": 0.18691078681731596, "mean_pred_prob_last_25": 0.10513457477209158, "mean_pred_prob_last_50": 0.06461683805828215, "mean_token_accuracy": 0.8755391955375671, "step": 31770 }, { "epoch": 0.5649476472365919, "grad_norm": 1.7881499044969824, "learning_rate": 0.0001, "loss": 0.7397, "mean_abs_error": 714.2635416136985, "mean_abs_error_last_10": 144.12662199582908, "mean_abs_error_last_25": 198.70935381930795, "mean_abs_error_last_50": 381.03076376947536, "mean_pred_prob": 0.023977915121940897, "mean_pred_prob_last_10": 0.13566508204676211, "mean_pred_prob_last_25": 0.06986543886596337, "mean_pred_prob_last_50": 0.04150808294652961, "mean_token_accuracy": 0.8681667268276214, "step": 31780 }, { "epoch": 0.5651254155333938, "grad_norm": 1.5160873818684668, "learning_rate": 0.0001, "loss": 0.6965, "mean_abs_error": 420.32569345012655, "mean_abs_error_last_10": 125.56017848179856, "mean_abs_error_last_25": 228.6427491078412, "mean_abs_error_last_50": 375.5173718229485, "mean_pred_prob": 0.03535452762153, "mean_pred_prob_last_10": 0.17890557451173664, "mean_pred_prob_last_25": 0.09748912462964654, "mean_pred_prob_last_50": 0.05966503052040935, "mean_token_accuracy": 0.867574143409729, "step": 31790 }, { "epoch": 0.5653031838301957, "grad_norm": 2.2866144674042292, "learning_rate": 0.0001, "loss": 0.7998, "mean_abs_error": 107.40481744833315, "mean_abs_error_last_10": 46.7892763436872, "mean_abs_error_last_25": 50.882981704294394, "mean_abs_error_last_50": 62.331993809846495, "mean_pred_prob": 0.07320811650715768, "mean_pred_prob_last_10": 0.34031844399869443, "mean_pred_prob_last_25": 0.19789891093969345, "mean_pred_prob_last_50": 0.12246369002386928, "mean_token_accuracy": 0.8799633860588074, "step": 31800 }, { "epoch": 0.5654809521269977, "grad_norm": 0.8733138501432204, "learning_rate": 0.0001, "loss": 0.7548, "mean_abs_error": 404.1389991619747, "mean_abs_error_last_10": 200.56620446363587, "mean_abs_error_last_25": 210.38278580267223, "mean_abs_error_last_50": 238.10803138613105, "mean_pred_prob": 0.040430200309492646, "mean_pred_prob_last_10": 0.17958961967378856, "mean_pred_prob_last_25": 0.10460950753185898, "mean_pred_prob_last_50": 0.06650935150682927, "mean_token_accuracy": 0.8757052779197693, "step": 31810 }, { "epoch": 0.5656587204237996, "grad_norm": 1.883285968112624, "learning_rate": 0.0001, "loss": 0.7557, "mean_abs_error": 591.3390905844053, "mean_abs_error_last_10": 280.3771564244511, "mean_abs_error_last_25": 353.777952787102, "mean_abs_error_last_50": 483.7356316743391, "mean_pred_prob": 0.042559854499995706, "mean_pred_prob_last_10": 0.23065119758248329, "mean_pred_prob_last_25": 0.1209640464745462, "mean_pred_prob_last_50": 0.07195591488853097, "mean_token_accuracy": 0.8734577596187592, "step": 31820 }, { "epoch": 0.5658364887206015, "grad_norm": 1.1099871430639123, "learning_rate": 0.0001, "loss": 0.746, "mean_abs_error": 226.15837307262692, "mean_abs_error_last_10": 66.0085309334124, "mean_abs_error_last_25": 79.38646713365603, "mean_abs_error_last_50": 120.51969875574189, "mean_pred_prob": 0.04262702155392617, "mean_pred_prob_last_10": 0.20952791357412934, "mean_pred_prob_last_25": 0.11339502427726984, "mean_pred_prob_last_50": 0.07038608957082033, "mean_token_accuracy": 0.861135846376419, "step": 31830 }, { "epoch": 0.5660142570174035, "grad_norm": 0.7378548089838377, "learning_rate": 0.0001, "loss": 0.7187, "mean_abs_error": 440.7440953742078, "mean_abs_error_last_10": 166.75076047493536, "mean_abs_error_last_25": 203.78350176460071, "mean_abs_error_last_50": 274.37087390431884, "mean_pred_prob": 0.037074779812246564, "mean_pred_prob_last_10": 0.19792553819715977, "mean_pred_prob_last_25": 0.10424564443528653, "mean_pred_prob_last_50": 0.06274401871487498, "mean_token_accuracy": 0.8765255510807037, "step": 31840 }, { "epoch": 0.5661920253142054, "grad_norm": 1.6633345270432973, "learning_rate": 0.0001, "loss": 0.7804, "mean_abs_error": 544.5127867619536, "mean_abs_error_last_10": 213.404035441011, "mean_abs_error_last_25": 208.02877108202216, "mean_abs_error_last_50": 331.9052287626761, "mean_pred_prob": 0.01930138354655355, "mean_pred_prob_last_10": 0.10450791176408529, "mean_pred_prob_last_25": 0.053465678449720146, "mean_pred_prob_last_50": 0.03219844368286431, "mean_token_accuracy": 0.8654161751270294, "step": 31850 }, { "epoch": 0.5663697936110074, "grad_norm": 1.6467125604867798, "learning_rate": 0.0001, "loss": 0.8055, "mean_abs_error": 245.20293586956, "mean_abs_error_last_10": 59.884986149858165, "mean_abs_error_last_25": 62.87529363998781, "mean_abs_error_last_50": 121.0320382058162, "mean_pred_prob": 0.05392690973822027, "mean_pred_prob_last_10": 0.2545864490792155, "mean_pred_prob_last_25": 0.14248512582853437, "mean_pred_prob_last_50": 0.08916163011454045, "mean_token_accuracy": 0.8750517249107361, "step": 31860 }, { "epoch": 0.5665475619078094, "grad_norm": 1.031274095451894, "learning_rate": 0.0001, "loss": 1.0634, "mean_abs_error": 384.9974029859445, "mean_abs_error_last_10": 80.21133545000743, "mean_abs_error_last_25": 101.23114480325648, "mean_abs_error_last_50": 205.0282818823433, "mean_pred_prob": 0.059234240662772206, "mean_pred_prob_last_10": 0.2640376767842099, "mean_pred_prob_last_25": 0.1617702700663358, "mean_pred_prob_last_50": 0.10235683505889029, "mean_token_accuracy": 0.8644204258918762, "step": 31870 }, { "epoch": 0.5667253302046114, "grad_norm": 2.530915939661779, "learning_rate": 0.0001, "loss": 0.8449, "mean_abs_error": 270.58696800227705, "mean_abs_error_last_10": 32.99893065983788, "mean_abs_error_last_25": 65.21211266225274, "mean_abs_error_last_50": 104.37360181984658, "mean_pred_prob": 0.05374765214510262, "mean_pred_prob_last_10": 0.265924658626318, "mean_pred_prob_last_25": 0.14791934667155146, "mean_pred_prob_last_50": 0.09041031887754798, "mean_token_accuracy": 0.8875993192195892, "step": 31880 }, { "epoch": 0.5669030985014133, "grad_norm": 1.51420002674886, "learning_rate": 0.0001, "loss": 0.7499, "mean_abs_error": 585.9657205107922, "mean_abs_error_last_10": 216.05486518254284, "mean_abs_error_last_25": 289.7381168891463, "mean_abs_error_last_50": 404.95151086699474, "mean_pred_prob": 0.03753899020375684, "mean_pred_prob_last_10": 0.1780902612954378, "mean_pred_prob_last_25": 0.10196726680733263, "mean_pred_prob_last_50": 0.06291079176589846, "mean_token_accuracy": 0.8788307964801788, "step": 31890 }, { "epoch": 0.5670808667982152, "grad_norm": 2.247387389015832, "learning_rate": 0.0001, "loss": 0.675, "mean_abs_error": 400.5515698679231, "mean_abs_error_last_10": 75.07368372717276, "mean_abs_error_last_25": 273.56690734769677, "mean_abs_error_last_50": 362.8407086433222, "mean_pred_prob": 0.040796564007177946, "mean_pred_prob_last_10": 0.20090255234390497, "mean_pred_prob_last_25": 0.11380125414580107, "mean_pred_prob_last_50": 0.06918093087151647, "mean_token_accuracy": 0.8764484584331512, "step": 31900 }, { "epoch": 0.5672586350950172, "grad_norm": 1.1806259992463028, "learning_rate": 0.0001, "loss": 0.6754, "mean_abs_error": 369.87696455254456, "mean_abs_error_last_10": 102.47497615858833, "mean_abs_error_last_25": 181.9338108016211, "mean_abs_error_last_50": 251.0307296803954, "mean_pred_prob": 0.0419376059435308, "mean_pred_prob_last_10": 0.2053949510678649, "mean_pred_prob_last_25": 0.11038273368030786, "mean_pred_prob_last_50": 0.06884196093305946, "mean_token_accuracy": 0.8732967674732208, "step": 31910 }, { "epoch": 0.5674364033918191, "grad_norm": 2.7108823250704766, "learning_rate": 0.0001, "loss": 0.8265, "mean_abs_error": 281.0602697091507, "mean_abs_error_last_10": 58.17328179221901, "mean_abs_error_last_25": 102.16307241138004, "mean_abs_error_last_50": 150.85123961538577, "mean_pred_prob": 0.051790902158245444, "mean_pred_prob_last_10": 0.22909513097256423, "mean_pred_prob_last_25": 0.1351583793759346, "mean_pred_prob_last_50": 0.08597243344411254, "mean_token_accuracy": 0.8669497728347778, "step": 31920 }, { "epoch": 0.567614171688621, "grad_norm": 0.8765163486373505, "learning_rate": 0.0001, "loss": 0.7992, "mean_abs_error": 1368.328901787933, "mean_abs_error_last_10": 730.0714495935537, "mean_abs_error_last_25": 843.5278562319967, "mean_abs_error_last_50": 1035.5199654340277, "mean_pred_prob": 0.03312778215185972, "mean_pred_prob_last_10": 0.1455405057349708, "mean_pred_prob_last_25": 0.0848602923550061, "mean_pred_prob_last_50": 0.053531866270350295, "mean_token_accuracy": 0.8734632074832916, "step": 31930 }, { "epoch": 0.567791939985423, "grad_norm": 2.283844755763227, "learning_rate": 0.0001, "loss": 0.8169, "mean_abs_error": 251.16658600045366, "mean_abs_error_last_10": 129.54558737376433, "mean_abs_error_last_25": 126.43731314335521, "mean_abs_error_last_50": 182.27136499375553, "mean_pred_prob": 0.04932800210081041, "mean_pred_prob_last_10": 0.23972419276833534, "mean_pred_prob_last_25": 0.13386288862675427, "mean_pred_prob_last_50": 0.08249852787703275, "mean_token_accuracy": 0.8734575927257537, "step": 31940 }, { "epoch": 0.5679697082822249, "grad_norm": 1.4796253726210893, "learning_rate": 0.0001, "loss": 0.6268, "mean_abs_error": 182.51002899190857, "mean_abs_error_last_10": 21.4678331673486, "mean_abs_error_last_25": 58.60813301539044, "mean_abs_error_last_50": 93.3340049859063, "mean_pred_prob": 0.05977089647203684, "mean_pred_prob_last_10": 0.28878516927361486, "mean_pred_prob_last_25": 0.1616668425500393, "mean_pred_prob_last_50": 0.09967983793467283, "mean_token_accuracy": 0.8817027628421783, "step": 31950 }, { "epoch": 0.5681474765790269, "grad_norm": 1.6543622408230259, "learning_rate": 0.0001, "loss": 0.6218, "mean_abs_error": 231.35872385441613, "mean_abs_error_last_10": 32.94043208801688, "mean_abs_error_last_25": 84.12339641526253, "mean_abs_error_last_50": 162.42938173329892, "mean_pred_prob": 0.05554812238551676, "mean_pred_prob_last_10": 0.25099498927593233, "mean_pred_prob_last_25": 0.1497207896783948, "mean_pred_prob_last_50": 0.09320850847288967, "mean_token_accuracy": 0.8755313038825989, "step": 31960 }, { "epoch": 0.5683252448758288, "grad_norm": 1.390650375806482, "learning_rate": 0.0001, "loss": 0.7178, "mean_abs_error": 305.6306950946841, "mean_abs_error_last_10": 130.72059454493638, "mean_abs_error_last_25": 144.2005561769604, "mean_abs_error_last_50": 225.96142370915018, "mean_pred_prob": 0.04361330182291567, "mean_pred_prob_last_10": 0.2163074130192399, "mean_pred_prob_last_25": 0.12266915151849389, "mean_pred_prob_last_50": 0.07393986168317497, "mean_token_accuracy": 0.8735604107379913, "step": 31970 }, { "epoch": 0.5685030131726307, "grad_norm": 1.8184164117829231, "learning_rate": 0.0001, "loss": 0.7373, "mean_abs_error": 1056.0982872676777, "mean_abs_error_last_10": 593.7669444842346, "mean_abs_error_last_25": 681.504393980922, "mean_abs_error_last_50": 700.5921424647125, "mean_pred_prob": 0.02314322473539505, "mean_pred_prob_last_10": 0.12018606926430948, "mean_pred_prob_last_25": 0.06480618443456479, "mean_pred_prob_last_50": 0.0391294660337735, "mean_token_accuracy": 0.8730436801910401, "step": 31980 }, { "epoch": 0.5686807814694328, "grad_norm": 1.3482950352152387, "learning_rate": 0.0001, "loss": 0.6389, "mean_abs_error": 197.24735675524568, "mean_abs_error_last_10": 66.31156086145045, "mean_abs_error_last_25": 65.322856869491, "mean_abs_error_last_50": 110.09558076729604, "mean_pred_prob": 0.0603465982247144, "mean_pred_prob_last_10": 0.2822503440082073, "mean_pred_prob_last_25": 0.16184298619627951, "mean_pred_prob_last_50": 0.0988866150379181, "mean_token_accuracy": 0.8734951734542846, "step": 31990 }, { "epoch": 0.5688585497662347, "grad_norm": 1.6876652162236032, "learning_rate": 0.0001, "loss": 0.7337, "mean_abs_error": 852.7811846738423, "mean_abs_error_last_10": 522.0897149373334, "mean_abs_error_last_25": 593.4026187926472, "mean_abs_error_last_50": 649.0144768727241, "mean_pred_prob": 0.03077344650228042, "mean_pred_prob_last_10": 0.15697255705890711, "mean_pred_prob_last_25": 0.08599266397650354, "mean_pred_prob_last_50": 0.05184554700681474, "mean_token_accuracy": 0.8763659000396729, "step": 32000 }, { "epoch": 0.5690363180630367, "grad_norm": 2.1252325977910247, "learning_rate": 0.0001, "loss": 0.702, "mean_abs_error": 226.28850433364283, "mean_abs_error_last_10": 42.22531787313219, "mean_abs_error_last_25": 60.127036176982664, "mean_abs_error_last_50": 104.65827702713845, "mean_pred_prob": 0.06214326689951122, "mean_pred_prob_last_10": 0.2690022962167859, "mean_pred_prob_last_25": 0.15953194694593548, "mean_pred_prob_last_50": 0.10122976480051875, "mean_token_accuracy": 0.8845486998558044, "step": 32010 }, { "epoch": 0.5692140863598386, "grad_norm": 3.1473785269736383, "learning_rate": 0.0001, "loss": 0.8402, "mean_abs_error": 585.4128838934687, "mean_abs_error_last_10": 190.5543542215894, "mean_abs_error_last_25": 307.7104995844246, "mean_abs_error_last_50": 359.00994076174516, "mean_pred_prob": 0.028066887939348816, "mean_pred_prob_last_10": 0.1411477943882346, "mean_pred_prob_last_25": 0.07717170650139452, "mean_pred_prob_last_50": 0.04755677557550371, "mean_token_accuracy": 0.8642886698246002, "step": 32020 }, { "epoch": 0.5693918546566406, "grad_norm": 0.9895855233527833, "learning_rate": 0.0001, "loss": 0.7381, "mean_abs_error": 220.52075615948212, "mean_abs_error_last_10": 46.935678820050285, "mean_abs_error_last_25": 101.86300700665264, "mean_abs_error_last_50": 145.04449147073052, "mean_pred_prob": 0.055053595919162035, "mean_pred_prob_last_10": 0.2576191797852516, "mean_pred_prob_last_25": 0.14736029570922254, "mean_pred_prob_last_50": 0.09089560424908996, "mean_token_accuracy": 0.8647333920001984, "step": 32030 }, { "epoch": 0.5695696229534425, "grad_norm": 1.2391879158814418, "learning_rate": 0.0001, "loss": 0.7221, "mean_abs_error": 448.4304727767844, "mean_abs_error_last_10": 231.38349440450492, "mean_abs_error_last_25": 232.07558196174855, "mean_abs_error_last_50": 304.12858363042466, "mean_pred_prob": 0.03938536350615322, "mean_pred_prob_last_10": 0.18938610814511775, "mean_pred_prob_last_25": 0.10519480695948005, "mean_pred_prob_last_50": 0.06589276855811477, "mean_token_accuracy": 0.8741515576839447, "step": 32040 }, { "epoch": 0.5697473912502444, "grad_norm": 1.5161838905730254, "learning_rate": 0.0001, "loss": 0.6818, "mean_abs_error": 192.0180802947573, "mean_abs_error_last_10": 45.205350260889766, "mean_abs_error_last_25": 85.17685754180165, "mean_abs_error_last_50": 119.000555576081, "mean_pred_prob": 0.04189251381903887, "mean_pred_prob_last_10": 0.19763876926153898, "mean_pred_prob_last_25": 0.11461981767788529, "mean_pred_prob_last_50": 0.07090863878838718, "mean_token_accuracy": 0.8826324701309204, "step": 32050 }, { "epoch": 0.5699251595470464, "grad_norm": 1.7799812988411958, "learning_rate": 0.0001, "loss": 0.6469, "mean_abs_error": 275.9136374610663, "mean_abs_error_last_10": 107.43471723384616, "mean_abs_error_last_25": 135.4586717308252, "mean_abs_error_last_50": 208.12753396621468, "mean_pred_prob": 0.03777896049432457, "mean_pred_prob_last_10": 0.18492361065000296, "mean_pred_prob_last_25": 0.10202610660344362, "mean_pred_prob_last_50": 0.06341409496963024, "mean_token_accuracy": 0.8745073676109314, "step": 32060 }, { "epoch": 0.5701029278438483, "grad_norm": 1.807958183787617, "learning_rate": 0.0001, "loss": 0.8369, "mean_abs_error": 479.0064941617717, "mean_abs_error_last_10": 183.5709732934801, "mean_abs_error_last_25": 388.3815760876149, "mean_abs_error_last_50": 382.6300566655453, "mean_pred_prob": 0.028000601730309427, "mean_pred_prob_last_10": 0.14697498492896557, "mean_pred_prob_last_25": 0.07751855235546827, "mean_pred_prob_last_50": 0.04722022041678429, "mean_token_accuracy": 0.8762845039367676, "step": 32070 }, { "epoch": 0.5702806961406502, "grad_norm": 1.5610731686286605, "learning_rate": 0.0001, "loss": 0.7559, "mean_abs_error": 466.44562493344336, "mean_abs_error_last_10": 161.7131449946997, "mean_abs_error_last_25": 199.49921805916856, "mean_abs_error_last_50": 252.2455315469474, "mean_pred_prob": 0.03579006582731381, "mean_pred_prob_last_10": 0.17033773011062295, "mean_pred_prob_last_25": 0.09348360344301909, "mean_pred_prob_last_50": 0.05910843601450324, "mean_token_accuracy": 0.8763257682323455, "step": 32080 }, { "epoch": 0.5704584644374522, "grad_norm": 1.495068724094444, "learning_rate": 0.0001, "loss": 0.7712, "mean_abs_error": 487.5568779357051, "mean_abs_error_last_10": 392.98077839893347, "mean_abs_error_last_25": 326.19685757004805, "mean_abs_error_last_50": 343.8765211304092, "mean_pred_prob": 0.031163463450502603, "mean_pred_prob_last_10": 0.14231576316524297, "mean_pred_prob_last_25": 0.08066623505437746, "mean_pred_prob_last_50": 0.05102786086499691, "mean_token_accuracy": 0.8745019853115081, "step": 32090 }, { "epoch": 0.5706362327342541, "grad_norm": 1.5193269521356751, "learning_rate": 0.0001, "loss": 0.6889, "mean_abs_error": 339.7389391347937, "mean_abs_error_last_10": 124.83742697027455, "mean_abs_error_last_25": 139.4254861655738, "mean_abs_error_last_50": 229.7793976953481, "mean_pred_prob": 0.03668964104726911, "mean_pred_prob_last_10": 0.17702395785599948, "mean_pred_prob_last_25": 0.09853032203391195, "mean_pred_prob_last_50": 0.061094514140859243, "mean_token_accuracy": 0.8723666429519653, "step": 32100 }, { "epoch": 0.5708140010310562, "grad_norm": 1.86385757853404, "learning_rate": 0.0001, "loss": 0.6677, "mean_abs_error": 211.48149000274194, "mean_abs_error_last_10": 114.60999653716314, "mean_abs_error_last_25": 120.82315578481112, "mean_abs_error_last_50": 122.46901914106718, "mean_pred_prob": 0.04378358982503414, "mean_pred_prob_last_10": 0.2210135031491518, "mean_pred_prob_last_25": 0.12534204265102744, "mean_pred_prob_last_50": 0.07682634899392723, "mean_token_accuracy": 0.8765900075435639, "step": 32110 }, { "epoch": 0.5709917693278581, "grad_norm": 1.5864419055707513, "learning_rate": 0.0001, "loss": 0.6815, "mean_abs_error": 272.6523328168175, "mean_abs_error_last_10": 55.71771802218954, "mean_abs_error_last_25": 92.46377598375769, "mean_abs_error_last_50": 133.573320166943, "mean_pred_prob": 0.03932312997058034, "mean_pred_prob_last_10": 0.2089343223720789, "mean_pred_prob_last_25": 0.11054736860096455, "mean_pred_prob_last_50": 0.06684424076229334, "mean_token_accuracy": 0.88095144033432, "step": 32120 }, { "epoch": 0.57116953762466, "grad_norm": 1.0227348850156368, "learning_rate": 0.0001, "loss": 0.7402, "mean_abs_error": 843.0776075526717, "mean_abs_error_last_10": 353.13683563433085, "mean_abs_error_last_25": 478.25412154770856, "mean_abs_error_last_50": 659.8992020133196, "mean_pred_prob": 0.027932535181753336, "mean_pred_prob_last_10": 0.14377530953497625, "mean_pred_prob_last_25": 0.07455090330913663, "mean_pred_prob_last_50": 0.04622258032322861, "mean_token_accuracy": 0.8702369153499603, "step": 32130 }, { "epoch": 0.571347305921462, "grad_norm": 2.453069239395774, "learning_rate": 0.0001, "loss": 0.9784, "mean_abs_error": 1327.1700253437805, "mean_abs_error_last_10": 708.1073263389346, "mean_abs_error_last_25": 779.6394840597557, "mean_abs_error_last_50": 957.0702239887387, "mean_pred_prob": 0.0229375896815327, "mean_pred_prob_last_10": 0.11439328772830777, "mean_pred_prob_last_25": 0.06342171662690817, "mean_pred_prob_last_50": 0.03922864962514723, "mean_token_accuracy": 0.8615457534790039, "step": 32140 }, { "epoch": 0.5715250742182639, "grad_norm": 1.5507418696092983, "learning_rate": 0.0001, "loss": 0.7987, "mean_abs_error": 497.4374421748911, "mean_abs_error_last_10": 177.73270202444735, "mean_abs_error_last_25": 244.22562244196553, "mean_abs_error_last_50": 311.6546224531692, "mean_pred_prob": 0.03882324153091758, "mean_pred_prob_last_10": 0.18428068766370415, "mean_pred_prob_last_25": 0.10176184873562306, "mean_pred_prob_last_50": 0.06335506885079667, "mean_token_accuracy": 0.8728938102722168, "step": 32150 }, { "epoch": 0.5717028425150659, "grad_norm": 1.7579228115878263, "learning_rate": 0.0001, "loss": 0.7295, "mean_abs_error": 576.951481802232, "mean_abs_error_last_10": 141.43854654926784, "mean_abs_error_last_25": 192.53699217805007, "mean_abs_error_last_50": 307.4522314820507, "mean_pred_prob": 0.024515064898878336, "mean_pred_prob_last_10": 0.12878662012517453, "mean_pred_prob_last_25": 0.0673388622701168, "mean_pred_prob_last_50": 0.0407950968015939, "mean_token_accuracy": 0.8828189611434937, "step": 32160 }, { "epoch": 0.5718806108118678, "grad_norm": 1.4953647619927088, "learning_rate": 0.0001, "loss": 0.6805, "mean_abs_error": 373.32237880291814, "mean_abs_error_last_10": 155.19425945528081, "mean_abs_error_last_25": 199.36996372652052, "mean_abs_error_last_50": 272.71468823458054, "mean_pred_prob": 0.048125050187809396, "mean_pred_prob_last_10": 0.23642988767824136, "mean_pred_prob_last_25": 0.1322157073416747, "mean_pred_prob_last_50": 0.08085448171477765, "mean_token_accuracy": 0.869532710313797, "step": 32170 }, { "epoch": 0.5720583791086697, "grad_norm": 3.540785761682513, "learning_rate": 0.0001, "loss": 0.7526, "mean_abs_error": 411.628793961455, "mean_abs_error_last_10": 53.2516045499465, "mean_abs_error_last_25": 131.20404486622175, "mean_abs_error_last_50": 187.59724993041587, "mean_pred_prob": 0.034542435547336936, "mean_pred_prob_last_10": 0.1815108649432659, "mean_pred_prob_last_25": 0.09876871090382337, "mean_pred_prob_last_50": 0.058634983375668526, "mean_token_accuracy": 0.8761365234851837, "step": 32180 }, { "epoch": 0.5722361474054717, "grad_norm": 4.263125508496598, "learning_rate": 0.0001, "loss": 0.8934, "mean_abs_error": 2464.4155142779537, "mean_abs_error_last_10": 1292.130523412591, "mean_abs_error_last_25": 1476.4316260864782, "mean_abs_error_last_50": 1844.5578802974414, "mean_pred_prob": 0.027179988210264128, "mean_pred_prob_last_10": 0.10843229811289348, "mean_pred_prob_last_25": 0.0637931351433508, "mean_pred_prob_last_50": 0.043206247844500466, "mean_token_accuracy": 0.8716100633144379, "step": 32190 }, { "epoch": 0.5724139157022736, "grad_norm": 1.1820437301763866, "learning_rate": 0.0001, "loss": 0.7258, "mean_abs_error": 265.7902312738796, "mean_abs_error_last_10": 33.483712775890695, "mean_abs_error_last_25": 71.56270324197871, "mean_abs_error_last_50": 151.97615928248223, "mean_pred_prob": 0.040804173052310946, "mean_pred_prob_last_10": 0.21272665690630674, "mean_pred_prob_last_25": 0.11696950271725655, "mean_pred_prob_last_50": 0.07022698111832142, "mean_token_accuracy": 0.8643091797828675, "step": 32200 }, { "epoch": 0.5725916839990756, "grad_norm": 1.393314725776781, "learning_rate": 0.0001, "loss": 0.7259, "mean_abs_error": 1075.3193684842342, "mean_abs_error_last_10": 552.5103513311176, "mean_abs_error_last_25": 673.6317747046666, "mean_abs_error_last_50": 870.5965010004762, "mean_pred_prob": 0.021959600260015578, "mean_pred_prob_last_10": 0.1000890480238013, "mean_pred_prob_last_25": 0.058914500038372355, "mean_pred_prob_last_50": 0.03606350779300556, "mean_token_accuracy": 0.8685509383678436, "step": 32210 }, { "epoch": 0.5727694522958775, "grad_norm": 1.4177549328413355, "learning_rate": 0.0001, "loss": 0.758, "mean_abs_error": 244.7099352052826, "mean_abs_error_last_10": 87.79191723334044, "mean_abs_error_last_25": 106.05758381002545, "mean_abs_error_last_50": 169.05900892819878, "mean_pred_prob": 0.052102076425217095, "mean_pred_prob_last_10": 0.25647263787686825, "mean_pred_prob_last_25": 0.14287628261372448, "mean_pred_prob_last_50": 0.08763149632140994, "mean_token_accuracy": 0.8702778279781341, "step": 32220 }, { "epoch": 0.5729472205926796, "grad_norm": 1.5914451872862112, "learning_rate": 0.0001, "loss": 0.7158, "mean_abs_error": 536.565703518991, "mean_abs_error_last_10": 252.7260035802097, "mean_abs_error_last_25": 382.626841747615, "mean_abs_error_last_50": 391.51333624581406, "mean_pred_prob": 0.034435457875952126, "mean_pred_prob_last_10": 0.17253828942775726, "mean_pred_prob_last_25": 0.09554877933114767, "mean_pred_prob_last_50": 0.05831441460177302, "mean_token_accuracy": 0.8685838341712951, "step": 32230 }, { "epoch": 0.5731249888894815, "grad_norm": 2.7587020222006013, "learning_rate": 0.0001, "loss": 0.7043, "mean_abs_error": 228.1554717123783, "mean_abs_error_last_10": 39.92127526188759, "mean_abs_error_last_25": 62.63252352357622, "mean_abs_error_last_50": 107.28616831291694, "mean_pred_prob": 0.04681591698899865, "mean_pred_prob_last_10": 0.23324296474456788, "mean_pred_prob_last_25": 0.13397605028003454, "mean_pred_prob_last_50": 0.0797313460148871, "mean_token_accuracy": 0.868679541349411, "step": 32240 }, { "epoch": 0.5733027571862834, "grad_norm": 2.354309459866207, "learning_rate": 0.0001, "loss": 0.6289, "mean_abs_error": 356.53678449415554, "mean_abs_error_last_10": 106.62549687516419, "mean_abs_error_last_25": 195.96587961776962, "mean_abs_error_last_50": 261.8239631148596, "mean_pred_prob": 0.048014425113797186, "mean_pred_prob_last_10": 0.22125094942748547, "mean_pred_prob_last_25": 0.12717406591400504, "mean_pred_prob_last_50": 0.080197817645967, "mean_token_accuracy": 0.8776334166526795, "step": 32250 }, { "epoch": 0.5734805254830854, "grad_norm": 1.442753138721905, "learning_rate": 0.0001, "loss": 0.7785, "mean_abs_error": 224.34849026412638, "mean_abs_error_last_10": 21.15724116292439, "mean_abs_error_last_25": 48.123187997974945, "mean_abs_error_last_50": 85.61868841701124, "mean_pred_prob": 0.045856408402323724, "mean_pred_prob_last_10": 0.22881827242672442, "mean_pred_prob_last_25": 0.12613727804273367, "mean_pred_prob_last_50": 0.07825401853770017, "mean_token_accuracy": 0.875289922952652, "step": 32260 }, { "epoch": 0.5736582937798873, "grad_norm": 1.0148499080240303, "learning_rate": 0.0001, "loss": 0.6289, "mean_abs_error": 187.15603437460385, "mean_abs_error_last_10": 48.486254997424766, "mean_abs_error_last_25": 62.243205101374016, "mean_abs_error_last_50": 109.61876915621913, "mean_pred_prob": 0.03340805433690548, "mean_pred_prob_last_10": 0.16953181177377702, "mean_pred_prob_last_25": 0.09311057664453984, "mean_pred_prob_last_50": 0.0566941486671567, "mean_token_accuracy": 0.886099910736084, "step": 32270 }, { "epoch": 0.5738360620766892, "grad_norm": 1.438933661461317, "learning_rate": 0.0001, "loss": 0.8194, "mean_abs_error": 872.5690265986774, "mean_abs_error_last_10": 397.6659560261386, "mean_abs_error_last_25": 526.099633360834, "mean_abs_error_last_50": 683.8598954165811, "mean_pred_prob": 0.04839018406491959, "mean_pred_prob_last_10": 0.24496471060847397, "mean_pred_prob_last_25": 0.13331883109058254, "mean_pred_prob_last_50": 0.08037589643499814, "mean_token_accuracy": 0.8746883273124695, "step": 32280 }, { "epoch": 0.5740138303734912, "grad_norm": 1.9292717768824867, "learning_rate": 0.0001, "loss": 0.7173, "mean_abs_error": 308.1976010621538, "mean_abs_error_last_10": 89.93390984103372, "mean_abs_error_last_25": 166.7362806146422, "mean_abs_error_last_50": 219.01113668317748, "mean_pred_prob": 0.039805275946855546, "mean_pred_prob_last_10": 0.21192070804536342, "mean_pred_prob_last_25": 0.11088617974892259, "mean_pred_prob_last_50": 0.06642275797203183, "mean_token_accuracy": 0.8662383019924164, "step": 32290 }, { "epoch": 0.5741915986702931, "grad_norm": 1.3525739945445696, "learning_rate": 0.0001, "loss": 0.8528, "mean_abs_error": 1461.7519662831878, "mean_abs_error_last_10": 767.8287907866433, "mean_abs_error_last_25": 899.4003453760197, "mean_abs_error_last_50": 1070.2062882794232, "mean_pred_prob": 0.025872179091675208, "mean_pred_prob_last_10": 0.14444267856306398, "mean_pred_prob_last_25": 0.0746424317083438, "mean_pred_prob_last_50": 0.043873158920905556, "mean_token_accuracy": 0.8654489934444427, "step": 32300 }, { "epoch": 0.5743693669670951, "grad_norm": 1.3848274046363511, "learning_rate": 0.0001, "loss": 0.8264, "mean_abs_error": 327.73317084245843, "mean_abs_error_last_10": 63.437364759638264, "mean_abs_error_last_25": 104.9673623545477, "mean_abs_error_last_50": 174.1663462112298, "mean_pred_prob": 0.03314274838194251, "mean_pred_prob_last_10": 0.1730290472507477, "mean_pred_prob_last_25": 0.0938549242913723, "mean_pred_prob_last_50": 0.05658312812447548, "mean_token_accuracy": 0.8608259677886962, "step": 32310 }, { "epoch": 0.574547135263897, "grad_norm": 0.9977860517494435, "learning_rate": 0.0001, "loss": 0.6859, "mean_abs_error": 203.7608178275328, "mean_abs_error_last_10": 106.57144795869075, "mean_abs_error_last_25": 145.01970118826728, "mean_abs_error_last_50": 165.1751909319027, "mean_pred_prob": 0.053109037969261405, "mean_pred_prob_last_10": 0.23984678480774163, "mean_pred_prob_last_25": 0.1414126074872911, "mean_pred_prob_last_50": 0.08796157818287612, "mean_token_accuracy": 0.87043097615242, "step": 32320 }, { "epoch": 0.5747249035606989, "grad_norm": 2.3788616760940147, "learning_rate": 0.0001, "loss": 0.7053, "mean_abs_error": 1044.6576347630537, "mean_abs_error_last_10": 438.79119167047975, "mean_abs_error_last_25": 462.41823235556524, "mean_abs_error_last_50": 698.2669893838977, "mean_pred_prob": 0.01956920681113843, "mean_pred_prob_last_10": 0.09579911031760276, "mean_pred_prob_last_25": 0.054033764218911526, "mean_pred_prob_last_50": 0.03289319047471508, "mean_token_accuracy": 0.8789593338966369, "step": 32330 }, { "epoch": 0.5749026718575009, "grad_norm": 1.4838171656548675, "learning_rate": 0.0001, "loss": 0.6907, "mean_abs_error": 308.19812566320877, "mean_abs_error_last_10": 87.76299455656026, "mean_abs_error_last_25": 103.95806395211439, "mean_abs_error_last_50": 205.8837367760671, "mean_pred_prob": 0.04165753447450697, "mean_pred_prob_last_10": 0.2132706668227911, "mean_pred_prob_last_25": 0.11663768477737904, "mean_pred_prob_last_50": 0.06967232516035438, "mean_token_accuracy": 0.8827095270156861, "step": 32340 }, { "epoch": 0.5750804401543029, "grad_norm": 1.6903565516238037, "learning_rate": 0.0001, "loss": 0.7178, "mean_abs_error": 404.97933079987945, "mean_abs_error_last_10": 116.94651286712947, "mean_abs_error_last_25": 142.31535857049047, "mean_abs_error_last_50": 238.28175614847137, "mean_pred_prob": 0.032209005951881406, "mean_pred_prob_last_10": 0.15441461391746997, "mean_pred_prob_last_25": 0.08701483840122819, "mean_pred_prob_last_50": 0.05339512303471565, "mean_token_accuracy": 0.8739298999309539, "step": 32350 }, { "epoch": 0.5752582084511049, "grad_norm": 1.7921114397802806, "learning_rate": 0.0001, "loss": 0.75, "mean_abs_error": 328.851931892284, "mean_abs_error_last_10": 75.86015794197792, "mean_abs_error_last_25": 91.36216901174416, "mean_abs_error_last_50": 159.90623532397748, "mean_pred_prob": 0.043864273373037575, "mean_pred_prob_last_10": 0.21531049571931363, "mean_pred_prob_last_25": 0.11810519369319081, "mean_pred_prob_last_50": 0.07354225260205567, "mean_token_accuracy": 0.8677087426185608, "step": 32360 }, { "epoch": 0.5754359767479068, "grad_norm": 2.4539812069758047, "learning_rate": 0.0001, "loss": 0.8878, "mean_abs_error": 109.47230389856831, "mean_abs_error_last_10": 19.81465275675196, "mean_abs_error_last_25": 36.87549400422259, "mean_abs_error_last_50": 53.84365193769729, "mean_pred_prob": 0.046049682423472404, "mean_pred_prob_last_10": 0.22518010511994363, "mean_pred_prob_last_25": 0.12624325528740882, "mean_pred_prob_last_50": 0.07746491450816392, "mean_token_accuracy": 0.8670489728450775, "step": 32370 }, { "epoch": 0.5756137450447087, "grad_norm": 2.258224789520014, "learning_rate": 0.0001, "loss": 0.7667, "mean_abs_error": 1079.457771609697, "mean_abs_error_last_10": 605.6918776822698, "mean_abs_error_last_25": 693.8624238595257, "mean_abs_error_last_50": 781.8825080365517, "mean_pred_prob": 0.03696888835693244, "mean_pred_prob_last_10": 0.19528768721793313, "mean_pred_prob_last_25": 0.09906604154675733, "mean_pred_prob_last_50": 0.06150120050442638, "mean_token_accuracy": 0.8699365973472595, "step": 32380 }, { "epoch": 0.5757915133415107, "grad_norm": 1.1349164008699724, "learning_rate": 0.0001, "loss": 0.7341, "mean_abs_error": 320.01612163499266, "mean_abs_error_last_10": 47.882122196084644, "mean_abs_error_last_25": 85.94599140578927, "mean_abs_error_last_50": 149.66248069446794, "mean_pred_prob": 0.029289142275229097, "mean_pred_prob_last_10": 0.1569737784564495, "mean_pred_prob_last_25": 0.08394081369042397, "mean_pred_prob_last_50": 0.050833409931510684, "mean_token_accuracy": 0.8753091275691987, "step": 32390 }, { "epoch": 0.5759692816383126, "grad_norm": 2.1805269405693055, "learning_rate": 0.0001, "loss": 0.8621, "mean_abs_error": 748.6221231542529, "mean_abs_error_last_10": 182.124324188599, "mean_abs_error_last_25": 224.0568435145768, "mean_abs_error_last_50": 390.33388897757317, "mean_pred_prob": 0.032836034125648436, "mean_pred_prob_last_10": 0.15720370293129235, "mean_pred_prob_last_25": 0.0870464376406744, "mean_pred_prob_last_50": 0.054767396813258526, "mean_token_accuracy": 0.8724441468715668, "step": 32400 }, { "epoch": 0.5761470499351146, "grad_norm": 2.1957744305009324, "learning_rate": 0.0001, "loss": 0.7538, "mean_abs_error": 669.1385689468063, "mean_abs_error_last_10": 102.96018895243779, "mean_abs_error_last_25": 160.53641855806188, "mean_abs_error_last_50": 327.031654012438, "mean_pred_prob": 0.040889936656458306, "mean_pred_prob_last_10": 0.1906848235987127, "mean_pred_prob_last_25": 0.10967586964834482, "mean_pred_prob_last_50": 0.06725668205181137, "mean_token_accuracy": 0.8756264746189117, "step": 32410 }, { "epoch": 0.5763248182319165, "grad_norm": 1.6962537542676661, "learning_rate": 0.0001, "loss": 0.7146, "mean_abs_error": 205.18116175750538, "mean_abs_error_last_10": 106.32210593039736, "mean_abs_error_last_25": 170.52079910471357, "mean_abs_error_last_50": 205.75756765838477, "mean_pred_prob": 0.05054769734852016, "mean_pred_prob_last_10": 0.25140111669898035, "mean_pred_prob_last_25": 0.13951295260339974, "mean_pred_prob_last_50": 0.08360023610293865, "mean_token_accuracy": 0.8799666225910187, "step": 32420 }, { "epoch": 0.5765025865287184, "grad_norm": 1.404829989685402, "learning_rate": 0.0001, "loss": 0.7683, "mean_abs_error": 203.44067490264885, "mean_abs_error_last_10": 38.74272254394181, "mean_abs_error_last_25": 68.38831754103289, "mean_abs_error_last_50": 100.9718718594493, "mean_pred_prob": 0.04689850439317524, "mean_pred_prob_last_10": 0.2160424405708909, "mean_pred_prob_last_25": 0.1208118723705411, "mean_pred_prob_last_50": 0.07679009698331356, "mean_token_accuracy": 0.8678113341331481, "step": 32430 }, { "epoch": 0.5766803548255204, "grad_norm": 1.1112346034670502, "learning_rate": 0.0001, "loss": 0.801, "mean_abs_error": 815.6692509596216, "mean_abs_error_last_10": 455.1890759560641, "mean_abs_error_last_25": 552.4643975093275, "mean_abs_error_last_50": 648.3750655967638, "mean_pred_prob": 0.026766306353965773, "mean_pred_prob_last_10": 0.15034601453808136, "mean_pred_prob_last_25": 0.07732670339173638, "mean_pred_prob_last_50": 0.0456965001008939, "mean_token_accuracy": 0.8678021013736725, "step": 32440 }, { "epoch": 0.5768581231223223, "grad_norm": 1.2920894937825331, "learning_rate": 0.0001, "loss": 0.817, "mean_abs_error": 315.22978354306315, "mean_abs_error_last_10": 131.6610654514372, "mean_abs_error_last_25": 144.4877932710046, "mean_abs_error_last_50": 214.80493145680174, "mean_pred_prob": 0.03201640066690743, "mean_pred_prob_last_10": 0.14480037856847047, "mean_pred_prob_last_25": 0.08076715301722288, "mean_pred_prob_last_50": 0.05151336416602135, "mean_token_accuracy": 0.8700067400932312, "step": 32450 }, { "epoch": 0.5770358914191243, "grad_norm": 2.2948902953707306, "learning_rate": 0.0001, "loss": 0.7254, "mean_abs_error": 389.1741520692478, "mean_abs_error_last_10": 78.74714797602078, "mean_abs_error_last_25": 118.05633700957178, "mean_abs_error_last_50": 183.3229107250691, "mean_pred_prob": 0.03415730488486588, "mean_pred_prob_last_10": 0.18640080019831656, "mean_pred_prob_last_25": 0.09781040381640196, "mean_pred_prob_last_50": 0.05827660216018558, "mean_token_accuracy": 0.8747810840606689, "step": 32460 }, { "epoch": 0.5772136597159263, "grad_norm": 2.5652264226956016, "learning_rate": 0.0001, "loss": 0.8434, "mean_abs_error": 573.7087925145054, "mean_abs_error_last_10": 149.954622811215, "mean_abs_error_last_25": 197.36106940529174, "mean_abs_error_last_50": 323.02402889885434, "mean_pred_prob": 0.052864330308511856, "mean_pred_prob_last_10": 0.2644932806491852, "mean_pred_prob_last_25": 0.14881854439154268, "mean_pred_prob_last_50": 0.08932785061188042, "mean_token_accuracy": 0.8622282564640045, "step": 32470 }, { "epoch": 0.5773914280127282, "grad_norm": 1.0581008770963465, "learning_rate": 0.0001, "loss": 0.689, "mean_abs_error": 338.3297492205001, "mean_abs_error_last_10": 60.21999503125822, "mean_abs_error_last_25": 176.96952860768474, "mean_abs_error_last_50": 250.63910040424753, "mean_pred_prob": 0.035012933006510136, "mean_pred_prob_last_10": 0.1714587274938822, "mean_pred_prob_last_25": 0.0956659410148859, "mean_pred_prob_last_50": 0.058646219130605456, "mean_token_accuracy": 0.8773933231830597, "step": 32480 }, { "epoch": 0.5775691963095302, "grad_norm": 6.699590920860011, "learning_rate": 0.0001, "loss": 0.8589, "mean_abs_error": 478.86219633290955, "mean_abs_error_last_10": 308.5494534852636, "mean_abs_error_last_25": 398.9305943275721, "mean_abs_error_last_50": 470.96697342882135, "mean_pred_prob": 0.03026811722666025, "mean_pred_prob_last_10": 0.15510054342448712, "mean_pred_prob_last_25": 0.08497430561110378, "mean_pred_prob_last_50": 0.05112959835678339, "mean_token_accuracy": 0.8708164751529693, "step": 32490 }, { "epoch": 0.5777469646063321, "grad_norm": 1.1630703969522078, "learning_rate": 0.0001, "loss": 0.6516, "mean_abs_error": 121.1271240672094, "mean_abs_error_last_10": 21.499325661936574, "mean_abs_error_last_25": 43.85767579814732, "mean_abs_error_last_50": 76.19499679167829, "mean_pred_prob": 0.04955694694072008, "mean_pred_prob_last_10": 0.23104971051216125, "mean_pred_prob_last_25": 0.13213243298232555, "mean_pred_prob_last_50": 0.08260337933897972, "mean_token_accuracy": 0.8774383783340454, "step": 32500 }, { "epoch": 0.5779247329031341, "grad_norm": 2.2586309241423352, "learning_rate": 0.0001, "loss": 0.8252, "mean_abs_error": 415.87931133381664, "mean_abs_error_last_10": 106.78240816998559, "mean_abs_error_last_25": 218.75645040058276, "mean_abs_error_last_50": 279.686305801884, "mean_pred_prob": 0.04177560955286026, "mean_pred_prob_last_10": 0.21028472818434238, "mean_pred_prob_last_25": 0.11633045459166169, "mean_pred_prob_last_50": 0.07118076598271728, "mean_token_accuracy": 0.8743378698825837, "step": 32510 }, { "epoch": 0.578102501199936, "grad_norm": 1.1251124298674184, "learning_rate": 0.0001, "loss": 0.7581, "mean_abs_error": 502.3480903432334, "mean_abs_error_last_10": 227.94748140793664, "mean_abs_error_last_25": 217.3047542999161, "mean_abs_error_last_50": 297.05282594875285, "mean_pred_prob": 0.023062393168220296, "mean_pred_prob_last_10": 0.11142110743094236, "mean_pred_prob_last_25": 0.06249432455515489, "mean_pred_prob_last_50": 0.03811205936362967, "mean_token_accuracy": 0.879909211397171, "step": 32520 }, { "epoch": 0.5782802694967379, "grad_norm": 1.212106679803754, "learning_rate": 0.0001, "loss": 0.6896, "mean_abs_error": 721.2889652635404, "mean_abs_error_last_10": 186.66224063384345, "mean_abs_error_last_25": 311.33177456292964, "mean_abs_error_last_50": 417.4860113980244, "mean_pred_prob": 0.041045426344498995, "mean_pred_prob_last_10": 0.19344959611771628, "mean_pred_prob_last_25": 0.10993690271861851, "mean_pred_prob_last_50": 0.06863390660146251, "mean_token_accuracy": 0.8680875539779663, "step": 32530 }, { "epoch": 0.5784580377935399, "grad_norm": 1.2095299297431066, "learning_rate": 0.0001, "loss": 0.6355, "mean_abs_error": 242.37005761650752, "mean_abs_error_last_10": 80.02217942900229, "mean_abs_error_last_25": 195.43349249072818, "mean_abs_error_last_50": 220.8810045125696, "mean_pred_prob": 0.04268882656469941, "mean_pred_prob_last_10": 0.201231437176466, "mean_pred_prob_last_25": 0.11370212715119124, "mean_pred_prob_last_50": 0.07071542935445904, "mean_token_accuracy": 0.8703024506568908, "step": 32540 }, { "epoch": 0.5786358060903418, "grad_norm": 1.1074351758175989, "learning_rate": 0.0001, "loss": 0.8135, "mean_abs_error": 569.4971175141088, "mean_abs_error_last_10": 164.08340799667974, "mean_abs_error_last_25": 216.4762467183031, "mean_abs_error_last_50": 347.0579841058939, "mean_pred_prob": 0.025927395501639694, "mean_pred_prob_last_10": 0.13731553298421204, "mean_pred_prob_last_25": 0.07032298070844263, "mean_pred_prob_last_50": 0.0422677859198302, "mean_token_accuracy": 0.8699152648448945, "step": 32550 }, { "epoch": 0.5788135743871438, "grad_norm": 1.343673689563699, "learning_rate": 0.0001, "loss": 0.7172, "mean_abs_error": 226.63470792785998, "mean_abs_error_last_10": 66.2059600886044, "mean_abs_error_last_25": 78.09511837710842, "mean_abs_error_last_50": 133.79219196575426, "mean_pred_prob": 0.0487994909286499, "mean_pred_prob_last_10": 0.24945489317178726, "mean_pred_prob_last_25": 0.14028361570090056, "mean_pred_prob_last_50": 0.08345595365390182, "mean_token_accuracy": 0.8726655960083007, "step": 32560 }, { "epoch": 0.5789913426839457, "grad_norm": 1.174251256247107, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 377.85661717950086, "mean_abs_error_last_10": 178.14178629316393, "mean_abs_error_last_25": 197.09163036020647, "mean_abs_error_last_50": 259.40554673114957, "mean_pred_prob": 0.02803899922873825, "mean_pred_prob_last_10": 0.11782133746892214, "mean_pred_prob_last_25": 0.06842135181650519, "mean_pred_prob_last_50": 0.044089723564684394, "mean_token_accuracy": 0.8713900089263916, "step": 32570 }, { "epoch": 0.5791691109807477, "grad_norm": 2.1428189346191417, "learning_rate": 0.0001, "loss": 0.6871, "mean_abs_error": 512.7722429649396, "mean_abs_error_last_10": 99.78556664921501, "mean_abs_error_last_25": 137.86996553741724, "mean_abs_error_last_50": 251.08441346022255, "mean_pred_prob": 0.047036439541261646, "mean_pred_prob_last_10": 0.221328906272538, "mean_pred_prob_last_25": 0.12749010492116214, "mean_pred_prob_last_50": 0.07821259989868849, "mean_token_accuracy": 0.872039121389389, "step": 32580 }, { "epoch": 0.5793468792775497, "grad_norm": 1.8907983895893732, "learning_rate": 0.0001, "loss": 0.6397, "mean_abs_error": 1071.5170175092849, "mean_abs_error_last_10": 706.2110086558857, "mean_abs_error_last_25": 756.8223206757983, "mean_abs_error_last_50": 830.4280948612983, "mean_pred_prob": 0.039726307257660666, "mean_pred_prob_last_10": 0.20119998999580274, "mean_pred_prob_last_25": 0.10800405812624377, "mean_pred_prob_last_50": 0.0667355734622106, "mean_token_accuracy": 0.8812907338142395, "step": 32590 }, { "epoch": 0.5795246475743516, "grad_norm": 1.053096127328198, "learning_rate": 0.0001, "loss": 0.7517, "mean_abs_error": 518.0285956906614, "mean_abs_error_last_10": 213.2833929124218, "mean_abs_error_last_25": 286.862025995831, "mean_abs_error_last_50": 416.7891989762106, "mean_pred_prob": 0.041197653859853745, "mean_pred_prob_last_10": 0.20765558443963528, "mean_pred_prob_last_25": 0.11311855395324528, "mean_pred_prob_last_50": 0.0700962461065501, "mean_token_accuracy": 0.8852773785591126, "step": 32600 }, { "epoch": 0.5797024158711536, "grad_norm": 1.338851585166513, "learning_rate": 0.0001, "loss": 0.8908, "mean_abs_error": 823.990340001373, "mean_abs_error_last_10": 378.9530571291698, "mean_abs_error_last_25": 408.0048956287453, "mean_abs_error_last_50": 566.6887093876611, "mean_pred_prob": 0.03131756684742868, "mean_pred_prob_last_10": 0.1581350634631235, "mean_pred_prob_last_25": 0.0940521544776857, "mean_pred_prob_last_50": 0.054999832157045604, "mean_token_accuracy": 0.8805502891540528, "step": 32610 }, { "epoch": 0.5798801841679555, "grad_norm": 0.7853383384219752, "learning_rate": 0.0001, "loss": 0.7293, "mean_abs_error": 866.3342113162473, "mean_abs_error_last_10": 337.40119421838125, "mean_abs_error_last_25": 398.4255541562606, "mean_abs_error_last_50": 507.4373039877244, "mean_pred_prob": 0.02194465449429117, "mean_pred_prob_last_10": 0.11295199698070064, "mean_pred_prob_last_25": 0.061599370068870486, "mean_pred_prob_last_50": 0.037444257835159075, "mean_token_accuracy": 0.8707336068153382, "step": 32620 }, { "epoch": 0.5800579524647574, "grad_norm": 1.1752666669727083, "learning_rate": 0.0001, "loss": 0.8231, "mean_abs_error": 683.1482694643506, "mean_abs_error_last_10": 389.7507874751276, "mean_abs_error_last_25": 590.2891014785766, "mean_abs_error_last_50": 622.4362923422921, "mean_pred_prob": 0.04284507696283981, "mean_pred_prob_last_10": 0.14853499559685587, "mean_pred_prob_last_25": 0.09916995980311186, "mean_pred_prob_last_50": 0.06692665315931663, "mean_token_accuracy": 0.8604839980602265, "step": 32630 }, { "epoch": 0.5802357207615594, "grad_norm": 0.9201944155217795, "learning_rate": 0.0001, "loss": 0.8964, "mean_abs_error": 252.16689498031255, "mean_abs_error_last_10": 104.53249643862998, "mean_abs_error_last_25": 118.28184444800995, "mean_abs_error_last_50": 156.46849982814248, "mean_pred_prob": 0.03438805933110416, "mean_pred_prob_last_10": 0.16186277908273042, "mean_pred_prob_last_25": 0.0896898495964706, "mean_pred_prob_last_50": 0.05680948151275515, "mean_token_accuracy": 0.8837296962738037, "step": 32640 }, { "epoch": 0.5804134890583613, "grad_norm": 1.0045470175451032, "learning_rate": 0.0001, "loss": 0.6907, "mean_abs_error": 331.0713750507758, "mean_abs_error_last_10": 47.34187405020448, "mean_abs_error_last_25": 94.19068783837689, "mean_abs_error_last_50": 197.59733171498326, "mean_pred_prob": 0.048532280046492814, "mean_pred_prob_last_10": 0.2248387224972248, "mean_pred_prob_last_25": 0.13145672492682933, "mean_pred_prob_last_50": 0.08188399970531464, "mean_token_accuracy": 0.8823596000671386, "step": 32650 }, { "epoch": 0.5805912573551633, "grad_norm": 2.197283335241599, "learning_rate": 0.0001, "loss": 0.7703, "mean_abs_error": 445.0303908185759, "mean_abs_error_last_10": 134.41143849049791, "mean_abs_error_last_25": 153.10818440348493, "mean_abs_error_last_50": 240.51805877612628, "mean_pred_prob": 0.05537722717272118, "mean_pred_prob_last_10": 0.23410584628582, "mean_pred_prob_last_25": 0.14013548709917814, "mean_pred_prob_last_50": 0.09006883178371936, "mean_token_accuracy": 0.8696851909160614, "step": 32660 }, { "epoch": 0.5807690256519652, "grad_norm": 1.4073135025758678, "learning_rate": 0.0001, "loss": 0.7707, "mean_abs_error": 754.961860849872, "mean_abs_error_last_10": 237.81685523164097, "mean_abs_error_last_25": 365.9052206329273, "mean_abs_error_last_50": 451.6501857695606, "mean_pred_prob": 0.030970882705878465, "mean_pred_prob_last_10": 0.15211121658794582, "mean_pred_prob_last_25": 0.08537227886263281, "mean_pred_prob_last_50": 0.05235540634021163, "mean_token_accuracy": 0.8700811207294464, "step": 32670 }, { "epoch": 0.5809467939487671, "grad_norm": 1.2709138726292608, "learning_rate": 0.0001, "loss": 0.7432, "mean_abs_error": 363.50170234267483, "mean_abs_error_last_10": 157.67045004108581, "mean_abs_error_last_25": 165.144955244985, "mean_abs_error_last_50": 240.96309480361566, "mean_pred_prob": 0.03125089155510068, "mean_pred_prob_last_10": 0.17002116851508617, "mean_pred_prob_last_25": 0.09052750701084733, "mean_pred_prob_last_50": 0.05347162252292037, "mean_token_accuracy": 0.8765999972820282, "step": 32680 }, { "epoch": 0.5811245622455691, "grad_norm": 1.3591032143811796, "learning_rate": 0.0001, "loss": 0.8032, "mean_abs_error": 261.86178239084427, "mean_abs_error_last_10": 105.15656123535709, "mean_abs_error_last_25": 113.98424115238727, "mean_abs_error_last_50": 148.82666168911558, "mean_pred_prob": 0.049423388484865424, "mean_pred_prob_last_10": 0.23357066251337527, "mean_pred_prob_last_25": 0.1340870765969157, "mean_pred_prob_last_50": 0.08213969264179469, "mean_token_accuracy": 0.8710578560829163, "step": 32690 }, { "epoch": 0.5813023305423711, "grad_norm": 1.6778114338311712, "learning_rate": 0.0001, "loss": 0.7425, "mean_abs_error": 291.6708773587685, "mean_abs_error_last_10": 46.80924839540131, "mean_abs_error_last_25": 114.5883007254857, "mean_abs_error_last_50": 200.42184104289882, "mean_pred_prob": 0.051794079644605515, "mean_pred_prob_last_10": 0.24801443964242936, "mean_pred_prob_last_25": 0.14113677404820918, "mean_pred_prob_last_50": 0.0871674282476306, "mean_token_accuracy": 0.8783189296722412, "step": 32700 }, { "epoch": 0.5814800988391731, "grad_norm": 2.829255398992258, "learning_rate": 0.0001, "loss": 0.7957, "mean_abs_error": 608.9042605557363, "mean_abs_error_last_10": 228.2516853495606, "mean_abs_error_last_25": 253.41747705445852, "mean_abs_error_last_50": 407.70304653724384, "mean_pred_prob": 0.027688172284979374, "mean_pred_prob_last_10": 0.14292845684103667, "mean_pred_prob_last_25": 0.07829314949922264, "mean_pred_prob_last_50": 0.04661029876442626, "mean_token_accuracy": 0.8715745687484742, "step": 32710 }, { "epoch": 0.581657867135975, "grad_norm": 1.2173343688606706, "learning_rate": 0.0001, "loss": 0.7848, "mean_abs_error": 752.859297915941, "mean_abs_error_last_10": 230.33185649859337, "mean_abs_error_last_25": 239.67745587715132, "mean_abs_error_last_50": 316.66345339806867, "mean_pred_prob": 0.03153857035795227, "mean_pred_prob_last_10": 0.15870631218422204, "mean_pred_prob_last_25": 0.08809321320150047, "mean_pred_prob_last_50": 0.0527818996924907, "mean_token_accuracy": 0.8746402740478516, "step": 32720 }, { "epoch": 0.5818356354327769, "grad_norm": 1.5521957906435733, "learning_rate": 0.0001, "loss": 0.7375, "mean_abs_error": 1245.0541620375884, "mean_abs_error_last_10": 656.3736927983543, "mean_abs_error_last_25": 682.5417226517093, "mean_abs_error_last_50": 891.9422569913638, "mean_pred_prob": 0.03229015361284837, "mean_pred_prob_last_10": 0.13682849451433868, "mean_pred_prob_last_25": 0.08222705589723774, "mean_pred_prob_last_50": 0.052391899976646526, "mean_token_accuracy": 0.8746040761470795, "step": 32730 }, { "epoch": 0.5820134037295789, "grad_norm": 1.8554687378597252, "learning_rate": 0.0001, "loss": 0.7244, "mean_abs_error": 1543.5843532952133, "mean_abs_error_last_10": 814.6912992999142, "mean_abs_error_last_25": 940.9389494075203, "mean_abs_error_last_50": 1126.3401756159851, "mean_pred_prob": 0.016397572905407286, "mean_pred_prob_last_10": 0.08198959443107015, "mean_pred_prob_last_25": 0.04391977817867883, "mean_pred_prob_last_50": 0.026908071149227906, "mean_token_accuracy": 0.8751017987728119, "step": 32740 }, { "epoch": 0.5821911720263808, "grad_norm": 1.3531692173197434, "learning_rate": 0.0001, "loss": 0.7604, "mean_abs_error": 681.5322071033554, "mean_abs_error_last_10": 221.50834691122108, "mean_abs_error_last_25": 344.24911856627244, "mean_abs_error_last_50": 425.3370797244708, "mean_pred_prob": 0.03767017551581375, "mean_pred_prob_last_10": 0.17376810654532165, "mean_pred_prob_last_25": 0.09616748683620244, "mean_pred_prob_last_50": 0.060944751149509105, "mean_token_accuracy": 0.8742580711841583, "step": 32750 }, { "epoch": 0.5823689403231828, "grad_norm": 1.5300817482585847, "learning_rate": 0.0001, "loss": 0.7706, "mean_abs_error": 460.9500834357612, "mean_abs_error_last_10": 144.96575161950986, "mean_abs_error_last_25": 188.2983589056655, "mean_abs_error_last_50": 251.6428080838582, "mean_pred_prob": 0.03416307574952952, "mean_pred_prob_last_10": 0.16887309852172622, "mean_pred_prob_last_25": 0.09315872311708517, "mean_pred_prob_last_50": 0.05801042959210463, "mean_token_accuracy": 0.8729477763175965, "step": 32760 }, { "epoch": 0.5825467086199847, "grad_norm": 1.3076752684433388, "learning_rate": 0.0001, "loss": 0.6548, "mean_abs_error": 525.27762041446, "mean_abs_error_last_10": 152.03380467727408, "mean_abs_error_last_25": 192.3480368606739, "mean_abs_error_last_50": 302.0627475358768, "mean_pred_prob": 0.044490744709037244, "mean_pred_prob_last_10": 0.20886020730249583, "mean_pred_prob_last_25": 0.12013320135883987, "mean_pred_prob_last_50": 0.07418019277974963, "mean_token_accuracy": 0.8754137396812439, "step": 32770 }, { "epoch": 0.5827244769167866, "grad_norm": 1.0309241151690225, "learning_rate": 0.0001, "loss": 0.6907, "mean_abs_error": 344.8974621801573, "mean_abs_error_last_10": 183.96459538673838, "mean_abs_error_last_25": 174.25159254994745, "mean_abs_error_last_50": 206.689114964301, "mean_pred_prob": 0.04389750764239579, "mean_pred_prob_last_10": 0.20312674716114998, "mean_pred_prob_last_25": 0.11249471958726645, "mean_pred_prob_last_50": 0.07138009630143642, "mean_token_accuracy": 0.8777077615261077, "step": 32780 }, { "epoch": 0.5829022452135886, "grad_norm": 1.4347494230332007, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 865.659193892869, "mean_abs_error_last_10": 434.37670327291073, "mean_abs_error_last_25": 555.1759533266538, "mean_abs_error_last_50": 675.1903451802465, "mean_pred_prob": 0.049559581570792946, "mean_pred_prob_last_10": 0.22005470324656926, "mean_pred_prob_last_25": 0.12494293377967551, "mean_pred_prob_last_50": 0.0794047230418073, "mean_token_accuracy": 0.8767613232135772, "step": 32790 }, { "epoch": 0.5830800135103905, "grad_norm": 1.0982286038654328, "learning_rate": 0.0001, "loss": 0.7053, "mean_abs_error": 911.4382538614205, "mean_abs_error_last_10": 418.178296927875, "mean_abs_error_last_25": 480.0070548321781, "mean_abs_error_last_50": 641.767806207174, "mean_pred_prob": 0.0484956481930567, "mean_pred_prob_last_10": 0.24684725332190283, "mean_pred_prob_last_25": 0.13548907372460234, "mean_pred_prob_last_50": 0.08224190135952085, "mean_token_accuracy": 0.8818948924541473, "step": 32800 }, { "epoch": 0.5832577818071925, "grad_norm": 1.9688299254120134, "learning_rate": 0.0001, "loss": 0.6772, "mean_abs_error": 333.00220046113964, "mean_abs_error_last_10": 66.70876154318731, "mean_abs_error_last_25": 114.25746616641842, "mean_abs_error_last_50": 183.10012731291778, "mean_pred_prob": 0.04139907585922629, "mean_pred_prob_last_10": 0.19743588902056217, "mean_pred_prob_last_25": 0.1098217005841434, "mean_pred_prob_last_50": 0.06878628595732153, "mean_token_accuracy": 0.8781088709831237, "step": 32810 }, { "epoch": 0.5834355501039945, "grad_norm": 2.1316071947692548, "learning_rate": 0.0001, "loss": 0.8608, "mean_abs_error": 473.4356393968941, "mean_abs_error_last_10": 97.1346192427959, "mean_abs_error_last_25": 194.13719786367759, "mean_abs_error_last_50": 299.6182129910146, "mean_pred_prob": 0.032397340930765496, "mean_pred_prob_last_10": 0.16778280425351114, "mean_pred_prob_last_25": 0.0872294555301778, "mean_pred_prob_last_50": 0.053799500106833875, "mean_token_accuracy": 0.8823185086250305, "step": 32820 }, { "epoch": 0.5836133184007964, "grad_norm": 1.041169065417923, "learning_rate": 0.0001, "loss": 0.652, "mean_abs_error": 269.6957030229743, "mean_abs_error_last_10": 50.76782339765606, "mean_abs_error_last_25": 81.81622803436379, "mean_abs_error_last_50": 136.9633195385948, "mean_pred_prob": 0.05850203214213252, "mean_pred_prob_last_10": 0.24864823780953885, "mean_pred_prob_last_25": 0.15170771796256305, "mean_pred_prob_last_50": 0.09655577214434743, "mean_token_accuracy": 0.8762270689010621, "step": 32830 }, { "epoch": 0.5837910866975984, "grad_norm": 1.9595361666139188, "learning_rate": 0.0001, "loss": 0.7413, "mean_abs_error": 474.35798212179486, "mean_abs_error_last_10": 189.4863654223216, "mean_abs_error_last_25": 225.1726967436585, "mean_abs_error_last_50": 315.6742527896815, "mean_pred_prob": 0.048349210747983304, "mean_pred_prob_last_10": 0.20622565948870034, "mean_pred_prob_last_25": 0.12299898207420483, "mean_pred_prob_last_50": 0.07769198582973331, "mean_token_accuracy": 0.8710676074028015, "step": 32840 }, { "epoch": 0.5839688549944003, "grad_norm": 2.3038889911678515, "learning_rate": 0.0001, "loss": 0.8382, "mean_abs_error": 1394.8889551906955, "mean_abs_error_last_10": 545.4452808833064, "mean_abs_error_last_25": 667.4687196660766, "mean_abs_error_last_50": 934.7262763307087, "mean_pred_prob": 0.01654494469112251, "mean_pred_prob_last_10": 0.09525059092557057, "mean_pred_prob_last_25": 0.04848311780951917, "mean_pred_prob_last_50": 0.028280270815594123, "mean_token_accuracy": 0.8625456035137177, "step": 32850 }, { "epoch": 0.5841466232912023, "grad_norm": 1.3713401931562765, "learning_rate": 0.0001, "loss": 0.7357, "mean_abs_error": 419.52956801880725, "mean_abs_error_last_10": 198.6307770088636, "mean_abs_error_last_25": 269.6652898788269, "mean_abs_error_last_50": 329.57742340923534, "mean_pred_prob": 0.03548568040132523, "mean_pred_prob_last_10": 0.1582203410565853, "mean_pred_prob_last_25": 0.09210832361131907, "mean_pred_prob_last_50": 0.05791140855289996, "mean_token_accuracy": 0.8746548593044281, "step": 32860 }, { "epoch": 0.5843243915880042, "grad_norm": 1.9847222209281004, "learning_rate": 0.0001, "loss": 0.7413, "mean_abs_error": 289.6628643230765, "mean_abs_error_last_10": 80.42159301032038, "mean_abs_error_last_25": 124.40196537290319, "mean_abs_error_last_50": 190.49843550173273, "mean_pred_prob": 0.03688349793665111, "mean_pred_prob_last_10": 0.1814730640500784, "mean_pred_prob_last_25": 0.10242272559553385, "mean_pred_prob_last_50": 0.0630139303393662, "mean_token_accuracy": 0.8760977447032928, "step": 32870 }, { "epoch": 0.5845021598848061, "grad_norm": 1.4706688125852507, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 440.07067495508545, "mean_abs_error_last_10": 280.4866128683163, "mean_abs_error_last_25": 248.6492732337349, "mean_abs_error_last_50": 296.9113189427495, "mean_pred_prob": 0.03553031905321404, "mean_pred_prob_last_10": 0.15758060358930379, "mean_pred_prob_last_25": 0.09511926379054784, "mean_pred_prob_last_50": 0.05827048536157235, "mean_token_accuracy": 0.8683795988559723, "step": 32880 }, { "epoch": 0.5846799281816081, "grad_norm": 1.2309070232465282, "learning_rate": 0.0001, "loss": 0.7715, "mean_abs_error": 489.4878638336959, "mean_abs_error_last_10": 136.20603138183816, "mean_abs_error_last_25": 150.88639606867557, "mean_abs_error_last_50": 255.40966518114328, "mean_pred_prob": 0.0342346916673705, "mean_pred_prob_last_10": 0.17931871432811022, "mean_pred_prob_last_25": 0.09713497515767813, "mean_pred_prob_last_50": 0.057349010417237875, "mean_token_accuracy": 0.8616825461387634, "step": 32890 }, { "epoch": 0.58485769647841, "grad_norm": 1.2131478273421346, "learning_rate": 0.0001, "loss": 0.7108, "mean_abs_error": 1085.832076239177, "mean_abs_error_last_10": 474.3657457946497, "mean_abs_error_last_25": 650.928256781607, "mean_abs_error_last_50": 823.7779966578506, "mean_pred_prob": 0.01851432286784984, "mean_pred_prob_last_10": 0.09749395947437733, "mean_pred_prob_last_25": 0.05183832304319367, "mean_pred_prob_last_50": 0.031239518988877536, "mean_token_accuracy": 0.8705288767814636, "step": 32900 }, { "epoch": 0.585035464775212, "grad_norm": 0.961730297226656, "learning_rate": 0.0001, "loss": 0.679, "mean_abs_error": 279.8526905820461, "mean_abs_error_last_10": 145.52267934930887, "mean_abs_error_last_25": 166.2762474581003, "mean_abs_error_last_50": 179.7036652382099, "mean_pred_prob": 0.046709376608487216, "mean_pred_prob_last_10": 0.22568739096168428, "mean_pred_prob_last_25": 0.12667710618115963, "mean_pred_prob_last_50": 0.07870317470515147, "mean_token_accuracy": 0.8726699352264404, "step": 32910 }, { "epoch": 0.5852132330720139, "grad_norm": 1.2898381840537116, "learning_rate": 0.0001, "loss": 0.7856, "mean_abs_error": 593.8050232500925, "mean_abs_error_last_10": 337.138567893959, "mean_abs_error_last_25": 511.5914782531283, "mean_abs_error_last_50": 587.3608854368092, "mean_pred_prob": 0.038602099753916264, "mean_pred_prob_last_10": 0.19461254645138978, "mean_pred_prob_last_25": 0.10726356320083141, "mean_pred_prob_last_50": 0.06451817024499178, "mean_token_accuracy": 0.8775243699550629, "step": 32920 }, { "epoch": 0.5853910013688158, "grad_norm": 1.179949220649669, "learning_rate": 0.0001, "loss": 0.9031, "mean_abs_error": 411.64013827830803, "mean_abs_error_last_10": 108.91162982593009, "mean_abs_error_last_25": 153.5821619099181, "mean_abs_error_last_50": 227.11570074755724, "mean_pred_prob": 0.02733544467482716, "mean_pred_prob_last_10": 0.14609125945717097, "mean_pred_prob_last_25": 0.07709458963945508, "mean_pred_prob_last_50": 0.04609441664069891, "mean_token_accuracy": 0.8633720934391022, "step": 32930 }, { "epoch": 0.5855687696656179, "grad_norm": 0.9608462811536589, "learning_rate": 0.0001, "loss": 0.6129, "mean_abs_error": 351.8252674845704, "mean_abs_error_last_10": 105.36186153264525, "mean_abs_error_last_25": 121.53597750958404, "mean_abs_error_last_50": 181.10284083363237, "mean_pred_prob": 0.045257973100524394, "mean_pred_prob_last_10": 0.22600736578460784, "mean_pred_prob_last_25": 0.1261824483051896, "mean_pred_prob_last_50": 0.07664109460311011, "mean_token_accuracy": 0.8835758686065673, "step": 32940 }, { "epoch": 0.5857465379624198, "grad_norm": 2.8576218426823248, "learning_rate": 0.0001, "loss": 0.7354, "mean_abs_error": 767.4556828208252, "mean_abs_error_last_10": 263.0994132641006, "mean_abs_error_last_25": 309.84224383275216, "mean_abs_error_last_50": 434.8186291480009, "mean_pred_prob": 0.026380866265390067, "mean_pred_prob_last_10": 0.13716110415989533, "mean_pred_prob_last_25": 0.07472003763541579, "mean_pred_prob_last_50": 0.04505782873602584, "mean_token_accuracy": 0.8709759414196014, "step": 32950 }, { "epoch": 0.5859243062592218, "grad_norm": 1.6774148749699214, "learning_rate": 0.0001, "loss": 0.914, "mean_abs_error": 987.3142446578437, "mean_abs_error_last_10": 644.3100569463186, "mean_abs_error_last_25": 723.4373548769843, "mean_abs_error_last_50": 812.2282350825246, "mean_pred_prob": 0.031187461808440276, "mean_pred_prob_last_10": 0.14297953249479178, "mean_pred_prob_last_25": 0.07904110960080288, "mean_pred_prob_last_50": 0.05045027920568827, "mean_token_accuracy": 0.8698287785053254, "step": 32960 }, { "epoch": 0.5861020745560237, "grad_norm": 1.1651724827913608, "learning_rate": 0.0001, "loss": 0.6765, "mean_abs_error": 304.1918515707875, "mean_abs_error_last_10": 67.92240915463975, "mean_abs_error_last_25": 111.57424534918918, "mean_abs_error_last_50": 205.83991175826264, "mean_pred_prob": 0.031121886102482677, "mean_pred_prob_last_10": 0.15452194027602673, "mean_pred_prob_last_25": 0.08489857148379087, "mean_pred_prob_last_50": 0.05193145172670484, "mean_token_accuracy": 0.8777925789356231, "step": 32970 }, { "epoch": 0.5862798428528256, "grad_norm": 1.784203786626913, "learning_rate": 0.0001, "loss": 0.7491, "mean_abs_error": 153.62129058643112, "mean_abs_error_last_10": 83.98721658383458, "mean_abs_error_last_25": 95.87208378641488, "mean_abs_error_last_50": 132.9287729473661, "mean_pred_prob": 0.032460454665124414, "mean_pred_prob_last_10": 0.17029681764543056, "mean_pred_prob_last_25": 0.09087089374661446, "mean_pred_prob_last_50": 0.05455878563225269, "mean_token_accuracy": 0.8633777141571045, "step": 32980 }, { "epoch": 0.5864576111496276, "grad_norm": 1.3266797536744928, "learning_rate": 0.0001, "loss": 0.7263, "mean_abs_error": 595.0732559290556, "mean_abs_error_last_10": 364.15784220328464, "mean_abs_error_last_25": 334.20591617730753, "mean_abs_error_last_50": 401.23712709285223, "mean_pred_prob": 0.05327643360942602, "mean_pred_prob_last_10": 0.22733141678618268, "mean_pred_prob_last_25": 0.14055871233576908, "mean_pred_prob_last_50": 0.08900320914108306, "mean_token_accuracy": 0.8831517040729523, "step": 32990 }, { "epoch": 0.5866353794464295, "grad_norm": 0.7573600347630316, "learning_rate": 0.0001, "loss": 0.645, "mean_abs_error": 254.45629208269435, "mean_abs_error_last_10": 116.52289413884849, "mean_abs_error_last_25": 107.64354626532347, "mean_abs_error_last_50": 137.07576540992233, "mean_pred_prob": 0.04700329175684601, "mean_pred_prob_last_10": 0.24022783422842622, "mean_pred_prob_last_25": 0.13321762843988835, "mean_pred_prob_last_50": 0.08065161504782736, "mean_token_accuracy": 0.8772879064083099, "step": 33000 }, { "epoch": 0.5868131477432315, "grad_norm": 2.6066255967380396, "learning_rate": 0.0001, "loss": 0.8002, "mean_abs_error": 135.05228322988282, "mean_abs_error_last_10": 23.874669722832802, "mean_abs_error_last_25": 52.493438585933745, "mean_abs_error_last_50": 92.22081838288736, "mean_pred_prob": 0.041863334644585845, "mean_pred_prob_last_10": 0.20872924774885177, "mean_pred_prob_last_25": 0.1126685094088316, "mean_pred_prob_last_50": 0.069285230897367, "mean_token_accuracy": 0.8806544482707978, "step": 33010 }, { "epoch": 0.5869909160400334, "grad_norm": 1.4104531897758632, "learning_rate": 0.0001, "loss": 0.8997, "mean_abs_error": 480.7625824670499, "mean_abs_error_last_10": 185.32238768814216, "mean_abs_error_last_25": 316.0583230065482, "mean_abs_error_last_50": 422.56179281686235, "mean_pred_prob": 0.023420733702369034, "mean_pred_prob_last_10": 0.1253862215206027, "mean_pred_prob_last_25": 0.06694543277844786, "mean_pred_prob_last_50": 0.04077508100308478, "mean_token_accuracy": 0.8755899906158447, "step": 33020 }, { "epoch": 0.5871686843368353, "grad_norm": 1.7714913018046203, "learning_rate": 0.0001, "loss": 0.7414, "mean_abs_error": 260.70816104625584, "mean_abs_error_last_10": 101.83429707965988, "mean_abs_error_last_25": 132.14370486860122, "mean_abs_error_last_50": 169.34273851563216, "mean_pred_prob": 0.03537054788321257, "mean_pred_prob_last_10": 0.1793518763035536, "mean_pred_prob_last_25": 0.09913608757779002, "mean_pred_prob_last_50": 0.060078959260135886, "mean_token_accuracy": 0.8660271286964416, "step": 33030 }, { "epoch": 0.5873464526336373, "grad_norm": 1.4993198657679436, "learning_rate": 0.0001, "loss": 0.7573, "mean_abs_error": 414.118121704956, "mean_abs_error_last_10": 189.0030557444693, "mean_abs_error_last_25": 208.0789164762964, "mean_abs_error_last_50": 283.4594606645834, "mean_pred_prob": 0.03474220794159919, "mean_pred_prob_last_10": 0.17114602979272603, "mean_pred_prob_last_25": 0.09532557390630245, "mean_pred_prob_last_50": 0.05826821192167699, "mean_token_accuracy": 0.8747256994247437, "step": 33040 }, { "epoch": 0.5875242209304392, "grad_norm": 2.0601009319036985, "learning_rate": 0.0001, "loss": 0.8612, "mean_abs_error": 511.380070338114, "mean_abs_error_last_10": 284.4509329860188, "mean_abs_error_last_25": 304.4929958093519, "mean_abs_error_last_50": 353.09271830415247, "mean_pred_prob": 0.03363656934234314, "mean_pred_prob_last_10": 0.18386461802292614, "mean_pred_prob_last_25": 0.09461691400501877, "mean_pred_prob_last_50": 0.056146497663576156, "mean_token_accuracy": 0.8741149544715882, "step": 33050 }, { "epoch": 0.5877019892272413, "grad_norm": 1.6716451138491386, "learning_rate": 0.0001, "loss": 0.7468, "mean_abs_error": 247.27849559738394, "mean_abs_error_last_10": 71.52080542322938, "mean_abs_error_last_25": 106.0805384778359, "mean_abs_error_last_50": 142.54434874968092, "mean_pred_prob": 0.04143184674903751, "mean_pred_prob_last_10": 0.20197921972721816, "mean_pred_prob_last_25": 0.11343341269530356, "mean_pred_prob_last_50": 0.07008644631132484, "mean_token_accuracy": 0.8750795245170593, "step": 33060 }, { "epoch": 0.5878797575240432, "grad_norm": 2.7893629082610087, "learning_rate": 0.0001, "loss": 0.7467, "mean_abs_error": 392.0196128755133, "mean_abs_error_last_10": 192.3096824488719, "mean_abs_error_last_25": 256.73505213771597, "mean_abs_error_last_50": 273.6939592644433, "mean_pred_prob": 0.036739790928550065, "mean_pred_prob_last_10": 0.18239045590162278, "mean_pred_prob_last_25": 0.099961615819484, "mean_pred_prob_last_50": 0.06168866902589798, "mean_token_accuracy": 0.8734867751598359, "step": 33070 }, { "epoch": 0.5880575258208451, "grad_norm": 1.4555766335208866, "learning_rate": 0.0001, "loss": 0.6938, "mean_abs_error": 562.0621359781964, "mean_abs_error_last_10": 136.989042829789, "mean_abs_error_last_25": 224.8547606138987, "mean_abs_error_last_50": 342.75384925897026, "mean_pred_prob": 0.03864973066956736, "mean_pred_prob_last_10": 0.21230131183983758, "mean_pred_prob_last_25": 0.10851551326923073, "mean_pred_prob_last_50": 0.06523723548161797, "mean_token_accuracy": 0.8730187833309173, "step": 33080 }, { "epoch": 0.5882352941176471, "grad_norm": 1.5268325723872223, "learning_rate": 0.0001, "loss": 0.7299, "mean_abs_error": 301.6220797126326, "mean_abs_error_last_10": 97.38304660699688, "mean_abs_error_last_25": 114.34443959997411, "mean_abs_error_last_50": 171.16539147850693, "mean_pred_prob": 0.03789249351248145, "mean_pred_prob_last_10": 0.18715845048427582, "mean_pred_prob_last_25": 0.10347464634105563, "mean_pred_prob_last_50": 0.06298198881559074, "mean_token_accuracy": 0.8740190029144287, "step": 33090 }, { "epoch": 0.588413062414449, "grad_norm": 1.3104352200707428, "learning_rate": 0.0001, "loss": 0.6201, "mean_abs_error": 793.3031298502416, "mean_abs_error_last_10": 463.9606411068509, "mean_abs_error_last_25": 549.1744603506058, "mean_abs_error_last_50": 675.2517050796889, "mean_pred_prob": 0.04946953670296352, "mean_pred_prob_last_10": 0.23991235173889436, "mean_pred_prob_last_25": 0.13476466733845882, "mean_pred_prob_last_50": 0.08241637555183842, "mean_token_accuracy": 0.8730913519859314, "step": 33100 }, { "epoch": 0.588590830711251, "grad_norm": 2.1459217229485734, "learning_rate": 0.0001, "loss": 0.8376, "mean_abs_error": 151.92772956016424, "mean_abs_error_last_10": 51.63121808734189, "mean_abs_error_last_25": 57.89866771442587, "mean_abs_error_last_50": 89.57686021670636, "mean_pred_prob": 0.06729723124299199, "mean_pred_prob_last_10": 0.2855186857283115, "mean_pred_prob_last_25": 0.17273540394380688, "mean_pred_prob_last_50": 0.11014427025802434, "mean_token_accuracy": 0.8731653332710266, "step": 33110 }, { "epoch": 0.5887685990080529, "grad_norm": 2.381028352090559, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 368.7255180894072, "mean_abs_error_last_10": 115.66001818340324, "mean_abs_error_last_25": 130.16303234874613, "mean_abs_error_last_50": 203.95225379021105, "mean_pred_prob": 0.04295468383934349, "mean_pred_prob_last_10": 0.20285000747535378, "mean_pred_prob_last_25": 0.11429210735950619, "mean_pred_prob_last_50": 0.0709814581554383, "mean_token_accuracy": 0.867623770236969, "step": 33120 }, { "epoch": 0.5889463673048548, "grad_norm": 1.1165693973839983, "learning_rate": 0.0001, "loss": 0.784, "mean_abs_error": 235.46925184182572, "mean_abs_error_last_10": 118.2960655281178, "mean_abs_error_last_25": 183.13724336867074, "mean_abs_error_last_50": 262.70265859804294, "mean_pred_prob": 0.053869856800884006, "mean_pred_prob_last_10": 0.24766726568341255, "mean_pred_prob_last_25": 0.14568579718470573, "mean_pred_prob_last_50": 0.09049233980476856, "mean_token_accuracy": 0.8782878637313842, "step": 33130 }, { "epoch": 0.5891241356016568, "grad_norm": 1.2775526638304782, "learning_rate": 0.0001, "loss": 0.6294, "mean_abs_error": 554.5518455801673, "mean_abs_error_last_10": 222.70787307582253, "mean_abs_error_last_25": 357.0875545672643, "mean_abs_error_last_50": 400.2887603544494, "mean_pred_prob": 0.03834252300730441, "mean_pred_prob_last_10": 0.1858916309371125, "mean_pred_prob_last_25": 0.10179035410401412, "mean_pred_prob_last_50": 0.06381351174786687, "mean_token_accuracy": 0.8759915053844451, "step": 33140 }, { "epoch": 0.5893019038984587, "grad_norm": 1.3937180258595068, "learning_rate": 0.0001, "loss": 0.8884, "mean_abs_error": 443.9306354888254, "mean_abs_error_last_10": 135.13039450794207, "mean_abs_error_last_25": 236.9826928087592, "mean_abs_error_last_50": 296.402530939609, "mean_pred_prob": 0.024722713371738793, "mean_pred_prob_last_10": 0.13247640617191792, "mean_pred_prob_last_25": 0.07163922069594264, "mean_pred_prob_last_50": 0.04233838645741343, "mean_token_accuracy": 0.8679487824440002, "step": 33150 }, { "epoch": 0.5894796721952607, "grad_norm": 1.6048798003749238, "learning_rate": 0.0001, "loss": 0.7783, "mean_abs_error": 824.8866137788393, "mean_abs_error_last_10": 289.90314529195507, "mean_abs_error_last_25": 344.679385489349, "mean_abs_error_last_50": 483.5908255105913, "mean_pred_prob": 0.032602329144719985, "mean_pred_prob_last_10": 0.16070194738567806, "mean_pred_prob_last_25": 0.08929112921468914, "mean_pred_prob_last_50": 0.05478437862475403, "mean_token_accuracy": 0.8730226635932923, "step": 33160 }, { "epoch": 0.5896574404920627, "grad_norm": 1.412448807089094, "learning_rate": 0.0001, "loss": 0.6819, "mean_abs_error": 1022.4883485552539, "mean_abs_error_last_10": 473.0667096048155, "mean_abs_error_last_25": 566.5217226994243, "mean_abs_error_last_50": 704.0183535878033, "mean_pred_prob": 0.035282268312585074, "mean_pred_prob_last_10": 0.17626565541140735, "mean_pred_prob_last_25": 0.09625434815534391, "mean_pred_prob_last_50": 0.057939287362387404, "mean_token_accuracy": 0.8687621772289276, "step": 33170 }, { "epoch": 0.5898352087888646, "grad_norm": 2.364074006750551, "learning_rate": 0.0001, "loss": 0.7713, "mean_abs_error": 685.1677984318754, "mean_abs_error_last_10": 125.58193695365105, "mean_abs_error_last_25": 315.3866921567575, "mean_abs_error_last_50": 505.7416184776792, "mean_pred_prob": 0.02571059949696064, "mean_pred_prob_last_10": 0.1357429414987564, "mean_pred_prob_last_25": 0.07222151132300496, "mean_pred_prob_last_50": 0.04308936893939972, "mean_token_accuracy": 0.8755634844303131, "step": 33180 }, { "epoch": 0.5900129770856666, "grad_norm": 1.8397465096982697, "learning_rate": 0.0001, "loss": 0.6636, "mean_abs_error": 284.0551883326436, "mean_abs_error_last_10": 52.295218792858215, "mean_abs_error_last_25": 101.39033181507742, "mean_abs_error_last_50": 151.84062038109832, "mean_pred_prob": 0.04575210988987237, "mean_pred_prob_last_10": 0.20084969904273747, "mean_pred_prob_last_25": 0.11878691790625454, "mean_pred_prob_last_50": 0.07425714321434498, "mean_token_accuracy": 0.8681448817253112, "step": 33190 }, { "epoch": 0.5901907453824685, "grad_norm": 1.0742078909873853, "learning_rate": 0.0001, "loss": 0.7322, "mean_abs_error": 514.7113552011074, "mean_abs_error_last_10": 212.5764601829488, "mean_abs_error_last_25": 201.83030245213692, "mean_abs_error_last_50": 294.443735538567, "mean_pred_prob": 0.042925417446531354, "mean_pred_prob_last_10": 0.2016008771257475, "mean_pred_prob_last_25": 0.11212922420818358, "mean_pred_prob_last_50": 0.06957391538890079, "mean_token_accuracy": 0.8618903696537018, "step": 33200 }, { "epoch": 0.5903685136792705, "grad_norm": 1.241674992326253, "learning_rate": 0.0001, "loss": 0.7157, "mean_abs_error": 250.41057378119598, "mean_abs_error_last_10": 83.86647658730513, "mean_abs_error_last_25": 90.54766251984357, "mean_abs_error_last_50": 139.68213606305548, "mean_pred_prob": 0.03421069155447185, "mean_pred_prob_last_10": 0.16864474378526212, "mean_pred_prob_last_25": 0.09414089061319827, "mean_pred_prob_last_50": 0.05765770748257637, "mean_token_accuracy": 0.8771001100540161, "step": 33210 }, { "epoch": 0.5905462819760724, "grad_norm": 1.5056644594148054, "learning_rate": 0.0001, "loss": 0.7902, "mean_abs_error": 797.30582317762, "mean_abs_error_last_10": 214.18435607656156, "mean_abs_error_last_25": 283.2116816518693, "mean_abs_error_last_50": 431.47768021254313, "mean_pred_prob": 0.03145293096313253, "mean_pred_prob_last_10": 0.15146744127268902, "mean_pred_prob_last_25": 0.0850115334847942, "mean_pred_prob_last_50": 0.052660869288956746, "mean_token_accuracy": 0.8699456334114075, "step": 33220 }, { "epoch": 0.5907240502728743, "grad_norm": 0.9834462483861087, "learning_rate": 0.0001, "loss": 0.8389, "mean_abs_error": 200.0711666613531, "mean_abs_error_last_10": 47.07794633900806, "mean_abs_error_last_25": 106.2170394450416, "mean_abs_error_last_50": 132.10068041104552, "mean_pred_prob": 0.048228393541648984, "mean_pred_prob_last_10": 0.23788968175649644, "mean_pred_prob_last_25": 0.1344107449054718, "mean_pred_prob_last_50": 0.08099078955128788, "mean_token_accuracy": 0.8754566490650177, "step": 33230 }, { "epoch": 0.5909018185696763, "grad_norm": 1.5369847479424363, "learning_rate": 0.0001, "loss": 0.7308, "mean_abs_error": 845.0231870243708, "mean_abs_error_last_10": 586.7970895038358, "mean_abs_error_last_25": 661.5653834419134, "mean_abs_error_last_50": 746.2636937901556, "mean_pred_prob": 0.03418818383070175, "mean_pred_prob_last_10": 0.17268165804562158, "mean_pred_prob_last_25": 0.094436064659385, "mean_pred_prob_last_50": 0.057559682719875126, "mean_token_accuracy": 0.8719843208789826, "step": 33240 }, { "epoch": 0.5910795868664782, "grad_norm": 2.5380297861208194, "learning_rate": 0.0001, "loss": 0.7317, "mean_abs_error": 699.147057290555, "mean_abs_error_last_10": 294.0776671876423, "mean_abs_error_last_25": 408.12445532205675, "mean_abs_error_last_50": 491.59350946283837, "mean_pred_prob": 0.03240999218542129, "mean_pred_prob_last_10": 0.16434332660282963, "mean_pred_prob_last_25": 0.08944260671269148, "mean_pred_prob_last_50": 0.0544249874772504, "mean_token_accuracy": 0.8824974238872528, "step": 33250 }, { "epoch": 0.5912573551632802, "grad_norm": 1.116948505754959, "learning_rate": 0.0001, "loss": 0.6502, "mean_abs_error": 209.25150193496535, "mean_abs_error_last_10": 48.30165550354783, "mean_abs_error_last_25": 85.3754844984957, "mean_abs_error_last_50": 119.72926426409285, "mean_pred_prob": 0.030295681580901145, "mean_pred_prob_last_10": 0.17015091702342033, "mean_pred_prob_last_25": 0.08654629662632943, "mean_pred_prob_last_50": 0.05170360412448645, "mean_token_accuracy": 0.8830251812934875, "step": 33260 }, { "epoch": 0.5914351234600821, "grad_norm": 1.986218754565356, "learning_rate": 0.0001, "loss": 0.699, "mean_abs_error": 319.4390359039754, "mean_abs_error_last_10": 121.62483361813084, "mean_abs_error_last_25": 163.87949633796748, "mean_abs_error_last_50": 209.89081864976396, "mean_pred_prob": 0.037556875590234996, "mean_pred_prob_last_10": 0.20368830878287553, "mean_pred_prob_last_25": 0.10812463741749526, "mean_pred_prob_last_50": 0.06396104651503265, "mean_token_accuracy": 0.8750757098197937, "step": 33270 }, { "epoch": 0.591612891756884, "grad_norm": 2.5647296787061595, "learning_rate": 0.0001, "loss": 0.7833, "mean_abs_error": 1280.9794043452243, "mean_abs_error_last_10": 660.6177356518456, "mean_abs_error_last_25": 845.5413819125658, "mean_abs_error_last_50": 1066.6475051979237, "mean_pred_prob": 0.03815737063850975, "mean_pred_prob_last_10": 0.1788128232525196, "mean_pred_prob_last_25": 0.09853463949839351, "mean_pred_prob_last_50": 0.0619649014697643, "mean_token_accuracy": 0.8702915966510772, "step": 33280 }, { "epoch": 0.5917906600536861, "grad_norm": 1.507595493846553, "learning_rate": 0.0001, "loss": 0.859, "mean_abs_error": 746.1368915516504, "mean_abs_error_last_10": 89.44288325022045, "mean_abs_error_last_25": 249.31004563022353, "mean_abs_error_last_50": 418.510136066885, "mean_pred_prob": 0.026264138106489553, "mean_pred_prob_last_10": 0.13601563822012394, "mean_pred_prob_last_25": 0.07328269799472764, "mean_pred_prob_last_50": 0.044337272201664744, "mean_token_accuracy": 0.8845268070697785, "step": 33290 }, { "epoch": 0.591968428350488, "grad_norm": 0.9779372026454939, "learning_rate": 0.0001, "loss": 0.8318, "mean_abs_error": 435.48787077409844, "mean_abs_error_last_10": 77.23209110994362, "mean_abs_error_last_25": 102.96054520706475, "mean_abs_error_last_50": 171.1911293308321, "mean_pred_prob": 0.04463011496700346, "mean_pred_prob_last_10": 0.19666001982986928, "mean_pred_prob_last_25": 0.11633964469656348, "mean_pred_prob_last_50": 0.07369091883301734, "mean_token_accuracy": 0.8602374434471131, "step": 33300 }, { "epoch": 0.59214619664729, "grad_norm": 4.17439454215035, "learning_rate": 0.0001, "loss": 0.7965, "mean_abs_error": 473.2271918367154, "mean_abs_error_last_10": 184.06258641179932, "mean_abs_error_last_25": 209.50640509148283, "mean_abs_error_last_50": 355.0825202197886, "mean_pred_prob": 0.038862860598601404, "mean_pred_prob_last_10": 0.17666922230273485, "mean_pred_prob_last_25": 0.1059166177175939, "mean_pred_prob_last_50": 0.06533145052380859, "mean_token_accuracy": 0.8610818982124329, "step": 33310 }, { "epoch": 0.5923239649440919, "grad_norm": 1.3584263671852677, "learning_rate": 0.0001, "loss": 0.7313, "mean_abs_error": 519.6277678236395, "mean_abs_error_last_10": 370.0962851718938, "mean_abs_error_last_25": 456.09522610994725, "mean_abs_error_last_50": 424.619262793882, "mean_pred_prob": 0.035325156117323786, "mean_pred_prob_last_10": 0.1618398318067193, "mean_pred_prob_last_25": 0.09647440589033067, "mean_pred_prob_last_50": 0.059914833237417045, "mean_token_accuracy": 0.859863007068634, "step": 33320 }, { "epoch": 0.5925017332408938, "grad_norm": 1.3534018877706977, "learning_rate": 0.0001, "loss": 0.713, "mean_abs_error": 335.05478779753355, "mean_abs_error_last_10": 94.28204607158696, "mean_abs_error_last_25": 132.69983690405235, "mean_abs_error_last_50": 197.04962955254697, "mean_pred_prob": 0.04252017089165747, "mean_pred_prob_last_10": 0.20869168732315302, "mean_pred_prob_last_25": 0.11776850977912545, "mean_pred_prob_last_50": 0.07195618953555823, "mean_token_accuracy": 0.8742129027843475, "step": 33330 }, { "epoch": 0.5926795015376958, "grad_norm": 1.941468218369291, "learning_rate": 0.0001, "loss": 0.7771, "mean_abs_error": 1272.8342617486574, "mean_abs_error_last_10": 582.9241132302119, "mean_abs_error_last_25": 776.0399818953109, "mean_abs_error_last_50": 1013.4578418541744, "mean_pred_prob": 0.018803401160403153, "mean_pred_prob_last_10": 0.09501728634932079, "mean_pred_prob_last_25": 0.0527217452734476, "mean_pred_prob_last_50": 0.03173776399635244, "mean_token_accuracy": 0.874067485332489, "step": 33340 }, { "epoch": 0.5928572698344977, "grad_norm": 2.659137527483133, "learning_rate": 0.0001, "loss": 0.6878, "mean_abs_error": 944.3799682997866, "mean_abs_error_last_10": 274.29774792616917, "mean_abs_error_last_25": 314.74488778340157, "mean_abs_error_last_50": 567.3488536028387, "mean_pred_prob": 0.030198678310262038, "mean_pred_prob_last_10": 0.15232443594140932, "mean_pred_prob_last_25": 0.08066355023765936, "mean_pred_prob_last_50": 0.04926588523667306, "mean_token_accuracy": 0.8833667159080505, "step": 33350 }, { "epoch": 0.5930350381312997, "grad_norm": 1.682776421827808, "learning_rate": 0.0001, "loss": 0.7368, "mean_abs_error": 291.3215046122402, "mean_abs_error_last_10": 337.1139436077865, "mean_abs_error_last_25": 330.25150425916553, "mean_abs_error_last_50": 291.7996873667281, "mean_pred_prob": 0.037565561488736424, "mean_pred_prob_last_10": 0.19416921347146854, "mean_pred_prob_last_25": 0.10578303028596565, "mean_pred_prob_last_50": 0.06460229962831363, "mean_token_accuracy": 0.8674992144107818, "step": 33360 }, { "epoch": 0.5932128064281016, "grad_norm": 0.9993364775845576, "learning_rate": 0.0001, "loss": 0.6875, "mean_abs_error": 421.7375941155357, "mean_abs_error_last_10": 101.7494836247564, "mean_abs_error_last_25": 158.40232774557418, "mean_abs_error_last_50": 267.20607773399206, "mean_pred_prob": 0.03717600735835731, "mean_pred_prob_last_10": 0.18472288101911544, "mean_pred_prob_last_25": 0.09896679166704417, "mean_pred_prob_last_50": 0.0618935713544488, "mean_token_accuracy": 0.8778617322444916, "step": 33370 }, { "epoch": 0.5933905747249035, "grad_norm": 2.9900018072401386, "learning_rate": 0.0001, "loss": 0.8478, "mean_abs_error": 126.01749117003797, "mean_abs_error_last_10": 35.7503229277254, "mean_abs_error_last_25": 50.51070302882781, "mean_abs_error_last_50": 77.97841823104265, "mean_pred_prob": 0.04377590324729681, "mean_pred_prob_last_10": 0.19695753566920757, "mean_pred_prob_last_25": 0.11514735370874404, "mean_pred_prob_last_50": 0.07247316781431437, "mean_token_accuracy": 0.8717412114143371, "step": 33380 }, { "epoch": 0.5935683430217055, "grad_norm": 1.0535257142262366, "learning_rate": 0.0001, "loss": 0.714, "mean_abs_error": 452.84332496321747, "mean_abs_error_last_10": 113.1638280687765, "mean_abs_error_last_25": 180.66032798857478, "mean_abs_error_last_50": 283.9343822510065, "mean_pred_prob": 0.04062843159772456, "mean_pred_prob_last_10": 0.20413505444303154, "mean_pred_prob_last_25": 0.11702762958593667, "mean_pred_prob_last_50": 0.06973270629532635, "mean_token_accuracy": 0.875326520204544, "step": 33390 }, { "epoch": 0.5937461113185074, "grad_norm": 1.2090463515853112, "learning_rate": 0.0001, "loss": 0.7512, "mean_abs_error": 646.1966644777051, "mean_abs_error_last_10": 229.71623116962286, "mean_abs_error_last_25": 346.9585259234644, "mean_abs_error_last_50": 471.5260021350844, "mean_pred_prob": 0.03615483783360105, "mean_pred_prob_last_10": 0.17561413553194144, "mean_pred_prob_last_25": 0.09936817637644708, "mean_pred_prob_last_50": 0.06045955286826939, "mean_token_accuracy": 0.8715935587882996, "step": 33400 }, { "epoch": 0.5939238796153095, "grad_norm": 1.7200810378251759, "learning_rate": 0.0001, "loss": 0.6551, "mean_abs_error": 502.49650464293916, "mean_abs_error_last_10": 183.92079004541267, "mean_abs_error_last_25": 229.94822019831145, "mean_abs_error_last_50": 312.7474638170787, "mean_pred_prob": 0.03462062024045735, "mean_pred_prob_last_10": 0.16169568467885256, "mean_pred_prob_last_25": 0.09024552712216974, "mean_pred_prob_last_50": 0.056549394084140656, "mean_token_accuracy": 0.8736155271530152, "step": 33410 }, { "epoch": 0.5941016479121114, "grad_norm": 2.5173856219414255, "learning_rate": 0.0001, "loss": 0.7352, "mean_abs_error": 314.72943712452513, "mean_abs_error_last_10": 121.4534077978215, "mean_abs_error_last_25": 141.05199553478255, "mean_abs_error_last_50": 177.31145095985306, "mean_pred_prob": 0.04681120598688722, "mean_pred_prob_last_10": 0.2233026280067861, "mean_pred_prob_last_25": 0.13100609462708235, "mean_pred_prob_last_50": 0.07905046213418246, "mean_token_accuracy": 0.8659850239753724, "step": 33420 }, { "epoch": 0.5942794162089133, "grad_norm": 1.285225388175957, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 968.3581403977723, "mean_abs_error_last_10": 640.6013846736911, "mean_abs_error_last_25": 693.2247518838647, "mean_abs_error_last_50": 789.7453879629736, "mean_pred_prob": 0.0393042101917672, "mean_pred_prob_last_10": 0.19394711127097253, "mean_pred_prob_last_25": 0.1084716038298211, "mean_pred_prob_last_50": 0.06563308155891719, "mean_token_accuracy": 0.8658995687961578, "step": 33430 }, { "epoch": 0.5944571845057153, "grad_norm": 2.1808680578723556, "learning_rate": 0.0001, "loss": 0.7506, "mean_abs_error": 449.6485493824245, "mean_abs_error_last_10": 172.05065782798928, "mean_abs_error_last_25": 260.3425975300182, "mean_abs_error_last_50": 358.52480043049746, "mean_pred_prob": 0.033037535520270465, "mean_pred_prob_last_10": 0.1746638985350728, "mean_pred_prob_last_25": 0.09260595999658108, "mean_pred_prob_last_50": 0.05592279797419906, "mean_token_accuracy": 0.8815403699874877, "step": 33440 }, { "epoch": 0.5946349528025172, "grad_norm": 1.4772482345391758, "learning_rate": 0.0001, "loss": 0.6562, "mean_abs_error": 349.26972817837117, "mean_abs_error_last_10": 63.967449205604815, "mean_abs_error_last_25": 67.92223190618813, "mean_abs_error_last_50": 157.4693526897137, "mean_pred_prob": 0.046557641588151455, "mean_pred_prob_last_10": 0.22006401177495719, "mean_pred_prob_last_25": 0.12560249157249928, "mean_pred_prob_last_50": 0.07745669111609459, "mean_token_accuracy": 0.8759987115859985, "step": 33450 }, { "epoch": 0.5948127210993192, "grad_norm": 1.4706072978131695, "learning_rate": 0.0001, "loss": 0.7715, "mean_abs_error": 227.0539509061651, "mean_abs_error_last_10": 59.06367526662565, "mean_abs_error_last_25": 79.93953485012878, "mean_abs_error_last_50": 117.90194001829579, "mean_pred_prob": 0.04975100443698466, "mean_pred_prob_last_10": 0.2275791995227337, "mean_pred_prob_last_25": 0.13243879619985818, "mean_pred_prob_last_50": 0.08228266481310129, "mean_token_accuracy": 0.8705689966678619, "step": 33460 }, { "epoch": 0.5949904893961211, "grad_norm": 0.8628671369407159, "learning_rate": 0.0001, "loss": 0.7449, "mean_abs_error": 487.8867591012223, "mean_abs_error_last_10": 162.10834169994283, "mean_abs_error_last_25": 219.73382551404507, "mean_abs_error_last_50": 300.74121637151, "mean_pred_prob": 0.02378368703648448, "mean_pred_prob_last_10": 0.12183503434062004, "mean_pred_prob_last_25": 0.06707418719306588, "mean_pred_prob_last_50": 0.04045194727368653, "mean_token_accuracy": 0.8726192891597748, "step": 33470 }, { "epoch": 0.595168257692923, "grad_norm": 3.3438828632364648, "learning_rate": 0.0001, "loss": 0.736, "mean_abs_error": 993.6058221370579, "mean_abs_error_last_10": 329.8950053007503, "mean_abs_error_last_25": 466.59058616337006, "mean_abs_error_last_50": 592.58913214051, "mean_pred_prob": 0.027868913862039334, "mean_pred_prob_last_10": 0.1099014857551083, "mean_pred_prob_last_25": 0.07087324626045302, "mean_pred_prob_last_50": 0.044771232485072684, "mean_token_accuracy": 0.8708030819892884, "step": 33480 }, { "epoch": 0.595346025989725, "grad_norm": 1.4747090711733635, "learning_rate": 0.0001, "loss": 0.6795, "mean_abs_error": 261.85744578466546, "mean_abs_error_last_10": 21.454254171474318, "mean_abs_error_last_25": 47.668026711339586, "mean_abs_error_last_50": 118.01935094560811, "mean_pred_prob": 0.04623177126049995, "mean_pred_prob_last_10": 0.24633046090602875, "mean_pred_prob_last_25": 0.1352593142539263, "mean_pred_prob_last_50": 0.07906812173314393, "mean_token_accuracy": 0.8839545011520386, "step": 33490 }, { "epoch": 0.5955237942865269, "grad_norm": 1.58885074298963, "learning_rate": 0.0001, "loss": 0.6467, "mean_abs_error": 901.3098100604795, "mean_abs_error_last_10": 576.2695792967297, "mean_abs_error_last_25": 550.6565221388014, "mean_abs_error_last_50": 616.6092045095313, "mean_pred_prob": 0.04307378380035516, "mean_pred_prob_last_10": 0.20087627948378212, "mean_pred_prob_last_25": 0.1146972936054226, "mean_pred_prob_last_50": 0.07127506348188035, "mean_token_accuracy": 0.876529210805893, "step": 33500 }, { "epoch": 0.5957015625833288, "grad_norm": 1.5909336090260753, "learning_rate": 0.0001, "loss": 0.835, "mean_abs_error": 1054.975780057225, "mean_abs_error_last_10": 527.740234264912, "mean_abs_error_last_25": 629.0547721567458, "mean_abs_error_last_50": 750.322466139346, "mean_pred_prob": 0.02798840867471881, "mean_pred_prob_last_10": 0.12995152597140985, "mean_pred_prob_last_25": 0.07209685658162926, "mean_pred_prob_last_50": 0.04486364968179259, "mean_token_accuracy": 0.8694411396980286, "step": 33510 }, { "epoch": 0.5958793308801308, "grad_norm": 3.8508005514072967, "learning_rate": 0.0001, "loss": 0.7312, "mean_abs_error": 105.88786508911303, "mean_abs_error_last_10": 17.061626637124576, "mean_abs_error_last_25": 46.298593864745854, "mean_abs_error_last_50": 66.60943609372566, "mean_pred_prob": 0.06910959174856543, "mean_pred_prob_last_10": 0.31430579349398613, "mean_pred_prob_last_25": 0.18487004581838845, "mean_pred_prob_last_50": 0.11426655892282725, "mean_token_accuracy": 0.8720750451087952, "step": 33520 }, { "epoch": 0.5960570991769328, "grad_norm": 1.0775390453411695, "learning_rate": 0.0001, "loss": 0.6628, "mean_abs_error": 861.1535763452484, "mean_abs_error_last_10": 389.23104242082206, "mean_abs_error_last_25": 352.16692268034967, "mean_abs_error_last_50": 466.4675011298562, "mean_pred_prob": 0.030463876941939815, "mean_pred_prob_last_10": 0.1398947480483912, "mean_pred_prob_last_25": 0.08162688980810344, "mean_pred_prob_last_50": 0.05070462253061123, "mean_token_accuracy": 0.8762592554092408, "step": 33530 }, { "epoch": 0.5962348674737348, "grad_norm": 4.372070919648597, "learning_rate": 0.0001, "loss": 0.7391, "mean_abs_error": 751.9449370887764, "mean_abs_error_last_10": 296.65206872531695, "mean_abs_error_last_25": 314.6137023402634, "mean_abs_error_last_50": 347.36016360320366, "mean_pred_prob": 0.03523381282575429, "mean_pred_prob_last_10": 0.16533279441064225, "mean_pred_prob_last_25": 0.09891644496237859, "mean_pred_prob_last_50": 0.0604169404366985, "mean_token_accuracy": 0.8693431973457336, "step": 33540 }, { "epoch": 0.5964126357705367, "grad_norm": 1.5355362378286317, "learning_rate": 0.0001, "loss": 0.8102, "mean_abs_error": 256.2123950665291, "mean_abs_error_last_10": 48.32590103411194, "mean_abs_error_last_25": 67.11422639882285, "mean_abs_error_last_50": 131.82899446620718, "mean_pred_prob": 0.05310277051758021, "mean_pred_prob_last_10": 0.24197487235069276, "mean_pred_prob_last_25": 0.14339968748390675, "mean_pred_prob_last_50": 0.08791622058488428, "mean_token_accuracy": 0.8690655589103699, "step": 33550 }, { "epoch": 0.5965904040673387, "grad_norm": 1.154442130409944, "learning_rate": 0.0001, "loss": 0.7042, "mean_abs_error": 775.6985268363916, "mean_abs_error_last_10": 312.3819601842116, "mean_abs_error_last_25": 424.60467182022967, "mean_abs_error_last_50": 605.3061798374836, "mean_pred_prob": 0.03962605788256042, "mean_pred_prob_last_10": 0.1905248849245254, "mean_pred_prob_last_25": 0.1094003826379776, "mean_pred_prob_last_50": 0.06675935566308908, "mean_token_accuracy": 0.8676380693912507, "step": 33560 }, { "epoch": 0.5967681723641406, "grad_norm": 2.012789119753491, "learning_rate": 0.0001, "loss": 0.7361, "mean_abs_error": 467.4181165044985, "mean_abs_error_last_10": 344.91602271457646, "mean_abs_error_last_25": 382.01576109879574, "mean_abs_error_last_50": 414.8232536572333, "mean_pred_prob": 0.030730418814346194, "mean_pred_prob_last_10": 0.13864833125844597, "mean_pred_prob_last_25": 0.07997690527699888, "mean_pred_prob_last_50": 0.0503353831358254, "mean_token_accuracy": 0.8726221561431885, "step": 33570 }, { "epoch": 0.5969459406609425, "grad_norm": 1.4348877859010887, "learning_rate": 0.0001, "loss": 0.7958, "mean_abs_error": 161.37486623217686, "mean_abs_error_last_10": 33.243530233854, "mean_abs_error_last_25": 56.977383686883385, "mean_abs_error_last_50": 84.05694816547835, "mean_pred_prob": 0.04709933251142502, "mean_pred_prob_last_10": 0.23733282163739206, "mean_pred_prob_last_25": 0.13129148427397014, "mean_pred_prob_last_50": 0.08061994332820177, "mean_token_accuracy": 0.8689680516719818, "step": 33580 }, { "epoch": 0.5971237089577445, "grad_norm": 0.9011343840726269, "learning_rate": 0.0001, "loss": 0.7344, "mean_abs_error": 755.7553232726517, "mean_abs_error_last_10": 243.5832102649565, "mean_abs_error_last_25": 290.9540350736011, "mean_abs_error_last_50": 425.18442220579345, "mean_pred_prob": 0.03506985922576859, "mean_pred_prob_last_10": 0.17279420603299514, "mean_pred_prob_last_25": 0.09728923622751609, "mean_pred_prob_last_50": 0.05916798500693403, "mean_token_accuracy": 0.8743598878383636, "step": 33590 }, { "epoch": 0.5973014772545464, "grad_norm": 0.9465493836343221, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 455.11425709369104, "mean_abs_error_last_10": 147.8835256791718, "mean_abs_error_last_25": 261.17632720127324, "mean_abs_error_last_50": 297.4683912424989, "mean_pred_prob": 0.028494874900206923, "mean_pred_prob_last_10": 0.1458020441234112, "mean_pred_prob_last_25": 0.07678679674863816, "mean_pred_prob_last_50": 0.04698434518650174, "mean_token_accuracy": 0.8743287205696106, "step": 33600 }, { "epoch": 0.5974792455513483, "grad_norm": 1.3320993786842645, "learning_rate": 0.0001, "loss": 0.6493, "mean_abs_error": 157.64453881744456, "mean_abs_error_last_10": 13.793197869265763, "mean_abs_error_last_25": 37.05704005302937, "mean_abs_error_last_50": 98.68328097283992, "mean_pred_prob": 0.04633934898301959, "mean_pred_prob_last_10": 0.2183719538152218, "mean_pred_prob_last_25": 0.12316418886184692, "mean_pred_prob_last_50": 0.07743316367268563, "mean_token_accuracy": 0.8593354761600495, "step": 33610 }, { "epoch": 0.5976570138481503, "grad_norm": 1.5842862280996282, "learning_rate": 0.0001, "loss": 0.6908, "mean_abs_error": 276.9436317936924, "mean_abs_error_last_10": 70.5118566827675, "mean_abs_error_last_25": 105.09083767830134, "mean_abs_error_last_50": 178.97692545123837, "mean_pred_prob": 0.03477429812774062, "mean_pred_prob_last_10": 0.18271722495555878, "mean_pred_prob_last_25": 0.1018295606598258, "mean_pred_prob_last_50": 0.05941179227083922, "mean_token_accuracy": 0.8690930783748627, "step": 33620 }, { "epoch": 0.5978347821449522, "grad_norm": 2.252973047009773, "learning_rate": 0.0001, "loss": 0.791, "mean_abs_error": 617.1102592977293, "mean_abs_error_last_10": 411.95203031687754, "mean_abs_error_last_25": 381.46640698852104, "mean_abs_error_last_50": 364.6196617506087, "mean_pred_prob": 0.04214439860079437, "mean_pred_prob_last_10": 0.21013609010260553, "mean_pred_prob_last_25": 0.11784065706888214, "mean_pred_prob_last_50": 0.07217696362058632, "mean_token_accuracy": 0.8731007754802704, "step": 33630 }, { "epoch": 0.5980125504417542, "grad_norm": 1.3537413191865324, "learning_rate": 0.0001, "loss": 0.7903, "mean_abs_error": 594.2988961751716, "mean_abs_error_last_10": 176.99843954668134, "mean_abs_error_last_25": 325.156036225463, "mean_abs_error_last_50": 428.5064463119339, "mean_pred_prob": 0.01970784836448729, "mean_pred_prob_last_10": 0.09839012175798416, "mean_pred_prob_last_25": 0.052664841059595346, "mean_pred_prob_last_50": 0.03242304855957627, "mean_token_accuracy": 0.8767390310764313, "step": 33640 }, { "epoch": 0.5981903187385562, "grad_norm": 1.2319440257130159, "learning_rate": 0.0001, "loss": 0.6475, "mean_abs_error": 272.15027915730053, "mean_abs_error_last_10": 195.81130662929553, "mean_abs_error_last_25": 215.09816714751136, "mean_abs_error_last_50": 217.0085859264801, "mean_pred_prob": 0.05091411712346598, "mean_pred_prob_last_10": 0.24625535880913957, "mean_pred_prob_last_25": 0.13544296426698565, "mean_pred_prob_last_50": 0.08465895965928212, "mean_token_accuracy": 0.8801485896110535, "step": 33650 }, { "epoch": 0.5983680870353582, "grad_norm": 1.3700536913184451, "learning_rate": 0.0001, "loss": 0.6523, "mean_abs_error": 513.7710563612424, "mean_abs_error_last_10": 124.11907505466354, "mean_abs_error_last_25": 175.9046952601039, "mean_abs_error_last_50": 284.0265059416314, "mean_pred_prob": 0.02777527696453035, "mean_pred_prob_last_10": 0.13990966191049664, "mean_pred_prob_last_25": 0.07584386535454542, "mean_pred_prob_last_50": 0.046888640651013704, "mean_token_accuracy": 0.8782771766185761, "step": 33660 }, { "epoch": 0.5985458553321601, "grad_norm": 2.2286781133493374, "learning_rate": 0.0001, "loss": 0.711, "mean_abs_error": 603.7179268584076, "mean_abs_error_last_10": 229.91676133896058, "mean_abs_error_last_25": 254.62889829744972, "mean_abs_error_last_50": 377.2198786940195, "mean_pred_prob": 0.023625649081077428, "mean_pred_prob_last_10": 0.11474028374068439, "mean_pred_prob_last_25": 0.06573365847580134, "mean_pred_prob_last_50": 0.03994496149243787, "mean_token_accuracy": 0.8827488601207734, "step": 33670 }, { "epoch": 0.598723623628962, "grad_norm": 1.8787230716059056, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 280.51038609552467, "mean_abs_error_last_10": 38.746852037408196, "mean_abs_error_last_25": 66.64134305570066, "mean_abs_error_last_50": 134.22548942753266, "mean_pred_prob": 0.047661915933713314, "mean_pred_prob_last_10": 0.23992313407361507, "mean_pred_prob_last_25": 0.1360244084149599, "mean_pred_prob_last_50": 0.08074154928326607, "mean_token_accuracy": 0.8713780283927918, "step": 33680 }, { "epoch": 0.598901391925764, "grad_norm": 1.7041013556790454, "learning_rate": 0.0001, "loss": 0.6829, "mean_abs_error": 181.4247252225693, "mean_abs_error_last_10": 57.11794394464609, "mean_abs_error_last_25": 144.88016466014622, "mean_abs_error_last_50": 173.01962848978656, "mean_pred_prob": 0.05521685746498406, "mean_pred_prob_last_10": 0.2541003838181496, "mean_pred_prob_last_25": 0.14922438133507968, "mean_pred_prob_last_50": 0.09144449369050563, "mean_token_accuracy": 0.877496188879013, "step": 33690 }, { "epoch": 0.5990791602225659, "grad_norm": 1.1963404721494997, "learning_rate": 0.0001, "loss": 0.9381, "mean_abs_error": 823.9721721364185, "mean_abs_error_last_10": 240.00218028840087, "mean_abs_error_last_25": 316.9566258589078, "mean_abs_error_last_50": 442.5070773430442, "mean_pred_prob": 0.03408243585145101, "mean_pred_prob_last_10": 0.15044485623948275, "mean_pred_prob_last_25": 0.09039323758333921, "mean_pred_prob_last_50": 0.05680314886849373, "mean_token_accuracy": 0.8643183588981629, "step": 33700 }, { "epoch": 0.5992569285193678, "grad_norm": 1.2061453580162047, "learning_rate": 0.0001, "loss": 0.8774, "mean_abs_error": 885.1093141476176, "mean_abs_error_last_10": 283.0012211719141, "mean_abs_error_last_25": 329.95487937582936, "mean_abs_error_last_50": 548.3719951616605, "mean_pred_prob": 0.033151083096163346, "mean_pred_prob_last_10": 0.17345112770562993, "mean_pred_prob_last_25": 0.09588170778588392, "mean_pred_prob_last_50": 0.05596274888957851, "mean_token_accuracy": 0.8668042063713074, "step": 33710 }, { "epoch": 0.5994346968161698, "grad_norm": 1.387154121083186, "learning_rate": 0.0001, "loss": 0.7306, "mean_abs_error": 663.8405547864842, "mean_abs_error_last_10": 239.03181530920378, "mean_abs_error_last_25": 278.42301587893655, "mean_abs_error_last_50": 360.05328840901393, "mean_pred_prob": 0.015713147190399467, "mean_pred_prob_last_10": 0.08218779433518648, "mean_pred_prob_last_25": 0.0426705963909626, "mean_pred_prob_last_50": 0.026069361111149193, "mean_token_accuracy": 0.8833798706531525, "step": 33720 }, { "epoch": 0.5996124651129717, "grad_norm": 2.036853956182899, "learning_rate": 0.0001, "loss": 0.8064, "mean_abs_error": 679.5943451785212, "mean_abs_error_last_10": 312.69298961097786, "mean_abs_error_last_25": 432.9044168807653, "mean_abs_error_last_50": 521.7951208010394, "mean_pred_prob": 0.02504959898069501, "mean_pred_prob_last_10": 0.12064935276284813, "mean_pred_prob_last_25": 0.06861653053201736, "mean_pred_prob_last_50": 0.04163369373418391, "mean_token_accuracy": 0.8705615162849426, "step": 33730 }, { "epoch": 0.5997902334097737, "grad_norm": 1.0532137278484053, "learning_rate": 0.0001, "loss": 0.766, "mean_abs_error": 616.2773489168851, "mean_abs_error_last_10": 208.27755090361583, "mean_abs_error_last_25": 248.90401169773986, "mean_abs_error_last_50": 378.8186379393906, "mean_pred_prob": 0.014490326610393822, "mean_pred_prob_last_10": 0.07773729925975204, "mean_pred_prob_last_25": 0.041532105393707754, "mean_pred_prob_last_50": 0.02490399540401995, "mean_token_accuracy": 0.8751302361488342, "step": 33740 }, { "epoch": 0.5999680017065756, "grad_norm": 1.3278678738429321, "learning_rate": 0.0001, "loss": 0.7014, "mean_abs_error": 389.7096600932117, "mean_abs_error_last_10": 63.28176626452763, "mean_abs_error_last_25": 90.07556665116385, "mean_abs_error_last_50": 201.42728462001944, "mean_pred_prob": 0.03358292717020959, "mean_pred_prob_last_10": 0.17682954836636783, "mean_pred_prob_last_25": 0.09610365773551166, "mean_pred_prob_last_50": 0.05688893801998347, "mean_token_accuracy": 0.871141004562378, "step": 33750 }, { "epoch": 0.6001457700033775, "grad_norm": 2.0097415187574237, "learning_rate": 0.0001, "loss": 0.6668, "mean_abs_error": 569.8234951426654, "mean_abs_error_last_10": 220.06142817053166, "mean_abs_error_last_25": 253.4458657820748, "mean_abs_error_last_50": 328.9962237451543, "mean_pred_prob": 0.033737164334161206, "mean_pred_prob_last_10": 0.16847577778389677, "mean_pred_prob_last_25": 0.09382498646154999, "mean_pred_prob_last_50": 0.0577750900760293, "mean_token_accuracy": 0.8734571754932403, "step": 33760 }, { "epoch": 0.6003235383001796, "grad_norm": 1.1937243411653709, "learning_rate": 0.0001, "loss": 0.6681, "mean_abs_error": 374.74661886954857, "mean_abs_error_last_10": 161.42304734710598, "mean_abs_error_last_25": 209.40310776371794, "mean_abs_error_last_50": 258.4115277187233, "mean_pred_prob": 0.032332929829135534, "mean_pred_prob_last_10": 0.1604273342527449, "mean_pred_prob_last_25": 0.08726096488535404, "mean_pred_prob_last_50": 0.05341963763348758, "mean_token_accuracy": 0.8779656529426575, "step": 33770 }, { "epoch": 0.6005013065969815, "grad_norm": 1.5979145416660814, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 296.3824280324807, "mean_abs_error_last_10": 57.81351043690627, "mean_abs_error_last_25": 69.99406431512384, "mean_abs_error_last_50": 122.77732822539204, "mean_pred_prob": 0.04618217330425978, "mean_pred_prob_last_10": 0.21768902987241745, "mean_pred_prob_last_25": 0.12340396847575903, "mean_pred_prob_last_50": 0.07641838267445564, "mean_token_accuracy": 0.8748140335083008, "step": 33780 }, { "epoch": 0.6006790748937835, "grad_norm": 1.242729000065318, "learning_rate": 0.0001, "loss": 0.8098, "mean_abs_error": 724.0814075127817, "mean_abs_error_last_10": 449.29347648620194, "mean_abs_error_last_25": 435.62417842406086, "mean_abs_error_last_50": 446.7529187021267, "mean_pred_prob": 0.03129826067015529, "mean_pred_prob_last_10": 0.14843751300359145, "mean_pred_prob_last_25": 0.08577622667653487, "mean_pred_prob_last_50": 0.05269282958470285, "mean_token_accuracy": 0.8754574477672576, "step": 33790 }, { "epoch": 0.6008568431905854, "grad_norm": 1.4436242279248475, "learning_rate": 0.0001, "loss": 0.7733, "mean_abs_error": 753.6770192311094, "mean_abs_error_last_10": 157.64579865123693, "mean_abs_error_last_25": 335.1204175477554, "mean_abs_error_last_50": 454.40306853757266, "mean_pred_prob": 0.04620667481212877, "mean_pred_prob_last_10": 0.20726241044467314, "mean_pred_prob_last_25": 0.12160375303355977, "mean_pred_prob_last_50": 0.07822858507861383, "mean_token_accuracy": 0.8660415232181549, "step": 33800 }, { "epoch": 0.6010346114873873, "grad_norm": 1.4173762619614716, "learning_rate": 0.0001, "loss": 0.7825, "mean_abs_error": 248.79557135551153, "mean_abs_error_last_10": 82.98935974156669, "mean_abs_error_last_25": 111.1977796637897, "mean_abs_error_last_50": 164.16203496882002, "mean_pred_prob": 0.06215102552087046, "mean_pred_prob_last_10": 0.2851060432731174, "mean_pred_prob_last_25": 0.16560804668115453, "mean_pred_prob_last_50": 0.10373735233442857, "mean_token_accuracy": 0.8626442372798919, "step": 33810 }, { "epoch": 0.6012123797841893, "grad_norm": 1.6240744673115723, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 320.0131453504553, "mean_abs_error_last_10": 154.3854841531567, "mean_abs_error_last_25": 204.05202050316745, "mean_abs_error_last_50": 209.49461077825717, "mean_pred_prob": 0.03612784617580474, "mean_pred_prob_last_10": 0.17850493974983692, "mean_pred_prob_last_25": 0.09922169074416161, "mean_pred_prob_last_50": 0.06070319842547178, "mean_token_accuracy": 0.8807987034320831, "step": 33820 }, { "epoch": 0.6013901480809912, "grad_norm": 1.1774476636758058, "learning_rate": 0.0001, "loss": 0.7082, "mean_abs_error": 1105.3187743105777, "mean_abs_error_last_10": 246.5923771717953, "mean_abs_error_last_25": 381.7325240168935, "mean_abs_error_last_50": 657.8572574600252, "mean_pred_prob": 0.029052455606870354, "mean_pred_prob_last_10": 0.13427623811876402, "mean_pred_prob_last_25": 0.07906148316105828, "mean_pred_prob_last_50": 0.04834216688759625, "mean_token_accuracy": 0.8748748004436493, "step": 33830 }, { "epoch": 0.6015679163777932, "grad_norm": 3.061016660237201, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 930.9756489600735, "mean_abs_error_last_10": 412.8593136088219, "mean_abs_error_last_25": 591.6029817318984, "mean_abs_error_last_50": 731.7426431175397, "mean_pred_prob": 0.025933576785610056, "mean_pred_prob_last_10": 0.14014449326787143, "mean_pred_prob_last_25": 0.07329755731625483, "mean_pred_prob_last_50": 0.04416473050951027, "mean_token_accuracy": 0.8726833462715149, "step": 33840 }, { "epoch": 0.6017456846745951, "grad_norm": 1.258159278279141, "learning_rate": 0.0001, "loss": 0.816, "mean_abs_error": 243.2274412435032, "mean_abs_error_last_10": 63.10111137630182, "mean_abs_error_last_25": 99.35876744895398, "mean_abs_error_last_50": 166.89672006316178, "mean_pred_prob": 0.03739216676913202, "mean_pred_prob_last_10": 0.2074697956442833, "mean_pred_prob_last_25": 0.10810867799445986, "mean_pred_prob_last_50": 0.06372723132371902, "mean_token_accuracy": 0.8672141671180725, "step": 33850 }, { "epoch": 0.601923452971397, "grad_norm": 1.602818189844614, "learning_rate": 0.0001, "loss": 0.6139, "mean_abs_error": 494.92671570578824, "mean_abs_error_last_10": 296.5143187975776, "mean_abs_error_last_25": 369.4218604357693, "mean_abs_error_last_50": 379.3434891961263, "mean_pred_prob": 0.03201712323352694, "mean_pred_prob_last_10": 0.15822017155587673, "mean_pred_prob_last_25": 0.08746642861515283, "mean_pred_prob_last_50": 0.05378793063573539, "mean_token_accuracy": 0.8811738669872284, "step": 33860 }, { "epoch": 0.602101221268199, "grad_norm": 1.5987769777960208, "learning_rate": 0.0001, "loss": 0.6989, "mean_abs_error": 772.5290997122945, "mean_abs_error_last_10": 131.8891483934643, "mean_abs_error_last_25": 230.5933660632731, "mean_abs_error_last_50": 425.1049449439455, "mean_pred_prob": 0.03657331365975551, "mean_pred_prob_last_10": 0.1763507449766621, "mean_pred_prob_last_25": 0.10025378275895491, "mean_pred_prob_last_50": 0.06061237866524607, "mean_token_accuracy": 0.8855694830417633, "step": 33870 }, { "epoch": 0.602278989565001, "grad_norm": 1.1132304633223253, "learning_rate": 0.0001, "loss": 0.7427, "mean_abs_error": 1335.4160973656813, "mean_abs_error_last_10": 793.6823458468232, "mean_abs_error_last_25": 878.1139988831006, "mean_abs_error_last_50": 1026.9091160239461, "mean_pred_prob": 0.031584587891120466, "mean_pred_prob_last_10": 0.14860190266626888, "mean_pred_prob_last_25": 0.08349635044869501, "mean_pred_prob_last_50": 0.051916644522134445, "mean_token_accuracy": 0.870970493555069, "step": 33880 }, { "epoch": 0.602456757861803, "grad_norm": 1.2385219599390882, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 407.0794228259425, "mean_abs_error_last_10": 153.70062877286978, "mean_abs_error_last_25": 170.48759586678472, "mean_abs_error_last_50": 198.22654772848492, "mean_pred_prob": 0.038030822691507635, "mean_pred_prob_last_10": 0.18976861108094453, "mean_pred_prob_last_25": 0.10509117662440985, "mean_pred_prob_last_50": 0.06401635510846973, "mean_token_accuracy": 0.8765475273132324, "step": 33890 }, { "epoch": 0.6026345261586049, "grad_norm": 1.6409781021647378, "learning_rate": 0.0001, "loss": 0.6619, "mean_abs_error": 567.6436110831515, "mean_abs_error_last_10": 167.22604621915576, "mean_abs_error_last_25": 231.32808405488322, "mean_abs_error_last_50": 387.37506051215576, "mean_pred_prob": 0.04168512730393559, "mean_pred_prob_last_10": 0.19478862234391273, "mean_pred_prob_last_25": 0.1096910779364407, "mean_pred_prob_last_50": 0.06851848913356662, "mean_token_accuracy": 0.8717963397502899, "step": 33900 }, { "epoch": 0.6028122944554068, "grad_norm": 1.660296017406404, "learning_rate": 0.0001, "loss": 0.7704, "mean_abs_error": 254.11012299013242, "mean_abs_error_last_10": 82.75102258006544, "mean_abs_error_last_25": 129.11694640337652, "mean_abs_error_last_50": 141.88144785874903, "mean_pred_prob": 0.041310427337884904, "mean_pred_prob_last_10": 0.2027611831203103, "mean_pred_prob_last_25": 0.11360327014699578, "mean_pred_prob_last_50": 0.06885403702035546, "mean_token_accuracy": 0.8751242101192475, "step": 33910 }, { "epoch": 0.6029900627522088, "grad_norm": 1.3495987566362821, "learning_rate": 0.0001, "loss": 0.7589, "mean_abs_error": 394.0710345365343, "mean_abs_error_last_10": 59.19230308564828, "mean_abs_error_last_25": 131.8506074471661, "mean_abs_error_last_50": 216.36416081325405, "mean_pred_prob": 0.048968054540455344, "mean_pred_prob_last_10": 0.22324149273335933, "mean_pred_prob_last_25": 0.13158575501292943, "mean_pred_prob_last_50": 0.08098816317506134, "mean_token_accuracy": 0.8769107818603515, "step": 33920 }, { "epoch": 0.6031678310490107, "grad_norm": 1.9783114452270194, "learning_rate": 0.0001, "loss": 0.6335, "mean_abs_error": 162.99795477301717, "mean_abs_error_last_10": 28.577845010955308, "mean_abs_error_last_25": 58.99260313079111, "mean_abs_error_last_50": 117.38220836359548, "mean_pred_prob": 0.04699977347627282, "mean_pred_prob_last_10": 0.22584970965981482, "mean_pred_prob_last_25": 0.1266134824603796, "mean_pred_prob_last_50": 0.07795046223327518, "mean_token_accuracy": 0.8767520725727082, "step": 33930 }, { "epoch": 0.6033455993458127, "grad_norm": 1.0489879070035404, "learning_rate": 0.0001, "loss": 0.7462, "mean_abs_error": 494.8280121917243, "mean_abs_error_last_10": 94.55732408497666, "mean_abs_error_last_25": 212.07710671883382, "mean_abs_error_last_50": 307.4633964502745, "mean_pred_prob": 0.03670798267703503, "mean_pred_prob_last_10": 0.18304895609617233, "mean_pred_prob_last_25": 0.10045065162703395, "mean_pred_prob_last_50": 0.06188997915014625, "mean_token_accuracy": 0.8738932847976685, "step": 33940 }, { "epoch": 0.6035233676426146, "grad_norm": 2.9432037906908084, "learning_rate": 0.0001, "loss": 0.9814, "mean_abs_error": 221.65855975704972, "mean_abs_error_last_10": 76.33002085202784, "mean_abs_error_last_25": 87.51357827899854, "mean_abs_error_last_50": 137.99112624148, "mean_pred_prob": 0.047427576035261154, "mean_pred_prob_last_10": 0.2224679807201028, "mean_pred_prob_last_25": 0.1263926774263382, "mean_pred_prob_last_50": 0.07778587411157786, "mean_token_accuracy": 0.8718820929527282, "step": 33950 }, { "epoch": 0.6037011359394165, "grad_norm": 1.0036951999497916, "learning_rate": 0.0001, "loss": 0.755, "mean_abs_error": 308.00229252689553, "mean_abs_error_last_10": 73.4904512813628, "mean_abs_error_last_25": 135.6991638877324, "mean_abs_error_last_50": 186.8422369171564, "mean_pred_prob": 0.03116945254150778, "mean_pred_prob_last_10": 0.15323375426232816, "mean_pred_prob_last_25": 0.08380160462111234, "mean_pred_prob_last_50": 0.05121227619238198, "mean_token_accuracy": 0.8818021297454834, "step": 33960 }, { "epoch": 0.6038789042362185, "grad_norm": 2.765191816177201, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 869.5939148978496, "mean_abs_error_last_10": 256.3844990645734, "mean_abs_error_last_25": 326.1925142002972, "mean_abs_error_last_50": 472.15339463053186, "mean_pred_prob": 0.04127915138087701, "mean_pred_prob_last_10": 0.18951293852878734, "mean_pred_prob_last_25": 0.10937827826710418, "mean_pred_prob_last_50": 0.06782201986061409, "mean_token_accuracy": 0.8823664963245392, "step": 33970 }, { "epoch": 0.6040566725330204, "grad_norm": 2.933271304302339, "learning_rate": 0.0001, "loss": 0.8618, "mean_abs_error": 1185.697941666494, "mean_abs_error_last_10": 519.5132485163441, "mean_abs_error_last_25": 573.98539815039, "mean_abs_error_last_50": 757.5021013660659, "mean_pred_prob": 0.02568340774741955, "mean_pred_prob_last_10": 0.13775863173650577, "mean_pred_prob_last_25": 0.07264133886201307, "mean_pred_prob_last_50": 0.04325459601241164, "mean_token_accuracy": 0.8701766073703766, "step": 33980 }, { "epoch": 0.6042344408298224, "grad_norm": 1.4078635574405385, "learning_rate": 0.0001, "loss": 0.7644, "mean_abs_error": 1040.3187350627616, "mean_abs_error_last_10": 389.5407279778659, "mean_abs_error_last_25": 457.4585976088355, "mean_abs_error_last_50": 614.6832705547127, "mean_pred_prob": 0.02205834013293497, "mean_pred_prob_last_10": 0.11245767378713936, "mean_pred_prob_last_25": 0.061209225986385715, "mean_pred_prob_last_50": 0.03723855964490212, "mean_token_accuracy": 0.8679081916809082, "step": 33990 }, { "epoch": 0.6044122091266244, "grad_norm": 4.569898694991356, "learning_rate": 0.0001, "loss": 0.662, "mean_abs_error": 305.82859419229084, "mean_abs_error_last_10": 66.19811813806518, "mean_abs_error_last_25": 103.51070589995429, "mean_abs_error_last_50": 192.0610093070914, "mean_pred_prob": 0.03199854181148112, "mean_pred_prob_last_10": 0.18784728273749352, "mean_pred_prob_last_25": 0.09552046041935683, "mean_pred_prob_last_50": 0.05550816264003515, "mean_token_accuracy": 0.8788430631160736, "step": 34000 }, { "epoch": 0.6045899774234264, "grad_norm": 2.1510533376817773, "learning_rate": 0.0001, "loss": 0.7451, "mean_abs_error": 705.3393622859384, "mean_abs_error_last_10": 290.0852698250625, "mean_abs_error_last_25": 419.2084337066717, "mean_abs_error_last_50": 540.3603338401829, "mean_pred_prob": 0.05419576862768736, "mean_pred_prob_last_10": 0.23401285163126886, "mean_pred_prob_last_25": 0.1436322921246756, "mean_pred_prob_last_50": 0.08993411889241543, "mean_token_accuracy": 0.8724624335765838, "step": 34010 }, { "epoch": 0.6047677457202283, "grad_norm": 1.9108183993212342, "learning_rate": 0.0001, "loss": 0.8667, "mean_abs_error": 1122.8733750182614, "mean_abs_error_last_10": 764.3276277591133, "mean_abs_error_last_25": 811.8122564502041, "mean_abs_error_last_50": 926.3015724064401, "mean_pred_prob": 0.04533003341493895, "mean_pred_prob_last_10": 0.2115595150673471, "mean_pred_prob_last_25": 0.12427511002024402, "mean_pred_prob_last_50": 0.07644607908951002, "mean_token_accuracy": 0.873538488149643, "step": 34020 }, { "epoch": 0.6049455140170302, "grad_norm": 1.3821362847979703, "learning_rate": 0.0001, "loss": 0.7414, "mean_abs_error": 1319.7830946543686, "mean_abs_error_last_10": 628.1408991115989, "mean_abs_error_last_25": 755.90601758547, "mean_abs_error_last_50": 978.2562344041592, "mean_pred_prob": 0.030580056786129718, "mean_pred_prob_last_10": 0.15676915725052823, "mean_pred_prob_last_25": 0.08536477453599219, "mean_pred_prob_last_50": 0.05133350300311577, "mean_token_accuracy": 0.8700054049491882, "step": 34030 }, { "epoch": 0.6051232823138322, "grad_norm": 1.6196716856677285, "learning_rate": 0.0001, "loss": 0.634, "mean_abs_error": 1167.2935910910614, "mean_abs_error_last_10": 632.3497084250228, "mean_abs_error_last_25": 709.1957937432664, "mean_abs_error_last_50": 846.2189316543727, "mean_pred_prob": 0.028589543393172788, "mean_pred_prob_last_10": 0.12403776058054064, "mean_pred_prob_last_25": 0.07348722156893928, "mean_pred_prob_last_50": 0.046155263492255474, "mean_token_accuracy": 0.8780804574489594, "step": 34040 }, { "epoch": 0.6053010506106341, "grad_norm": 2.9712154777594764, "learning_rate": 0.0001, "loss": 0.6822, "mean_abs_error": 450.86679385029066, "mean_abs_error_last_10": 169.36571344912304, "mean_abs_error_last_25": 211.5408510834542, "mean_abs_error_last_50": 303.30014253521955, "mean_pred_prob": 0.039863448805408555, "mean_pred_prob_last_10": 0.20221408921061085, "mean_pred_prob_last_25": 0.11162364416522905, "mean_pred_prob_last_50": 0.06777546941302717, "mean_token_accuracy": 0.8825695633888244, "step": 34050 }, { "epoch": 0.605478818907436, "grad_norm": 1.7066424970325822, "learning_rate": 0.0001, "loss": 0.8491, "mean_abs_error": 426.3129152689688, "mean_abs_error_last_10": 171.77634873467983, "mean_abs_error_last_25": 296.66592513242796, "mean_abs_error_last_50": 336.2891043112706, "mean_pred_prob": 0.024243591353297234, "mean_pred_prob_last_10": 0.13677166141569613, "mean_pred_prob_last_25": 0.07095504244789481, "mean_pred_prob_last_50": 0.041325894417241216, "mean_token_accuracy": 0.8712861239910126, "step": 34060 }, { "epoch": 0.605656587204238, "grad_norm": 1.840227042074016, "learning_rate": 0.0001, "loss": 0.8509, "mean_abs_error": 945.3893291118189, "mean_abs_error_last_10": 448.9048774814024, "mean_abs_error_last_25": 563.9642340843585, "mean_abs_error_last_50": 700.9274043146847, "mean_pred_prob": 0.03598325404163916, "mean_pred_prob_last_10": 0.1728748817316955, "mean_pred_prob_last_25": 0.09760531025240198, "mean_pred_prob_last_50": 0.060652244044467805, "mean_token_accuracy": 0.8659451484680176, "step": 34070 }, { "epoch": 0.6058343555010399, "grad_norm": 1.1517004015309034, "learning_rate": 0.0001, "loss": 0.7571, "mean_abs_error": 363.20535478743943, "mean_abs_error_last_10": 101.56499265733062, "mean_abs_error_last_25": 183.26897971133462, "mean_abs_error_last_50": 294.3954415357213, "mean_pred_prob": 0.04107577939867042, "mean_pred_prob_last_10": 0.21900853831321002, "mean_pred_prob_last_25": 0.11614003411959857, "mean_pred_prob_last_50": 0.0686000632122159, "mean_token_accuracy": 0.8707768857479096, "step": 34080 }, { "epoch": 0.6060121237978419, "grad_norm": 1.3150063910916243, "learning_rate": 0.0001, "loss": 0.664, "mean_abs_error": 195.77044430655823, "mean_abs_error_last_10": 27.15134460404106, "mean_abs_error_last_25": 53.456151888772546, "mean_abs_error_last_50": 90.98156744078673, "mean_pred_prob": 0.05182807566598058, "mean_pred_prob_last_10": 0.2521138271316886, "mean_pred_prob_last_25": 0.14425114439800382, "mean_pred_prob_last_50": 0.0878297514282167, "mean_token_accuracy": 0.8717835485935211, "step": 34090 }, { "epoch": 0.6061898920946438, "grad_norm": 2.16035512519515, "learning_rate": 0.0001, "loss": 0.8873, "mean_abs_error": 808.7195796235337, "mean_abs_error_last_10": 481.3281678734535, "mean_abs_error_last_25": 516.034847477767, "mean_abs_error_last_50": 575.3040665612891, "mean_pred_prob": 0.05413889335177373, "mean_pred_prob_last_10": 0.275226109373034, "mean_pred_prob_last_25": 0.1500281183660263, "mean_pred_prob_last_50": 0.09065975481353235, "mean_token_accuracy": 0.8715761721134185, "step": 34100 }, { "epoch": 0.6063676603914457, "grad_norm": 3.4557050910274754, "learning_rate": 0.0001, "loss": 0.6674, "mean_abs_error": 561.2726522836899, "mean_abs_error_last_10": 228.18489556644863, "mean_abs_error_last_25": 286.1863937723544, "mean_abs_error_last_50": 384.1088774084136, "mean_pred_prob": 0.04377887544396799, "mean_pred_prob_last_10": 0.21373380540171638, "mean_pred_prob_last_25": 0.12299554575583897, "mean_pred_prob_last_50": 0.07462877820944414, "mean_token_accuracy": 0.880022656917572, "step": 34110 }, { "epoch": 0.6065454286882478, "grad_norm": 2.1642064675122716, "learning_rate": 0.0001, "loss": 0.7562, "mean_abs_error": 549.1148641834154, "mean_abs_error_last_10": 116.33805330526418, "mean_abs_error_last_25": 341.2522683523664, "mean_abs_error_last_50": 437.43585043727353, "mean_pred_prob": 0.04478892246261239, "mean_pred_prob_last_10": 0.20538891293108463, "mean_pred_prob_last_25": 0.12022063359618188, "mean_pred_prob_last_50": 0.07391207981854678, "mean_token_accuracy": 0.8649758636951447, "step": 34120 }, { "epoch": 0.6067231969850497, "grad_norm": 1.041633263464797, "learning_rate": 0.0001, "loss": 0.8328, "mean_abs_error": 405.72217600296636, "mean_abs_error_last_10": 132.3674907720286, "mean_abs_error_last_25": 145.8272881755789, "mean_abs_error_last_50": 208.52306620638973, "mean_pred_prob": 0.04598125956836156, "mean_pred_prob_last_10": 0.19553595287725328, "mean_pred_prob_last_25": 0.1138633661554195, "mean_pred_prob_last_50": 0.07337563117034733, "mean_token_accuracy": 0.8610018372535706, "step": 34130 }, { "epoch": 0.6069009652818517, "grad_norm": 1.7359288561397426, "learning_rate": 0.0001, "loss": 0.8359, "mean_abs_error": 975.7396542302968, "mean_abs_error_last_10": 335.0777906829113, "mean_abs_error_last_25": 468.4031026868496, "mean_abs_error_last_50": 643.3093809112095, "mean_pred_prob": 0.0255048676248407, "mean_pred_prob_last_10": 0.1272536495467648, "mean_pred_prob_last_25": 0.07035554543836042, "mean_pred_prob_last_50": 0.042594934446970004, "mean_token_accuracy": 0.8664466917514801, "step": 34140 }, { "epoch": 0.6070787335786536, "grad_norm": 3.1326628069998295, "learning_rate": 0.0001, "loss": 0.9175, "mean_abs_error": 1495.4826224815383, "mean_abs_error_last_10": 964.7260831238848, "mean_abs_error_last_25": 1050.1167128566353, "mean_abs_error_last_50": 1237.546543419354, "mean_pred_prob": 0.01872249893931439, "mean_pred_prob_last_10": 0.09958146656572353, "mean_pred_prob_last_25": 0.05256934427015949, "mean_pred_prob_last_50": 0.03144849833333865, "mean_token_accuracy": 0.8691931903362274, "step": 34150 }, { "epoch": 0.6072565018754555, "grad_norm": 0.9970159659929585, "learning_rate": 0.0001, "loss": 0.7108, "mean_abs_error": 1090.7593745079143, "mean_abs_error_last_10": 563.9702719232544, "mean_abs_error_last_25": 650.5690130784966, "mean_abs_error_last_50": 782.5248209142144, "mean_pred_prob": 0.053970541071612387, "mean_pred_prob_last_10": 0.26082760028948543, "mean_pred_prob_last_25": 0.14297951854241547, "mean_pred_prob_last_50": 0.08978337027365342, "mean_token_accuracy": 0.8846324324607849, "step": 34160 }, { "epoch": 0.6074342701722575, "grad_norm": 1.459454892100208, "learning_rate": 0.0001, "loss": 0.7457, "mean_abs_error": 274.45877558326595, "mean_abs_error_last_10": 42.907977658985374, "mean_abs_error_last_25": 129.25814825994183, "mean_abs_error_last_50": 230.4685034694515, "mean_pred_prob": 0.027983926283195616, "mean_pred_prob_last_10": 0.1658548057079315, "mean_pred_prob_last_25": 0.08413913659751415, "mean_pred_prob_last_50": 0.04817440127953887, "mean_token_accuracy": 0.8715191304683685, "step": 34170 }, { "epoch": 0.6076120384690594, "grad_norm": 2.0725065437499377, "learning_rate": 0.0001, "loss": 0.6796, "mean_abs_error": 252.352306621274, "mean_abs_error_last_10": 205.47665007336855, "mean_abs_error_last_25": 245.05648941026897, "mean_abs_error_last_50": 243.9308955429064, "mean_pred_prob": 0.04563514171168208, "mean_pred_prob_last_10": 0.21164024863392114, "mean_pred_prob_last_25": 0.12277309568598867, "mean_pred_prob_last_50": 0.07557076942175626, "mean_token_accuracy": 0.8873047411441803, "step": 34180 }, { "epoch": 0.6077898067658614, "grad_norm": 1.8450912168739624, "learning_rate": 0.0001, "loss": 0.709, "mean_abs_error": 151.3395841410188, "mean_abs_error_last_10": 42.192527206788704, "mean_abs_error_last_25": 76.55716610160351, "mean_abs_error_last_50": 98.20540110856305, "mean_pred_prob": 0.04206662178039551, "mean_pred_prob_last_10": 0.224862315133214, "mean_pred_prob_last_25": 0.11738692373037338, "mean_pred_prob_last_50": 0.07160818474367261, "mean_token_accuracy": 0.8726019322872162, "step": 34190 }, { "epoch": 0.6079675750626633, "grad_norm": 1.9385217281783529, "learning_rate": 0.0001, "loss": 0.7815, "mean_abs_error": 354.81735807187295, "mean_abs_error_last_10": 114.35226687640329, "mean_abs_error_last_25": 125.7474912950612, "mean_abs_error_last_50": 218.5370915838419, "mean_pred_prob": 0.02726456292439252, "mean_pred_prob_last_10": 0.1466540465131402, "mean_pred_prob_last_25": 0.07790101813152432, "mean_pred_prob_last_50": 0.04620956657454371, "mean_token_accuracy": 0.8688057899475098, "step": 34200 }, { "epoch": 0.6081453433594652, "grad_norm": 1.5093091370445675, "learning_rate": 0.0001, "loss": 0.8134, "mean_abs_error": 457.3662462229583, "mean_abs_error_last_10": 102.32481201785384, "mean_abs_error_last_25": 142.53724709167616, "mean_abs_error_last_50": 305.16891252911466, "mean_pred_prob": 0.0433237309451215, "mean_pred_prob_last_10": 0.1874195558950305, "mean_pred_prob_last_25": 0.11411069931928068, "mean_pred_prob_last_50": 0.07186687879730017, "mean_token_accuracy": 0.8755546748638153, "step": 34210 }, { "epoch": 0.6083231116562672, "grad_norm": 0.7343056083726979, "learning_rate": 0.0001, "loss": 0.9531, "mean_abs_error": 1381.1222451400583, "mean_abs_error_last_10": 603.0536533763841, "mean_abs_error_last_25": 741.3178569857579, "mean_abs_error_last_50": 878.1288950307368, "mean_pred_prob": 0.023410380055429413, "mean_pred_prob_last_10": 0.12694044533418491, "mean_pred_prob_last_25": 0.06702858081553131, "mean_pred_prob_last_50": 0.04021298557927366, "mean_token_accuracy": 0.868025255203247, "step": 34220 }, { "epoch": 0.6085008799530691, "grad_norm": 0.8897602887972966, "learning_rate": 0.0001, "loss": 0.711, "mean_abs_error": 255.86387900240143, "mean_abs_error_last_10": 49.58806539732448, "mean_abs_error_last_25": 81.87813879035329, "mean_abs_error_last_50": 133.7960086585025, "mean_pred_prob": 0.06148966660257429, "mean_pred_prob_last_10": 0.2812373124063015, "mean_pred_prob_last_25": 0.16086281538009645, "mean_pred_prob_last_50": 0.10011678035371005, "mean_token_accuracy": 0.8682887554168701, "step": 34230 }, { "epoch": 0.6086786482498712, "grad_norm": 1.2434915994768252, "learning_rate": 0.0001, "loss": 0.8231, "mean_abs_error": 530.262623314261, "mean_abs_error_last_10": 144.79454504922228, "mean_abs_error_last_25": 196.41468804765046, "mean_abs_error_last_50": 269.1047431507273, "mean_pred_prob": 0.0343575578648597, "mean_pred_prob_last_10": 0.17006738856434822, "mean_pred_prob_last_25": 0.09456847654655576, "mean_pred_prob_last_50": 0.05747387344017625, "mean_token_accuracy": 0.8692316949367523, "step": 34240 }, { "epoch": 0.6088564165466731, "grad_norm": 1.072929272147868, "learning_rate": 0.0001, "loss": 0.6851, "mean_abs_error": 1404.6415372772803, "mean_abs_error_last_10": 1055.950067029767, "mean_abs_error_last_25": 1003.653892286228, "mean_abs_error_last_50": 1222.3918789728937, "mean_pred_prob": 0.022432211379054934, "mean_pred_prob_last_10": 0.11367386841593544, "mean_pred_prob_last_25": 0.06280533732206095, "mean_pred_prob_last_50": 0.0383488579391269, "mean_token_accuracy": 0.8740528702735901, "step": 34250 }, { "epoch": 0.609034184843475, "grad_norm": 1.3034879990061516, "learning_rate": 0.0001, "loss": 0.5597, "mean_abs_error": 415.3998427556635, "mean_abs_error_last_10": 104.51486303094468, "mean_abs_error_last_25": 169.5746801167847, "mean_abs_error_last_50": 233.89052346301324, "mean_pred_prob": 0.05288218964124099, "mean_pred_prob_last_10": 0.2536506413074676, "mean_pred_prob_last_25": 0.14463203540071845, "mean_pred_prob_last_50": 0.0889698570186738, "mean_token_accuracy": 0.8754410088062287, "step": 34260 }, { "epoch": 0.609211953140277, "grad_norm": 0.8655271014399833, "learning_rate": 0.0001, "loss": 0.5967, "mean_abs_error": 154.81867039934556, "mean_abs_error_last_10": 20.104475133152835, "mean_abs_error_last_25": 38.39012406269525, "mean_abs_error_last_50": 100.1521414100339, "mean_pred_prob": 0.05267956592142582, "mean_pred_prob_last_10": 0.26200658529996873, "mean_pred_prob_last_25": 0.14787193723022937, "mean_pred_prob_last_50": 0.09003638979047537, "mean_token_accuracy": 0.8821271359920502, "step": 34270 }, { "epoch": 0.6093897214370789, "grad_norm": 2.5868008975891503, "learning_rate": 0.0001, "loss": 0.7379, "mean_abs_error": 450.323542532595, "mean_abs_error_last_10": 141.5769721538194, "mean_abs_error_last_25": 222.55859783532802, "mean_abs_error_last_50": 343.964138132775, "mean_pred_prob": 0.03507529309717938, "mean_pred_prob_last_10": 0.1644641277147457, "mean_pred_prob_last_25": 0.09498710413463414, "mean_pred_prob_last_50": 0.05939167531905696, "mean_token_accuracy": 0.8763127088546753, "step": 34280 }, { "epoch": 0.6095674897338809, "grad_norm": 1.2694879453904047, "learning_rate": 0.0001, "loss": 0.7631, "mean_abs_error": 442.9998155698595, "mean_abs_error_last_10": 76.66831154816684, "mean_abs_error_last_25": 247.85572622625835, "mean_abs_error_last_50": 352.45916768802283, "mean_pred_prob": 0.03182924825232476, "mean_pred_prob_last_10": 0.1647675795480609, "mean_pred_prob_last_25": 0.0899892820045352, "mean_pred_prob_last_50": 0.05434412369504571, "mean_token_accuracy": 0.8605438292026519, "step": 34290 }, { "epoch": 0.6097452580306828, "grad_norm": 1.420397395169867, "learning_rate": 0.0001, "loss": 0.7975, "mean_abs_error": 264.9723489416541, "mean_abs_error_last_10": 71.31611766317646, "mean_abs_error_last_25": 85.22387725816286, "mean_abs_error_last_50": 134.74407490979283, "mean_pred_prob": 0.045933796744793656, "mean_pred_prob_last_10": 0.19771751593798398, "mean_pred_prob_last_25": 0.11858339169993996, "mean_pred_prob_last_50": 0.07634004782885313, "mean_token_accuracy": 0.8648903667926788, "step": 34300 }, { "epoch": 0.6099230263274847, "grad_norm": 1.3459788854078962, "learning_rate": 0.0001, "loss": 0.7394, "mean_abs_error": 233.27512453311687, "mean_abs_error_last_10": 133.53751842414937, "mean_abs_error_last_25": 180.30704051274247, "mean_abs_error_last_50": 190.6529441287135, "mean_pred_prob": 0.04660033667460084, "mean_pred_prob_last_10": 0.21584926974028348, "mean_pred_prob_last_25": 0.12177548902109266, "mean_pred_prob_last_50": 0.07549772928468883, "mean_token_accuracy": 0.8725605964660644, "step": 34310 }, { "epoch": 0.6101007946242867, "grad_norm": 2.9874007323869165, "learning_rate": 0.0001, "loss": 0.7643, "mean_abs_error": 887.4253591155041, "mean_abs_error_last_10": 288.26875486582975, "mean_abs_error_last_25": 463.536835299014, "mean_abs_error_last_50": 593.4646013384436, "mean_pred_prob": 0.040213745455548636, "mean_pred_prob_last_10": 0.19278374736313708, "mean_pred_prob_last_25": 0.10660211479407736, "mean_pred_prob_last_50": 0.06698157849023119, "mean_token_accuracy": 0.8675971865653992, "step": 34320 }, { "epoch": 0.6102785629210886, "grad_norm": 1.2054045606894535, "learning_rate": 0.0001, "loss": 0.7455, "mean_abs_error": 562.4468496957194, "mean_abs_error_last_10": 217.49072544709475, "mean_abs_error_last_25": 387.16806325609974, "mean_abs_error_last_50": 464.53955894379544, "mean_pred_prob": 0.019690803124103696, "mean_pred_prob_last_10": 0.10500156793277711, "mean_pred_prob_last_25": 0.055267715360969305, "mean_pred_prob_last_50": 0.03299044463783503, "mean_token_accuracy": 0.8673815488815307, "step": 34330 }, { "epoch": 0.6104563312178906, "grad_norm": 1.1966750567265516, "learning_rate": 0.0001, "loss": 0.7207, "mean_abs_error": 836.7318041484984, "mean_abs_error_last_10": 554.7644459455112, "mean_abs_error_last_25": 605.953352319837, "mean_abs_error_last_50": 666.0056536562413, "mean_pred_prob": 0.04108459732669871, "mean_pred_prob_last_10": 0.19488056284317282, "mean_pred_prob_last_25": 0.109547296713572, "mean_pred_prob_last_50": 0.06842599624069408, "mean_token_accuracy": 0.86304572224617, "step": 34340 }, { "epoch": 0.6106340995146925, "grad_norm": 1.7154183306228412, "learning_rate": 0.0001, "loss": 0.7921, "mean_abs_error": 502.77276396593714, "mean_abs_error_last_10": 130.8034138951927, "mean_abs_error_last_25": 186.6793719803777, "mean_abs_error_last_50": 315.19452742798865, "mean_pred_prob": 0.023790818359702825, "mean_pred_prob_last_10": 0.11940385736525058, "mean_pred_prob_last_25": 0.06602219380438328, "mean_pred_prob_last_50": 0.03944511516019702, "mean_token_accuracy": 0.8739948511123657, "step": 34350 }, { "epoch": 0.6108118678114945, "grad_norm": 1.1157842862872585, "learning_rate": 0.0001, "loss": 0.7136, "mean_abs_error": 597.0868827236166, "mean_abs_error_last_10": 251.23564236245198, "mean_abs_error_last_25": 305.0959045869076, "mean_abs_error_last_50": 381.3316338038724, "mean_pred_prob": 0.04150048696610611, "mean_pred_prob_last_10": 0.2117166127311066, "mean_pred_prob_last_25": 0.11836850661202333, "mean_pred_prob_last_50": 0.07089255076716654, "mean_token_accuracy": 0.8696335971355438, "step": 34360 }, { "epoch": 0.6109896361082965, "grad_norm": 1.0820595985980672, "learning_rate": 0.0001, "loss": 0.5903, "mean_abs_error": 114.48402834383725, "mean_abs_error_last_10": 25.375279555323907, "mean_abs_error_last_25": 48.62337504667503, "mean_abs_error_last_50": 77.8219262684755, "mean_pred_prob": 0.04094307944178581, "mean_pred_prob_last_10": 0.2121264636516571, "mean_pred_prob_last_25": 0.11373997367918491, "mean_pred_prob_last_50": 0.06927629690617323, "mean_token_accuracy": 0.8881747841835022, "step": 34370 }, { "epoch": 0.6111674044050984, "grad_norm": 1.487085468199977, "learning_rate": 0.0001, "loss": 0.6802, "mean_abs_error": 305.47183986397926, "mean_abs_error_last_10": 72.27529053989089, "mean_abs_error_last_25": 82.99847870025124, "mean_abs_error_last_50": 167.04121532708453, "mean_pred_prob": 0.05151927830884233, "mean_pred_prob_last_10": 0.24738922831602395, "mean_pred_prob_last_25": 0.13993854830041527, "mean_pred_prob_last_50": 0.0851370814954862, "mean_token_accuracy": 0.8726487636566163, "step": 34380 }, { "epoch": 0.6113451727019004, "grad_norm": 1.7631977376324766, "learning_rate": 0.0001, "loss": 0.7239, "mean_abs_error": 348.9131156108439, "mean_abs_error_last_10": 179.34311271456303, "mean_abs_error_last_25": 228.01134654935103, "mean_abs_error_last_50": 256.06540181369917, "mean_pred_prob": 0.04895995929837227, "mean_pred_prob_last_10": 0.22605518866330385, "mean_pred_prob_last_25": 0.12940493719652296, "mean_pred_prob_last_50": 0.08108028364367784, "mean_token_accuracy": 0.86717329621315, "step": 34390 }, { "epoch": 0.6115229409987023, "grad_norm": 1.9107604330688759, "learning_rate": 0.0001, "loss": 0.7977, "mean_abs_error": 997.8339215615406, "mean_abs_error_last_10": 275.61253715700053, "mean_abs_error_last_25": 441.3182664742247, "mean_abs_error_last_50": 581.1995638494521, "mean_pred_prob": 0.02875709519139491, "mean_pred_prob_last_10": 0.12955146168824286, "mean_pred_prob_last_25": 0.07373123606666923, "mean_pred_prob_last_50": 0.04657205700641498, "mean_token_accuracy": 0.8673410654067993, "step": 34400 }, { "epoch": 0.6117007092955042, "grad_norm": 1.4100536915455584, "learning_rate": 0.0001, "loss": 0.7334, "mean_abs_error": 295.2881206135456, "mean_abs_error_last_10": 132.9921605963261, "mean_abs_error_last_25": 165.51272494667256, "mean_abs_error_last_50": 185.69526274082364, "mean_pred_prob": 0.04587923027575016, "mean_pred_prob_last_10": 0.2218317475169897, "mean_pred_prob_last_25": 0.12694609677419066, "mean_pred_prob_last_50": 0.0773583859205246, "mean_token_accuracy": 0.8592877209186554, "step": 34410 }, { "epoch": 0.6118784775923062, "grad_norm": 1.2850567550720584, "learning_rate": 0.0001, "loss": 0.7423, "mean_abs_error": 765.8819226852916, "mean_abs_error_last_10": 269.29822010593364, "mean_abs_error_last_25": 323.728435526143, "mean_abs_error_last_50": 434.76561877062807, "mean_pred_prob": 0.03908003386459313, "mean_pred_prob_last_10": 0.1749005070887506, "mean_pred_prob_last_25": 0.10335829922114499, "mean_pred_prob_last_50": 0.06500099196564406, "mean_token_accuracy": 0.8712233424186706, "step": 34420 }, { "epoch": 0.6120562458891081, "grad_norm": 1.146384794115321, "learning_rate": 0.0001, "loss": 0.8705, "mean_abs_error": 676.0305338864879, "mean_abs_error_last_10": 241.88799652937877, "mean_abs_error_last_25": 367.8845562268302, "mean_abs_error_last_50": 494.80003732786236, "mean_pred_prob": 0.03211903015035204, "mean_pred_prob_last_10": 0.16354240052751265, "mean_pred_prob_last_25": 0.09112290377961471, "mean_pred_prob_last_50": 0.05525905534741469, "mean_token_accuracy": 0.8579772889614106, "step": 34430 }, { "epoch": 0.6122340141859101, "grad_norm": 1.90510314404451, "learning_rate": 0.0001, "loss": 0.7764, "mean_abs_error": 1548.7525459934627, "mean_abs_error_last_10": 1066.7658465118052, "mean_abs_error_last_25": 1305.3407334147964, "mean_abs_error_last_50": 1480.3442295555024, "mean_pred_prob": 0.05235623971966561, "mean_pred_prob_last_10": 0.22906156342869508, "mean_pred_prob_last_25": 0.13758090937844827, "mean_pred_prob_last_50": 0.08644441470059974, "mean_token_accuracy": 0.875071781873703, "step": 34440 }, { "epoch": 0.612411782482712, "grad_norm": 1.5310855414097817, "learning_rate": 0.0001, "loss": 0.7982, "mean_abs_error": 613.2016688735246, "mean_abs_error_last_10": 138.43156196223305, "mean_abs_error_last_25": 189.56459015320283, "mean_abs_error_last_50": 374.08634355823426, "mean_pred_prob": 0.017420493951067328, "mean_pred_prob_last_10": 0.0949616651982069, "mean_pred_prob_last_25": 0.05181958097964525, "mean_pred_prob_last_50": 0.02982673579826951, "mean_token_accuracy": 0.86804980635643, "step": 34450 }, { "epoch": 0.6125895507795139, "grad_norm": 1.3504372232444508, "learning_rate": 0.0001, "loss": 0.6345, "mean_abs_error": 68.26476788448255, "mean_abs_error_last_10": 12.283525056631081, "mean_abs_error_last_25": 28.55855382433928, "mean_abs_error_last_50": 39.87830030457421, "mean_pred_prob": 0.06662965295836329, "mean_pred_prob_last_10": 0.30961689725518227, "mean_pred_prob_last_25": 0.176593966409564, "mean_pred_prob_last_50": 0.11069637183099985, "mean_token_accuracy": 0.8775436520576477, "step": 34460 }, { "epoch": 0.6127673190763159, "grad_norm": 1.50111037540572, "learning_rate": 0.0001, "loss": 0.8022, "mean_abs_error": 404.748108331152, "mean_abs_error_last_10": 149.20404568862847, "mean_abs_error_last_25": 177.74192240974617, "mean_abs_error_last_50": 251.49228037133315, "mean_pred_prob": 0.040632465318776664, "mean_pred_prob_last_10": 0.19872978653293102, "mean_pred_prob_last_25": 0.10962616741890088, "mean_pred_prob_last_50": 0.06787145521957427, "mean_token_accuracy": 0.8711487054824829, "step": 34470 }, { "epoch": 0.6129450873731179, "grad_norm": 2.475844497742935, "learning_rate": 0.0001, "loss": 0.751, "mean_abs_error": 1001.5541267053459, "mean_abs_error_last_10": 586.7041115450847, "mean_abs_error_last_25": 630.2788996302146, "mean_abs_error_last_50": 788.5271200776367, "mean_pred_prob": 0.03434945698827505, "mean_pred_prob_last_10": 0.15748022351472174, "mean_pred_prob_last_25": 0.09373610953916796, "mean_pred_prob_last_50": 0.058041621815937104, "mean_token_accuracy": 0.8706257522106171, "step": 34480 }, { "epoch": 0.6131228556699199, "grad_norm": 1.3693156626516174, "learning_rate": 0.0001, "loss": 0.6832, "mean_abs_error": 426.88194368583027, "mean_abs_error_last_10": 70.56334150384176, "mean_abs_error_last_25": 138.8062775305704, "mean_abs_error_last_50": 203.564909047236, "mean_pred_prob": 0.04485107092186809, "mean_pred_prob_last_10": 0.21383300572633743, "mean_pred_prob_last_25": 0.12079415880143643, "mean_pred_prob_last_50": 0.07559207789599895, "mean_token_accuracy": 0.8809370040893555, "step": 34490 }, { "epoch": 0.6133006239667218, "grad_norm": 1.621066038624834, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 630.9173054145564, "mean_abs_error_last_10": 172.67087117220618, "mean_abs_error_last_25": 192.46595170684213, "mean_abs_error_last_50": 343.61164305835354, "mean_pred_prob": 0.033024496375583114, "mean_pred_prob_last_10": 0.16849150185007603, "mean_pred_prob_last_25": 0.09529232837958261, "mean_pred_prob_last_50": 0.057225553202442825, "mean_token_accuracy": 0.8654052436351776, "step": 34500 }, { "epoch": 0.6134783922635237, "grad_norm": 1.347150147282663, "learning_rate": 0.0001, "loss": 0.6732, "mean_abs_error": 821.5955458391738, "mean_abs_error_last_10": 274.3976552644175, "mean_abs_error_last_25": 323.9654961751345, "mean_abs_error_last_50": 532.4679774178618, "mean_pred_prob": 0.03224595655628946, "mean_pred_prob_last_10": 0.15562039717915468, "mean_pred_prob_last_25": 0.08760063932859338, "mean_pred_prob_last_50": 0.0542871177312918, "mean_token_accuracy": 0.879466050863266, "step": 34510 }, { "epoch": 0.6136561605603257, "grad_norm": 1.6436840083796413, "learning_rate": 0.0001, "loss": 0.5479, "mean_abs_error": 789.1994412024203, "mean_abs_error_last_10": 336.548446269375, "mean_abs_error_last_25": 458.29902101415917, "mean_abs_error_last_50": 577.1653672507116, "mean_pred_prob": 0.03352087653474882, "mean_pred_prob_last_10": 0.1668851485592313, "mean_pred_prob_last_25": 0.09240619097836315, "mean_pred_prob_last_50": 0.056601990427589045, "mean_token_accuracy": 0.8827292501926423, "step": 34520 }, { "epoch": 0.6138339288571276, "grad_norm": 1.8135620618614672, "learning_rate": 0.0001, "loss": 0.8194, "mean_abs_error": 441.365681157875, "mean_abs_error_last_10": 95.05404072396549, "mean_abs_error_last_25": 165.81144661387532, "mean_abs_error_last_50": 234.0437485545495, "mean_pred_prob": 0.034541217517107724, "mean_pred_prob_last_10": 0.1672115907073021, "mean_pred_prob_last_25": 0.09456777330487967, "mean_pred_prob_last_50": 0.05871250880882144, "mean_token_accuracy": 0.8790317118167877, "step": 34530 }, { "epoch": 0.6140116971539296, "grad_norm": 1.9513250843347312, "learning_rate": 0.0001, "loss": 0.7195, "mean_abs_error": 414.49870308927495, "mean_abs_error_last_10": 51.71346547810349, "mean_abs_error_last_25": 111.08729976071668, "mean_abs_error_last_50": 229.88423203865744, "mean_pred_prob": 0.03859295852016657, "mean_pred_prob_last_10": 0.19383639749139547, "mean_pred_prob_last_25": 0.10502699529752135, "mean_pred_prob_last_50": 0.06441140747629107, "mean_token_accuracy": 0.872019237279892, "step": 34540 }, { "epoch": 0.6141894654507315, "grad_norm": 1.4032803840966146, "learning_rate": 0.0001, "loss": 0.7293, "mean_abs_error": 200.89613282398972, "mean_abs_error_last_10": 57.31435129605669, "mean_abs_error_last_25": 89.41301732741479, "mean_abs_error_last_50": 116.41192759050327, "mean_pred_prob": 0.05464943540282548, "mean_pred_prob_last_10": 0.27196327559649947, "mean_pred_prob_last_25": 0.15083822682499887, "mean_pred_prob_last_50": 0.09210887067019939, "mean_token_accuracy": 0.8719629943370819, "step": 34550 }, { "epoch": 0.6143672337475334, "grad_norm": 2.639145526465611, "learning_rate": 0.0001, "loss": 0.8328, "mean_abs_error": 563.610359486358, "mean_abs_error_last_10": 157.3441091683374, "mean_abs_error_last_25": 206.58569557529285, "mean_abs_error_last_50": 333.0824565677684, "mean_pred_prob": 0.03654228246305138, "mean_pred_prob_last_10": 0.1667027711868286, "mean_pred_prob_last_25": 0.10050814496353269, "mean_pred_prob_last_50": 0.061971033830195664, "mean_token_accuracy": 0.8711625814437867, "step": 34560 }, { "epoch": 0.6145450020443354, "grad_norm": 2.0808875924381987, "learning_rate": 0.0001, "loss": 0.6827, "mean_abs_error": 628.0906456330265, "mean_abs_error_last_10": 242.56751719219818, "mean_abs_error_last_25": 295.85931369567226, "mean_abs_error_last_50": 443.71614872923044, "mean_pred_prob": 0.025341230945196004, "mean_pred_prob_last_10": 0.1297209815820679, "mean_pred_prob_last_25": 0.07034047220367938, "mean_pred_prob_last_50": 0.04255454660160467, "mean_token_accuracy": 0.8758702397346496, "step": 34570 }, { "epoch": 0.6147227703411373, "grad_norm": 0.7607668677329983, "learning_rate": 0.0001, "loss": 0.6902, "mean_abs_error": 353.95013239355666, "mean_abs_error_last_10": 95.99065113386078, "mean_abs_error_last_25": 136.1628439062609, "mean_abs_error_last_50": 209.09737665028925, "mean_pred_prob": 0.046738877054303886, "mean_pred_prob_last_10": 0.21893604174256326, "mean_pred_prob_last_25": 0.12381670642644167, "mean_pred_prob_last_50": 0.07645714725367725, "mean_token_accuracy": 0.8786565005779267, "step": 34580 }, { "epoch": 0.6149005386379394, "grad_norm": 1.4198230350896708, "learning_rate": 0.0001, "loss": 0.9714, "mean_abs_error": 1450.814152898056, "mean_abs_error_last_10": 841.1424052697188, "mean_abs_error_last_25": 932.2079212216744, "mean_abs_error_last_50": 1135.9176090546366, "mean_pred_prob": 0.02753980960260378, "mean_pred_prob_last_10": 0.14051753219828242, "mean_pred_prob_last_25": 0.07474157391116024, "mean_pred_prob_last_50": 0.04616888216114603, "mean_token_accuracy": 0.8721162915229798, "step": 34590 }, { "epoch": 0.6150783069347413, "grad_norm": 1.8854477930140596, "learning_rate": 0.0001, "loss": 0.6381, "mean_abs_error": 350.10228967909876, "mean_abs_error_last_10": 135.55714346068166, "mean_abs_error_last_25": 217.8506638681948, "mean_abs_error_last_50": 262.1646704736833, "mean_pred_prob": 0.040512799355201426, "mean_pred_prob_last_10": 0.17596784457564354, "mean_pred_prob_last_25": 0.10624028807505966, "mean_pred_prob_last_50": 0.06676529948599637, "mean_token_accuracy": 0.8762386500835418, "step": 34600 }, { "epoch": 0.6152560752315432, "grad_norm": 2.0651425817808216, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 303.67037568454026, "mean_abs_error_last_10": 50.037850508158215, "mean_abs_error_last_25": 79.72347104456824, "mean_abs_error_last_50": 157.1969185062562, "mean_pred_prob": 0.04764118860475719, "mean_pred_prob_last_10": 0.23759630955755712, "mean_pred_prob_last_25": 0.1314276520162821, "mean_pred_prob_last_50": 0.08097176775336265, "mean_token_accuracy": 0.8795827150344848, "step": 34610 }, { "epoch": 0.6154338435283452, "grad_norm": 1.1317684459518695, "learning_rate": 0.0001, "loss": 0.7031, "mean_abs_error": 529.6494649804469, "mean_abs_error_last_10": 139.25508136309458, "mean_abs_error_last_25": 174.6696853556686, "mean_abs_error_last_50": 273.0821711043119, "mean_pred_prob": 0.038116647163406014, "mean_pred_prob_last_10": 0.17757862641010433, "mean_pred_prob_last_25": 0.0994808328570798, "mean_pred_prob_last_50": 0.06290759197436273, "mean_token_accuracy": 0.8793665826320648, "step": 34620 }, { "epoch": 0.6156116118251471, "grad_norm": 1.5060244787036152, "learning_rate": 0.0001, "loss": 0.6793, "mean_abs_error": 614.2096192527595, "mean_abs_error_last_10": 173.66753781300008, "mean_abs_error_last_25": 231.47519939809217, "mean_abs_error_last_50": 309.88745721841786, "mean_pred_prob": 0.02816601805971004, "mean_pred_prob_last_10": 0.14167716930387542, "mean_pred_prob_last_25": 0.07721688213059678, "mean_pred_prob_last_50": 0.04669270983431488, "mean_token_accuracy": 0.876358151435852, "step": 34630 }, { "epoch": 0.6157893801219491, "grad_norm": 2.0007594992564903, "learning_rate": 0.0001, "loss": 0.8685, "mean_abs_error": 406.20865644030744, "mean_abs_error_last_10": 158.131919457755, "mean_abs_error_last_25": 236.5770446290889, "mean_abs_error_last_50": 330.9496074566067, "mean_pred_prob": 0.03492100844159722, "mean_pred_prob_last_10": 0.15635437723249196, "mean_pred_prob_last_25": 0.0883839282207191, "mean_pred_prob_last_50": 0.05689998366869986, "mean_token_accuracy": 0.8650220215320588, "step": 34640 }, { "epoch": 0.615967148418751, "grad_norm": 1.499086367769107, "learning_rate": 0.0001, "loss": 0.8172, "mean_abs_error": 382.2551579744426, "mean_abs_error_last_10": 124.3986832886753, "mean_abs_error_last_25": 186.47819195400345, "mean_abs_error_last_50": 235.26694539342446, "mean_pred_prob": 0.03662480060011149, "mean_pred_prob_last_10": 0.17154229078441857, "mean_pred_prob_last_25": 0.09467329103499651, "mean_pred_prob_last_50": 0.05968324160203338, "mean_token_accuracy": 0.8683787524700165, "step": 34650 }, { "epoch": 0.6161449167155529, "grad_norm": 1.2541411535786906, "learning_rate": 0.0001, "loss": 0.6843, "mean_abs_error": 516.4561887766469, "mean_abs_error_last_10": 151.74678143795197, "mean_abs_error_last_25": 263.1605421871524, "mean_abs_error_last_50": 315.7616041909781, "mean_pred_prob": 0.03201932003721595, "mean_pred_prob_last_10": 0.1554332260042429, "mean_pred_prob_last_25": 0.08893788978457451, "mean_pred_prob_last_50": 0.05386164407245815, "mean_token_accuracy": 0.8708315014839172, "step": 34660 }, { "epoch": 0.6163226850123549, "grad_norm": 1.2514292564414295, "learning_rate": 0.0001, "loss": 0.8084, "mean_abs_error": 919.0623733575163, "mean_abs_error_last_10": 618.1445606175322, "mean_abs_error_last_25": 661.322735389153, "mean_abs_error_last_50": 737.9125684634845, "mean_pred_prob": 0.04598755463521229, "mean_pred_prob_last_10": 0.21568521770532242, "mean_pred_prob_last_25": 0.12449988690059399, "mean_pred_prob_last_50": 0.07669921910855919, "mean_token_accuracy": 0.8653047502040863, "step": 34670 }, { "epoch": 0.6165004533091568, "grad_norm": 2.1869051010295246, "learning_rate": 0.0001, "loss": 0.7698, "mean_abs_error": 672.300595176747, "mean_abs_error_last_10": 207.59844370532875, "mean_abs_error_last_25": 272.8845215295628, "mean_abs_error_last_50": 396.16003754538673, "mean_pred_prob": 0.030722194397822023, "mean_pred_prob_last_10": 0.15904935783473775, "mean_pred_prob_last_25": 0.08799933964619414, "mean_pred_prob_last_50": 0.053019716165727, "mean_token_accuracy": 0.8738931238651275, "step": 34680 }, { "epoch": 0.6166782216059588, "grad_norm": 1.8401014567825003, "learning_rate": 0.0001, "loss": 0.6016, "mean_abs_error": 478.6303412650239, "mean_abs_error_last_10": 152.6034202834966, "mean_abs_error_last_25": 193.96368219388123, "mean_abs_error_last_50": 300.72359465508094, "mean_pred_prob": 0.03582163024111651, "mean_pred_prob_last_10": 0.1726169066329021, "mean_pred_prob_last_25": 0.09809296168386936, "mean_pred_prob_last_50": 0.05972390141105279, "mean_token_accuracy": 0.8782339036464691, "step": 34690 }, { "epoch": 0.6168559899027607, "grad_norm": 1.216785666965699, "learning_rate": 0.0001, "loss": 0.6943, "mean_abs_error": 706.5315588454963, "mean_abs_error_last_10": 341.12914001116917, "mean_abs_error_last_25": 448.7659784117606, "mean_abs_error_last_50": 568.450570368509, "mean_pred_prob": 0.030331204761750996, "mean_pred_prob_last_10": 0.13975894302129746, "mean_pred_prob_last_25": 0.08155242269858717, "mean_pred_prob_last_50": 0.051545061822980645, "mean_token_accuracy": 0.8754704058170318, "step": 34700 }, { "epoch": 0.6170337581995627, "grad_norm": 1.6299623068539795, "learning_rate": 0.0001, "loss": 0.6917, "mean_abs_error": 1072.1355315611727, "mean_abs_error_last_10": 319.5257384450157, "mean_abs_error_last_25": 530.8869258752858, "mean_abs_error_last_50": 731.9469436923353, "mean_pred_prob": 0.04349642338056583, "mean_pred_prob_last_10": 0.20712595396908, "mean_pred_prob_last_25": 0.1186367615009658, "mean_pred_prob_last_50": 0.07251663029601332, "mean_token_accuracy": 0.8778202414512635, "step": 34710 }, { "epoch": 0.6172115264963647, "grad_norm": 1.5211483251923352, "learning_rate": 0.0001, "loss": 0.7986, "mean_abs_error": 225.71905224913093, "mean_abs_error_last_10": 105.29780542899005, "mean_abs_error_last_25": 112.08810105696372, "mean_abs_error_last_50": 133.4894191142626, "mean_pred_prob": 0.026957378536462785, "mean_pred_prob_last_10": 0.12832326479256154, "mean_pred_prob_last_25": 0.06948625650256872, "mean_pred_prob_last_50": 0.04369281204417348, "mean_token_accuracy": 0.8821808278560639, "step": 34720 }, { "epoch": 0.6173892947931666, "grad_norm": 1.1085467128498032, "learning_rate": 0.0001, "loss": 0.7299, "mean_abs_error": 142.92083152714093, "mean_abs_error_last_10": 43.191445623206974, "mean_abs_error_last_25": 77.74280866522244, "mean_abs_error_last_50": 96.36538190548274, "mean_pred_prob": 0.05846016160212457, "mean_pred_prob_last_10": 0.27819902785122397, "mean_pred_prob_last_25": 0.1554310854524374, "mean_pred_prob_last_50": 0.09702528938651085, "mean_token_accuracy": 0.8685896158218384, "step": 34730 }, { "epoch": 0.6175670630899686, "grad_norm": 1.3973859223620462, "learning_rate": 0.0001, "loss": 0.7027, "mean_abs_error": 794.0427429200707, "mean_abs_error_last_10": 260.73825105190264, "mean_abs_error_last_25": 331.4823462884283, "mean_abs_error_last_50": 486.4421462575956, "mean_pred_prob": 0.046955954472650774, "mean_pred_prob_last_10": 0.2120030836726073, "mean_pred_prob_last_25": 0.12151932728302199, "mean_pred_prob_last_50": 0.07640975324320606, "mean_token_accuracy": 0.8792797684669494, "step": 34740 }, { "epoch": 0.6177448313867705, "grad_norm": 2.294260648618777, "learning_rate": 0.0001, "loss": 0.6275, "mean_abs_error": 408.7507231050322, "mean_abs_error_last_10": 304.5730841823015, "mean_abs_error_last_25": 326.80346389946556, "mean_abs_error_last_50": 306.2358766573303, "mean_pred_prob": 0.04494901869911701, "mean_pred_prob_last_10": 0.19311818107962608, "mean_pred_prob_last_25": 0.1151701488532126, "mean_pred_prob_last_50": 0.07312878211960197, "mean_token_accuracy": 0.8711999654769897, "step": 34750 }, { "epoch": 0.6179225996835724, "grad_norm": 1.0125657185336208, "learning_rate": 0.0001, "loss": 0.6889, "mean_abs_error": 511.0004577078395, "mean_abs_error_last_10": 216.33058512457532, "mean_abs_error_last_25": 239.026261277274, "mean_abs_error_last_50": 284.07284227546467, "mean_pred_prob": 0.027382126153679565, "mean_pred_prob_last_10": 0.1514894546708092, "mean_pred_prob_last_25": 0.07870798856019974, "mean_pred_prob_last_50": 0.04657242582179606, "mean_token_accuracy": 0.8738856494426728, "step": 34760 }, { "epoch": 0.6181003679803744, "grad_norm": 0.797350066247245, "learning_rate": 0.0001, "loss": 0.7092, "mean_abs_error": 864.0838152099666, "mean_abs_error_last_10": 460.3907456659825, "mean_abs_error_last_25": 563.9446220178843, "mean_abs_error_last_50": 698.3678048018005, "mean_pred_prob": 0.037888531768112445, "mean_pred_prob_last_10": 0.1607139487110544, "mean_pred_prob_last_25": 0.09793963500123937, "mean_pred_prob_last_50": 0.06255575386167038, "mean_token_accuracy": 0.8778231859207153, "step": 34770 }, { "epoch": 0.6182781362771763, "grad_norm": 1.4634696201189086, "learning_rate": 0.0001, "loss": 0.6255, "mean_abs_error": 190.7506892726637, "mean_abs_error_last_10": 44.33816838291292, "mean_abs_error_last_25": 81.00410810398353, "mean_abs_error_last_50": 138.86506079304056, "mean_pred_prob": 0.054887707158923146, "mean_pred_prob_last_10": 0.27114466167986395, "mean_pred_prob_last_25": 0.15446554571390153, "mean_pred_prob_last_50": 0.09322653133422136, "mean_token_accuracy": 0.8727887630462646, "step": 34780 }, { "epoch": 0.6184559045739783, "grad_norm": 0.9653167774853012, "learning_rate": 0.0001, "loss": 0.6844, "mean_abs_error": 143.8511795315693, "mean_abs_error_last_10": 40.058391440227304, "mean_abs_error_last_25": 47.88047602830294, "mean_abs_error_last_50": 89.44796028788667, "mean_pred_prob": 0.052638672105967996, "mean_pred_prob_last_10": 0.24238718189299108, "mean_pred_prob_last_25": 0.13987971600145102, "mean_pred_prob_last_50": 0.0867636950686574, "mean_token_accuracy": 0.8735233545303345, "step": 34790 }, { "epoch": 0.6186336728707802, "grad_norm": 2.0003582614751196, "learning_rate": 0.0001, "loss": 0.7785, "mean_abs_error": 658.990756694304, "mean_abs_error_last_10": 265.8890443472152, "mean_abs_error_last_25": 376.4226061962925, "mean_abs_error_last_50": 489.5385088149348, "mean_pred_prob": 0.040935781283769755, "mean_pred_prob_last_10": 0.18439084269921296, "mean_pred_prob_last_25": 0.10708871991373599, "mean_pred_prob_last_50": 0.06753606405109167, "mean_token_accuracy": 0.8748961508274078, "step": 34800 }, { "epoch": 0.6188114411675821, "grad_norm": 2.0654277973425796, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 219.8070905040109, "mean_abs_error_last_10": 84.09182284772035, "mean_abs_error_last_25": 127.20065837079221, "mean_abs_error_last_50": 158.5363530307391, "mean_pred_prob": 0.05358064325992018, "mean_pred_prob_last_10": 0.26448827236890793, "mean_pred_prob_last_25": 0.1439464229159057, "mean_pred_prob_last_50": 0.08876725332811475, "mean_token_accuracy": 0.8797242641448975, "step": 34810 }, { "epoch": 0.6189892094643841, "grad_norm": 1.0732006440803232, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 449.1456612441032, "mean_abs_error_last_10": 83.19139014324087, "mean_abs_error_last_25": 165.26194984702943, "mean_abs_error_last_50": 267.3057904518084, "mean_pred_prob": 0.0644954756833613, "mean_pred_prob_last_10": 0.295402030297555, "mean_pred_prob_last_25": 0.16585885958047583, "mean_pred_prob_last_50": 0.10649975510314107, "mean_token_accuracy": 0.8702219069004059, "step": 34820 }, { "epoch": 0.6191669777611861, "grad_norm": 1.0628193768749694, "learning_rate": 0.0001, "loss": 0.7348, "mean_abs_error": 280.93844958895335, "mean_abs_error_last_10": 66.72126239660189, "mean_abs_error_last_25": 126.6250476792759, "mean_abs_error_last_50": 211.44612849246906, "mean_pred_prob": 0.04811434848234057, "mean_pred_prob_last_10": 0.22314430195838214, "mean_pred_prob_last_25": 0.1303895021788776, "mean_pred_prob_last_50": 0.08070081612095237, "mean_token_accuracy": 0.8849606990814209, "step": 34830 }, { "epoch": 0.6193447460579881, "grad_norm": 1.4273169827644592, "learning_rate": 0.0001, "loss": 0.6683, "mean_abs_error": 365.1465781791414, "mean_abs_error_last_10": 123.59193771053529, "mean_abs_error_last_25": 186.470157173205, "mean_abs_error_last_50": 244.24544802093624, "mean_pred_prob": 0.03725705994293094, "mean_pred_prob_last_10": 0.17747523933649062, "mean_pred_prob_last_25": 0.09651576709002256, "mean_pred_prob_last_50": 0.06030590715818107, "mean_token_accuracy": 0.8726742088794708, "step": 34840 }, { "epoch": 0.61952251435479, "grad_norm": 2.4826578031118367, "learning_rate": 0.0001, "loss": 0.6552, "mean_abs_error": 372.84527197492287, "mean_abs_error_last_10": 73.12930116890973, "mean_abs_error_last_25": 155.3311495770247, "mean_abs_error_last_50": 236.5716780791218, "mean_pred_prob": 0.03502917396835983, "mean_pred_prob_last_10": 0.1852991396561265, "mean_pred_prob_last_25": 0.09337902525439859, "mean_pred_prob_last_50": 0.05745916571468115, "mean_token_accuracy": 0.8825334846973419, "step": 34850 }, { "epoch": 0.6197002826515919, "grad_norm": 0.9435412107686929, "learning_rate": 0.0001, "loss": 0.7346, "mean_abs_error": 1229.5810706100392, "mean_abs_error_last_10": 669.9522790881648, "mean_abs_error_last_25": 756.1627604835769, "mean_abs_error_last_50": 932.7652960082872, "mean_pred_prob": 0.041439339682256104, "mean_pred_prob_last_10": 0.20259432508901226, "mean_pred_prob_last_25": 0.11529937126324512, "mean_pred_prob_last_50": 0.06977896406751824, "mean_token_accuracy": 0.8704476952552795, "step": 34860 }, { "epoch": 0.6198780509483939, "grad_norm": 2.3271028934491618, "learning_rate": 0.0001, "loss": 0.6256, "mean_abs_error": 252.76180398745836, "mean_abs_error_last_10": 68.65830746240238, "mean_abs_error_last_25": 140.87868756620912, "mean_abs_error_last_50": 194.61439476020306, "mean_pred_prob": 0.050972831528633834, "mean_pred_prob_last_10": 0.2475263575091958, "mean_pred_prob_last_25": 0.14049952197819948, "mean_pred_prob_last_50": 0.08718489073216915, "mean_token_accuracy": 0.8774948000907898, "step": 34870 }, { "epoch": 0.6200558192451958, "grad_norm": 3.388968924663452, "learning_rate": 0.0001, "loss": 0.6727, "mean_abs_error": 101.86527246976422, "mean_abs_error_last_10": 25.717924039169134, "mean_abs_error_last_25": 38.15819659855079, "mean_abs_error_last_50": 62.969173810823406, "mean_pred_prob": 0.05653739729896188, "mean_pred_prob_last_10": 0.29117904864251615, "mean_pred_prob_last_25": 0.1586271170526743, "mean_pred_prob_last_50": 0.0960498415865004, "mean_token_accuracy": 0.8866516649723053, "step": 34880 }, { "epoch": 0.6202335875419978, "grad_norm": 2.409971690612714, "learning_rate": 0.0001, "loss": 0.5857, "mean_abs_error": 447.1809391340074, "mean_abs_error_last_10": 157.08643874998566, "mean_abs_error_last_25": 183.75806677038062, "mean_abs_error_last_50": 244.58057196854492, "mean_pred_prob": 0.04724681854713708, "mean_pred_prob_last_10": 0.21683221510611475, "mean_pred_prob_last_25": 0.12571127305272967, "mean_pred_prob_last_50": 0.07882331310538575, "mean_token_accuracy": 0.8728897392749786, "step": 34890 }, { "epoch": 0.6204113558387997, "grad_norm": 1.752994668117335, "learning_rate": 0.0001, "loss": 0.7139, "mean_abs_error": 674.0322870026839, "mean_abs_error_last_10": 306.41069143963057, "mean_abs_error_last_25": 403.81174127850846, "mean_abs_error_last_50": 456.7607108692567, "mean_pred_prob": 0.03488612330984324, "mean_pred_prob_last_10": 0.16394571587443352, "mean_pred_prob_last_25": 0.0926364510320127, "mean_pred_prob_last_50": 0.05826190132647753, "mean_token_accuracy": 0.8690557539463043, "step": 34900 }, { "epoch": 0.6205891241356016, "grad_norm": 1.4277425872787388, "learning_rate": 0.0001, "loss": 0.7156, "mean_abs_error": 363.8944781396134, "mean_abs_error_last_10": 60.809892577182474, "mean_abs_error_last_25": 128.09748462250084, "mean_abs_error_last_50": 212.96036055838022, "mean_pred_prob": 0.03063757224008441, "mean_pred_prob_last_10": 0.15987358056008816, "mean_pred_prob_last_25": 0.08528466001152993, "mean_pred_prob_last_50": 0.05170115092769265, "mean_token_accuracy": 0.863544100522995, "step": 34910 }, { "epoch": 0.6207668924324036, "grad_norm": 2.3558104996311573, "learning_rate": 0.0001, "loss": 0.6586, "mean_abs_error": 554.2783143635281, "mean_abs_error_last_10": 259.19831668402037, "mean_abs_error_last_25": 312.94483133400547, "mean_abs_error_last_50": 392.3911869992148, "mean_pred_prob": 0.02864414167124778, "mean_pred_prob_last_10": 0.14515298027545215, "mean_pred_prob_last_25": 0.07824200252071023, "mean_pred_prob_last_50": 0.047761583258397876, "mean_token_accuracy": 0.8765626847743988, "step": 34920 }, { "epoch": 0.6209446607292055, "grad_norm": 1.2994328169401645, "learning_rate": 0.0001, "loss": 0.7076, "mean_abs_error": 383.508648126116, "mean_abs_error_last_10": 112.43325282560629, "mean_abs_error_last_25": 158.33367345177757, "mean_abs_error_last_50": 183.04908920826307, "mean_pred_prob": 0.04645457556471229, "mean_pred_prob_last_10": 0.2074737712740898, "mean_pred_prob_last_25": 0.11702220290899276, "mean_pred_prob_last_50": 0.07595073892734945, "mean_token_accuracy": 0.877319198846817, "step": 34930 }, { "epoch": 0.6211224290260074, "grad_norm": 0.9659936138963747, "learning_rate": 0.0001, "loss": 0.6979, "mean_abs_error": 658.3665625375622, "mean_abs_error_last_10": 279.9864876664202, "mean_abs_error_last_25": 379.41773624404, "mean_abs_error_last_50": 432.11981293870514, "mean_pred_prob": 0.03270396185398568, "mean_pred_prob_last_10": 0.17714447044418194, "mean_pred_prob_last_25": 0.09694691970944405, "mean_pred_prob_last_50": 0.05634217143524438, "mean_token_accuracy": 0.8757529020309448, "step": 34940 }, { "epoch": 0.6213001973228095, "grad_norm": 0.9585108977790107, "learning_rate": 0.0001, "loss": 0.8067, "mean_abs_error": 430.0114566081378, "mean_abs_error_last_10": 82.0886806754528, "mean_abs_error_last_25": 110.65139749253407, "mean_abs_error_last_50": 262.91831176431526, "mean_pred_prob": 0.038775414309930055, "mean_pred_prob_last_10": 0.18997397157363594, "mean_pred_prob_last_25": 0.10933391790604219, "mean_pred_prob_last_50": 0.06530340940225869, "mean_token_accuracy": 0.8648510992527008, "step": 34950 }, { "epoch": 0.6214779656196114, "grad_norm": 1.9570287258929508, "learning_rate": 0.0001, "loss": 0.6761, "mean_abs_error": 673.5791735591063, "mean_abs_error_last_10": 417.0954146132284, "mean_abs_error_last_25": 453.5234489341844, "mean_abs_error_last_50": 463.30334704332, "mean_pred_prob": 0.056676798482658344, "mean_pred_prob_last_10": 0.24685653123306112, "mean_pred_prob_last_25": 0.15066662358294708, "mean_pred_prob_last_50": 0.09450197966070846, "mean_token_accuracy": 0.8856699466705322, "step": 34960 }, { "epoch": 0.6216557339164134, "grad_norm": 1.7920755483967008, "learning_rate": 0.0001, "loss": 0.7538, "mean_abs_error": 187.44910020867923, "mean_abs_error_last_10": 70.34092755109023, "mean_abs_error_last_25": 93.67033586235868, "mean_abs_error_last_50": 137.34865922237464, "mean_pred_prob": 0.05600144653581083, "mean_pred_prob_last_10": 0.241749307513237, "mean_pred_prob_last_25": 0.14448777548968791, "mean_pred_prob_last_50": 0.09259879384189844, "mean_token_accuracy": 0.8774011790752411, "step": 34970 }, { "epoch": 0.6218335022132153, "grad_norm": 1.4313576280840625, "learning_rate": 0.0001, "loss": 0.7194, "mean_abs_error": 119.47250218594904, "mean_abs_error_last_10": 51.071613675157, "mean_abs_error_last_25": 73.65922677477944, "mean_abs_error_last_50": 86.8848108269367, "mean_pred_prob": 0.05516932969912887, "mean_pred_prob_last_10": 0.2618307065218687, "mean_pred_prob_last_25": 0.15051317773759365, "mean_pred_prob_last_50": 0.09321336103603244, "mean_token_accuracy": 0.8765224397182465, "step": 34980 }, { "epoch": 0.6220112705100173, "grad_norm": 4.212265696355217, "learning_rate": 0.0001, "loss": 0.7698, "mean_abs_error": 514.0685488353582, "mean_abs_error_last_10": 133.40811678156052, "mean_abs_error_last_25": 169.40378482668024, "mean_abs_error_last_50": 275.00157167122137, "mean_pred_prob": 0.041691182891372594, "mean_pred_prob_last_10": 0.20673234954010694, "mean_pred_prob_last_25": 0.11762272897176444, "mean_pred_prob_last_50": 0.07139622222166508, "mean_token_accuracy": 0.87963707447052, "step": 34990 }, { "epoch": 0.6221890388068192, "grad_norm": 2.562420882182876, "learning_rate": 0.0001, "loss": 0.8647, "mean_abs_error": 580.9260912251773, "mean_abs_error_last_10": 217.1246008113636, "mean_abs_error_last_25": 291.83944450836736, "mean_abs_error_last_50": 379.6808164040768, "mean_pred_prob": 0.03528356303577311, "mean_pred_prob_last_10": 0.18200776505400426, "mean_pred_prob_last_25": 0.09896393798408099, "mean_pred_prob_last_50": 0.05972505956306122, "mean_token_accuracy": 0.8714328050613404, "step": 35000 }, { "epoch": 0.6223668071036211, "grad_norm": 1.6474445430365556, "learning_rate": 0.0001, "loss": 0.7305, "mean_abs_error": 108.2644521536128, "mean_abs_error_last_10": 27.246021865095408, "mean_abs_error_last_25": 46.25672689175418, "mean_abs_error_last_50": 68.55377145442613, "mean_pred_prob": 0.05796858137473464, "mean_pred_prob_last_10": 0.2857239853590727, "mean_pred_prob_last_25": 0.1574561096727848, "mean_pred_prob_last_50": 0.09637192841619253, "mean_token_accuracy": 0.8783777415752411, "step": 35010 }, { "epoch": 0.6225445754004231, "grad_norm": 2.6265704428774455, "learning_rate": 0.0001, "loss": 0.6865, "mean_abs_error": 289.98476147315876, "mean_abs_error_last_10": 70.01784799872881, "mean_abs_error_last_25": 125.18616463696662, "mean_abs_error_last_50": 179.42461384133782, "mean_pred_prob": 0.03915483024902642, "mean_pred_prob_last_10": 0.17938017845153809, "mean_pred_prob_last_25": 0.10228776503354312, "mean_pred_prob_last_50": 0.06400128928944468, "mean_token_accuracy": 0.8756141424179077, "step": 35020 }, { "epoch": 0.622722343697225, "grad_norm": 2.181688658700971, "learning_rate": 0.0001, "loss": 0.6438, "mean_abs_error": 134.40210827749303, "mean_abs_error_last_10": 46.067978411795664, "mean_abs_error_last_25": 92.84941815131745, "mean_abs_error_last_50": 106.54445427180755, "mean_pred_prob": 0.07778941374272108, "mean_pred_prob_last_10": 0.3440893854945898, "mean_pred_prob_last_25": 0.19574793800711632, "mean_pred_prob_last_50": 0.12415014049038291, "mean_token_accuracy": 0.8750657081604004, "step": 35030 }, { "epoch": 0.622900111994027, "grad_norm": 1.2232243818679998, "learning_rate": 0.0001, "loss": 0.8181, "mean_abs_error": 240.2512000697719, "mean_abs_error_last_10": 33.14158921670342, "mean_abs_error_last_25": 64.73185258473282, "mean_abs_error_last_50": 134.89949386851396, "mean_pred_prob": 0.044933993835002184, "mean_pred_prob_last_10": 0.21722662225365638, "mean_pred_prob_last_25": 0.1217420931905508, "mean_pred_prob_last_50": 0.07514559179544449, "mean_token_accuracy": 0.8732079803943634, "step": 35040 }, { "epoch": 0.6230778802908289, "grad_norm": 3.101311543538924, "learning_rate": 0.0001, "loss": 0.8442, "mean_abs_error": 274.2837901260244, "mean_abs_error_last_10": 111.565876308351, "mean_abs_error_last_25": 145.60629797374753, "mean_abs_error_last_50": 177.76716662177805, "mean_pred_prob": 0.029482101462781428, "mean_pred_prob_last_10": 0.1454357013106346, "mean_pred_prob_last_25": 0.0790008382871747, "mean_pred_prob_last_50": 0.04907449688762426, "mean_token_accuracy": 0.8751477897167206, "step": 35050 }, { "epoch": 0.6232556485876308, "grad_norm": 1.3247281065343015, "learning_rate": 0.0001, "loss": 0.7181, "mean_abs_error": 1170.2049356486489, "mean_abs_error_last_10": 661.2681466742733, "mean_abs_error_last_25": 702.9731894900758, "mean_abs_error_last_50": 899.9926250537471, "mean_pred_prob": 0.02474774566362612, "mean_pred_prob_last_10": 0.12631275781895965, "mean_pred_prob_last_25": 0.06950687665957958, "mean_pred_prob_last_50": 0.0426698702853173, "mean_token_accuracy": 0.8734112441539764, "step": 35060 }, { "epoch": 0.6234334168844329, "grad_norm": 1.3357720730909768, "learning_rate": 0.0001, "loss": 0.7536, "mean_abs_error": 330.6636564261412, "mean_abs_error_last_10": 49.643563470199034, "mean_abs_error_last_25": 114.0369679585389, "mean_abs_error_last_50": 202.03315425232253, "mean_pred_prob": 0.027344047930091618, "mean_pred_prob_last_10": 0.15496946610510348, "mean_pred_prob_last_25": 0.07673851009458303, "mean_pred_prob_last_50": 0.04534211028367281, "mean_token_accuracy": 0.8808327376842499, "step": 35070 }, { "epoch": 0.6236111851812348, "grad_norm": 1.0126681991635709, "learning_rate": 0.0001, "loss": 0.6418, "mean_abs_error": 1011.2861259700736, "mean_abs_error_last_10": 442.8421660444975, "mean_abs_error_last_25": 562.6628071396974, "mean_abs_error_last_50": 773.2165884259376, "mean_pred_prob": 0.03508373187796678, "mean_pred_prob_last_10": 0.16120407058042474, "mean_pred_prob_last_25": 0.09262210073065943, "mean_pred_prob_last_50": 0.05742873366980348, "mean_token_accuracy": 0.8792880117893219, "step": 35080 }, { "epoch": 0.6237889534780368, "grad_norm": 2.9246600873763566, "learning_rate": 0.0001, "loss": 0.8041, "mean_abs_error": 1435.4822185289506, "mean_abs_error_last_10": 742.4003360095076, "mean_abs_error_last_25": 829.8498824579186, "mean_abs_error_last_50": 1015.7628651455589, "mean_pred_prob": 0.02880616544862278, "mean_pred_prob_last_10": 0.1305989059532294, "mean_pred_prob_last_25": 0.07604759603273123, "mean_pred_prob_last_50": 0.047714364211424255, "mean_token_accuracy": 0.8698360443115234, "step": 35090 }, { "epoch": 0.6239667217748387, "grad_norm": 1.860512661851562, "learning_rate": 0.0001, "loss": 0.6885, "mean_abs_error": 555.3920052588874, "mean_abs_error_last_10": 311.5140647693946, "mean_abs_error_last_25": 418.9356954694141, "mean_abs_error_last_50": 420.6829627186089, "mean_pred_prob": 0.04230377514613792, "mean_pred_prob_last_10": 0.2142898138379678, "mean_pred_prob_last_25": 0.11748883309774101, "mean_pred_prob_last_50": 0.07175623506773263, "mean_token_accuracy": 0.8780820906162262, "step": 35100 }, { "epoch": 0.6241444900716406, "grad_norm": 6.996110265397078, "learning_rate": 0.0001, "loss": 0.8747, "mean_abs_error": 448.11873485123215, "mean_abs_error_last_10": 66.75182892663594, "mean_abs_error_last_25": 155.6361477269412, "mean_abs_error_last_50": 249.9620157863902, "mean_pred_prob": 0.037403890513814986, "mean_pred_prob_last_10": 0.18407924398779868, "mean_pred_prob_last_25": 0.10194925246760249, "mean_pred_prob_last_50": 0.062160029029473665, "mean_token_accuracy": 0.8674871563911438, "step": 35110 }, { "epoch": 0.6243222583684426, "grad_norm": 1.7009101166737177, "learning_rate": 0.0001, "loss": 0.8784, "mean_abs_error": 209.56649010336113, "mean_abs_error_last_10": 47.56754674574318, "mean_abs_error_last_25": 75.67682633284139, "mean_abs_error_last_50": 126.61490514828003, "mean_pred_prob": 0.0521429393440485, "mean_pred_prob_last_10": 0.25265921726822854, "mean_pred_prob_last_25": 0.14234177097678186, "mean_pred_prob_last_50": 0.08657023767009378, "mean_token_accuracy": 0.8706137597560882, "step": 35120 }, { "epoch": 0.6245000266652445, "grad_norm": 1.419231123994503, "learning_rate": 0.0001, "loss": 0.7654, "mean_abs_error": 497.9546331505004, "mean_abs_error_last_10": 276.85344227059124, "mean_abs_error_last_25": 246.94691576123802, "mean_abs_error_last_50": 318.6983919439, "mean_pred_prob": 0.037634080927819016, "mean_pred_prob_last_10": 0.19021443724632264, "mean_pred_prob_last_25": 0.10172057598829269, "mean_pred_prob_last_50": 0.06315237728413195, "mean_token_accuracy": 0.8708618879318237, "step": 35130 }, { "epoch": 0.6246777949620465, "grad_norm": 1.3904185252561094, "learning_rate": 0.0001, "loss": 0.6911, "mean_abs_error": 436.76887148892195, "mean_abs_error_last_10": 154.35129955836572, "mean_abs_error_last_25": 229.41691507446004, "mean_abs_error_last_50": 291.24386838181766, "mean_pred_prob": 0.031786124454811215, "mean_pred_prob_last_10": 0.16203433088958263, "mean_pred_prob_last_25": 0.0868713584728539, "mean_pred_prob_last_50": 0.0536745382938534, "mean_token_accuracy": 0.8805894374847412, "step": 35140 }, { "epoch": 0.6248555632588484, "grad_norm": 1.4641042943151203, "learning_rate": 0.0001, "loss": 0.6052, "mean_abs_error": 268.1039233492338, "mean_abs_error_last_10": 71.22990436985488, "mean_abs_error_last_25": 81.09335004789182, "mean_abs_error_last_50": 127.6827605450042, "mean_pred_prob": 0.05272267574910074, "mean_pred_prob_last_10": 0.2561783967539668, "mean_pred_prob_last_25": 0.1478127148002386, "mean_pred_prob_last_50": 0.09087408678606153, "mean_token_accuracy": 0.881292200088501, "step": 35150 }, { "epoch": 0.6250333315556503, "grad_norm": 2.318966656278661, "learning_rate": 0.0001, "loss": 0.7207, "mean_abs_error": 760.7859038576354, "mean_abs_error_last_10": 351.00040549138896, "mean_abs_error_last_25": 434.79212508410745, "mean_abs_error_last_50": 562.7355807816946, "mean_pred_prob": 0.04473994935979135, "mean_pred_prob_last_10": 0.21490517793572508, "mean_pred_prob_last_25": 0.12123626741813495, "mean_pred_prob_last_50": 0.07517792116268537, "mean_token_accuracy": 0.877259635925293, "step": 35160 }, { "epoch": 0.6252110998524523, "grad_norm": 2.1132255958024144, "learning_rate": 0.0001, "loss": 0.7131, "mean_abs_error": 1051.8823470528364, "mean_abs_error_last_10": 894.5005246908222, "mean_abs_error_last_25": 932.2970029521912, "mean_abs_error_last_50": 952.078014534012, "mean_pred_prob": 0.032976612220227255, "mean_pred_prob_last_10": 0.18181316909176531, "mean_pred_prob_last_25": 0.09434738378622569, "mean_pred_prob_last_50": 0.05615103128948249, "mean_token_accuracy": 0.8720552325248718, "step": 35170 }, { "epoch": 0.6253888681492542, "grad_norm": 2.1631581842287915, "learning_rate": 0.0001, "loss": 0.7511, "mean_abs_error": 678.0020319182984, "mean_abs_error_last_10": 348.6835376083621, "mean_abs_error_last_25": 398.0385364548533, "mean_abs_error_last_50": 480.26363997713986, "mean_pred_prob": 0.028960930585162713, "mean_pred_prob_last_10": 0.13963654289254918, "mean_pred_prob_last_25": 0.07790716849849559, "mean_pred_prob_last_50": 0.04852486426825635, "mean_token_accuracy": 0.8790741682052612, "step": 35180 }, { "epoch": 0.6255666364460563, "grad_norm": 0.8285907935902912, "learning_rate": 0.0001, "loss": 0.7802, "mean_abs_error": 428.6691961847954, "mean_abs_error_last_10": 212.84594975929113, "mean_abs_error_last_25": 217.95791117956787, "mean_abs_error_last_50": 256.31266334877716, "mean_pred_prob": 0.038245322299189866, "mean_pred_prob_last_10": 0.2002314915182069, "mean_pred_prob_last_25": 0.10761855238815769, "mean_pred_prob_last_50": 0.06337728624930605, "mean_token_accuracy": 0.8789785444736481, "step": 35190 }, { "epoch": 0.6257444047428582, "grad_norm": 1.2683444591473683, "learning_rate": 0.0001, "loss": 0.6593, "mean_abs_error": 444.54835290356306, "mean_abs_error_last_10": 95.80292741558347, "mean_abs_error_last_25": 141.8657204588274, "mean_abs_error_last_50": 237.21264503694584, "mean_pred_prob": 0.048839362018043175, "mean_pred_prob_last_10": 0.24828689155401662, "mean_pred_prob_last_25": 0.13659345649648458, "mean_pred_prob_last_50": 0.08260450655361637, "mean_token_accuracy": 0.8785703718662262, "step": 35200 }, { "epoch": 0.6259221730396601, "grad_norm": 1.5994567056906157, "learning_rate": 0.0001, "loss": 0.7317, "mean_abs_error": 688.9019043155527, "mean_abs_error_last_10": 361.622519429675, "mean_abs_error_last_25": 418.8353116171949, "mean_abs_error_last_50": 518.095152037439, "mean_pred_prob": 0.0449786500132177, "mean_pred_prob_last_10": 0.2087997355440166, "mean_pred_prob_last_25": 0.1207446453277953, "mean_pred_prob_last_50": 0.0753002468845807, "mean_token_accuracy": 0.8744945049285888, "step": 35210 }, { "epoch": 0.6260999413364621, "grad_norm": 2.229522918271816, "learning_rate": 0.0001, "loss": 0.6985, "mean_abs_error": 480.02777122290365, "mean_abs_error_last_10": 251.29584254455818, "mean_abs_error_last_25": 245.24809854180302, "mean_abs_error_last_50": 296.80071655594446, "mean_pred_prob": 0.02954444653587416, "mean_pred_prob_last_10": 0.13693390557309612, "mean_pred_prob_last_25": 0.0794395946781151, "mean_pred_prob_last_50": 0.0490171822020784, "mean_token_accuracy": 0.8665911734104157, "step": 35220 }, { "epoch": 0.626277709633264, "grad_norm": 2.423150294767065, "learning_rate": 0.0001, "loss": 0.7014, "mean_abs_error": 807.6496980101907, "mean_abs_error_last_10": 308.11730198779867, "mean_abs_error_last_25": 392.6822425953918, "mean_abs_error_last_50": 578.0448828125456, "mean_pred_prob": 0.037302024455857466, "mean_pred_prob_last_10": 0.17978768191533162, "mean_pred_prob_last_25": 0.09963488065113779, "mean_pred_prob_last_50": 0.06175140143895987, "mean_token_accuracy": 0.886333578824997, "step": 35230 }, { "epoch": 0.626455477930066, "grad_norm": 2.532674776729966, "learning_rate": 0.0001, "loss": 0.8245, "mean_abs_error": 903.8567573283257, "mean_abs_error_last_10": 204.94633426399318, "mean_abs_error_last_25": 340.65332812666594, "mean_abs_error_last_50": 581.3916603979524, "mean_pred_prob": 0.028235112261609175, "mean_pred_prob_last_10": 0.14383497686358168, "mean_pred_prob_last_25": 0.0781599025358446, "mean_pred_prob_last_50": 0.046968465688405556, "mean_token_accuracy": 0.8635453224182129, "step": 35240 }, { "epoch": 0.6266332462268679, "grad_norm": 2.1769093271722753, "learning_rate": 0.0001, "loss": 0.6624, "mean_abs_error": 1249.0565396713348, "mean_abs_error_last_10": 619.4595903557923, "mean_abs_error_last_25": 764.4244468140454, "mean_abs_error_last_50": 924.7461401852051, "mean_pred_prob": 0.034591131468187084, "mean_pred_prob_last_10": 0.17144840573891998, "mean_pred_prob_last_25": 0.09501886507787276, "mean_pred_prob_last_50": 0.05811386006826069, "mean_token_accuracy": 0.8840436935424805, "step": 35250 }, { "epoch": 0.6268110145236698, "grad_norm": 1.4235303618452435, "learning_rate": 0.0001, "loss": 0.7119, "mean_abs_error": 553.0448558402834, "mean_abs_error_last_10": 192.97286903654611, "mean_abs_error_last_25": 232.6207165375531, "mean_abs_error_last_50": 352.56483545197256, "mean_pred_prob": 0.056258243834599854, "mean_pred_prob_last_10": 0.2795446655654814, "mean_pred_prob_last_25": 0.1503892027132679, "mean_pred_prob_last_50": 0.09193543936708011, "mean_token_accuracy": 0.8771408438682556, "step": 35260 }, { "epoch": 0.6269887828204718, "grad_norm": 1.1456620653223892, "learning_rate": 0.0001, "loss": 0.6853, "mean_abs_error": 443.63922503536014, "mean_abs_error_last_10": 285.52697081361987, "mean_abs_error_last_25": 286.64862209807825, "mean_abs_error_last_50": 335.48881646819063, "mean_pred_prob": 0.03423619985114783, "mean_pred_prob_last_10": 0.17977553827222437, "mean_pred_prob_last_25": 0.09847474852576851, "mean_pred_prob_last_50": 0.05859110925812274, "mean_token_accuracy": 0.8687467873096466, "step": 35270 }, { "epoch": 0.6271665511172737, "grad_norm": 1.6762287215739848, "learning_rate": 0.0001, "loss": 0.6299, "mean_abs_error": 267.7782922231964, "mean_abs_error_last_10": 54.934264631581776, "mean_abs_error_last_25": 88.21307999286624, "mean_abs_error_last_50": 155.67249763538592, "mean_pred_prob": 0.04545641879085451, "mean_pred_prob_last_10": 0.21546043120324612, "mean_pred_prob_last_25": 0.12489828681573271, "mean_pred_prob_last_50": 0.07676111552864313, "mean_token_accuracy": 0.8796270191669464, "step": 35280 }, { "epoch": 0.6273443194140756, "grad_norm": 0.9342509404639591, "learning_rate": 0.0001, "loss": 0.5791, "mean_abs_error": 462.9379001169427, "mean_abs_error_last_10": 149.38254151272992, "mean_abs_error_last_25": 200.30045360105095, "mean_abs_error_last_50": 322.85252937118696, "mean_pred_prob": 0.04454169779201038, "mean_pred_prob_last_10": 0.21977316988632084, "mean_pred_prob_last_25": 0.12254204694763757, "mean_pred_prob_last_50": 0.07396026126807556, "mean_token_accuracy": 0.8913484156131745, "step": 35290 }, { "epoch": 0.6275220877108777, "grad_norm": 1.7784609509291853, "learning_rate": 0.0001, "loss": 0.6901, "mean_abs_error": 318.9013247577034, "mean_abs_error_last_10": 73.39874154581136, "mean_abs_error_last_25": 84.26112480133142, "mean_abs_error_last_50": 158.26239465328183, "mean_pred_prob": 0.0507784754037857, "mean_pred_prob_last_10": 0.23450095057487488, "mean_pred_prob_last_25": 0.139462517388165, "mean_pred_prob_last_50": 0.08452052418142557, "mean_token_accuracy": 0.8653914153575897, "step": 35300 }, { "epoch": 0.6276998560076796, "grad_norm": 0.8931389090138037, "learning_rate": 0.0001, "loss": 0.7508, "mean_abs_error": 261.0500046726023, "mean_abs_error_last_10": 142.3269960786128, "mean_abs_error_last_25": 166.49322261883532, "mean_abs_error_last_50": 164.70874517883192, "mean_pred_prob": 0.04738911124877632, "mean_pred_prob_last_10": 0.22697427414823323, "mean_pred_prob_last_25": 0.1280260150320828, "mean_pred_prob_last_50": 0.07983865633141249, "mean_token_accuracy": 0.8739908516407013, "step": 35310 }, { "epoch": 0.6278776243044816, "grad_norm": 1.5473145647760937, "learning_rate": 0.0001, "loss": 0.8291, "mean_abs_error": 133.6600351353191, "mean_abs_error_last_10": 25.802295293068465, "mean_abs_error_last_25": 51.93084627931229, "mean_abs_error_last_50": 84.36251250739463, "mean_pred_prob": 0.0634537072852254, "mean_pred_prob_last_10": 0.3234868351370096, "mean_pred_prob_last_25": 0.17706977054476739, "mean_pred_prob_last_50": 0.10735833272337914, "mean_token_accuracy": 0.867521071434021, "step": 35320 }, { "epoch": 0.6280553926012835, "grad_norm": 1.469388501071229, "learning_rate": 0.0001, "loss": 0.6843, "mean_abs_error": 308.2827109631957, "mean_abs_error_last_10": 56.7654139268127, "mean_abs_error_last_25": 94.9035156352609, "mean_abs_error_last_50": 154.44525909144292, "mean_pred_prob": 0.03232779926620424, "mean_pred_prob_last_10": 0.15980772972106932, "mean_pred_prob_last_25": 0.08603092581033707, "mean_pred_prob_last_50": 0.053624301124364136, "mean_token_accuracy": 0.878233152627945, "step": 35330 }, { "epoch": 0.6282331608980855, "grad_norm": 1.2876931464009334, "learning_rate": 0.0001, "loss": 0.8137, "mean_abs_error": 1545.0235516200398, "mean_abs_error_last_10": 970.7510727791687, "mean_abs_error_last_25": 1124.317849781984, "mean_abs_error_last_50": 1206.7569214281546, "mean_pred_prob": 0.019591336738085375, "mean_pred_prob_last_10": 0.10152330484706909, "mean_pred_prob_last_25": 0.05429394168604631, "mean_pred_prob_last_50": 0.03288580857042689, "mean_token_accuracy": 0.8797202348709107, "step": 35340 }, { "epoch": 0.6284109291948874, "grad_norm": 1.3041919557825181, "learning_rate": 0.0001, "loss": 0.8422, "mean_abs_error": 738.422963143464, "mean_abs_error_last_10": 490.25132024125514, "mean_abs_error_last_25": 522.2510465878516, "mean_abs_error_last_50": 617.1642568403806, "mean_pred_prob": 0.026103481801692398, "mean_pred_prob_last_10": 0.1315392951713875, "mean_pred_prob_last_25": 0.07204873767332173, "mean_pred_prob_last_50": 0.043741044611670075, "mean_token_accuracy": 0.8652873396873474, "step": 35350 }, { "epoch": 0.6285886974916893, "grad_norm": 1.5607609964952316, "learning_rate": 0.0001, "loss": 0.7948, "mean_abs_error": 296.4438304158867, "mean_abs_error_last_10": 63.891464581095214, "mean_abs_error_last_25": 95.67010875275365, "mean_abs_error_last_50": 158.25116133385808, "mean_pred_prob": 0.04816137494053692, "mean_pred_prob_last_10": 0.24681142559275032, "mean_pred_prob_last_25": 0.13776055960915984, "mean_pred_prob_last_50": 0.08249737587757408, "mean_token_accuracy": 0.8660265445709229, "step": 35360 }, { "epoch": 0.6287664657884913, "grad_norm": 2.3431778834992167, "learning_rate": 0.0001, "loss": 0.8225, "mean_abs_error": 234.18285722310657, "mean_abs_error_last_10": 75.39296458462822, "mean_abs_error_last_25": 121.79874590634022, "mean_abs_error_last_50": 159.92789395807193, "mean_pred_prob": 0.04712942140176892, "mean_pred_prob_last_10": 0.23353857714682819, "mean_pred_prob_last_25": 0.12979373056441545, "mean_pred_prob_last_50": 0.07922725677490235, "mean_token_accuracy": 0.8750640153884888, "step": 35370 }, { "epoch": 0.6289442340852932, "grad_norm": 2.147323742252823, "learning_rate": 0.0001, "loss": 0.6455, "mean_abs_error": 275.6198710171351, "mean_abs_error_last_10": 196.61126482711924, "mean_abs_error_last_25": 283.41482759273146, "mean_abs_error_last_50": 301.0150368713934, "mean_pred_prob": 0.047459805640392004, "mean_pred_prob_last_10": 0.2387202676385641, "mean_pred_prob_last_25": 0.12887653233483434, "mean_pred_prob_last_50": 0.07890666229650378, "mean_token_accuracy": 0.8789414763450623, "step": 35380 }, { "epoch": 0.6291220023820951, "grad_norm": 2.438989795931474, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 464.8847181380035, "mean_abs_error_last_10": 198.97630442542788, "mean_abs_error_last_25": 327.54772859502793, "mean_abs_error_last_50": 400.80984518330325, "mean_pred_prob": 0.029790657409466802, "mean_pred_prob_last_10": 0.14999267999082805, "mean_pred_prob_last_25": 0.08026734925806522, "mean_pred_prob_last_50": 0.04920227602124214, "mean_token_accuracy": 0.8840282976627349, "step": 35390 }, { "epoch": 0.6292997706788971, "grad_norm": 1.494987395536246, "learning_rate": 0.0001, "loss": 0.7621, "mean_abs_error": 118.82538431645874, "mean_abs_error_last_10": 34.1349757604516, "mean_abs_error_last_25": 57.13227230881371, "mean_abs_error_last_50": 83.85909867202642, "mean_pred_prob": 0.05162742296233773, "mean_pred_prob_last_10": 0.2549267515540123, "mean_pred_prob_last_25": 0.1393441203981638, "mean_pred_prob_last_50": 0.08565053064376116, "mean_token_accuracy": 0.8734072804450989, "step": 35400 }, { "epoch": 0.629477538975699, "grad_norm": 1.251772901277918, "learning_rate": 0.0001, "loss": 0.6394, "mean_abs_error": 139.94745101401145, "mean_abs_error_last_10": 42.94698960002363, "mean_abs_error_last_25": 64.65711151196629, "mean_abs_error_last_50": 78.77398923586402, "mean_pred_prob": 0.04867181433364749, "mean_pred_prob_last_10": 0.24284992441534997, "mean_pred_prob_last_25": 0.13367325235158206, "mean_pred_prob_last_50": 0.08145977519452571, "mean_token_accuracy": 0.8872679173946381, "step": 35410 }, { "epoch": 0.6296553072725011, "grad_norm": 1.7691240652856743, "learning_rate": 0.0001, "loss": 0.7636, "mean_abs_error": 1125.1134159984524, "mean_abs_error_last_10": 461.75576990173596, "mean_abs_error_last_25": 581.9019455507918, "mean_abs_error_last_50": 772.2647342191757, "mean_pred_prob": 0.03155301143997349, "mean_pred_prob_last_10": 0.16293812891235576, "mean_pred_prob_last_25": 0.0858885878129513, "mean_pred_prob_last_50": 0.05200450781849213, "mean_token_accuracy": 0.8691894352436066, "step": 35420 }, { "epoch": 0.629833075569303, "grad_norm": 1.4336032569921078, "learning_rate": 0.0001, "loss": 0.7142, "mean_abs_error": 657.0614022786942, "mean_abs_error_last_10": 311.3370766418773, "mean_abs_error_last_25": 369.55448053687434, "mean_abs_error_last_50": 467.3977273844331, "mean_pred_prob": 0.030794682312989607, "mean_pred_prob_last_10": 0.15546733841183596, "mean_pred_prob_last_25": 0.08661150260595604, "mean_pred_prob_last_50": 0.052003507985500616, "mean_token_accuracy": 0.8766946613788604, "step": 35430 }, { "epoch": 0.630010843866105, "grad_norm": 1.3405459357679195, "learning_rate": 0.0001, "loss": 0.7197, "mean_abs_error": 411.5987768952997, "mean_abs_error_last_10": 129.41195175395816, "mean_abs_error_last_25": 205.43035278498505, "mean_abs_error_last_50": 300.85079358840983, "mean_pred_prob": 0.028835568763315677, "mean_pred_prob_last_10": 0.1478427205234766, "mean_pred_prob_last_25": 0.08100474998354912, "mean_pred_prob_last_50": 0.04838283085264265, "mean_token_accuracy": 0.8728464484214783, "step": 35440 }, { "epoch": 0.6301886121629069, "grad_norm": 0.8159333093196853, "learning_rate": 0.0001, "loss": 0.7123, "mean_abs_error": 1085.2506881241973, "mean_abs_error_last_10": 685.1137713842011, "mean_abs_error_last_25": 769.6486929213996, "mean_abs_error_last_50": 898.304724076477, "mean_pred_prob": 0.03466363614061265, "mean_pred_prob_last_10": 0.1777966135152383, "mean_pred_prob_last_25": 0.09540348724549404, "mean_pred_prob_last_50": 0.05880611045577098, "mean_token_accuracy": 0.8729931771755218, "step": 35450 }, { "epoch": 0.6303663804597088, "grad_norm": 1.8253449677626936, "learning_rate": 0.0001, "loss": 0.7133, "mean_abs_error": 85.04366281492737, "mean_abs_error_last_10": 14.728066509797761, "mean_abs_error_last_25": 21.343553766320607, "mean_abs_error_last_50": 38.40278461161928, "mean_pred_prob": 0.07336326213553548, "mean_pred_prob_last_10": 0.34505944401025773, "mean_pred_prob_last_25": 0.19942551255226135, "mean_pred_prob_last_50": 0.12197081129997969, "mean_token_accuracy": 0.8756689906120301, "step": 35460 }, { "epoch": 0.6305441487565108, "grad_norm": 1.8605963006415744, "learning_rate": 0.0001, "loss": 0.8498, "mean_abs_error": 302.8217056175639, "mean_abs_error_last_10": 66.39831138224038, "mean_abs_error_last_25": 91.29574782666336, "mean_abs_error_last_50": 156.04538801437425, "mean_pred_prob": 0.03798850113525987, "mean_pred_prob_last_10": 0.1925077322870493, "mean_pred_prob_last_25": 0.10421852972358465, "mean_pred_prob_last_50": 0.06370740784332156, "mean_token_accuracy": 0.8713146328926087, "step": 35470 }, { "epoch": 0.6307219170533127, "grad_norm": 1.7414385372530632, "learning_rate": 0.0001, "loss": 0.6815, "mean_abs_error": 462.28564723145007, "mean_abs_error_last_10": 185.68228801644636, "mean_abs_error_last_25": 209.58674138009684, "mean_abs_error_last_50": 262.3947859828796, "mean_pred_prob": 0.04423317386535928, "mean_pred_prob_last_10": 0.2134747035219334, "mean_pred_prob_last_25": 0.12142273955978453, "mean_pred_prob_last_50": 0.07432915010722355, "mean_token_accuracy": 0.8810678243637085, "step": 35480 }, { "epoch": 0.6308996853501146, "grad_norm": 1.1483666222312754, "learning_rate": 0.0001, "loss": 0.7743, "mean_abs_error": 1003.064611488141, "mean_abs_error_last_10": 547.2705393933554, "mean_abs_error_last_25": 562.7778378270881, "mean_abs_error_last_50": 630.1155932634229, "mean_pred_prob": 0.040110840412671676, "mean_pred_prob_last_10": 0.19822955801500938, "mean_pred_prob_last_25": 0.11238889333908446, "mean_pred_prob_last_50": 0.06823753590870182, "mean_token_accuracy": 0.8672053158283234, "step": 35490 }, { "epoch": 0.6310774536469166, "grad_norm": 2.2583868141969208, "learning_rate": 0.0001, "loss": 0.6315, "mean_abs_error": 266.7022381512992, "mean_abs_error_last_10": 94.87657559902468, "mean_abs_error_last_25": 101.06003720396839, "mean_abs_error_last_50": 158.17075183279997, "mean_pred_prob": 0.05284149954095483, "mean_pred_prob_last_10": 0.2585458135232329, "mean_pred_prob_last_25": 0.14100732044316827, "mean_pred_prob_last_50": 0.08740100492723286, "mean_token_accuracy": 0.8806386828422547, "step": 35500 }, { "epoch": 0.6312552219437185, "grad_norm": 1.1667577713393609, "learning_rate": 0.0001, "loss": 0.5871, "mean_abs_error": 379.5841113592768, "mean_abs_error_last_10": 242.4800104459386, "mean_abs_error_last_25": 247.44350239802446, "mean_abs_error_last_50": 285.35165764172973, "mean_pred_prob": 0.05507885788683779, "mean_pred_prob_last_10": 0.27053796224063265, "mean_pred_prob_last_25": 0.14577898393617944, "mean_pred_prob_last_50": 0.08968839450972155, "mean_token_accuracy": 0.8698491811752319, "step": 35510 }, { "epoch": 0.6314329902405205, "grad_norm": 1.0320500814401674, "learning_rate": 0.0001, "loss": 0.7165, "mean_abs_error": 486.4047026107547, "mean_abs_error_last_10": 100.00549487221663, "mean_abs_error_last_25": 157.1569608204815, "mean_abs_error_last_50": 343.25763415198674, "mean_pred_prob": 0.040789154404774305, "mean_pred_prob_last_10": 0.189072759822011, "mean_pred_prob_last_25": 0.10992393847554922, "mean_pred_prob_last_50": 0.06721834698691964, "mean_token_accuracy": 0.8668067872524261, "step": 35520 }, { "epoch": 0.6316107585373224, "grad_norm": 1.666030765981949, "learning_rate": 0.0001, "loss": 0.824, "mean_abs_error": 465.973896859652, "mean_abs_error_last_10": 185.82868048508823, "mean_abs_error_last_25": 189.08783939031522, "mean_abs_error_last_50": 243.4317191681593, "mean_pred_prob": 0.03493725535809063, "mean_pred_prob_last_10": 0.1512602219823748, "mean_pred_prob_last_25": 0.09059106374625117, "mean_pred_prob_last_50": 0.056579424627125265, "mean_token_accuracy": 0.8716790795326232, "step": 35530 }, { "epoch": 0.6317885268341245, "grad_norm": 1.8256605885485064, "learning_rate": 0.0001, "loss": 0.8227, "mean_abs_error": 840.8315941983317, "mean_abs_error_last_10": 336.9315913169049, "mean_abs_error_last_25": 438.14033059338743, "mean_abs_error_last_50": 568.4642386811668, "mean_pred_prob": 0.050004073716991115, "mean_pred_prob_last_10": 0.21416251184418797, "mean_pred_prob_last_25": 0.13106735586479773, "mean_pred_prob_last_50": 0.08323311296699103, "mean_token_accuracy": 0.8648223280906677, "step": 35540 }, { "epoch": 0.6319662951309264, "grad_norm": 1.9399226876770643, "learning_rate": 0.0001, "loss": 0.7944, "mean_abs_error": 179.99342990251165, "mean_abs_error_last_10": 43.45094448039013, "mean_abs_error_last_25": 99.49747765275148, "mean_abs_error_last_50": 112.7383842097111, "mean_pred_prob": 0.05400482192635536, "mean_pred_prob_last_10": 0.2687963888049126, "mean_pred_prob_last_25": 0.14742410499602557, "mean_pred_prob_last_50": 0.09062961461022497, "mean_token_accuracy": 0.8743244469165802, "step": 35550 }, { "epoch": 0.6321440634277283, "grad_norm": 1.2618715930995368, "learning_rate": 0.0001, "loss": 0.7001, "mean_abs_error": 538.6892178504474, "mean_abs_error_last_10": 154.8138369860197, "mean_abs_error_last_25": 267.63346111178316, "mean_abs_error_last_50": 371.74513393129826, "mean_pred_prob": 0.027861054777167736, "mean_pred_prob_last_10": 0.14264725260436534, "mean_pred_prob_last_25": 0.07657411089166999, "mean_pred_prob_last_50": 0.04701110501773655, "mean_token_accuracy": 0.863346791267395, "step": 35560 }, { "epoch": 0.6323218317245303, "grad_norm": 2.1048269588596584, "learning_rate": 0.0001, "loss": 0.7271, "mean_abs_error": 572.1516088390196, "mean_abs_error_last_10": 250.3843849162675, "mean_abs_error_last_25": 280.280075331215, "mean_abs_error_last_50": 346.33953956569883, "mean_pred_prob": 0.040742631838656965, "mean_pred_prob_last_10": 0.201371493563056, "mean_pred_prob_last_25": 0.11298920032568276, "mean_pred_prob_last_50": 0.06877826484851539, "mean_token_accuracy": 0.8662484228610993, "step": 35570 }, { "epoch": 0.6324996000213322, "grad_norm": 1.1321202257296994, "learning_rate": 0.0001, "loss": 0.7827, "mean_abs_error": 868.4594080025469, "mean_abs_error_last_10": 324.2984073597088, "mean_abs_error_last_25": 513.2270656864864, "mean_abs_error_last_50": 683.4271947232025, "mean_pred_prob": 0.03151514487108216, "mean_pred_prob_last_10": 0.14961638106033207, "mean_pred_prob_last_25": 0.08141254568472504, "mean_pred_prob_last_50": 0.051275539514608684, "mean_token_accuracy": 0.8828078508377075, "step": 35580 }, { "epoch": 0.6326773683181341, "grad_norm": 2.8960967479949797, "learning_rate": 0.0001, "loss": 0.7375, "mean_abs_error": 1037.5662240304998, "mean_abs_error_last_10": 610.3111318182715, "mean_abs_error_last_25": 696.4361699704077, "mean_abs_error_last_50": 847.673122043551, "mean_pred_prob": 0.036991877782566004, "mean_pred_prob_last_10": 0.16339491432008799, "mean_pred_prob_last_25": 0.10041509671718814, "mean_pred_prob_last_50": 0.06244997726607835, "mean_token_accuracy": 0.8729261934757233, "step": 35590 }, { "epoch": 0.6328551366149361, "grad_norm": 2.2744138130141267, "learning_rate": 0.0001, "loss": 1.1603, "mean_abs_error": 846.9769425063166, "mean_abs_error_last_10": 362.5070320393526, "mean_abs_error_last_25": 482.638398312314, "mean_abs_error_last_50": 623.5386217969153, "mean_pred_prob": 0.03766782139718998, "mean_pred_prob_last_10": 0.1946829981985502, "mean_pred_prob_last_25": 0.10751353116938844, "mean_pred_prob_last_50": 0.06442940459528472, "mean_token_accuracy": 0.8697004616260529, "step": 35600 }, { "epoch": 0.633032904911738, "grad_norm": 2.975526603240398, "learning_rate": 0.0001, "loss": 0.7461, "mean_abs_error": 433.1620500468862, "mean_abs_error_last_10": 221.14865240187845, "mean_abs_error_last_25": 233.05275141255188, "mean_abs_error_last_50": 315.4546103453199, "mean_pred_prob": 0.04301835157675669, "mean_pred_prob_last_10": 0.18955593092832715, "mean_pred_prob_last_25": 0.11145928816404194, "mean_pred_prob_last_50": 0.07040705357212573, "mean_token_accuracy": 0.8746544480323791, "step": 35610 }, { "epoch": 0.63321067320854, "grad_norm": 1.246744558820987, "learning_rate": 0.0001, "loss": 0.7559, "mean_abs_error": 332.1041260316659, "mean_abs_error_last_10": 62.34474653697507, "mean_abs_error_last_25": 203.5442860752458, "mean_abs_error_last_50": 290.26466738781374, "mean_pred_prob": 0.032809771690517665, "mean_pred_prob_last_10": 0.16501758042722942, "mean_pred_prob_last_25": 0.09122863067314028, "mean_pred_prob_last_50": 0.05504442062228918, "mean_token_accuracy": 0.8709242641925812, "step": 35620 }, { "epoch": 0.6333884415053419, "grad_norm": 1.5147747952402155, "learning_rate": 0.0001, "loss": 0.7515, "mean_abs_error": 798.7350779031478, "mean_abs_error_last_10": 409.6348620058808, "mean_abs_error_last_25": 484.8941379168585, "mean_abs_error_last_50": 568.0272886305737, "mean_pred_prob": 0.022936774982372298, "mean_pred_prob_last_10": 0.13339665848761798, "mean_pred_prob_last_25": 0.06609274739166722, "mean_pred_prob_last_50": 0.038573432285920715, "mean_token_accuracy": 0.8752821326255799, "step": 35630 }, { "epoch": 0.6335662098021438, "grad_norm": 1.9912151757960872, "learning_rate": 0.0001, "loss": 0.7833, "mean_abs_error": 1159.9286936991298, "mean_abs_error_last_10": 439.94181082100266, "mean_abs_error_last_25": 579.2110697239549, "mean_abs_error_last_50": 747.4224977169056, "mean_pred_prob": 0.01711649557983037, "mean_pred_prob_last_10": 0.09103140836232342, "mean_pred_prob_last_25": 0.04591767364763655, "mean_pred_prob_last_50": 0.028503752220422028, "mean_token_accuracy": 0.8590580284595489, "step": 35640 }, { "epoch": 0.6337439780989458, "grad_norm": 2.6140729170596284, "learning_rate": 0.0001, "loss": 0.8241, "mean_abs_error": 492.0627353429615, "mean_abs_error_last_10": 92.15336547903243, "mean_abs_error_last_25": 268.2966442613016, "mean_abs_error_last_50": 378.00840090454335, "mean_pred_prob": 0.046898063318803904, "mean_pred_prob_last_10": 0.21851465236395598, "mean_pred_prob_last_25": 0.13034092700108885, "mean_pred_prob_last_50": 0.0785646315664053, "mean_token_accuracy": 0.8678337574005127, "step": 35650 }, { "epoch": 0.6339217463957478, "grad_norm": 2.4608499698041704, "learning_rate": 0.0001, "loss": 0.8838, "mean_abs_error": 670.2144581850812, "mean_abs_error_last_10": 266.2535897745623, "mean_abs_error_last_25": 346.3752913879907, "mean_abs_error_last_50": 474.643962372122, "mean_pred_prob": 0.03317702107015066, "mean_pred_prob_last_10": 0.15896716655697674, "mean_pred_prob_last_25": 0.08795508096809498, "mean_pred_prob_last_50": 0.05361186133814044, "mean_token_accuracy": 0.8703112483024598, "step": 35660 }, { "epoch": 0.6340995146925498, "grad_norm": 1.6098492343743698, "learning_rate": 0.0001, "loss": 0.8081, "mean_abs_error": 469.61522658403203, "mean_abs_error_last_10": 149.54966214463553, "mean_abs_error_last_25": 199.8690510302012, "mean_abs_error_last_50": 300.76746242333576, "mean_pred_prob": 0.03476141770370304, "mean_pred_prob_last_10": 0.1593860574066639, "mean_pred_prob_last_25": 0.08989837858825922, "mean_pred_prob_last_50": 0.057143419957719745, "mean_token_accuracy": 0.8687082886695862, "step": 35670 }, { "epoch": 0.6342772829893517, "grad_norm": 1.719476363598835, "learning_rate": 0.0001, "loss": 0.7123, "mean_abs_error": 455.8088913079547, "mean_abs_error_last_10": 86.64151485517581, "mean_abs_error_last_25": 124.05969270091923, "mean_abs_error_last_50": 220.33442833023628, "mean_pred_prob": 0.0434280943358317, "mean_pred_prob_last_10": 0.1973750598728657, "mean_pred_prob_last_25": 0.11214287746697664, "mean_pred_prob_last_50": 0.0708181249909103, "mean_token_accuracy": 0.8742313861846924, "step": 35680 }, { "epoch": 0.6344550512861536, "grad_norm": 1.9587455170114494, "learning_rate": 0.0001, "loss": 0.6169, "mean_abs_error": 82.90537795078151, "mean_abs_error_last_10": 23.74829480315313, "mean_abs_error_last_25": 38.14171561889775, "mean_abs_error_last_50": 62.073131315513976, "mean_pred_prob": 0.053864652290940285, "mean_pred_prob_last_10": 0.2659959517419338, "mean_pred_prob_last_25": 0.14862568601965903, "mean_pred_prob_last_50": 0.09024908747524023, "mean_token_accuracy": 0.8835014760494232, "step": 35690 }, { "epoch": 0.6346328195829556, "grad_norm": 2.872483444396133, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 273.1764011933182, "mean_abs_error_last_10": 74.20893802361005, "mean_abs_error_last_25": 139.40781437027726, "mean_abs_error_last_50": 167.34244027254437, "mean_pred_prob": 0.05354048465378582, "mean_pred_prob_last_10": 0.21793053522706032, "mean_pred_prob_last_25": 0.13628782536834477, "mean_pred_prob_last_50": 0.08777426676824689, "mean_token_accuracy": 0.8698533654212952, "step": 35700 }, { "epoch": 0.6348105878797575, "grad_norm": 2.343640151250587, "learning_rate": 0.0001, "loss": 0.6936, "mean_abs_error": 720.2033811573747, "mean_abs_error_last_10": 244.94170664841803, "mean_abs_error_last_25": 295.04294437130096, "mean_abs_error_last_50": 436.0312039446446, "mean_pred_prob": 0.028150286211166532, "mean_pred_prob_last_10": 0.14683873628964647, "mean_pred_prob_last_25": 0.07872821970377117, "mean_pred_prob_last_50": 0.04714630219386891, "mean_token_accuracy": 0.8736243665218353, "step": 35710 }, { "epoch": 0.6349883561765595, "grad_norm": 2.1781975073032727, "learning_rate": 0.0001, "loss": 0.6663, "mean_abs_error": 679.1661466764134, "mean_abs_error_last_10": 264.08562558070827, "mean_abs_error_last_25": 296.5575522211848, "mean_abs_error_last_50": 523.6903474191835, "mean_pred_prob": 0.03359659698326141, "mean_pred_prob_last_10": 0.16365618966519832, "mean_pred_prob_last_25": 0.0901633377186954, "mean_pred_prob_last_50": 0.056058775261044504, "mean_token_accuracy": 0.8691004514694214, "step": 35720 }, { "epoch": 0.6351661244733614, "grad_norm": 0.8848487336525498, "learning_rate": 0.0001, "loss": 0.6237, "mean_abs_error": 88.91685219229564, "mean_abs_error_last_10": 27.76582210456511, "mean_abs_error_last_25": 37.8860838237671, "mean_abs_error_last_50": 53.432654440680935, "mean_pred_prob": 0.05378205655142665, "mean_pred_prob_last_10": 0.2660193298012018, "mean_pred_prob_last_25": 0.14778588246554136, "mean_pred_prob_last_50": 0.09095077235251665, "mean_token_accuracy": 0.8811661124229431, "step": 35730 }, { "epoch": 0.6353438927701633, "grad_norm": 1.8745866317178128, "learning_rate": 0.0001, "loss": 0.7302, "mean_abs_error": 387.8368341157071, "mean_abs_error_last_10": 84.87163543103041, "mean_abs_error_last_25": 118.77520641377453, "mean_abs_error_last_50": 201.29194514761681, "mean_pred_prob": 0.027532103518024088, "mean_pred_prob_last_10": 0.13957830760627984, "mean_pred_prob_last_25": 0.07419197224080562, "mean_pred_prob_last_50": 0.045655850507318976, "mean_token_accuracy": 0.8720341384410858, "step": 35740 }, { "epoch": 0.6355216610669653, "grad_norm": 2.3941001094715597, "learning_rate": 0.0001, "loss": 0.7002, "mean_abs_error": 424.8352742504543, "mean_abs_error_last_10": 80.87644877075896, "mean_abs_error_last_25": 108.64861532439154, "mean_abs_error_last_50": 228.2449678838312, "mean_pred_prob": 0.042134725605137646, "mean_pred_prob_last_10": 0.19857150688767433, "mean_pred_prob_last_25": 0.11228069374337793, "mean_pred_prob_last_50": 0.06927449498325586, "mean_token_accuracy": 0.867846417427063, "step": 35750 }, { "epoch": 0.6356994293637672, "grad_norm": 1.8758678076804596, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 299.97226767843244, "mean_abs_error_last_10": 274.20442941687827, "mean_abs_error_last_25": 242.6481395349815, "mean_abs_error_last_50": 251.6822018444372, "mean_pred_prob": 0.04322002902626991, "mean_pred_prob_last_10": 0.20309458512347192, "mean_pred_prob_last_25": 0.11526031945832074, "mean_pred_prob_last_50": 0.07139507960528135, "mean_token_accuracy": 0.8721555650234223, "step": 35760 }, { "epoch": 0.6358771976605692, "grad_norm": 1.436015529168976, "learning_rate": 0.0001, "loss": 0.6912, "mean_abs_error": 165.5359277830515, "mean_abs_error_last_10": 43.99542536499213, "mean_abs_error_last_25": 70.91462251201575, "mean_abs_error_last_50": 112.22821155586924, "mean_pred_prob": 0.040328305680304766, "mean_pred_prob_last_10": 0.21316220313310624, "mean_pred_prob_last_25": 0.11482141576707364, "mean_pred_prob_last_50": 0.06874231733381748, "mean_token_accuracy": 0.8744442999362946, "step": 35770 }, { "epoch": 0.6360549659573712, "grad_norm": 1.7715629558251587, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 526.3244111034494, "mean_abs_error_last_10": 58.531493207237226, "mean_abs_error_last_25": 95.71664344907829, "mean_abs_error_last_50": 217.85175811013005, "mean_pred_prob": 0.042159363604150714, "mean_pred_prob_last_10": 0.2067019896581769, "mean_pred_prob_last_25": 0.11769153196364641, "mean_pred_prob_last_50": 0.07142511187121273, "mean_token_accuracy": 0.8757576465606689, "step": 35780 }, { "epoch": 0.6362327342541731, "grad_norm": 2.0531159222248125, "learning_rate": 0.0001, "loss": 0.7818, "mean_abs_error": 912.460554245409, "mean_abs_error_last_10": 420.50800171550065, "mean_abs_error_last_25": 510.5732033061411, "mean_abs_error_last_50": 643.4092296708137, "mean_pred_prob": 0.037132610245316756, "mean_pred_prob_last_10": 0.17060325073252897, "mean_pred_prob_last_25": 0.1000330968934577, "mean_pred_prob_last_50": 0.062148980407800994, "mean_token_accuracy": 0.8702752530574799, "step": 35790 }, { "epoch": 0.6364105025509751, "grad_norm": 1.7270046166099593, "learning_rate": 0.0001, "loss": 0.7348, "mean_abs_error": 173.0619684981105, "mean_abs_error_last_10": 20.97613016172935, "mean_abs_error_last_25": 61.32501223143237, "mean_abs_error_last_50": 123.3554676004695, "mean_pred_prob": 0.04732439205981791, "mean_pred_prob_last_10": 0.23410580717027188, "mean_pred_prob_last_25": 0.13174143824726342, "mean_pred_prob_last_50": 0.07922223750501871, "mean_token_accuracy": 0.87973393201828, "step": 35800 }, { "epoch": 0.636588270847777, "grad_norm": 2.4130963311409115, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 741.4780197089744, "mean_abs_error_last_10": 438.9906163493396, "mean_abs_error_last_25": 600.6410091351707, "mean_abs_error_last_50": 591.1836669566758, "mean_pred_prob": 0.032483125521684995, "mean_pred_prob_last_10": 0.15701075358083472, "mean_pred_prob_last_25": 0.08807202245807275, "mean_pred_prob_last_50": 0.05443518671672791, "mean_token_accuracy": 0.8700968861579895, "step": 35810 }, { "epoch": 0.636766039144579, "grad_norm": 3.108180964431323, "learning_rate": 0.0001, "loss": 0.7101, "mean_abs_error": 267.4946863558859, "mean_abs_error_last_10": 39.3887661580211, "mean_abs_error_last_25": 115.160961769545, "mean_abs_error_last_50": 164.54234739465363, "mean_pred_prob": 0.03510823510587215, "mean_pred_prob_last_10": 0.17961819469928741, "mean_pred_prob_last_25": 0.09847998712211847, "mean_pred_prob_last_50": 0.059927317034453156, "mean_token_accuracy": 0.8866494059562683, "step": 35820 }, { "epoch": 0.6369438074413809, "grad_norm": 2.6683727562033277, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 169.20456753272884, "mean_abs_error_last_10": 69.06711633456983, "mean_abs_error_last_25": 80.75431516720187, "mean_abs_error_last_50": 111.52665101867576, "mean_pred_prob": 0.05025194708723575, "mean_pred_prob_last_10": 0.21976885702461005, "mean_pred_prob_last_25": 0.13245894545689224, "mean_pred_prob_last_50": 0.08157870313152671, "mean_token_accuracy": 0.8848654508590699, "step": 35830 }, { "epoch": 0.6371215757381828, "grad_norm": 1.3395664413639867, "learning_rate": 0.0001, "loss": 0.7352, "mean_abs_error": 280.6690513475592, "mean_abs_error_last_10": 133.18619412899278, "mean_abs_error_last_25": 115.8187039425175, "mean_abs_error_last_50": 162.61385997270526, "mean_pred_prob": 0.05017281649634242, "mean_pred_prob_last_10": 0.23673181999474763, "mean_pred_prob_last_25": 0.13246406400576233, "mean_pred_prob_last_50": 0.08273687139153481, "mean_token_accuracy": 0.8662489891052246, "step": 35840 }, { "epoch": 0.6372993440349848, "grad_norm": 1.3697814362240601, "learning_rate": 0.0001, "loss": 0.8102, "mean_abs_error": 388.92452809140843, "mean_abs_error_last_10": 127.92868178185873, "mean_abs_error_last_25": 168.4657317392344, "mean_abs_error_last_50": 248.51298440402675, "mean_pred_prob": 0.04293360188603401, "mean_pred_prob_last_10": 0.20827474761754275, "mean_pred_prob_last_25": 0.11517612598836421, "mean_pred_prob_last_50": 0.07142064711079002, "mean_token_accuracy": 0.8793195247650146, "step": 35850 }, { "epoch": 0.6374771123317867, "grad_norm": 1.1790869224546994, "learning_rate": 0.0001, "loss": 0.8189, "mean_abs_error": 775.8348469038935, "mean_abs_error_last_10": 305.31112574627485, "mean_abs_error_last_25": 358.5150267682475, "mean_abs_error_last_50": 445.64051496235805, "mean_pred_prob": 0.031139909202465788, "mean_pred_prob_last_10": 0.1427186564891599, "mean_pred_prob_last_25": 0.08149275961332023, "mean_pred_prob_last_50": 0.05157764105824754, "mean_token_accuracy": 0.8695933759212494, "step": 35860 }, { "epoch": 0.6376548806285887, "grad_norm": 1.476002142362693, "learning_rate": 0.0001, "loss": 0.8009, "mean_abs_error": 286.25108877985207, "mean_abs_error_last_10": 95.96469135020871, "mean_abs_error_last_25": 164.1179016821207, "mean_abs_error_last_50": 221.99475362800104, "mean_pred_prob": 0.02664325989317149, "mean_pred_prob_last_10": 0.13469507005065678, "mean_pred_prob_last_25": 0.07398362867534161, "mean_pred_prob_last_50": 0.04512806721031666, "mean_token_accuracy": 0.8789893925189972, "step": 35870 }, { "epoch": 0.6378326489253906, "grad_norm": 0.9043160621984123, "learning_rate": 0.0001, "loss": 0.7507, "mean_abs_error": 416.84223989539316, "mean_abs_error_last_10": 181.23378786474765, "mean_abs_error_last_25": 191.99595796792312, "mean_abs_error_last_50": 244.30412150875486, "mean_pred_prob": 0.029131104820407927, "mean_pred_prob_last_10": 0.14591997731477022, "mean_pred_prob_last_25": 0.07680731303989888, "mean_pred_prob_last_50": 0.047489112615585326, "mean_token_accuracy": 0.8724419414997101, "step": 35880 }, { "epoch": 0.6380104172221926, "grad_norm": 2.607895075589255, "learning_rate": 0.0001, "loss": 0.7341, "mean_abs_error": 161.46190086082157, "mean_abs_error_last_10": 126.26341946366173, "mean_abs_error_last_25": 149.53344516811407, "mean_abs_error_last_50": 133.67718192452176, "mean_pred_prob": 0.05181266274303198, "mean_pred_prob_last_10": 0.24631261229515075, "mean_pred_prob_last_25": 0.13448909875005483, "mean_pred_prob_last_50": 0.08337387088686228, "mean_token_accuracy": 0.8706216156482697, "step": 35890 }, { "epoch": 0.6381881855189946, "grad_norm": 2.136806164331492, "learning_rate": 0.0001, "loss": 0.729, "mean_abs_error": 238.21762715729906, "mean_abs_error_last_10": 107.03579836180617, "mean_abs_error_last_25": 140.53684375985702, "mean_abs_error_last_50": 188.01319949525163, "mean_pred_prob": 0.03986981897614896, "mean_pred_prob_last_10": 0.18556362390518188, "mean_pred_prob_last_25": 0.10628324439749122, "mean_pred_prob_last_50": 0.06648486922495067, "mean_token_accuracy": 0.8733608901500702, "step": 35900 }, { "epoch": 0.6383659538157965, "grad_norm": 1.189663206167541, "learning_rate": 0.0001, "loss": 0.6925, "mean_abs_error": 1294.59139636401, "mean_abs_error_last_10": 595.8959503900161, "mean_abs_error_last_25": 713.156400894058, "mean_abs_error_last_50": 946.4251766141779, "mean_pred_prob": 0.028462363926519175, "mean_pred_prob_last_10": 0.12961282562464477, "mean_pred_prob_last_25": 0.07392398132942617, "mean_pred_prob_last_50": 0.046848697696987074, "mean_token_accuracy": 0.8755315840244293, "step": 35910 }, { "epoch": 0.6385437221125985, "grad_norm": 2.0413104894977536, "learning_rate": 0.0001, "loss": 0.739, "mean_abs_error": 516.4285915515463, "mean_abs_error_last_10": 165.07592522417806, "mean_abs_error_last_25": 265.13973616618773, "mean_abs_error_last_50": 356.0034596823428, "mean_pred_prob": 0.04259712310740724, "mean_pred_prob_last_10": 0.2049488250631839, "mean_pred_prob_last_25": 0.11657144725322724, "mean_pred_prob_last_50": 0.07168961483985185, "mean_token_accuracy": 0.8586493074893952, "step": 35920 }, { "epoch": 0.6387214904094004, "grad_norm": 1.8080730534000427, "learning_rate": 0.0001, "loss": 0.7593, "mean_abs_error": 857.956581366598, "mean_abs_error_last_10": 432.9521854490554, "mean_abs_error_last_25": 540.4708044615013, "mean_abs_error_last_50": 631.3725265970645, "mean_pred_prob": 0.032036032926407644, "mean_pred_prob_last_10": 0.17271119764191098, "mean_pred_prob_last_25": 0.09050229871354531, "mean_pred_prob_last_50": 0.0530806856462732, "mean_token_accuracy": 0.8704703569412231, "step": 35930 }, { "epoch": 0.6388992587062023, "grad_norm": 4.094650774022804, "learning_rate": 0.0001, "loss": 0.6982, "mean_abs_error": 622.7579161555523, "mean_abs_error_last_10": 148.3599837318567, "mean_abs_error_last_25": 185.34555116832956, "mean_abs_error_last_50": 285.77756396828613, "mean_pred_prob": 0.043002683710074055, "mean_pred_prob_last_10": 0.2117490936536342, "mean_pred_prob_last_25": 0.12097892393358052, "mean_pred_prob_last_50": 0.07334244308294728, "mean_token_accuracy": 0.8686288833618164, "step": 35940 }, { "epoch": 0.6390770270030043, "grad_norm": 1.4169502421770492, "learning_rate": 0.0001, "loss": 0.8366, "mean_abs_error": 475.3051314364163, "mean_abs_error_last_10": 131.28093223667432, "mean_abs_error_last_25": 172.01512561694653, "mean_abs_error_last_50": 252.45563423323537, "mean_pred_prob": 0.037941687065176664, "mean_pred_prob_last_10": 0.17661789042758755, "mean_pred_prob_last_25": 0.10369081272510812, "mean_pred_prob_last_50": 0.06436637413571589, "mean_token_accuracy": 0.8643018066883087, "step": 35950 }, { "epoch": 0.6392547952998062, "grad_norm": 1.8354112777754454, "learning_rate": 0.0001, "loss": 0.6974, "mean_abs_error": 644.1803383930412, "mean_abs_error_last_10": 376.4368961629149, "mean_abs_error_last_25": 376.8194249186752, "mean_abs_error_last_50": 380.6637765771251, "mean_pred_prob": 0.031109095737338065, "mean_pred_prob_last_10": 0.15667637633159756, "mean_pred_prob_last_25": 0.08628757488913834, "mean_pred_prob_last_50": 0.05268950044410303, "mean_token_accuracy": 0.8783331036567688, "step": 35960 }, { "epoch": 0.6394325635966082, "grad_norm": 1.282753001225526, "learning_rate": 0.0001, "loss": 0.7132, "mean_abs_error": 877.9079800764275, "mean_abs_error_last_10": 383.77830883933063, "mean_abs_error_last_25": 530.2203953859103, "mean_abs_error_last_50": 644.3766593598286, "mean_pred_prob": 0.0353428919945145, "mean_pred_prob_last_10": 0.17332851358223705, "mean_pred_prob_last_25": 0.09835173313622363, "mean_pred_prob_last_50": 0.05881443605176173, "mean_token_accuracy": 0.8721751689910888, "step": 35970 }, { "epoch": 0.6396103318934101, "grad_norm": 1.177530990232642, "learning_rate": 0.0001, "loss": 0.7188, "mean_abs_error": 910.9947993942145, "mean_abs_error_last_10": 265.88712508337795, "mean_abs_error_last_25": 380.4583802513906, "mean_abs_error_last_50": 569.3137125590981, "mean_pred_prob": 0.06307168510684277, "mean_pred_prob_last_10": 0.2511026440421119, "mean_pred_prob_last_25": 0.15580369122326373, "mean_pred_prob_last_50": 0.10138725110737141, "mean_token_accuracy": 0.858179235458374, "step": 35980 }, { "epoch": 0.639788100190212, "grad_norm": 1.5858982600968288, "learning_rate": 0.0001, "loss": 0.722, "mean_abs_error": 662.207596594881, "mean_abs_error_last_10": 271.4281245367179, "mean_abs_error_last_25": 287.4631439293321, "mean_abs_error_last_50": 379.2495124616727, "mean_pred_prob": 0.030300216044997795, "mean_pred_prob_last_10": 0.14645166533300652, "mean_pred_prob_last_25": 0.08398478433955461, "mean_pred_prob_last_50": 0.05085963375749998, "mean_token_accuracy": 0.8620787501335144, "step": 35990 }, { "epoch": 0.639965868487014, "grad_norm": 3.7585382153967335, "learning_rate": 0.0001, "loss": 0.8099, "mean_abs_error": 1095.0590235494255, "mean_abs_error_last_10": 464.8175145371762, "mean_abs_error_last_25": 570.6734623243673, "mean_abs_error_last_50": 773.286589482406, "mean_pred_prob": 0.02986723276844714, "mean_pred_prob_last_10": 0.1597900127846515, "mean_pred_prob_last_25": 0.08453164083475713, "mean_pred_prob_last_50": 0.050828302884474395, "mean_token_accuracy": 0.859581571817398, "step": 36000 }, { "epoch": 0.640143636783816, "grad_norm": 1.5474179740701381, "learning_rate": 0.0001, "loss": 0.6513, "mean_abs_error": 584.7560559248193, "mean_abs_error_last_10": 122.72129046567167, "mean_abs_error_last_25": 199.53139286079943, "mean_abs_error_last_50": 304.50170333244205, "mean_pred_prob": 0.03201739910291508, "mean_pred_prob_last_10": 0.16214703533332794, "mean_pred_prob_last_25": 0.0881963511928916, "mean_pred_prob_last_50": 0.053836583206430075, "mean_token_accuracy": 0.870355224609375, "step": 36010 }, { "epoch": 0.640321405080618, "grad_norm": 1.529160202092163, "learning_rate": 0.0001, "loss": 0.7511, "mean_abs_error": 490.2764294831703, "mean_abs_error_last_10": 121.99285136020801, "mean_abs_error_last_25": 180.8332452057444, "mean_abs_error_last_50": 277.31931920314776, "mean_pred_prob": 0.038501410256139935, "mean_pred_prob_last_10": 0.1925481552723795, "mean_pred_prob_last_25": 0.11063790421467275, "mean_pred_prob_last_50": 0.066479596006684, "mean_token_accuracy": 0.8791958689689636, "step": 36020 }, { "epoch": 0.6404991733774199, "grad_norm": 1.1004392090795132, "learning_rate": 0.0001, "loss": 0.7157, "mean_abs_error": 359.3240277556272, "mean_abs_error_last_10": 77.56318593947715, "mean_abs_error_last_25": 129.76412588214356, "mean_abs_error_last_50": 199.44061207988497, "mean_pred_prob": 0.04659067168831825, "mean_pred_prob_last_10": 0.23059320896863938, "mean_pred_prob_last_25": 0.1299563710577786, "mean_pred_prob_last_50": 0.07885337150655687, "mean_token_accuracy": 0.8754785001277924, "step": 36030 }, { "epoch": 0.6406769416742218, "grad_norm": 2.198326457591934, "learning_rate": 0.0001, "loss": 0.9467, "mean_abs_error": 399.290412485919, "mean_abs_error_last_10": 100.89802658178579, "mean_abs_error_last_25": 178.19511352198342, "mean_abs_error_last_50": 276.0097091435796, "mean_pred_prob": 0.042114769807085395, "mean_pred_prob_last_10": 0.23024714775383473, "mean_pred_prob_last_25": 0.12017798013985156, "mean_pred_prob_last_50": 0.07153626047074794, "mean_token_accuracy": 0.8719989120960235, "step": 36040 }, { "epoch": 0.6408547099710238, "grad_norm": 2.0209767478253196, "learning_rate": 0.0001, "loss": 0.7584, "mean_abs_error": 201.45510327377605, "mean_abs_error_last_10": 84.25985082457586, "mean_abs_error_last_25": 129.5106018687804, "mean_abs_error_last_50": 166.0814333063748, "mean_pred_prob": 0.052194732753559944, "mean_pred_prob_last_10": 0.25106169320642946, "mean_pred_prob_last_25": 0.14120232593268156, "mean_pred_prob_last_50": 0.08711470598354935, "mean_token_accuracy": 0.8683882474899292, "step": 36050 }, { "epoch": 0.6410324782678257, "grad_norm": 1.5475524425352005, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 393.64561046858, "mean_abs_error_last_10": 163.53176099482448, "mean_abs_error_last_25": 165.09977068802584, "mean_abs_error_last_50": 223.7852709399127, "mean_pred_prob": 0.03832312853774056, "mean_pred_prob_last_10": 0.1805714001879096, "mean_pred_prob_last_25": 0.10263136245775968, "mean_pred_prob_last_50": 0.06324318886036054, "mean_token_accuracy": 0.87246013879776, "step": 36060 }, { "epoch": 0.6412102465646277, "grad_norm": 1.7683018452735253, "learning_rate": 0.0001, "loss": 0.6389, "mean_abs_error": 217.59251297524915, "mean_abs_error_last_10": 63.54716520173933, "mean_abs_error_last_25": 86.12639257576993, "mean_abs_error_last_50": 134.87072708505858, "mean_pred_prob": 0.042494323244318365, "mean_pred_prob_last_10": 0.20914333797991275, "mean_pred_prob_last_25": 0.11663543488830327, "mean_pred_prob_last_50": 0.07112587299197912, "mean_token_accuracy": 0.8710107028484344, "step": 36070 }, { "epoch": 0.6413880148614296, "grad_norm": 12.254879236038867, "learning_rate": 0.0001, "loss": 0.6995, "mean_abs_error": 155.47387367536783, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 89.99587284418227, "mean_pred_prob": 0.07521687694825233, "mean_pred_prob_last_10": 0.25787559859454634, "mean_pred_prob_last_25": 0.16972401831299067, "mean_pred_prob_last_50": 0.09850462209433317, "mean_token_accuracy": 0.8816694438457489, "step": 36080 }, { "epoch": 0.6415657831582315, "grad_norm": 1.729847338357805, "learning_rate": 0.0001, "loss": 0.8746, "mean_abs_error": 324.83293248714295, "mean_abs_error_last_10": 61.883229248105295, "mean_abs_error_last_25": 87.27596350265544, "mean_abs_error_last_50": 160.75963057371024, "mean_pred_prob": 0.04102648845873773, "mean_pred_prob_last_10": 0.19835642203688622, "mean_pred_prob_last_25": 0.11267331540584564, "mean_pred_prob_last_50": 0.06962105957791209, "mean_token_accuracy": 0.8704780578613281, "step": 36090 }, { "epoch": 0.6417435514550335, "grad_norm": 2.0985555751163556, "learning_rate": 0.0001, "loss": 0.6698, "mean_abs_error": 249.00896602179645, "mean_abs_error_last_10": 131.56087884743653, "mean_abs_error_last_25": 225.612117693057, "mean_abs_error_last_50": 223.72001518690368, "mean_pred_prob": 0.06564396992907859, "mean_pred_prob_last_10": 0.30373236346058546, "mean_pred_prob_last_25": 0.17571296733804048, "mean_pred_prob_last_50": 0.11006199261173606, "mean_token_accuracy": 0.8802381992340088, "step": 36100 }, { "epoch": 0.6419213197518354, "grad_norm": 2.5974315939845476, "learning_rate": 0.0001, "loss": 0.639, "mean_abs_error": 587.201503347215, "mean_abs_error_last_10": 343.96578950306565, "mean_abs_error_last_25": 395.1575612742321, "mean_abs_error_last_50": 475.8350727987234, "mean_pred_prob": 0.04868238218768965, "mean_pred_prob_last_10": 0.22668992112448905, "mean_pred_prob_last_25": 0.12978025956545025, "mean_pred_prob_last_50": 0.08104908328969032, "mean_token_accuracy": 0.8723918557167053, "step": 36110 }, { "epoch": 0.6420990880486374, "grad_norm": 2.020857107342804, "learning_rate": 0.0001, "loss": 0.6312, "mean_abs_error": 260.2211987583715, "mean_abs_error_last_10": 168.64008258083575, "mean_abs_error_last_25": 185.6062673629001, "mean_abs_error_last_50": 214.73748256401046, "mean_pred_prob": 0.057750866643618795, "mean_pred_prob_last_10": 0.2643171033007093, "mean_pred_prob_last_25": 0.1528076311573386, "mean_pred_prob_last_50": 0.09565450520021841, "mean_token_accuracy": 0.8842210710048676, "step": 36120 }, { "epoch": 0.6422768563454394, "grad_norm": 2.1448654709739374, "learning_rate": 0.0001, "loss": 0.7688, "mean_abs_error": 900.1478329469788, "mean_abs_error_last_10": 258.24821791491536, "mean_abs_error_last_25": 351.6805099920929, "mean_abs_error_last_50": 505.5503778119998, "mean_pred_prob": 0.033281830154010095, "mean_pred_prob_last_10": 0.159085224987939, "mean_pred_prob_last_25": 0.09041136437444948, "mean_pred_prob_last_50": 0.05593343046493828, "mean_token_accuracy": 0.879630959033966, "step": 36130 }, { "epoch": 0.6424546246422413, "grad_norm": 1.1124810689350848, "learning_rate": 0.0001, "loss": 0.7428, "mean_abs_error": 2395.096344552305, "mean_abs_error_last_10": 1473.2899599549614, "mean_abs_error_last_25": 1672.7235317768598, "mean_abs_error_last_50": 1894.4207820740241, "mean_pred_prob": 0.01284771857172018, "mean_pred_prob_last_10": 0.05374045197386294, "mean_pred_prob_last_25": 0.03201599556050496, "mean_pred_prob_last_50": 0.02057653373485664, "mean_token_accuracy": 0.8573841512203216, "step": 36140 }, { "epoch": 0.6426323929390433, "grad_norm": 1.2121415377601585, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 287.6071257863587, "mean_abs_error_last_10": 120.94265105410811, "mean_abs_error_last_25": 239.71807103876182, "mean_abs_error_last_50": 274.79042363786044, "mean_pred_prob": 0.04763552900403738, "mean_pred_prob_last_10": 0.22575795836746693, "mean_pred_prob_last_25": 0.12324509881436825, "mean_pred_prob_last_50": 0.07896877340972423, "mean_token_accuracy": 0.8792464673519135, "step": 36150 }, { "epoch": 0.6428101612358452, "grad_norm": 2.4210969760530694, "learning_rate": 0.0001, "loss": 0.7306, "mean_abs_error": 343.4155809553452, "mean_abs_error_last_10": 72.38519679325825, "mean_abs_error_last_25": 194.93567501394273, "mean_abs_error_last_50": 219.722652103967, "mean_pred_prob": 0.035266058659181, "mean_pred_prob_last_10": 0.17758108340203763, "mean_pred_prob_last_25": 0.09501278549432754, "mean_pred_prob_last_50": 0.058539863862097266, "mean_token_accuracy": 0.8714076101779937, "step": 36160 }, { "epoch": 0.6429879295326472, "grad_norm": 1.7259531955739629, "learning_rate": 0.0001, "loss": 0.6267, "mean_abs_error": 238.44607703628026, "mean_abs_error_last_10": 75.02293747657797, "mean_abs_error_last_25": 86.05444027311822, "mean_abs_error_last_50": 144.6514845655091, "mean_pred_prob": 0.033717659721151, "mean_pred_prob_last_10": 0.1608227401971817, "mean_pred_prob_last_25": 0.09052171744406223, "mean_pred_prob_last_50": 0.05606948146596551, "mean_token_accuracy": 0.8826508104801178, "step": 36170 }, { "epoch": 0.6431656978294491, "grad_norm": 1.5173935834999366, "learning_rate": 0.0001, "loss": 0.6249, "mean_abs_error": 849.874970863881, "mean_abs_error_last_10": 605.0845182203791, "mean_abs_error_last_25": 663.8881109162894, "mean_abs_error_last_50": 759.5550808770778, "mean_pred_prob": 0.047108174263848925, "mean_pred_prob_last_10": 0.22163422314624767, "mean_pred_prob_last_25": 0.12200195271580014, "mean_pred_prob_last_50": 0.07797363910940476, "mean_token_accuracy": 0.8809681713581086, "step": 36180 }, { "epoch": 0.643343466126251, "grad_norm": 1.0577309124368832, "learning_rate": 0.0001, "loss": 0.6791, "mean_abs_error": 414.96182975338644, "mean_abs_error_last_10": 175.51620842436995, "mean_abs_error_last_25": 139.69079179192045, "mean_abs_error_last_50": 200.6522872174903, "mean_pred_prob": 0.07223357522161677, "mean_pred_prob_last_10": 0.31611712703015654, "mean_pred_prob_last_25": 0.19332088119117544, "mean_pred_prob_last_50": 0.11912725133588538, "mean_token_accuracy": 0.8735723793506622, "step": 36190 }, { "epoch": 0.643521234423053, "grad_norm": 2.150896531547257, "learning_rate": 0.0001, "loss": 0.6491, "mean_abs_error": 181.260226691835, "mean_abs_error_last_10": 77.73043387339852, "mean_abs_error_last_25": 119.55713786389371, "mean_abs_error_last_50": 127.49763192237769, "mean_pred_prob": 0.06197715550661087, "mean_pred_prob_last_10": 0.2763082006946206, "mean_pred_prob_last_25": 0.16121221045032144, "mean_pred_prob_last_50": 0.1021734954789281, "mean_token_accuracy": 0.8758707821369172, "step": 36200 }, { "epoch": 0.6436990027198549, "grad_norm": 1.4292265293829958, "learning_rate": 0.0001, "loss": 0.831, "mean_abs_error": 312.6497519306569, "mean_abs_error_last_10": 141.49848249582698, "mean_abs_error_last_25": 186.66958583736357, "mean_abs_error_last_50": 206.18921595835255, "mean_pred_prob": 0.03956726910546422, "mean_pred_prob_last_10": 0.18333296068012714, "mean_pred_prob_last_25": 0.10173318758606911, "mean_pred_prob_last_50": 0.0650535091292113, "mean_token_accuracy": 0.870187646150589, "step": 36210 }, { "epoch": 0.6438767710166569, "grad_norm": 1.0509416677187622, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 612.970723836647, "mean_abs_error_last_10": 249.00710777463263, "mean_abs_error_last_25": 363.48927170116974, "mean_abs_error_last_50": 482.9357812268096, "mean_pred_prob": 0.02694457909092307, "mean_pred_prob_last_10": 0.15049240738153458, "mean_pred_prob_last_25": 0.08098844662308693, "mean_pred_prob_last_50": 0.047384654032066466, "mean_token_accuracy": 0.8613694131374359, "step": 36220 }, { "epoch": 0.6440545393134588, "grad_norm": 0.9453047163536542, "learning_rate": 0.0001, "loss": 0.8852, "mean_abs_error": 791.1115568722, "mean_abs_error_last_10": 351.40100235403565, "mean_abs_error_last_25": 489.9628485865108, "mean_abs_error_last_50": 538.1002719248825, "mean_pred_prob": 0.02097362501663156, "mean_pred_prob_last_10": 0.1114304997259751, "mean_pred_prob_last_25": 0.05620224836748093, "mean_pred_prob_last_50": 0.03430006387643516, "mean_token_accuracy": 0.8697699129581451, "step": 36230 }, { "epoch": 0.6442323076102607, "grad_norm": 1.0224024026638205, "learning_rate": 0.0001, "loss": 0.7468, "mean_abs_error": 465.03108660341434, "mean_abs_error_last_10": 125.40966224858258, "mean_abs_error_last_25": 164.50095656064872, "mean_abs_error_last_50": 251.68690100493927, "mean_pred_prob": 0.032722123176790775, "mean_pred_prob_last_10": 0.1532241593580693, "mean_pred_prob_last_25": 0.08950621492695063, "mean_pred_prob_last_50": 0.055030275776516646, "mean_token_accuracy": 0.8735946834087371, "step": 36240 }, { "epoch": 0.6444100759070628, "grad_norm": 1.4323853532251272, "learning_rate": 0.0001, "loss": 0.631, "mean_abs_error": 446.50343149508456, "mean_abs_error_last_10": 139.27017537857733, "mean_abs_error_last_25": 288.2244312939627, "mean_abs_error_last_50": 365.1584559759474, "mean_pred_prob": 0.029772790055721997, "mean_pred_prob_last_10": 0.15085695125162601, "mean_pred_prob_last_25": 0.08167367540299893, "mean_pred_prob_last_50": 0.04956570607610047, "mean_token_accuracy": 0.8704843401908875, "step": 36250 }, { "epoch": 0.6445878442038647, "grad_norm": 2.0813551885009955, "learning_rate": 0.0001, "loss": 0.7234, "mean_abs_error": 673.1740792365766, "mean_abs_error_last_10": 193.46647535044525, "mean_abs_error_last_25": 306.9127899776969, "mean_abs_error_last_50": 508.04895523121985, "mean_pred_prob": 0.03417046626564115, "mean_pred_prob_last_10": 0.16258546779863536, "mean_pred_prob_last_25": 0.09471826467779465, "mean_pred_prob_last_50": 0.05733652274357155, "mean_token_accuracy": 0.8735430181026459, "step": 36260 }, { "epoch": 0.6447656125006667, "grad_norm": 1.4186663277654574, "learning_rate": 0.0001, "loss": 0.6652, "mean_abs_error": 146.99709936453576, "mean_abs_error_last_10": 67.60576606371498, "mean_abs_error_last_25": 115.63834303277079, "mean_abs_error_last_50": 111.11290518483716, "mean_pred_prob": 0.06358276368118823, "mean_pred_prob_last_10": 0.2978248566389084, "mean_pred_prob_last_25": 0.1740808177739382, "mean_pred_prob_last_50": 0.10744975795969366, "mean_token_accuracy": 0.87835653424263, "step": 36270 }, { "epoch": 0.6449433807974686, "grad_norm": 1.4367212102702416, "learning_rate": 0.0001, "loss": 0.7221, "mean_abs_error": 284.0311151292803, "mean_abs_error_last_10": 95.2221353581605, "mean_abs_error_last_25": 160.8746297198669, "mean_abs_error_last_50": 231.94576500687663, "mean_pred_prob": 0.04638313786126673, "mean_pred_prob_last_10": 0.22801580466330051, "mean_pred_prob_last_25": 0.13001718781888486, "mean_pred_prob_last_50": 0.07838995484635233, "mean_token_accuracy": 0.8803313314914704, "step": 36280 }, { "epoch": 0.6451211490942705, "grad_norm": 1.4920877787313243, "learning_rate": 0.0001, "loss": 0.7561, "mean_abs_error": 430.6274387264927, "mean_abs_error_last_10": 209.69121168769146, "mean_abs_error_last_25": 312.4234536556019, "mean_abs_error_last_50": 352.99405912670807, "mean_pred_prob": 0.04315828992985189, "mean_pred_prob_last_10": 0.21249900218099355, "mean_pred_prob_last_25": 0.11711431262083352, "mean_pred_prob_last_50": 0.0726889729499817, "mean_token_accuracy": 0.8659765005111695, "step": 36290 }, { "epoch": 0.6452989173910725, "grad_norm": 1.4352283034703404, "learning_rate": 0.0001, "loss": 0.6291, "mean_abs_error": 425.5975088795243, "mean_abs_error_last_10": 139.935659649409, "mean_abs_error_last_25": 218.1441685534691, "mean_abs_error_last_50": 308.6296080068411, "mean_pred_prob": 0.03504149557556957, "mean_pred_prob_last_10": 0.17614981774240732, "mean_pred_prob_last_25": 0.09349474841728807, "mean_pred_prob_last_50": 0.05951886526308954, "mean_token_accuracy": 0.8841129601001739, "step": 36300 }, { "epoch": 0.6454766856878744, "grad_norm": 2.5834574458321664, "learning_rate": 0.0001, "loss": 0.916, "mean_abs_error": 204.8600285669821, "mean_abs_error_last_10": 29.959582606852894, "mean_abs_error_last_25": 52.5934406674084, "mean_abs_error_last_50": 93.8558493517047, "mean_pred_prob": 0.044939794950187205, "mean_pred_prob_last_10": 0.2144528917968273, "mean_pred_prob_last_25": 0.12116943206638098, "mean_pred_prob_last_50": 0.07499383417889476, "mean_token_accuracy": 0.8607341110706329, "step": 36310 }, { "epoch": 0.6456544539846764, "grad_norm": 1.9151445436812788, "learning_rate": 0.0001, "loss": 0.8211, "mean_abs_error": 615.539501170147, "mean_abs_error_last_10": 312.54536733174456, "mean_abs_error_last_25": 391.09522419919864, "mean_abs_error_last_50": 475.3291486046327, "mean_pred_prob": 0.05042162209283561, "mean_pred_prob_last_10": 0.2508066380862147, "mean_pred_prob_last_25": 0.13423296273685992, "mean_pred_prob_last_50": 0.08261603810824454, "mean_token_accuracy": 0.8764552116394043, "step": 36320 }, { "epoch": 0.6458322222814783, "grad_norm": 1.212232992461374, "learning_rate": 0.0001, "loss": 0.7269, "mean_abs_error": 325.55035233221554, "mean_abs_error_last_10": 93.54734690232426, "mean_abs_error_last_25": 152.2433713575738, "mean_abs_error_last_50": 235.70311543738018, "mean_pred_prob": 0.056752248341217636, "mean_pred_prob_last_10": 0.2612964589847252, "mean_pred_prob_last_25": 0.14753959279041737, "mean_pred_prob_last_50": 0.09257677333662287, "mean_token_accuracy": 0.8663141846656799, "step": 36330 }, { "epoch": 0.6460099905782802, "grad_norm": 2.4669200293194544, "learning_rate": 0.0001, "loss": 0.7155, "mean_abs_error": 343.03929116660646, "mean_abs_error_last_10": 53.8291394525696, "mean_abs_error_last_25": 75.28342310179313, "mean_abs_error_last_50": 140.0887741919691, "mean_pred_prob": 0.040111191524192694, "mean_pred_prob_last_10": 0.19230393413454294, "mean_pred_prob_last_25": 0.10840229205787182, "mean_pred_prob_last_50": 0.06865111431106925, "mean_token_accuracy": 0.8708838045597076, "step": 36340 }, { "epoch": 0.6461877588750822, "grad_norm": 1.8258002390869494, "learning_rate": 0.0001, "loss": 0.7703, "mean_abs_error": 577.3947729609815, "mean_abs_error_last_10": 358.81716448943627, "mean_abs_error_last_25": 335.2719214028325, "mean_abs_error_last_50": 383.80930308818563, "mean_pred_prob": 0.02692773148883134, "mean_pred_prob_last_10": 0.15039481571875513, "mean_pred_prob_last_25": 0.07813603938557208, "mean_pred_prob_last_50": 0.04658574878703803, "mean_token_accuracy": 0.8618976593017578, "step": 36350 }, { "epoch": 0.6463655271718841, "grad_norm": 1.2737355969571051, "learning_rate": 0.0001, "loss": 0.7623, "mean_abs_error": 558.2918352733108, "mean_abs_error_last_10": 136.47761356718735, "mean_abs_error_last_25": 191.51052167607702, "mean_abs_error_last_50": 311.50558312628374, "mean_pred_prob": 0.0275048038456589, "mean_pred_prob_last_10": 0.1285372029989958, "mean_pred_prob_last_25": 0.06984937684610486, "mean_pred_prob_last_50": 0.04455130831338465, "mean_token_accuracy": 0.8845160663127899, "step": 36360 }, { "epoch": 0.6465432954686862, "grad_norm": 2.255915694655058, "learning_rate": 0.0001, "loss": 0.8086, "mean_abs_error": 548.7596752307305, "mean_abs_error_last_10": 191.5747181875817, "mean_abs_error_last_25": 320.7475519720836, "mean_abs_error_last_50": 442.5640234338457, "mean_pred_prob": 0.033549047028645876, "mean_pred_prob_last_10": 0.17286546919494866, "mean_pred_prob_last_25": 0.09272264176979661, "mean_pred_prob_last_50": 0.05633684624917805, "mean_token_accuracy": 0.8693805575370789, "step": 36370 }, { "epoch": 0.6467210637654881, "grad_norm": 1.9001708616453856, "learning_rate": 0.0001, "loss": 0.6967, "mean_abs_error": 151.16586775273856, "mean_abs_error_last_10": 51.20839851560106, "mean_abs_error_last_25": 53.72998648758319, "mean_abs_error_last_50": 106.5553145684659, "mean_pred_prob": 0.05606243121437728, "mean_pred_prob_last_10": 0.26071745790541173, "mean_pred_prob_last_25": 0.1487019808962941, "mean_pred_prob_last_50": 0.09188277488574385, "mean_token_accuracy": 0.8749389111995697, "step": 36380 }, { "epoch": 0.64689883206229, "grad_norm": 2.7421578329717002, "learning_rate": 0.0001, "loss": 0.7771, "mean_abs_error": 373.59431117358446, "mean_abs_error_last_10": 73.84346968178093, "mean_abs_error_last_25": 78.674600369856, "mean_abs_error_last_50": 154.40682437868435, "mean_pred_prob": 0.056088293553330004, "mean_pred_prob_last_10": 0.2703812148422003, "mean_pred_prob_last_25": 0.14879582179710268, "mean_pred_prob_last_50": 0.0920010443776846, "mean_token_accuracy": 0.8768282473087311, "step": 36390 }, { "epoch": 0.647076600359092, "grad_norm": 1.0936296210983891, "learning_rate": 0.0001, "loss": 0.6455, "mean_abs_error": 261.9139058074842, "mean_abs_error_last_10": 87.25324473074012, "mean_abs_error_last_25": 109.84005308624766, "mean_abs_error_last_50": 171.668006500966, "mean_pred_prob": 0.036942000850103796, "mean_pred_prob_last_10": 0.16784163992851972, "mean_pred_prob_last_25": 0.09420933024957776, "mean_pred_prob_last_50": 0.06025291243568063, "mean_token_accuracy": 0.881815904378891, "step": 36400 }, { "epoch": 0.6472543686558939, "grad_norm": 2.0459642006311256, "learning_rate": 0.0001, "loss": 0.7063, "mean_abs_error": 292.64931449643547, "mean_abs_error_last_10": 125.21361852701196, "mean_abs_error_last_25": 167.10650658271658, "mean_abs_error_last_50": 192.83585977895137, "mean_pred_prob": 0.045081846439279614, "mean_pred_prob_last_10": 0.20880134399048983, "mean_pred_prob_last_25": 0.12005438867490739, "mean_pred_prob_last_50": 0.07511440361849964, "mean_token_accuracy": 0.8738497614860534, "step": 36410 }, { "epoch": 0.6474321369526959, "grad_norm": 0.9903748700097971, "learning_rate": 0.0001, "loss": 0.8132, "mean_abs_error": 92.5141747336443, "mean_abs_error_last_10": 20.322189788906375, "mean_abs_error_last_25": 42.809643760228816, "mean_abs_error_last_50": 64.51229942853787, "mean_pred_prob": 0.05422540418803692, "mean_pred_prob_last_10": 0.2675315134227276, "mean_pred_prob_last_25": 0.14611094184219836, "mean_pred_prob_last_50": 0.08924104124307633, "mean_token_accuracy": 0.8652174770832062, "step": 36420 }, { "epoch": 0.6476099052494978, "grad_norm": 3.87958208400007, "learning_rate": 0.0001, "loss": 0.6674, "mean_abs_error": 77.02327562641126, "mean_abs_error_last_10": 10.629383597583999, "mean_abs_error_last_25": 23.173475892829426, "mean_abs_error_last_50": 42.58090671940843, "mean_pred_prob": 0.07309906762093306, "mean_pred_prob_last_10": 0.32734042443335054, "mean_pred_prob_last_25": 0.1910868477076292, "mean_pred_prob_last_50": 0.12005545925348997, "mean_token_accuracy": 0.8725080013275146, "step": 36430 }, { "epoch": 0.6477876735462997, "grad_norm": 1.177153192856656, "learning_rate": 0.0001, "loss": 0.661, "mean_abs_error": 231.51649639608223, "mean_abs_error_last_10": 60.77541162990606, "mean_abs_error_last_25": 70.10211432759581, "mean_abs_error_last_50": 124.70972916836376, "mean_pred_prob": 0.04117778148502112, "mean_pred_prob_last_10": 0.19795805513858794, "mean_pred_prob_last_25": 0.11241818405687809, "mean_pred_prob_last_50": 0.0693510403856635, "mean_token_accuracy": 0.8646468102931977, "step": 36440 }, { "epoch": 0.6479654418431017, "grad_norm": 3.0601920737094224, "learning_rate": 0.0001, "loss": 0.6967, "mean_abs_error": 1282.2666829623827, "mean_abs_error_last_10": 1046.8079745506975, "mean_abs_error_last_25": 1037.0524304571404, "mean_abs_error_last_50": 1178.0407504187654, "mean_pred_prob": 0.04275215044945071, "mean_pred_prob_last_10": 0.19515613449366356, "mean_pred_prob_last_25": 0.11157865668283193, "mean_pred_prob_last_50": 0.069238062108343, "mean_token_accuracy": 0.8793979823589325, "step": 36450 }, { "epoch": 0.6481432101399036, "grad_norm": 1.4905282592260412, "learning_rate": 0.0001, "loss": 0.6807, "mean_abs_error": 673.3833240442347, "mean_abs_error_last_10": 336.09555333206106, "mean_abs_error_last_25": 368.4692408919251, "mean_abs_error_last_50": 455.5450136518608, "mean_pred_prob": 0.05349926158087328, "mean_pred_prob_last_10": 0.2507924054225441, "mean_pred_prob_last_25": 0.14000222433242016, "mean_pred_prob_last_50": 0.08906531533575616, "mean_token_accuracy": 0.8597058951854706, "step": 36460 }, { "epoch": 0.6483209784367056, "grad_norm": 2.1489751149622216, "learning_rate": 0.0001, "loss": 0.8015, "mean_abs_error": 855.0588533667303, "mean_abs_error_last_10": 448.08792032540805, "mean_abs_error_last_25": 579.9911024019667, "mean_abs_error_last_50": 633.3875780961007, "mean_pred_prob": 0.02571166854177136, "mean_pred_prob_last_10": 0.13256739476928486, "mean_pred_prob_last_25": 0.06832619992783293, "mean_pred_prob_last_50": 0.042036214628024024, "mean_token_accuracy": 0.8688344061374664, "step": 36470 }, { "epoch": 0.6484987467335075, "grad_norm": 1.1535349374225372, "learning_rate": 0.0001, "loss": 0.852, "mean_abs_error": 988.854697398567, "mean_abs_error_last_10": 303.5431344374869, "mean_abs_error_last_25": 388.0264686215232, "mean_abs_error_last_50": 615.9009715527995, "mean_pred_prob": 0.021656720413011497, "mean_pred_prob_last_10": 0.10923160248203204, "mean_pred_prob_last_25": 0.059211273706750944, "mean_pred_prob_last_50": 0.03532447553589009, "mean_token_accuracy": 0.870013165473938, "step": 36480 }, { "epoch": 0.6486765150303095, "grad_norm": 2.4628660010520838, "learning_rate": 0.0001, "loss": 0.8267, "mean_abs_error": 776.6674431327522, "mean_abs_error_last_10": 92.94980390386581, "mean_abs_error_last_25": 212.14801327050117, "mean_abs_error_last_50": 412.2116683797259, "mean_pred_prob": 0.03094499888829887, "mean_pred_prob_last_10": 0.1524682639632374, "mean_pred_prob_last_25": 0.08349668148439378, "mean_pred_prob_last_50": 0.05089697928633541, "mean_token_accuracy": 0.8681622207164764, "step": 36490 }, { "epoch": 0.6488542833271115, "grad_norm": 3.9859005253108113, "learning_rate": 0.0001, "loss": 0.7483, "mean_abs_error": 706.8492106118929, "mean_abs_error_last_10": 374.5233394822453, "mean_abs_error_last_25": 413.5068492039357, "mean_abs_error_last_50": 490.7504936182033, "mean_pred_prob": 0.0563460338031291, "mean_pred_prob_last_10": 0.27022978878812864, "mean_pred_prob_last_25": 0.15319110447890125, "mean_pred_prob_last_50": 0.09411949160275981, "mean_token_accuracy": 0.8807907223701477, "step": 36500 }, { "epoch": 0.6490320516239134, "grad_norm": 1.2924675726908177, "learning_rate": 0.0001, "loss": 0.8065, "mean_abs_error": 659.9206198147988, "mean_abs_error_last_10": 201.06467193124834, "mean_abs_error_last_25": 409.6647487567122, "mean_abs_error_last_50": 580.5800512944077, "mean_pred_prob": 0.03067248237784952, "mean_pred_prob_last_10": 0.16728122383356095, "mean_pred_prob_last_25": 0.08263080776669085, "mean_pred_prob_last_50": 0.049427756760269406, "mean_token_accuracy": 0.867545735836029, "step": 36510 }, { "epoch": 0.6492098199207154, "grad_norm": 1.9712396749681071, "learning_rate": 0.0001, "loss": 0.716, "mean_abs_error": 289.8560570666594, "mean_abs_error_last_10": 43.82774853213122, "mean_abs_error_last_25": 77.91192372421403, "mean_abs_error_last_50": 141.10248808288742, "mean_pred_prob": 0.061509011592715976, "mean_pred_prob_last_10": 0.2884388629347086, "mean_pred_prob_last_25": 0.1732624693773687, "mean_pred_prob_last_50": 0.105329475319013, "mean_token_accuracy": 0.8725278437137604, "step": 36520 }, { "epoch": 0.6493875882175173, "grad_norm": 1.460627026901327, "learning_rate": 0.0001, "loss": 0.7555, "mean_abs_error": 703.8073581485265, "mean_abs_error_last_10": 317.5913309230974, "mean_abs_error_last_25": 271.52293499933245, "mean_abs_error_last_50": 316.6948392783619, "mean_pred_prob": 0.04296143532847054, "mean_pred_prob_last_10": 0.18216852432815359, "mean_pred_prob_last_25": 0.10844210315262899, "mean_pred_prob_last_50": 0.06971172433113679, "mean_token_accuracy": 0.8694705724716186, "step": 36530 }, { "epoch": 0.6495653565143192, "grad_norm": 1.557311699138893, "learning_rate": 0.0001, "loss": 0.6884, "mean_abs_error": 799.8948698238619, "mean_abs_error_last_10": 397.47981420132294, "mean_abs_error_last_25": 421.47416744945565, "mean_abs_error_last_50": 539.9033791927968, "mean_pred_prob": 0.040953678981168196, "mean_pred_prob_last_10": 0.1673021059948951, "mean_pred_prob_last_25": 0.10036716239410452, "mean_pred_prob_last_50": 0.06612960863276385, "mean_token_accuracy": 0.8748941361904145, "step": 36540 }, { "epoch": 0.6497431248111212, "grad_norm": 0.8675183416606679, "learning_rate": 0.0001, "loss": 0.7155, "mean_abs_error": 468.1377622683185, "mean_abs_error_last_10": 168.21608317008256, "mean_abs_error_last_25": 214.15079719314213, "mean_abs_error_last_50": 300.3557145531121, "mean_pred_prob": 0.02966773227090016, "mean_pred_prob_last_10": 0.13573487806133927, "mean_pred_prob_last_25": 0.07746189921163023, "mean_pred_prob_last_50": 0.048653823765926064, "mean_token_accuracy": 0.8699907064437866, "step": 36550 }, { "epoch": 0.6499208931079231, "grad_norm": 2.3455678024679725, "learning_rate": 0.0001, "loss": 0.765, "mean_abs_error": 244.0300162401261, "mean_abs_error_last_10": 73.37534339468807, "mean_abs_error_last_25": 93.5707739337331, "mean_abs_error_last_50": 143.53301912370358, "mean_pred_prob": 0.029933296795934438, "mean_pred_prob_last_10": 0.13304213471710682, "mean_pred_prob_last_25": 0.07388139050453901, "mean_pred_prob_last_50": 0.047719224635511634, "mean_token_accuracy": 0.863786107301712, "step": 36560 }, { "epoch": 0.650098661404725, "grad_norm": 2.147527918511043, "learning_rate": 0.0001, "loss": 0.7214, "mean_abs_error": 434.577125296752, "mean_abs_error_last_10": 159.09182396290055, "mean_abs_error_last_25": 161.45025767274453, "mean_abs_error_last_50": 220.67021184739951, "mean_pred_prob": 0.03977686404832639, "mean_pred_prob_last_10": 0.1826093890122138, "mean_pred_prob_last_25": 0.10704099081922322, "mean_pred_prob_last_50": 0.06650894643971697, "mean_token_accuracy": 0.8727007269859314, "step": 36570 }, { "epoch": 0.650276429701527, "grad_norm": 1.825610171062666, "learning_rate": 0.0001, "loss": 0.7821, "mean_abs_error": 303.5203733962642, "mean_abs_error_last_10": 67.35732197342803, "mean_abs_error_last_25": 91.13078172253162, "mean_abs_error_last_50": 267.71630068504373, "mean_pred_prob": 0.04852932840585709, "mean_pred_prob_last_10": 0.24244750887155533, "mean_pred_prob_last_25": 0.13470348538830876, "mean_pred_prob_last_50": 0.0816470795776695, "mean_token_accuracy": 0.8801044046878814, "step": 36580 }, { "epoch": 0.6504541979983289, "grad_norm": 1.0693796435396623, "learning_rate": 0.0001, "loss": 0.9097, "mean_abs_error": 680.6542593002438, "mean_abs_error_last_10": 163.04400467868567, "mean_abs_error_last_25": 315.6590788183586, "mean_abs_error_last_50": 544.986298820005, "mean_pred_prob": 0.03412756577017717, "mean_pred_prob_last_10": 0.16998429815284907, "mean_pred_prob_last_25": 0.09303692773682996, "mean_pred_prob_last_50": 0.057685548631707204, "mean_token_accuracy": 0.8648934602737427, "step": 36590 }, { "epoch": 0.650631966295131, "grad_norm": 1.2052783241700762, "learning_rate": 0.0001, "loss": 0.6363, "mean_abs_error": 190.54314376128653, "mean_abs_error_last_10": 62.40846102763258, "mean_abs_error_last_25": 76.8817329076906, "mean_abs_error_last_50": 111.81863666801753, "mean_pred_prob": 0.044212687248364094, "mean_pred_prob_last_10": 0.19585029557347297, "mean_pred_prob_last_25": 0.11352274157106876, "mean_pred_prob_last_50": 0.07280574953183531, "mean_token_accuracy": 0.8721599459648133, "step": 36600 }, { "epoch": 0.6508097345919329, "grad_norm": 1.4883862532552694, "learning_rate": 0.0001, "loss": 0.7684, "mean_abs_error": 501.70441044185355, "mean_abs_error_last_10": 172.71107642305495, "mean_abs_error_last_25": 250.1709147611476, "mean_abs_error_last_50": 342.7119326037049, "mean_pred_prob": 0.03894795776868705, "mean_pred_prob_last_10": 0.19235114012262783, "mean_pred_prob_last_25": 0.10895261585246771, "mean_pred_prob_last_50": 0.06618038776214234, "mean_token_accuracy": 0.8697355031967163, "step": 36610 }, { "epoch": 0.6509875028887349, "grad_norm": 1.7287064685942701, "learning_rate": 0.0001, "loss": 0.7171, "mean_abs_error": 437.7054129347328, "mean_abs_error_last_10": 107.92641704461089, "mean_abs_error_last_25": 249.34940961295456, "mean_abs_error_last_50": 364.36615478207494, "mean_pred_prob": 0.030697317514568567, "mean_pred_prob_last_10": 0.163005636818707, "mean_pred_prob_last_25": 0.08603664115071297, "mean_pred_prob_last_50": 0.05089824665337801, "mean_token_accuracy": 0.8771928608417511, "step": 36620 }, { "epoch": 0.6511652711855368, "grad_norm": 1.2018668666010848, "learning_rate": 0.0001, "loss": 0.7897, "mean_abs_error": 593.2460083495233, "mean_abs_error_last_10": 164.01115964679488, "mean_abs_error_last_25": 257.6672856130623, "mean_abs_error_last_50": 350.82275728133266, "mean_pred_prob": 0.032470053067663686, "mean_pred_prob_last_10": 0.1627260712441057, "mean_pred_prob_last_25": 0.08891634519095533, "mean_pred_prob_last_50": 0.054047068866202606, "mean_token_accuracy": 0.8591533839702606, "step": 36630 }, { "epoch": 0.6513430394823387, "grad_norm": 2.330562344964579, "learning_rate": 0.0001, "loss": 0.7178, "mean_abs_error": 284.65322912900524, "mean_abs_error_last_10": 16.40528228028938, "mean_abs_error_last_25": 65.89861677865701, "mean_abs_error_last_50": 142.97492195772563, "mean_pred_prob": 0.054342823941260575, "mean_pred_prob_last_10": 0.25832631438970566, "mean_pred_prob_last_25": 0.14722761511802673, "mean_pred_prob_last_50": 0.09033537078648805, "mean_token_accuracy": 0.8720816075801849, "step": 36640 }, { "epoch": 0.6515208077791407, "grad_norm": 2.1900618024009453, "learning_rate": 0.0001, "loss": 0.6476, "mean_abs_error": 309.17135852899446, "mean_abs_error_last_10": 110.80413283986024, "mean_abs_error_last_25": 175.2198926360718, "mean_abs_error_last_50": 239.26489679234942, "mean_pred_prob": 0.04394201203249395, "mean_pred_prob_last_10": 0.2118617007508874, "mean_pred_prob_last_25": 0.11708358163014054, "mean_pred_prob_last_50": 0.0722471121698618, "mean_token_accuracy": 0.8794442296028138, "step": 36650 }, { "epoch": 0.6516985760759426, "grad_norm": 0.9935469066081066, "learning_rate": 0.0001, "loss": 0.7405, "mean_abs_error": 266.26801609686714, "mean_abs_error_last_10": 30.382500247835186, "mean_abs_error_last_25": 99.61995031026153, "mean_abs_error_last_50": 140.91646969349816, "mean_pred_prob": 0.05329374796710908, "mean_pred_prob_last_10": 0.27590843215584754, "mean_pred_prob_last_25": 0.15142274107784032, "mean_pred_prob_last_50": 0.09156559016555547, "mean_token_accuracy": 0.8692294180393219, "step": 36660 }, { "epoch": 0.6518763443727446, "grad_norm": 1.0507035404583123, "learning_rate": 0.0001, "loss": 0.6521, "mean_abs_error": 382.34367875569575, "mean_abs_error_last_10": 92.61225050364155, "mean_abs_error_last_25": 120.83777259633307, "mean_abs_error_last_50": 213.16186963623204, "mean_pred_prob": 0.04810824852320365, "mean_pred_prob_last_10": 0.23364705094136298, "mean_pred_prob_last_25": 0.13145733664277942, "mean_pred_prob_last_50": 0.08042326214490458, "mean_token_accuracy": 0.8639256000518799, "step": 36670 }, { "epoch": 0.6520541126695465, "grad_norm": 2.1230259351355865, "learning_rate": 0.0001, "loss": 0.7213, "mean_abs_error": 149.03569320610887, "mean_abs_error_last_10": 44.9885060354811, "mean_abs_error_last_25": 72.84901788753317, "mean_abs_error_last_50": 101.44140452802367, "mean_pred_prob": 0.055028055096045136, "mean_pred_prob_last_10": 0.24662973508238792, "mean_pred_prob_last_25": 0.1496714860200882, "mean_pred_prob_last_50": 0.09272200940176845, "mean_token_accuracy": 0.8759910702705384, "step": 36680 }, { "epoch": 0.6522318809663484, "grad_norm": 3.3111763200012994, "learning_rate": 0.0001, "loss": 0.7767, "mean_abs_error": 910.5301928817993, "mean_abs_error_last_10": 506.27009674533593, "mean_abs_error_last_25": 581.8177414440959, "mean_abs_error_last_50": 717.2986951180438, "mean_pred_prob": 0.03538313724566251, "mean_pred_prob_last_10": 0.17059526314842516, "mean_pred_prob_last_25": 0.09436172778368927, "mean_pred_prob_last_50": 0.05901434845145559, "mean_token_accuracy": 0.8696721374988556, "step": 36690 }, { "epoch": 0.6524096492631504, "grad_norm": 1.7122622881093057, "learning_rate": 0.0001, "loss": 0.7006, "mean_abs_error": 224.1207964153205, "mean_abs_error_last_10": 63.15194412694812, "mean_abs_error_last_25": 96.8773857502608, "mean_abs_error_last_50": 157.51510701249302, "mean_pred_prob": 0.03295890772715211, "mean_pred_prob_last_10": 0.1733669202774763, "mean_pred_prob_last_25": 0.09395751431584358, "mean_pred_prob_last_50": 0.055833081062883136, "mean_token_accuracy": 0.8695118129253387, "step": 36700 }, { "epoch": 0.6525874175599523, "grad_norm": 1.637115680236687, "learning_rate": 0.0001, "loss": 0.9248, "mean_abs_error": 450.07907987691385, "mean_abs_error_last_10": 284.2312725207147, "mean_abs_error_last_25": 316.02123115130763, "mean_abs_error_last_50": 363.81332171491675, "mean_pred_prob": 0.04357145003741607, "mean_pred_prob_last_10": 0.18672653819667176, "mean_pred_prob_last_25": 0.11141099053202197, "mean_pred_prob_last_50": 0.07092731866287068, "mean_token_accuracy": 0.8634195506572724, "step": 36710 }, { "epoch": 0.6527651858567544, "grad_norm": 1.1145745138860141, "learning_rate": 0.0001, "loss": 0.6663, "mean_abs_error": 390.0312759955293, "mean_abs_error_last_10": 224.13590588700936, "mean_abs_error_last_25": 221.81880284931486, "mean_abs_error_last_50": 245.1246281066487, "mean_pred_prob": 0.04218191972468048, "mean_pred_prob_last_10": 0.19548851577565074, "mean_pred_prob_last_25": 0.10474944696761668, "mean_pred_prob_last_50": 0.06711393874138594, "mean_token_accuracy": 0.8678257763385773, "step": 36720 }, { "epoch": 0.6529429541535563, "grad_norm": 1.37320731626808, "learning_rate": 0.0001, "loss": 0.6384, "mean_abs_error": 427.01211992889165, "mean_abs_error_last_10": 133.63789477011247, "mean_abs_error_last_25": 123.83212671353799, "mean_abs_error_last_50": 244.85134062242395, "mean_pred_prob": 0.040528069948777556, "mean_pred_prob_last_10": 0.19974232758395374, "mean_pred_prob_last_25": 0.11246473058126867, "mean_pred_prob_last_50": 0.06853773186448961, "mean_token_accuracy": 0.8740293681621552, "step": 36730 }, { "epoch": 0.6531207224503582, "grad_norm": 1.4708484608024461, "learning_rate": 0.0001, "loss": 0.6677, "mean_abs_error": 576.8105097279027, "mean_abs_error_last_10": 139.75701851995655, "mean_abs_error_last_25": 229.5585734400483, "mean_abs_error_last_50": 355.39641129320046, "mean_pred_prob": 0.03508743109414354, "mean_pred_prob_last_10": 0.16543517210520803, "mean_pred_prob_last_25": 0.09600778365274891, "mean_pred_prob_last_50": 0.059427971980767325, "mean_token_accuracy": 0.8731212973594665, "step": 36740 }, { "epoch": 0.6532984907471602, "grad_norm": 1.982679883791753, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 406.85044547353635, "mean_abs_error_last_10": 121.11280612390729, "mean_abs_error_last_25": 201.5524275133783, "mean_abs_error_last_50": 295.0043770961057, "mean_pred_prob": 0.05467783072963357, "mean_pred_prob_last_10": 0.2626190189272165, "mean_pred_prob_last_25": 0.14812980871647596, "mean_pred_prob_last_50": 0.09107925230637193, "mean_token_accuracy": 0.8745913028717041, "step": 36750 }, { "epoch": 0.6534762590439621, "grad_norm": 2.2726539104342547, "learning_rate": 0.0001, "loss": 0.7498, "mean_abs_error": 588.6539646279266, "mean_abs_error_last_10": 133.2919340302126, "mean_abs_error_last_25": 208.68441723320598, "mean_abs_error_last_50": 310.80297969506773, "mean_pred_prob": 0.051358379854355006, "mean_pred_prob_last_10": 0.22491944634821265, "mean_pred_prob_last_25": 0.13311761040240527, "mean_pred_prob_last_50": 0.08471960290335119, "mean_token_accuracy": 0.8617757976055145, "step": 36760 }, { "epoch": 0.653654027340764, "grad_norm": 2.4182941234854765, "learning_rate": 0.0001, "loss": 0.6872, "mean_abs_error": 853.4879313215739, "mean_abs_error_last_10": 231.11834843095798, "mean_abs_error_last_25": 289.2727673440668, "mean_abs_error_last_50": 464.40223468952837, "mean_pred_prob": 0.016678049392066897, "mean_pred_prob_last_10": 0.08765883372398094, "mean_pred_prob_last_25": 0.047251539095304905, "mean_pred_prob_last_50": 0.028084380878135563, "mean_token_accuracy": 0.8715857744216919, "step": 36770 }, { "epoch": 0.653831795637566, "grad_norm": 1.6737891512929213, "learning_rate": 0.0001, "loss": 0.6009, "mean_abs_error": 115.59474775183688, "mean_abs_error_last_10": 23.990693711113035, "mean_abs_error_last_25": 33.32432147730405, "mean_abs_error_last_50": 73.17989012943384, "mean_pred_prob": 0.06571432296186686, "mean_pred_prob_last_10": 0.30777775272727015, "mean_pred_prob_last_25": 0.17895762696862222, "mean_pred_prob_last_50": 0.11091583743691444, "mean_token_accuracy": 0.8784945070743561, "step": 36780 }, { "epoch": 0.6540095639343679, "grad_norm": 1.935241372042583, "learning_rate": 0.0001, "loss": 0.6306, "mean_abs_error": 146.57566786012373, "mean_abs_error_last_10": 31.341595173308708, "mean_abs_error_last_25": 45.048938290902875, "mean_abs_error_last_50": 74.8123497510163, "mean_pred_prob": 0.06009276672266424, "mean_pred_prob_last_10": 0.2525846131145954, "mean_pred_prob_last_25": 0.1544462200254202, "mean_pred_prob_last_50": 0.09950502440333367, "mean_token_accuracy": 0.8799548864364624, "step": 36790 }, { "epoch": 0.6541873322311699, "grad_norm": 1.1270771229618022, "learning_rate": 0.0001, "loss": 0.7454, "mean_abs_error": 1295.4425969172714, "mean_abs_error_last_10": 660.4265853461224, "mean_abs_error_last_25": 774.4883586195298, "mean_abs_error_last_50": 997.9882977439034, "mean_pred_prob": 0.026960463635623454, "mean_pred_prob_last_10": 0.1390815639460925, "mean_pred_prob_last_25": 0.0699134317052085, "mean_pred_prob_last_50": 0.04392450242303312, "mean_token_accuracy": 0.8782869279384613, "step": 36800 }, { "epoch": 0.6543651005279718, "grad_norm": 3.8461421069229194, "learning_rate": 0.0001, "loss": 0.7592, "mean_abs_error": 1634.4220415341572, "mean_abs_error_last_10": 950.9390078392802, "mean_abs_error_last_25": 1036.2570838139345, "mean_abs_error_last_50": 1234.8447879721439, "mean_pred_prob": 0.0077694438441540115, "mean_pred_prob_last_10": 0.04598190439573955, "mean_pred_prob_last_25": 0.02225390886014793, "mean_pred_prob_last_50": 0.01305859053536551, "mean_token_accuracy": 0.8679754793643951, "step": 36810 }, { "epoch": 0.6545428688247737, "grad_norm": 1.6572615954035532, "learning_rate": 0.0001, "loss": 0.6988, "mean_abs_error": 528.3912649054696, "mean_abs_error_last_10": 225.19818557735329, "mean_abs_error_last_25": 248.3647653967089, "mean_abs_error_last_50": 326.5083413327342, "mean_pred_prob": 0.05377321847481653, "mean_pred_prob_last_10": 0.22829507945571095, "mean_pred_prob_last_25": 0.13597812168300152, "mean_pred_prob_last_50": 0.08700502874562517, "mean_token_accuracy": 0.8815144896507263, "step": 36820 }, { "epoch": 0.6547206371215757, "grad_norm": 1.1999143007019513, "learning_rate": 0.0001, "loss": 0.761, "mean_abs_error": 574.9455819442634, "mean_abs_error_last_10": 136.1674863296503, "mean_abs_error_last_25": 203.206216163029, "mean_abs_error_last_50": 304.3395312989177, "mean_pred_prob": 0.019022965442854912, "mean_pred_prob_last_10": 0.10355038533452898, "mean_pred_prob_last_25": 0.054205630510114136, "mean_pred_prob_last_50": 0.03199179583461955, "mean_token_accuracy": 0.8726990699768067, "step": 36830 }, { "epoch": 0.6548984054183777, "grad_norm": 1.3924587439613063, "learning_rate": 0.0001, "loss": 0.6728, "mean_abs_error": 185.829226086504, "mean_abs_error_last_10": 56.96249356234141, "mean_abs_error_last_25": 73.14097268237599, "mean_abs_error_last_50": 125.87471263257135, "mean_pred_prob": 0.05103687578812242, "mean_pred_prob_last_10": 0.2428494941443205, "mean_pred_prob_last_25": 0.14008610285818576, "mean_pred_prob_last_50": 0.08515356406569481, "mean_token_accuracy": 0.8736719965934754, "step": 36840 }, { "epoch": 0.6550761737151797, "grad_norm": 2.093243905904514, "learning_rate": 0.0001, "loss": 0.8249, "mean_abs_error": 1101.4283921192095, "mean_abs_error_last_10": 424.49174169465215, "mean_abs_error_last_25": 693.6566230737338, "mean_abs_error_last_50": 867.7655431776409, "mean_pred_prob": 0.04035105916846078, "mean_pred_prob_last_10": 0.1986921060422901, "mean_pred_prob_last_25": 0.111547870375216, "mean_pred_prob_last_50": 0.06840508622699418, "mean_token_accuracy": 0.8742287874221801, "step": 36850 }, { "epoch": 0.6552539420119816, "grad_norm": 2.4258298530924773, "learning_rate": 0.0001, "loss": 0.8929, "mean_abs_error": 492.992906892997, "mean_abs_error_last_10": 116.36950057077595, "mean_abs_error_last_25": 143.63774343458763, "mean_abs_error_last_50": 226.2313414658734, "mean_pred_prob": 0.0394090945716016, "mean_pred_prob_last_10": 0.19644370432943106, "mean_pred_prob_last_25": 0.1073876976268366, "mean_pred_prob_last_50": 0.06649563828250393, "mean_token_accuracy": 0.8739484965801239, "step": 36860 }, { "epoch": 0.6554317103087836, "grad_norm": 1.4432945403085473, "learning_rate": 0.0001, "loss": 0.7963, "mean_abs_error": 766.5116214357581, "mean_abs_error_last_10": 332.20840337367935, "mean_abs_error_last_25": 429.49662804201563, "mean_abs_error_last_50": 516.1230984754368, "mean_pred_prob": 0.03914527154993266, "mean_pred_prob_last_10": 0.178311840002425, "mean_pred_prob_last_25": 0.10894995605340227, "mean_pred_prob_last_50": 0.06577974561369046, "mean_token_accuracy": 0.8714020788669586, "step": 36870 }, { "epoch": 0.6556094786055855, "grad_norm": 2.5249577407600126, "learning_rate": 0.0001, "loss": 0.748, "mean_abs_error": 1001.3387668847554, "mean_abs_error_last_10": 282.5378023709505, "mean_abs_error_last_25": 424.53055165324486, "mean_abs_error_last_50": 612.7490820304124, "mean_pred_prob": 0.017753252357942985, "mean_pred_prob_last_10": 0.1004936195909977, "mean_pred_prob_last_25": 0.050010634103091436, "mean_pred_prob_last_50": 0.02967347952653654, "mean_token_accuracy": 0.8743775367736817, "step": 36880 }, { "epoch": 0.6557872469023874, "grad_norm": 2.369239031821199, "learning_rate": 0.0001, "loss": 0.6455, "mean_abs_error": 335.7671988239496, "mean_abs_error_last_10": 59.03686518532928, "mean_abs_error_last_25": 79.75812008813642, "mean_abs_error_last_50": 167.67603152617804, "mean_pred_prob": 0.05187575602903962, "mean_pred_prob_last_10": 0.23059796798042953, "mean_pred_prob_last_25": 0.1349364335415885, "mean_pred_prob_last_50": 0.08547899613622575, "mean_token_accuracy": 0.8791053712368011, "step": 36890 }, { "epoch": 0.6559650151991894, "grad_norm": 2.377263593663581, "learning_rate": 0.0001, "loss": 0.764, "mean_abs_error": 1204.3822331802453, "mean_abs_error_last_10": 753.312011788774, "mean_abs_error_last_25": 756.9426895983095, "mean_abs_error_last_50": 918.2852844121305, "mean_pred_prob": 0.029878570561413652, "mean_pred_prob_last_10": 0.14076229650818278, "mean_pred_prob_last_25": 0.07897227348730666, "mean_pred_prob_last_50": 0.04906351213430753, "mean_token_accuracy": 0.8683013081550598, "step": 36900 }, { "epoch": 0.6561427834959913, "grad_norm": 2.2040536653763834, "learning_rate": 0.0001, "loss": 0.667, "mean_abs_error": 121.13577070709042, "mean_abs_error_last_10": 34.99735614315193, "mean_abs_error_last_25": 66.77217067896245, "mean_abs_error_last_50": 71.48614914306644, "mean_pred_prob": 0.047751443926244974, "mean_pred_prob_last_10": 0.238689836114645, "mean_pred_prob_last_25": 0.12975117564201355, "mean_pred_prob_last_50": 0.07879446279257536, "mean_token_accuracy": 0.8814583122730255, "step": 36910 }, { "epoch": 0.6563205517927932, "grad_norm": 3.083813388403955, "learning_rate": 0.0001, "loss": 0.7889, "mean_abs_error": 438.26665225375183, "mean_abs_error_last_10": 111.48004540211727, "mean_abs_error_last_25": 131.9004738767189, "mean_abs_error_last_50": 224.09399099285596, "mean_pred_prob": 0.028417956456542014, "mean_pred_prob_last_10": 0.14773249458521603, "mean_pred_prob_last_25": 0.08268629172816873, "mean_pred_prob_last_50": 0.049347995594143865, "mean_token_accuracy": 0.8749410688877106, "step": 36920 }, { "epoch": 0.6564983200895952, "grad_norm": 1.8982548917490796, "learning_rate": 0.0001, "loss": 0.7686, "mean_abs_error": 669.1523580513876, "mean_abs_error_last_10": 444.454171737902, "mean_abs_error_last_25": 499.52819802591273, "mean_abs_error_last_50": 526.7435099847967, "mean_pred_prob": 0.04737057999882381, "mean_pred_prob_last_10": 0.24491053533274681, "mean_pred_prob_last_25": 0.1367457235173788, "mean_pred_prob_last_50": 0.08052410377422348, "mean_token_accuracy": 0.863885885477066, "step": 36930 }, { "epoch": 0.6566760883863971, "grad_norm": 1.0130846957037607, "learning_rate": 0.0001, "loss": 0.7965, "mean_abs_error": 207.32539736277894, "mean_abs_error_last_10": 62.813843494157084, "mean_abs_error_last_25": 75.07873573716896, "mean_abs_error_last_50": 105.58706499279154, "mean_pred_prob": 0.040361945913173256, "mean_pred_prob_last_10": 0.1912361767143011, "mean_pred_prob_last_25": 0.10587261244654655, "mean_pred_prob_last_50": 0.066444688802585, "mean_token_accuracy": 0.8634982883930207, "step": 36940 }, { "epoch": 0.6568538566831991, "grad_norm": 1.6763097741739315, "learning_rate": 0.0001, "loss": 0.7078, "mean_abs_error": 264.4092459235511, "mean_abs_error_last_10": 183.01325268034515, "mean_abs_error_last_25": 207.89339071792665, "mean_abs_error_last_50": 188.54681743795527, "mean_pred_prob": 0.05167419468052685, "mean_pred_prob_last_10": 0.25645765475928783, "mean_pred_prob_last_25": 0.1402600333094597, "mean_pred_prob_last_50": 0.08512698356062173, "mean_token_accuracy": 0.8657010555267334, "step": 36950 }, { "epoch": 0.6570316249800011, "grad_norm": 1.2036023237408486, "learning_rate": 0.0001, "loss": 0.6574, "mean_abs_error": 375.6616881546295, "mean_abs_error_last_10": 162.87621624618805, "mean_abs_error_last_25": 205.6492970443957, "mean_abs_error_last_50": 243.77425775244524, "mean_pred_prob": 0.03146964940242469, "mean_pred_prob_last_10": 0.1630903072655201, "mean_pred_prob_last_25": 0.08836745619773864, "mean_pred_prob_last_50": 0.05297892214730382, "mean_token_accuracy": 0.8773480176925659, "step": 36960 }, { "epoch": 0.6572093932768031, "grad_norm": 1.6760271783640326, "learning_rate": 0.0001, "loss": 0.7081, "mean_abs_error": 885.8832288181144, "mean_abs_error_last_10": 330.7016436585841, "mean_abs_error_last_25": 481.7258674340768, "mean_abs_error_last_50": 618.0787020196633, "mean_pred_prob": 0.04505621231801342, "mean_pred_prob_last_10": 0.20961065724841318, "mean_pred_prob_last_25": 0.1221637058944907, "mean_pred_prob_last_50": 0.07716333093121648, "mean_token_accuracy": 0.8760646641254425, "step": 36970 }, { "epoch": 0.657387161573605, "grad_norm": 2.1564864736312743, "learning_rate": 0.0001, "loss": 0.7182, "mean_abs_error": 134.51759910593057, "mean_abs_error_last_10": 15.80094408374273, "mean_abs_error_last_25": 37.68884396861696, "mean_abs_error_last_50": 70.60080670959931, "mean_pred_prob": 0.046492919139564035, "mean_pred_prob_last_10": 0.2480781428515911, "mean_pred_prob_last_25": 0.1351007752120495, "mean_pred_prob_last_50": 0.07976274136453868, "mean_token_accuracy": 0.8812920093536377, "step": 36980 }, { "epoch": 0.6575649298704069, "grad_norm": 1.3490966341618447, "learning_rate": 0.0001, "loss": 0.7084, "mean_abs_error": 301.18997185803454, "mean_abs_error_last_10": 114.67314278117655, "mean_abs_error_last_25": 157.6927767255364, "mean_abs_error_last_50": 193.89016656292102, "mean_pred_prob": 0.04374498159158975, "mean_pred_prob_last_10": 0.1976889155805111, "mean_pred_prob_last_25": 0.11654529524967075, "mean_pred_prob_last_50": 0.07233653352595866, "mean_token_accuracy": 0.8781744241714478, "step": 36990 }, { "epoch": 0.6577426981672089, "grad_norm": 1.2652460920557285, "learning_rate": 0.0001, "loss": 0.7049, "mean_abs_error": 582.3129285046941, "mean_abs_error_last_10": 159.54140193559223, "mean_abs_error_last_25": 233.40764074130007, "mean_abs_error_last_50": 291.52104217484805, "mean_pred_prob": 0.031522186455549675, "mean_pred_prob_last_10": 0.16472559932153671, "mean_pred_prob_last_25": 0.08786931433714926, "mean_pred_prob_last_50": 0.05347590188030153, "mean_token_accuracy": 0.8715788960456848, "step": 37000 }, { "epoch": 0.6579204664640108, "grad_norm": 1.2419865495074742, "learning_rate": 0.0001, "loss": 0.6624, "mean_abs_error": 469.8971186121727, "mean_abs_error_last_10": 161.13165819333145, "mean_abs_error_last_25": 148.62330593008477, "mean_abs_error_last_50": 274.523643929319, "mean_pred_prob": 0.04273197753354907, "mean_pred_prob_last_10": 0.20765392631292343, "mean_pred_prob_last_25": 0.11797702535986901, "mean_pred_prob_last_50": 0.0722863492090255, "mean_token_accuracy": 0.8761215806007385, "step": 37010 }, { "epoch": 0.6580982347608128, "grad_norm": 1.7150634755253487, "learning_rate": 0.0001, "loss": 0.8489, "mean_abs_error": 786.922031659562, "mean_abs_error_last_10": 179.01368153857004, "mean_abs_error_last_25": 204.88585372606357, "mean_abs_error_last_50": 315.9517183271234, "mean_pred_prob": 0.01774435068946332, "mean_pred_prob_last_10": 0.09676946916151792, "mean_pred_prob_last_25": 0.049578821496106686, "mean_pred_prob_last_50": 0.029822074226103723, "mean_token_accuracy": 0.8658180952072143, "step": 37020 }, { "epoch": 0.6582760030576147, "grad_norm": 1.4119395395396184, "learning_rate": 0.0001, "loss": 0.643, "mean_abs_error": 61.03047151949973, "mean_abs_error_last_10": 12.73623279811764, "mean_abs_error_last_25": 18.127946457864308, "mean_abs_error_last_50": 30.542818981730164, "mean_pred_prob": 0.07398852948099374, "mean_pred_prob_last_10": 0.32743547707796095, "mean_pred_prob_last_25": 0.19701104015111923, "mean_pred_prob_last_50": 0.12403510585427284, "mean_token_accuracy": 0.8708034813404083, "step": 37030 }, { "epoch": 0.6584537713544166, "grad_norm": 1.3332294626532868, "learning_rate": 0.0001, "loss": 0.7651, "mean_abs_error": 267.6289619434058, "mean_abs_error_last_10": 53.91408050467936, "mean_abs_error_last_25": 71.35643084137274, "mean_abs_error_last_50": 145.09981868994953, "mean_pred_prob": 0.03184057157486677, "mean_pred_prob_last_10": 0.16692682653665541, "mean_pred_prob_last_25": 0.0937701852992177, "mean_pred_prob_last_50": 0.054969507921487096, "mean_token_accuracy": 0.8743985950946808, "step": 37040 }, { "epoch": 0.6586315396512186, "grad_norm": 1.580998055419163, "learning_rate": 0.0001, "loss": 0.6176, "mean_abs_error": 630.1666273465482, "mean_abs_error_last_10": 137.08821667019842, "mean_abs_error_last_25": 220.54592220045257, "mean_abs_error_last_50": 368.72036120200175, "mean_pred_prob": 0.038747288734884935, "mean_pred_prob_last_10": 0.1835638411459513, "mean_pred_prob_last_25": 0.10364195967558772, "mean_pred_prob_last_50": 0.06347217119764537, "mean_token_accuracy": 0.8671296596527099, "step": 37050 }, { "epoch": 0.6588093079480205, "grad_norm": 2.071485857976649, "learning_rate": 0.0001, "loss": 0.6437, "mean_abs_error": 631.727658526147, "mean_abs_error_last_10": 405.04765084266353, "mean_abs_error_last_25": 479.4464323331749, "mean_abs_error_last_50": 511.26444077624035, "mean_pred_prob": 0.025853686255868525, "mean_pred_prob_last_10": 0.11237068416085094, "mean_pred_prob_last_25": 0.068531499709934, "mean_pred_prob_last_50": 0.04262127742404118, "mean_token_accuracy": 0.8628359854221344, "step": 37060 }, { "epoch": 0.6589870762448224, "grad_norm": 1.802950473984781, "learning_rate": 0.0001, "loss": 0.7478, "mean_abs_error": 758.7410141518507, "mean_abs_error_last_10": 327.6627542449131, "mean_abs_error_last_25": 411.5858197716374, "mean_abs_error_last_50": 516.5399940839006, "mean_pred_prob": 0.029313090539653787, "mean_pred_prob_last_10": 0.13096863739192485, "mean_pred_prob_last_25": 0.07492044869577512, "mean_pred_prob_last_50": 0.04750519680674188, "mean_token_accuracy": 0.8747846961021424, "step": 37070 }, { "epoch": 0.6591648445416245, "grad_norm": 1.1996695564784339, "learning_rate": 0.0001, "loss": 0.6807, "mean_abs_error": 138.47784216461946, "mean_abs_error_last_10": 54.6145147664105, "mean_abs_error_last_25": 74.75169807957417, "mean_abs_error_last_50": 85.02100996267691, "mean_pred_prob": 0.051483623776584865, "mean_pred_prob_last_10": 0.22440365813672541, "mean_pred_prob_last_25": 0.134875401109457, "mean_pred_prob_last_50": 0.08503092369064688, "mean_token_accuracy": 0.8678557395935058, "step": 37080 }, { "epoch": 0.6593426128384264, "grad_norm": 3.895947415374438, "learning_rate": 0.0001, "loss": 0.8141, "mean_abs_error": 423.13627489773864, "mean_abs_error_last_10": 141.91885678321836, "mean_abs_error_last_25": 172.41683064487367, "mean_abs_error_last_50": 286.82982584943005, "mean_pred_prob": 0.04087118171155453, "mean_pred_prob_last_10": 0.16623153388500214, "mean_pred_prob_last_25": 0.10157799068838358, "mean_pred_prob_last_50": 0.06546355271711946, "mean_token_accuracy": 0.8703265845775604, "step": 37090 }, { "epoch": 0.6595203811352284, "grad_norm": 0.9833505039136168, "learning_rate": 0.0001, "loss": 0.8754, "mean_abs_error": 1563.4483756315853, "mean_abs_error_last_10": 909.0395951239152, "mean_abs_error_last_25": 1079.1124135276045, "mean_abs_error_last_50": 1240.1875530810867, "mean_pred_prob": 0.025713483186700616, "mean_pred_prob_last_10": 0.12188426275388338, "mean_pred_prob_last_25": 0.06773896863160189, "mean_pred_prob_last_50": 0.04184427128711832, "mean_token_accuracy": 0.8663501143455505, "step": 37100 }, { "epoch": 0.6596981494320303, "grad_norm": 1.3237420340422095, "learning_rate": 0.0001, "loss": 0.6708, "mean_abs_error": 842.1325676148105, "mean_abs_error_last_10": 542.6806623712498, "mean_abs_error_last_25": 557.7575714379612, "mean_abs_error_last_50": 639.5986841202621, "mean_pred_prob": 0.0417891790275462, "mean_pred_prob_last_10": 0.20464957420772406, "mean_pred_prob_last_25": 0.11449316404468846, "mean_pred_prob_last_50": 0.06878164108202327, "mean_token_accuracy": 0.8800787508487702, "step": 37110 }, { "epoch": 0.6598759177288323, "grad_norm": 1.466158875086784, "learning_rate": 0.0001, "loss": 0.8125, "mean_abs_error": 600.9349719436491, "mean_abs_error_last_10": 142.33996032640096, "mean_abs_error_last_25": 215.99328336707111, "mean_abs_error_last_50": 353.2703162150823, "mean_pred_prob": 0.02954558436758816, "mean_pred_prob_last_10": 0.16621318012475966, "mean_pred_prob_last_25": 0.08579814368858933, "mean_pred_prob_last_50": 0.05034224479459226, "mean_token_accuracy": 0.8629474937915802, "step": 37120 }, { "epoch": 0.6600536860256342, "grad_norm": 1.3921532075485556, "learning_rate": 0.0001, "loss": 0.7658, "mean_abs_error": 1035.2558678212108, "mean_abs_error_last_10": 453.6911032709725, "mean_abs_error_last_25": 524.0512545422055, "mean_abs_error_last_50": 718.6812996471931, "mean_pred_prob": 0.015916122874477877, "mean_pred_prob_last_10": 0.08204292326699943, "mean_pred_prob_last_25": 0.04378267777792644, "mean_pred_prob_last_50": 0.026364831789396703, "mean_token_accuracy": 0.8640940904617309, "step": 37130 }, { "epoch": 0.6602314543224361, "grad_norm": 1.4496294393855018, "learning_rate": 0.0001, "loss": 0.8542, "mean_abs_error": 225.97611168978028, "mean_abs_error_last_10": 88.61807487856348, "mean_abs_error_last_25": 132.81443107618963, "mean_abs_error_last_50": 183.38322800468478, "mean_pred_prob": 0.03789212023839354, "mean_pred_prob_last_10": 0.18129599541425706, "mean_pred_prob_last_25": 0.10263315420597792, "mean_pred_prob_last_50": 0.06418141070753336, "mean_token_accuracy": 0.8733617126941681, "step": 37140 }, { "epoch": 0.6604092226192381, "grad_norm": 1.188280611655862, "learning_rate": 0.0001, "loss": 0.7339, "mean_abs_error": 283.9251806708154, "mean_abs_error_last_10": 77.12106572637381, "mean_abs_error_last_25": 108.2144331020581, "mean_abs_error_last_50": 172.93681997060133, "mean_pred_prob": 0.029547882941551508, "mean_pred_prob_last_10": 0.14112098030745984, "mean_pred_prob_last_25": 0.0784948755055666, "mean_pred_prob_last_50": 0.04841646165587008, "mean_token_accuracy": 0.8855625212192535, "step": 37150 }, { "epoch": 0.66058699091604, "grad_norm": 2.2227051271575182, "learning_rate": 0.0001, "loss": 0.6891, "mean_abs_error": 524.3293841319193, "mean_abs_error_last_10": 79.62115860529545, "mean_abs_error_last_25": 140.8225017573076, "mean_abs_error_last_50": 254.54262631532143, "mean_pred_prob": 0.04681856569368392, "mean_pred_prob_last_10": 0.21963260658085346, "mean_pred_prob_last_25": 0.12367172185331583, "mean_pred_prob_last_50": 0.07614391418173909, "mean_token_accuracy": 0.8840703547000885, "step": 37160 }, { "epoch": 0.660764759212842, "grad_norm": 1.5821859824243572, "learning_rate": 0.0001, "loss": 0.7103, "mean_abs_error": 223.92025837049204, "mean_abs_error_last_10": 81.60971416370009, "mean_abs_error_last_25": 130.74921217584227, "mean_abs_error_last_50": 180.79925850329317, "mean_pred_prob": 0.04465887760743499, "mean_pred_prob_last_10": 0.21290924344211817, "mean_pred_prob_last_25": 0.12113962881267071, "mean_pred_prob_last_50": 0.0757860649842769, "mean_token_accuracy": 0.8738082528114319, "step": 37170 }, { "epoch": 0.6609425275096439, "grad_norm": 1.2171216586030196, "learning_rate": 0.0001, "loss": 0.6897, "mean_abs_error": 608.7784716606976, "mean_abs_error_last_10": 239.8362793743526, "mean_abs_error_last_25": 251.5349249217507, "mean_abs_error_last_50": 398.0921289336269, "mean_pred_prob": 0.037529859773349016, "mean_pred_prob_last_10": 0.19746340671554208, "mean_pred_prob_last_25": 0.10533739672973752, "mean_pred_prob_last_50": 0.06312693657819182, "mean_token_accuracy": 0.8800309121608734, "step": 37180 }, { "epoch": 0.6611202958064458, "grad_norm": 1.1464614669824635, "learning_rate": 0.0001, "loss": 0.7938, "mean_abs_error": 657.0017935239348, "mean_abs_error_last_10": 213.61723880150333, "mean_abs_error_last_25": 290.9042740828481, "mean_abs_error_last_50": 451.9476684975526, "mean_pred_prob": 0.029933056549634785, "mean_pred_prob_last_10": 0.14575382890179753, "mean_pred_prob_last_25": 0.08218642977299169, "mean_pred_prob_last_50": 0.050678530603181574, "mean_token_accuracy": 0.8748990893363953, "step": 37190 }, { "epoch": 0.6612980641032479, "grad_norm": 1.9982604984537755, "learning_rate": 0.0001, "loss": 0.6948, "mean_abs_error": 1292.0791158133613, "mean_abs_error_last_10": 529.1438138967876, "mean_abs_error_last_25": 732.4333422218967, "mean_abs_error_last_50": 897.562675604668, "mean_pred_prob": 0.03250402117264457, "mean_pred_prob_last_10": 0.15708899540186394, "mean_pred_prob_last_25": 0.08702438046748284, "mean_pred_prob_last_50": 0.05300583058851771, "mean_token_accuracy": 0.8694537818431854, "step": 37200 }, { "epoch": 0.6614758324000498, "grad_norm": 1.8001950536718496, "learning_rate": 0.0001, "loss": 0.6201, "mean_abs_error": 416.413031857567, "mean_abs_error_last_10": 384.4916991108765, "mean_abs_error_last_25": 457.2973040644132, "mean_abs_error_last_50": 443.00571352028675, "mean_pred_prob": 0.028165512392297386, "mean_pred_prob_last_10": 0.14631221771705896, "mean_pred_prob_last_25": 0.07661453743930906, "mean_pred_prob_last_50": 0.04690640368498862, "mean_token_accuracy": 0.8789168536663056, "step": 37210 }, { "epoch": 0.6616536006968518, "grad_norm": 1.7271862812860628, "learning_rate": 0.0001, "loss": 0.7855, "mean_abs_error": 440.62898881816744, "mean_abs_error_last_10": 159.5881327616418, "mean_abs_error_last_25": 180.53906446090937, "mean_abs_error_last_50": 280.11591620875083, "mean_pred_prob": 0.02942701685242355, "mean_pred_prob_last_10": 0.16361558195203543, "mean_pred_prob_last_25": 0.08860447015613318, "mean_pred_prob_last_50": 0.05256537478417158, "mean_token_accuracy": 0.8821821331977844, "step": 37220 }, { "epoch": 0.6618313689936537, "grad_norm": 2.0995418581341485, "learning_rate": 0.0001, "loss": 0.7829, "mean_abs_error": 408.4019772630185, "mean_abs_error_last_10": 144.59201441699918, "mean_abs_error_last_25": 193.51590102225765, "mean_abs_error_last_50": 251.1889315572517, "mean_pred_prob": 0.02377714409958571, "mean_pred_prob_last_10": 0.11917947866022587, "mean_pred_prob_last_25": 0.06494094114750623, "mean_pred_prob_last_50": 0.03974958057515323, "mean_token_accuracy": 0.8683815062046051, "step": 37230 }, { "epoch": 0.6620091372904556, "grad_norm": 0.9525686163351356, "learning_rate": 0.0001, "loss": 0.638, "mean_abs_error": 330.0337819637748, "mean_abs_error_last_10": 112.89208520747363, "mean_abs_error_last_25": 118.73025993075139, "mean_abs_error_last_50": 193.1386506300749, "mean_pred_prob": 0.04097494075540453, "mean_pred_prob_last_10": 0.190742764249444, "mean_pred_prob_last_25": 0.10672939103096724, "mean_pred_prob_last_50": 0.06771597638726234, "mean_token_accuracy": 0.8703950941562653, "step": 37240 }, { "epoch": 0.6621869055872576, "grad_norm": 1.0501815460252437, "learning_rate": 0.0001, "loss": 0.72, "mean_abs_error": 1278.9848165333456, "mean_abs_error_last_10": 686.1278778710115, "mean_abs_error_last_25": 849.401005550251, "mean_abs_error_last_50": 966.9379567141401, "mean_pred_prob": 0.025843862599867862, "mean_pred_prob_last_10": 0.14206609750690405, "mean_pred_prob_last_25": 0.07450055122317281, "mean_pred_prob_last_50": 0.04362588901713025, "mean_token_accuracy": 0.8672724008560181, "step": 37250 }, { "epoch": 0.6623646738840595, "grad_norm": 0.9124999066536859, "learning_rate": 0.0001, "loss": 0.7015, "mean_abs_error": 1138.4307005445605, "mean_abs_error_last_10": 712.7381685349844, "mean_abs_error_last_25": 750.0187163984051, "mean_abs_error_last_50": 884.768501775539, "mean_pred_prob": 0.03366543514712248, "mean_pred_prob_last_10": 0.15003611743741202, "mean_pred_prob_last_25": 0.08735566763789393, "mean_pred_prob_last_50": 0.05494270611088723, "mean_token_accuracy": 0.8738461792469024, "step": 37260 }, { "epoch": 0.6625424421808614, "grad_norm": 1.6967935085918409, "learning_rate": 0.0001, "loss": 0.6517, "mean_abs_error": 189.42722511226393, "mean_abs_error_last_10": 44.41178456487088, "mean_abs_error_last_25": 80.35044550567417, "mean_abs_error_last_50": 122.5849837399616, "mean_pred_prob": 0.050724197365343573, "mean_pred_prob_last_10": 0.24267432279884815, "mean_pred_prob_last_25": 0.13631703034043313, "mean_pred_prob_last_50": 0.08466148087754846, "mean_token_accuracy": 0.8772302746772767, "step": 37270 }, { "epoch": 0.6627202104776634, "grad_norm": 1.0958651504699255, "learning_rate": 0.0001, "loss": 0.8189, "mean_abs_error": 513.4344418015905, "mean_abs_error_last_10": 155.51433446070897, "mean_abs_error_last_25": 226.2293231479668, "mean_abs_error_last_50": 331.19466144354817, "mean_pred_prob": 0.027448818925768136, "mean_pred_prob_last_10": 0.13799259383231402, "mean_pred_prob_last_25": 0.0755088010802865, "mean_pred_prob_last_50": 0.04510075789876282, "mean_token_accuracy": 0.8660646736621856, "step": 37280 }, { "epoch": 0.6628979787744653, "grad_norm": 1.3629080164293224, "learning_rate": 0.0001, "loss": 0.6747, "mean_abs_error": 387.6698546057248, "mean_abs_error_last_10": 81.0639629085675, "mean_abs_error_last_25": 137.74226887191475, "mean_abs_error_last_50": 210.05630506743222, "mean_pred_prob": 0.04270676607266068, "mean_pred_prob_last_10": 0.19499538401141764, "mean_pred_prob_last_25": 0.11187261659651995, "mean_pred_prob_last_50": 0.07088865623809397, "mean_token_accuracy": 0.877072137594223, "step": 37290 }, { "epoch": 0.6630757470712673, "grad_norm": 0.9184147361302599, "learning_rate": 0.0001, "loss": 0.6012, "mean_abs_error": 61.15314421288879, "mean_abs_error_last_10": 25.570703842774098, "mean_abs_error_last_25": 35.17726069214458, "mean_abs_error_last_50": 43.05939586317651, "mean_pred_prob": 0.060436786897480485, "mean_pred_prob_last_10": 0.2732694700360298, "mean_pred_prob_last_25": 0.15509481653571128, "mean_pred_prob_last_50": 0.09793300423771142, "mean_token_accuracy": 0.8811679780483246, "step": 37300 }, { "epoch": 0.6632535153680693, "grad_norm": 1.8143927676030145, "learning_rate": 0.0001, "loss": 0.6794, "mean_abs_error": 397.4951345544553, "mean_abs_error_last_10": 159.8719961363947, "mean_abs_error_last_25": 217.62825013195896, "mean_abs_error_last_50": 199.70286721407655, "mean_pred_prob": 0.04570624019252136, "mean_pred_prob_last_10": 0.22837155952583998, "mean_pred_prob_last_25": 0.12465368593111634, "mean_pred_prob_last_50": 0.0775524914613925, "mean_token_accuracy": 0.8706903874874115, "step": 37310 }, { "epoch": 0.6634312836648713, "grad_norm": 1.1195222932932354, "learning_rate": 0.0001, "loss": 0.7355, "mean_abs_error": 496.468344437733, "mean_abs_error_last_10": 145.3115668839709, "mean_abs_error_last_25": 147.3820698198395, "mean_abs_error_last_50": 223.56153331906998, "mean_pred_prob": 0.033716045261826366, "mean_pred_prob_last_10": 0.1544020175235346, "mean_pred_prob_last_25": 0.08821313246153295, "mean_pred_prob_last_50": 0.05541181940352544, "mean_token_accuracy": 0.863929706811905, "step": 37320 }, { "epoch": 0.6636090519616732, "grad_norm": 1.8709973725446143, "learning_rate": 0.0001, "loss": 0.6836, "mean_abs_error": 477.94946118800607, "mean_abs_error_last_10": 150.1487116917813, "mean_abs_error_last_25": 197.26863849487466, "mean_abs_error_last_50": 279.8536978795416, "mean_pred_prob": 0.025718375341966748, "mean_pred_prob_last_10": 0.14163235034793614, "mean_pred_prob_last_25": 0.0733157609589398, "mean_pred_prob_last_50": 0.04347512209787965, "mean_token_accuracy": 0.8724863708019257, "step": 37330 }, { "epoch": 0.6637868202584751, "grad_norm": 3.1382086908668185, "learning_rate": 0.0001, "loss": 0.7962, "mean_abs_error": 628.1007417117038, "mean_abs_error_last_10": 329.7666616067408, "mean_abs_error_last_25": 397.723610948311, "mean_abs_error_last_50": 503.90512658151175, "mean_pred_prob": 0.04104507745651063, "mean_pred_prob_last_10": 0.19804195201722904, "mean_pred_prob_last_25": 0.11386172682396137, "mean_pred_prob_last_50": 0.06967724520363845, "mean_token_accuracy": 0.8705715656280517, "step": 37340 }, { "epoch": 0.6639645885552771, "grad_norm": 1.1847480544932867, "learning_rate": 0.0001, "loss": 0.782, "mean_abs_error": 472.14194543757213, "mean_abs_error_last_10": 169.0338950914646, "mean_abs_error_last_25": 292.88646210342614, "mean_abs_error_last_50": 364.0907697380672, "mean_pred_prob": 0.05421077084029093, "mean_pred_prob_last_10": 0.24602512731216847, "mean_pred_prob_last_25": 0.1441930810222402, "mean_pred_prob_last_50": 0.09110498201916925, "mean_token_accuracy": 0.8733937919139863, "step": 37350 }, { "epoch": 0.664142356852079, "grad_norm": 2.41097263934813, "learning_rate": 0.0001, "loss": 0.7378, "mean_abs_error": 791.5206510170354, "mean_abs_error_last_10": 195.2848437202002, "mean_abs_error_last_25": 293.25780055936673, "mean_abs_error_last_50": 464.9581597541728, "mean_pred_prob": 0.028804053596104495, "mean_pred_prob_last_10": 0.14092395454645157, "mean_pred_prob_last_25": 0.07854437367641368, "mean_pred_prob_last_50": 0.04870091251214035, "mean_token_accuracy": 0.8732980608940124, "step": 37360 }, { "epoch": 0.664320125148881, "grad_norm": 1.4220372438112585, "learning_rate": 0.0001, "loss": 0.83, "mean_abs_error": 271.3532994106189, "mean_abs_error_last_10": 56.446916973978524, "mean_abs_error_last_25": 78.39966105337324, "mean_abs_error_last_50": 129.09031418460918, "mean_pred_prob": 0.05246147783473134, "mean_pred_prob_last_10": 0.25842582285404203, "mean_pred_prob_last_25": 0.14398093223571778, "mean_pred_prob_last_50": 0.08760075243189931, "mean_token_accuracy": 0.8716824769973754, "step": 37370 }, { "epoch": 0.6644978934456829, "grad_norm": 3.3618781247736753, "learning_rate": 0.0001, "loss": 0.7845, "mean_abs_error": 338.31583155378837, "mean_abs_error_last_10": 128.36520128693022, "mean_abs_error_last_25": 154.94907398267407, "mean_abs_error_last_50": 193.70841032099526, "mean_pred_prob": 0.04263794315047562, "mean_pred_prob_last_10": 0.20133742401376367, "mean_pred_prob_last_25": 0.11312748165801167, "mean_pred_prob_last_50": 0.07056181957013904, "mean_token_accuracy": 0.8698365807533264, "step": 37380 }, { "epoch": 0.6646756617424848, "grad_norm": 2.198707982755626, "learning_rate": 0.0001, "loss": 0.7854, "mean_abs_error": 522.2002906776543, "mean_abs_error_last_10": 170.48001157436718, "mean_abs_error_last_25": 249.40405348734674, "mean_abs_error_last_50": 351.2747312326427, "mean_pred_prob": 0.03021608589333482, "mean_pred_prob_last_10": 0.1394599383464083, "mean_pred_prob_last_25": 0.07902737059630453, "mean_pred_prob_last_50": 0.04951500545721501, "mean_token_accuracy": 0.8670497953891754, "step": 37390 }, { "epoch": 0.6648534300392868, "grad_norm": 1.2880050431297925, "learning_rate": 0.0001, "loss": 0.7374, "mean_abs_error": 461.78038406719287, "mean_abs_error_last_10": 120.2316977067539, "mean_abs_error_last_25": 152.86519477887396, "mean_abs_error_last_50": 294.82928796058434, "mean_pred_prob": 0.032037130743265155, "mean_pred_prob_last_10": 0.15440483521670104, "mean_pred_prob_last_25": 0.084759321808815, "mean_pred_prob_last_50": 0.0524993940256536, "mean_token_accuracy": 0.872254741191864, "step": 37400 }, { "epoch": 0.6650311983360887, "grad_norm": 1.580026200170547, "learning_rate": 0.0001, "loss": 0.7893, "mean_abs_error": 386.6015709624017, "mean_abs_error_last_10": 153.82634682700805, "mean_abs_error_last_25": 202.97913879196477, "mean_abs_error_last_50": 297.6332570953625, "mean_pred_prob": 0.03201504764147103, "mean_pred_prob_last_10": 0.1614570714533329, "mean_pred_prob_last_25": 0.08688726425170898, "mean_pred_prob_last_50": 0.053275722451508044, "mean_token_accuracy": 0.8660831391811371, "step": 37410 }, { "epoch": 0.6652089666328906, "grad_norm": 1.4938330144251326, "learning_rate": 0.0001, "loss": 0.668, "mean_abs_error": 170.721215077824, "mean_abs_error_last_10": 74.63807087313151, "mean_abs_error_last_25": 84.7550755047954, "mean_abs_error_last_50": 106.62354541607647, "mean_pred_prob": 0.05203579613007605, "mean_pred_prob_last_10": 0.26455683410167696, "mean_pred_prob_last_25": 0.14675967413932084, "mean_pred_prob_last_50": 0.08795808255672455, "mean_token_accuracy": 0.8764474332332611, "step": 37420 }, { "epoch": 0.6653867349296927, "grad_norm": 2.02724338010343, "learning_rate": 0.0001, "loss": 0.6241, "mean_abs_error": 278.4111532521672, "mean_abs_error_last_10": 63.217307136055275, "mean_abs_error_last_25": 74.90212204458943, "mean_abs_error_last_50": 132.10760553223128, "mean_pred_prob": 0.05959972119308077, "mean_pred_prob_last_10": 0.24381210568826645, "mean_pred_prob_last_25": 0.15026797404279932, "mean_pred_prob_last_50": 0.09799632241483777, "mean_token_accuracy": 0.873573362827301, "step": 37430 }, { "epoch": 0.6655645032264946, "grad_norm": 2.0432430371784585, "learning_rate": 0.0001, "loss": 0.8918, "mean_abs_error": 1516.5032797754186, "mean_abs_error_last_10": 755.308420085779, "mean_abs_error_last_25": 892.3526214004881, "mean_abs_error_last_50": 1114.4150068539964, "mean_pred_prob": 0.03792998517383239, "mean_pred_prob_last_10": 0.18740714019513688, "mean_pred_prob_last_25": 0.1079981048409536, "mean_pred_prob_last_50": 0.0646633719385136, "mean_token_accuracy": 0.8741909027099609, "step": 37440 }, { "epoch": 0.6657422715232966, "grad_norm": 1.1585096710940106, "learning_rate": 0.0001, "loss": 0.7253, "mean_abs_error": 282.36653142224367, "mean_abs_error_last_10": 145.1678474615988, "mean_abs_error_last_25": 151.36541344937964, "mean_abs_error_last_50": 160.40038116527222, "mean_pred_prob": 0.03950378363952041, "mean_pred_prob_last_10": 0.20131252345163375, "mean_pred_prob_last_25": 0.1059704499784857, "mean_pred_prob_last_50": 0.06512682291213423, "mean_token_accuracy": 0.8758576154708863, "step": 37450 }, { "epoch": 0.6659200398200985, "grad_norm": 1.224592128012921, "learning_rate": 0.0001, "loss": 0.6105, "mean_abs_error": 172.28656089276052, "mean_abs_error_last_10": 38.985807522819314, "mean_abs_error_last_25": 75.4264958277053, "mean_abs_error_last_50": 125.2652914537636, "mean_pred_prob": 0.043426689133048056, "mean_pred_prob_last_10": 0.2061532061547041, "mean_pred_prob_last_25": 0.11312125772237777, "mean_pred_prob_last_50": 0.07059097029268742, "mean_token_accuracy": 0.8700159668922425, "step": 37460 }, { "epoch": 0.6660978081169004, "grad_norm": 1.530447244892365, "learning_rate": 0.0001, "loss": 0.7665, "mean_abs_error": 1059.4997629742406, "mean_abs_error_last_10": 639.3737484702663, "mean_abs_error_last_25": 680.2276191291969, "mean_abs_error_last_50": 804.8600770856993, "mean_pred_prob": 0.03350346321240068, "mean_pred_prob_last_10": 0.15461788343673105, "mean_pred_prob_last_25": 0.0890613671304891, "mean_pred_prob_last_50": 0.055193797510582954, "mean_token_accuracy": 0.8737212598323822, "step": 37470 }, { "epoch": 0.6662755764137024, "grad_norm": 1.215837913815681, "learning_rate": 0.0001, "loss": 0.7009, "mean_abs_error": 615.9897472886455, "mean_abs_error_last_10": 140.19683618638572, "mean_abs_error_last_25": 173.28976884097057, "mean_abs_error_last_50": 301.5300179811171, "mean_pred_prob": 0.0309874385362491, "mean_pred_prob_last_10": 0.15439051687717437, "mean_pred_prob_last_25": 0.08447843212634325, "mean_pred_prob_last_50": 0.05191815979778767, "mean_token_accuracy": 0.877811872959137, "step": 37480 }, { "epoch": 0.6664533447105043, "grad_norm": 0.972112852930348, "learning_rate": 0.0001, "loss": 0.6283, "mean_abs_error": 353.08671277423485, "mean_abs_error_last_10": 143.65576876169612, "mean_abs_error_last_25": 319.88027753506816, "mean_abs_error_last_50": 353.3596853811743, "mean_pred_prob": 0.04098472234327346, "mean_pred_prob_last_10": 0.1832454912364483, "mean_pred_prob_last_25": 0.10742582930251957, "mean_pred_prob_last_50": 0.06709902910515667, "mean_token_accuracy": 0.8766285479068756, "step": 37490 }, { "epoch": 0.6666311130073063, "grad_norm": 1.5363012600260317, "learning_rate": 0.0001, "loss": 0.8366, "mean_abs_error": 374.15946083007236, "mean_abs_error_last_10": 77.65983246412502, "mean_abs_error_last_25": 142.91616551672752, "mean_abs_error_last_50": 214.1841629442374, "mean_pred_prob": 0.03345689568668604, "mean_pred_prob_last_10": 0.17731155790388584, "mean_pred_prob_last_25": 0.09386729970574378, "mean_pred_prob_last_50": 0.05564821842126548, "mean_token_accuracy": 0.8736620306968689, "step": 37500 }, { "epoch": 0.6668088813041082, "grad_norm": 1.371704510780169, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 324.79995686137346, "mean_abs_error_last_10": 257.3249885587442, "mean_abs_error_last_25": 244.7063785781839, "mean_abs_error_last_50": 252.099959241321, "mean_pred_prob": 0.05630515676457435, "mean_pred_prob_last_10": 0.2673038044944406, "mean_pred_prob_last_25": 0.15238814246840776, "mean_pred_prob_last_50": 0.09322848315350711, "mean_token_accuracy": 0.8756286323070526, "step": 37510 }, { "epoch": 0.6669866496009101, "grad_norm": 1.9184589701301367, "learning_rate": 0.0001, "loss": 0.5954, "mean_abs_error": 680.389260158084, "mean_abs_error_last_10": 258.16286513524403, "mean_abs_error_last_25": 373.0714760059535, "mean_abs_error_last_50": 546.2902188776725, "mean_pred_prob": 0.048812658179667776, "mean_pred_prob_last_10": 0.2215276710339822, "mean_pred_prob_last_25": 0.131475515186321, "mean_pred_prob_last_50": 0.08129867329844273, "mean_token_accuracy": 0.8767901122570038, "step": 37520 }, { "epoch": 0.6671644178977121, "grad_norm": 1.00791430057327, "learning_rate": 0.0001, "loss": 0.7772, "mean_abs_error": 369.533008575298, "mean_abs_error_last_10": 83.841899522507, "mean_abs_error_last_25": 88.12137488416663, "mean_abs_error_last_50": 152.85419458615388, "mean_pred_prob": 0.048733451031148435, "mean_pred_prob_last_10": 0.2331759797409177, "mean_pred_prob_last_25": 0.13601724226027728, "mean_pred_prob_last_50": 0.08193009877577424, "mean_token_accuracy": 0.8696212887763977, "step": 37530 }, { "epoch": 0.667342186194514, "grad_norm": 1.4442929090735206, "learning_rate": 0.0001, "loss": 0.7974, "mean_abs_error": 674.1978228764755, "mean_abs_error_last_10": 210.89778205556453, "mean_abs_error_last_25": 229.58487967903176, "mean_abs_error_last_50": 361.68645296707075, "mean_pred_prob": 0.03791052590822801, "mean_pred_prob_last_10": 0.17756165590835735, "mean_pred_prob_last_25": 0.09600805635564029, "mean_pred_prob_last_50": 0.06001523247687146, "mean_token_accuracy": 0.8647151589393616, "step": 37540 }, { "epoch": 0.6675199544913161, "grad_norm": 1.1532327476863646, "learning_rate": 0.0001, "loss": 0.6416, "mean_abs_error": 407.6792864539058, "mean_abs_error_last_10": 201.67397715405656, "mean_abs_error_last_25": 177.0820315567865, "mean_abs_error_last_50": 277.46509678439503, "mean_pred_prob": 0.03363604918122291, "mean_pred_prob_last_10": 0.1710479491390288, "mean_pred_prob_last_25": 0.09269232258666307, "mean_pred_prob_last_50": 0.05551223650109023, "mean_token_accuracy": 0.8792100787162781, "step": 37550 }, { "epoch": 0.667697722788118, "grad_norm": 1.5025001961004454, "learning_rate": 0.0001, "loss": 0.787, "mean_abs_error": 993.9505878762466, "mean_abs_error_last_10": 701.6246178273298, "mean_abs_error_last_25": 735.6421045910716, "mean_abs_error_last_50": 802.11118343572, "mean_pred_prob": 0.04266174557560589, "mean_pred_prob_last_10": 0.20342159149076905, "mean_pred_prob_last_25": 0.11477992378204363, "mean_pred_prob_last_50": 0.07253033070883248, "mean_token_accuracy": 0.8596553325653076, "step": 37560 }, { "epoch": 0.66787549108492, "grad_norm": 1.8666177494250642, "learning_rate": 0.0001, "loss": 0.7801, "mean_abs_error": 296.86283300116986, "mean_abs_error_last_10": 95.33286029250777, "mean_abs_error_last_25": 137.21155467350826, "mean_abs_error_last_50": 179.11917365105973, "mean_pred_prob": 0.03780522575834766, "mean_pred_prob_last_10": 0.19167990086134523, "mean_pred_prob_last_25": 0.1009624897968024, "mean_pred_prob_last_50": 0.0633005695301108, "mean_token_accuracy": 0.8760239601135253, "step": 37570 }, { "epoch": 0.6680532593817219, "grad_norm": 1.7787093285965185, "learning_rate": 0.0001, "loss": 0.6868, "mean_abs_error": 388.61571105101547, "mean_abs_error_last_10": 390.5085224260439, "mean_abs_error_last_25": 351.0583113594274, "mean_abs_error_last_50": 292.1276508873742, "mean_pred_prob": 0.0332752880116459, "mean_pred_prob_last_10": 0.17623742914875037, "mean_pred_prob_last_25": 0.09397086615208536, "mean_pred_prob_last_50": 0.05587369371205568, "mean_token_accuracy": 0.8741701781749726, "step": 37580 }, { "epoch": 0.6682310276785238, "grad_norm": 4.47795656861456, "learning_rate": 0.0001, "loss": 0.8161, "mean_abs_error": 864.4514797905298, "mean_abs_error_last_10": 220.98564142628894, "mean_abs_error_last_25": 392.850157055687, "mean_abs_error_last_50": 627.2832539537217, "mean_pred_prob": 0.022115347586805, "mean_pred_prob_last_10": 0.11773242603521794, "mean_pred_prob_last_25": 0.06379012216348201, "mean_pred_prob_last_50": 0.038153990911087024, "mean_token_accuracy": 0.8686962425708771, "step": 37590 }, { "epoch": 0.6684087959753258, "grad_norm": 1.669579597896236, "learning_rate": 0.0001, "loss": 0.7139, "mean_abs_error": 461.4927576275442, "mean_abs_error_last_10": 259.7170531402208, "mean_abs_error_last_25": 240.10395205808763, "mean_abs_error_last_50": 295.36253116825264, "mean_pred_prob": 0.03600639650830999, "mean_pred_prob_last_10": 0.18626172770746052, "mean_pred_prob_last_25": 0.09912777177523821, "mean_pred_prob_last_50": 0.06164424205198884, "mean_token_accuracy": 0.8667417228221893, "step": 37600 }, { "epoch": 0.6685865642721277, "grad_norm": 2.5222197823295667, "learning_rate": 0.0001, "loss": 0.7737, "mean_abs_error": 197.1955416258083, "mean_abs_error_last_10": 88.07582906471971, "mean_abs_error_last_25": 93.59438314323066, "mean_abs_error_last_50": 127.81653727010647, "mean_pred_prob": 0.049573181616142395, "mean_pred_prob_last_10": 0.24467570669949054, "mean_pred_prob_last_25": 0.1368738256394863, "mean_pred_prob_last_50": 0.08331901207566261, "mean_token_accuracy": 0.870527571439743, "step": 37610 }, { "epoch": 0.6687643325689296, "grad_norm": 2.3308110975321563, "learning_rate": 0.0001, "loss": 0.6794, "mean_abs_error": 1289.1300482484692, "mean_abs_error_last_10": 635.6271172031022, "mean_abs_error_last_25": 737.6355053234114, "mean_abs_error_last_50": 967.4315159266956, "mean_pred_prob": 0.030409571145719384, "mean_pred_prob_last_10": 0.1499180169601459, "mean_pred_prob_last_25": 0.07958684780460316, "mean_pred_prob_last_50": 0.04996094507077942, "mean_token_accuracy": 0.8689183175563813, "step": 37620 }, { "epoch": 0.6689421008657316, "grad_norm": 1.9850875199521605, "learning_rate": 0.0001, "loss": 0.7197, "mean_abs_error": 289.03539790645374, "mean_abs_error_last_10": 115.44762395087227, "mean_abs_error_last_25": 124.2290532626503, "mean_abs_error_last_50": 168.17414548491337, "mean_pred_prob": 0.029583510523661972, "mean_pred_prob_last_10": 0.15499029234051703, "mean_pred_prob_last_25": 0.08271953705698251, "mean_pred_prob_last_50": 0.05005686632357538, "mean_token_accuracy": 0.870191115140915, "step": 37630 }, { "epoch": 0.6691198691625335, "grad_norm": 1.031028413274025, "learning_rate": 0.0001, "loss": 0.7278, "mean_abs_error": 473.30816639910137, "mean_abs_error_last_10": 175.2746840996584, "mean_abs_error_last_25": 293.64265308973893, "mean_abs_error_last_50": 310.9003757120025, "mean_pred_prob": 0.0362688364693895, "mean_pred_prob_last_10": 0.18354189551901073, "mean_pred_prob_last_25": 0.09859731270698831, "mean_pred_prob_last_50": 0.06073872753186151, "mean_token_accuracy": 0.8718505799770355, "step": 37640 }, { "epoch": 0.6692976374593355, "grad_norm": 0.9979639681505027, "learning_rate": 0.0001, "loss": 0.6229, "mean_abs_error": 857.0244484611516, "mean_abs_error_last_10": 531.4945993747758, "mean_abs_error_last_25": 589.3344597098228, "mean_abs_error_last_50": 626.9293886210195, "mean_pred_prob": 0.029688018932938577, "mean_pred_prob_last_10": 0.12775871544727124, "mean_pred_prob_last_25": 0.07805404781829565, "mean_pred_prob_last_50": 0.04953858331427909, "mean_token_accuracy": 0.8784467935562134, "step": 37650 }, { "epoch": 0.6694754057561374, "grad_norm": 1.2533470752676936, "learning_rate": 0.0001, "loss": 0.8265, "mean_abs_error": 387.3341908754484, "mean_abs_error_last_10": 127.43959291110848, "mean_abs_error_last_25": 160.3034616604203, "mean_abs_error_last_50": 193.1317650377236, "mean_pred_prob": 0.04245884083211422, "mean_pred_prob_last_10": 0.2033228999003768, "mean_pred_prob_last_25": 0.11432769810780882, "mean_pred_prob_last_50": 0.07075841706246137, "mean_token_accuracy": 0.8697010934352875, "step": 37660 }, { "epoch": 0.6696531740529394, "grad_norm": 1.974790789134377, "learning_rate": 0.0001, "loss": 0.6914, "mean_abs_error": 102.3067713281936, "mean_abs_error_last_10": 38.68966512864064, "mean_abs_error_last_25": 57.28378796311474, "mean_abs_error_last_50": 63.88228906857743, "mean_pred_prob": 0.05062405476346612, "mean_pred_prob_last_10": 0.23749838583171368, "mean_pred_prob_last_25": 0.1355537872761488, "mean_pred_prob_last_50": 0.08344321828335524, "mean_token_accuracy": 0.8701201736927032, "step": 37670 }, { "epoch": 0.6698309423497414, "grad_norm": 1.8000268352495752, "learning_rate": 0.0001, "loss": 0.7812, "mean_abs_error": 272.2431233866099, "mean_abs_error_last_10": 94.62407612213447, "mean_abs_error_last_25": 104.36651983881961, "mean_abs_error_last_50": 163.97896089501234, "mean_pred_prob": 0.03844666318036616, "mean_pred_prob_last_10": 0.1834465928375721, "mean_pred_prob_last_25": 0.10544509124010801, "mean_pred_prob_last_50": 0.06409366028383374, "mean_token_accuracy": 0.8656874358654022, "step": 37680 }, { "epoch": 0.6700087106465433, "grad_norm": 3.3506543286537545, "learning_rate": 0.0001, "loss": 0.829, "mean_abs_error": 504.40296645600466, "mean_abs_error_last_10": 186.927197253023, "mean_abs_error_last_25": 174.43557980360413, "mean_abs_error_last_50": 288.59410455031554, "mean_pred_prob": 0.032069661933928725, "mean_pred_prob_last_10": 0.1434519711881876, "mean_pred_prob_last_25": 0.08310090890154243, "mean_pred_prob_last_50": 0.052802380500361325, "mean_token_accuracy": 0.8804189503192902, "step": 37690 }, { "epoch": 0.6701864789433453, "grad_norm": 1.750402577406052, "learning_rate": 0.0001, "loss": 0.7099, "mean_abs_error": 399.2596070369846, "mean_abs_error_last_10": 243.64621397481264, "mean_abs_error_last_25": 301.07276706665186, "mean_abs_error_last_50": 311.98013373497554, "mean_pred_prob": 0.023674341361038388, "mean_pred_prob_last_10": 0.12699641901999711, "mean_pred_prob_last_25": 0.06721087731420994, "mean_pred_prob_last_50": 0.039499895414337516, "mean_token_accuracy": 0.8764837503433227, "step": 37700 }, { "epoch": 0.6703642472401472, "grad_norm": 3.0597262035194137, "learning_rate": 0.0001, "loss": 0.8091, "mean_abs_error": 100.19813284192301, "mean_abs_error_last_10": 12.55497225395791, "mean_abs_error_last_25": 29.788311204074507, "mean_abs_error_last_50": 52.952344154979, "mean_pred_prob": 0.05910497764125466, "mean_pred_prob_last_10": 0.28495965227484704, "mean_pred_prob_last_25": 0.1603087641298771, "mean_pred_prob_last_50": 0.09894834235310554, "mean_token_accuracy": 0.8771598100662231, "step": 37710 }, { "epoch": 0.6705420155369491, "grad_norm": 1.4733918359274114, "learning_rate": 0.0001, "loss": 0.7752, "mean_abs_error": 528.0771225341897, "mean_abs_error_last_10": 146.91819101652672, "mean_abs_error_last_25": 196.06597663171675, "mean_abs_error_last_50": 268.69940236102497, "mean_pred_prob": 0.04991877570282668, "mean_pred_prob_last_10": 0.21480463563930244, "mean_pred_prob_last_25": 0.1295538960956037, "mean_pred_prob_last_50": 0.08174491004901938, "mean_token_accuracy": 0.8687069416046143, "step": 37720 }, { "epoch": 0.6707197838337511, "grad_norm": 1.289748543721706, "learning_rate": 0.0001, "loss": 0.7082, "mean_abs_error": 700.5849756510336, "mean_abs_error_last_10": 225.2154030316195, "mean_abs_error_last_25": 331.52226607514746, "mean_abs_error_last_50": 465.40737912241184, "mean_pred_prob": 0.0419761523779016, "mean_pred_prob_last_10": 0.20164076088112778, "mean_pred_prob_last_25": 0.11408871243475005, "mean_pred_prob_last_50": 0.06952932113199495, "mean_token_accuracy": 0.8780538320541382, "step": 37730 }, { "epoch": 0.670897552130553, "grad_norm": 0.9608028393898561, "learning_rate": 0.0001, "loss": 0.682, "mean_abs_error": 423.2128229297381, "mean_abs_error_last_10": 113.92834547522452, "mean_abs_error_last_25": 128.17872140004272, "mean_abs_error_last_50": 207.34144900287166, "mean_pred_prob": 0.05929341443115845, "mean_pred_prob_last_10": 0.26096815527416767, "mean_pred_prob_last_25": 0.15142053654417395, "mean_pred_prob_last_50": 0.09674465388525277, "mean_token_accuracy": 0.8733046591281891, "step": 37740 }, { "epoch": 0.671075320427355, "grad_norm": 1.3703441989400447, "learning_rate": 0.0001, "loss": 0.7579, "mean_abs_error": 1147.690436418679, "mean_abs_error_last_10": 642.8057079630985, "mean_abs_error_last_25": 751.545371699571, "mean_abs_error_last_50": 886.6122293711642, "mean_pred_prob": 0.02778343581740046, "mean_pred_prob_last_10": 0.1411485247022938, "mean_pred_prob_last_25": 0.07544973845942878, "mean_pred_prob_last_50": 0.04607845007703872, "mean_token_accuracy": 0.873447036743164, "step": 37750 }, { "epoch": 0.6712530887241569, "grad_norm": 1.991401702572184, "learning_rate": 0.0001, "loss": 0.7237, "mean_abs_error": 512.3565688624005, "mean_abs_error_last_10": 170.86565126615878, "mean_abs_error_last_25": 255.96807628263213, "mean_abs_error_last_50": 320.27708807593365, "mean_pred_prob": 0.02658692846307531, "mean_pred_prob_last_10": 0.13937716069631279, "mean_pred_prob_last_25": 0.07529095204081386, "mean_pred_prob_last_50": 0.04537525644991547, "mean_token_accuracy": 0.8707249641418457, "step": 37760 }, { "epoch": 0.6714308570209588, "grad_norm": 1.2931797553913864, "learning_rate": 0.0001, "loss": 0.6665, "mean_abs_error": 387.32611265782896, "mean_abs_error_last_10": 64.98139406911187, "mean_abs_error_last_25": 87.28599210049263, "mean_abs_error_last_50": 161.2042985493107, "mean_pred_prob": 0.04636415671557188, "mean_pred_prob_last_10": 0.22761883828788995, "mean_pred_prob_last_25": 0.1256740174256265, "mean_pred_prob_last_50": 0.07812685249373316, "mean_token_accuracy": 0.8752717196941375, "step": 37770 }, { "epoch": 0.6716086253177608, "grad_norm": 1.6712544031999117, "learning_rate": 0.0001, "loss": 0.8291, "mean_abs_error": 188.51117631561675, "mean_abs_error_last_10": 35.183631136283665, "mean_abs_error_last_25": 58.0910986331523, "mean_abs_error_last_50": 104.29550233269859, "mean_pred_prob": 0.04141299175098538, "mean_pred_prob_last_10": 0.21397512704133986, "mean_pred_prob_last_25": 0.11465683672577143, "mean_pred_prob_last_50": 0.06997304605320095, "mean_token_accuracy": 0.8634189069271088, "step": 37780 }, { "epoch": 0.6717863936145628, "grad_norm": 1.5684044488816842, "learning_rate": 0.0001, "loss": 0.8143, "mean_abs_error": 1026.7548050547794, "mean_abs_error_last_10": 385.49666602493755, "mean_abs_error_last_25": 490.85015235758175, "mean_abs_error_last_50": 679.6947799149978, "mean_pred_prob": 0.03527262248680927, "mean_pred_prob_last_10": 0.16681273108697497, "mean_pred_prob_last_25": 0.09101714555872605, "mean_pred_prob_last_50": 0.057781701599014926, "mean_token_accuracy": 0.86878901720047, "step": 37790 }, { "epoch": 0.6719641619113648, "grad_norm": 1.934482633501832, "learning_rate": 0.0001, "loss": 0.7862, "mean_abs_error": 543.2581327502832, "mean_abs_error_last_10": 206.07199923421507, "mean_abs_error_last_25": 353.6982363784108, "mean_abs_error_last_50": 408.7566356266058, "mean_pred_prob": 0.046310937879025, "mean_pred_prob_last_10": 0.2052709695242811, "mean_pred_prob_last_25": 0.11884057530551217, "mean_pred_prob_last_50": 0.074674578855047, "mean_token_accuracy": 0.8708827018737793, "step": 37800 }, { "epoch": 0.6721419302081667, "grad_norm": 2.413084726154878, "learning_rate": 0.0001, "loss": 0.6776, "mean_abs_error": 235.6784738984527, "mean_abs_error_last_10": 68.46576941974618, "mean_abs_error_last_25": 114.61099017438173, "mean_abs_error_last_50": 179.675690120118, "mean_pred_prob": 0.03761044633574784, "mean_pred_prob_last_10": 0.18625567741692067, "mean_pred_prob_last_25": 0.10216114111244678, "mean_pred_prob_last_50": 0.06205435637384653, "mean_token_accuracy": 0.8824831306934356, "step": 37810 }, { "epoch": 0.6723196985049686, "grad_norm": 1.5550585361987335, "learning_rate": 0.0001, "loss": 0.603, "mean_abs_error": 604.4203944351513, "mean_abs_error_last_10": 242.69936634214076, "mean_abs_error_last_25": 261.5248609027404, "mean_abs_error_last_50": 364.4249502352212, "mean_pred_prob": 0.023996718536363915, "mean_pred_prob_last_10": 0.12048129482427612, "mean_pred_prob_last_25": 0.06600386331556365, "mean_pred_prob_last_50": 0.04076572188641876, "mean_token_accuracy": 0.8782413899898529, "step": 37820 }, { "epoch": 0.6724974668017706, "grad_norm": 1.424312786132986, "learning_rate": 0.0001, "loss": 0.6659, "mean_abs_error": 114.85531027343923, "mean_abs_error_last_10": 20.80361073093913, "mean_abs_error_last_25": 42.828945159506915, "mean_abs_error_last_50": 69.47623036856965, "mean_pred_prob": 0.06220209002494812, "mean_pred_prob_last_10": 0.2927481710910797, "mean_pred_prob_last_25": 0.17368748486042024, "mean_pred_prob_last_50": 0.1062687698751688, "mean_token_accuracy": 0.8660362601280213, "step": 37830 }, { "epoch": 0.6726752350985725, "grad_norm": 1.2731829446418648, "learning_rate": 0.0001, "loss": 0.6632, "mean_abs_error": 292.74349653666184, "mean_abs_error_last_10": 68.88962889623738, "mean_abs_error_last_25": 84.49294526770333, "mean_abs_error_last_50": 140.15186016509355, "mean_pred_prob": 0.04719497268088162, "mean_pred_prob_last_10": 0.22504699397832156, "mean_pred_prob_last_25": 0.13112709876149892, "mean_pred_prob_last_50": 0.07962444578297437, "mean_token_accuracy": 0.8729071319103241, "step": 37840 }, { "epoch": 0.6728530033953745, "grad_norm": 1.997561871509253, "learning_rate": 0.0001, "loss": 0.6273, "mean_abs_error": 584.548912993427, "mean_abs_error_last_10": 189.3370382449516, "mean_abs_error_last_25": 272.85341535649184, "mean_abs_error_last_50": 388.1190023868761, "mean_pred_prob": 0.054545120018883606, "mean_pred_prob_last_10": 0.24465747370850294, "mean_pred_prob_last_25": 0.14191595125594175, "mean_pred_prob_last_50": 0.09076304735790472, "mean_token_accuracy": 0.8732600092887879, "step": 37850 }, { "epoch": 0.6730307716921764, "grad_norm": 1.0437098525425803, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 406.4360688247072, "mean_abs_error_last_10": 143.4937024912847, "mean_abs_error_last_25": 207.50210350102537, "mean_abs_error_last_50": 247.38650130609955, "mean_pred_prob": 0.028185171098448335, "mean_pred_prob_last_10": 0.1413793709129095, "mean_pred_prob_last_25": 0.07824413543567062, "mean_pred_prob_last_50": 0.048869125079363585, "mean_token_accuracy": 0.8724102675914764, "step": 37860 }, { "epoch": 0.6732085399889783, "grad_norm": 1.63702250167922, "learning_rate": 0.0001, "loss": 0.7712, "mean_abs_error": 1207.624219377185, "mean_abs_error_last_10": 574.5372965154363, "mean_abs_error_last_25": 699.2535370789691, "mean_abs_error_last_50": 865.2317191902312, "mean_pred_prob": 0.007842901820549742, "mean_pred_prob_last_10": 0.04667997543001547, "mean_pred_prob_last_25": 0.022858568403171374, "mean_pred_prob_last_50": 0.013472606800496579, "mean_token_accuracy": 0.875704002380371, "step": 37870 }, { "epoch": 0.6733863082857803, "grad_norm": 1.1812395427483215, "learning_rate": 0.0001, "loss": 0.9352, "mean_abs_error": 572.6028672245508, "mean_abs_error_last_10": 333.45549897100074, "mean_abs_error_last_25": 310.84405681972873, "mean_abs_error_last_50": 425.8853573802371, "mean_pred_prob": 0.036351170699344945, "mean_pred_prob_last_10": 0.1741899311193265, "mean_pred_prob_last_25": 0.09639882958726957, "mean_pred_prob_last_50": 0.05999713545897976, "mean_token_accuracy": 0.8734863340854645, "step": 37880 }, { "epoch": 0.6735640765825822, "grad_norm": 1.3454030053565258, "learning_rate": 0.0001, "loss": 0.6957, "mean_abs_error": 501.36688834493225, "mean_abs_error_last_10": 146.89625606512922, "mean_abs_error_last_25": 184.32070805283013, "mean_abs_error_last_50": 259.0780748269648, "mean_pred_prob": 0.041089976322837174, "mean_pred_prob_last_10": 0.1972167240222916, "mean_pred_prob_last_25": 0.11084896622924134, "mean_pred_prob_last_50": 0.06980714567471295, "mean_token_accuracy": 0.8779473841190338, "step": 37890 }, { "epoch": 0.6737418448793842, "grad_norm": 2.441485190368028, "learning_rate": 0.0001, "loss": 0.6957, "mean_abs_error": 188.47667798153824, "mean_abs_error_last_10": 41.68796531442723, "mean_abs_error_last_25": 66.95299753199699, "mean_abs_error_last_50": 103.59792260977122, "mean_pred_prob": 0.04387131561525166, "mean_pred_prob_last_10": 0.21411513201892377, "mean_pred_prob_last_25": 0.12052800338715315, "mean_pred_prob_last_50": 0.07293889187276363, "mean_token_accuracy": 0.8747716784477234, "step": 37900 }, { "epoch": 0.6739196131761862, "grad_norm": 3.811446825097844, "learning_rate": 0.0001, "loss": 0.7223, "mean_abs_error": 293.59875371247284, "mean_abs_error_last_10": 58.2081805577418, "mean_abs_error_last_25": 92.22685749475683, "mean_abs_error_last_50": 196.1847529019421, "mean_pred_prob": 0.0413165089674294, "mean_pred_prob_last_10": 0.20581169668585061, "mean_pred_prob_last_25": 0.11654936214908958, "mean_pred_prob_last_50": 0.06955064283683896, "mean_token_accuracy": 0.8767222166061401, "step": 37910 }, { "epoch": 0.6740973814729881, "grad_norm": 1.4445145083457955, "learning_rate": 0.0001, "loss": 0.7975, "mean_abs_error": 225.92937292130358, "mean_abs_error_last_10": 48.90313622255182, "mean_abs_error_last_25": 81.09214632218173, "mean_abs_error_last_50": 124.3470528075068, "mean_pred_prob": 0.05273947762325406, "mean_pred_prob_last_10": 0.23831279538571834, "mean_pred_prob_last_25": 0.13915570341050626, "mean_pred_prob_last_50": 0.08696209900081157, "mean_token_accuracy": 0.8719800353050232, "step": 37920 }, { "epoch": 0.6742751497697901, "grad_norm": 1.2639618176655483, "learning_rate": 0.0001, "loss": 0.6675, "mean_abs_error": 353.9821969756995, "mean_abs_error_last_10": 164.8168430262196, "mean_abs_error_last_25": 210.22780576585728, "mean_abs_error_last_50": 277.92356776780764, "mean_pred_prob": 0.04389074738137424, "mean_pred_prob_last_10": 0.2137384831905365, "mean_pred_prob_last_25": 0.12010939624160528, "mean_pred_prob_last_50": 0.07224007695913315, "mean_token_accuracy": 0.8732249081134796, "step": 37930 }, { "epoch": 0.674452918066592, "grad_norm": 1.7535418766133855, "learning_rate": 0.0001, "loss": 0.6595, "mean_abs_error": 204.53193720631822, "mean_abs_error_last_10": 33.92742971923765, "mean_abs_error_last_25": 59.30014686142275, "mean_abs_error_last_50": 109.15113011079117, "mean_pred_prob": 0.04568729931488633, "mean_pred_prob_last_10": 0.21188262477517128, "mean_pred_prob_last_25": 0.12186357267200947, "mean_pred_prob_last_50": 0.07538089035078883, "mean_token_accuracy": 0.8759272515773773, "step": 37940 }, { "epoch": 0.674630686363394, "grad_norm": 1.6041500111862563, "learning_rate": 0.0001, "loss": 0.6879, "mean_abs_error": 808.0459890431937, "mean_abs_error_last_10": 214.21183420620474, "mean_abs_error_last_25": 287.0940216411652, "mean_abs_error_last_50": 436.5347443033523, "mean_pred_prob": 0.036860490625258535, "mean_pred_prob_last_10": 0.17783055141335352, "mean_pred_prob_last_25": 0.10514584998600185, "mean_pred_prob_last_50": 0.0626172449265141, "mean_token_accuracy": 0.871096670627594, "step": 37950 }, { "epoch": 0.6748084546601959, "grad_norm": 0.9916709010371753, "learning_rate": 0.0001, "loss": 0.7578, "mean_abs_error": 475.27540887251854, "mean_abs_error_last_10": 309.2266470271268, "mean_abs_error_last_25": 315.5799280376466, "mean_abs_error_last_50": 355.0545678694038, "mean_pred_prob": 0.025343252788297833, "mean_pred_prob_last_10": 0.13506462648510933, "mean_pred_prob_last_25": 0.07058697873726487, "mean_pred_prob_last_50": 0.0428739205468446, "mean_token_accuracy": 0.8725259006023407, "step": 37960 }, { "epoch": 0.6749862229569978, "grad_norm": 0.9878018861447505, "learning_rate": 0.0001, "loss": 0.6916, "mean_abs_error": 664.9046973227711, "mean_abs_error_last_10": 315.7943110963574, "mean_abs_error_last_25": 365.79758407255537, "mean_abs_error_last_50": 464.7976916687461, "mean_pred_prob": 0.02346811390016228, "mean_pred_prob_last_10": 0.1284598340280354, "mean_pred_prob_last_25": 0.06847593801212497, "mean_pred_prob_last_50": 0.040450350957689805, "mean_token_accuracy": 0.8657759606838227, "step": 37970 }, { "epoch": 0.6751639912537998, "grad_norm": 1.0094142347988935, "learning_rate": 0.0001, "loss": 0.7556, "mean_abs_error": 309.81949613091217, "mean_abs_error_last_10": 71.70113096637121, "mean_abs_error_last_25": 157.33856958760367, "mean_abs_error_last_50": 198.8953196967162, "mean_pred_prob": 0.03594156885519624, "mean_pred_prob_last_10": 0.18257611617445946, "mean_pred_prob_last_25": 0.1003831934183836, "mean_pred_prob_last_50": 0.06090131658129394, "mean_token_accuracy": 0.8798993110656739, "step": 37980 }, { "epoch": 0.6753417595506017, "grad_norm": 1.4990719048242938, "learning_rate": 0.0001, "loss": 0.8048, "mean_abs_error": 464.73605989890666, "mean_abs_error_last_10": 201.13520744444574, "mean_abs_error_last_25": 238.01456518602814, "mean_abs_error_last_50": 274.48506890589306, "mean_pred_prob": 0.03296040238346905, "mean_pred_prob_last_10": 0.16913776001892983, "mean_pred_prob_last_25": 0.09545873554889113, "mean_pred_prob_last_50": 0.05667981856968254, "mean_token_accuracy": 0.8683632254600525, "step": 37990 }, { "epoch": 0.6755195278474037, "grad_norm": 0.9718420517523205, "learning_rate": 0.0001, "loss": 0.7071, "mean_abs_error": 896.5209718108847, "mean_abs_error_last_10": 308.74765608137926, "mean_abs_error_last_25": 387.5378937276397, "mean_abs_error_last_50": 506.54789779748916, "mean_pred_prob": 0.029124500742182136, "mean_pred_prob_last_10": 0.14358344924403355, "mean_pred_prob_last_25": 0.07423422021092847, "mean_pred_prob_last_50": 0.046534137066919355, "mean_token_accuracy": 0.8708757698535919, "step": 38000 }, { "epoch": 0.6756972961442056, "grad_norm": 1.1009632685978594, "learning_rate": 0.0001, "loss": 0.7988, "mean_abs_error": 270.7153831201259, "mean_abs_error_last_10": 172.16706952119813, "mean_abs_error_last_25": 207.08495261352033, "mean_abs_error_last_50": 216.16220897339113, "mean_pred_prob": 0.03772753784433007, "mean_pred_prob_last_10": 0.1997041441500187, "mean_pred_prob_last_25": 0.10640821941196918, "mean_pred_prob_last_50": 0.06435353960841894, "mean_token_accuracy": 0.8678828656673432, "step": 38010 }, { "epoch": 0.6758750644410076, "grad_norm": 1.0441623124782287, "learning_rate": 0.0001, "loss": 0.6988, "mean_abs_error": 305.2984868969471, "mean_abs_error_last_10": 91.08655083410366, "mean_abs_error_last_25": 95.88577711308565, "mean_abs_error_last_50": 172.05158055637145, "mean_pred_prob": 0.05343327925074846, "mean_pred_prob_last_10": 0.25262684356421233, "mean_pred_prob_last_25": 0.14045858662575483, "mean_pred_prob_last_50": 0.08830688390880823, "mean_token_accuracy": 0.8716314256191253, "step": 38020 }, { "epoch": 0.6760528327378096, "grad_norm": 2.5190151728260837, "learning_rate": 0.0001, "loss": 0.7644, "mean_abs_error": 474.11450311618034, "mean_abs_error_last_10": 103.45212667773845, "mean_abs_error_last_25": 142.54720521034326, "mean_abs_error_last_50": 250.5759507855252, "mean_pred_prob": 0.034136200742796066, "mean_pred_prob_last_10": 0.1752992022782564, "mean_pred_prob_last_25": 0.09597997833043337, "mean_pred_prob_last_50": 0.05737806428223848, "mean_token_accuracy": 0.8647332906723022, "step": 38030 }, { "epoch": 0.6762306010346115, "grad_norm": 2.5221295409248685, "learning_rate": 0.0001, "loss": 0.7871, "mean_abs_error": 360.59090414483535, "mean_abs_error_last_10": 94.44121947647325, "mean_abs_error_last_25": 185.0999617978312, "mean_abs_error_last_50": 237.16091675576132, "mean_pred_prob": 0.03727047701831907, "mean_pred_prob_last_10": 0.18356007521506398, "mean_pred_prob_last_25": 0.1031243706587702, "mean_pred_prob_last_50": 0.06329217636957765, "mean_token_accuracy": 0.8730793595314026, "step": 38040 }, { "epoch": 0.6764083693314135, "grad_norm": 1.2255859919628127, "learning_rate": 0.0001, "loss": 0.7203, "mean_abs_error": 347.40310553944556, "mean_abs_error_last_10": 141.671629543734, "mean_abs_error_last_25": 140.00991183818613, "mean_abs_error_last_50": 224.91990089026984, "mean_pred_prob": 0.033095872635021804, "mean_pred_prob_last_10": 0.16402369625866414, "mean_pred_prob_last_25": 0.09061411246657372, "mean_pred_prob_last_50": 0.05573545428924263, "mean_token_accuracy": 0.8723372519016266, "step": 38050 }, { "epoch": 0.6765861376282154, "grad_norm": 2.1920220345425663, "learning_rate": 0.0001, "loss": 0.9121, "mean_abs_error": 222.03002817639504, "mean_abs_error_last_10": 45.508494013113314, "mean_abs_error_last_25": 94.07689113355588, "mean_abs_error_last_50": 149.58615096613173, "mean_pred_prob": 0.04723550095222891, "mean_pred_prob_last_10": 0.22957587540149688, "mean_pred_prob_last_25": 0.12928527146577834, "mean_pred_prob_last_50": 0.07784784315153956, "mean_token_accuracy": 0.8766371011734009, "step": 38060 }, { "epoch": 0.6767639059250173, "grad_norm": 1.0431231170863853, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 981.2950478595151, "mean_abs_error_last_10": 587.2061972298163, "mean_abs_error_last_25": 695.6410878765496, "mean_abs_error_last_50": 818.2572268133275, "mean_pred_prob": 0.039609336674038785, "mean_pred_prob_last_10": 0.2108781241637189, "mean_pred_prob_last_25": 0.11444669345801231, "mean_pred_prob_last_50": 0.06751260595046915, "mean_token_accuracy": 0.8707217872142792, "step": 38070 }, { "epoch": 0.6769416742218193, "grad_norm": 1.6059148045521388, "learning_rate": 0.0001, "loss": 0.8338, "mean_abs_error": 258.3064409616772, "mean_abs_error_last_10": 47.304979333585415, "mean_abs_error_last_25": 65.06989707480689, "mean_abs_error_last_50": 137.17839305421572, "mean_pred_prob": 0.04710505476687103, "mean_pred_prob_last_10": 0.22303381152451038, "mean_pred_prob_last_25": 0.12975817443802953, "mean_pred_prob_last_50": 0.07995080682449043, "mean_token_accuracy": 0.8702450692653656, "step": 38080 }, { "epoch": 0.6771194425186212, "grad_norm": 1.5118101747613084, "learning_rate": 0.0001, "loss": 0.73, "mean_abs_error": 245.19875884322832, "mean_abs_error_last_10": 49.709399899895786, "mean_abs_error_last_25": 93.42102328282466, "mean_abs_error_last_50": 161.0534902859568, "mean_pred_prob": 0.04575701071880758, "mean_pred_prob_last_10": 0.1884994026273489, "mean_pred_prob_last_25": 0.11432685386389493, "mean_pred_prob_last_50": 0.07381466999650002, "mean_token_accuracy": 0.8794206500053405, "step": 38090 }, { "epoch": 0.6772972108154232, "grad_norm": 1.1129087103014688, "learning_rate": 0.0001, "loss": 0.6464, "mean_abs_error": 338.7859439587054, "mean_abs_error_last_10": 210.37178862740615, "mean_abs_error_last_25": 237.07670448860236, "mean_abs_error_last_50": 239.49225572758755, "mean_pred_prob": 0.0500017148675397, "mean_pred_prob_last_10": 0.24937419928610324, "mean_pred_prob_last_25": 0.13645472866483033, "mean_pred_prob_last_50": 0.08335313065908849, "mean_token_accuracy": 0.8784484446048737, "step": 38100 }, { "epoch": 0.6774749791122251, "grad_norm": 1.6445894516391923, "learning_rate": 0.0001, "loss": 0.7187, "mean_abs_error": 370.09212445272345, "mean_abs_error_last_10": 255.46075465016415, "mean_abs_error_last_25": 203.72303523494384, "mean_abs_error_last_50": 248.70698809417982, "mean_pred_prob": 0.045615514484234156, "mean_pred_prob_last_10": 0.20365975107997655, "mean_pred_prob_last_25": 0.12003317316994071, "mean_pred_prob_last_50": 0.074740038998425, "mean_token_accuracy": 0.8735147953033447, "step": 38110 }, { "epoch": 0.677652747409027, "grad_norm": 2.3418821394441918, "learning_rate": 0.0001, "loss": 0.8243, "mean_abs_error": 1339.7010558376592, "mean_abs_error_last_10": 747.1144727785335, "mean_abs_error_last_25": 860.5376206140912, "mean_abs_error_last_50": 1008.9432950929116, "mean_pred_prob": 0.028110502725030528, "mean_pred_prob_last_10": 0.1418256811564788, "mean_pred_prob_last_25": 0.0814636170034646, "mean_pred_prob_last_50": 0.04822489145444706, "mean_token_accuracy": 0.864864045381546, "step": 38120 }, { "epoch": 0.677830515705829, "grad_norm": 2.2589207299884033, "learning_rate": 0.0001, "loss": 0.8697, "mean_abs_error": 384.393564765565, "mean_abs_error_last_10": 89.95523816877225, "mean_abs_error_last_25": 149.5273554387314, "mean_abs_error_last_50": 253.26842627962728, "mean_pred_prob": 0.03635364305227995, "mean_pred_prob_last_10": 0.16581978220492602, "mean_pred_prob_last_25": 0.09735652431845665, "mean_pred_prob_last_50": 0.06161660137586296, "mean_token_accuracy": 0.8732831120491028, "step": 38130 }, { "epoch": 0.678008284002631, "grad_norm": 1.240236212999282, "learning_rate": 0.0001, "loss": 0.7609, "mean_abs_error": 350.01662480023043, "mean_abs_error_last_10": 184.58183492693, "mean_abs_error_last_25": 210.57737367957893, "mean_abs_error_last_50": 288.5418214732491, "mean_pred_prob": 0.0505635934881866, "mean_pred_prob_last_10": 0.2375297134742141, "mean_pred_prob_last_25": 0.13629616247490048, "mean_pred_prob_last_50": 0.08462242139503359, "mean_token_accuracy": 0.8669327855110168, "step": 38140 }, { "epoch": 0.678186052299433, "grad_norm": 1.5049481193750425, "learning_rate": 0.0001, "loss": 0.7115, "mean_abs_error": 550.2436369571244, "mean_abs_error_last_10": 153.58727091109125, "mean_abs_error_last_25": 194.35850427062402, "mean_abs_error_last_50": 309.8835324463457, "mean_pred_prob": 0.03333450090140104, "mean_pred_prob_last_10": 0.15870040990412235, "mean_pred_prob_last_25": 0.08578106369823217, "mean_pred_prob_last_50": 0.0531715770252049, "mean_token_accuracy": 0.868798291683197, "step": 38150 }, { "epoch": 0.6783638205962349, "grad_norm": 1.6505863048589722, "learning_rate": 0.0001, "loss": 0.84, "mean_abs_error": 256.34786867666935, "mean_abs_error_last_10": 64.32863287596663, "mean_abs_error_last_25": 182.23163031920848, "mean_abs_error_last_50": 235.2778458046635, "mean_pred_prob": 0.047532723285257816, "mean_pred_prob_last_10": 0.23566647730767726, "mean_pred_prob_last_25": 0.13235706444829703, "mean_pred_prob_last_50": 0.07996005518361926, "mean_token_accuracy": 0.8661285519599915, "step": 38160 }, { "epoch": 0.6785415888930368, "grad_norm": 1.8085417159874861, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 795.3102740719875, "mean_abs_error_last_10": 380.18173763933385, "mean_abs_error_last_25": 431.48876330286066, "mean_abs_error_last_50": 528.866685299137, "mean_pred_prob": 0.031081622262718156, "mean_pred_prob_last_10": 0.1795358371862676, "mean_pred_prob_last_25": 0.09344813622301444, "mean_pred_prob_last_50": 0.05354235819249879, "mean_token_accuracy": 0.8807890951633454, "step": 38170 }, { "epoch": 0.6787193571898388, "grad_norm": 1.7952455224337625, "learning_rate": 0.0001, "loss": 0.8425, "mean_abs_error": 1526.1907405949692, "mean_abs_error_last_10": 765.3700040741926, "mean_abs_error_last_25": 903.5683802130536, "mean_abs_error_last_50": 1090.8129478516134, "mean_pred_prob": 0.04596416970598512, "mean_pred_prob_last_10": 0.21521162098797503, "mean_pred_prob_last_25": 0.1254175590176601, "mean_pred_prob_last_50": 0.07741273869469296, "mean_token_accuracy": 0.8589844882488251, "step": 38180 }, { "epoch": 0.6788971254866407, "grad_norm": 2.1363102675011647, "learning_rate": 0.0001, "loss": 0.6694, "mean_abs_error": 793.9164902679688, "mean_abs_error_last_10": 179.68908386118818, "mean_abs_error_last_25": 279.39802668126424, "mean_abs_error_last_50": 494.2814196255505, "mean_pred_prob": 0.03602723321528174, "mean_pred_prob_last_10": 0.17052974491380155, "mean_pred_prob_last_25": 0.09382884316146374, "mean_pred_prob_last_50": 0.05886757636326365, "mean_token_accuracy": 0.8790680646896363, "step": 38190 }, { "epoch": 0.6790748937834427, "grad_norm": 1.6334783935952648, "learning_rate": 0.0001, "loss": 0.6983, "mean_abs_error": 201.01368399841155, "mean_abs_error_last_10": 40.345779289386236, "mean_abs_error_last_25": 50.8602284491294, "mean_abs_error_last_50": 125.58816091695682, "mean_pred_prob": 0.053022470138967034, "mean_pred_prob_last_10": 0.26388136520981786, "mean_pred_prob_last_25": 0.14974494390189647, "mean_pred_prob_last_50": 0.09033875595778226, "mean_token_accuracy": 0.8798515200614929, "step": 38200 }, { "epoch": 0.6792526620802446, "grad_norm": 1.659698231225616, "learning_rate": 0.0001, "loss": 0.747, "mean_abs_error": 1039.8922509396125, "mean_abs_error_last_10": 414.37354280896204, "mean_abs_error_last_25": 457.4237483851351, "mean_abs_error_last_50": 610.1880199821504, "mean_pred_prob": 0.04320550941629335, "mean_pred_prob_last_10": 0.19448802566039375, "mean_pred_prob_last_25": 0.11511503418441862, "mean_pred_prob_last_50": 0.07121955164475366, "mean_token_accuracy": 0.8614192485809327, "step": 38210 }, { "epoch": 0.6794304303770465, "grad_norm": 1.5071378014086432, "learning_rate": 0.0001, "loss": 0.7257, "mean_abs_error": 451.526129207679, "mean_abs_error_last_10": 391.73977127432534, "mean_abs_error_last_25": 392.15584469953336, "mean_abs_error_last_50": 415.95568102814303, "mean_pred_prob": 0.04411021639825776, "mean_pred_prob_last_10": 0.21576179994735867, "mean_pred_prob_last_25": 0.1252186382538639, "mean_pred_prob_last_50": 0.0761374777299352, "mean_token_accuracy": 0.8782973527908325, "step": 38220 }, { "epoch": 0.6796081986738485, "grad_norm": 2.364329587668686, "learning_rate": 0.0001, "loss": 0.7513, "mean_abs_error": 449.7336583985674, "mean_abs_error_last_10": 187.99777012067523, "mean_abs_error_last_25": 181.7712156667617, "mean_abs_error_last_50": 253.99781315815454, "mean_pred_prob": 0.03293678391491994, "mean_pred_prob_last_10": 0.1714274617144838, "mean_pred_prob_last_25": 0.09207506987731903, "mean_pred_prob_last_50": 0.055370652792043985, "mean_token_accuracy": 0.8654716849327088, "step": 38230 }, { "epoch": 0.6797859669706504, "grad_norm": 2.3682601506030414, "learning_rate": 0.0001, "loss": 0.7266, "mean_abs_error": 541.9500499052231, "mean_abs_error_last_10": 83.5881034541779, "mean_abs_error_last_25": 153.18509067519935, "mean_abs_error_last_50": 263.7411358683235, "mean_pred_prob": 0.04760518994298764, "mean_pred_prob_last_10": 0.2369266817579046, "mean_pred_prob_last_25": 0.1314744579140097, "mean_pred_prob_last_50": 0.07985997255891561, "mean_token_accuracy": 0.8715231120586395, "step": 38240 }, { "epoch": 0.6799637352674524, "grad_norm": 2.002945091534073, "learning_rate": 0.0001, "loss": 0.724, "mean_abs_error": 311.3574408473004, "mean_abs_error_last_10": 159.05036130019602, "mean_abs_error_last_25": 173.39668551177672, "mean_abs_error_last_50": 220.64928139774935, "mean_pred_prob": 0.037790216720895844, "mean_pred_prob_last_10": 0.19256546427495777, "mean_pred_prob_last_25": 0.10420725850854069, "mean_pred_prob_last_50": 0.06393637083237991, "mean_token_accuracy": 0.8641859710216522, "step": 38250 }, { "epoch": 0.6801415035642544, "grad_norm": 1.569401954484769, "learning_rate": 0.0001, "loss": 0.8026, "mean_abs_error": 308.3761954913342, "mean_abs_error_last_10": 63.118691720387574, "mean_abs_error_last_25": 94.44910186949097, "mean_abs_error_last_50": 202.23352266867818, "mean_pred_prob": 0.04693930535577238, "mean_pred_prob_last_10": 0.24966287799179554, "mean_pred_prob_last_25": 0.13501269361004234, "mean_pred_prob_last_50": 0.08037930419668556, "mean_token_accuracy": 0.8714852929115295, "step": 38260 }, { "epoch": 0.6803192718610563, "grad_norm": 1.1679033845183189, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 255.05333490704214, "mean_abs_error_last_10": 43.71135659233559, "mean_abs_error_last_25": 68.98118327146899, "mean_abs_error_last_50": 126.64026389623287, "mean_pred_prob": 0.04689056491479278, "mean_pred_prob_last_10": 0.22116233613342046, "mean_pred_prob_last_25": 0.12161202467978001, "mean_pred_prob_last_50": 0.07759200283326209, "mean_token_accuracy": 0.8668549239635468, "step": 38270 }, { "epoch": 0.6804970401578583, "grad_norm": 1.0306739643351923, "learning_rate": 0.0001, "loss": 0.71, "mean_abs_error": 815.9229533983873, "mean_abs_error_last_10": 279.8851054095362, "mean_abs_error_last_25": 299.03880272808, "mean_abs_error_last_50": 412.1738683079346, "mean_pred_prob": 0.016698788898065687, "mean_pred_prob_last_10": 0.09530999595299364, "mean_pred_prob_last_25": 0.04784739069873467, "mean_pred_prob_last_50": 0.028300910152029245, "mean_token_accuracy": 0.8736581027507782, "step": 38280 }, { "epoch": 0.6806748084546602, "grad_norm": 1.564614303884823, "learning_rate": 0.0001, "loss": 0.5967, "mean_abs_error": 172.84201974771568, "mean_abs_error_last_10": 45.873277060927315, "mean_abs_error_last_25": 101.25767403430125, "mean_abs_error_last_50": 146.27446683106297, "mean_pred_prob": 0.05423730560578406, "mean_pred_prob_last_10": 0.25764551013708115, "mean_pred_prob_last_25": 0.1418531863950193, "mean_pred_prob_last_50": 0.0883313688915223, "mean_token_accuracy": 0.884574168920517, "step": 38290 }, { "epoch": 0.6808525767514622, "grad_norm": 1.4007339687386964, "learning_rate": 0.0001, "loss": 0.7164, "mean_abs_error": 455.4779696308836, "mean_abs_error_last_10": 175.7835474141394, "mean_abs_error_last_25": 182.68987009892976, "mean_abs_error_last_50": 224.7237035243018, "mean_pred_prob": 0.03220748137682676, "mean_pred_prob_last_10": 0.15376538578420879, "mean_pred_prob_last_25": 0.08663473455235363, "mean_pred_prob_last_50": 0.05316856321878731, "mean_token_accuracy": 0.8666511178016663, "step": 38300 }, { "epoch": 0.6810303450482641, "grad_norm": 1.8333189920442756, "learning_rate": 0.0001, "loss": 0.6271, "mean_abs_error": 109.77829263455897, "mean_abs_error_last_10": 34.26816673648891, "mean_abs_error_last_25": 49.69360540815653, "mean_abs_error_last_50": 67.50229627152387, "mean_pred_prob": 0.06400018101558089, "mean_pred_prob_last_10": 0.27926933392882347, "mean_pred_prob_last_25": 0.16539720445871353, "mean_pred_prob_last_50": 0.10415405407547951, "mean_token_accuracy": 0.8784770786762237, "step": 38310 }, { "epoch": 0.681208113345066, "grad_norm": 1.6720834650165308, "learning_rate": 0.0001, "loss": 0.6614, "mean_abs_error": 257.5815215434679, "mean_abs_error_last_10": 102.96753227944865, "mean_abs_error_last_25": 107.69325038482293, "mean_abs_error_last_50": 151.44005553072435, "mean_pred_prob": 0.061410814471310006, "mean_pred_prob_last_10": 0.3107150645228103, "mean_pred_prob_last_25": 0.17327749972464518, "mean_pred_prob_last_50": 0.1033975423895754, "mean_token_accuracy": 0.8754364788532257, "step": 38320 }, { "epoch": 0.681385881641868, "grad_norm": 1.5062000527062298, "learning_rate": 0.0001, "loss": 0.7721, "mean_abs_error": 756.0270470637217, "mean_abs_error_last_10": 406.8351065312654, "mean_abs_error_last_25": 479.8099512157713, "mean_abs_error_last_50": 563.4751711338523, "mean_pred_prob": 0.050684765659389085, "mean_pred_prob_last_10": 0.22841149589803536, "mean_pred_prob_last_25": 0.13213409256131853, "mean_pred_prob_last_50": 0.0819555254158331, "mean_token_accuracy": 0.8726060926914215, "step": 38330 }, { "epoch": 0.6815636499386699, "grad_norm": 1.3493172724236806, "learning_rate": 0.0001, "loss": 0.6576, "mean_abs_error": 331.40888580495255, "mean_abs_error_last_10": 44.37939414052455, "mean_abs_error_last_25": 75.86413928833174, "mean_abs_error_last_50": 163.18469114082285, "mean_pred_prob": 0.04169181925244629, "mean_pred_prob_last_10": 0.2109362557530403, "mean_pred_prob_last_25": 0.11746883504092694, "mean_pred_prob_last_50": 0.07058768272399903, "mean_token_accuracy": 0.8747708141803742, "step": 38340 }, { "epoch": 0.6817414182354719, "grad_norm": 1.7435041720934754, "learning_rate": 0.0001, "loss": 0.6744, "mean_abs_error": 419.9522627989646, "mean_abs_error_last_10": 223.3190974160063, "mean_abs_error_last_25": 315.063857764242, "mean_abs_error_last_50": 397.54184351528636, "mean_pred_prob": 0.04339252293575555, "mean_pred_prob_last_10": 0.19169569879304618, "mean_pred_prob_last_25": 0.11122472604038194, "mean_pred_prob_last_50": 0.07052042098948733, "mean_token_accuracy": 0.8749111890792847, "step": 38350 }, { "epoch": 0.6819191865322738, "grad_norm": 2.6648839870603265, "learning_rate": 0.0001, "loss": 0.6651, "mean_abs_error": 438.47094471096005, "mean_abs_error_last_10": 330.25923901186894, "mean_abs_error_last_25": 325.56945696594687, "mean_abs_error_last_50": 305.50467750505544, "mean_pred_prob": 0.03760444252984598, "mean_pred_prob_last_10": 0.18793721571564675, "mean_pred_prob_last_25": 0.1040174343623221, "mean_pred_prob_last_50": 0.06407516815233975, "mean_token_accuracy": 0.878745299577713, "step": 38360 }, { "epoch": 0.6820969548290757, "grad_norm": 1.0211843347172487, "learning_rate": 0.0001, "loss": 0.8225, "mean_abs_error": 1306.53462733064, "mean_abs_error_last_10": 790.4769775000427, "mean_abs_error_last_25": 821.1878011295114, "mean_abs_error_last_50": 978.825678664772, "mean_pred_prob": 0.04345561751397327, "mean_pred_prob_last_10": 0.19812874116614693, "mean_pred_prob_last_25": 0.11597843531199033, "mean_pred_prob_last_50": 0.07276580698235194, "mean_token_accuracy": 0.8771095752716065, "step": 38370 }, { "epoch": 0.6822747231258778, "grad_norm": 2.0700191411054667, "learning_rate": 0.0001, "loss": 0.5878, "mean_abs_error": 366.28493771897314, "mean_abs_error_last_10": 84.66588829134571, "mean_abs_error_last_25": 122.0141201124328, "mean_abs_error_last_50": 216.60474345373095, "mean_pred_prob": 0.03602026416920125, "mean_pred_prob_last_10": 0.1929860383272171, "mean_pred_prob_last_25": 0.10346579309552908, "mean_pred_prob_last_50": 0.061474567651748656, "mean_token_accuracy": 0.8878398656845092, "step": 38380 }, { "epoch": 0.6824524914226797, "grad_norm": 0.9294372936891889, "learning_rate": 0.0001, "loss": 0.7989, "mean_abs_error": 242.04089338070668, "mean_abs_error_last_10": 41.21290405907884, "mean_abs_error_last_25": 64.99894737141776, "mean_abs_error_last_50": 130.56674928325086, "mean_pred_prob": 0.05158575205132365, "mean_pred_prob_last_10": 0.249379157461226, "mean_pred_prob_last_25": 0.1394375362433493, "mean_pred_prob_last_50": 0.08472738396376371, "mean_token_accuracy": 0.8725871026515961, "step": 38390 }, { "epoch": 0.6826302597194817, "grad_norm": 1.6040820093177024, "learning_rate": 0.0001, "loss": 0.7973, "mean_abs_error": 426.74718715368317, "mean_abs_error_last_10": 120.17940602050862, "mean_abs_error_last_25": 249.86005684486298, "mean_abs_error_last_50": 303.00283359629486, "mean_pred_prob": 0.027063098759390412, "mean_pred_prob_last_10": 0.14780557248741388, "mean_pred_prob_last_25": 0.07733084885403514, "mean_pred_prob_last_50": 0.04557492586318403, "mean_token_accuracy": 0.8713133633136749, "step": 38400 }, { "epoch": 0.6828080280162836, "grad_norm": 1.4722984890227935, "learning_rate": 0.0001, "loss": 0.7053, "mean_abs_error": 598.9648846111008, "mean_abs_error_last_10": 220.62559887778662, "mean_abs_error_last_25": 287.18959890373424, "mean_abs_error_last_50": 361.7634128381801, "mean_pred_prob": 0.047974120042636056, "mean_pred_prob_last_10": 0.22806163732893764, "mean_pred_prob_last_25": 0.1323828899418004, "mean_pred_prob_last_50": 0.08096674552070908, "mean_token_accuracy": 0.8706800937652588, "step": 38410 }, { "epoch": 0.6829857963130855, "grad_norm": 1.4416456300170168, "learning_rate": 0.0001, "loss": 0.7888, "mean_abs_error": 353.3131565820671, "mean_abs_error_last_10": 109.53112830353442, "mean_abs_error_last_25": 173.79025326308533, "mean_abs_error_last_50": 264.553115575117, "mean_pred_prob": 0.04139266780111939, "mean_pred_prob_last_10": 0.1975707683712244, "mean_pred_prob_last_25": 0.11221161102876068, "mean_pred_prob_last_50": 0.06823980389162898, "mean_token_accuracy": 0.8520506322383881, "step": 38420 }, { "epoch": 0.6831635646098875, "grad_norm": 1.3775499520115626, "learning_rate": 0.0001, "loss": 0.6357, "mean_abs_error": 430.0216568707371, "mean_abs_error_last_10": 159.40426929799804, "mean_abs_error_last_25": 235.64408053014526, "mean_abs_error_last_50": 269.1051439770445, "mean_pred_prob": 0.031132154597435145, "mean_pred_prob_last_10": 0.14811294623650612, "mean_pred_prob_last_25": 0.08394448403269053, "mean_pred_prob_last_50": 0.05190578072797507, "mean_token_accuracy": 0.8742991089820862, "step": 38430 }, { "epoch": 0.6833413329066894, "grad_norm": 1.6237027502224863, "learning_rate": 0.0001, "loss": 0.6465, "mean_abs_error": 436.4772768619391, "mean_abs_error_last_10": 121.6025692780689, "mean_abs_error_last_25": 190.9843454697974, "mean_abs_error_last_50": 249.1154934264827, "mean_pred_prob": 0.034203286794945595, "mean_pred_prob_last_10": 0.18090758863836526, "mean_pred_prob_last_25": 0.09763920940458774, "mean_pred_prob_last_50": 0.057511892169713974, "mean_token_accuracy": 0.8732068181037903, "step": 38440 }, { "epoch": 0.6835191012034914, "grad_norm": 2.747138003638963, "learning_rate": 0.0001, "loss": 0.7535, "mean_abs_error": 569.6828706132351, "mean_abs_error_last_10": 278.5375073829145, "mean_abs_error_last_25": 336.4962333397487, "mean_abs_error_last_50": 372.35195869829545, "mean_pred_prob": 0.042931276961462574, "mean_pred_prob_last_10": 0.1719768983661197, "mean_pred_prob_last_25": 0.11095824800431728, "mean_pred_prob_last_50": 0.07129696465563029, "mean_token_accuracy": 0.8651165246963501, "step": 38450 }, { "epoch": 0.6836968695002933, "grad_norm": 1.823595481605162, "learning_rate": 0.0001, "loss": 0.8939, "mean_abs_error": 196.29648292536294, "mean_abs_error_last_10": 56.32428119945706, "mean_abs_error_last_25": 67.1427513873212, "mean_abs_error_last_50": 112.19304809154121, "mean_pred_prob": 0.04102216577157378, "mean_pred_prob_last_10": 0.22461473271250726, "mean_pred_prob_last_25": 0.12068002130836249, "mean_pred_prob_last_50": 0.07086161337792873, "mean_token_accuracy": 0.8751230955123901, "step": 38460 }, { "epoch": 0.6838746377970952, "grad_norm": 1.0881493960357944, "learning_rate": 0.0001, "loss": 0.8548, "mean_abs_error": 820.4004032323807, "mean_abs_error_last_10": 441.7059358581806, "mean_abs_error_last_25": 624.6123108428891, "mean_abs_error_last_50": 661.7585560791733, "mean_pred_prob": 0.013123106735292822, "mean_pred_prob_last_10": 0.08156551632564515, "mean_pred_prob_last_25": 0.03907207256415859, "mean_pred_prob_last_50": 0.02252060720929876, "mean_token_accuracy": 0.8794080257415772, "step": 38470 }, { "epoch": 0.6840524060938972, "grad_norm": 2.1740197314192766, "learning_rate": 0.0001, "loss": 0.6945, "mean_abs_error": 1335.9598397206141, "mean_abs_error_last_10": 647.6440924808477, "mean_abs_error_last_25": 782.1656044334916, "mean_abs_error_last_50": 946.8742972165774, "mean_pred_prob": 0.034352528367890045, "mean_pred_prob_last_10": 0.17679042015515734, "mean_pred_prob_last_25": 0.09347601902554743, "mean_pred_prob_last_50": 0.057573303568642584, "mean_token_accuracy": 0.880508416891098, "step": 38480 }, { "epoch": 0.6842301743906991, "grad_norm": 1.8063023351229173, "learning_rate": 0.0001, "loss": 0.7688, "mean_abs_error": 316.5638944025679, "mean_abs_error_last_10": 64.25007932964242, "mean_abs_error_last_25": 104.19162394058245, "mean_abs_error_last_50": 173.58911462828627, "mean_pred_prob": 0.04451134265400469, "mean_pred_prob_last_10": 0.22735762130469084, "mean_pred_prob_last_25": 0.12578943045809865, "mean_pred_prob_last_50": 0.0740417969878763, "mean_token_accuracy": 0.8648608446121215, "step": 38490 }, { "epoch": 0.6844079426875012, "grad_norm": 1.46596564120419, "learning_rate": 0.0001, "loss": 0.761, "mean_abs_error": 345.0398672805535, "mean_abs_error_last_10": 70.0450905079817, "mean_abs_error_last_25": 194.71837121370407, "mean_abs_error_last_50": 261.3039913358217, "mean_pred_prob": 0.033123301016166805, "mean_pred_prob_last_10": 0.16945842355489732, "mean_pred_prob_last_25": 0.09242554577067494, "mean_pred_prob_last_50": 0.05576352281495929, "mean_token_accuracy": 0.8670711576938629, "step": 38500 }, { "epoch": 0.6845857109843031, "grad_norm": 2.6147949179455416, "learning_rate": 0.0001, "loss": 0.7846, "mean_abs_error": 455.1712430717087, "mean_abs_error_last_10": 228.57185221792264, "mean_abs_error_last_25": 367.25059630552244, "mean_abs_error_last_50": 370.84265805352345, "mean_pred_prob": 0.03786049934569746, "mean_pred_prob_last_10": 0.16637549018487335, "mean_pred_prob_last_25": 0.09738084175623954, "mean_pred_prob_last_50": 0.06218744576908648, "mean_token_accuracy": 0.8771290421485901, "step": 38510 }, { "epoch": 0.684763479281105, "grad_norm": 3.5360616202817914, "learning_rate": 0.0001, "loss": 0.6923, "mean_abs_error": 264.5519470119889, "mean_abs_error_last_10": 21.892211784033925, "mean_abs_error_last_25": 51.404939257104274, "mean_abs_error_last_50": 121.36714580597727, "mean_pred_prob": 0.06859150165691971, "mean_pred_prob_last_10": 0.29039476476609705, "mean_pred_prob_last_25": 0.17869628788903355, "mean_pred_prob_last_50": 0.11402596537955105, "mean_token_accuracy": 0.8645317494869232, "step": 38520 }, { "epoch": 0.684941247577907, "grad_norm": 1.7387955633447079, "learning_rate": 0.0001, "loss": 0.7396, "mean_abs_error": 958.1439886911278, "mean_abs_error_last_10": 212.28402650429183, "mean_abs_error_last_25": 288.23347703446336, "mean_abs_error_last_50": 530.4839611832275, "mean_pred_prob": 0.029818021185928956, "mean_pred_prob_last_10": 0.13105366143863648, "mean_pred_prob_last_25": 0.07563309218967333, "mean_pred_prob_last_50": 0.04834049263736233, "mean_token_accuracy": 0.8695839583873749, "step": 38530 }, { "epoch": 0.6851190158747089, "grad_norm": 1.3967113280952834, "learning_rate": 0.0001, "loss": 0.7329, "mean_abs_error": 332.0589855535978, "mean_abs_error_last_10": 86.17209979483073, "mean_abs_error_last_25": 116.84388713599576, "mean_abs_error_last_50": 158.50696102737325, "mean_pred_prob": 0.03182242093607783, "mean_pred_prob_last_10": 0.15208867974579335, "mean_pred_prob_last_25": 0.08697427269071341, "mean_pred_prob_last_50": 0.0521265416406095, "mean_token_accuracy": 0.8795765817165375, "step": 38540 }, { "epoch": 0.6852967841715109, "grad_norm": 1.7707852385489966, "learning_rate": 0.0001, "loss": 0.8547, "mean_abs_error": 1520.0393505080533, "mean_abs_error_last_10": 988.6214911514156, "mean_abs_error_last_25": 1053.0945175084544, "mean_abs_error_last_50": 1173.1834316913328, "mean_pred_prob": 0.0235554994767881, "mean_pred_prob_last_10": 0.10358710400032578, "mean_pred_prob_last_25": 0.058770524339342954, "mean_pred_prob_last_50": 0.0378500993232592, "mean_token_accuracy": 0.8723326742649078, "step": 38550 }, { "epoch": 0.6854745524683128, "grad_norm": 1.8337928814874032, "learning_rate": 0.0001, "loss": 0.6678, "mean_abs_error": 483.8322036349717, "mean_abs_error_last_10": 105.01048167942729, "mean_abs_error_last_25": 175.83305440498822, "mean_abs_error_last_50": 300.3753155426136, "mean_pred_prob": 0.04872843608027324, "mean_pred_prob_last_10": 0.24090255638584496, "mean_pred_prob_last_25": 0.1321023107273504, "mean_pred_prob_last_50": 0.07980446515139192, "mean_token_accuracy": 0.8757561028003693, "step": 38560 }, { "epoch": 0.6856523207651147, "grad_norm": 1.0535634631568633, "learning_rate": 0.0001, "loss": 0.7902, "mean_abs_error": 550.6399678829671, "mean_abs_error_last_10": 88.61012503940064, "mean_abs_error_last_25": 179.94675255515537, "mean_abs_error_last_50": 334.5496317837618, "mean_pred_prob": 0.027538583124987782, "mean_pred_prob_last_10": 0.149541637301445, "mean_pred_prob_last_25": 0.0803958361968398, "mean_pred_prob_last_50": 0.0474583019502461, "mean_token_accuracy": 0.869218897819519, "step": 38570 }, { "epoch": 0.6858300890619167, "grad_norm": 1.2053334048379698, "learning_rate": 0.0001, "loss": 0.6067, "mean_abs_error": 282.59693790830124, "mean_abs_error_last_10": 138.26701150749824, "mean_abs_error_last_25": 144.71012290492226, "mean_abs_error_last_50": 187.19650616101066, "mean_pred_prob": 0.028288083034567535, "mean_pred_prob_last_10": 0.14708928978070618, "mean_pred_prob_last_25": 0.07897243592888117, "mean_pred_prob_last_50": 0.04834781815297902, "mean_token_accuracy": 0.8722106158733368, "step": 38580 }, { "epoch": 0.6860078573587186, "grad_norm": 1.454411820428126, "learning_rate": 0.0001, "loss": 0.636, "mean_abs_error": 132.6803054353996, "mean_abs_error_last_10": 37.1012423130491, "mean_abs_error_last_25": 45.87651398850176, "mean_abs_error_last_50": 79.66889567072296, "mean_pred_prob": 0.05934536419808865, "mean_pred_prob_last_10": 0.27932345159351823, "mean_pred_prob_last_25": 0.16216078735888004, "mean_pred_prob_last_50": 0.10039637880399824, "mean_token_accuracy": 0.8804852366447449, "step": 38590 }, { "epoch": 0.6861856256555205, "grad_norm": 1.7541617231041595, "learning_rate": 0.0001, "loss": 0.8399, "mean_abs_error": 991.9656572813353, "mean_abs_error_last_10": 403.9318670017623, "mean_abs_error_last_25": 525.0992685530213, "mean_abs_error_last_50": 709.0171500657663, "mean_pred_prob": 0.026903281267732383, "mean_pred_prob_last_10": 0.13786022245476487, "mean_pred_prob_last_25": 0.07492005753447302, "mean_pred_prob_last_50": 0.04401058844814543, "mean_token_accuracy": 0.8731173276901245, "step": 38600 }, { "epoch": 0.6863633939523226, "grad_norm": 1.3460034082479193, "learning_rate": 0.0001, "loss": 0.683, "mean_abs_error": 501.4617234591431, "mean_abs_error_last_10": 124.77923166626883, "mean_abs_error_last_25": 210.37924298191746, "mean_abs_error_last_50": 339.25782658220106, "mean_pred_prob": 0.01309129511937499, "mean_pred_prob_last_10": 0.07910209558904172, "mean_pred_prob_last_25": 0.04013745840638876, "mean_pred_prob_last_50": 0.023388042487204073, "mean_token_accuracy": 0.8760302245616913, "step": 38610 }, { "epoch": 0.6865411622491245, "grad_norm": 1.4470721774997617, "learning_rate": 0.0001, "loss": 0.9654, "mean_abs_error": 163.18669519946812, "mean_abs_error_last_10": 50.27510775102545, "mean_abs_error_last_25": 65.91724417850283, "mean_abs_error_last_50": 105.02928518949845, "mean_pred_prob": 0.05437771058641374, "mean_pred_prob_last_10": 0.25222763419151306, "mean_pred_prob_last_25": 0.14287037644535303, "mean_pred_prob_last_50": 0.08902352014556528, "mean_token_accuracy": 0.8701078116893768, "step": 38620 }, { "epoch": 0.6867189305459265, "grad_norm": 1.1246754867872195, "learning_rate": 0.0001, "loss": 0.7168, "mean_abs_error": 1153.7204630998685, "mean_abs_error_last_10": 359.70295999763414, "mean_abs_error_last_25": 555.7847307953809, "mean_abs_error_last_50": 784.4224626083994, "mean_pred_prob": 0.04294455325580202, "mean_pred_prob_last_10": 0.21961030658567324, "mean_pred_prob_last_25": 0.12148378087440506, "mean_pred_prob_last_50": 0.07245917358086444, "mean_token_accuracy": 0.8775511920452118, "step": 38630 }, { "epoch": 0.6868966988427284, "grad_norm": 2.402466839471359, "learning_rate": 0.0001, "loss": 0.807, "mean_abs_error": 635.3582452058643, "mean_abs_error_last_10": 189.8186085127894, "mean_abs_error_last_25": 380.37301539270527, "mean_abs_error_last_50": 445.83287371921296, "mean_pred_prob": 0.02230245580431074, "mean_pred_prob_last_10": 0.11185913067311049, "mean_pred_prob_last_25": 0.06107226926833391, "mean_pred_prob_last_50": 0.03707881080918014, "mean_token_accuracy": 0.8600165367126464, "step": 38640 }, { "epoch": 0.6870744671395304, "grad_norm": 1.7227572142647103, "learning_rate": 0.0001, "loss": 0.7475, "mean_abs_error": 263.6825320264246, "mean_abs_error_last_10": 38.25685135966804, "mean_abs_error_last_25": 67.85048162624986, "mean_abs_error_last_50": 143.92807862987874, "mean_pred_prob": 0.047022958798334, "mean_pred_prob_last_10": 0.22919304370880128, "mean_pred_prob_last_25": 0.1288547079078853, "mean_pred_prob_last_50": 0.07876446740701795, "mean_token_accuracy": 0.872605437040329, "step": 38650 }, { "epoch": 0.6872522354363323, "grad_norm": 1.8739692186150514, "learning_rate": 0.0001, "loss": 0.7688, "mean_abs_error": 711.2888457325597, "mean_abs_error_last_10": 290.8946023321111, "mean_abs_error_last_25": 390.5314495120231, "mean_abs_error_last_50": 490.8959055015989, "mean_pred_prob": 0.033239545140531845, "mean_pred_prob_last_10": 0.1722976751741953, "mean_pred_prob_last_25": 0.09346385853714309, "mean_pred_prob_last_50": 0.05613743588910438, "mean_token_accuracy": 0.8645604491233826, "step": 38660 }, { "epoch": 0.6874300037331342, "grad_norm": 0.9169786388431244, "learning_rate": 0.0001, "loss": 0.7018, "mean_abs_error": 837.300761683415, "mean_abs_error_last_10": 375.2003216099671, "mean_abs_error_last_25": 443.0639156701159, "mean_abs_error_last_50": 551.9295059432196, "mean_pred_prob": 0.03677577222697437, "mean_pred_prob_last_10": 0.17391796501469797, "mean_pred_prob_last_25": 0.09809237833833322, "mean_pred_prob_last_50": 0.060809877543943, "mean_token_accuracy": 0.8777101278305054, "step": 38670 }, { "epoch": 0.6876077720299362, "grad_norm": 1.3114939996189838, "learning_rate": 0.0001, "loss": 0.7527, "mean_abs_error": 316.083089116595, "mean_abs_error_last_10": 87.66225063807526, "mean_abs_error_last_25": 104.42224208433231, "mean_abs_error_last_50": 158.8657677502177, "mean_pred_prob": 0.03467146821785718, "mean_pred_prob_last_10": 0.18029799708165228, "mean_pred_prob_last_25": 0.09858778358902782, "mean_pred_prob_last_50": 0.058672964235302064, "mean_token_accuracy": 0.8623670935630798, "step": 38680 }, { "epoch": 0.6877855403267381, "grad_norm": 2.909517199700631, "learning_rate": 0.0001, "loss": 0.7758, "mean_abs_error": 513.7598403149159, "mean_abs_error_last_10": 241.1788762759396, "mean_abs_error_last_25": 268.2139144795152, "mean_abs_error_last_50": 345.98674874217943, "mean_pred_prob": 0.03821269311010837, "mean_pred_prob_last_10": 0.15781838838011025, "mean_pred_prob_last_25": 0.09784998176619411, "mean_pred_prob_last_50": 0.06254709153436125, "mean_token_accuracy": 0.8691299200057984, "step": 38690 }, { "epoch": 0.68796330862354, "grad_norm": 1.7596177413361143, "learning_rate": 0.0001, "loss": 0.7944, "mean_abs_error": 668.4440199381778, "mean_abs_error_last_10": 167.62685574434016, "mean_abs_error_last_25": 184.9881093973324, "mean_abs_error_last_50": 326.12312357881336, "mean_pred_prob": 0.030996959086041897, "mean_pred_prob_last_10": 0.156999284401536, "mean_pred_prob_last_25": 0.08561222155112773, "mean_pred_prob_last_50": 0.05194535778136924, "mean_token_accuracy": 0.8706675827503204, "step": 38700 }, { "epoch": 0.688141076920342, "grad_norm": 2.410514729705674, "learning_rate": 0.0001, "loss": 0.717, "mean_abs_error": 716.8045282559851, "mean_abs_error_last_10": 205.32948173249957, "mean_abs_error_last_25": 383.01580417809475, "mean_abs_error_last_50": 550.6512596920993, "mean_pred_prob": 0.036273091356270014, "mean_pred_prob_last_10": 0.18489300017245114, "mean_pred_prob_last_25": 0.10227307726163418, "mean_pred_prob_last_50": 0.06154608863871545, "mean_token_accuracy": 0.8700637757778168, "step": 38710 }, { "epoch": 0.6883188452171439, "grad_norm": 2.024698766325585, "learning_rate": 0.0001, "loss": 0.8133, "mean_abs_error": 636.4040179004844, "mean_abs_error_last_10": 128.8937749945624, "mean_abs_error_last_25": 174.34967631133603, "mean_abs_error_last_50": 348.17930149275094, "mean_pred_prob": 0.025629947579000147, "mean_pred_prob_last_10": 0.13532536101993173, "mean_pred_prob_last_25": 0.07505039547104389, "mean_pred_prob_last_50": 0.04417794169858098, "mean_token_accuracy": 0.870430713891983, "step": 38720 }, { "epoch": 0.688496613513946, "grad_norm": 2.0529591755364542, "learning_rate": 0.0001, "loss": 0.8335, "mean_abs_error": 416.6499809948501, "mean_abs_error_last_10": 86.96542515900943, "mean_abs_error_last_25": 130.57367895335815, "mean_abs_error_last_50": 234.43825097046874, "mean_pred_prob": 0.038769563450478016, "mean_pred_prob_last_10": 0.17650587745010854, "mean_pred_prob_last_25": 0.10219550020992756, "mean_pred_prob_last_50": 0.06272501796483994, "mean_token_accuracy": 0.8732093572616577, "step": 38730 }, { "epoch": 0.6886743818107479, "grad_norm": 0.9862950145755398, "learning_rate": 0.0001, "loss": 0.8625, "mean_abs_error": 310.40229483172664, "mean_abs_error_last_10": 82.51459952565634, "mean_abs_error_last_25": 129.70973337496773, "mean_abs_error_last_50": 208.99540011390087, "mean_pred_prob": 0.043405033322051166, "mean_pred_prob_last_10": 0.19571744911372663, "mean_pred_prob_last_25": 0.11833270387724042, "mean_pred_prob_last_50": 0.07424683179706335, "mean_token_accuracy": 0.8710167884826661, "step": 38740 }, { "epoch": 0.6888521501075499, "grad_norm": 1.3518802398362484, "learning_rate": 0.0001, "loss": 0.6876, "mean_abs_error": 525.6391857980448, "mean_abs_error_last_10": 382.90523371383506, "mean_abs_error_last_25": 399.9226642317416, "mean_abs_error_last_50": 449.1471359989596, "mean_pred_prob": 0.030462995899142697, "mean_pred_prob_last_10": 0.16352866031229496, "mean_pred_prob_last_25": 0.08923808039398864, "mean_pred_prob_last_50": 0.05257119596935809, "mean_token_accuracy": 0.8815990507602691, "step": 38750 }, { "epoch": 0.6890299184043518, "grad_norm": 1.2519168237982363, "learning_rate": 0.0001, "loss": 0.6447, "mean_abs_error": 177.74803500718036, "mean_abs_error_last_10": 72.61318873441692, "mean_abs_error_last_25": 86.93203850261281, "mean_abs_error_last_50": 115.79585673787786, "mean_pred_prob": 0.03712976910173893, "mean_pred_prob_last_10": 0.19428187869489194, "mean_pred_prob_last_25": 0.10411375183612108, "mean_pred_prob_last_50": 0.06242957329377532, "mean_token_accuracy": 0.8703931570053101, "step": 38760 }, { "epoch": 0.6892076867011537, "grad_norm": 2.3869329579426637, "learning_rate": 0.0001, "loss": 0.7209, "mean_abs_error": 483.2955111226405, "mean_abs_error_last_10": 295.4861844908239, "mean_abs_error_last_25": 427.2738184986588, "mean_abs_error_last_50": 427.2151278806288, "mean_pred_prob": 0.04572051479481161, "mean_pred_prob_last_10": 0.2142094816081226, "mean_pred_prob_last_25": 0.12372749226633459, "mean_pred_prob_last_50": 0.07643202792387456, "mean_token_accuracy": 0.8676041662693024, "step": 38770 }, { "epoch": 0.6893854549979557, "grad_norm": 1.7212397529375316, "learning_rate": 0.0001, "loss": 0.6307, "mean_abs_error": 235.2906301724719, "mean_abs_error_last_10": 54.71654261742888, "mean_abs_error_last_25": 74.66396065139887, "mean_abs_error_last_50": 135.26731362227682, "mean_pred_prob": 0.03846953171305358, "mean_pred_prob_last_10": 0.2093655861914158, "mean_pred_prob_last_25": 0.11191261168569326, "mean_pred_prob_last_50": 0.06665421361103654, "mean_token_accuracy": 0.883086746931076, "step": 38780 }, { "epoch": 0.6895632232947576, "grad_norm": 1.0714083344712724, "learning_rate": 0.0001, "loss": 0.8065, "mean_abs_error": 740.7907341479602, "mean_abs_error_last_10": 379.6047116029718, "mean_abs_error_last_25": 417.63882930899547, "mean_abs_error_last_50": 443.0855248347666, "mean_pred_prob": 0.029815847106510772, "mean_pred_prob_last_10": 0.13524678194662557, "mean_pred_prob_last_25": 0.07657613002229482, "mean_pred_prob_last_50": 0.048735517979366705, "mean_token_accuracy": 0.8563996970653533, "step": 38790 }, { "epoch": 0.6897409915915595, "grad_norm": 1.328098259109059, "learning_rate": 0.0001, "loss": 0.6782, "mean_abs_error": 517.6040536842883, "mean_abs_error_last_10": 173.9848692105191, "mean_abs_error_last_25": 252.38066360658686, "mean_abs_error_last_50": 279.3850695726772, "mean_pred_prob": 0.05300169877009466, "mean_pred_prob_last_10": 0.21006469461135566, "mean_pred_prob_last_25": 0.13675643082242459, "mean_pred_prob_last_50": 0.0875381359946914, "mean_token_accuracy": 0.872655165195465, "step": 38800 }, { "epoch": 0.6899187598883615, "grad_norm": 3.5616585537542895, "learning_rate": 0.0001, "loss": 0.7356, "mean_abs_error": 126.43755375174212, "mean_abs_error_last_10": 21.598495527243983, "mean_abs_error_last_25": 65.38609954989336, "mean_abs_error_last_50": 105.86427389852358, "mean_pred_prob": 0.05536363576538861, "mean_pred_prob_last_10": 0.2697436321526766, "mean_pred_prob_last_25": 0.15455775633454322, "mean_pred_prob_last_50": 0.09453752748668194, "mean_token_accuracy": 0.8656714379787445, "step": 38810 }, { "epoch": 0.6900965281851634, "grad_norm": 1.520734340799557, "learning_rate": 0.0001, "loss": 0.8045, "mean_abs_error": 1277.89782874192, "mean_abs_error_last_10": 646.0293631923669, "mean_abs_error_last_25": 755.5076385285788, "mean_abs_error_last_50": 882.1660520455332, "mean_pred_prob": 0.0315538684517378, "mean_pred_prob_last_10": 0.15950685567804612, "mean_pred_prob_last_25": 0.08526351021428127, "mean_pred_prob_last_50": 0.05170670897350647, "mean_token_accuracy": 0.8684730887413025, "step": 38820 }, { "epoch": 0.6902742964819654, "grad_norm": 1.3924591542413336, "learning_rate": 0.0001, "loss": 0.6188, "mean_abs_error": 150.699670325833, "mean_abs_error_last_10": 42.65744132200547, "mean_abs_error_last_25": 62.82216053775144, "mean_abs_error_last_50": 103.152518601497, "mean_pred_prob": 0.04562890660017729, "mean_pred_prob_last_10": 0.21721951328217984, "mean_pred_prob_last_25": 0.12466728091239929, "mean_pred_prob_last_50": 0.07691738130524754, "mean_token_accuracy": 0.882329523563385, "step": 38830 }, { "epoch": 0.6904520647787673, "grad_norm": 1.1228260239780858, "learning_rate": 0.0001, "loss": 0.7271, "mean_abs_error": 472.6677673396201, "mean_abs_error_last_10": 130.10305030531674, "mean_abs_error_last_25": 154.7977019676112, "mean_abs_error_last_50": 227.49919165970647, "mean_pred_prob": 0.04055981657002121, "mean_pred_prob_last_10": 0.18638793807476758, "mean_pred_prob_last_25": 0.11150263715535402, "mean_pred_prob_last_50": 0.06869250922463835, "mean_token_accuracy": 0.8770815908908844, "step": 38840 }, { "epoch": 0.6906298330755694, "grad_norm": 1.5343456429810878, "learning_rate": 0.0001, "loss": 1.0508, "mean_abs_error": 321.3317191824827, "mean_abs_error_last_10": 116.17752631695234, "mean_abs_error_last_25": 165.43113379056317, "mean_abs_error_last_50": 231.63470535357754, "mean_pred_prob": 0.02964378912001848, "mean_pred_prob_last_10": 0.14044811017811298, "mean_pred_prob_last_25": 0.07889513298869133, "mean_pred_prob_last_50": 0.04873515646904707, "mean_token_accuracy": 0.8699970066547393, "step": 38850 }, { "epoch": 0.6908076013723713, "grad_norm": 1.3895655636914668, "learning_rate": 0.0001, "loss": 0.701, "mean_abs_error": 58.10886763135194, "mean_abs_error_last_10": 7.330654362339333, "mean_abs_error_last_25": 18.746829323457238, "mean_abs_error_last_50": 38.76004239437539, "mean_pred_prob": 0.07123260609805585, "mean_pred_prob_last_10": 0.3573734909296036, "mean_pred_prob_last_25": 0.19675636366009713, "mean_pred_prob_last_50": 0.11885287389159202, "mean_token_accuracy": 0.8795853674411773, "step": 38860 }, { "epoch": 0.6909853696691732, "grad_norm": 1.610841341282075, "learning_rate": 0.0001, "loss": 0.6972, "mean_abs_error": 468.2288552585028, "mean_abs_error_last_10": 163.5452733727098, "mean_abs_error_last_25": 196.54084380985694, "mean_abs_error_last_50": 285.54743576626436, "mean_pred_prob": 0.05170185955939814, "mean_pred_prob_last_10": 0.23405219039414077, "mean_pred_prob_last_25": 0.13577495085773988, "mean_pred_prob_last_50": 0.08509649723418988, "mean_token_accuracy": 0.8745690524578095, "step": 38870 }, { "epoch": 0.6911631379659752, "grad_norm": 1.4022201287694567, "learning_rate": 0.0001, "loss": 0.7473, "mean_abs_error": 1108.1243891188606, "mean_abs_error_last_10": 410.25159481481717, "mean_abs_error_last_25": 572.9629993838975, "mean_abs_error_last_50": 794.3354948549561, "mean_pred_prob": 0.017607678777130785, "mean_pred_prob_last_10": 0.09207628030853812, "mean_pred_prob_last_25": 0.049856786767486484, "mean_pred_prob_last_50": 0.029797791028977373, "mean_token_accuracy": 0.8701156258583069, "step": 38880 }, { "epoch": 0.6913409062627771, "grad_norm": 1.7880483680823986, "learning_rate": 0.0001, "loss": 0.6884, "mean_abs_error": 648.7144560978962, "mean_abs_error_last_10": 265.50050028244027, "mean_abs_error_last_25": 318.4882626643452, "mean_abs_error_last_50": 414.5494463286773, "mean_pred_prob": 0.042147297263727526, "mean_pred_prob_last_10": 0.20597817934467458, "mean_pred_prob_last_25": 0.11691296339267865, "mean_pred_prob_last_50": 0.07182351401133928, "mean_token_accuracy": 0.8740874409675599, "step": 38890 }, { "epoch": 0.691518674559579, "grad_norm": 1.7204782853151666, "learning_rate": 0.0001, "loss": 0.6536, "mean_abs_error": 317.1694578021902, "mean_abs_error_last_10": 155.44323682893634, "mean_abs_error_last_25": 177.39133852326466, "mean_abs_error_last_50": 226.47917017888395, "mean_pred_prob": 0.05020194565877319, "mean_pred_prob_last_10": 0.23422840479761362, "mean_pred_prob_last_25": 0.1347745379898697, "mean_pred_prob_last_50": 0.08341693372931332, "mean_token_accuracy": 0.8712851822376251, "step": 38900 }, { "epoch": 0.691696442856381, "grad_norm": 1.2406038456800386, "learning_rate": 0.0001, "loss": 0.7437, "mean_abs_error": 485.96972775223855, "mean_abs_error_last_10": 146.3110568313313, "mean_abs_error_last_25": 177.44305397377553, "mean_abs_error_last_50": 211.10565505728746, "mean_pred_prob": 0.04074006725568324, "mean_pred_prob_last_10": 0.17353974673897027, "mean_pred_prob_last_25": 0.10748947653919458, "mean_pred_prob_last_50": 0.06862980634905398, "mean_token_accuracy": 0.8710385262966156, "step": 38910 }, { "epoch": 0.6918742111531829, "grad_norm": 1.530711227657493, "learning_rate": 0.0001, "loss": 0.6294, "mean_abs_error": 314.93497356961973, "mean_abs_error_last_10": 106.17120005486338, "mean_abs_error_last_25": 129.53670816100652, "mean_abs_error_last_50": 179.32677377725742, "mean_pred_prob": 0.04731868540402502, "mean_pred_prob_last_10": 0.2215689001372084, "mean_pred_prob_last_25": 0.12517612932715566, "mean_pred_prob_last_50": 0.07756048192968593, "mean_token_accuracy": 0.8705016851425171, "step": 38920 }, { "epoch": 0.6920519794499849, "grad_norm": 1.593053338119572, "learning_rate": 0.0001, "loss": 0.7698, "mean_abs_error": 291.7846973011852, "mean_abs_error_last_10": 62.71687497026634, "mean_abs_error_last_25": 139.10497616371273, "mean_abs_error_last_50": 226.75347975247004, "mean_pred_prob": 0.03914026562124491, "mean_pred_prob_last_10": 0.19542713239789009, "mean_pred_prob_last_25": 0.10426049176603555, "mean_pred_prob_last_50": 0.06464048800989985, "mean_token_accuracy": 0.8672810912132263, "step": 38930 }, { "epoch": 0.6922297477467868, "grad_norm": 2.2996460374493934, "learning_rate": 0.0001, "loss": 0.7765, "mean_abs_error": 131.82462894915994, "mean_abs_error_last_10": 55.807564751752466, "mean_abs_error_last_25": 51.3763867565461, "mean_abs_error_last_50": 69.09640362240465, "mean_pred_prob": 0.06598430844023824, "mean_pred_prob_last_10": 0.31711791418492796, "mean_pred_prob_last_25": 0.17784761767834426, "mean_pred_prob_last_50": 0.11004253495484591, "mean_token_accuracy": 0.8682303786277771, "step": 38940 }, { "epoch": 0.6924075160435887, "grad_norm": 1.2453401770716177, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 956.2468066063014, "mean_abs_error_last_10": 273.19989057494286, "mean_abs_error_last_25": 491.9702037810597, "mean_abs_error_last_50": 628.3717568037066, "mean_pred_prob": 0.024448748625582085, "mean_pred_prob_last_10": 0.11515774159342981, "mean_pred_prob_last_25": 0.06537481503328309, "mean_pred_prob_last_50": 0.04062796246726066, "mean_token_accuracy": 0.8765618622303009, "step": 38950 }, { "epoch": 0.6925852843403907, "grad_norm": 1.0722606150700311, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 206.95850664528365, "mean_abs_error_last_10": 51.3270458366904, "mean_abs_error_last_25": 63.39287179418261, "mean_abs_error_last_50": 113.05439876473186, "mean_pred_prob": 0.041827995656058194, "mean_pred_prob_last_10": 0.21689246762543918, "mean_pred_prob_last_25": 0.11796870827674866, "mean_pred_prob_last_50": 0.07173298140987754, "mean_token_accuracy": 0.8702285826206207, "step": 38960 }, { "epoch": 0.6927630526371927, "grad_norm": 0.9203032416673761, "learning_rate": 0.0001, "loss": 0.7493, "mean_abs_error": 870.2912169583333, "mean_abs_error_last_10": 544.0796977713602, "mean_abs_error_last_25": 643.0440835271827, "mean_abs_error_last_50": 726.153852323899, "mean_pred_prob": 0.055074086268723475, "mean_pred_prob_last_10": 0.2556390675948933, "mean_pred_prob_last_25": 0.1504651675160858, "mean_pred_prob_last_50": 0.09263621583959321, "mean_token_accuracy": 0.8694168031215668, "step": 38970 }, { "epoch": 0.6929408209339947, "grad_norm": 1.247359809429343, "learning_rate": 0.0001, "loss": 0.6948, "mean_abs_error": 385.75670022101707, "mean_abs_error_last_10": 143.04909199259353, "mean_abs_error_last_25": 196.80122513899204, "mean_abs_error_last_50": 263.0471905703328, "mean_pred_prob": 0.046789253340102734, "mean_pred_prob_last_10": 0.21248997915536166, "mean_pred_prob_last_25": 0.12433466296643018, "mean_pred_prob_last_50": 0.07736310287145898, "mean_token_accuracy": 0.8688698172569275, "step": 38980 }, { "epoch": 0.6931185892307966, "grad_norm": 1.9287600690536855, "learning_rate": 0.0001, "loss": 0.6656, "mean_abs_error": 1026.4683405238743, "mean_abs_error_last_10": 561.1228467487286, "mean_abs_error_last_25": 626.183567148146, "mean_abs_error_last_50": 752.1966807158428, "mean_pred_prob": 0.04697703879355686, "mean_pred_prob_last_10": 0.22723853336647154, "mean_pred_prob_last_25": 0.12938989699759987, "mean_pred_prob_last_50": 0.07849855449458118, "mean_token_accuracy": 0.8744845509529113, "step": 38990 }, { "epoch": 0.6932963575275986, "grad_norm": 1.5171490084942847, "learning_rate": 0.0001, "loss": 0.6374, "mean_abs_error": 506.4224162997149, "mean_abs_error_last_10": 218.74155140719049, "mean_abs_error_last_25": 331.49003823583035, "mean_abs_error_last_50": 414.29250220425536, "mean_pred_prob": 0.038281023828312756, "mean_pred_prob_last_10": 0.18756876811385154, "mean_pred_prob_last_25": 0.10593323213979602, "mean_pred_prob_last_50": 0.06402203408069909, "mean_token_accuracy": 0.8826218664646148, "step": 39000 }, { "epoch": 0.6934741258244005, "grad_norm": 1.272890412067814, "learning_rate": 0.0001, "loss": 0.7234, "mean_abs_error": 283.8492231445305, "mean_abs_error_last_10": 240.69752394466212, "mean_abs_error_last_25": 204.35035646864438, "mean_abs_error_last_50": 230.63013616261145, "mean_pred_prob": 0.03867556049954146, "mean_pred_prob_last_10": 0.19240761181572452, "mean_pred_prob_last_25": 0.10496528341900557, "mean_pred_prob_last_50": 0.0651525597437285, "mean_token_accuracy": 0.8772284984588623, "step": 39010 }, { "epoch": 0.6936518941212024, "grad_norm": 1.296797339693455, "learning_rate": 0.0001, "loss": 0.6876, "mean_abs_error": 506.61340019448437, "mean_abs_error_last_10": 73.7571991277293, "mean_abs_error_last_25": 123.40452203552404, "mean_abs_error_last_50": 235.4246963747715, "mean_pred_prob": 0.03497449471615255, "mean_pred_prob_last_10": 0.17752919625490904, "mean_pred_prob_last_25": 0.09669272126629949, "mean_pred_prob_last_50": 0.05877537722699344, "mean_token_accuracy": 0.8722844183444977, "step": 39020 }, { "epoch": 0.6938296624180044, "grad_norm": 1.3370771226126463, "learning_rate": 0.0001, "loss": 0.7533, "mean_abs_error": 916.0662673339888, "mean_abs_error_last_10": 516.0202494420488, "mean_abs_error_last_25": 644.9640068467019, "mean_abs_error_last_50": 761.2115209775318, "mean_pred_prob": 0.03790183435048675, "mean_pred_prob_last_10": 0.19179270221939076, "mean_pred_prob_last_25": 0.10447257747291587, "mean_pred_prob_last_50": 0.06344461647677235, "mean_token_accuracy": 0.8701438963413238, "step": 39030 }, { "epoch": 0.6940074307148063, "grad_norm": 1.8851642797795185, "learning_rate": 0.0001, "loss": 0.7112, "mean_abs_error": 306.96305264722594, "mean_abs_error_last_10": 55.25198246155793, "mean_abs_error_last_25": 70.78248716753434, "mean_abs_error_last_50": 137.31010906790374, "mean_pred_prob": 0.051296137273311615, "mean_pred_prob_last_10": 0.26645148191601037, "mean_pred_prob_last_25": 0.14180127959698438, "mean_pred_prob_last_50": 0.08560328166931867, "mean_token_accuracy": 0.8734505355358124, "step": 39040 }, { "epoch": 0.6941851990116082, "grad_norm": 1.0341296632701662, "learning_rate": 0.0001, "loss": 0.6821, "mean_abs_error": 239.21182777057942, "mean_abs_error_last_10": 87.0840897484109, "mean_abs_error_last_25": 100.052384560053, "mean_abs_error_last_50": 141.77757304315267, "mean_pred_prob": 0.05072783643845469, "mean_pred_prob_last_10": 0.23853244464844464, "mean_pred_prob_last_25": 0.13878964651376008, "mean_pred_prob_last_50": 0.08487582812085748, "mean_token_accuracy": 0.8898804485797882, "step": 39050 }, { "epoch": 0.6943629673084102, "grad_norm": 1.4285893204324744, "learning_rate": 0.0001, "loss": 0.6704, "mean_abs_error": 185.83171535770992, "mean_abs_error_last_10": 80.19618866382483, "mean_abs_error_last_25": 85.24230432716467, "mean_abs_error_last_50": 120.54082254281874, "mean_pred_prob": 0.036833839118480684, "mean_pred_prob_last_10": 0.1754225715994835, "mean_pred_prob_last_25": 0.09967195354402066, "mean_pred_prob_last_50": 0.061492316983640195, "mean_token_accuracy": 0.8684185922145844, "step": 39060 }, { "epoch": 0.6945407356052121, "grad_norm": 1.4671157770327836, "learning_rate": 0.0001, "loss": 0.7846, "mean_abs_error": 426.0894632281027, "mean_abs_error_last_10": 87.55826090706329, "mean_abs_error_last_25": 123.76974117196237, "mean_abs_error_last_50": 221.76040515340327, "mean_pred_prob": 0.030135634681209923, "mean_pred_prob_last_10": 0.14884999804198742, "mean_pred_prob_last_25": 0.08265373902395368, "mean_pred_prob_last_50": 0.050079854764044286, "mean_token_accuracy": 0.8750189423561097, "step": 39070 }, { "epoch": 0.6947185039020141, "grad_norm": 1.4786335131328214, "learning_rate": 0.0001, "loss": 0.8009, "mean_abs_error": 262.51463222622334, "mean_abs_error_last_10": 195.09365311575363, "mean_abs_error_last_25": 235.30340851944425, "mean_abs_error_last_50": 207.99199040255607, "mean_pred_prob": 0.05258500594645739, "mean_pred_prob_last_10": 0.26331838211044667, "mean_pred_prob_last_25": 0.14537088628858327, "mean_pred_prob_last_50": 0.08828116902150214, "mean_token_accuracy": 0.8712579488754273, "step": 39080 }, { "epoch": 0.6948962721988161, "grad_norm": 1.5173317841390068, "learning_rate": 0.0001, "loss": 0.7039, "mean_abs_error": 392.5790007455371, "mean_abs_error_last_10": 81.14996963923078, "mean_abs_error_last_25": 104.81501764686436, "mean_abs_error_last_50": 198.4345226734717, "mean_pred_prob": 0.04824868412688375, "mean_pred_prob_last_10": 0.23087522927671672, "mean_pred_prob_last_25": 0.13045666133984923, "mean_pred_prob_last_50": 0.0813258751295507, "mean_token_accuracy": 0.8791258573532105, "step": 39090 }, { "epoch": 0.695074040495618, "grad_norm": 1.1788624693454324, "learning_rate": 0.0001, "loss": 0.8122, "mean_abs_error": 329.74415868544173, "mean_abs_error_last_10": 123.97976368056243, "mean_abs_error_last_25": 190.39228505292817, "mean_abs_error_last_50": 244.1176023971444, "mean_pred_prob": 0.029242521012201907, "mean_pred_prob_last_10": 0.15339275933802127, "mean_pred_prob_last_25": 0.08143365997821092, "mean_pred_prob_last_50": 0.049253338295966384, "mean_token_accuracy": 0.8650564253330231, "step": 39100 }, { "epoch": 0.69525180879242, "grad_norm": 1.7138020033706136, "learning_rate": 0.0001, "loss": 0.7496, "mean_abs_error": 1161.0879946013297, "mean_abs_error_last_10": 646.7149190098216, "mean_abs_error_last_25": 719.227504037418, "mean_abs_error_last_50": 842.5300465050911, "mean_pred_prob": 0.032931283095967954, "mean_pred_prob_last_10": 0.17257918266695924, "mean_pred_prob_last_25": 0.09382874998555053, "mean_pred_prob_last_50": 0.055804218328557906, "mean_token_accuracy": 0.872313779592514, "step": 39110 }, { "epoch": 0.6954295770892219, "grad_norm": 3.8742587639869077, "learning_rate": 0.0001, "loss": 0.8017, "mean_abs_error": 736.4658305224657, "mean_abs_error_last_10": 196.34147877294401, "mean_abs_error_last_25": 307.94018615928445, "mean_abs_error_last_50": 476.75081632843546, "mean_pred_prob": 0.04132481500710128, "mean_pred_prob_last_10": 0.1952138083986938, "mean_pred_prob_last_25": 0.11176385733997449, "mean_pred_prob_last_50": 0.06989739919954445, "mean_token_accuracy": 0.8640614151954651, "step": 39120 }, { "epoch": 0.6956073453860239, "grad_norm": 1.576894628138655, "learning_rate": 0.0001, "loss": 0.7486, "mean_abs_error": 389.2160337550846, "mean_abs_error_last_10": 270.72187927660144, "mean_abs_error_last_25": 301.3613816647583, "mean_abs_error_last_50": 322.4322077441543, "mean_pred_prob": 0.0363695589127019, "mean_pred_prob_last_10": 0.16368834546301514, "mean_pred_prob_last_25": 0.09547439999878407, "mean_pred_prob_last_50": 0.05966725782491267, "mean_token_accuracy": 0.882986330986023, "step": 39130 }, { "epoch": 0.6957851136828258, "grad_norm": 1.5350579195951384, "learning_rate": 0.0001, "loss": 0.8134, "mean_abs_error": 227.85572501539883, "mean_abs_error_last_10": 37.107482400988395, "mean_abs_error_last_25": 126.93432819130093, "mean_abs_error_last_50": 165.4954885448997, "mean_pred_prob": 0.06053262003697455, "mean_pred_prob_last_10": 0.30385554432868955, "mean_pred_prob_last_25": 0.1656497685238719, "mean_pred_prob_last_50": 0.10069012884050607, "mean_token_accuracy": 0.8719773173332215, "step": 39140 }, { "epoch": 0.6959628819796277, "grad_norm": 1.561831991966111, "learning_rate": 0.0001, "loss": 0.6874, "mean_abs_error": 383.93214860729586, "mean_abs_error_last_10": 39.583707817775064, "mean_abs_error_last_25": 91.82800530954177, "mean_abs_error_last_50": 191.74229187139812, "mean_pred_prob": 0.040258680726401506, "mean_pred_prob_last_10": 0.18675333708524705, "mean_pred_prob_last_25": 0.10472979433834553, "mean_pred_prob_last_50": 0.0655894497409463, "mean_token_accuracy": 0.870464426279068, "step": 39150 }, { "epoch": 0.6961406502764297, "grad_norm": 1.5424104869065072, "learning_rate": 0.0001, "loss": 0.6981, "mean_abs_error": 130.80266255735708, "mean_abs_error_last_10": 38.84365949869052, "mean_abs_error_last_25": 60.61865127535807, "mean_abs_error_last_50": 83.07922409700033, "mean_pred_prob": 0.04131064936518669, "mean_pred_prob_last_10": 0.21256059035658836, "mean_pred_prob_last_25": 0.11786742992699147, "mean_pred_prob_last_50": 0.07017526272684335, "mean_token_accuracy": 0.8706090748310089, "step": 39160 }, { "epoch": 0.6963184185732316, "grad_norm": 2.2341151043930134, "learning_rate": 0.0001, "loss": 0.7013, "mean_abs_error": 616.2251924647222, "mean_abs_error_last_10": 153.27539505111912, "mean_abs_error_last_25": 235.36906082451682, "mean_abs_error_last_50": 371.8254841316997, "mean_pred_prob": 0.03849761028541252, "mean_pred_prob_last_10": 0.18402740089222788, "mean_pred_prob_last_25": 0.10597586188232526, "mean_pred_prob_last_50": 0.06426327179069631, "mean_token_accuracy": 0.8716256380081177, "step": 39170 }, { "epoch": 0.6964961868700336, "grad_norm": 1.0814529008223506, "learning_rate": 0.0001, "loss": 0.6904, "mean_abs_error": 207.11684858982312, "mean_abs_error_last_10": 75.41021745399198, "mean_abs_error_last_25": 118.37054090131775, "mean_abs_error_last_50": 151.2688359305062, "mean_pred_prob": 0.03754539960063994, "mean_pred_prob_last_10": 0.17737870179116727, "mean_pred_prob_last_25": 0.0958621023222804, "mean_pred_prob_last_50": 0.06035244455561042, "mean_token_accuracy": 0.8683567047119141, "step": 39180 }, { "epoch": 0.6966739551668355, "grad_norm": 2.054830758876835, "learning_rate": 0.0001, "loss": 0.9377, "mean_abs_error": 502.71268608242497, "mean_abs_error_last_10": 126.09802045556694, "mean_abs_error_last_25": 167.3709568836006, "mean_abs_error_last_50": 311.81408347669515, "mean_pred_prob": 0.02603640016168356, "mean_pred_prob_last_10": 0.15756019540131092, "mean_pred_prob_last_25": 0.08387216525152326, "mean_pred_prob_last_50": 0.0471045137848705, "mean_token_accuracy": 0.8788283824920654, "step": 39190 }, { "epoch": 0.6968517234636374, "grad_norm": 1.0634476207499213, "learning_rate": 0.0001, "loss": 0.602, "mean_abs_error": 214.87806858699315, "mean_abs_error_last_10": 70.88857370886012, "mean_abs_error_last_25": 143.17359328616456, "mean_abs_error_last_50": 154.4703363938949, "mean_pred_prob": 0.05892787976190448, "mean_pred_prob_last_10": 0.2676316011697054, "mean_pred_prob_last_25": 0.15947629008442163, "mean_pred_prob_last_50": 0.09844256658107042, "mean_token_accuracy": 0.8831106543540954, "step": 39200 }, { "epoch": 0.6970294917604395, "grad_norm": 0.9645563149059961, "learning_rate": 0.0001, "loss": 0.5798, "mean_abs_error": 419.0939811677783, "mean_abs_error_last_10": 132.69056581281447, "mean_abs_error_last_25": 180.7751325035336, "mean_abs_error_last_50": 240.51870903499784, "mean_pred_prob": 0.04421971687115729, "mean_pred_prob_last_10": 0.19445759090594947, "mean_pred_prob_last_25": 0.11629261134658009, "mean_pred_prob_last_50": 0.07401537771802395, "mean_token_accuracy": 0.8766022801399231, "step": 39210 }, { "epoch": 0.6972072600572414, "grad_norm": 1.5166342152414845, "learning_rate": 0.0001, "loss": 0.6773, "mean_abs_error": 741.6132250646624, "mean_abs_error_last_10": 534.7772780962471, "mean_abs_error_last_25": 544.540005789994, "mean_abs_error_last_50": 608.42945069887, "mean_pred_prob": 0.06185448519972851, "mean_pred_prob_last_10": 0.2815144847147167, "mean_pred_prob_last_25": 0.16586611793609335, "mean_pred_prob_last_50": 0.1029515858128434, "mean_token_accuracy": 0.873335212469101, "step": 39220 }, { "epoch": 0.6973850283540434, "grad_norm": 1.2942482565273725, "learning_rate": 0.0001, "loss": 0.5871, "mean_abs_error": 308.8836353870426, "mean_abs_error_last_10": 79.47760589818496, "mean_abs_error_last_25": 103.92467005116889, "mean_abs_error_last_50": 171.24110641145106, "mean_pred_prob": 0.04288098150864243, "mean_pred_prob_last_10": 0.21829548478126526, "mean_pred_prob_last_25": 0.12348972549661993, "mean_pred_prob_last_50": 0.07297132909297943, "mean_token_accuracy": 0.8776421427726746, "step": 39230 }, { "epoch": 0.6975627966508453, "grad_norm": 2.1784266400333143, "learning_rate": 0.0001, "loss": 0.7297, "mean_abs_error": 769.7350560369075, "mean_abs_error_last_10": 438.98933806734414, "mean_abs_error_last_25": 474.47164267216203, "mean_abs_error_last_50": 565.1998334293571, "mean_pred_prob": 0.0483456482848851, "mean_pred_prob_last_10": 0.24318152642517815, "mean_pred_prob_last_25": 0.13615499705192632, "mean_pred_prob_last_50": 0.08233123451645952, "mean_token_accuracy": 0.8751964569091797, "step": 39240 }, { "epoch": 0.6977405649476472, "grad_norm": 1.6765658805395616, "learning_rate": 0.0001, "loss": 0.7605, "mean_abs_error": 1585.6383306088437, "mean_abs_error_last_10": 802.7541794554014, "mean_abs_error_last_25": 947.9201764866057, "mean_abs_error_last_50": 1122.7418296543995, "mean_pred_prob": 0.030289817720768043, "mean_pred_prob_last_10": 0.1465241756872274, "mean_pred_prob_last_25": 0.08509938257338945, "mean_pred_prob_last_50": 0.051801664021331816, "mean_token_accuracy": 0.8690838694572449, "step": 39250 }, { "epoch": 0.6979183332444492, "grad_norm": 0.930096019179224, "learning_rate": 0.0001, "loss": 0.7635, "mean_abs_error": 548.335510099893, "mean_abs_error_last_10": 125.77422338601359, "mean_abs_error_last_25": 203.89057624023272, "mean_abs_error_last_50": 364.66938893422804, "mean_pred_prob": 0.031224239245057107, "mean_pred_prob_last_10": 0.16536193597130477, "mean_pred_prob_last_25": 0.0883537223096937, "mean_pred_prob_last_50": 0.052945652464404705, "mean_token_accuracy": 0.8668434798717499, "step": 39260 }, { "epoch": 0.6980961015412511, "grad_norm": 1.612726328680838, "learning_rate": 0.0001, "loss": 0.7725, "mean_abs_error": 769.7293009294305, "mean_abs_error_last_10": 274.9488837418972, "mean_abs_error_last_25": 267.59636066692184, "mean_abs_error_last_50": 401.54643607674353, "mean_pred_prob": 0.03500960317905992, "mean_pred_prob_last_10": 0.16298978617414833, "mean_pred_prob_last_25": 0.09340894423658028, "mean_pred_prob_last_50": 0.058465825498569754, "mean_token_accuracy": 0.8553761899471283, "step": 39270 }, { "epoch": 0.6982738698380531, "grad_norm": 1.6811793093702119, "learning_rate": 0.0001, "loss": 0.7635, "mean_abs_error": 310.76669334047176, "mean_abs_error_last_10": 75.65354875090878, "mean_abs_error_last_25": 119.94663210484437, "mean_abs_error_last_50": 210.31259281691868, "mean_pred_prob": 0.041728388797491786, "mean_pred_prob_last_10": 0.21049650851637125, "mean_pred_prob_last_25": 0.1151096730493009, "mean_pred_prob_last_50": 0.06980063747614622, "mean_token_accuracy": 0.8755256175994873, "step": 39280 }, { "epoch": 0.698451638134855, "grad_norm": 1.1253967288362903, "learning_rate": 0.0001, "loss": 0.7491, "mean_abs_error": 403.5719476693708, "mean_abs_error_last_10": 223.46132374632708, "mean_abs_error_last_25": 261.0383223449385, "mean_abs_error_last_50": 314.93827825111975, "mean_pred_prob": 0.04107241050805897, "mean_pred_prob_last_10": 0.20255880018230527, "mean_pred_prob_last_25": 0.11774851388763637, "mean_pred_prob_last_50": 0.06972462418489159, "mean_token_accuracy": 0.8736473500728608, "step": 39290 }, { "epoch": 0.6986294064316569, "grad_norm": 1.4918635164531318, "learning_rate": 0.0001, "loss": 0.6743, "mean_abs_error": 619.7266847233398, "mean_abs_error_last_10": 156.04304670753697, "mean_abs_error_last_25": 224.6325961112662, "mean_abs_error_last_50": 323.9477259520466, "mean_pred_prob": 0.030775974283460527, "mean_pred_prob_last_10": 0.1461776986718178, "mean_pred_prob_last_25": 0.08202077860478311, "mean_pred_prob_last_50": 0.05154736964032054, "mean_token_accuracy": 0.8796885728836059, "step": 39300 }, { "epoch": 0.6988071747284589, "grad_norm": 1.5213514425524421, "learning_rate": 0.0001, "loss": 0.8167, "mean_abs_error": 270.2091316335094, "mean_abs_error_last_10": 137.23432580705804, "mean_abs_error_last_25": 130.11893660617335, "mean_abs_error_last_50": 155.53114251955935, "mean_pred_prob": 0.03914274689741433, "mean_pred_prob_last_10": 0.18779176622629165, "mean_pred_prob_last_25": 0.10025148428976535, "mean_pred_prob_last_50": 0.0640741958282888, "mean_token_accuracy": 0.8695875823497772, "step": 39310 }, { "epoch": 0.6989849430252609, "grad_norm": 0.9349900150152699, "learning_rate": 0.0001, "loss": 0.7577, "mean_abs_error": 320.90529063701115, "mean_abs_error_last_10": 58.17754066115059, "mean_abs_error_last_25": 96.20454674541503, "mean_abs_error_last_50": 172.91575496615837, "mean_pred_prob": 0.03286448849830777, "mean_pred_prob_last_10": 0.1448978828266263, "mean_pred_prob_last_25": 0.08051090138033032, "mean_pred_prob_last_50": 0.052624275395646694, "mean_token_accuracy": 0.8744454205036163, "step": 39320 }, { "epoch": 0.6991627113220629, "grad_norm": 1.7606128763738513, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 2039.8938877004214, "mean_abs_error_last_10": 1405.8520397538846, "mean_abs_error_last_25": 1453.3053394142914, "mean_abs_error_last_50": 1721.5213744818022, "mean_pred_prob": 0.018435126257827505, "mean_pred_prob_last_10": 0.08640338218829129, "mean_pred_prob_last_25": 0.051551563548855484, "mean_pred_prob_last_50": 0.030642976102535614, "mean_token_accuracy": 0.8675427258014679, "step": 39330 }, { "epoch": 0.6993404796188648, "grad_norm": 1.5867610506518581, "learning_rate": 0.0001, "loss": 0.7675, "mean_abs_error": 1095.0822269266976, "mean_abs_error_last_10": 665.7263154799614, "mean_abs_error_last_25": 723.9431889471605, "mean_abs_error_last_50": 846.4549876142939, "mean_pred_prob": 0.04007283257378731, "mean_pred_prob_last_10": 0.18210080946300877, "mean_pred_prob_last_25": 0.10270691624173196, "mean_pred_prob_last_50": 0.06496207302989206, "mean_token_accuracy": 0.8704440772533417, "step": 39340 }, { "epoch": 0.6995182479156667, "grad_norm": 2.4035971146665958, "learning_rate": 0.0001, "loss": 0.6839, "mean_abs_error": 364.6088901083725, "mean_abs_error_last_10": 59.07676267538089, "mean_abs_error_last_25": 83.56890442142716, "mean_abs_error_last_50": 159.24449348365232, "mean_pred_prob": 0.04957043705508113, "mean_pred_prob_last_10": 0.22996570710092784, "mean_pred_prob_last_25": 0.13131200876086951, "mean_pred_prob_last_50": 0.08263515988364815, "mean_token_accuracy": 0.8809556365013123, "step": 39350 }, { "epoch": 0.6996960162124687, "grad_norm": 1.3492232760753313, "learning_rate": 0.0001, "loss": 0.9425, "mean_abs_error": 844.5233422314474, "mean_abs_error_last_10": 429.92259854863744, "mean_abs_error_last_25": 524.4212521007973, "mean_abs_error_last_50": 641.1966578768303, "mean_pred_prob": 0.054796588637691455, "mean_pred_prob_last_10": 0.2577737257903209, "mean_pred_prob_last_25": 0.15049337117525283, "mean_pred_prob_last_50": 0.0931056372122839, "mean_token_accuracy": 0.8742684543132782, "step": 39360 }, { "epoch": 0.6998737845092706, "grad_norm": 1.0680253449595853, "learning_rate": 0.0001, "loss": 0.7021, "mean_abs_error": 361.0572235528074, "mean_abs_error_last_10": 136.97643466945104, "mean_abs_error_last_25": 143.12353249984125, "mean_abs_error_last_50": 193.72349194125158, "mean_pred_prob": 0.034313088736962524, "mean_pred_prob_last_10": 0.17899491989519448, "mean_pred_prob_last_25": 0.0956953865941614, "mean_pred_prob_last_50": 0.05789879749063402, "mean_token_accuracy": 0.8751191020011901, "step": 39370 }, { "epoch": 0.7000515528060726, "grad_norm": 2.3194123469105343, "learning_rate": 0.0001, "loss": 0.7284, "mean_abs_error": 621.3202786200353, "mean_abs_error_last_10": 155.77182503582304, "mean_abs_error_last_25": 205.7790335589938, "mean_abs_error_last_50": 344.41197989480577, "mean_pred_prob": 0.03394181459443644, "mean_pred_prob_last_10": 0.17564418870024384, "mean_pred_prob_last_25": 0.09449853906407953, "mean_pred_prob_last_50": 0.05762220717733726, "mean_token_accuracy": 0.8764656543731689, "step": 39380 }, { "epoch": 0.7002293211028745, "grad_norm": 2.000823769120187, "learning_rate": 0.0001, "loss": 0.7447, "mean_abs_error": 720.9975142314379, "mean_abs_error_last_10": 272.4305632830077, "mean_abs_error_last_25": 396.31561027457894, "mean_abs_error_last_50": 489.8700713899989, "mean_pred_prob": 0.03476761017809622, "mean_pred_prob_last_10": 0.18143353464547546, "mean_pred_prob_last_25": 0.09533544093137607, "mean_pred_prob_last_50": 0.05850613188231364, "mean_token_accuracy": 0.8759653210639954, "step": 39390 }, { "epoch": 0.7004070893996764, "grad_norm": 1.3403480310993414, "learning_rate": 0.0001, "loss": 0.9079, "mean_abs_error": 1265.6184940610194, "mean_abs_error_last_10": 599.5634039463976, "mean_abs_error_last_25": 749.6757121653544, "mean_abs_error_last_50": 904.325144821711, "mean_pred_prob": 0.03292255961714545, "mean_pred_prob_last_10": 0.16752002941502725, "mean_pred_prob_last_25": 0.08870943678484763, "mean_pred_prob_last_50": 0.054648216703208165, "mean_token_accuracy": 0.8732751846313477, "step": 39400 }, { "epoch": 0.7005848576964784, "grad_norm": 1.5964859975374326, "learning_rate": 0.0001, "loss": 0.7787, "mean_abs_error": 440.63115621750984, "mean_abs_error_last_10": 123.67073062325169, "mean_abs_error_last_25": 165.11321576830886, "mean_abs_error_last_50": 251.61928362952358, "mean_pred_prob": 0.02629642407409847, "mean_pred_prob_last_10": 0.1375831052660942, "mean_pred_prob_last_25": 0.07722881380468608, "mean_pred_prob_last_50": 0.04528422774747014, "mean_token_accuracy": 0.8744920909404754, "step": 39410 }, { "epoch": 0.7007626259932803, "grad_norm": 1.3009355264451727, "learning_rate": 0.0001, "loss": 0.8185, "mean_abs_error": 827.6881447264675, "mean_abs_error_last_10": 509.3426139394298, "mean_abs_error_last_25": 501.0517381213499, "mean_abs_error_last_50": 648.7164064590091, "mean_pred_prob": 0.055970396770862865, "mean_pred_prob_last_10": 0.26181259721633976, "mean_pred_prob_last_25": 0.14764717566722538, "mean_pred_prob_last_50": 0.09282410698506283, "mean_token_accuracy": 0.8679377555847168, "step": 39420 }, { "epoch": 0.7009403942900823, "grad_norm": 1.5810127979024662, "learning_rate": 0.0001, "loss": 0.6469, "mean_abs_error": 271.7274656606013, "mean_abs_error_last_10": 106.62362648934263, "mean_abs_error_last_25": 125.19754418852172, "mean_abs_error_last_50": 146.12536116714267, "mean_pred_prob": 0.045947256963700056, "mean_pred_prob_last_10": 0.19331599660217763, "mean_pred_prob_last_25": 0.11573725063353776, "mean_pred_prob_last_50": 0.0747207692824304, "mean_token_accuracy": 0.8840741276741028, "step": 39430 }, { "epoch": 0.7011181625868843, "grad_norm": 2.158910392734196, "learning_rate": 0.0001, "loss": 0.8452, "mean_abs_error": 621.503055580705, "mean_abs_error_last_10": 322.09683270059224, "mean_abs_error_last_25": 377.95882552568554, "mean_abs_error_last_50": 456.4259948588823, "mean_pred_prob": 0.032986779214115816, "mean_pred_prob_last_10": 0.17429469734197484, "mean_pred_prob_last_25": 0.08831391644198447, "mean_pred_prob_last_50": 0.054947608907241374, "mean_token_accuracy": 0.8655930817127228, "step": 39440 }, { "epoch": 0.7012959308836862, "grad_norm": 1.1526927273971872, "learning_rate": 0.0001, "loss": 0.7781, "mean_abs_error": 528.6989802771417, "mean_abs_error_last_10": 153.82767621115295, "mean_abs_error_last_25": 191.43250828372928, "mean_abs_error_last_50": 268.93439733700734, "mean_pred_prob": 0.04758628841955215, "mean_pred_prob_last_10": 0.21363713014870883, "mean_pred_prob_last_25": 0.12052774201147258, "mean_pred_prob_last_50": 0.07704747613752261, "mean_token_accuracy": 0.8746434330940247, "step": 39450 }, { "epoch": 0.7014736991804882, "grad_norm": 2.224480210754831, "learning_rate": 0.0001, "loss": 0.7789, "mean_abs_error": 1036.713693272308, "mean_abs_error_last_10": 638.0957929436806, "mean_abs_error_last_25": 692.7342313565898, "mean_abs_error_last_50": 796.1869876327768, "mean_pred_prob": 0.03354782480746508, "mean_pred_prob_last_10": 0.16675933198130224, "mean_pred_prob_last_25": 0.09415923529886641, "mean_pred_prob_last_50": 0.05617217469844036, "mean_token_accuracy": 0.8698155045509338, "step": 39460 }, { "epoch": 0.7016514674772901, "grad_norm": 1.086194654621911, "learning_rate": 0.0001, "loss": 0.8831, "mean_abs_error": 87.88153149988423, "mean_abs_error_last_10": 45.84033640199353, "mean_abs_error_last_25": 44.96588723187936, "mean_abs_error_last_50": 60.72662446454717, "mean_pred_prob": 0.06334241414442658, "mean_pred_prob_last_10": 0.30715719535946845, "mean_pred_prob_last_25": 0.17066390328109265, "mean_pred_prob_last_50": 0.1048910604789853, "mean_token_accuracy": 0.867999792098999, "step": 39470 }, { "epoch": 0.7018292357740921, "grad_norm": 1.183028701002875, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 201.81484590748659, "mean_abs_error_last_10": 51.310073893717984, "mean_abs_error_last_25": 77.8637934148793, "mean_abs_error_last_50": 127.98160180399584, "mean_pred_prob": 0.0401441914960742, "mean_pred_prob_last_10": 0.20377539619803428, "mean_pred_prob_last_25": 0.10976865608245134, "mean_pred_prob_last_50": 0.06744585894048213, "mean_token_accuracy": 0.864060127735138, "step": 39480 }, { "epoch": 0.702007004070894, "grad_norm": 1.276442230535881, "learning_rate": 0.0001, "loss": 0.7005, "mean_abs_error": 516.6151865174895, "mean_abs_error_last_10": 103.4221428587917, "mean_abs_error_last_25": 234.27742738701355, "mean_abs_error_last_50": 369.45162034859004, "mean_pred_prob": 0.039041978039313106, "mean_pred_prob_last_10": 0.2010934949736111, "mean_pred_prob_last_25": 0.10650940493796952, "mean_pred_prob_last_50": 0.06514503509970382, "mean_token_accuracy": 0.8864845991134643, "step": 39490 }, { "epoch": 0.7021847723676959, "grad_norm": 1.5209263339266328, "learning_rate": 0.0001, "loss": 0.6792, "mean_abs_error": 1335.223946595092, "mean_abs_error_last_10": 557.83501081537, "mean_abs_error_last_25": 699.4968729171826, "mean_abs_error_last_50": 972.4411827351884, "mean_pred_prob": 0.03209887777193217, "mean_pred_prob_last_10": 0.17280417215661145, "mean_pred_prob_last_25": 0.09558782367093954, "mean_pred_prob_last_50": 0.055536723919794896, "mean_token_accuracy": 0.8813772559165954, "step": 39500 }, { "epoch": 0.7023625406644979, "grad_norm": 2.8099187926453384, "learning_rate": 0.0001, "loss": 0.6761, "mean_abs_error": 1334.7173992767569, "mean_abs_error_last_10": 694.2211766109901, "mean_abs_error_last_25": 850.0471808699604, "mean_abs_error_last_50": 970.237777887286, "mean_pred_prob": 0.0310664663935313, "mean_pred_prob_last_10": 0.14596508359827567, "mean_pred_prob_last_25": 0.0810006967818481, "mean_pred_prob_last_50": 0.050950890633976086, "mean_token_accuracy": 0.8747629463672638, "step": 39510 }, { "epoch": 0.7025403089612998, "grad_norm": 0.8158931952258964, "learning_rate": 0.0001, "loss": 0.657, "mean_abs_error": 720.9663969170629, "mean_abs_error_last_10": 403.570792551242, "mean_abs_error_last_25": 435.2415746644095, "mean_abs_error_last_50": 536.2522482932208, "mean_pred_prob": 0.03760027165408246, "mean_pred_prob_last_10": 0.18824804050382227, "mean_pred_prob_last_25": 0.10344922498916276, "mean_pred_prob_last_50": 0.06246362243546173, "mean_token_accuracy": 0.8854922175407409, "step": 39520 }, { "epoch": 0.7027180772581018, "grad_norm": 0.8924638573229449, "learning_rate": 0.0001, "loss": 0.8211, "mean_abs_error": 200.25532735059488, "mean_abs_error_last_10": 78.7925273344974, "mean_abs_error_last_25": 99.80252360690294, "mean_abs_error_last_50": 149.93307170456654, "mean_pred_prob": 0.043631013156846164, "mean_pred_prob_last_10": 0.21867397325113416, "mean_pred_prob_last_25": 0.1199405487626791, "mean_pred_prob_last_50": 0.07260237541049719, "mean_token_accuracy": 0.8794370055198669, "step": 39530 }, { "epoch": 0.7028958455549037, "grad_norm": 2.46747164523516, "learning_rate": 0.0001, "loss": 0.7161, "mean_abs_error": 103.22764374571145, "mean_abs_error_last_10": 44.44731726494201, "mean_abs_error_last_25": 58.628149455331766, "mean_abs_error_last_50": 64.1420673722503, "mean_pred_prob": 0.05219924543052912, "mean_pred_prob_last_10": 0.2712103251367807, "mean_pred_prob_last_25": 0.1452758466824889, "mean_pred_prob_last_50": 0.08828137591481208, "mean_token_accuracy": 0.8804336071014405, "step": 39540 }, { "epoch": 0.7030736138517056, "grad_norm": 1.1919861837903363, "learning_rate": 0.0001, "loss": 0.6864, "mean_abs_error": 202.8860997131091, "mean_abs_error_last_10": 56.19253166315823, "mean_abs_error_last_25": 92.81715198673268, "mean_abs_error_last_50": 110.06580845118185, "mean_pred_prob": 0.05950509631074965, "mean_pred_prob_last_10": 0.28243078105151653, "mean_pred_prob_last_25": 0.16266819825395942, "mean_pred_prob_last_50": 0.09990344732068479, "mean_token_accuracy": 0.8765932321548462, "step": 39550 }, { "epoch": 0.7032513821485077, "grad_norm": 1.5786855173027903, "learning_rate": 0.0001, "loss": 0.667, "mean_abs_error": 318.70029628520444, "mean_abs_error_last_10": 60.07146551530169, "mean_abs_error_last_25": 87.61839761292799, "mean_abs_error_last_50": 137.9129102932296, "mean_pred_prob": 0.04910852299071848, "mean_pred_prob_last_10": 0.24204475919250398, "mean_pred_prob_last_25": 0.13079682900570333, "mean_pred_prob_last_50": 0.08166493700118735, "mean_token_accuracy": 0.8682508111000061, "step": 39560 }, { "epoch": 0.7034291504453096, "grad_norm": 1.5862063067498975, "learning_rate": 0.0001, "loss": 0.7167, "mean_abs_error": 1413.4050171324566, "mean_abs_error_last_10": 719.5427429925596, "mean_abs_error_last_25": 836.9668635223767, "mean_abs_error_last_50": 1083.3526360512215, "mean_pred_prob": 0.01738626449077856, "mean_pred_prob_last_10": 0.08303269502939656, "mean_pred_prob_last_25": 0.047330977887031625, "mean_pred_prob_last_50": 0.02900963788270019, "mean_token_accuracy": 0.8835051476955413, "step": 39570 }, { "epoch": 0.7036069187421116, "grad_norm": 1.178799478259863, "learning_rate": 0.0001, "loss": 0.603, "mean_abs_error": 239.89554692944375, "mean_abs_error_last_10": 43.39017374039936, "mean_abs_error_last_25": 52.03898286391982, "mean_abs_error_last_50": 105.58980167463406, "mean_pred_prob": 0.05610944740474224, "mean_pred_prob_last_10": 0.26829538606107234, "mean_pred_prob_last_25": 0.1551164399832487, "mean_pred_prob_last_50": 0.09380759773775935, "mean_token_accuracy": 0.8807746350765229, "step": 39580 }, { "epoch": 0.7037846870389135, "grad_norm": 3.317907998541728, "learning_rate": 0.0001, "loss": 0.6785, "mean_abs_error": 277.8043562032442, "mean_abs_error_last_10": 73.44385824918928, "mean_abs_error_last_25": 110.59880472284405, "mean_abs_error_last_50": 179.71278335170118, "mean_pred_prob": 0.05844703605398536, "mean_pred_prob_last_10": 0.2680137903429568, "mean_pred_prob_last_25": 0.15478622566442937, "mean_pred_prob_last_50": 0.09666833829833195, "mean_token_accuracy": 0.8770922243595123, "step": 39590 }, { "epoch": 0.7039624553357154, "grad_norm": 1.232963072078601, "learning_rate": 0.0001, "loss": 0.6607, "mean_abs_error": 627.0179508136913, "mean_abs_error_last_10": 194.2548136409126, "mean_abs_error_last_25": 318.4764551820303, "mean_abs_error_last_50": 404.0845568240607, "mean_pred_prob": 0.03618464304599911, "mean_pred_prob_last_10": 0.1910125331953168, "mean_pred_prob_last_25": 0.10333457309752703, "mean_pred_prob_last_50": 0.062277009477838874, "mean_token_accuracy": 0.8763550996780396, "step": 39600 }, { "epoch": 0.7041402236325174, "grad_norm": 1.5327389270317406, "learning_rate": 0.0001, "loss": 0.6288, "mean_abs_error": 100.72783227659382, "mean_abs_error_last_10": 21.444885986155683, "mean_abs_error_last_25": 33.32184432429304, "mean_abs_error_last_50": 53.5162947329694, "mean_pred_prob": 0.05717750042676926, "mean_pred_prob_last_10": 0.2627576649188995, "mean_pred_prob_last_25": 0.1495245635509491, "mean_pred_prob_last_50": 0.09395819995552301, "mean_token_accuracy": 0.8725103080272675, "step": 39610 }, { "epoch": 0.7043179919293193, "grad_norm": 0.9359926417532504, "learning_rate": 0.0001, "loss": 0.9189, "mean_abs_error": 181.91361624368608, "mean_abs_error_last_10": 66.5827080376531, "mean_abs_error_last_25": 108.49736119488018, "mean_abs_error_last_50": 121.59673153707081, "mean_pred_prob": 0.051936558354645966, "mean_pred_prob_last_10": 0.2618406698107719, "mean_pred_prob_last_25": 0.14747267961502075, "mean_pred_prob_last_50": 0.08902144953608512, "mean_token_accuracy": 0.8691780805587769, "step": 39620 }, { "epoch": 0.7044957602261213, "grad_norm": 2.5145734299515237, "learning_rate": 0.0001, "loss": 0.7266, "mean_abs_error": 589.3038324291541, "mean_abs_error_last_10": 215.27303858786286, "mean_abs_error_last_25": 302.0168403242102, "mean_abs_error_last_50": 376.0205228588935, "mean_pred_prob": 0.03680868839146569, "mean_pred_prob_last_10": 0.17785943375201896, "mean_pred_prob_last_25": 0.09781636849511414, "mean_pred_prob_last_50": 0.060803085542283954, "mean_token_accuracy": 0.8761690080165863, "step": 39630 }, { "epoch": 0.7046735285229232, "grad_norm": 0.9690700957736492, "learning_rate": 0.0001, "loss": 1.0997, "mean_abs_error": 477.9347806806544, "mean_abs_error_last_10": 94.1255297690403, "mean_abs_error_last_25": 177.6252662170129, "mean_abs_error_last_50": 254.22042461625082, "mean_pred_prob": 0.032134219515137376, "mean_pred_prob_last_10": 0.17099608201533556, "mean_pred_prob_last_25": 0.09162590289488434, "mean_pred_prob_last_50": 0.055105018941685555, "mean_token_accuracy": 0.8760420501232147, "step": 39640 }, { "epoch": 0.7048512968197251, "grad_norm": 2.29660388762597, "learning_rate": 0.0001, "loss": 0.743, "mean_abs_error": 209.77093153398883, "mean_abs_error_last_10": 100.17377874581413, "mean_abs_error_last_25": 110.90744820210666, "mean_abs_error_last_50": 128.14411436630814, "mean_pred_prob": 0.048662047390826045, "mean_pred_prob_last_10": 0.24079149579629303, "mean_pred_prob_last_25": 0.13656022641807794, "mean_pred_prob_last_50": 0.08288488779217004, "mean_token_accuracy": 0.8742417931556702, "step": 39650 }, { "epoch": 0.7050290651165271, "grad_norm": 1.8405216067999524, "learning_rate": 0.0001, "loss": 0.7304, "mean_abs_error": 755.0627681107243, "mean_abs_error_last_10": 413.67565766034784, "mean_abs_error_last_25": 443.5675008939238, "mean_abs_error_last_50": 531.0207922071188, "mean_pred_prob": 0.054610344298998824, "mean_pred_prob_last_10": 0.21965458778722677, "mean_pred_prob_last_25": 0.13374496518226806, "mean_pred_prob_last_50": 0.08695272303011733, "mean_token_accuracy": 0.8798046112060547, "step": 39660 }, { "epoch": 0.705206833413329, "grad_norm": 0.7380752172596509, "learning_rate": 0.0001, "loss": 0.7172, "mean_abs_error": 335.60670837495235, "mean_abs_error_last_10": 110.35048634341392, "mean_abs_error_last_25": 148.8204123266591, "mean_abs_error_last_50": 190.24157013473663, "mean_pred_prob": 0.042021069885231556, "mean_pred_prob_last_10": 0.1839089596644044, "mean_pred_prob_last_25": 0.10762719372287392, "mean_pred_prob_last_50": 0.06798547599464655, "mean_token_accuracy": 0.8804458439350128, "step": 39670 }, { "epoch": 0.7053846017101311, "grad_norm": 1.3321330407602763, "learning_rate": 0.0001, "loss": 0.8178, "mean_abs_error": 737.2454722061691, "mean_abs_error_last_10": 220.83047986773346, "mean_abs_error_last_25": 340.33096736071633, "mean_abs_error_last_50": 465.9005168920932, "mean_pred_prob": 0.03872629004763439, "mean_pred_prob_last_10": 0.17968953495146706, "mean_pred_prob_last_25": 0.10219725248753093, "mean_pred_prob_last_50": 0.0642254671867704, "mean_token_accuracy": 0.8723260223865509, "step": 39680 }, { "epoch": 0.705562370006933, "grad_norm": 1.3861688615598755, "learning_rate": 0.0001, "loss": 0.7748, "mean_abs_error": 1217.8335674934547, "mean_abs_error_last_10": 634.4479873759295, "mean_abs_error_last_25": 738.4315161657016, "mean_abs_error_last_50": 905.6582751621343, "mean_pred_prob": 0.0479018473502947, "mean_pred_prob_last_10": 0.1998487213772023, "mean_pred_prob_last_25": 0.1173600759851979, "mean_pred_prob_last_50": 0.07464252838108223, "mean_token_accuracy": 0.8750670075416564, "step": 39690 }, { "epoch": 0.7057401383037349, "grad_norm": 1.7748322171991195, "learning_rate": 0.0001, "loss": 0.7332, "mean_abs_error": 236.08827911848851, "mean_abs_error_last_10": 107.35972063439355, "mean_abs_error_last_25": 103.24390789776848, "mean_abs_error_last_50": 125.68203383769216, "mean_pred_prob": 0.04988952339626849, "mean_pred_prob_last_10": 0.23795584887266158, "mean_pred_prob_last_25": 0.13762371372431517, "mean_pred_prob_last_50": 0.0833709517493844, "mean_token_accuracy": 0.8690730512142182, "step": 39700 }, { "epoch": 0.7059179066005369, "grad_norm": 1.5595571393890721, "learning_rate": 0.0001, "loss": 0.6682, "mean_abs_error": 665.3133831077308, "mean_abs_error_last_10": 255.43519722226807, "mean_abs_error_last_25": 317.3694216145809, "mean_abs_error_last_50": 422.5625088292163, "mean_pred_prob": 0.04175455369404517, "mean_pred_prob_last_10": 0.21425930956029332, "mean_pred_prob_last_25": 0.10942508109146729, "mean_pred_prob_last_50": 0.0683189850708004, "mean_token_accuracy": 0.8764667689800263, "step": 39710 }, { "epoch": 0.7060956748973388, "grad_norm": 1.505255006159496, "learning_rate": 0.0001, "loss": 1.0432, "mean_abs_error": 1157.5667882036014, "mean_abs_error_last_10": 549.6903171593244, "mean_abs_error_last_25": 720.634392967406, "mean_abs_error_last_50": 929.1571676555607, "mean_pred_prob": 0.030766712031618227, "mean_pred_prob_last_10": 0.1480050821613986, "mean_pred_prob_last_25": 0.08244959040603135, "mean_pred_prob_last_50": 0.05118676566926297, "mean_token_accuracy": 0.876377934217453, "step": 39720 }, { "epoch": 0.7062734431941408, "grad_norm": 2.325908754775072, "learning_rate": 0.0001, "loss": 0.6802, "mean_abs_error": 318.83704820016055, "mean_abs_error_last_10": 89.9295311259642, "mean_abs_error_last_25": 150.16210841120022, "mean_abs_error_last_50": 192.76009321477417, "mean_pred_prob": 0.03787562190555036, "mean_pred_prob_last_10": 0.18942944053560495, "mean_pred_prob_last_25": 0.10017001833766699, "mean_pred_prob_last_50": 0.06196639919653535, "mean_token_accuracy": 0.8661380589008332, "step": 39730 }, { "epoch": 0.7064512114909427, "grad_norm": 1.4784779192093735, "learning_rate": 0.0001, "loss": 0.7193, "mean_abs_error": 243.15242431226503, "mean_abs_error_last_10": 62.19405783233435, "mean_abs_error_last_25": 88.04770350094041, "mean_abs_error_last_50": 130.54683614560054, "mean_pred_prob": 0.052610459830611946, "mean_pred_prob_last_10": 0.2534133424982429, "mean_pred_prob_last_25": 0.14382528364658356, "mean_pred_prob_last_50": 0.08914822372607886, "mean_token_accuracy": 0.8653737604618073, "step": 39740 }, { "epoch": 0.7066289797877446, "grad_norm": 2.5030677768057634, "learning_rate": 0.0001, "loss": 0.5829, "mean_abs_error": 116.2968628135142, "mean_abs_error_last_10": 38.758915790038834, "mean_abs_error_last_25": 57.26294917721633, "mean_abs_error_last_50": 86.76872862129916, "mean_pred_prob": 0.05408170334994793, "mean_pred_prob_last_10": 0.2619456794112921, "mean_pred_prob_last_25": 0.14678659327328206, "mean_pred_prob_last_50": 0.08920897264033556, "mean_token_accuracy": 0.8805564641952515, "step": 39750 }, { "epoch": 0.7068067480845466, "grad_norm": 2.3566522426349796, "learning_rate": 0.0001, "loss": 0.7512, "mean_abs_error": 254.85106349648655, "mean_abs_error_last_10": 85.27236271657286, "mean_abs_error_last_25": 109.91209024863193, "mean_abs_error_last_50": 169.84675629570881, "mean_pred_prob": 0.03616207712329924, "mean_pred_prob_last_10": 0.18716041259467603, "mean_pred_prob_last_25": 0.10066277869045734, "mean_pred_prob_last_50": 0.06102003124542534, "mean_token_accuracy": 0.8704086184501648, "step": 39760 }, { "epoch": 0.7069845163813485, "grad_norm": 2.168809501966173, "learning_rate": 0.0001, "loss": 0.7829, "mean_abs_error": 937.7818213162033, "mean_abs_error_last_10": 294.7421426294362, "mean_abs_error_last_25": 380.42469051604087, "mean_abs_error_last_50": 556.3002941561417, "mean_pred_prob": 0.0339537688967539, "mean_pred_prob_last_10": 0.1645230749680195, "mean_pred_prob_last_25": 0.0929269875807222, "mean_pred_prob_last_50": 0.05622437629499473, "mean_token_accuracy": 0.8692718863487243, "step": 39770 }, { "epoch": 0.7071622846781505, "grad_norm": 1.6271254687876129, "learning_rate": 0.0001, "loss": 0.6579, "mean_abs_error": 491.3901582725392, "mean_abs_error_last_10": 247.0580331156118, "mean_abs_error_last_25": 283.3601930921655, "mean_abs_error_last_50": 324.0399345464864, "mean_pred_prob": 0.036066818318795414, "mean_pred_prob_last_10": 0.15802297269692644, "mean_pred_prob_last_25": 0.09204966640099883, "mean_pred_prob_last_50": 0.05823891608742997, "mean_token_accuracy": 0.8842535972595215, "step": 39780 }, { "epoch": 0.7073400529749524, "grad_norm": 1.2417186609089153, "learning_rate": 0.0001, "loss": 0.6543, "mean_abs_error": 388.5344070262116, "mean_abs_error_last_10": 146.04150206304857, "mean_abs_error_last_25": 179.3800107652017, "mean_abs_error_last_50": 258.04803766992444, "mean_pred_prob": 0.032255116873420774, "mean_pred_prob_last_10": 0.16698423605412244, "mean_pred_prob_last_25": 0.09154517697170377, "mean_pred_prob_last_50": 0.05428816578350961, "mean_token_accuracy": 0.8803677439689637, "step": 39790 }, { "epoch": 0.7075178212717544, "grad_norm": 1.0212407196029178, "learning_rate": 0.0001, "loss": 0.6678, "mean_abs_error": 474.844472705989, "mean_abs_error_last_10": 176.95258382196923, "mean_abs_error_last_25": 203.15729207961036, "mean_abs_error_last_50": 263.8968720725306, "mean_pred_prob": 0.03980479033780284, "mean_pred_prob_last_10": 0.17643695157021283, "mean_pred_prob_last_25": 0.1027037744410336, "mean_pred_prob_last_50": 0.06490920684300364, "mean_token_accuracy": 0.8696164011955261, "step": 39800 }, { "epoch": 0.7076955895685564, "grad_norm": 1.2300332317290221, "learning_rate": 0.0001, "loss": 0.6598, "mean_abs_error": 561.5570890547664, "mean_abs_error_last_10": 110.48825549281955, "mean_abs_error_last_25": 167.5132302465474, "mean_abs_error_last_50": 292.968693722604, "mean_pred_prob": 0.038745357474545015, "mean_pred_prob_last_10": 0.20391654865816236, "mean_pred_prob_last_25": 0.1110639785300009, "mean_pred_prob_last_50": 0.06565254204906523, "mean_token_accuracy": 0.875389814376831, "step": 39810 }, { "epoch": 0.7078733578653583, "grad_norm": 2.4722425786868625, "learning_rate": 0.0001, "loss": 0.7764, "mean_abs_error": 448.189818559705, "mean_abs_error_last_10": 83.31692289740212, "mean_abs_error_last_25": 153.92369711943817, "mean_abs_error_last_50": 298.8046187513165, "mean_pred_prob": 0.031217152485623956, "mean_pred_prob_last_10": 0.17021284326910974, "mean_pred_prob_last_25": 0.0930519986897707, "mean_pred_prob_last_50": 0.05413574306294322, "mean_token_accuracy": 0.8726610362529754, "step": 39820 }, { "epoch": 0.7080511261621603, "grad_norm": 0.8555480656109663, "learning_rate": 0.0001, "loss": 0.7537, "mean_abs_error": 614.0761209710616, "mean_abs_error_last_10": 228.93132418634872, "mean_abs_error_last_25": 264.5810340660585, "mean_abs_error_last_50": 404.6053927904361, "mean_pred_prob": 0.04818449833546765, "mean_pred_prob_last_10": 0.23632382313371636, "mean_pred_prob_last_25": 0.126478797494201, "mean_pred_prob_last_50": 0.07884128867881372, "mean_token_accuracy": 0.8757307887077331, "step": 39830 }, { "epoch": 0.7082288944589622, "grad_norm": 1.9307347617705262, "learning_rate": 0.0001, "loss": 0.7327, "mean_abs_error": 314.44604660459055, "mean_abs_error_last_10": 191.11855656325523, "mean_abs_error_last_25": 229.30944899719066, "mean_abs_error_last_50": 250.5023856360467, "mean_pred_prob": 0.03864193957997486, "mean_pred_prob_last_10": 0.18590606029611081, "mean_pred_prob_last_25": 0.10311171873472631, "mean_pred_prob_last_50": 0.06354879562277346, "mean_token_accuracy": 0.8763137936592102, "step": 39840 }, { "epoch": 0.7084066627557641, "grad_norm": 1.5763242740659953, "learning_rate": 0.0001, "loss": 0.7711, "mean_abs_error": 1127.395711219504, "mean_abs_error_last_10": 676.0155955003081, "mean_abs_error_last_25": 773.2272265793032, "mean_abs_error_last_50": 844.1780722051005, "mean_pred_prob": 0.02796441386162769, "mean_pred_prob_last_10": 0.14384291028545704, "mean_pred_prob_last_25": 0.07790825152769684, "mean_pred_prob_last_50": 0.047655333908915055, "mean_token_accuracy": 0.8616089642047882, "step": 39850 }, { "epoch": 0.7085844310525661, "grad_norm": 1.2945077359513193, "learning_rate": 0.0001, "loss": 0.6793, "mean_abs_error": 1249.2634564879813, "mean_abs_error_last_10": 734.4587776253101, "mean_abs_error_last_25": 817.91028266638, "mean_abs_error_last_50": 1009.1460152187607, "mean_pred_prob": 0.03417226521487464, "mean_pred_prob_last_10": 0.16611952067323726, "mean_pred_prob_last_25": 0.0925073458769475, "mean_pred_prob_last_50": 0.05720317490995512, "mean_token_accuracy": 0.8727573335170746, "step": 39860 }, { "epoch": 0.708762199349368, "grad_norm": 1.2103584832143224, "learning_rate": 0.0001, "loss": 0.7036, "mean_abs_error": 266.0225847719924, "mean_abs_error_last_10": 39.11807559956402, "mean_abs_error_last_25": 82.14154625491666, "mean_abs_error_last_50": 131.20783235573467, "mean_pred_prob": 0.04250747594051063, "mean_pred_prob_last_10": 0.22120612510479987, "mean_pred_prob_last_25": 0.11935820647049696, "mean_pred_prob_last_50": 0.07228617367800325, "mean_token_accuracy": 0.8764006793498993, "step": 39870 }, { "epoch": 0.70893996764617, "grad_norm": 1.2886082903913842, "learning_rate": 0.0001, "loss": 0.771, "mean_abs_error": 342.23317556670094, "mean_abs_error_last_10": 86.86929674087887, "mean_abs_error_last_25": 135.5774425036591, "mean_abs_error_last_50": 217.92480418452425, "mean_pred_prob": 0.03604954369366169, "mean_pred_prob_last_10": 0.17028422560542822, "mean_pred_prob_last_25": 0.09543682457879185, "mean_pred_prob_last_50": 0.059861696953885254, "mean_token_accuracy": 0.8565841436386108, "step": 39880 }, { "epoch": 0.7091177359429719, "grad_norm": 1.0141288176324226, "learning_rate": 0.0001, "loss": 0.6482, "mean_abs_error": 303.18099731191086, "mean_abs_error_last_10": 53.06926259935538, "mean_abs_error_last_25": 117.50399232688085, "mean_abs_error_last_50": 158.37990379016722, "mean_pred_prob": 0.046479365811683236, "mean_pred_prob_last_10": 0.22169445641338825, "mean_pred_prob_last_25": 0.12504258006811142, "mean_pred_prob_last_50": 0.07731459438800811, "mean_token_accuracy": 0.8711800098419189, "step": 39890 }, { "epoch": 0.7092955042397738, "grad_norm": 1.3608161613637515, "learning_rate": 0.0001, "loss": 0.7314, "mean_abs_error": 746.3474500745081, "mean_abs_error_last_10": 255.8418355879083, "mean_abs_error_last_25": 315.60552473364385, "mean_abs_error_last_50": 417.3950532395846, "mean_pred_prob": 0.043702075936016625, "mean_pred_prob_last_10": 0.21665754333371295, "mean_pred_prob_last_25": 0.11941614079987631, "mean_pred_prob_last_50": 0.07373859601793811, "mean_token_accuracy": 0.8705491364002228, "step": 39900 }, { "epoch": 0.7094732725365758, "grad_norm": 1.242193592153278, "learning_rate": 0.0001, "loss": 0.7556, "mean_abs_error": 505.8226111954993, "mean_abs_error_last_10": 268.12025524121526, "mean_abs_error_last_25": 381.62852884241573, "mean_abs_error_last_50": 442.0202737128623, "mean_pred_prob": 0.02521775767672807, "mean_pred_prob_last_10": 0.12911629676818848, "mean_pred_prob_last_25": 0.07142135668545961, "mean_pred_prob_last_50": 0.04312796122394502, "mean_token_accuracy": 0.867556768655777, "step": 39910 }, { "epoch": 0.7096510408333778, "grad_norm": 2.9260513391689402, "learning_rate": 0.0001, "loss": 0.6591, "mean_abs_error": 530.1657937952522, "mean_abs_error_last_10": 96.0131307285068, "mean_abs_error_last_25": 156.45693888600547, "mean_abs_error_last_50": 282.37627251020103, "mean_pred_prob": 0.029601848917081952, "mean_pred_prob_last_10": 0.1644275479018688, "mean_pred_prob_last_25": 0.08324758354574442, "mean_pred_prob_last_50": 0.04946740148589015, "mean_token_accuracy": 0.8739083409309387, "step": 39920 }, { "epoch": 0.7098288091301798, "grad_norm": 1.524398562064757, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 958.9985058979835, "mean_abs_error_last_10": 245.92643859629362, "mean_abs_error_last_25": 355.9005575651476, "mean_abs_error_last_50": 535.4370018241739, "mean_pred_prob": 0.04477144833072089, "mean_pred_prob_last_10": 0.19420716243912467, "mean_pred_prob_last_25": 0.12116774542955681, "mean_pred_prob_last_50": 0.07716187933110633, "mean_token_accuracy": 0.8745171546936035, "step": 39930 }, { "epoch": 0.7100065774269817, "grad_norm": 2.442010878973854, "learning_rate": 0.0001, "loss": 0.6372, "mean_abs_error": 397.17169292600096, "mean_abs_error_last_10": 160.9447263187908, "mean_abs_error_last_25": 153.71410537653261, "mean_abs_error_last_50": 258.27417859132265, "mean_pred_prob": 0.031358057237230244, "mean_pred_prob_last_10": 0.1587639169767499, "mean_pred_prob_last_25": 0.085745392087847, "mean_pred_prob_last_50": 0.052058174600824715, "mean_token_accuracy": 0.8697573423385621, "step": 39940 }, { "epoch": 0.7101843457237836, "grad_norm": 1.799294444200469, "learning_rate": 0.0001, "loss": 0.8476, "mean_abs_error": 193.19702239348817, "mean_abs_error_last_10": 55.636989725110666, "mean_abs_error_last_25": 78.11915245129471, "mean_abs_error_last_50": 148.44299349171422, "mean_pred_prob": 0.03864376645069569, "mean_pred_prob_last_10": 0.19104971289634703, "mean_pred_prob_last_25": 0.10477933594956994, "mean_pred_prob_last_50": 0.06427624560892582, "mean_token_accuracy": 0.867109602689743, "step": 39950 }, { "epoch": 0.7103621140205856, "grad_norm": 3.888695004933382, "learning_rate": 0.0001, "loss": 0.6845, "mean_abs_error": 561.2615704726328, "mean_abs_error_last_10": 236.98390096395832, "mean_abs_error_last_25": 263.32643997335265, "mean_abs_error_last_50": 341.09510007248093, "mean_pred_prob": 0.03360131550580263, "mean_pred_prob_last_10": 0.16945465411990882, "mean_pred_prob_last_25": 0.09373479095520451, "mean_pred_prob_last_50": 0.05682768946280703, "mean_token_accuracy": 0.86448894739151, "step": 39960 }, { "epoch": 0.7105398823173875, "grad_norm": 1.5503777131823722, "learning_rate": 0.0001, "loss": 0.7714, "mean_abs_error": 188.41366812213522, "mean_abs_error_last_10": 47.79358457592632, "mean_abs_error_last_25": 71.38942765612455, "mean_abs_error_last_50": 99.63003533644817, "mean_pred_prob": 0.044916134979575875, "mean_pred_prob_last_10": 0.23660225812345742, "mean_pred_prob_last_25": 0.13170589180663228, "mean_pred_prob_last_50": 0.07869400936178864, "mean_token_accuracy": 0.8718436360359192, "step": 39970 }, { "epoch": 0.7107176506141895, "grad_norm": 1.506561038757114, "learning_rate": 0.0001, "loss": 0.7638, "mean_abs_error": 454.237880978075, "mean_abs_error_last_10": 148.48793788900284, "mean_abs_error_last_25": 174.67558305643098, "mean_abs_error_last_50": 224.8433158030126, "mean_pred_prob": 0.03950850325636566, "mean_pred_prob_last_10": 0.20219528400339187, "mean_pred_prob_last_25": 0.10612151471432299, "mean_pred_prob_last_50": 0.06594349006190896, "mean_token_accuracy": 0.876352322101593, "step": 39980 }, { "epoch": 0.7108954189109914, "grad_norm": 1.8376065367830574, "learning_rate": 0.0001, "loss": 0.7438, "mean_abs_error": 479.86304226300734, "mean_abs_error_last_10": 177.22170325918023, "mean_abs_error_last_25": 177.74083863666874, "mean_abs_error_last_50": 329.80385030581004, "mean_pred_prob": 0.050261427811346945, "mean_pred_prob_last_10": 0.2159074192866683, "mean_pred_prob_last_25": 0.126717512588948, "mean_pred_prob_last_50": 0.0819774718489498, "mean_token_accuracy": 0.8751035392284393, "step": 39990 }, { "epoch": 0.7110731872077933, "grad_norm": 0.9954972838920594, "learning_rate": 0.0001, "loss": 0.7123, "mean_abs_error": 647.8720776857436, "mean_abs_error_last_10": 194.40334379765397, "mean_abs_error_last_25": 254.42339260982308, "mean_abs_error_last_50": 343.05967621166053, "mean_pred_prob": 0.041105016210349274, "mean_pred_prob_last_10": 0.2128824101644568, "mean_pred_prob_last_25": 0.11505991229787468, "mean_pred_prob_last_50": 0.06911678970791399, "mean_token_accuracy": 0.8658592104911804, "step": 40000 }, { "epoch": 0.7112509555045953, "grad_norm": 1.7549429631944922, "learning_rate": 0.0001, "loss": 0.6474, "mean_abs_error": 906.1857363461161, "mean_abs_error_last_10": 367.25334455300555, "mean_abs_error_last_25": 509.6880242682323, "mean_abs_error_last_50": 644.9346164136249, "mean_pred_prob": 0.0494056971889222, "mean_pred_prob_last_10": 0.23381879798835142, "mean_pred_prob_last_25": 0.1301610629190691, "mean_pred_prob_last_50": 0.08198075714753941, "mean_token_accuracy": 0.8729501068592072, "step": 40010 }, { "epoch": 0.7114287238013972, "grad_norm": 1.7415279725397803, "learning_rate": 0.0001, "loss": 0.7863, "mean_abs_error": 152.02966977732666, "mean_abs_error_last_10": 34.89904253384037, "mean_abs_error_last_25": 75.71908230553292, "mean_abs_error_last_50": 93.3012146060354, "mean_pred_prob": 0.06421960787847639, "mean_pred_prob_last_10": 0.294142509996891, "mean_pred_prob_last_25": 0.17063454389572144, "mean_pred_prob_last_50": 0.10706617590039968, "mean_token_accuracy": 0.8615513205528259, "step": 40020 }, { "epoch": 0.7116064920981993, "grad_norm": 2.4066120466916665, "learning_rate": 0.0001, "loss": 0.6137, "mean_abs_error": 622.0188751466462, "mean_abs_error_last_10": 375.61136760347387, "mean_abs_error_last_25": 431.09561498609503, "mean_abs_error_last_50": 493.03038936805353, "mean_pred_prob": 0.03501015760994051, "mean_pred_prob_last_10": 0.18797356695286, "mean_pred_prob_last_25": 0.09908158374018967, "mean_pred_prob_last_50": 0.05931173918652348, "mean_token_accuracy": 0.8788359344005585, "step": 40030 }, { "epoch": 0.7117842603950012, "grad_norm": 2.7108174277551593, "learning_rate": 0.0001, "loss": 0.7581, "mean_abs_error": 238.2226974742945, "mean_abs_error_last_10": 92.75516617861201, "mean_abs_error_last_25": 139.0740241441273, "mean_abs_error_last_50": 201.2681139139093, "mean_pred_prob": 0.04003856806084514, "mean_pred_prob_last_10": 0.182486954331398, "mean_pred_prob_last_25": 0.10440033860504627, "mean_pred_prob_last_50": 0.06642980894539505, "mean_token_accuracy": 0.8809988319873809, "step": 40040 }, { "epoch": 0.7119620286918031, "grad_norm": 0.8035785227629321, "learning_rate": 0.0001, "loss": 0.7147, "mean_abs_error": 1253.4111311074862, "mean_abs_error_last_10": 435.6114763728466, "mean_abs_error_last_25": 552.3135486501251, "mean_abs_error_last_50": 840.2993694782117, "mean_pred_prob": 0.03020241387130227, "mean_pred_prob_last_10": 0.14834879060508682, "mean_pred_prob_last_25": 0.08584755917545409, "mean_pred_prob_last_50": 0.05137952659861185, "mean_token_accuracy": 0.8687335431575776, "step": 40050 }, { "epoch": 0.7121397969886051, "grad_norm": 0.8661023441239052, "learning_rate": 0.0001, "loss": 0.6651, "mean_abs_error": 161.05315997198483, "mean_abs_error_last_10": 100.68549586979742, "mean_abs_error_last_25": 90.195644656431, "mean_abs_error_last_50": 109.20397680021506, "mean_pred_prob": 0.06004160437732935, "mean_pred_prob_last_10": 0.2784084664657712, "mean_pred_prob_last_25": 0.15604369826614856, "mean_pred_prob_last_50": 0.09917405918240547, "mean_token_accuracy": 0.8677013516426086, "step": 40060 }, { "epoch": 0.712317565285407, "grad_norm": 1.0803074701392448, "learning_rate": 0.0001, "loss": 0.6781, "mean_abs_error": 284.84192249061766, "mean_abs_error_last_10": 71.05117358925983, "mean_abs_error_last_25": 101.9316174138055, "mean_abs_error_last_50": 165.57805272919902, "mean_pred_prob": 0.04315921114757657, "mean_pred_prob_last_10": 0.21752409413456916, "mean_pred_prob_last_25": 0.11784745762124657, "mean_pred_prob_last_50": 0.07226425660774112, "mean_token_accuracy": 0.8751096725463867, "step": 40070 }, { "epoch": 0.712495333582209, "grad_norm": 1.2860923966893145, "learning_rate": 0.0001, "loss": 0.6962, "mean_abs_error": 304.0637937757743, "mean_abs_error_last_10": 86.81143947269774, "mean_abs_error_last_25": 129.8054939553212, "mean_abs_error_last_50": 196.73010637206568, "mean_pred_prob": 0.03395930794067681, "mean_pred_prob_last_10": 0.17436134815216064, "mean_pred_prob_last_25": 0.09552290886640549, "mean_pred_prob_last_50": 0.058195541612803936, "mean_token_accuracy": 0.8762640357017517, "step": 40080 }, { "epoch": 0.7126731018790109, "grad_norm": 1.9307955520315776, "learning_rate": 0.0001, "loss": 0.7985, "mean_abs_error": 533.0078563451628, "mean_abs_error_last_10": 264.9450664092404, "mean_abs_error_last_25": 425.2426011755136, "mean_abs_error_last_50": 440.5147103159923, "mean_pred_prob": 0.04264173046103679, "mean_pred_prob_last_10": 0.19942716481164097, "mean_pred_prob_last_25": 0.11719753002398647, "mean_pred_prob_last_50": 0.07171261400799267, "mean_token_accuracy": 0.8740039944648743, "step": 40090 }, { "epoch": 0.7128508701758128, "grad_norm": 2.9417584584865484, "learning_rate": 0.0001, "loss": 0.6358, "mean_abs_error": 372.2883081027799, "mean_abs_error_last_10": 65.80061119119553, "mean_abs_error_last_25": 101.36351819657796, "mean_abs_error_last_50": 165.68085265871497, "mean_pred_prob": 0.05089890953386202, "mean_pred_prob_last_10": 0.24272257285192608, "mean_pred_prob_last_25": 0.13712022128747775, "mean_pred_prob_last_50": 0.08491499763913453, "mean_token_accuracy": 0.8733094334602356, "step": 40100 }, { "epoch": 0.7130286384726148, "grad_norm": 1.2819986075266314, "learning_rate": 0.0001, "loss": 0.6743, "mean_abs_error": 904.0037964867317, "mean_abs_error_last_10": 402.28097718184375, "mean_abs_error_last_25": 500.12449251562293, "mean_abs_error_last_50": 643.589115899443, "mean_pred_prob": 0.03442271212552441, "mean_pred_prob_last_10": 0.17603222909092436, "mean_pred_prob_last_25": 0.09455351435462944, "mean_pred_prob_last_50": 0.05674910060188267, "mean_token_accuracy": 0.8688984096050263, "step": 40110 }, { "epoch": 0.7132064067694167, "grad_norm": 3.956286739636634, "learning_rate": 0.0001, "loss": 0.6367, "mean_abs_error": 295.59382549426147, "mean_abs_error_last_10": 207.06919861628361, "mean_abs_error_last_25": 205.67927380290084, "mean_abs_error_last_50": 283.20234886866524, "mean_pred_prob": 0.05659008142538369, "mean_pred_prob_last_10": 0.24416961744427682, "mean_pred_prob_last_25": 0.14399603474885225, "mean_pred_prob_last_50": 0.09245865656994283, "mean_token_accuracy": 0.8735043108463287, "step": 40120 }, { "epoch": 0.7133841750662187, "grad_norm": 2.3114941471313624, "learning_rate": 0.0001, "loss": 0.849, "mean_abs_error": 452.2210624425472, "mean_abs_error_last_10": 97.56968599400624, "mean_abs_error_last_25": 196.98917377027217, "mean_abs_error_last_50": 242.26152346344497, "mean_pred_prob": 0.06958270207978785, "mean_pred_prob_last_10": 0.24301736801862717, "mean_pred_prob_last_25": 0.16198728792369366, "mean_pred_prob_last_50": 0.10707069505006075, "mean_token_accuracy": 0.8702476739883422, "step": 40130 }, { "epoch": 0.7135619433630206, "grad_norm": 3.069208757142506, "learning_rate": 0.0001, "loss": 0.7358, "mean_abs_error": 837.7054939989691, "mean_abs_error_last_10": 167.4924564103873, "mean_abs_error_last_25": 283.4160464824812, "mean_abs_error_last_50": 451.86339171008296, "mean_pred_prob": 0.04123832082259469, "mean_pred_prob_last_10": 0.17736296536168084, "mean_pred_prob_last_25": 0.10766861018491909, "mean_pred_prob_last_50": 0.06830883516231552, "mean_token_accuracy": 0.8717103600502014, "step": 40140 }, { "epoch": 0.7137397116598226, "grad_norm": 1.889939643481415, "learning_rate": 0.0001, "loss": 0.7854, "mean_abs_error": 754.7637568053501, "mean_abs_error_last_10": 329.2634124761988, "mean_abs_error_last_25": 347.91451748333236, "mean_abs_error_last_50": 508.4204328067466, "mean_pred_prob": 0.022042554698418825, "mean_pred_prob_last_10": 0.11793533654417843, "mean_pred_prob_last_25": 0.06212584786117077, "mean_pred_prob_last_50": 0.03694529395434074, "mean_token_accuracy": 0.8737330257892608, "step": 40150 }, { "epoch": 0.7139174799566246, "grad_norm": 2.2341069788204493, "learning_rate": 0.0001, "loss": 0.6801, "mean_abs_error": 997.1567124291236, "mean_abs_error_last_10": 621.0442781519703, "mean_abs_error_last_25": 639.1011775933199, "mean_abs_error_last_50": 748.9520631826462, "mean_pred_prob": 0.038769097582553515, "mean_pred_prob_last_10": 0.19338268528517802, "mean_pred_prob_last_25": 0.11041662410425487, "mean_pred_prob_last_50": 0.06682876691338606, "mean_token_accuracy": 0.8814803957939148, "step": 40160 }, { "epoch": 0.7140952482534265, "grad_norm": 1.3282198874915427, "learning_rate": 0.0001, "loss": 0.7748, "mean_abs_error": 249.25200610722308, "mean_abs_error_last_10": 72.64970092261304, "mean_abs_error_last_25": 73.31057966210035, "mean_abs_error_last_50": 150.50269058186257, "mean_pred_prob": 0.051294862385839225, "mean_pred_prob_last_10": 0.23849170245230197, "mean_pred_prob_last_25": 0.13813368547707797, "mean_pred_prob_last_50": 0.08447798723354935, "mean_token_accuracy": 0.8803399741649628, "step": 40170 }, { "epoch": 0.7142730165502285, "grad_norm": 1.7021850710710484, "learning_rate": 0.0001, "loss": 0.7165, "mean_abs_error": 94.2979929694939, "mean_abs_error_last_10": 25.636725789253536, "mean_abs_error_last_25": 44.295423521778915, "mean_abs_error_last_50": 64.3222651677963, "mean_pred_prob": 0.063710549287498, "mean_pred_prob_last_10": 0.2769420981407166, "mean_pred_prob_last_25": 0.16737489476799966, "mean_pred_prob_last_50": 0.10567845962941647, "mean_token_accuracy": 0.8651356756687164, "step": 40180 }, { "epoch": 0.7144507848470304, "grad_norm": 0.9498411014062812, "learning_rate": 0.0001, "loss": 0.7798, "mean_abs_error": 516.7094911844628, "mean_abs_error_last_10": 255.79380764012848, "mean_abs_error_last_25": 406.96345293775573, "mean_abs_error_last_50": 478.3679373056522, "mean_pred_prob": 0.03300147099653259, "mean_pred_prob_last_10": 0.16320639597252012, "mean_pred_prob_last_25": 0.09176967144012452, "mean_pred_prob_last_50": 0.055272325361147526, "mean_token_accuracy": 0.869704931974411, "step": 40190 }, { "epoch": 0.7146285531438323, "grad_norm": 1.8892881246070437, "learning_rate": 0.0001, "loss": 0.645, "mean_abs_error": 391.3946693895237, "mean_abs_error_last_10": 98.8580933616361, "mean_abs_error_last_25": 166.1882650484378, "mean_abs_error_last_50": 251.6150931226313, "mean_pred_prob": 0.037819431291427466, "mean_pred_prob_last_10": 0.19000855567865074, "mean_pred_prob_last_25": 0.10147802799474448, "mean_pred_prob_last_50": 0.06300350683741271, "mean_token_accuracy": 0.8840583026409149, "step": 40200 }, { "epoch": 0.7148063214406343, "grad_norm": 1.7719880482652484, "learning_rate": 0.0001, "loss": 0.767, "mean_abs_error": 220.07072038448595, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 140.92374253978284, "mean_pred_prob": 0.07468211795203387, "mean_pred_prob_last_10": 0.19328944869339465, "mean_pred_prob_last_25": 0.12977356342598795, "mean_pred_prob_last_50": 0.10986415022052824, "mean_token_accuracy": 0.8677653193473815, "step": 40210 }, { "epoch": 0.7149840897374362, "grad_norm": 1.215272682943167, "learning_rate": 0.0001, "loss": 0.7068, "mean_abs_error": 395.8826611352809, "mean_abs_error_last_10": 350.4473779777226, "mean_abs_error_last_25": 362.460159980414, "mean_abs_error_last_50": 351.57216562563826, "mean_pred_prob": 0.03898497701156885, "mean_pred_prob_last_10": 0.18622314035892487, "mean_pred_prob_last_25": 0.10748869347153231, "mean_pred_prob_last_50": 0.06570271031232551, "mean_token_accuracy": 0.874593997001648, "step": 40220 }, { "epoch": 0.7151618580342382, "grad_norm": 1.3048888501359859, "learning_rate": 0.0001, "loss": 0.7683, "mean_abs_error": 185.12023968036416, "mean_abs_error_last_10": 69.2776050105539, "mean_abs_error_last_25": 96.93776160232315, "mean_abs_error_last_50": 114.25852173077813, "mean_pred_prob": 0.04625990781933069, "mean_pred_prob_last_10": 0.2371396142989397, "mean_pred_prob_last_25": 0.13009957913309336, "mean_pred_prob_last_50": 0.07847791770473123, "mean_token_accuracy": 0.8651889264583588, "step": 40230 }, { "epoch": 0.7153396263310401, "grad_norm": 1.3366514741393534, "learning_rate": 0.0001, "loss": 0.6554, "mean_abs_error": 202.14587095621846, "mean_abs_error_last_10": 102.17722255338442, "mean_abs_error_last_25": 92.89100739680471, "mean_abs_error_last_50": 122.7090312984626, "mean_pred_prob": 0.05048142252489925, "mean_pred_prob_last_10": 0.23044114727526904, "mean_pred_prob_last_25": 0.13114119600504637, "mean_pred_prob_last_50": 0.0817699656356126, "mean_token_accuracy": 0.8762322068214417, "step": 40240 }, { "epoch": 0.715517394627842, "grad_norm": 1.7962607488819686, "learning_rate": 0.0001, "loss": 0.6576, "mean_abs_error": 1977.3558991076832, "mean_abs_error_last_10": 1046.9279065835653, "mean_abs_error_last_25": 1166.3038553354472, "mean_abs_error_last_50": 1391.0032687614732, "mean_pred_prob": 0.030098211497534066, "mean_pred_prob_last_10": 0.13660500314435922, "mean_pred_prob_last_25": 0.07847400228492915, "mean_pred_prob_last_50": 0.04929903839802137, "mean_token_accuracy": 0.8738398730754853, "step": 40250 }, { "epoch": 0.715695162924644, "grad_norm": 1.361943612512406, "learning_rate": 0.0001, "loss": 0.6173, "mean_abs_error": 222.3000012175856, "mean_abs_error_last_10": 44.06624143554825, "mean_abs_error_last_25": 55.026428093409, "mean_abs_error_last_50": 112.70517210626133, "mean_pred_prob": 0.06726535609923304, "mean_pred_prob_last_10": 0.2744357710704207, "mean_pred_prob_last_25": 0.17259405637159944, "mean_pred_prob_last_50": 0.11084240609779954, "mean_token_accuracy": 0.8767445802688598, "step": 40260 }, { "epoch": 0.715872931221446, "grad_norm": 1.1115118752869166, "learning_rate": 0.0001, "loss": 0.681, "mean_abs_error": 649.5890670056822, "mean_abs_error_last_10": 219.09814572111472, "mean_abs_error_last_25": 260.39754002058237, "mean_abs_error_last_50": 414.16470886715445, "mean_pred_prob": 0.023317661543842405, "mean_pred_prob_last_10": 0.12071616910398006, "mean_pred_prob_last_25": 0.06536280333530158, "mean_pred_prob_last_50": 0.03976780556840822, "mean_token_accuracy": 0.8799791634082794, "step": 40270 }, { "epoch": 0.716050699518248, "grad_norm": 0.7210017442092875, "learning_rate": 0.0001, "loss": 0.6722, "mean_abs_error": 163.0744428258594, "mean_abs_error_last_10": 36.80180652556548, "mean_abs_error_last_25": 79.46250277877341, "mean_abs_error_last_50": 114.66226322207208, "mean_pred_prob": 0.049489300209097566, "mean_pred_prob_last_10": 0.22466007862240076, "mean_pred_prob_last_25": 0.12790072048082948, "mean_pred_prob_last_50": 0.08073779758997261, "mean_token_accuracy": 0.8720219373703003, "step": 40280 }, { "epoch": 0.7162284678150499, "grad_norm": 1.696530388410987, "learning_rate": 0.0001, "loss": 0.6477, "mean_abs_error": 150.76218988035583, "mean_abs_error_last_10": 42.93131120640215, "mean_abs_error_last_25": 91.74624286183179, "mean_abs_error_last_50": 108.23121084697046, "mean_pred_prob": 0.0529469789005816, "mean_pred_prob_last_10": 0.2589970076456666, "mean_pred_prob_last_25": 0.14088097428902985, "mean_pred_prob_last_50": 0.08733261590823531, "mean_token_accuracy": 0.8728438377380371, "step": 40290 }, { "epoch": 0.7164062361118518, "grad_norm": 0.8712272912632752, "learning_rate": 0.0001, "loss": 0.7016, "mean_abs_error": 373.92432916037006, "mean_abs_error_last_10": 86.20011542885405, "mean_abs_error_last_25": 119.09302908812546, "mean_abs_error_last_50": 234.86762941745047, "mean_pred_prob": 0.03684804062359035, "mean_pred_prob_last_10": 0.18499892614781857, "mean_pred_prob_last_25": 0.10349216796457768, "mean_pred_prob_last_50": 0.061407058592885734, "mean_token_accuracy": 0.8759524881839752, "step": 40300 }, { "epoch": 0.7165840044086538, "grad_norm": 1.4978260201668152, "learning_rate": 0.0001, "loss": 0.6803, "mean_abs_error": 275.93497084319915, "mean_abs_error_last_10": 65.18416721502714, "mean_abs_error_last_25": 93.40360641338935, "mean_abs_error_last_50": 167.6822158198324, "mean_pred_prob": 0.03914635805413127, "mean_pred_prob_last_10": 0.19522372782230377, "mean_pred_prob_last_25": 0.10533973351120948, "mean_pred_prob_last_50": 0.06487673735246062, "mean_token_accuracy": 0.8659255146980286, "step": 40310 }, { "epoch": 0.7167617727054557, "grad_norm": 2.5339055597107807, "learning_rate": 0.0001, "loss": 0.7491, "mean_abs_error": 565.9439993904716, "mean_abs_error_last_10": 175.8918718205544, "mean_abs_error_last_25": 317.93516731675817, "mean_abs_error_last_50": 416.1451176448484, "mean_pred_prob": 0.03917320936452597, "mean_pred_prob_last_10": 0.18549949247390032, "mean_pred_prob_last_25": 0.10528979571536183, "mean_pred_prob_last_50": 0.0650333479512483, "mean_token_accuracy": 0.8677182674407959, "step": 40320 }, { "epoch": 0.7169395410022577, "grad_norm": 1.0284709883316652, "learning_rate": 0.0001, "loss": 0.6573, "mean_abs_error": 751.289077366844, "mean_abs_error_last_10": 462.8656371984406, "mean_abs_error_last_25": 496.6902980703153, "mean_abs_error_last_50": 578.6703035177738, "mean_pred_prob": 0.034348741438589056, "mean_pred_prob_last_10": 0.17677239199692849, "mean_pred_prob_last_25": 0.09252803366107401, "mean_pred_prob_last_50": 0.05687736599356867, "mean_token_accuracy": 0.8826601088047028, "step": 40330 }, { "epoch": 0.7171173092990596, "grad_norm": 1.7782491018508542, "learning_rate": 0.0001, "loss": 0.673, "mean_abs_error": 1205.1542883830969, "mean_abs_error_last_10": 645.3695263652434, "mean_abs_error_last_25": 701.8405312525477, "mean_abs_error_last_50": 866.8971655154689, "mean_pred_prob": 0.030689023925515358, "mean_pred_prob_last_10": 0.16152711029571948, "mean_pred_prob_last_25": 0.08700138734420762, "mean_pred_prob_last_50": 0.05216719114396255, "mean_token_accuracy": 0.8846260130405426, "step": 40340 }, { "epoch": 0.7172950775958615, "grad_norm": 1.782803241184477, "learning_rate": 0.0001, "loss": 0.6673, "mean_abs_error": 116.09940154644804, "mean_abs_error_last_10": 22.030398160235208, "mean_abs_error_last_25": 42.17218401856632, "mean_abs_error_last_50": 71.20972114863036, "mean_pred_prob": 0.04418019251897931, "mean_pred_prob_last_10": 0.21008114144206047, "mean_pred_prob_last_25": 0.12068049386143684, "mean_pred_prob_last_50": 0.07384395450353623, "mean_token_accuracy": 0.8828852534294128, "step": 40350 }, { "epoch": 0.7174728458926635, "grad_norm": 2.7258887216924323, "learning_rate": 0.0001, "loss": 0.6049, "mean_abs_error": 188.017555684167, "mean_abs_error_last_10": 56.57454764684555, "mean_abs_error_last_25": 71.04457973309337, "mean_abs_error_last_50": 97.96155225981144, "mean_pred_prob": 0.048846631124615666, "mean_pred_prob_last_10": 0.22168281972408294, "mean_pred_prob_last_25": 0.12662044446915388, "mean_pred_prob_last_50": 0.08065798450261355, "mean_token_accuracy": 0.8803392469882965, "step": 40360 }, { "epoch": 0.7176506141894654, "grad_norm": 1.2465131794443411, "learning_rate": 0.0001, "loss": 0.7298, "mean_abs_error": 519.7012318081556, "mean_abs_error_last_10": 187.54572814748786, "mean_abs_error_last_25": 248.19410862465307, "mean_abs_error_last_50": 362.2320461244074, "mean_pred_prob": 0.028953715413808822, "mean_pred_prob_last_10": 0.12983658611774446, "mean_pred_prob_last_25": 0.07479684446007014, "mean_pred_prob_last_50": 0.047713185846805575, "mean_token_accuracy": 0.8702366769313812, "step": 40370 }, { "epoch": 0.7178283824862673, "grad_norm": 1.5058271800067218, "learning_rate": 0.0001, "loss": 0.5562, "mean_abs_error": 836.0799967906136, "mean_abs_error_last_10": 400.29918949359455, "mean_abs_error_last_25": 566.9061229111696, "mean_abs_error_last_50": 738.6513664385914, "mean_pred_prob": 0.05376766462577507, "mean_pred_prob_last_10": 0.2183313079702202, "mean_pred_prob_last_25": 0.13423293765226846, "mean_pred_prob_last_50": 0.08620685932401102, "mean_token_accuracy": 0.8794900357723237, "step": 40380 }, { "epoch": 0.7180061507830694, "grad_norm": 1.8306500445781402, "learning_rate": 0.0001, "loss": 0.7227, "mean_abs_error": 241.37358507728337, "mean_abs_error_last_10": 74.32161327425911, "mean_abs_error_last_25": 161.9286042903142, "mean_abs_error_last_50": 185.4490313366749, "mean_pred_prob": 0.04892647545784712, "mean_pred_prob_last_10": 0.2485674187541008, "mean_pred_prob_last_25": 0.1402106055058539, "mean_pred_prob_last_50": 0.08381158630363643, "mean_token_accuracy": 0.8795201718807221, "step": 40390 }, { "epoch": 0.7181839190798713, "grad_norm": 1.7484605935819075, "learning_rate": 0.0001, "loss": 0.8014, "mean_abs_error": 249.5707622352682, "mean_abs_error_last_10": 97.71658113215554, "mean_abs_error_last_25": 103.59674878437548, "mean_abs_error_last_50": 141.8678507116909, "mean_pred_prob": 0.04878788450732827, "mean_pred_prob_last_10": 0.21801026621833444, "mean_pred_prob_last_25": 0.1296441451413557, "mean_pred_prob_last_50": 0.08117400327464566, "mean_token_accuracy": 0.8648991346359253, "step": 40400 }, { "epoch": 0.7183616873766733, "grad_norm": 2.201197843567124, "learning_rate": 0.0001, "loss": 0.8849, "mean_abs_error": 565.5995286703877, "mean_abs_error_last_10": 162.63285007041904, "mean_abs_error_last_25": 238.253407329625, "mean_abs_error_last_50": 335.93244480203407, "mean_pred_prob": 0.05697100235556718, "mean_pred_prob_last_10": 0.2499561017844826, "mean_pred_prob_last_25": 0.14932896898244508, "mean_pred_prob_last_50": 0.09380482209380717, "mean_token_accuracy": 0.8604027330875397, "step": 40410 }, { "epoch": 0.7185394556734752, "grad_norm": 1.3186097800663898, "learning_rate": 0.0001, "loss": 0.6464, "mean_abs_error": 183.18049405259677, "mean_abs_error_last_10": 31.170694126689018, "mean_abs_error_last_25": 45.331151642744324, "mean_abs_error_last_50": 99.15372525211548, "mean_pred_prob": 0.059368206188082695, "mean_pred_prob_last_10": 0.2770746164023876, "mean_pred_prob_last_25": 0.15814956445246936, "mean_pred_prob_last_50": 0.09990096474066376, "mean_token_accuracy": 0.8853659927845001, "step": 40420 }, { "epoch": 0.7187172239702772, "grad_norm": 1.3835391361421685, "learning_rate": 0.0001, "loss": 0.6899, "mean_abs_error": 1092.8134924090793, "mean_abs_error_last_10": 627.62226697787, "mean_abs_error_last_25": 701.4656358786868, "mean_abs_error_last_50": 806.3145709160615, "mean_pred_prob": 0.02866454011091264, "mean_pred_prob_last_10": 0.15745442206971347, "mean_pred_prob_last_25": 0.08112213230051565, "mean_pred_prob_last_50": 0.048104200806119476, "mean_token_accuracy": 0.8856296479701996, "step": 40430 }, { "epoch": 0.7188949922670791, "grad_norm": 1.0349237273309861, "learning_rate": 0.0001, "loss": 0.7017, "mean_abs_error": 201.81061250995896, "mean_abs_error_last_10": 47.8463811407676, "mean_abs_error_last_25": 64.12445863979792, "mean_abs_error_last_50": 104.30939119573597, "mean_pred_prob": 0.05647468455135822, "mean_pred_prob_last_10": 0.27453521862626074, "mean_pred_prob_last_25": 0.15624471493065356, "mean_pred_prob_last_50": 0.09405402038246394, "mean_token_accuracy": 0.864591759443283, "step": 40440 }, { "epoch": 0.719072760563881, "grad_norm": 2.4930056693785385, "learning_rate": 0.0001, "loss": 0.6977, "mean_abs_error": 834.4364439435085, "mean_abs_error_last_10": 353.2356319035938, "mean_abs_error_last_25": 500.0563562485718, "mean_abs_error_last_50": 642.1554704319107, "mean_pred_prob": 0.03562824305263348, "mean_pred_prob_last_10": 0.1769773386826273, "mean_pred_prob_last_25": 0.0959586082957685, "mean_pred_prob_last_50": 0.05912203428742942, "mean_token_accuracy": 0.8642940521240234, "step": 40450 }, { "epoch": 0.719250528860683, "grad_norm": 2.7660645492128797, "learning_rate": 0.0001, "loss": 0.6611, "mean_abs_error": 672.6709202437947, "mean_abs_error_last_10": 332.05223445062404, "mean_abs_error_last_25": 375.95063840588386, "mean_abs_error_last_50": 508.4524697074274, "mean_pred_prob": 0.023814296768978237, "mean_pred_prob_last_10": 0.13077886309474707, "mean_pred_prob_last_25": 0.06943319384008646, "mean_pred_prob_last_50": 0.040801257360726595, "mean_token_accuracy": 0.8759214878082275, "step": 40460 }, { "epoch": 0.7194282971574849, "grad_norm": 1.4522772084508218, "learning_rate": 0.0001, "loss": 0.7224, "mean_abs_error": 367.32207266392055, "mean_abs_error_last_10": 85.07963407531376, "mean_abs_error_last_25": 154.54901315305312, "mean_abs_error_last_50": 221.81605026765638, "mean_pred_prob": 0.035775334923528135, "mean_pred_prob_last_10": 0.18122760821133851, "mean_pred_prob_last_25": 0.10031895218417049, "mean_pred_prob_last_50": 0.06119955996982753, "mean_token_accuracy": 0.876135504245758, "step": 40470 }, { "epoch": 0.7196060654542868, "grad_norm": 1.6520522504936426, "learning_rate": 0.0001, "loss": 0.7909, "mean_abs_error": 719.0159530191734, "mean_abs_error_last_10": 226.91408315728373, "mean_abs_error_last_25": 342.8497577055877, "mean_abs_error_last_50": 455.1434428998338, "mean_pred_prob": 0.032739234587643294, "mean_pred_prob_last_10": 0.16381044504232706, "mean_pred_prob_last_25": 0.08908761746715754, "mean_pred_prob_last_50": 0.05447361913393252, "mean_token_accuracy": 0.8618821978569031, "step": 40480 }, { "epoch": 0.7197838337510888, "grad_norm": 2.002443928659612, "learning_rate": 0.0001, "loss": 0.7024, "mean_abs_error": 145.44737748542715, "mean_abs_error_last_10": 50.677581362080886, "mean_abs_error_last_25": 52.876871614898185, "mean_abs_error_last_50": 77.94916801315449, "mean_pred_prob": 0.06378955664113165, "mean_pred_prob_last_10": 0.2812770612537861, "mean_pred_prob_last_25": 0.16675963159650564, "mean_pred_prob_last_50": 0.10437021907418967, "mean_token_accuracy": 0.8735026061534882, "step": 40490 }, { "epoch": 0.7199616020478907, "grad_norm": 0.9288915689723246, "learning_rate": 0.0001, "loss": 0.5674, "mean_abs_error": 237.65215912825056, "mean_abs_error_last_10": 143.28967151408145, "mean_abs_error_last_25": 153.0160292794444, "mean_abs_error_last_50": 168.99407530178757, "mean_pred_prob": 0.04457848547026515, "mean_pred_prob_last_10": 0.22253021961078048, "mean_pred_prob_last_25": 0.12266607331112027, "mean_pred_prob_last_50": 0.07466037813574075, "mean_token_accuracy": 0.8884940981864929, "step": 40500 }, { "epoch": 0.7201393703446928, "grad_norm": 1.751914130902621, "learning_rate": 0.0001, "loss": 0.8569, "mean_abs_error": 308.62908667263054, "mean_abs_error_last_10": 91.66738415594176, "mean_abs_error_last_25": 132.53942999631758, "mean_abs_error_last_50": 183.77947357063945, "mean_pred_prob": 0.04188413845840842, "mean_pred_prob_last_10": 0.2117017276585102, "mean_pred_prob_last_25": 0.11685244236141443, "mean_pred_prob_last_50": 0.06944894660264253, "mean_token_accuracy": 0.8743090927600861, "step": 40510 }, { "epoch": 0.7203171386414947, "grad_norm": 1.0310503324591622, "learning_rate": 0.0001, "loss": 0.6783, "mean_abs_error": 539.1704485099067, "mean_abs_error_last_10": 272.1468736502769, "mean_abs_error_last_25": 255.14130404326056, "mean_abs_error_last_50": 318.71212756781904, "mean_pred_prob": 0.027235165308229625, "mean_pred_prob_last_10": 0.13288311802316458, "mean_pred_prob_last_25": 0.07363831266993656, "mean_pred_prob_last_50": 0.04521044193534181, "mean_token_accuracy": 0.8765525221824646, "step": 40520 }, { "epoch": 0.7204949069382967, "grad_norm": 1.3885341771018813, "learning_rate": 0.0001, "loss": 0.7421, "mean_abs_error": 971.8264652550373, "mean_abs_error_last_10": 489.92851332369946, "mean_abs_error_last_25": 576.6355996027925, "mean_abs_error_last_50": 677.9023390661666, "mean_pred_prob": 0.030861070699756965, "mean_pred_prob_last_10": 0.15077102558570915, "mean_pred_prob_last_25": 0.08744321235863026, "mean_pred_prob_last_50": 0.052537662160466424, "mean_token_accuracy": 0.8743190944194794, "step": 40530 }, { "epoch": 0.7206726752350986, "grad_norm": 1.458476654804741, "learning_rate": 0.0001, "loss": 0.7533, "mean_abs_error": 297.2122189789123, "mean_abs_error_last_10": 76.33275497018877, "mean_abs_error_last_25": 104.08476633247624, "mean_abs_error_last_50": 172.54142017249507, "mean_pred_prob": 0.03804974015802145, "mean_pred_prob_last_10": 0.20347978714853526, "mean_pred_prob_last_25": 0.11034939177334309, "mean_pred_prob_last_50": 0.0649864362552762, "mean_token_accuracy": 0.8726868391036987, "step": 40540 }, { "epoch": 0.7208504435319005, "grad_norm": 2.200237094272664, "learning_rate": 0.0001, "loss": 0.665, "mean_abs_error": 100.13946718853586, "mean_abs_error_last_10": 16.470193423299623, "mean_abs_error_last_25": 38.867592788476706, "mean_abs_error_last_50": 56.63145325793323, "mean_pred_prob": 0.06270582331344485, "mean_pred_prob_last_10": 0.3264171227812767, "mean_pred_prob_last_25": 0.17420783471316098, "mean_pred_prob_last_50": 0.10631416346877813, "mean_token_accuracy": 0.8811629772186279, "step": 40550 }, { "epoch": 0.7210282118287025, "grad_norm": 1.2331558288079412, "learning_rate": 0.0001, "loss": 0.6839, "mean_abs_error": 442.1805248482777, "mean_abs_error_last_10": 103.99658140353381, "mean_abs_error_last_25": 168.65167114385036, "mean_abs_error_last_50": 244.55934287543764, "mean_pred_prob": 0.04246204263763502, "mean_pred_prob_last_10": 0.18712130818748846, "mean_pred_prob_last_25": 0.10961410138988867, "mean_pred_prob_last_50": 0.06913299936568365, "mean_token_accuracy": 0.8731107890605927, "step": 40560 }, { "epoch": 0.7212059801255044, "grad_norm": 1.5068065790548137, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 513.4034778908587, "mean_abs_error_last_10": 226.94959946180603, "mean_abs_error_last_25": 310.19708499435444, "mean_abs_error_last_50": 465.8695061461177, "mean_pred_prob": 0.031096013123169543, "mean_pred_prob_last_10": 0.1579375857487321, "mean_pred_prob_last_25": 0.08787354314699769, "mean_pred_prob_last_50": 0.05229837037622929, "mean_token_accuracy": 0.8695293009281159, "step": 40570 }, { "epoch": 0.7213837484223063, "grad_norm": 1.586477644458018, "learning_rate": 0.0001, "loss": 0.7906, "mean_abs_error": 984.6370886935571, "mean_abs_error_last_10": 370.7150148265605, "mean_abs_error_last_25": 531.9391756748374, "mean_abs_error_last_50": 687.5105444852494, "mean_pred_prob": 0.04251187357003801, "mean_pred_prob_last_10": 0.1948927406396251, "mean_pred_prob_last_25": 0.11496870415867307, "mean_pred_prob_last_50": 0.07080130732210818, "mean_token_accuracy": 0.869524347782135, "step": 40580 }, { "epoch": 0.7215615167191083, "grad_norm": 1.5549802334277902, "learning_rate": 0.0001, "loss": 0.7239, "mean_abs_error": 545.0709542168964, "mean_abs_error_last_10": 152.79483875881493, "mean_abs_error_last_25": 197.63167038424984, "mean_abs_error_last_50": 273.1541503125952, "mean_pred_prob": 0.03422388484468684, "mean_pred_prob_last_10": 0.1548721896018833, "mean_pred_prob_last_25": 0.0943332835799083, "mean_pred_prob_last_50": 0.05733588064322248, "mean_token_accuracy": 0.8792499244213104, "step": 40590 }, { "epoch": 0.7217392850159102, "grad_norm": 1.3537729287229079, "learning_rate": 0.0001, "loss": 0.8746, "mean_abs_error": 848.5847717808177, "mean_abs_error_last_10": 428.75976361523465, "mean_abs_error_last_25": 566.223899070538, "mean_abs_error_last_50": 641.1092553186629, "mean_pred_prob": 0.031482251986744814, "mean_pred_prob_last_10": 0.17151405768818223, "mean_pred_prob_last_25": 0.09173679009545595, "mean_pred_prob_last_50": 0.053963189962087196, "mean_token_accuracy": 0.8645026504993438, "step": 40600 }, { "epoch": 0.7219170533127122, "grad_norm": 1.1018429048560237, "learning_rate": 0.0001, "loss": 0.7054, "mean_abs_error": 417.49632234915936, "mean_abs_error_last_10": 131.11015361451334, "mean_abs_error_last_25": 166.42176492651373, "mean_abs_error_last_50": 264.8583130582166, "mean_pred_prob": 0.030956263054395094, "mean_pred_prob_last_10": 0.1510868560173549, "mean_pred_prob_last_25": 0.08288556246552617, "mean_pred_prob_last_50": 0.05197752144886181, "mean_token_accuracy": 0.8668682932853699, "step": 40610 }, { "epoch": 0.7220948216095142, "grad_norm": 3.4073041323241235, "learning_rate": 0.0001, "loss": 0.8689, "mean_abs_error": 477.17545035992634, "mean_abs_error_last_10": 85.29691854473705, "mean_abs_error_last_25": 155.54119973462556, "mean_abs_error_last_50": 273.28944398109996, "mean_pred_prob": 0.05265551510383375, "mean_pred_prob_last_10": 0.24468808602541686, "mean_pred_prob_last_25": 0.14354704883880914, "mean_pred_prob_last_50": 0.08917270031524822, "mean_token_accuracy": 0.8737299561500549, "step": 40620 }, { "epoch": 0.7222725899063162, "grad_norm": 1.0964871770918563, "learning_rate": 0.0001, "loss": 0.6979, "mean_abs_error": 547.2668542047617, "mean_abs_error_last_10": 242.34335437941075, "mean_abs_error_last_25": 419.7740204008839, "mean_abs_error_last_50": 455.1884265099022, "mean_pred_prob": 0.038691142085008325, "mean_pred_prob_last_10": 0.17346409922465683, "mean_pred_prob_last_25": 0.10166244294960051, "mean_pred_prob_last_50": 0.06438390756957232, "mean_token_accuracy": 0.8664131581783294, "step": 40630 }, { "epoch": 0.7224503582031181, "grad_norm": 0.9808028724919595, "learning_rate": 0.0001, "loss": 0.7238, "mean_abs_error": 1009.9166407376302, "mean_abs_error_last_10": 410.48119920544696, "mean_abs_error_last_25": 492.62009639018754, "mean_abs_error_last_50": 672.6134710458331, "mean_pred_prob": 0.023848348882165738, "mean_pred_prob_last_10": 0.12233679872588255, "mean_pred_prob_last_25": 0.06532708596787415, "mean_pred_prob_last_50": 0.040571741649182515, "mean_token_accuracy": 0.877169281244278, "step": 40640 }, { "epoch": 0.72262812649992, "grad_norm": 1.8942324798972847, "learning_rate": 0.0001, "loss": 0.7464, "mean_abs_error": 421.87935927803017, "mean_abs_error_last_10": 244.67804532561757, "mean_abs_error_last_25": 224.5445138794233, "mean_abs_error_last_50": 270.45449651322417, "mean_pred_prob": 0.03891956157749519, "mean_pred_prob_last_10": 0.17546590914716945, "mean_pred_prob_last_25": 0.10053789700614288, "mean_pred_prob_last_50": 0.06354750414611772, "mean_token_accuracy": 0.862265819311142, "step": 40650 }, { "epoch": 0.722805894796722, "grad_norm": 1.4456618558465, "learning_rate": 0.0001, "loss": 0.7147, "mean_abs_error": 243.25878513238393, "mean_abs_error_last_10": 57.55405056618872, "mean_abs_error_last_25": 87.26741517109062, "mean_abs_error_last_50": 166.56026653060044, "mean_pred_prob": 0.03881689482368529, "mean_pred_prob_last_10": 0.19822758845984936, "mean_pred_prob_last_25": 0.10784920863807201, "mean_pred_prob_last_50": 0.0658630990423262, "mean_token_accuracy": 0.8751096308231354, "step": 40660 }, { "epoch": 0.7229836630935239, "grad_norm": 1.4315567940045908, "learning_rate": 0.0001, "loss": 0.7689, "mean_abs_error": 1592.2055574839837, "mean_abs_error_last_10": 981.2800574693798, "mean_abs_error_last_25": 1099.344972797755, "mean_abs_error_last_50": 1279.7475023817929, "mean_pred_prob": 0.028865218525606907, "mean_pred_prob_last_10": 0.1560912605127669, "mean_pred_prob_last_25": 0.0818828809176921, "mean_pred_prob_last_50": 0.04947637291770661, "mean_token_accuracy": 0.8765082478523254, "step": 40670 }, { "epoch": 0.7231614313903258, "grad_norm": 1.2142219561022225, "learning_rate": 0.0001, "loss": 0.7053, "mean_abs_error": 254.4601298244103, "mean_abs_error_last_10": 42.67119013787457, "mean_abs_error_last_25": 71.40509925353645, "mean_abs_error_last_50": 138.54759611886504, "mean_pred_prob": 0.0561357417376712, "mean_pred_prob_last_10": 0.27408348117023706, "mean_pred_prob_last_25": 0.15250225272029638, "mean_pred_prob_last_50": 0.09321852293796837, "mean_token_accuracy": 0.8680520474910736, "step": 40680 }, { "epoch": 0.7233391996871278, "grad_norm": 2.2852962247735404, "learning_rate": 0.0001, "loss": 0.6584, "mean_abs_error": 301.12707685413363, "mean_abs_error_last_10": 37.71783924903611, "mean_abs_error_last_25": 113.23538764926111, "mean_abs_error_last_50": 202.2885159646082, "mean_pred_prob": 0.05452177613042295, "mean_pred_prob_last_10": 0.26077191196382044, "mean_pred_prob_last_25": 0.1520855560898781, "mean_pred_prob_last_50": 0.09222933929413557, "mean_token_accuracy": 0.8641444146633148, "step": 40690 }, { "epoch": 0.7235169679839297, "grad_norm": 3.17027351199597, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 288.558502108368, "mean_abs_error_last_10": 79.07788350385343, "mean_abs_error_last_25": 62.439415039609955, "mean_abs_error_last_50": 134.34037796135766, "mean_pred_prob": 0.06780510088428855, "mean_pred_prob_last_10": 0.306212522694841, "mean_pred_prob_last_25": 0.17941404979210346, "mean_pred_prob_last_50": 0.11295418444788083, "mean_token_accuracy": 0.8637328803539276, "step": 40700 }, { "epoch": 0.7236947362807317, "grad_norm": 2.5814725119358397, "learning_rate": 0.0001, "loss": 0.7197, "mean_abs_error": 392.0176958322492, "mean_abs_error_last_10": 73.84425045496039, "mean_abs_error_last_25": 105.70818355072211, "mean_abs_error_last_50": 192.56366197136649, "mean_pred_prob": 0.039579922193661334, "mean_pred_prob_last_10": 0.19561118832789362, "mean_pred_prob_last_25": 0.10653440246824175, "mean_pred_prob_last_50": 0.0657312833936885, "mean_token_accuracy": 0.8855919182300568, "step": 40710 }, { "epoch": 0.7238725045775336, "grad_norm": 3.199576088321524, "learning_rate": 0.0001, "loss": 0.711, "mean_abs_error": 288.0175437553955, "mean_abs_error_last_10": 71.31138812485457, "mean_abs_error_last_25": 98.18662399619545, "mean_abs_error_last_50": 177.3140162993671, "mean_pred_prob": 0.030764754628762603, "mean_pred_prob_last_10": 0.17243903130292892, "mean_pred_prob_last_25": 0.08922615591436625, "mean_pred_prob_last_50": 0.052173389680683614, "mean_token_accuracy": 0.8738297462463379, "step": 40720 }, { "epoch": 0.7240502728743355, "grad_norm": 1.1353099772510962, "learning_rate": 0.0001, "loss": 0.7693, "mean_abs_error": 519.5953297130725, "mean_abs_error_last_10": 251.1520641168626, "mean_abs_error_last_25": 292.5644697910601, "mean_abs_error_last_50": 331.2501898356925, "mean_pred_prob": 0.04479603066574782, "mean_pred_prob_last_10": 0.1774690578924492, "mean_pred_prob_last_25": 0.10897595253773033, "mean_pred_prob_last_50": 0.07063142065890134, "mean_token_accuracy": 0.8694123923778534, "step": 40730 }, { "epoch": 0.7242280411711376, "grad_norm": 2.423047645045747, "learning_rate": 0.0001, "loss": 0.6479, "mean_abs_error": 231.05979830469738, "mean_abs_error_last_10": 39.70265648737015, "mean_abs_error_last_25": 78.36033125736522, "mean_abs_error_last_50": 173.28908382116518, "mean_pred_prob": 0.05378299253061414, "mean_pred_prob_last_10": 0.2786548793315887, "mean_pred_prob_last_25": 0.15076780803501605, "mean_pred_prob_last_50": 0.09038461335003375, "mean_token_accuracy": 0.871575516462326, "step": 40740 }, { "epoch": 0.7244058094679395, "grad_norm": 1.5885693202282063, "learning_rate": 0.0001, "loss": 0.6475, "mean_abs_error": 213.66384285686496, "mean_abs_error_last_10": 37.83328790123234, "mean_abs_error_last_25": 89.81391933816738, "mean_abs_error_last_50": 142.95628960422118, "mean_pred_prob": 0.04235355528071523, "mean_pred_prob_last_10": 0.19245706200599672, "mean_pred_prob_last_25": 0.11429139524698258, "mean_pred_prob_last_50": 0.07109399558976293, "mean_token_accuracy": 0.8819620728492736, "step": 40750 }, { "epoch": 0.7245835777647415, "grad_norm": 6.396594744721939, "learning_rate": 0.0001, "loss": 0.7538, "mean_abs_error": 740.6462067652468, "mean_abs_error_last_10": 548.884164645831, "mean_abs_error_last_25": 523.4347441804763, "mean_abs_error_last_50": 596.1506242873447, "mean_pred_prob": 0.09144594507815781, "mean_pred_prob_last_10": 0.3162376631546067, "mean_pred_prob_last_25": 0.2010739112345618, "mean_pred_prob_last_50": 0.14434354229015298, "mean_token_accuracy": 0.8724175751209259, "step": 40760 }, { "epoch": 0.7247613460615434, "grad_norm": 2.0490011365407383, "learning_rate": 0.0001, "loss": 0.8615, "mean_abs_error": 302.69525108706614, "mean_abs_error_last_10": 115.10560826585186, "mean_abs_error_last_25": 189.43693196170662, "mean_abs_error_last_50": 237.85563824394922, "mean_pred_prob": 0.03551455112174153, "mean_pred_prob_last_10": 0.18808590807020664, "mean_pred_prob_last_25": 0.097377067245543, "mean_pred_prob_last_50": 0.058286954741925, "mean_token_accuracy": 0.8692374527454376, "step": 40770 }, { "epoch": 0.7249391143583453, "grad_norm": 1.4423052906118714, "learning_rate": 0.0001, "loss": 0.6973, "mean_abs_error": 95.35031401106187, "mean_abs_error_last_10": 24.211103404199086, "mean_abs_error_last_25": 31.170482695225843, "mean_abs_error_last_50": 52.304121583319656, "mean_pred_prob": 0.0521736104041338, "mean_pred_prob_last_10": 0.23877761848270893, "mean_pred_prob_last_25": 0.13736452274024485, "mean_pred_prob_last_50": 0.08533155284821987, "mean_token_accuracy": 0.8702050924301148, "step": 40780 }, { "epoch": 0.7251168826551473, "grad_norm": 3.090973889947225, "learning_rate": 0.0001, "loss": 0.6455, "mean_abs_error": 309.63116089624316, "mean_abs_error_last_10": 90.53525163999214, "mean_abs_error_last_25": 186.46065833937573, "mean_abs_error_last_50": 241.43710283835654, "mean_pred_prob": 0.06618839916773141, "mean_pred_prob_last_10": 0.2640612764284015, "mean_pred_prob_last_25": 0.17334803696721793, "mean_pred_prob_last_50": 0.10931097171269358, "mean_token_accuracy": 0.8846368730068207, "step": 40790 }, { "epoch": 0.7252946509519492, "grad_norm": 1.9088804806874498, "learning_rate": 0.0001, "loss": 0.7271, "mean_abs_error": 729.0318212912167, "mean_abs_error_last_10": 356.70235166872806, "mean_abs_error_last_25": 399.9883760863806, "mean_abs_error_last_50": 495.9808437940972, "mean_pred_prob": 0.042053924829815514, "mean_pred_prob_last_10": 0.22122430411400273, "mean_pred_prob_last_25": 0.1202512577350717, "mean_pred_prob_last_50": 0.07035799688892438, "mean_token_accuracy": 0.8785263776779175, "step": 40800 }, { "epoch": 0.7254724192487512, "grad_norm": 1.2305446816868315, "learning_rate": 0.0001, "loss": 0.7258, "mean_abs_error": 280.28415829403156, "mean_abs_error_last_10": 66.96089121254687, "mean_abs_error_last_25": 121.90088998139167, "mean_abs_error_last_50": 187.6753724245544, "mean_pred_prob": 0.03311943425796926, "mean_pred_prob_last_10": 0.16598098501563072, "mean_pred_prob_last_25": 0.09076644647866487, "mean_pred_prob_last_50": 0.05567833827808499, "mean_token_accuracy": 0.8780738115310669, "step": 40810 }, { "epoch": 0.7256501875455531, "grad_norm": 1.0814131445768131, "learning_rate": 0.0001, "loss": 0.7167, "mean_abs_error": 414.9704439031918, "mean_abs_error_last_10": 194.47000324946714, "mean_abs_error_last_25": 289.0766130417156, "mean_abs_error_last_50": 331.71409536553745, "mean_pred_prob": 0.03520625680685043, "mean_pred_prob_last_10": 0.17084483958315105, "mean_pred_prob_last_25": 0.09609095738269388, "mean_pred_prob_last_50": 0.059592831565532835, "mean_token_accuracy": 0.8762754440307617, "step": 40820 }, { "epoch": 0.725827955842355, "grad_norm": 1.9484389049700326, "learning_rate": 0.0001, "loss": 0.7737, "mean_abs_error": 604.4778900335674, "mean_abs_error_last_10": 206.3738793210939, "mean_abs_error_last_25": 332.13203549688035, "mean_abs_error_last_50": 445.62342336007794, "mean_pred_prob": 0.03726281889248639, "mean_pred_prob_last_10": 0.17543609999120235, "mean_pred_prob_last_25": 0.10045470459153875, "mean_pred_prob_last_50": 0.06206372615415603, "mean_token_accuracy": 0.8663537919521331, "step": 40830 }, { "epoch": 0.726005724139157, "grad_norm": 1.0504049683902716, "learning_rate": 0.0001, "loss": 0.7225, "mean_abs_error": 454.55863578600366, "mean_abs_error_last_10": 115.94078933458232, "mean_abs_error_last_25": 167.0659762388513, "mean_abs_error_last_50": 255.9006567757073, "mean_pred_prob": 0.049962470028549436, "mean_pred_prob_last_10": 0.23805032894015313, "mean_pred_prob_last_25": 0.1376438507810235, "mean_pred_prob_last_50": 0.083452737191692, "mean_token_accuracy": 0.8777440905570983, "step": 40840 }, { "epoch": 0.7261834924359589, "grad_norm": 1.5265486688139924, "learning_rate": 0.0001, "loss": 0.6597, "mean_abs_error": 442.2443515383335, "mean_abs_error_last_10": 300.14220216440566, "mean_abs_error_last_25": 317.4995899112478, "mean_abs_error_last_50": 371.1657659932292, "mean_pred_prob": 0.03814624990336597, "mean_pred_prob_last_10": 0.18885424127802253, "mean_pred_prob_last_25": 0.107032760232687, "mean_pred_prob_last_50": 0.06422793492674828, "mean_token_accuracy": 0.8778091251850129, "step": 40850 }, { "epoch": 0.726361260732761, "grad_norm": 1.3120825979613397, "learning_rate": 0.0001, "loss": 0.838, "mean_abs_error": 527.2159402703119, "mean_abs_error_last_10": 242.20960035615173, "mean_abs_error_last_25": 265.6726048248157, "mean_abs_error_last_50": 339.89915414109055, "mean_pred_prob": 0.01881013815291226, "mean_pred_prob_last_10": 0.1070278525352478, "mean_pred_prob_last_25": 0.05715127820149064, "mean_pred_prob_last_50": 0.03285189848393202, "mean_token_accuracy": 0.8645455062389373, "step": 40860 }, { "epoch": 0.7265390290295629, "grad_norm": 1.8277853731567109, "learning_rate": 0.0001, "loss": 0.6861, "mean_abs_error": 1998.6833434743799, "mean_abs_error_last_10": 982.6946675994666, "mean_abs_error_last_25": 1111.3508121617, "mean_abs_error_last_50": 1404.3873558451937, "mean_pred_prob": 0.0285885380464606, "mean_pred_prob_last_10": 0.14527716247102945, "mean_pred_prob_last_25": 0.08180896817139001, "mean_pred_prob_last_50": 0.0492380194918951, "mean_token_accuracy": 0.8817305624485016, "step": 40870 }, { "epoch": 0.7267167973263648, "grad_norm": 1.5670391967151578, "learning_rate": 0.0001, "loss": 0.72, "mean_abs_error": 385.7594965008275, "mean_abs_error_last_10": 75.62555049314889, "mean_abs_error_last_25": 190.91149824413623, "mean_abs_error_last_50": 376.36122793283363, "mean_pred_prob": 0.042361938022077085, "mean_pred_prob_last_10": 0.16705342344939708, "mean_pred_prob_last_25": 0.10403282903134822, "mean_pred_prob_last_50": 0.0679144624620676, "mean_token_accuracy": 0.8669016718864441, "step": 40880 }, { "epoch": 0.7268945656231668, "grad_norm": 0.8429980714518023, "learning_rate": 0.0001, "loss": 0.6333, "mean_abs_error": 555.2238575184186, "mean_abs_error_last_10": 125.99535658152804, "mean_abs_error_last_25": 227.0608041362634, "mean_abs_error_last_50": 415.1856053672521, "mean_pred_prob": 0.038101070234552024, "mean_pred_prob_last_10": 0.18644205257296562, "mean_pred_prob_last_25": 0.10280492631718516, "mean_pred_prob_last_50": 0.06388082285411656, "mean_token_accuracy": 0.871779614686966, "step": 40890 }, { "epoch": 0.7270723339199687, "grad_norm": 0.8937838364001979, "learning_rate": 0.0001, "loss": 0.8834, "mean_abs_error": 539.8654220167273, "mean_abs_error_last_10": 180.23249028950056, "mean_abs_error_last_25": 198.3998966294498, "mean_abs_error_last_50": 268.02821851021133, "mean_pred_prob": 0.04000227425713092, "mean_pred_prob_last_10": 0.17662013100925833, "mean_pred_prob_last_25": 0.0997249667881988, "mean_pred_prob_last_50": 0.06440993569558487, "mean_token_accuracy": 0.8711528301239013, "step": 40900 }, { "epoch": 0.7272501022167707, "grad_norm": 1.4941909517109817, "learning_rate": 0.0001, "loss": 0.5712, "mean_abs_error": 782.6650158573264, "mean_abs_error_last_10": 327.63187520046097, "mean_abs_error_last_25": 413.14677392865195, "mean_abs_error_last_50": 524.462894292176, "mean_pred_prob": 0.04258535608678358, "mean_pred_prob_last_10": 0.22007203560788186, "mean_pred_prob_last_25": 0.12318048454471864, "mean_pred_prob_last_50": 0.07332713436626363, "mean_token_accuracy": 0.87359659075737, "step": 40910 }, { "epoch": 0.7274278705135726, "grad_norm": 1.3531314505549552, "learning_rate": 0.0001, "loss": 0.8228, "mean_abs_error": 518.1038278555865, "mean_abs_error_last_10": 227.58701104835478, "mean_abs_error_last_25": 260.3325338315246, "mean_abs_error_last_50": 322.2297497462455, "mean_pred_prob": 0.04145140004402492, "mean_pred_prob_last_10": 0.20135668917791918, "mean_pred_prob_last_25": 0.11308407988399267, "mean_pred_prob_last_50": 0.06911301735672168, "mean_token_accuracy": 0.8645208716392517, "step": 40920 }, { "epoch": 0.7276056388103745, "grad_norm": 2.505395546501687, "learning_rate": 0.0001, "loss": 0.6983, "mean_abs_error": 554.8240716712476, "mean_abs_error_last_10": 111.72695888228122, "mean_abs_error_last_25": 173.01598185179694, "mean_abs_error_last_50": 286.2804039415616, "mean_pred_prob": 0.03771749143488705, "mean_pred_prob_last_10": 0.18482207772321999, "mean_pred_prob_last_25": 0.10502638746984302, "mean_pred_prob_last_50": 0.06461767057189718, "mean_token_accuracy": 0.8666058778762817, "step": 40930 }, { "epoch": 0.7277834071071765, "grad_norm": 2.0894754537774607, "learning_rate": 0.0001, "loss": 0.7798, "mean_abs_error": 2728.8690595701546, "mean_abs_error_last_10": 1993.2411289569275, "mean_abs_error_last_25": 2184.2897528012572, "mean_abs_error_last_50": 2287.5674572187377, "mean_pred_prob": 0.021741080234642142, "mean_pred_prob_last_10": 0.09305238479282707, "mean_pred_prob_last_25": 0.05210862461535726, "mean_pred_prob_last_50": 0.034679696345119734, "mean_token_accuracy": 0.8603055536746979, "step": 40940 }, { "epoch": 0.7279611754039784, "grad_norm": 1.1754627355044456, "learning_rate": 0.0001, "loss": 0.8474, "mean_abs_error": 220.76319124300048, "mean_abs_error_last_10": 60.54441309242766, "mean_abs_error_last_25": 83.31010766241158, "mean_abs_error_last_50": 146.13150439514504, "mean_pred_prob": 0.035810419358313084, "mean_pred_prob_last_10": 0.1948651248589158, "mean_pred_prob_last_25": 0.1035514434799552, "mean_pred_prob_last_50": 0.06099275653250515, "mean_token_accuracy": 0.8686211287975312, "step": 40950 }, { "epoch": 0.7281389437007804, "grad_norm": 1.1762506080381114, "learning_rate": 0.0001, "loss": 0.6819, "mean_abs_error": 216.44319327417526, "mean_abs_error_last_10": 97.73630998565812, "mean_abs_error_last_25": 132.1337997523489, "mean_abs_error_last_50": 198.54581360218623, "mean_pred_prob": 0.04213366839103401, "mean_pred_prob_last_10": 0.20660155043005943, "mean_pred_prob_last_25": 0.10810852330178022, "mean_pred_prob_last_50": 0.0666479947976768, "mean_token_accuracy": 0.8764905035495758, "step": 40960 }, { "epoch": 0.7283167119975823, "grad_norm": 2.5734969047789655, "learning_rate": 0.0001, "loss": 0.6797, "mean_abs_error": 1503.5848138944339, "mean_abs_error_last_10": 932.9197781606156, "mean_abs_error_last_25": 1066.6906952402553, "mean_abs_error_last_50": 1150.7512272036065, "mean_pred_prob": 0.02446211603237316, "mean_pred_prob_last_10": 0.12092048831400462, "mean_pred_prob_last_25": 0.0655413465356105, "mean_pred_prob_last_50": 0.04083986219047801, "mean_token_accuracy": 0.8679694056510925, "step": 40970 }, { "epoch": 0.7284944802943844, "grad_norm": 1.6895457476771505, "learning_rate": 0.0001, "loss": 0.6092, "mean_abs_error": 151.93864110597005, "mean_abs_error_last_10": 23.940103820237603, "mean_abs_error_last_25": 48.576399485961296, "mean_abs_error_last_50": 99.40076340973349, "mean_pred_prob": 0.051614326797425746, "mean_pred_prob_last_10": 0.24972229301929474, "mean_pred_prob_last_25": 0.14146909154951573, "mean_pred_prob_last_50": 0.08618540428578854, "mean_token_accuracy": 0.8767435550689697, "step": 40980 }, { "epoch": 0.7286722485911863, "grad_norm": 2.076245238501161, "learning_rate": 0.0001, "loss": 0.7176, "mean_abs_error": 241.75749907019775, "mean_abs_error_last_10": 75.7976088453893, "mean_abs_error_last_25": 114.22324054540816, "mean_abs_error_last_50": 148.86770769759002, "mean_pred_prob": 0.05828536995686591, "mean_pred_prob_last_10": 0.2125433962792158, "mean_pred_prob_last_25": 0.138147874455899, "mean_pred_prob_last_50": 0.09207074185833335, "mean_token_accuracy": 0.8694755256175994, "step": 40990 }, { "epoch": 0.7288500168879882, "grad_norm": 2.3294244731556635, "learning_rate": 0.0001, "loss": 0.8561, "mean_abs_error": 677.2387684224085, "mean_abs_error_last_10": 184.73693135884128, "mean_abs_error_last_25": 265.49542530411907, "mean_abs_error_last_50": 380.1509739470447, "mean_pred_prob": 0.03336929653887637, "mean_pred_prob_last_10": 0.16962256034603343, "mean_pred_prob_last_25": 0.09506734642200172, "mean_pred_prob_last_50": 0.056792229728307574, "mean_token_accuracy": 0.8679510414600372, "step": 41000 }, { "epoch": 0.7290277851847902, "grad_norm": 1.2343172055704477, "learning_rate": 0.0001, "loss": 0.7722, "mean_abs_error": 309.8670060731868, "mean_abs_error_last_10": 42.94106811630314, "mean_abs_error_last_25": 103.60191221571031, "mean_abs_error_last_50": 180.18467399489936, "mean_pred_prob": 0.05108790243975818, "mean_pred_prob_last_10": 0.264878598973155, "mean_pred_prob_last_25": 0.14999101385474206, "mean_pred_prob_last_50": 0.08885624464601279, "mean_token_accuracy": 0.8833947479724884, "step": 41010 }, { "epoch": 0.7292055534815921, "grad_norm": 1.37663038653881, "learning_rate": 0.0001, "loss": 1.0501, "mean_abs_error": 423.99640065702243, "mean_abs_error_last_10": 88.95536812523946, "mean_abs_error_last_25": 158.34149693592246, "mean_abs_error_last_50": 236.41592264684672, "mean_pred_prob": 0.04829788205679506, "mean_pred_prob_last_10": 0.23375011021271347, "mean_pred_prob_last_25": 0.1506898639490828, "mean_pred_prob_last_50": 0.08492356154602021, "mean_token_accuracy": 0.8681632876396179, "step": 41020 }, { "epoch": 0.729383321778394, "grad_norm": 3.1680679034500825, "learning_rate": 0.0001, "loss": 0.8043, "mean_abs_error": 381.472524199061, "mean_abs_error_last_10": 93.92912133722052, "mean_abs_error_last_25": 110.24480165242036, "mean_abs_error_last_50": 160.85622139700715, "mean_pred_prob": 0.03518100871006027, "mean_pred_prob_last_10": 0.1729572581127286, "mean_pred_prob_last_25": 0.09641106126364321, "mean_pred_prob_last_50": 0.05910964428912848, "mean_token_accuracy": 0.8798496186733246, "step": 41030 }, { "epoch": 0.729561090075196, "grad_norm": 1.2601227771716483, "learning_rate": 0.0001, "loss": 0.6677, "mean_abs_error": 1411.5785232063122, "mean_abs_error_last_10": 793.3017098246362, "mean_abs_error_last_25": 892.9504876928186, "mean_abs_error_last_50": 1051.147075703416, "mean_pred_prob": 0.04675331728940364, "mean_pred_prob_last_10": 0.2200123020040337, "mean_pred_prob_last_25": 0.12821372522448654, "mean_pred_prob_last_50": 0.0788623311149422, "mean_token_accuracy": 0.8704756200313568, "step": 41040 }, { "epoch": 0.7297388583719979, "grad_norm": 2.7952557498846464, "learning_rate": 0.0001, "loss": 1.1691, "mean_abs_error": 390.40864926368886, "mean_abs_error_last_10": 145.13296507846505, "mean_abs_error_last_25": 280.44386089374024, "mean_abs_error_last_50": 349.96210351777484, "mean_pred_prob": 0.045602908334694804, "mean_pred_prob_last_10": 0.21351742260158063, "mean_pred_prob_last_25": 0.1378180316882208, "mean_pred_prob_last_50": 0.07979114474728703, "mean_token_accuracy": 0.8716222584247589, "step": 41050 }, { "epoch": 0.7299166266687999, "grad_norm": 1.3321965511436982, "learning_rate": 0.0001, "loss": 0.6294, "mean_abs_error": 226.3001684894103, "mean_abs_error_last_10": 48.96774485665007, "mean_abs_error_last_25": 75.51076408502907, "mean_abs_error_last_50": 124.27285934337965, "mean_pred_prob": 0.04908362315036356, "mean_pred_prob_last_10": 0.24687775932252407, "mean_pred_prob_last_25": 0.13326296685263514, "mean_pred_prob_last_50": 0.08208141629584134, "mean_token_accuracy": 0.8813272595405579, "step": 41060 }, { "epoch": 0.7300943949656018, "grad_norm": 1.1106871342012465, "learning_rate": 0.0001, "loss": 0.7899, "mean_abs_error": 287.1222554661002, "mean_abs_error_last_10": 72.92166317132425, "mean_abs_error_last_25": 97.83499901540341, "mean_abs_error_last_50": 164.72167870957497, "mean_pred_prob": 0.03787888321094215, "mean_pred_prob_last_10": 0.18287226352840663, "mean_pred_prob_last_25": 0.09909722115844488, "mean_pred_prob_last_50": 0.06131681771948934, "mean_token_accuracy": 0.8678396224975586, "step": 41070 }, { "epoch": 0.7302721632624037, "grad_norm": 4.107524522557146, "learning_rate": 0.0001, "loss": 0.8425, "mean_abs_error": 392.06519858635215, "mean_abs_error_last_10": 91.45221673536555, "mean_abs_error_last_25": 129.99941980126488, "mean_abs_error_last_50": 264.6038882546901, "mean_pred_prob": 0.0281334615778178, "mean_pred_prob_last_10": 0.13987770453095436, "mean_pred_prob_last_25": 0.077809221111238, "mean_pred_prob_last_50": 0.046991562377661465, "mean_token_accuracy": 0.8590385019779205, "step": 41080 }, { "epoch": 0.7304499315592057, "grad_norm": 1.202632932596183, "learning_rate": 0.0001, "loss": 0.7139, "mean_abs_error": 788.3904850185336, "mean_abs_error_last_10": 286.5262675118124, "mean_abs_error_last_25": 350.51862262445, "mean_abs_error_last_50": 551.2430956285225, "mean_pred_prob": 0.015493213548325002, "mean_pred_prob_last_10": 0.07788467016071081, "mean_pred_prob_last_25": 0.044019794650375844, "mean_pred_prob_last_50": 0.026358775328844786, "mean_token_accuracy": 0.8807701110839844, "step": 41090 }, { "epoch": 0.7306276998560077, "grad_norm": 3.1598448350803525, "learning_rate": 0.0001, "loss": 0.8439, "mean_abs_error": 200.13660207895205, "mean_abs_error_last_10": 57.22383358491028, "mean_abs_error_last_25": 76.32993991000761, "mean_abs_error_last_50": 127.09426993094739, "mean_pred_prob": 0.04191929209046066, "mean_pred_prob_last_10": 0.21677375733852386, "mean_pred_prob_last_25": 0.11901961844414473, "mean_pred_prob_last_50": 0.07148194452747703, "mean_token_accuracy": 0.8710885167121887, "step": 41100 }, { "epoch": 0.7308054681528097, "grad_norm": 2.0261545977745583, "learning_rate": 0.0001, "loss": 0.8359, "mean_abs_error": 335.91155784822047, "mean_abs_error_last_10": 74.49460251374346, "mean_abs_error_last_25": 99.83939404574217, "mean_abs_error_last_50": 193.54082274137141, "mean_pred_prob": 0.04693933501839638, "mean_pred_prob_last_10": 0.23022917434573173, "mean_pred_prob_last_25": 0.13163466826081277, "mean_pred_prob_last_50": 0.07980550704523921, "mean_token_accuracy": 0.8689050734043121, "step": 41110 }, { "epoch": 0.7309832364496116, "grad_norm": 1.3902570313061453, "learning_rate": 0.0001, "loss": 0.7591, "mean_abs_error": 736.3488082314768, "mean_abs_error_last_10": 356.2934256302666, "mean_abs_error_last_25": 413.9913543070391, "mean_abs_error_last_50": 521.2664387136924, "mean_pred_prob": 0.024248504685238002, "mean_pred_prob_last_10": 0.1207016855943948, "mean_pred_prob_last_25": 0.06648673084564508, "mean_pred_prob_last_50": 0.040519481804221866, "mean_token_accuracy": 0.8662745714187622, "step": 41120 }, { "epoch": 0.7311610047464135, "grad_norm": 2.287995483212515, "learning_rate": 0.0001, "loss": 0.6138, "mean_abs_error": 1617.756593020703, "mean_abs_error_last_10": 1108.2609256245778, "mean_abs_error_last_25": 1264.8497418840561, "mean_abs_error_last_50": 1354.6540504759846, "mean_pred_prob": 0.050112615716352595, "mean_pred_prob_last_10": 0.2036440551659325, "mean_pred_prob_last_25": 0.12348539848608198, "mean_pred_prob_last_50": 0.08114266080519883, "mean_token_accuracy": 0.876554936170578, "step": 41130 }, { "epoch": 0.7313387730432155, "grad_norm": 1.7601888117164188, "learning_rate": 0.0001, "loss": 0.6253, "mean_abs_error": 169.25346622376196, "mean_abs_error_last_10": 27.638957682329142, "mean_abs_error_last_25": 56.14105683080402, "mean_abs_error_last_50": 95.80019835051898, "mean_pred_prob": 0.04923319835215807, "mean_pred_prob_last_10": 0.2597871504724026, "mean_pred_prob_last_25": 0.14141028374433517, "mean_pred_prob_last_50": 0.08432609308511019, "mean_token_accuracy": 0.8840556263923645, "step": 41140 }, { "epoch": 0.7315165413400174, "grad_norm": 1.8706166687065227, "learning_rate": 0.0001, "loss": 1.0102, "mean_abs_error": 408.27271704498264, "mean_abs_error_last_10": 105.87349327109578, "mean_abs_error_last_25": 139.83274101044296, "mean_abs_error_last_50": 199.53774845382063, "mean_pred_prob": 0.03323584422469139, "mean_pred_prob_last_10": 0.17033121082931757, "mean_pred_prob_last_25": 0.09219116512686014, "mean_pred_prob_last_50": 0.05688333660364151, "mean_token_accuracy": 0.8734867453575135, "step": 41150 }, { "epoch": 0.7316943096368194, "grad_norm": 1.278349947971628, "learning_rate": 0.0001, "loss": 0.7122, "mean_abs_error": 215.32748821175886, "mean_abs_error_last_10": 60.51378795896551, "mean_abs_error_last_25": 81.40850275085411, "mean_abs_error_last_50": 120.89472539601886, "mean_pred_prob": 0.03926127855665982, "mean_pred_prob_last_10": 0.19456903040409088, "mean_pred_prob_last_25": 0.10830615032464266, "mean_pred_prob_last_50": 0.06522695366293192, "mean_token_accuracy": 0.8749417126178741, "step": 41160 }, { "epoch": 0.7318720779336213, "grad_norm": 2.485494194143429, "learning_rate": 0.0001, "loss": 0.8008, "mean_abs_error": 459.3295069963959, "mean_abs_error_last_10": 184.89281084132736, "mean_abs_error_last_25": 248.91630265408912, "mean_abs_error_last_50": 357.50819968526787, "mean_pred_prob": 0.04246297753416002, "mean_pred_prob_last_10": 0.19966895636171103, "mean_pred_prob_last_25": 0.11495573241263628, "mean_pred_prob_last_50": 0.07064793584868312, "mean_token_accuracy": 0.8660453021526336, "step": 41170 }, { "epoch": 0.7320498462304232, "grad_norm": 1.6534001374086793, "learning_rate": 0.0001, "loss": 0.6419, "mean_abs_error": 1022.9481238306942, "mean_abs_error_last_10": 432.2380372243909, "mean_abs_error_last_25": 488.72017291715684, "mean_abs_error_last_50": 695.0720166536374, "mean_pred_prob": 0.038301434530876574, "mean_pred_prob_last_10": 0.19867750277917368, "mean_pred_prob_last_25": 0.10477275802695658, "mean_pred_prob_last_50": 0.06374856318288949, "mean_token_accuracy": 0.8855525612831116, "step": 41180 }, { "epoch": 0.7322276145272252, "grad_norm": 2.4098951325153926, "learning_rate": 0.0001, "loss": 0.6336, "mean_abs_error": 807.0708432933774, "mean_abs_error_last_10": 205.56559782984104, "mean_abs_error_last_25": 297.1854753312865, "mean_abs_error_last_50": 495.8132919981493, "mean_pred_prob": 0.03362263760645874, "mean_pred_prob_last_10": 0.17146219778223895, "mean_pred_prob_last_25": 0.09533693107077852, "mean_pred_prob_last_50": 0.05736561205121689, "mean_token_accuracy": 0.8920718789100647, "step": 41190 }, { "epoch": 0.7324053828240271, "grad_norm": 2.564394739621864, "learning_rate": 0.0001, "loss": 0.8215, "mean_abs_error": 1133.8671930715914, "mean_abs_error_last_10": 565.9596381660106, "mean_abs_error_last_25": 623.4924838787294, "mean_abs_error_last_50": 788.3800364530705, "mean_pred_prob": 0.04817150460221455, "mean_pred_prob_last_10": 0.21759196012280882, "mean_pred_prob_last_25": 0.12497882292955183, "mean_pred_prob_last_50": 0.07871882483232184, "mean_token_accuracy": 0.8644518315792084, "step": 41200 }, { "epoch": 0.7325831511208291, "grad_norm": 1.8476473393472, "learning_rate": 0.0001, "loss": 0.7042, "mean_abs_error": 401.5497086043723, "mean_abs_error_last_10": 167.6676776514853, "mean_abs_error_last_25": 210.0669373772139, "mean_abs_error_last_50": 247.04118331916567, "mean_pred_prob": 0.03364160691853613, "mean_pred_prob_last_10": 0.17385976687073706, "mean_pred_prob_last_25": 0.0954149927943945, "mean_pred_prob_last_50": 0.057959649432450536, "mean_token_accuracy": 0.8648471236228943, "step": 41210 }, { "epoch": 0.7327609194176311, "grad_norm": 1.6990398070068267, "learning_rate": 0.0001, "loss": 0.6487, "mean_abs_error": 150.6870489345167, "mean_abs_error_last_10": 95.48894130580027, "mean_abs_error_last_25": 135.90539336225456, "mean_abs_error_last_50": 140.66801373191316, "mean_pred_prob": 0.05411549131385982, "mean_pred_prob_last_10": 0.25070717111229895, "mean_pred_prob_last_25": 0.14332285402342676, "mean_pred_prob_last_50": 0.09070145972073078, "mean_token_accuracy": 0.8791858315467834, "step": 41220 }, { "epoch": 0.732938687714433, "grad_norm": 1.899998817503819, "learning_rate": 0.0001, "loss": 0.7652, "mean_abs_error": 789.639318402056, "mean_abs_error_last_10": 146.98170384021972, "mean_abs_error_last_25": 222.81858036568602, "mean_abs_error_last_50": 418.52417741623105, "mean_pred_prob": 0.03634466388029978, "mean_pred_prob_last_10": 0.16972716217860578, "mean_pred_prob_last_25": 0.09794586664065719, "mean_pred_prob_last_50": 0.06178639812860638, "mean_token_accuracy": 0.8655422449111938, "step": 41230 }, { "epoch": 0.733116456011235, "grad_norm": 1.8361389745782135, "learning_rate": 0.0001, "loss": 0.6529, "mean_abs_error": 303.56543221229236, "mean_abs_error_last_10": 70.5780576563026, "mean_abs_error_last_25": 96.42754326761, "mean_abs_error_last_50": 152.58393171519148, "mean_pred_prob": 0.04671126930043101, "mean_pred_prob_last_10": 0.23873367980122567, "mean_pred_prob_last_25": 0.13146868012845517, "mean_pred_prob_last_50": 0.07997761163860559, "mean_token_accuracy": 0.874123215675354, "step": 41240 }, { "epoch": 0.7332942243080369, "grad_norm": 1.7615473486860125, "learning_rate": 0.0001, "loss": 0.6816, "mean_abs_error": 124.08338194321043, "mean_abs_error_last_10": 40.50560156698096, "mean_abs_error_last_25": 74.22494496353849, "mean_abs_error_last_50": 91.17246818780788, "mean_pred_prob": 0.04612217545509338, "mean_pred_prob_last_10": 0.21940099038183689, "mean_pred_prob_last_25": 0.11749809216707945, "mean_pred_prob_last_50": 0.0742544362321496, "mean_token_accuracy": 0.8641741275787354, "step": 41250 }, { "epoch": 0.7334719926048389, "grad_norm": 2.3592743175547524, "learning_rate": 0.0001, "loss": 0.7558, "mean_abs_error": 575.0036388287738, "mean_abs_error_last_10": 125.01621137770391, "mean_abs_error_last_25": 122.73292564098662, "mean_abs_error_last_50": 238.41197612354472, "mean_pred_prob": 0.054868484986945985, "mean_pred_prob_last_10": 0.2308537369593978, "mean_pred_prob_last_25": 0.1389264328405261, "mean_pred_prob_last_50": 0.08965020985342562, "mean_token_accuracy": 0.8743915736675263, "step": 41260 }, { "epoch": 0.7336497609016408, "grad_norm": 1.4790013364109813, "learning_rate": 0.0001, "loss": 0.6864, "mean_abs_error": 562.1745071128614, "mean_abs_error_last_10": 188.67626414391412, "mean_abs_error_last_25": 228.34615432117192, "mean_abs_error_last_50": 347.9013904807239, "mean_pred_prob": 0.029983587574679404, "mean_pred_prob_last_10": 0.15623800157336518, "mean_pred_prob_last_25": 0.08740112308878452, "mean_pred_prob_last_50": 0.05135963975917548, "mean_token_accuracy": 0.8708453178405762, "step": 41270 }, { "epoch": 0.7338275291984427, "grad_norm": 1.2051026761514103, "learning_rate": 0.0001, "loss": 0.6546, "mean_abs_error": 493.19514741156416, "mean_abs_error_last_10": 125.85456615385836, "mean_abs_error_last_25": 219.9200348766508, "mean_abs_error_last_50": 307.3110479529431, "mean_pred_prob": 0.025936930591706188, "mean_pred_prob_last_10": 0.12937773538287728, "mean_pred_prob_last_25": 0.07146061093080788, "mean_pred_prob_last_50": 0.043757358402945104, "mean_token_accuracy": 0.8807410836219788, "step": 41280 }, { "epoch": 0.7340052974952447, "grad_norm": 2.5509496632784754, "learning_rate": 0.0001, "loss": 0.6527, "mean_abs_error": 1307.9753568828246, "mean_abs_error_last_10": 628.3074383675824, "mean_abs_error_last_25": 732.8156983845, "mean_abs_error_last_50": 966.4279492202326, "mean_pred_prob": 0.026634360581374493, "mean_pred_prob_last_10": 0.13554302113116137, "mean_pred_prob_last_25": 0.07521835268489667, "mean_pred_prob_last_50": 0.04482544681086438, "mean_token_accuracy": 0.8758318722248077, "step": 41290 }, { "epoch": 0.7341830657920466, "grad_norm": 1.4538644676791184, "learning_rate": 0.0001, "loss": 0.7389, "mean_abs_error": 1549.9981257093941, "mean_abs_error_last_10": 905.3128398808927, "mean_abs_error_last_25": 963.5943687187489, "mean_abs_error_last_50": 1126.6264874734866, "mean_pred_prob": 0.036139590282618886, "mean_pred_prob_last_10": 0.1647307870487566, "mean_pred_prob_last_25": 0.0966510097925493, "mean_pred_prob_last_50": 0.06043745707283961, "mean_token_accuracy": 0.8791095554828644, "step": 41300 }, { "epoch": 0.7343608340888486, "grad_norm": 1.3538851503432505, "learning_rate": 0.0001, "loss": 0.6478, "mean_abs_error": 381.3341409456431, "mean_abs_error_last_10": 172.36213519476814, "mean_abs_error_last_25": 225.25452198049774, "mean_abs_error_last_50": 262.5239469264415, "mean_pred_prob": 0.038183662854135034, "mean_pred_prob_last_10": 0.18429005108773708, "mean_pred_prob_last_25": 0.10472617289051414, "mean_pred_prob_last_50": 0.06498447465710341, "mean_token_accuracy": 0.8810157239437103, "step": 41310 }, { "epoch": 0.7345386023856505, "grad_norm": 1.3457481952706687, "learning_rate": 0.0001, "loss": 0.6367, "mean_abs_error": 895.1103925134341, "mean_abs_error_last_10": 550.5619854241554, "mean_abs_error_last_25": 578.4680008418235, "mean_abs_error_last_50": 621.2555903440054, "mean_pred_prob": 0.044928014223114585, "mean_pred_prob_last_10": 0.1857606909587048, "mean_pred_prob_last_25": 0.11464594443095848, "mean_pred_prob_last_50": 0.07364231588144321, "mean_token_accuracy": 0.868851387500763, "step": 41320 }, { "epoch": 0.7347163706824525, "grad_norm": 1.7377769247337842, "learning_rate": 0.0001, "loss": 0.6712, "mean_abs_error": 387.52159135146576, "mean_abs_error_last_10": 210.54846787232637, "mean_abs_error_last_25": 205.275089110284, "mean_abs_error_last_50": 251.84413403610802, "mean_pred_prob": 0.05371632998576388, "mean_pred_prob_last_10": 0.24376310346415267, "mean_pred_prob_last_25": 0.14371716843452303, "mean_pred_prob_last_50": 0.0875947475375142, "mean_token_accuracy": 0.8688080132007598, "step": 41330 }, { "epoch": 0.7348941389792545, "grad_norm": 1.4042564821595325, "learning_rate": 0.0001, "loss": 0.6517, "mean_abs_error": 443.4534133792449, "mean_abs_error_last_10": 68.6864161050161, "mean_abs_error_last_25": 133.91715311674625, "mean_abs_error_last_50": 300.6837396973307, "mean_pred_prob": 0.04418818005360663, "mean_pred_prob_last_10": 0.20782898906618358, "mean_pred_prob_last_25": 0.11386868553236126, "mean_pred_prob_last_50": 0.07117036646232008, "mean_token_accuracy": 0.8810892283916474, "step": 41340 }, { "epoch": 0.7350719072760564, "grad_norm": 1.3838896596964294, "learning_rate": 0.0001, "loss": 0.7272, "mean_abs_error": 389.0747560270744, "mean_abs_error_last_10": 96.75542898908026, "mean_abs_error_last_25": 122.43018632588671, "mean_abs_error_last_50": 197.69837532156825, "mean_pred_prob": 0.055942184780724345, "mean_pred_prob_last_10": 0.2342663524672389, "mean_pred_prob_last_25": 0.1433342369273305, "mean_pred_prob_last_50": 0.09299878496676683, "mean_token_accuracy": 0.8689791858196259, "step": 41350 }, { "epoch": 0.7352496755728584, "grad_norm": 1.1835823066099955, "learning_rate": 0.0001, "loss": 0.7651, "mean_abs_error": 1319.9513544478177, "mean_abs_error_last_10": 530.2355268079755, "mean_abs_error_last_25": 650.305994504962, "mean_abs_error_last_50": 864.1232473456624, "mean_pred_prob": 0.0337539318454219, "mean_pred_prob_last_10": 0.17339301178872119, "mean_pred_prob_last_25": 0.09454824936110526, "mean_pred_prob_last_50": 0.05643212513823528, "mean_token_accuracy": 0.8707070529460907, "step": 41360 }, { "epoch": 0.7354274438696603, "grad_norm": 1.4809005766321774, "learning_rate": 0.0001, "loss": 0.8143, "mean_abs_error": 336.4593658651277, "mean_abs_error_last_10": 97.30202526535518, "mean_abs_error_last_25": 135.44703074806466, "mean_abs_error_last_50": 205.91945567512744, "mean_pred_prob": 0.03846504378598183, "mean_pred_prob_last_10": 0.18764647245407104, "mean_pred_prob_last_25": 0.10283047575503587, "mean_pred_prob_last_50": 0.06326501234434545, "mean_token_accuracy": 0.8737399756908417, "step": 41370 }, { "epoch": 0.7356052121664622, "grad_norm": 2.2264221673639493, "learning_rate": 0.0001, "loss": 0.7145, "mean_abs_error": 346.65205060004524, "mean_abs_error_last_10": 197.53812654111124, "mean_abs_error_last_25": 307.38975318001144, "mean_abs_error_last_50": 311.53942354109444, "mean_pred_prob": 0.03138000516919419, "mean_pred_prob_last_10": 0.15489609222859144, "mean_pred_prob_last_25": 0.08644004913512618, "mean_pred_prob_last_50": 0.05210942942649126, "mean_token_accuracy": 0.8771677017211914, "step": 41380 }, { "epoch": 0.7357829804632642, "grad_norm": 2.1419879007645775, "learning_rate": 0.0001, "loss": 0.649, "mean_abs_error": 995.7732722213977, "mean_abs_error_last_10": 286.3621701549867, "mean_abs_error_last_25": 368.87790186357245, "mean_abs_error_last_50": 538.1981558445085, "mean_pred_prob": 0.04583537323633209, "mean_pred_prob_last_10": 0.21709759332006798, "mean_pred_prob_last_25": 0.1254495578177739, "mean_pred_prob_last_50": 0.0763178896449972, "mean_token_accuracy": 0.8616855859756469, "step": 41390 }, { "epoch": 0.7359607487600661, "grad_norm": 1.1651443374062709, "learning_rate": 0.0001, "loss": 0.845, "mean_abs_error": 190.83464978915617, "mean_abs_error_last_10": 45.57867121905633, "mean_abs_error_last_25": 61.800594805010896, "mean_abs_error_last_50": 108.59462072340759, "mean_pred_prob": 0.07477485474664718, "mean_pred_prob_last_10": 0.3183932347223163, "mean_pred_prob_last_25": 0.18944971072487532, "mean_pred_prob_last_50": 0.12323738822015003, "mean_token_accuracy": 0.8754837870597839, "step": 41400 }, { "epoch": 0.7361385170568681, "grad_norm": 1.747273403934547, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 351.55276096924956, "mean_abs_error_last_10": 122.61525999704273, "mean_abs_error_last_25": 173.70897989327045, "mean_abs_error_last_50": 231.00753578196765, "mean_pred_prob": 0.03804237263393588, "mean_pred_prob_last_10": 0.18796886288328096, "mean_pred_prob_last_25": 0.10256829244317486, "mean_pred_prob_last_50": 0.0633668661233969, "mean_token_accuracy": 0.8692535936832428, "step": 41410 }, { "epoch": 0.73631628535367, "grad_norm": 2.1776097738969993, "learning_rate": 0.0001, "loss": 0.6147, "mean_abs_error": 424.84540476264067, "mean_abs_error_last_10": 42.944965069955195, "mean_abs_error_last_25": 83.13590476744821, "mean_abs_error_last_50": 196.35010443340593, "mean_pred_prob": 0.050400670524686574, "mean_pred_prob_last_10": 0.23467226503416896, "mean_pred_prob_last_25": 0.14052417520433663, "mean_pred_prob_last_50": 0.08636393845081329, "mean_token_accuracy": 0.8837378799915314, "step": 41420 }, { "epoch": 0.7364940536504719, "grad_norm": 2.1583291022924875, "learning_rate": 0.0001, "loss": 0.7706, "mean_abs_error": 249.50157581178595, "mean_abs_error_last_10": 37.21452880437163, "mean_abs_error_last_25": 65.58161147994971, "mean_abs_error_last_50": 127.24455635507505, "mean_pred_prob": 0.045080565055832265, "mean_pred_prob_last_10": 0.21971840858459474, "mean_pred_prob_last_25": 0.12269866932183504, "mean_pred_prob_last_50": 0.0752517999149859, "mean_token_accuracy": 0.8762682378292084, "step": 41430 }, { "epoch": 0.7366718219472739, "grad_norm": 1.4236782959059067, "learning_rate": 0.0001, "loss": 0.7285, "mean_abs_error": 588.6687905530449, "mean_abs_error_last_10": 108.8618506331687, "mean_abs_error_last_25": 221.41249004534689, "mean_abs_error_last_50": 399.9372457331685, "mean_pred_prob": 0.03250006835442036, "mean_pred_prob_last_10": 0.16509534381330013, "mean_pred_prob_last_25": 0.09168316936120391, "mean_pred_prob_last_50": 0.055387747939676046, "mean_token_accuracy": 0.8659508943557739, "step": 41440 }, { "epoch": 0.7368495902440759, "grad_norm": 1.2709214909940019, "learning_rate": 0.0001, "loss": 0.7738, "mean_abs_error": 76.82837114656017, "mean_abs_error_last_10": 17.476091840734618, "mean_abs_error_last_25": 56.03756204556265, "mean_abs_error_last_50": 57.397076669071716, "mean_pred_prob": 0.0779173226095736, "mean_pred_prob_last_10": 0.3387415252625942, "mean_pred_prob_last_25": 0.20325534008443355, "mean_pred_prob_last_50": 0.1277529178187251, "mean_token_accuracy": 0.8730650901794433, "step": 41450 }, { "epoch": 0.7370273585408779, "grad_norm": 4.118497366549446, "learning_rate": 0.0001, "loss": 0.8486, "mean_abs_error": 785.7045921943804, "mean_abs_error_last_10": 348.9433360800273, "mean_abs_error_last_25": 380.99876169796573, "mean_abs_error_last_50": 485.8476768348643, "mean_pred_prob": 0.041866723622661084, "mean_pred_prob_last_10": 0.19411559566506184, "mean_pred_prob_last_25": 0.10789414597093128, "mean_pred_prob_last_50": 0.0681787632580381, "mean_token_accuracy": 0.8810141801834106, "step": 41460 }, { "epoch": 0.7372051268376798, "grad_norm": 1.6932716085990416, "learning_rate": 0.0001, "loss": 0.7006, "mean_abs_error": 1116.446146130259, "mean_abs_error_last_10": 375.25268496362287, "mean_abs_error_last_25": 473.6877677820829, "mean_abs_error_last_50": 702.8545541111434, "mean_pred_prob": 0.02066665693128016, "mean_pred_prob_last_10": 0.1045375011046417, "mean_pred_prob_last_25": 0.05666030572028831, "mean_pred_prob_last_50": 0.03487689930479974, "mean_token_accuracy": 0.8788875222206116, "step": 41470 }, { "epoch": 0.7373828951344817, "grad_norm": 1.1851438796248226, "learning_rate": 0.0001, "loss": 0.6743, "mean_abs_error": 213.06235935988843, "mean_abs_error_last_10": 76.88511484955154, "mean_abs_error_last_25": 119.50573147399682, "mean_abs_error_last_50": 154.30553285109818, "mean_pred_prob": 0.06996936837676912, "mean_pred_prob_last_10": 0.32993934117257595, "mean_pred_prob_last_25": 0.18294807467609644, "mean_pred_prob_last_50": 0.11530714030377567, "mean_token_accuracy": 0.8768710017204284, "step": 41480 }, { "epoch": 0.7375606634312837, "grad_norm": 1.302083402938906, "learning_rate": 0.0001, "loss": 0.6958, "mean_abs_error": 906.0735148807919, "mean_abs_error_last_10": 477.48691286376936, "mean_abs_error_last_25": 477.5568578322333, "mean_abs_error_last_50": 645.141814951766, "mean_pred_prob": 0.025519385340157897, "mean_pred_prob_last_10": 0.12617536616162398, "mean_pred_prob_last_25": 0.06807381188264117, "mean_pred_prob_last_50": 0.04182460446609184, "mean_token_accuracy": 0.8703532159328461, "step": 41490 }, { "epoch": 0.7377384317280856, "grad_norm": 1.5881129169845565, "learning_rate": 0.0001, "loss": 0.6702, "mean_abs_error": 758.8904343927752, "mean_abs_error_last_10": 607.0119093046962, "mean_abs_error_last_25": 567.1682030066669, "mean_abs_error_last_50": 700.9688195692924, "mean_pred_prob": 0.044915995368501174, "mean_pred_prob_last_10": 0.21257771616801618, "mean_pred_prob_last_25": 0.11565923047019169, "mean_pred_prob_last_50": 0.0730827790859621, "mean_token_accuracy": 0.8677999377250671, "step": 41500 }, { "epoch": 0.7379162000248876, "grad_norm": 2.2372669084473427, "learning_rate": 0.0001, "loss": 0.8391, "mean_abs_error": 436.9296089858443, "mean_abs_error_last_10": 117.77621579112224, "mean_abs_error_last_25": 159.21142057014222, "mean_abs_error_last_50": 225.75153699793955, "mean_pred_prob": 0.036787885962985455, "mean_pred_prob_last_10": 0.1789645304903388, "mean_pred_prob_last_25": 0.09981032793875784, "mean_pred_prob_last_50": 0.061524058179929855, "mean_token_accuracy": 0.8709857761859894, "step": 41510 }, { "epoch": 0.7380939683216895, "grad_norm": 1.6968397131423623, "learning_rate": 0.0001, "loss": 0.8577, "mean_abs_error": 1018.7834994176168, "mean_abs_error_last_10": 630.592697944224, "mean_abs_error_last_25": 671.6743179986233, "mean_abs_error_last_50": 783.5025412130658, "mean_pred_prob": 0.02910451729549095, "mean_pred_prob_last_10": 0.15293951122730504, "mean_pred_prob_last_25": 0.07924342918558977, "mean_pred_prob_last_50": 0.048188542017305734, "mean_token_accuracy": 0.8666202068328858, "step": 41520 }, { "epoch": 0.7382717366184914, "grad_norm": 1.0615652012180032, "learning_rate": 0.0001, "loss": 0.6868, "mean_abs_error": 705.0309846870026, "mean_abs_error_last_10": 243.87423193796798, "mean_abs_error_last_25": 240.71140193419077, "mean_abs_error_last_50": 394.7985415638285, "mean_pred_prob": 0.030067075608531012, "mean_pred_prob_last_10": 0.15145155617501588, "mean_pred_prob_last_25": 0.0820840366417542, "mean_pred_prob_last_50": 0.049552602466428654, "mean_token_accuracy": 0.863396155834198, "step": 41530 }, { "epoch": 0.7384495049152934, "grad_norm": 2.0775700889021143, "learning_rate": 0.0001, "loss": 0.7323, "mean_abs_error": 550.2803051031176, "mean_abs_error_last_10": 192.69241431294, "mean_abs_error_last_25": 242.10510310744513, "mean_abs_error_last_50": 317.42215957355694, "mean_pred_prob": 0.026527887023985387, "mean_pred_prob_last_10": 0.1341570844873786, "mean_pred_prob_last_25": 0.07264374773949385, "mean_pred_prob_last_50": 0.04422067133709788, "mean_token_accuracy": 0.8686911821365356, "step": 41540 }, { "epoch": 0.7386272732120953, "grad_norm": 1.6059227266195297, "learning_rate": 0.0001, "loss": 0.7254, "mean_abs_error": 351.7947664941063, "mean_abs_error_last_10": 67.89174741206112, "mean_abs_error_last_25": 101.29092692411373, "mean_abs_error_last_50": 184.7822520293334, "mean_pred_prob": 0.028393571125343443, "mean_pred_prob_last_10": 0.15159876365214586, "mean_pred_prob_last_25": 0.07993072187528014, "mean_pred_prob_last_50": 0.04755430463701486, "mean_token_accuracy": 0.8644072473049164, "step": 41550 }, { "epoch": 0.7388050415088973, "grad_norm": 1.1120721478051985, "learning_rate": 0.0001, "loss": 0.6708, "mean_abs_error": 345.2206327445084, "mean_abs_error_last_10": 110.61104252778959, "mean_abs_error_last_25": 175.59100728586048, "mean_abs_error_last_50": 260.37607327898274, "mean_pred_prob": 0.0277491292450577, "mean_pred_prob_last_10": 0.15244904160499573, "mean_pred_prob_last_25": 0.07962488075718284, "mean_pred_prob_last_50": 0.04740002090111375, "mean_token_accuracy": 0.8723950624465943, "step": 41560 }, { "epoch": 0.7389828098056993, "grad_norm": 1.3915640103470563, "learning_rate": 0.0001, "loss": 0.6169, "mean_abs_error": 467.6172212060672, "mean_abs_error_last_10": 231.90797422179662, "mean_abs_error_last_25": 251.63803519783747, "mean_abs_error_last_50": 297.7423736532173, "mean_pred_prob": 0.043583336449228224, "mean_pred_prob_last_10": 0.1895737783284858, "mean_pred_prob_last_25": 0.10992548550711945, "mean_pred_prob_last_50": 0.07027482400881127, "mean_token_accuracy": 0.8748810648918152, "step": 41570 }, { "epoch": 0.7391605781025012, "grad_norm": 1.0437941362338279, "learning_rate": 0.0001, "loss": 0.6794, "mean_abs_error": 316.1644386786676, "mean_abs_error_last_10": 82.4003926160056, "mean_abs_error_last_25": 211.23003320791798, "mean_abs_error_last_50": 307.18397189785594, "mean_pred_prob": 0.04615162555128336, "mean_pred_prob_last_10": 0.22549389507621526, "mean_pred_prob_last_25": 0.12533068982884288, "mean_pred_prob_last_50": 0.07671532868407667, "mean_token_accuracy": 0.8710077404975891, "step": 41580 }, { "epoch": 0.7393383463993032, "grad_norm": 1.5831067976918953, "learning_rate": 0.0001, "loss": 0.7403, "mean_abs_error": 672.6839918471276, "mean_abs_error_last_10": 190.17493937229398, "mean_abs_error_last_25": 295.4964540283105, "mean_abs_error_last_50": 428.4764163324443, "mean_pred_prob": 0.03164056628011167, "mean_pred_prob_last_10": 0.16366742367390544, "mean_pred_prob_last_25": 0.09177763203624636, "mean_pred_prob_last_50": 0.05418320377357304, "mean_token_accuracy": 0.8777562201023101, "step": 41590 }, { "epoch": 0.7395161146961051, "grad_norm": 2.000343895834833, "learning_rate": 0.0001, "loss": 0.7274, "mean_abs_error": 693.4165015538094, "mean_abs_error_last_10": 229.28484789377563, "mean_abs_error_last_25": 275.5207734175689, "mean_abs_error_last_50": 438.1366823694595, "mean_pred_prob": 0.028783605582430028, "mean_pred_prob_last_10": 0.14420993202365934, "mean_pred_prob_last_25": 0.07945495442836545, "mean_pred_prob_last_50": 0.047844782850006595, "mean_token_accuracy": 0.8651979804039002, "step": 41600 }, { "epoch": 0.7396938829929071, "grad_norm": 1.7723258966908744, "learning_rate": 0.0001, "loss": 0.5351, "mean_abs_error": 179.81291826274145, "mean_abs_error_last_10": 28.344542868693246, "mean_abs_error_last_25": 61.546683058396596, "mean_abs_error_last_50": 102.97706189734348, "mean_pred_prob": 0.06503357300534844, "mean_pred_prob_last_10": 0.2971651379019022, "mean_pred_prob_last_25": 0.17032249830663204, "mean_pred_prob_last_50": 0.1071322993375361, "mean_token_accuracy": 0.8863162457942962, "step": 41610 }, { "epoch": 0.739871651289709, "grad_norm": 1.3999764508785033, "learning_rate": 0.0001, "loss": 0.9938, "mean_abs_error": 692.8429117740294, "mean_abs_error_last_10": 326.5380609033384, "mean_abs_error_last_25": 497.14700941043003, "mean_abs_error_last_50": 534.4743904112308, "mean_pred_prob": 0.04761441680311691, "mean_pred_prob_last_10": 0.23054779918165877, "mean_pred_prob_last_25": 0.12843170948617627, "mean_pred_prob_last_50": 0.07852326488646214, "mean_token_accuracy": 0.8613693177700043, "step": 41620 }, { "epoch": 0.7400494195865109, "grad_norm": 2.921845194832501, "learning_rate": 0.0001, "loss": 0.7925, "mean_abs_error": 631.8294680405332, "mean_abs_error_last_10": 116.49055098434819, "mean_abs_error_last_25": 193.1312586527829, "mean_abs_error_last_50": 338.177620415997, "mean_pred_prob": 0.04228573339059949, "mean_pred_prob_last_10": 0.21942877399269492, "mean_pred_prob_last_25": 0.1219381045899354, "mean_pred_prob_last_50": 0.07184959572623484, "mean_token_accuracy": 0.8625504612922669, "step": 41630 }, { "epoch": 0.7402271878833129, "grad_norm": 1.0331348615279345, "learning_rate": 0.0001, "loss": 0.6874, "mean_abs_error": 493.1778950134229, "mean_abs_error_last_10": 173.29080883219098, "mean_abs_error_last_25": 205.67516762751666, "mean_abs_error_last_50": 298.9294288945487, "mean_pred_prob": 0.06702706538490019, "mean_pred_prob_last_10": 0.2728983110864647, "mean_pred_prob_last_25": 0.1785351634141989, "mean_pred_prob_last_50": 0.11055306883063168, "mean_token_accuracy": 0.8669026672840119, "step": 41640 }, { "epoch": 0.7404049561801148, "grad_norm": 1.885926228509596, "learning_rate": 0.0001, "loss": 0.6726, "mean_abs_error": 396.82587847886685, "mean_abs_error_last_10": 155.40425275993096, "mean_abs_error_last_25": 183.7517404172981, "mean_abs_error_last_50": 288.0111622202968, "mean_pred_prob": 0.045721222390420736, "mean_pred_prob_last_10": 0.23328211975749583, "mean_pred_prob_last_25": 0.12311914889141917, "mean_pred_prob_last_50": 0.0761820116546005, "mean_token_accuracy": 0.8696032702922821, "step": 41650 }, { "epoch": 0.7405827244769168, "grad_norm": 1.6857235894004794, "learning_rate": 0.0001, "loss": 0.7276, "mean_abs_error": 585.4801851196488, "mean_abs_error_last_10": 228.91513963100115, "mean_abs_error_last_25": 264.2690362060068, "mean_abs_error_last_50": 356.13829330311466, "mean_pred_prob": 0.03369299083715305, "mean_pred_prob_last_10": 0.1761452059610747, "mean_pred_prob_last_25": 0.09353422037092969, "mean_pred_prob_last_50": 0.05719870898174122, "mean_token_accuracy": 0.8767928898334503, "step": 41660 }, { "epoch": 0.7407604927737187, "grad_norm": 1.4783100315560747, "learning_rate": 0.0001, "loss": 0.6243, "mean_abs_error": 304.5685157881945, "mean_abs_error_last_10": 184.83445362434446, "mean_abs_error_last_25": 243.16993742101403, "mean_abs_error_last_50": 271.2984214704044, "mean_pred_prob": 0.04033451878931373, "mean_pred_prob_last_10": 0.1871516457758844, "mean_pred_prob_last_25": 0.11079140421934426, "mean_pred_prob_last_50": 0.0678161975229159, "mean_token_accuracy": 0.8739334940910339, "step": 41670 }, { "epoch": 0.7409382610705206, "grad_norm": 1.2170626624207164, "learning_rate": 0.0001, "loss": 1.0027, "mean_abs_error": 720.6089170885455, "mean_abs_error_last_10": 378.08602346379564, "mean_abs_error_last_25": 448.60991513971584, "mean_abs_error_last_50": 533.7595402940906, "mean_pred_prob": 0.03773418474884238, "mean_pred_prob_last_10": 0.18046692555653862, "mean_pred_prob_last_25": 0.10019639977545011, "mean_pred_prob_last_50": 0.06195163750235224, "mean_token_accuracy": 0.8687978982925415, "step": 41680 }, { "epoch": 0.7411160293673227, "grad_norm": 1.4160706137130856, "learning_rate": 0.0001, "loss": 0.7441, "mean_abs_error": 687.4142515068845, "mean_abs_error_last_10": 189.8510165971373, "mean_abs_error_last_25": 291.7460086347335, "mean_abs_error_last_50": 400.8340406239514, "mean_pred_prob": 0.042554577282862736, "mean_pred_prob_last_10": 0.21775399091420694, "mean_pred_prob_last_25": 0.12007951406412758, "mean_pred_prob_last_50": 0.07311918463092297, "mean_token_accuracy": 0.8762551426887513, "step": 41690 }, { "epoch": 0.7412937976641246, "grad_norm": 2.2883520025851842, "learning_rate": 0.0001, "loss": 0.7228, "mean_abs_error": 974.4905470434678, "mean_abs_error_last_10": 462.9369633516961, "mean_abs_error_last_25": 518.3118844789547, "mean_abs_error_last_50": 685.1216878890774, "mean_pred_prob": 0.024122342048212887, "mean_pred_prob_last_10": 0.12620266830781474, "mean_pred_prob_last_25": 0.06763904066756368, "mean_pred_prob_last_50": 0.04133597351028584, "mean_token_accuracy": 0.8717836380004883, "step": 41700 }, { "epoch": 0.7414715659609266, "grad_norm": 2.6889514895562474, "learning_rate": 0.0001, "loss": 0.645, "mean_abs_error": 915.7996438395988, "mean_abs_error_last_10": 467.97340526437955, "mean_abs_error_last_25": 465.97941396813513, "mean_abs_error_last_50": 666.7211845579775, "mean_pred_prob": 0.030690771737135948, "mean_pred_prob_last_10": 0.13449208512320182, "mean_pred_prob_last_25": 0.07457294843043201, "mean_pred_prob_last_50": 0.048580672050593424, "mean_token_accuracy": 0.872135853767395, "step": 41710 }, { "epoch": 0.7416493342577285, "grad_norm": 1.5533016388572292, "learning_rate": 0.0001, "loss": 0.7132, "mean_abs_error": 146.5266664713, "mean_abs_error_last_10": 41.94794811314998, "mean_abs_error_last_25": 86.40226626308045, "mean_abs_error_last_50": 114.1026201429227, "mean_pred_prob": 0.05949325216934085, "mean_pred_prob_last_10": 0.2676598533987999, "mean_pred_prob_last_25": 0.15523044411092995, "mean_pred_prob_last_50": 0.09861769108101726, "mean_token_accuracy": 0.8779385507106781, "step": 41720 }, { "epoch": 0.7418271025545304, "grad_norm": 2.9972944174700418, "learning_rate": 0.0001, "loss": 0.7717, "mean_abs_error": 362.66741282290525, "mean_abs_error_last_10": 154.09019852860814, "mean_abs_error_last_25": 137.3587520817808, "mean_abs_error_last_50": 212.71455906827092, "mean_pred_prob": 0.04515752053121105, "mean_pred_prob_last_10": 0.1985576483188197, "mean_pred_prob_last_25": 0.11835685472469777, "mean_pred_prob_last_50": 0.07411201121285557, "mean_token_accuracy": 0.8694278478622437, "step": 41730 }, { "epoch": 0.7420048708513324, "grad_norm": 1.1660279794300883, "learning_rate": 0.0001, "loss": 0.7697, "mean_abs_error": 316.65355880123604, "mean_abs_error_last_10": 154.89386816157858, "mean_abs_error_last_25": 174.47989591241, "mean_abs_error_last_50": 233.14914556754434, "mean_pred_prob": 0.02576235719025135, "mean_pred_prob_last_10": 0.1323351452127099, "mean_pred_prob_last_25": 0.06992436610162259, "mean_pred_prob_last_50": 0.04317761161364615, "mean_token_accuracy": 0.8614117980003357, "step": 41740 }, { "epoch": 0.7421826391481343, "grad_norm": 1.5952648019847078, "learning_rate": 0.0001, "loss": 0.6417, "mean_abs_error": 371.92586492745414, "mean_abs_error_last_10": 146.64955854969205, "mean_abs_error_last_25": 181.78126263531883, "mean_abs_error_last_50": 217.1459700523823, "mean_pred_prob": 0.03893418686348014, "mean_pred_prob_last_10": 0.1893881060765125, "mean_pred_prob_last_25": 0.10191268544876948, "mean_pred_prob_last_50": 0.06365729592507705, "mean_token_accuracy": 0.8586378455162048, "step": 41750 }, { "epoch": 0.7423604074449363, "grad_norm": 1.1356721415488267, "learning_rate": 0.0001, "loss": 0.7634, "mean_abs_error": 327.4986921757064, "mean_abs_error_last_10": 188.4819032478662, "mean_abs_error_last_25": 227.46431410896375, "mean_abs_error_last_50": 252.56112278118044, "mean_pred_prob": 0.03990264241583645, "mean_pred_prob_last_10": 0.2029750385787338, "mean_pred_prob_last_25": 0.11132613052614033, "mean_pred_prob_last_50": 0.06714606112800539, "mean_token_accuracy": 0.8774149656295777, "step": 41760 }, { "epoch": 0.7425381757417382, "grad_norm": 0.9092623406819639, "learning_rate": 0.0001, "loss": 0.7435, "mean_abs_error": 510.15414411704177, "mean_abs_error_last_10": 88.37729179476803, "mean_abs_error_last_25": 151.72973903553648, "mean_abs_error_last_50": 315.4063759174725, "mean_pred_prob": 0.0334037913242355, "mean_pred_prob_last_10": 0.16852726363576948, "mean_pred_prob_last_25": 0.09175075087696313, "mean_pred_prob_last_50": 0.05549993913155049, "mean_token_accuracy": 0.8815193474292755, "step": 41770 }, { "epoch": 0.7427159440385401, "grad_norm": 1.4093432008738347, "learning_rate": 0.0001, "loss": 0.8102, "mean_abs_error": 711.7894233379023, "mean_abs_error_last_10": 248.71440990733018, "mean_abs_error_last_25": 301.7612625804056, "mean_abs_error_last_50": 415.3133102773164, "mean_pred_prob": 0.019546438683755697, "mean_pred_prob_last_10": 0.09984998329309747, "mean_pred_prob_last_25": 0.052985966042615476, "mean_pred_prob_last_50": 0.03262531426153146, "mean_token_accuracy": 0.8686585843563079, "step": 41780 }, { "epoch": 0.7428937123353421, "grad_norm": 1.0114217499693907, "learning_rate": 0.0001, "loss": 0.6869, "mean_abs_error": 624.4480764232818, "mean_abs_error_last_10": 240.48493516043635, "mean_abs_error_last_25": 394.66777563455764, "mean_abs_error_last_50": 514.8983750486324, "mean_pred_prob": 0.04871595594158862, "mean_pred_prob_last_10": 0.23295303572667764, "mean_pred_prob_last_25": 0.13289263212354854, "mean_pred_prob_last_50": 0.08127427098806947, "mean_token_accuracy": 0.8756857931613922, "step": 41790 }, { "epoch": 0.743071480632144, "grad_norm": 1.9483459713394777, "learning_rate": 0.0001, "loss": 0.723, "mean_abs_error": 283.8525615133956, "mean_abs_error_last_10": 58.39439667032453, "mean_abs_error_last_25": 91.4934332816735, "mean_abs_error_last_50": 167.09895739830432, "mean_pred_prob": 0.037159006018191575, "mean_pred_prob_last_10": 0.1991897840052843, "mean_pred_prob_last_25": 0.10779221523553133, "mean_pred_prob_last_50": 0.06363228093832732, "mean_token_accuracy": 0.8734268844127655, "step": 41800 }, { "epoch": 0.7432492489289461, "grad_norm": 2.6099202310361505, "learning_rate": 0.0001, "loss": 0.7508, "mean_abs_error": 389.7441663507029, "mean_abs_error_last_10": 245.53090285774164, "mean_abs_error_last_25": 228.05019560179144, "mean_abs_error_last_50": 288.75989259590784, "mean_pred_prob": 0.03960313161369413, "mean_pred_prob_last_10": 0.1745573591440916, "mean_pred_prob_last_25": 0.10093382466584444, "mean_pred_prob_last_50": 0.06417068177834154, "mean_token_accuracy": 0.8716955542564392, "step": 41810 }, { "epoch": 0.743427017225748, "grad_norm": 1.7139656328649722, "learning_rate": 0.0001, "loss": 0.7174, "mean_abs_error": 1444.46720538205, "mean_abs_error_last_10": 686.6786020508671, "mean_abs_error_last_25": 819.7932624103489, "mean_abs_error_last_50": 984.864986083074, "mean_pred_prob": 0.011938388767885045, "mean_pred_prob_last_10": 0.06825517665711231, "mean_pred_prob_last_25": 0.034941146415076216, "mean_pred_prob_last_50": 0.02075814355048351, "mean_token_accuracy": 0.8726677119731903, "step": 41820 }, { "epoch": 0.7436047855225499, "grad_norm": 2.7517456023250273, "learning_rate": 0.0001, "loss": 0.7083, "mean_abs_error": 631.5644124547259, "mean_abs_error_last_10": 175.32411695529555, "mean_abs_error_last_25": 246.8525127037219, "mean_abs_error_last_50": 376.960408014768, "mean_pred_prob": 0.029509564099134876, "mean_pred_prob_last_10": 0.13637275861110537, "mean_pred_prob_last_25": 0.07902851362014189, "mean_pred_prob_last_50": 0.0487762508681044, "mean_token_accuracy": 0.8660490572452545, "step": 41830 }, { "epoch": 0.7437825538193519, "grad_norm": 1.4541819957251647, "learning_rate": 0.0001, "loss": 0.6564, "mean_abs_error": 190.01365688935874, "mean_abs_error_last_10": 34.82699843517381, "mean_abs_error_last_25": 83.85195484245845, "mean_abs_error_last_50": 106.84432116293883, "mean_pred_prob": 0.039287866465747355, "mean_pred_prob_last_10": 0.20312027409672737, "mean_pred_prob_last_25": 0.10619738027453422, "mean_pred_prob_last_50": 0.06523935049772263, "mean_token_accuracy": 0.874130928516388, "step": 41840 }, { "epoch": 0.7439603221161538, "grad_norm": 1.6886540207869132, "learning_rate": 0.0001, "loss": 0.7894, "mean_abs_error": 489.20795138456407, "mean_abs_error_last_10": 106.39667846010514, "mean_abs_error_last_25": 211.71709892841545, "mean_abs_error_last_50": 293.0134696255335, "mean_pred_prob": 0.04053078514989465, "mean_pred_prob_last_10": 0.19790390748530626, "mean_pred_prob_last_25": 0.11433254880830646, "mean_pred_prob_last_50": 0.06943780374713242, "mean_token_accuracy": 0.8769132614135742, "step": 41850 }, { "epoch": 0.7441380904129558, "grad_norm": 1.8342167927847248, "learning_rate": 0.0001, "loss": 0.798, "mean_abs_error": 323.13231569920407, "mean_abs_error_last_10": 107.52799149577508, "mean_abs_error_last_25": 104.78319226768944, "mean_abs_error_last_50": 145.37847545159076, "mean_pred_prob": 0.04116241754963994, "mean_pred_prob_last_10": 0.20917206890881063, "mean_pred_prob_last_25": 0.11753395702689887, "mean_pred_prob_last_50": 0.07059137960895896, "mean_token_accuracy": 0.8714937329292297, "step": 41860 }, { "epoch": 0.7443158587097577, "grad_norm": 0.8641503405799746, "learning_rate": 0.0001, "loss": 0.6036, "mean_abs_error": 209.66344197751323, "mean_abs_error_last_10": 121.94782507255468, "mean_abs_error_last_25": 159.15120463846768, "mean_abs_error_last_50": 187.20421628228823, "mean_pred_prob": 0.052767863310873506, "mean_pred_prob_last_10": 0.28184561207890513, "mean_pred_prob_last_25": 0.14963558251038195, "mean_pred_prob_last_50": 0.08920554327778518, "mean_token_accuracy": 0.877026629447937, "step": 41870 }, { "epoch": 0.7444936270065596, "grad_norm": 1.0270219039880666, "learning_rate": 0.0001, "loss": 0.6133, "mean_abs_error": 354.3003330038833, "mean_abs_error_last_10": 103.75213663399202, "mean_abs_error_last_25": 181.92156518374628, "mean_abs_error_last_50": 271.68201032211704, "mean_pred_prob": 0.03830432400573045, "mean_pred_prob_last_10": 0.17158978870138525, "mean_pred_prob_last_25": 0.10202511390671135, "mean_pred_prob_last_50": 0.06403366392478346, "mean_token_accuracy": 0.881037586927414, "step": 41880 }, { "epoch": 0.7446713953033616, "grad_norm": 1.6382421226013435, "learning_rate": 0.0001, "loss": 0.892, "mean_abs_error": 909.6279772477767, "mean_abs_error_last_10": 315.86793227147956, "mean_abs_error_last_25": 421.94840575821274, "mean_abs_error_last_50": 606.6778180964758, "mean_pred_prob": 0.01726061129302252, "mean_pred_prob_last_10": 0.09649796080775559, "mean_pred_prob_last_25": 0.04955619643442333, "mean_pred_prob_last_50": 0.029656332527520136, "mean_token_accuracy": 0.8650558471679688, "step": 41890 }, { "epoch": 0.7448491636001635, "grad_norm": 1.7102054134570863, "learning_rate": 0.0001, "loss": 0.7261, "mean_abs_error": 349.2585193343351, "mean_abs_error_last_10": 83.63224086004591, "mean_abs_error_last_25": 116.65529342286126, "mean_abs_error_last_50": 206.00747691352558, "mean_pred_prob": 0.04426856762729585, "mean_pred_prob_last_10": 0.2282165021635592, "mean_pred_prob_last_25": 0.12470587566494942, "mean_pred_prob_last_50": 0.07519332368392498, "mean_token_accuracy": 0.8751703798770905, "step": 41900 }, { "epoch": 0.7450269318969654, "grad_norm": 2.3476993933268684, "learning_rate": 0.0001, "loss": 0.7442, "mean_abs_error": 661.8228199555231, "mean_abs_error_last_10": 309.6670196735088, "mean_abs_error_last_25": 312.10990144484987, "mean_abs_error_last_50": 443.4855583382141, "mean_pred_prob": 0.02150975446566008, "mean_pred_prob_last_10": 0.12152277312707156, "mean_pred_prob_last_25": 0.05991171020432375, "mean_pred_prob_last_50": 0.03571142534492537, "mean_token_accuracy": 0.8717040598392487, "step": 41910 }, { "epoch": 0.7452047001937674, "grad_norm": 2.1111181707053213, "learning_rate": 0.0001, "loss": 0.8102, "mean_abs_error": 267.9801633599084, "mean_abs_error_last_10": 49.982498340468936, "mean_abs_error_last_25": 95.34058002258996, "mean_abs_error_last_50": 167.14672534938762, "mean_pred_prob": 0.04505927902646363, "mean_pred_prob_last_10": 0.2296797253191471, "mean_pred_prob_last_25": 0.12940295226871967, "mean_pred_prob_last_50": 0.0784083523787558, "mean_token_accuracy": 0.8565032601356506, "step": 41920 }, { "epoch": 0.7453824684905694, "grad_norm": 1.5573570935470016, "learning_rate": 0.0001, "loss": 0.6612, "mean_abs_error": 283.80878333673456, "mean_abs_error_last_10": 148.36050908443607, "mean_abs_error_last_25": 144.80422330624532, "mean_abs_error_last_50": 188.1165088407193, "mean_pred_prob": 0.04873678635340184, "mean_pred_prob_last_10": 0.2410282539203763, "mean_pred_prob_last_25": 0.13781812228262424, "mean_pred_prob_last_50": 0.08443652968853713, "mean_token_accuracy": 0.8712931096553802, "step": 41930 }, { "epoch": 0.7455602367873714, "grad_norm": 1.789347278073257, "learning_rate": 0.0001, "loss": 0.6351, "mean_abs_error": 138.97144113725204, "mean_abs_error_last_10": 39.61564162636693, "mean_abs_error_last_25": 67.50965418886489, "mean_abs_error_last_50": 121.84938967896628, "mean_pred_prob": 0.037096644192934035, "mean_pred_prob_last_10": 0.16750580109655858, "mean_pred_prob_last_25": 0.09630516786128282, "mean_pred_prob_last_50": 0.059590636286884546, "mean_token_accuracy": 0.8737316846847534, "step": 41940 }, { "epoch": 0.7457380050841733, "grad_norm": 1.698213973689471, "learning_rate": 0.0001, "loss": 0.7153, "mean_abs_error": 745.9331635811773, "mean_abs_error_last_10": 388.6844453825605, "mean_abs_error_last_25": 380.3236215790711, "mean_abs_error_last_50": 496.6991354811039, "mean_pred_prob": 0.016494265140499918, "mean_pred_prob_last_10": 0.08551260605454444, "mean_pred_prob_last_25": 0.04546567146899179, "mean_pred_prob_last_50": 0.027305484225507826, "mean_token_accuracy": 0.8711526215076446, "step": 41950 }, { "epoch": 0.7459157733809753, "grad_norm": 2.8976080663503843, "learning_rate": 0.0001, "loss": 0.6904, "mean_abs_error": 866.2336145163933, "mean_abs_error_last_10": 312.95411461911925, "mean_abs_error_last_25": 396.6895048041054, "mean_abs_error_last_50": 537.0280120082276, "mean_pred_prob": 0.035696309359627774, "mean_pred_prob_last_10": 0.18017018272075802, "mean_pred_prob_last_25": 0.09956486732698977, "mean_pred_prob_last_50": 0.06023546674987301, "mean_token_accuracy": 0.8773014664649963, "step": 41960 }, { "epoch": 0.7460935416777772, "grad_norm": 1.1415519654320934, "learning_rate": 0.0001, "loss": 0.6581, "mean_abs_error": 1107.212161337104, "mean_abs_error_last_10": 574.1896823059876, "mean_abs_error_last_25": 666.8175430366002, "mean_abs_error_last_50": 771.2506769517074, "mean_pred_prob": 0.036291160045948345, "mean_pred_prob_last_10": 0.18545378243143204, "mean_pred_prob_last_25": 0.10301151867170119, "mean_pred_prob_last_50": 0.06180908873502631, "mean_token_accuracy": 0.8706505477428437, "step": 41970 }, { "epoch": 0.7462713099745791, "grad_norm": 2.359209224852307, "learning_rate": 0.0001, "loss": 0.6998, "mean_abs_error": 113.75382464152578, "mean_abs_error_last_10": 14.03583945196562, "mean_abs_error_last_25": 27.85044553864616, "mean_abs_error_last_50": 44.82294172855565, "mean_pred_prob": 0.05744432639330625, "mean_pred_prob_last_10": 0.28478604555130005, "mean_pred_prob_last_25": 0.16181894205510616, "mean_pred_prob_last_50": 0.09788707681000233, "mean_token_accuracy": 0.8656959354877471, "step": 41980 }, { "epoch": 0.7464490782713811, "grad_norm": 1.1787405534101356, "learning_rate": 0.0001, "loss": 0.6285, "mean_abs_error": 362.2694965309062, "mean_abs_error_last_10": 66.14984943337876, "mean_abs_error_last_25": 110.36647774461741, "mean_abs_error_last_50": 214.11874740795082, "mean_pred_prob": 0.04993117093108594, "mean_pred_prob_last_10": 0.22570595592260362, "mean_pred_prob_last_25": 0.12600132916122675, "mean_pred_prob_last_50": 0.0797368373721838, "mean_token_accuracy": 0.8740615725517273, "step": 41990 }, { "epoch": 0.746626846568183, "grad_norm": 1.2249237338726595, "learning_rate": 0.0001, "loss": 0.8649, "mean_abs_error": 174.44741423427885, "mean_abs_error_last_10": 15.114286411589342, "mean_abs_error_last_25": 55.01725426371689, "mean_abs_error_last_50": 107.91747106278005, "mean_pred_prob": 0.05725669586099684, "mean_pred_prob_last_10": 0.2644243098795414, "mean_pred_prob_last_25": 0.15516089089214802, "mean_pred_prob_last_50": 0.0953620806336403, "mean_token_accuracy": 0.865582925081253, "step": 42000 }, { "epoch": 0.746804614864985, "grad_norm": 1.179288190569604, "learning_rate": 0.0001, "loss": 0.6497, "mean_abs_error": 270.4724039740492, "mean_abs_error_last_10": 176.36881038094208, "mean_abs_error_last_25": 175.0910408722832, "mean_abs_error_last_50": 203.97699106410906, "mean_pred_prob": 0.04179362487047911, "mean_pred_prob_last_10": 0.2021635502576828, "mean_pred_prob_last_25": 0.11226147692650557, "mean_pred_prob_last_50": 0.06912870588712394, "mean_token_accuracy": 0.8736183822154999, "step": 42010 }, { "epoch": 0.7469823831617869, "grad_norm": 1.2815740333518864, "learning_rate": 0.0001, "loss": 0.6595, "mean_abs_error": 591.4819621468815, "mean_abs_error_last_10": 173.80410432314144, "mean_abs_error_last_25": 199.36395959409194, "mean_abs_error_last_50": 331.9892545717033, "mean_pred_prob": 0.025677714287303387, "mean_pred_prob_last_10": 0.12806268022977746, "mean_pred_prob_last_25": 0.07097809697152116, "mean_pred_prob_last_50": 0.042314520600484684, "mean_token_accuracy": 0.8681970953941345, "step": 42020 }, { "epoch": 0.7471601514585888, "grad_norm": 0.9557724291980095, "learning_rate": 0.0001, "loss": 0.7994, "mean_abs_error": 1205.069985755174, "mean_abs_error_last_10": 802.8093719168295, "mean_abs_error_last_25": 803.2269001943641, "mean_abs_error_last_50": 903.9795703221735, "mean_pred_prob": 0.019637614977546035, "mean_pred_prob_last_10": 0.10978306223405525, "mean_pred_prob_last_25": 0.0554529822897166, "mean_pred_prob_last_50": 0.03259093058441067, "mean_token_accuracy": 0.8712645769119263, "step": 42030 }, { "epoch": 0.7473379197553909, "grad_norm": 1.351240143453794, "learning_rate": 0.0001, "loss": 0.712, "mean_abs_error": 883.0386296451761, "mean_abs_error_last_10": 373.3419044425085, "mean_abs_error_last_25": 553.5118120365172, "mean_abs_error_last_50": 660.9663293582228, "mean_pred_prob": 0.027548796017072164, "mean_pred_prob_last_10": 0.1415855800965801, "mean_pred_prob_last_25": 0.07560241096653045, "mean_pred_prob_last_50": 0.04597695101983845, "mean_token_accuracy": 0.8725513935089111, "step": 42040 }, { "epoch": 0.7475156880521928, "grad_norm": 1.5279067273099765, "learning_rate": 0.0001, "loss": 0.7275, "mean_abs_error": 498.5917121361409, "mean_abs_error_last_10": 162.37043749563279, "mean_abs_error_last_25": 184.53242003173426, "mean_abs_error_last_50": 277.00778442973694, "mean_pred_prob": 0.029276926908642052, "mean_pred_prob_last_10": 0.13960101067787037, "mean_pred_prob_last_25": 0.07838743834290654, "mean_pred_prob_last_50": 0.04854577658697963, "mean_token_accuracy": 0.8720181047916412, "step": 42050 }, { "epoch": 0.7476934563489948, "grad_norm": 1.1560064891892046, "learning_rate": 0.0001, "loss": 0.7011, "mean_abs_error": 759.9306815549191, "mean_abs_error_last_10": 270.84288606018663, "mean_abs_error_last_25": 358.04287223080325, "mean_abs_error_last_50": 490.81188200615026, "mean_pred_prob": 0.04160869953921065, "mean_pred_prob_last_10": 0.19551356439478695, "mean_pred_prob_last_25": 0.10800405907211826, "mean_pred_prob_last_50": 0.06781975193880499, "mean_token_accuracy": 0.8670680582523346, "step": 42060 }, { "epoch": 0.7478712246457967, "grad_norm": 3.4277748517526008, "learning_rate": 0.0001, "loss": 0.781, "mean_abs_error": 414.34565360963717, "mean_abs_error_last_10": 238.74677446501545, "mean_abs_error_last_25": 266.1476228537659, "mean_abs_error_last_50": 287.1877724879396, "mean_pred_prob": 0.028125217743217946, "mean_pred_prob_last_10": 0.14658126402646304, "mean_pred_prob_last_25": 0.08008458008989691, "mean_pred_prob_last_50": 0.04824424916878343, "mean_token_accuracy": 0.8703130483627319, "step": 42070 }, { "epoch": 0.7480489929425986, "grad_norm": 1.4412382598648394, "learning_rate": 0.0001, "loss": 0.9499, "mean_abs_error": 676.2712168865766, "mean_abs_error_last_10": 341.5191587985381, "mean_abs_error_last_25": 391.5742109377321, "mean_abs_error_last_50": 460.59781669517963, "mean_pred_prob": 0.044259767467156055, "mean_pred_prob_last_10": 0.19398405951797032, "mean_pred_prob_last_25": 0.11372803235426546, "mean_pred_prob_last_50": 0.07268670692574233, "mean_token_accuracy": 0.8656349658966065, "step": 42080 }, { "epoch": 0.7482267612394006, "grad_norm": 1.4174969565247648, "learning_rate": 0.0001, "loss": 0.6857, "mean_abs_error": 528.492591166081, "mean_abs_error_last_10": 173.51757959824585, "mean_abs_error_last_25": 240.93389968328773, "mean_abs_error_last_50": 336.63194583456783, "mean_pred_prob": 0.028001651214435697, "mean_pred_prob_last_10": 0.14635727778077126, "mean_pred_prob_last_25": 0.07697939509525895, "mean_pred_prob_last_50": 0.04710515318438411, "mean_token_accuracy": 0.8729714572429657, "step": 42090 }, { "epoch": 0.7484045295362025, "grad_norm": 2.1673451898223166, "learning_rate": 0.0001, "loss": 0.6903, "mean_abs_error": 1341.0316392997652, "mean_abs_error_last_10": 553.2024796170065, "mean_abs_error_last_25": 663.0295746032118, "mean_abs_error_last_50": 836.01718395447, "mean_pred_prob": 0.04105387949966825, "mean_pred_prob_last_10": 0.19695565180736593, "mean_pred_prob_last_25": 0.10677541967306751, "mean_pred_prob_last_50": 0.06775778525916394, "mean_token_accuracy": 0.8723589360713959, "step": 42100 }, { "epoch": 0.7485822978330045, "grad_norm": 1.660893939038764, "learning_rate": 0.0001, "loss": 0.8513, "mean_abs_error": 726.1397708701826, "mean_abs_error_last_10": 271.31372861897955, "mean_abs_error_last_25": 406.929943131723, "mean_abs_error_last_50": 448.4577093977192, "mean_pred_prob": 0.030673945834860206, "mean_pred_prob_last_10": 0.13844951536739245, "mean_pred_prob_last_25": 0.08111510000890121, "mean_pred_prob_last_50": 0.05024029354681261, "mean_token_accuracy": 0.8610960066318512, "step": 42110 }, { "epoch": 0.7487600661298064, "grad_norm": 2.1494754789599466, "learning_rate": 0.0001, "loss": 0.8079, "mean_abs_error": 470.30091865584507, "mean_abs_error_last_10": 181.54635062958332, "mean_abs_error_last_25": 240.46048505388734, "mean_abs_error_last_50": 276.55961771735974, "mean_pred_prob": 0.028269312530755996, "mean_pred_prob_last_10": 0.14741464965045453, "mean_pred_prob_last_25": 0.07886972450651228, "mean_pred_prob_last_50": 0.047075085388496515, "mean_token_accuracy": 0.8782821297645569, "step": 42120 }, { "epoch": 0.7489378344266083, "grad_norm": 2.0664663913752497, "learning_rate": 0.0001, "loss": 0.758, "mean_abs_error": 726.5349555828367, "mean_abs_error_last_10": 208.970992785031, "mean_abs_error_last_25": 345.53335591373695, "mean_abs_error_last_50": 528.1015028475776, "mean_pred_prob": 0.02869895085459575, "mean_pred_prob_last_10": 0.14865450931247323, "mean_pred_prob_last_25": 0.07996458762208931, "mean_pred_prob_last_50": 0.047935360204428436, "mean_token_accuracy": 0.8724161207675933, "step": 42130 }, { "epoch": 0.7491156027234103, "grad_norm": 1.663491791924932, "learning_rate": 0.0001, "loss": 0.7067, "mean_abs_error": 657.6260539630091, "mean_abs_error_last_10": 238.90627961392414, "mean_abs_error_last_25": 321.03522423952006, "mean_abs_error_last_50": 440.6205164877789, "mean_pred_prob": 0.03238759573141579, "mean_pred_prob_last_10": 0.1553762945055496, "mean_pred_prob_last_25": 0.08653055847389624, "mean_pred_prob_last_50": 0.054494976432761175, "mean_token_accuracy": 0.8835452497005463, "step": 42140 }, { "epoch": 0.7492933710202122, "grad_norm": 2.4089941516773754, "learning_rate": 0.0001, "loss": 0.6373, "mean_abs_error": 267.0736093536345, "mean_abs_error_last_10": 93.87060626341518, "mean_abs_error_last_25": 113.27060738038764, "mean_abs_error_last_50": 151.96213080857825, "mean_pred_prob": 0.046776544256135824, "mean_pred_prob_last_10": 0.21507343370467424, "mean_pred_prob_last_25": 0.12340153604745865, "mean_pred_prob_last_50": 0.07753106481395662, "mean_token_accuracy": 0.8747484743595123, "step": 42150 }, { "epoch": 0.7494711393170143, "grad_norm": 1.6589306447567131, "learning_rate": 0.0001, "loss": 0.7131, "mean_abs_error": 658.1751030030548, "mean_abs_error_last_10": 157.32396900036625, "mean_abs_error_last_25": 277.417974872812, "mean_abs_error_last_50": 378.501100312933, "mean_pred_prob": 0.03700728400144726, "mean_pred_prob_last_10": 0.15248627141118049, "mean_pred_prob_last_25": 0.09375694347545505, "mean_pred_prob_last_50": 0.059891089191660286, "mean_token_accuracy": 0.8791912436485291, "step": 42160 }, { "epoch": 0.7496489076138162, "grad_norm": 0.9983021105200702, "learning_rate": 0.0001, "loss": 0.6325, "mean_abs_error": 409.94496933276423, "mean_abs_error_last_10": 105.99819593016284, "mean_abs_error_last_25": 201.7177598420647, "mean_abs_error_last_50": 310.72969737427263, "mean_pred_prob": 0.05534943857928738, "mean_pred_prob_last_10": 0.2647954870597459, "mean_pred_prob_last_25": 0.1504477117327042, "mean_pred_prob_last_50": 0.09253329567145556, "mean_token_accuracy": 0.8704458057880402, "step": 42170 }, { "epoch": 0.7498266759106181, "grad_norm": 7.37540387742811, "learning_rate": 0.0001, "loss": 0.6858, "mean_abs_error": 1436.2735134716186, "mean_abs_error_last_10": 793.7742583207726, "mean_abs_error_last_25": 866.5677604523387, "mean_abs_error_last_50": 1068.454244003004, "mean_pred_prob": 0.024144098052056508, "mean_pred_prob_last_10": 0.12038691758061759, "mean_pred_prob_last_25": 0.0655439481488429, "mean_pred_prob_last_50": 0.040548154753923885, "mean_token_accuracy": 0.8750249922275544, "step": 42180 }, { "epoch": 0.7500044442074201, "grad_norm": 0.760568747944006, "learning_rate": 0.0001, "loss": 0.5612, "mean_abs_error": 263.79773082678935, "mean_abs_error_last_10": 63.31321032201966, "mean_abs_error_last_25": 97.45695571840614, "mean_abs_error_last_50": 156.45538504164884, "mean_pred_prob": 0.04298401223495603, "mean_pred_prob_last_10": 0.2113567991182208, "mean_pred_prob_last_25": 0.1190009793266654, "mean_pred_prob_last_50": 0.07257279236800969, "mean_token_accuracy": 0.8745278358459473, "step": 42190 }, { "epoch": 0.750182212504222, "grad_norm": 1.4853978812782478, "learning_rate": 0.0001, "loss": 0.6391, "mean_abs_error": 578.3697492124805, "mean_abs_error_last_10": 174.8613158991626, "mean_abs_error_last_25": 258.0402279879278, "mean_abs_error_last_50": 398.97188006759586, "mean_pred_prob": 0.04057082313811407, "mean_pred_prob_last_10": 0.20248888779897242, "mean_pred_prob_last_25": 0.11165540762012824, "mean_pred_prob_last_50": 0.06759531741845422, "mean_token_accuracy": 0.8719328343868256, "step": 42200 }, { "epoch": 0.750359980801024, "grad_norm": 0.8039353937375631, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 1022.7989664476056, "mean_abs_error_last_10": 542.99916352501, "mean_abs_error_last_25": 602.2127642255629, "mean_abs_error_last_50": 771.3631540787558, "mean_pred_prob": 0.03213550412037876, "mean_pred_prob_last_10": 0.15426331408089028, "mean_pred_prob_last_25": 0.08598294747935142, "mean_pred_prob_last_50": 0.053437327790015844, "mean_token_accuracy": 0.8623893857002258, "step": 42210 }, { "epoch": 0.7505377490978259, "grad_norm": 2.8125503930781184, "learning_rate": 0.0001, "loss": 0.6612, "mean_abs_error": 378.7611337554923, "mean_abs_error_last_10": 88.29206521389906, "mean_abs_error_last_25": 127.25033490833343, "mean_abs_error_last_50": 206.1391152297179, "mean_pred_prob": 0.05832304370123893, "mean_pred_prob_last_10": 0.269410845451057, "mean_pred_prob_last_25": 0.15861492631956936, "mean_pred_prob_last_50": 0.09735743282362819, "mean_token_accuracy": 0.8768263399600983, "step": 42220 }, { "epoch": 0.7507155173946278, "grad_norm": 2.812154984379496, "learning_rate": 0.0001, "loss": 0.75, "mean_abs_error": 704.2027241035856, "mean_abs_error_last_10": 168.6333314518634, "mean_abs_error_last_25": 318.451965338989, "mean_abs_error_last_50": 472.3590260659239, "mean_pred_prob": 0.029129239736357705, "mean_pred_prob_last_10": 0.15303468556376174, "mean_pred_prob_last_25": 0.08534837680053897, "mean_pred_prob_last_50": 0.04900653311633505, "mean_token_accuracy": 0.8692886412143708, "step": 42230 }, { "epoch": 0.7508932856914298, "grad_norm": 1.1032091651088227, "learning_rate": 0.0001, "loss": 0.6745, "mean_abs_error": 372.17480400629904, "mean_abs_error_last_10": 105.11790004278808, "mean_abs_error_last_25": 151.1527814144493, "mean_abs_error_last_50": 210.63346220023345, "mean_pred_prob": 0.0430634252843447, "mean_pred_prob_last_10": 0.2009721178561449, "mean_pred_prob_last_25": 0.11304306633537635, "mean_pred_prob_last_50": 0.07096228643786162, "mean_token_accuracy": 0.8781786262989044, "step": 42240 }, { "epoch": 0.7510710539882317, "grad_norm": 0.9980152402659428, "learning_rate": 0.0001, "loss": 0.5727, "mean_abs_error": 120.46693442448691, "mean_abs_error_last_10": 40.466539249224795, "mean_abs_error_last_25": 56.135029855880774, "mean_abs_error_last_50": 77.14452550586937, "mean_pred_prob": 0.04697503061033785, "mean_pred_prob_last_10": 0.23301834501326085, "mean_pred_prob_last_25": 0.12887933626770973, "mean_pred_prob_last_50": 0.07925649415701627, "mean_token_accuracy": 0.8786891222000122, "step": 42250 }, { "epoch": 0.7512488222850336, "grad_norm": 30.946298026047604, "learning_rate": 0.0001, "loss": 0.718, "mean_abs_error": 366.42608650436034, "mean_abs_error_last_10": 132.2906313867119, "mean_abs_error_last_25": 192.9394922373203, "mean_abs_error_last_50": 255.64429253085586, "mean_pred_prob": 0.03547913720831275, "mean_pred_prob_last_10": 0.18087605014443398, "mean_pred_prob_last_25": 0.0976088697090745, "mean_pred_prob_last_50": 0.05954275615513325, "mean_token_accuracy": 0.875998067855835, "step": 42260 }, { "epoch": 0.7514265905818356, "grad_norm": 1.7393930717780046, "learning_rate": 0.0001, "loss": 0.7064, "mean_abs_error": 66.41081227797488, "mean_abs_error_last_10": 13.701753983480714, "mean_abs_error_last_25": 24.676940440586346, "mean_abs_error_last_50": 36.157097273163245, "mean_pred_prob": 0.05165087301284075, "mean_pred_prob_last_10": 0.2408887892961502, "mean_pred_prob_last_25": 0.13600624427199365, "mean_pred_prob_last_50": 0.08563137799501419, "mean_token_accuracy": 0.8790555953979492, "step": 42270 }, { "epoch": 0.7516043588786376, "grad_norm": 1.307124377916074, "learning_rate": 0.0001, "loss": 0.6771, "mean_abs_error": 342.15192847527817, "mean_abs_error_last_10": 52.658391686340714, "mean_abs_error_last_25": 111.33996112989004, "mean_abs_error_last_50": 192.44997683099947, "mean_pred_prob": 0.059997776441741736, "mean_pred_prob_last_10": 0.2869938582647592, "mean_pred_prob_last_25": 0.16005165486130862, "mean_pred_prob_last_50": 0.0979168287361972, "mean_token_accuracy": 0.8732599735260009, "step": 42280 }, { "epoch": 0.7517821271754396, "grad_norm": 1.5654739058113845, "learning_rate": 0.0001, "loss": 0.649, "mean_abs_error": 445.2098636501664, "mean_abs_error_last_10": 83.47904670532321, "mean_abs_error_last_25": 183.9050612626675, "mean_abs_error_last_50": 347.6778716251067, "mean_pred_prob": 0.034238252276554706, "mean_pred_prob_last_10": 0.1786009009927511, "mean_pred_prob_last_25": 0.0961014456115663, "mean_pred_prob_last_50": 0.05751581732183695, "mean_token_accuracy": 0.8754186928272247, "step": 42290 }, { "epoch": 0.7519598954722415, "grad_norm": 1.6621044037743722, "learning_rate": 0.0001, "loss": 0.7565, "mean_abs_error": 320.3350482992013, "mean_abs_error_last_10": 96.94276161778706, "mean_abs_error_last_25": 141.20006426196147, "mean_abs_error_last_50": 220.69855972620434, "mean_pred_prob": 0.027361789485439658, "mean_pred_prob_last_10": 0.13827115148305893, "mean_pred_prob_last_25": 0.07221257872879505, "mean_pred_prob_last_50": 0.04495528694242239, "mean_token_accuracy": 0.8724827647209168, "step": 42300 }, { "epoch": 0.7521376637690435, "grad_norm": 1.5526405707454074, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 168.95104032311116, "mean_abs_error_last_10": 38.33005875856121, "mean_abs_error_last_25": 56.797110046883745, "mean_abs_error_last_50": 109.2372198260963, "mean_pred_prob": 0.05519444332458079, "mean_pred_prob_last_10": 0.2525528911501169, "mean_pred_prob_last_25": 0.14861113522201777, "mean_pred_prob_last_50": 0.09303969806060194, "mean_token_accuracy": 0.8734419107437134, "step": 42310 }, { "epoch": 0.7523154320658454, "grad_norm": 1.367672429877533, "learning_rate": 0.0001, "loss": 0.7716, "mean_abs_error": 244.76380153875954, "mean_abs_error_last_10": 66.27573840607303, "mean_abs_error_last_25": 77.74086870776077, "mean_abs_error_last_50": 139.6617150381933, "mean_pred_prob": 0.053157402714714405, "mean_pred_prob_last_10": 0.25864276848733425, "mean_pred_prob_last_25": 0.14856166038662194, "mean_pred_prob_last_50": 0.08862186474725604, "mean_token_accuracy": 0.8712087452411652, "step": 42320 }, { "epoch": 0.7524932003626473, "grad_norm": 1.6266724367393812, "learning_rate": 0.0001, "loss": 0.7307, "mean_abs_error": 817.2397360560019, "mean_abs_error_last_10": 203.86644322668312, "mean_abs_error_last_25": 255.84465983489926, "mean_abs_error_last_50": 427.51118908675807, "mean_pred_prob": 0.02762359384796582, "mean_pred_prob_last_10": 0.13331267251633108, "mean_pred_prob_last_25": 0.07588913353392854, "mean_pred_prob_last_50": 0.046411071333568546, "mean_token_accuracy": 0.8793737709522247, "step": 42330 }, { "epoch": 0.7526709686594493, "grad_norm": 1.823447269662895, "learning_rate": 0.0001, "loss": 0.7585, "mean_abs_error": 737.9904390497811, "mean_abs_error_last_10": 295.72410671509795, "mean_abs_error_last_25": 361.7073085873335, "mean_abs_error_last_50": 464.98196393904675, "mean_pred_prob": 0.03219432725745719, "mean_pred_prob_last_10": 0.16656949444441124, "mean_pred_prob_last_25": 0.08892754057305866, "mean_pred_prob_last_50": 0.05464417639013845, "mean_token_accuracy": 0.8619737505912781, "step": 42340 }, { "epoch": 0.7528487369562512, "grad_norm": 2.1883889072043723, "learning_rate": 0.0001, "loss": 0.6722, "mean_abs_error": 133.2428408225131, "mean_abs_error_last_10": 36.93092090324903, "mean_abs_error_last_25": 49.26372361578918, "mean_abs_error_last_50": 70.68873806418023, "mean_pred_prob": 0.05093447398394346, "mean_pred_prob_last_10": 0.25547959581017493, "mean_pred_prob_last_25": 0.1427720120176673, "mean_pred_prob_last_50": 0.0866250267252326, "mean_token_accuracy": 0.8770832717418671, "step": 42350 }, { "epoch": 0.7530265052530531, "grad_norm": 3.1394369498993124, "learning_rate": 0.0001, "loss": 0.7876, "mean_abs_error": 377.8115018071554, "mean_abs_error_last_10": 118.25489528324074, "mean_abs_error_last_25": 142.42497237626128, "mean_abs_error_last_50": 219.54569375676843, "mean_pred_prob": 0.03684994652867317, "mean_pred_prob_last_10": 0.1882391119375825, "mean_pred_prob_last_25": 0.10138281052932144, "mean_pred_prob_last_50": 0.061256539262831214, "mean_token_accuracy": 0.8713476419448852, "step": 42360 }, { "epoch": 0.7532042735498551, "grad_norm": 1.7441707474106407, "learning_rate": 0.0001, "loss": 0.7591, "mean_abs_error": 410.8870981979911, "mean_abs_error_last_10": 125.69008349414742, "mean_abs_error_last_25": 158.22786985237613, "mean_abs_error_last_50": 210.57860362525633, "mean_pred_prob": 0.03349134351592511, "mean_pred_prob_last_10": 0.17477429676800965, "mean_pred_prob_last_25": 0.09057216662913561, "mean_pred_prob_last_50": 0.056239350652322176, "mean_token_accuracy": 0.8768068015575409, "step": 42370 }, { "epoch": 0.753382041846657, "grad_norm": 1.8616784435392126, "learning_rate": 0.0001, "loss": 0.6589, "mean_abs_error": 691.6175683298268, "mean_abs_error_last_10": 207.7674178120422, "mean_abs_error_last_25": 299.8894928459403, "mean_abs_error_last_50": 399.3331882437295, "mean_pred_prob": 0.028608722030185163, "mean_pred_prob_last_10": 0.14839016012847422, "mean_pred_prob_last_25": 0.07668484887108207, "mean_pred_prob_last_50": 0.04702454493381083, "mean_token_accuracy": 0.876230663061142, "step": 42380 }, { "epoch": 0.753559810143459, "grad_norm": 2.224927330315116, "learning_rate": 0.0001, "loss": 0.698, "mean_abs_error": 817.1329885977066, "mean_abs_error_last_10": 317.27798868502254, "mean_abs_error_last_25": 408.41681355877574, "mean_abs_error_last_50": 546.2347591318404, "mean_pred_prob": 0.024542585556628183, "mean_pred_prob_last_10": 0.12410159752471372, "mean_pred_prob_last_25": 0.06863824890460819, "mean_pred_prob_last_50": 0.04173119541956112, "mean_token_accuracy": 0.8695471346378326, "step": 42390 }, { "epoch": 0.753737578440261, "grad_norm": 1.3929814571360786, "learning_rate": 0.0001, "loss": 0.8116, "mean_abs_error": 330.73416454151953, "mean_abs_error_last_10": 151.7076827728257, "mean_abs_error_last_25": 238.5721042231699, "mean_abs_error_last_50": 273.25728388666664, "mean_pred_prob": 0.034678352624177934, "mean_pred_prob_last_10": 0.17492268458008767, "mean_pred_prob_last_25": 0.09814441045746207, "mean_pred_prob_last_50": 0.059647382935509086, "mean_token_accuracy": 0.8664823710918427, "step": 42400 }, { "epoch": 0.753915346737063, "grad_norm": 2.6111242249319524, "learning_rate": 0.0001, "loss": 0.8555, "mean_abs_error": 385.1818553873919, "mean_abs_error_last_10": 240.90708870866882, "mean_abs_error_last_25": 202.04999215028425, "mean_abs_error_last_50": 260.08119688264543, "mean_pred_prob": 0.03805552464909852, "mean_pred_prob_last_10": 0.1788529383367859, "mean_pred_prob_last_25": 0.10087609607726336, "mean_pred_prob_last_50": 0.0630714907660149, "mean_token_accuracy": 0.870570981502533, "step": 42410 }, { "epoch": 0.7540931150338649, "grad_norm": 1.9303865554858664, "learning_rate": 0.0001, "loss": 0.7363, "mean_abs_error": 417.37465912157893, "mean_abs_error_last_10": 207.57890191268447, "mean_abs_error_last_25": 241.00834233997375, "mean_abs_error_last_50": 322.10694148007315, "mean_pred_prob": 0.026932778651826084, "mean_pred_prob_last_10": 0.13849131129682063, "mean_pred_prob_last_25": 0.07293071523308754, "mean_pred_prob_last_50": 0.04501883680932224, "mean_token_accuracy": 0.8654054999351501, "step": 42420 }, { "epoch": 0.7542708833306668, "grad_norm": 1.2159317098609754, "learning_rate": 0.0001, "loss": 0.7182, "mean_abs_error": 937.1504043174549, "mean_abs_error_last_10": 265.0594503210685, "mean_abs_error_last_25": 423.1329585027427, "mean_abs_error_last_50": 603.2783672835374, "mean_pred_prob": 0.019775181589648128, "mean_pred_prob_last_10": 0.11232957994798198, "mean_pred_prob_last_25": 0.058471095078857616, "mean_pred_prob_last_50": 0.033918696630280465, "mean_token_accuracy": 0.8777563691139221, "step": 42430 }, { "epoch": 0.7544486516274688, "grad_norm": 1.9911689767112482, "learning_rate": 0.0001, "loss": 0.8538, "mean_abs_error": 328.86788346304513, "mean_abs_error_last_10": 105.89358767229257, "mean_abs_error_last_25": 121.11234443407122, "mean_abs_error_last_50": 174.66799505977457, "mean_pred_prob": 0.037455439241603015, "mean_pred_prob_last_10": 0.18699770607054234, "mean_pred_prob_last_25": 0.10515185995027423, "mean_pred_prob_last_50": 0.06477538631297648, "mean_token_accuracy": 0.8837620377540588, "step": 42440 }, { "epoch": 0.7546264199242707, "grad_norm": 1.83504845458629, "learning_rate": 0.0001, "loss": 0.7407, "mean_abs_error": 548.6042632670749, "mean_abs_error_last_10": 258.52376010628745, "mean_abs_error_last_25": 295.37897921017066, "mean_abs_error_last_50": 380.68839057725074, "mean_pred_prob": 0.03275610720738768, "mean_pred_prob_last_10": 0.1411119595519267, "mean_pred_prob_last_25": 0.08378741138149053, "mean_pred_prob_last_50": 0.05360495898639783, "mean_token_accuracy": 0.8720489978790283, "step": 42450 }, { "epoch": 0.7548041882210726, "grad_norm": 1.4818846117072344, "learning_rate": 0.0001, "loss": 0.6765, "mean_abs_error": 434.8383395216404, "mean_abs_error_last_10": 170.35668115372405, "mean_abs_error_last_25": 211.83437246891543, "mean_abs_error_last_50": 227.28211644932944, "mean_pred_prob": 0.034548369818367064, "mean_pred_prob_last_10": 0.1649549636989832, "mean_pred_prob_last_25": 0.08758165780454874, "mean_pred_prob_last_50": 0.05538268592208624, "mean_token_accuracy": 0.8699258029460907, "step": 42460 }, { "epoch": 0.7549819565178746, "grad_norm": 0.9730328554590747, "learning_rate": 0.0001, "loss": 0.712, "mean_abs_error": 708.8269628035932, "mean_abs_error_last_10": 199.3208624037487, "mean_abs_error_last_25": 354.00652077408705, "mean_abs_error_last_50": 445.4629384528853, "mean_pred_prob": 0.051910559108364396, "mean_pred_prob_last_10": 0.24587410591775552, "mean_pred_prob_last_25": 0.1416841904981993, "mean_pred_prob_last_50": 0.08578733520116658, "mean_token_accuracy": 0.8738258957862854, "step": 42470 }, { "epoch": 0.7551597248146765, "grad_norm": 1.239255150809753, "learning_rate": 0.0001, "loss": 0.9152, "mean_abs_error": 285.3338046200733, "mean_abs_error_last_10": 67.98957346250981, "mean_abs_error_last_25": 92.10030606751161, "mean_abs_error_last_50": 150.7254786443918, "mean_pred_prob": 0.04353896677494049, "mean_pred_prob_last_10": 0.21948386244475843, "mean_pred_prob_last_25": 0.1220325087197125, "mean_pred_prob_last_50": 0.07379002766683698, "mean_token_accuracy": 0.8688180625438691, "step": 42480 }, { "epoch": 0.7553374931114785, "grad_norm": 1.6800777840942576, "learning_rate": 0.0001, "loss": 0.6971, "mean_abs_error": 1103.8231573756939, "mean_abs_error_last_10": 728.078938392616, "mean_abs_error_last_25": 807.8163523204673, "mean_abs_error_last_50": 890.3137640130233, "mean_pred_prob": 0.050032361924968426, "mean_pred_prob_last_10": 0.23476077874802287, "mean_pred_prob_last_25": 0.13161417988376342, "mean_pred_prob_last_50": 0.08276964755423251, "mean_token_accuracy": 0.8724640130996704, "step": 42490 }, { "epoch": 0.7555152614082804, "grad_norm": 1.7693434596061104, "learning_rate": 0.0001, "loss": 0.5774, "mean_abs_error": 293.00084970661425, "mean_abs_error_last_10": 97.2640186024619, "mean_abs_error_last_25": 109.27668790729767, "mean_abs_error_last_50": 143.2707847795042, "mean_pred_prob": 0.05594642905052751, "mean_pred_prob_last_10": 0.2533292195759714, "mean_pred_prob_last_25": 0.14528428423218429, "mean_pred_prob_last_50": 0.09102493985556066, "mean_token_accuracy": 0.8787717998027802, "step": 42500 }, { "epoch": 0.7556930297050823, "grad_norm": 1.670155126008351, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 589.7198211367605, "mean_abs_error_last_10": 171.88046798327585, "mean_abs_error_last_25": 349.58451177708275, "mean_abs_error_last_50": 446.17376724547586, "mean_pred_prob": 0.0342404833441833, "mean_pred_prob_last_10": 0.17386483174050227, "mean_pred_prob_last_25": 0.094165770849213, "mean_pred_prob_last_50": 0.057431163266301155, "mean_token_accuracy": 0.86559699177742, "step": 42510 }, { "epoch": 0.7558707980018844, "grad_norm": 1.4595656826306467, "learning_rate": 0.0001, "loss": 0.6797, "mean_abs_error": 191.40884356364367, "mean_abs_error_last_10": 31.859135579261704, "mean_abs_error_last_25": 75.18108467064759, "mean_abs_error_last_50": 177.22968945206625, "mean_pred_prob": 0.05134773687459528, "mean_pred_prob_last_10": 0.23099029790610076, "mean_pred_prob_last_25": 0.1343568972311914, "mean_pred_prob_last_50": 0.08270825264044106, "mean_token_accuracy": 0.8716344952583313, "step": 42520 }, { "epoch": 0.7560485662986863, "grad_norm": 1.2297977803162123, "learning_rate": 0.0001, "loss": 0.8055, "mean_abs_error": 368.8071856358057, "mean_abs_error_last_10": 112.90088006664216, "mean_abs_error_last_25": 126.96286895804967, "mean_abs_error_last_50": 208.93524093900237, "mean_pred_prob": 0.03986902611795813, "mean_pred_prob_last_10": 0.22332645561546088, "mean_pred_prob_last_25": 0.12012943672016263, "mean_pred_prob_last_50": 0.06908382121473551, "mean_token_accuracy": 0.8664432466030121, "step": 42530 }, { "epoch": 0.7562263345954883, "grad_norm": 1.911536035472855, "learning_rate": 0.0001, "loss": 0.6681, "mean_abs_error": 226.7386396058173, "mean_abs_error_last_10": 49.32282819995832, "mean_abs_error_last_25": 125.72690567904074, "mean_abs_error_last_50": 173.7071190960659, "mean_pred_prob": 0.05289849874097854, "mean_pred_prob_last_10": 0.2503635209752247, "mean_pred_prob_last_25": 0.1373367871157825, "mean_pred_prob_last_50": 0.0859539971454069, "mean_token_accuracy": 0.8782108664512634, "step": 42540 }, { "epoch": 0.7564041028922902, "grad_norm": 3.6365918674182067, "learning_rate": 0.0001, "loss": 0.8445, "mean_abs_error": 1121.4232322268767, "mean_abs_error_last_10": 408.41043035086904, "mean_abs_error_last_25": 519.6588796764589, "mean_abs_error_last_50": 721.3208633578005, "mean_pred_prob": 0.020088511364883745, "mean_pred_prob_last_10": 0.10785677990643308, "mean_pred_prob_last_25": 0.057901672774460164, "mean_pred_prob_last_50": 0.03485091831535101, "mean_token_accuracy": 0.8813194572925568, "step": 42550 }, { "epoch": 0.7565818711890921, "grad_norm": 1.2172011179880065, "learning_rate": 0.0001, "loss": 0.6493, "mean_abs_error": 538.7494725406573, "mean_abs_error_last_10": 111.97040407171271, "mean_abs_error_last_25": 186.07951754160194, "mean_abs_error_last_50": 318.6689324201328, "mean_pred_prob": 0.033753764303401115, "mean_pred_prob_last_10": 0.17897768467664718, "mean_pred_prob_last_25": 0.0916369328275323, "mean_pred_prob_last_50": 0.05528777306899428, "mean_token_accuracy": 0.8729721426963806, "step": 42560 }, { "epoch": 0.7567596394858941, "grad_norm": 2.609626606280947, "learning_rate": 0.0001, "loss": 0.7282, "mean_abs_error": 593.1308825241088, "mean_abs_error_last_10": 147.44204012885146, "mean_abs_error_last_25": 185.19725019552553, "mean_abs_error_last_50": 317.6482327232228, "mean_pred_prob": 0.04019533180398867, "mean_pred_prob_last_10": 0.18080047795083373, "mean_pred_prob_last_25": 0.105443295231089, "mean_pred_prob_last_50": 0.06614495754474774, "mean_token_accuracy": 0.8689935028553009, "step": 42570 }, { "epoch": 0.756937407782696, "grad_norm": 3.6889495455436982, "learning_rate": 0.0001, "loss": 0.9942, "mean_abs_error": 616.5770417002043, "mean_abs_error_last_10": 162.7240347445176, "mean_abs_error_last_25": 234.38921820944557, "mean_abs_error_last_50": 317.3037980046739, "mean_pred_prob": 0.035135289683239534, "mean_pred_prob_last_10": 0.1706364855985157, "mean_pred_prob_last_25": 0.09502559979446232, "mean_pred_prob_last_50": 0.059315407346002755, "mean_token_accuracy": 0.846789437532425, "step": 42580 }, { "epoch": 0.757115176079498, "grad_norm": 0.9204719561788168, "learning_rate": 0.0001, "loss": 0.6317, "mean_abs_error": 264.58350530627786, "mean_abs_error_last_10": 66.72210600321714, "mean_abs_error_last_25": 97.47309621107647, "mean_abs_error_last_50": 149.8597482554812, "mean_pred_prob": 0.05042413561604917, "mean_pred_prob_last_10": 0.23444936536252498, "mean_pred_prob_last_25": 0.1337196059525013, "mean_pred_prob_last_50": 0.08389857830479741, "mean_token_accuracy": 0.8836850106716156, "step": 42590 }, { "epoch": 0.7572929443762999, "grad_norm": 1.9094671360843796, "learning_rate": 0.0001, "loss": 0.7062, "mean_abs_error": 537.3631202137401, "mean_abs_error_last_10": 165.84967082762813, "mean_abs_error_last_25": 236.47072214522132, "mean_abs_error_last_50": 374.49283544600064, "mean_pred_prob": 0.030606328567955643, "mean_pred_prob_last_10": 0.1395253915572539, "mean_pred_prob_last_25": 0.08276968153659255, "mean_pred_prob_last_50": 0.05113738130312413, "mean_token_accuracy": 0.8752159655094147, "step": 42600 }, { "epoch": 0.7574707126731018, "grad_norm": 1.2024495955974694, "learning_rate": 0.0001, "loss": 0.6528, "mean_abs_error": 414.0434280203555, "mean_abs_error_last_10": 82.32498748452743, "mean_abs_error_last_25": 107.07413500265307, "mean_abs_error_last_50": 207.46372472995554, "mean_pred_prob": 0.05160745265893638, "mean_pred_prob_last_10": 0.24405155226122588, "mean_pred_prob_last_25": 0.1433982014656067, "mean_pred_prob_last_50": 0.0865088262129575, "mean_token_accuracy": 0.8728963911533356, "step": 42610 }, { "epoch": 0.7576484809699038, "grad_norm": 1.5535914886343745, "learning_rate": 0.0001, "loss": 0.693, "mean_abs_error": 107.11311512534273, "mean_abs_error_last_10": 31.52429402574458, "mean_abs_error_last_25": 53.464984517658785, "mean_abs_error_last_50": 82.53898017801282, "mean_pred_prob": 0.06202252227813006, "mean_pred_prob_last_10": 0.2702730931341648, "mean_pred_prob_last_25": 0.1609027598053217, "mean_pred_prob_last_50": 0.10006644148379565, "mean_token_accuracy": 0.8781785666942596, "step": 42620 }, { "epoch": 0.7578262492667057, "grad_norm": 1.054794241937455, "learning_rate": 0.0001, "loss": 0.5681, "mean_abs_error": 186.91250823563598, "mean_abs_error_last_10": 45.715461231589494, "mean_abs_error_last_25": 105.8243676479329, "mean_abs_error_last_50": 179.5850577893575, "mean_pred_prob": 0.06335109798237681, "mean_pred_prob_last_10": 0.22466601952910423, "mean_pred_prob_last_25": 0.14949364345520735, "mean_pred_prob_last_50": 0.0998853704892099, "mean_token_accuracy": 0.8829913079738617, "step": 42630 }, { "epoch": 0.7580040175635078, "grad_norm": 1.498280200343057, "learning_rate": 0.0001, "loss": 0.7541, "mean_abs_error": 1084.3917452807414, "mean_abs_error_last_10": 417.65925434760067, "mean_abs_error_last_25": 506.4851822483373, "mean_abs_error_last_50": 757.4049751504689, "mean_pred_prob": 0.029997684729460163, "mean_pred_prob_last_10": 0.16074924165732227, "mean_pred_prob_last_25": 0.08187547449197155, "mean_pred_prob_last_50": 0.05006852559745312, "mean_token_accuracy": 0.8703588366508483, "step": 42640 }, { "epoch": 0.7581817858603097, "grad_norm": 1.0160934797633374, "learning_rate": 0.0001, "loss": 0.6732, "mean_abs_error": 544.8368629550138, "mean_abs_error_last_10": 227.1790151980152, "mean_abs_error_last_25": 300.037608204396, "mean_abs_error_last_50": 357.7854477870007, "mean_pred_prob": 0.030209444370120762, "mean_pred_prob_last_10": 0.14404549039900302, "mean_pred_prob_last_25": 0.07948676561936736, "mean_pred_prob_last_50": 0.04943335778079927, "mean_token_accuracy": 0.8617922008037567, "step": 42650 }, { "epoch": 0.7583595541571116, "grad_norm": 3.625032605319353, "learning_rate": 0.0001, "loss": 0.6619, "mean_abs_error": 757.0943534642516, "mean_abs_error_last_10": 195.67979117041364, "mean_abs_error_last_25": 388.2816864561596, "mean_abs_error_last_50": 540.5856149358794, "mean_pred_prob": 0.03600180597859435, "mean_pred_prob_last_10": 0.1537103382870555, "mean_pred_prob_last_25": 0.09019608108792454, "mean_pred_prob_last_50": 0.057962995872367175, "mean_token_accuracy": 0.8814608633518219, "step": 42660 }, { "epoch": 0.7585373224539136, "grad_norm": 2.07788666765179, "learning_rate": 0.0001, "loss": 0.7844, "mean_abs_error": 765.4763877014367, "mean_abs_error_last_10": 387.00499941119705, "mean_abs_error_last_25": 439.4848718671277, "mean_abs_error_last_50": 551.2631537653322, "mean_pred_prob": 0.03900151482957881, "mean_pred_prob_last_10": 0.18230136577622033, "mean_pred_prob_last_25": 0.10202805670560337, "mean_pred_prob_last_50": 0.06357346526638138, "mean_token_accuracy": 0.874477881193161, "step": 42670 }, { "epoch": 0.7587150907507155, "grad_norm": 1.1870034516315784, "learning_rate": 0.0001, "loss": 0.7644, "mean_abs_error": 617.3376012315732, "mean_abs_error_last_10": 238.16425389201953, "mean_abs_error_last_25": 314.0705194101696, "mean_abs_error_last_50": 435.2156502395128, "mean_pred_prob": 0.04578196064685471, "mean_pred_prob_last_10": 0.23393729869276286, "mean_pred_prob_last_25": 0.1261035309114959, "mean_pred_prob_last_50": 0.07744439318194055, "mean_token_accuracy": 0.8625653624534607, "step": 42680 }, { "epoch": 0.7588928590475175, "grad_norm": 0.9288505420180166, "learning_rate": 0.0001, "loss": 0.7615, "mean_abs_error": 1002.7904684568597, "mean_abs_error_last_10": 455.8135561960912, "mean_abs_error_last_25": 532.3458337621864, "mean_abs_error_last_50": 684.5960188436555, "mean_pred_prob": 0.03643572029977804, "mean_pred_prob_last_10": 0.173778162713279, "mean_pred_prob_last_25": 0.09847310781187843, "mean_pred_prob_last_50": 0.06042533272993751, "mean_token_accuracy": 0.8771171927452087, "step": 42690 }, { "epoch": 0.7590706273443194, "grad_norm": 2.03891048889021, "learning_rate": 0.0001, "loss": 0.6258, "mean_abs_error": 330.1098465025958, "mean_abs_error_last_10": 108.8472645321701, "mean_abs_error_last_25": 115.68689486979306, "mean_abs_error_last_50": 194.38114984972722, "mean_pred_prob": 0.04567502299323678, "mean_pred_prob_last_10": 0.19930040054023265, "mean_pred_prob_last_25": 0.11612412510439754, "mean_pred_prob_last_50": 0.07437677709385752, "mean_token_accuracy": 0.8686441719532013, "step": 42700 }, { "epoch": 0.7592483956411213, "grad_norm": 2.1429207893504176, "learning_rate": 0.0001, "loss": 0.6572, "mean_abs_error": 292.619327587763, "mean_abs_error_last_10": 39.68084502270837, "mean_abs_error_last_25": 80.56215976254256, "mean_abs_error_last_50": 155.97329494845664, "mean_pred_prob": 0.05237836483865976, "mean_pred_prob_last_10": 0.24351443946361542, "mean_pred_prob_last_25": 0.13927672542631625, "mean_pred_prob_last_50": 0.08645584639161825, "mean_token_accuracy": 0.8720121920108795, "step": 42710 }, { "epoch": 0.7594261639379233, "grad_norm": 2.449880023785947, "learning_rate": 0.0001, "loss": 0.778, "mean_abs_error": 799.0539476002018, "mean_abs_error_last_10": 349.6872156860468, "mean_abs_error_last_25": 451.05181736006153, "mean_abs_error_last_50": 544.2025146671642, "mean_pred_prob": 0.04563504253310384, "mean_pred_prob_last_10": 0.22355653799022548, "mean_pred_prob_last_25": 0.1255157305597095, "mean_pred_prob_last_50": 0.07795501012878958, "mean_token_accuracy": 0.8635978460311889, "step": 42720 }, { "epoch": 0.7596039322347252, "grad_norm": 1.1778473615818794, "learning_rate": 0.0001, "loss": 0.8094, "mean_abs_error": 319.33842026453993, "mean_abs_error_last_10": 82.74597671400548, "mean_abs_error_last_25": 126.67454175029798, "mean_abs_error_last_50": 144.86822202843393, "mean_pred_prob": 0.04168131994083524, "mean_pred_prob_last_10": 0.19327448680996895, "mean_pred_prob_last_25": 0.11255268817767501, "mean_pred_prob_last_50": 0.0696324972435832, "mean_token_accuracy": 0.865456885099411, "step": 42730 }, { "epoch": 0.7597817005315272, "grad_norm": 1.3791880193216854, "learning_rate": 0.0001, "loss": 0.728, "mean_abs_error": 944.7789309442629, "mean_abs_error_last_10": 297.77098495555225, "mean_abs_error_last_25": 413.98199987817054, "mean_abs_error_last_50": 598.7535475994586, "mean_pred_prob": 0.036856029348564336, "mean_pred_prob_last_10": 0.17812486136099323, "mean_pred_prob_last_25": 0.10232959181303158, "mean_pred_prob_last_50": 0.06232744774315506, "mean_token_accuracy": 0.8689571619033813, "step": 42740 }, { "epoch": 0.7599594688283292, "grad_norm": 1.5623020449512175, "learning_rate": 0.0001, "loss": 0.6846, "mean_abs_error": 312.8593648442734, "mean_abs_error_last_10": 162.6170835928811, "mean_abs_error_last_25": 171.13492436200087, "mean_abs_error_last_50": 191.44144796904075, "mean_pred_prob": 0.02733350165653974, "mean_pred_prob_last_10": 0.13762595057487487, "mean_pred_prob_last_25": 0.07545550875365734, "mean_pred_prob_last_50": 0.04608500017784536, "mean_token_accuracy": 0.8796416759490967, "step": 42750 }, { "epoch": 0.7601372371251311, "grad_norm": 2.3142663900988256, "learning_rate": 0.0001, "loss": 0.6858, "mean_abs_error": 281.3712859585997, "mean_abs_error_last_10": 56.80909082471428, "mean_abs_error_last_25": 99.541610756945, "mean_abs_error_last_50": 195.0703194130684, "mean_pred_prob": 0.03192561990581453, "mean_pred_prob_last_10": 0.16333517543971537, "mean_pred_prob_last_25": 0.08824285864830017, "mean_pred_prob_last_50": 0.05396484918892384, "mean_token_accuracy": 0.8717852532863617, "step": 42760 }, { "epoch": 0.7603150054219331, "grad_norm": 1.486926373360757, "learning_rate": 0.0001, "loss": 0.7639, "mean_abs_error": 603.3217175539088, "mean_abs_error_last_10": 301.1187721331861, "mean_abs_error_last_25": 321.9398169963976, "mean_abs_error_last_50": 360.1077667879111, "mean_pred_prob": 0.037365518446313216, "mean_pred_prob_last_10": 0.19216642760438846, "mean_pred_prob_last_25": 0.10415679269935936, "mean_pred_prob_last_50": 0.06329381805844606, "mean_token_accuracy": 0.8717766284942627, "step": 42770 }, { "epoch": 0.760492773718735, "grad_norm": 1.2167357999641653, "learning_rate": 0.0001, "loss": 0.7679, "mean_abs_error": 1163.0672069851391, "mean_abs_error_last_10": 510.8990617751527, "mean_abs_error_last_25": 686.0468790594215, "mean_abs_error_last_50": 857.8189578367188, "mean_pred_prob": 0.03803844276408199, "mean_pred_prob_last_10": 0.17842535377130844, "mean_pred_prob_last_25": 0.10006504543707706, "mean_pred_prob_last_50": 0.06318078517797403, "mean_token_accuracy": 0.8773992478847503, "step": 42780 }, { "epoch": 0.760670542015537, "grad_norm": 1.2804346794882466, "learning_rate": 0.0001, "loss": 0.7726, "mean_abs_error": 1628.092805309959, "mean_abs_error_last_10": 965.5912659971469, "mean_abs_error_last_25": 1009.6381463174903, "mean_abs_error_last_50": 1224.2474295276581, "mean_pred_prob": 0.013513544241141063, "mean_pred_prob_last_10": 0.0721193921228405, "mean_pred_prob_last_25": 0.037676149755134246, "mean_pred_prob_last_50": 0.023039513772528154, "mean_token_accuracy": 0.8654528677463531, "step": 42790 }, { "epoch": 0.7608483103123389, "grad_norm": 1.6301200899138653, "learning_rate": 0.0001, "loss": 0.6618, "mean_abs_error": 120.52673128172842, "mean_abs_error_last_10": 44.02005910617956, "mean_abs_error_last_25": 56.580105571088666, "mean_abs_error_last_50": 84.37735066188941, "mean_pred_prob": 0.052091068401932714, "mean_pred_prob_last_10": 0.24374094530940055, "mean_pred_prob_last_25": 0.13944536000490187, "mean_pred_prob_last_50": 0.08312033377587795, "mean_token_accuracy": 0.8766770899295807, "step": 42800 }, { "epoch": 0.7610260786091408, "grad_norm": 2.0287986649066507, "learning_rate": 0.0001, "loss": 0.7168, "mean_abs_error": 485.78964100351624, "mean_abs_error_last_10": 153.556408850796, "mean_abs_error_last_25": 281.8388021391098, "mean_abs_error_last_50": 339.47872717382023, "mean_pred_prob": 0.02337514830287546, "mean_pred_prob_last_10": 0.11713108737021685, "mean_pred_prob_last_25": 0.06455054553225636, "mean_pred_prob_last_50": 0.03968699523247778, "mean_token_accuracy": 0.8700902104377747, "step": 42810 }, { "epoch": 0.7612038469059428, "grad_norm": 1.8757787009475515, "learning_rate": 0.0001, "loss": 0.7447, "mean_abs_error": 516.2130410919376, "mean_abs_error_last_10": 239.431502926752, "mean_abs_error_last_25": 323.3910139802766, "mean_abs_error_last_50": 411.21074950462435, "mean_pred_prob": 0.04057171531021595, "mean_pred_prob_last_10": 0.20443497388623655, "mean_pred_prob_last_25": 0.1105302459327504, "mean_pred_prob_last_50": 0.0674742862349376, "mean_token_accuracy": 0.8748876869678497, "step": 42820 }, { "epoch": 0.7613816152027447, "grad_norm": 1.1485773257691736, "learning_rate": 0.0001, "loss": 0.7007, "mean_abs_error": 807.5313797936087, "mean_abs_error_last_10": 412.2738032261499, "mean_abs_error_last_25": 512.5794103084825, "mean_abs_error_last_50": 562.456532842264, "mean_pred_prob": 0.013854156935121865, "mean_pred_prob_last_10": 0.07403352714609354, "mean_pred_prob_last_25": 0.038848714204505086, "mean_pred_prob_last_50": 0.023473818553611636, "mean_token_accuracy": 0.8853594541549683, "step": 42830 }, { "epoch": 0.7615593834995467, "grad_norm": 0.7714767062007158, "learning_rate": 0.0001, "loss": 0.6126, "mean_abs_error": 732.9906131610095, "mean_abs_error_last_10": 247.66351224570798, "mean_abs_error_last_25": 272.5283076356577, "mean_abs_error_last_50": 404.3130851181274, "mean_pred_prob": 0.04932540847803466, "mean_pred_prob_last_10": 0.24230931205675005, "mean_pred_prob_last_25": 0.1391616582230199, "mean_pred_prob_last_50": 0.084233698353637, "mean_token_accuracy": 0.873899906873703, "step": 42840 }, { "epoch": 0.7617371517963486, "grad_norm": 1.2857571357616897, "learning_rate": 0.0001, "loss": 0.6964, "mean_abs_error": 180.14870398268963, "mean_abs_error_last_10": 33.84887615192244, "mean_abs_error_last_25": 75.35116259130906, "mean_abs_error_last_50": 95.43510348517883, "mean_pred_prob": 0.05567980301566422, "mean_pred_prob_last_10": 0.2688131883740425, "mean_pred_prob_last_25": 0.14840855356305838, "mean_pred_prob_last_50": 0.09260008810088038, "mean_token_accuracy": 0.8740314304828644, "step": 42850 }, { "epoch": 0.7619149200931505, "grad_norm": 1.8204707405108511, "learning_rate": 0.0001, "loss": 0.6635, "mean_abs_error": 490.53473776539613, "mean_abs_error_last_10": 233.45389287055636, "mean_abs_error_last_25": 361.4584154503315, "mean_abs_error_last_50": 393.4175089950817, "mean_pred_prob": 0.048679404321592304, "mean_pred_prob_last_10": 0.22692711218260228, "mean_pred_prob_last_25": 0.1284186553908512, "mean_pred_prob_last_50": 0.08114546206779778, "mean_token_accuracy": 0.8670105159282684, "step": 42860 }, { "epoch": 0.7620926883899526, "grad_norm": 1.8857360526965619, "learning_rate": 0.0001, "loss": 0.9343, "mean_abs_error": 616.7945832319386, "mean_abs_error_last_10": 276.3521846544978, "mean_abs_error_last_25": 257.43650853419604, "mean_abs_error_last_50": 333.4547291356894, "mean_pred_prob": 0.045291737688239665, "mean_pred_prob_last_10": 0.21443682142416948, "mean_pred_prob_last_25": 0.12282877791440114, "mean_pred_prob_last_50": 0.07568514698650688, "mean_token_accuracy": 0.865199190378189, "step": 42870 }, { "epoch": 0.7622704566867545, "grad_norm": 1.3080039546063744, "learning_rate": 0.0001, "loss": 0.7135, "mean_abs_error": 566.2137363556955, "mean_abs_error_last_10": 168.40005359610586, "mean_abs_error_last_25": 252.61673583244078, "mean_abs_error_last_50": 338.8921918937006, "mean_pred_prob": 0.036702659347793085, "mean_pred_prob_last_10": 0.20071482185740025, "mean_pred_prob_last_25": 0.10678486278047786, "mean_pred_prob_last_50": 0.062419309647521, "mean_token_accuracy": 0.8685319721698761, "step": 42880 }, { "epoch": 0.7624482249835565, "grad_norm": 2.570763291735314, "learning_rate": 0.0001, "loss": 0.6372, "mean_abs_error": 118.81287738872217, "mean_abs_error_last_10": 39.27349658107383, "mean_abs_error_last_25": 43.927950967546664, "mean_abs_error_last_50": 57.54674480455194, "mean_pred_prob": 0.050888323411345485, "mean_pred_prob_last_10": 0.24928647577762603, "mean_pred_prob_last_25": 0.14046594984829425, "mean_pred_prob_last_50": 0.08627147283405065, "mean_token_accuracy": 0.8715509653091431, "step": 42890 }, { "epoch": 0.7626259932803584, "grad_norm": 2.5367409333043787, "learning_rate": 0.0001, "loss": 0.6562, "mean_abs_error": 229.8814233627881, "mean_abs_error_last_10": 81.33520276696157, "mean_abs_error_last_25": 136.1854042749399, "mean_abs_error_last_50": 163.38942380180586, "mean_pred_prob": 0.05930878225481138, "mean_pred_prob_last_10": 0.2776350502157584, "mean_pred_prob_last_25": 0.15586643405258654, "mean_pred_prob_last_50": 0.09695501234382391, "mean_token_accuracy": 0.8780681371688843, "step": 42900 }, { "epoch": 0.7628037615771603, "grad_norm": 1.2824483234875665, "learning_rate": 0.0001, "loss": 0.8502, "mean_abs_error": 452.66434325287116, "mean_abs_error_last_10": 249.1841358795165, "mean_abs_error_last_25": 314.1710202226748, "mean_abs_error_last_50": 379.86044655215784, "mean_pred_prob": 0.041539994528284295, "mean_pred_prob_last_10": 0.20813155276118778, "mean_pred_prob_last_25": 0.11697980907629243, "mean_pred_prob_last_50": 0.070298345037736, "mean_token_accuracy": 0.8644788146018982, "step": 42910 }, { "epoch": 0.7629815298739623, "grad_norm": 2.394506507113541, "learning_rate": 0.0001, "loss": 0.7717, "mean_abs_error": 269.79060273696734, "mean_abs_error_last_10": 131.93903696081486, "mean_abs_error_last_25": 174.91491631792198, "mean_abs_error_last_50": 190.53615621489524, "mean_pred_prob": 0.04959249435923994, "mean_pred_prob_last_10": 0.23599114338867366, "mean_pred_prob_last_25": 0.13422936936840416, "mean_pred_prob_last_50": 0.08238543036859483, "mean_token_accuracy": 0.868234246969223, "step": 42920 }, { "epoch": 0.7631592981707642, "grad_norm": 1.9984516037510227, "learning_rate": 0.0001, "loss": 0.6761, "mean_abs_error": 286.82389222090876, "mean_abs_error_last_10": 112.44120844902106, "mean_abs_error_last_25": 107.98891959553983, "mean_abs_error_last_50": 149.19979130964737, "mean_pred_prob": 0.0445379696553573, "mean_pred_prob_last_10": 0.23716190550476313, "mean_pred_prob_last_25": 0.12876583468168973, "mean_pred_prob_last_50": 0.0760765272192657, "mean_token_accuracy": 0.8790523886680603, "step": 42930 }, { "epoch": 0.7633370664675662, "grad_norm": 2.068668324246929, "learning_rate": 0.0001, "loss": 0.6658, "mean_abs_error": 549.6298511244636, "mean_abs_error_last_10": 272.07105662717, "mean_abs_error_last_25": 296.19508135120105, "mean_abs_error_last_50": 332.0560537618438, "mean_pred_prob": 0.023191378090996296, "mean_pred_prob_last_10": 0.1144898000638932, "mean_pred_prob_last_25": 0.06335222208872437, "mean_pred_prob_last_50": 0.038552506174892184, "mean_token_accuracy": 0.8659279406070709, "step": 42940 }, { "epoch": 0.7635148347643681, "grad_norm": 1.5890011152437573, "learning_rate": 0.0001, "loss": 0.7898, "mean_abs_error": 272.2455209593687, "mean_abs_error_last_10": 64.22718281462129, "mean_abs_error_last_25": 105.53846694580268, "mean_abs_error_last_50": 170.27931123262707, "mean_pred_prob": 0.045742744207382204, "mean_pred_prob_last_10": 0.2290762521326542, "mean_pred_prob_last_25": 0.12603223267942668, "mean_pred_prob_last_50": 0.07588963946327568, "mean_token_accuracy": 0.8685550093650818, "step": 42950 }, { "epoch": 0.76369260306117, "grad_norm": 1.5898744290827462, "learning_rate": 0.0001, "loss": 0.7339, "mean_abs_error": 409.095504891707, "mean_abs_error_last_10": 265.28023487812686, "mean_abs_error_last_25": 243.60736092103107, "mean_abs_error_last_50": 256.868674128543, "mean_pred_prob": 0.04553223071852699, "mean_pred_prob_last_10": 0.22651972506428136, "mean_pred_prob_last_25": 0.12496914369985461, "mean_pred_prob_last_50": 0.07730395894031972, "mean_token_accuracy": 0.8732440710067749, "step": 42960 }, { "epoch": 0.763870371357972, "grad_norm": 1.1573637421815655, "learning_rate": 0.0001, "loss": 0.6956, "mean_abs_error": 522.7772209961428, "mean_abs_error_last_10": 240.1564102711856, "mean_abs_error_last_25": 344.69686511426636, "mean_abs_error_last_50": 402.959297429579, "mean_pred_prob": 0.033460228453623134, "mean_pred_prob_last_10": 0.18430628252681344, "mean_pred_prob_last_25": 0.09585167486220598, "mean_pred_prob_last_50": 0.05618974837707356, "mean_token_accuracy": 0.8823812127113342, "step": 42970 }, { "epoch": 0.7640481396547739, "grad_norm": 3.4529558676846994, "learning_rate": 0.0001, "loss": 0.6976, "mean_abs_error": 935.5651913569061, "mean_abs_error_last_10": 518.4541810725062, "mean_abs_error_last_25": 529.7849837966623, "mean_abs_error_last_50": 637.9989787762039, "mean_pred_prob": 0.04811457683972549, "mean_pred_prob_last_10": 0.2201981263642665, "mean_pred_prob_last_25": 0.12012567633355502, "mean_pred_prob_last_50": 0.07674254672019742, "mean_token_accuracy": 0.8676682651042938, "step": 42980 }, { "epoch": 0.764225907951576, "grad_norm": 1.8769695874760584, "learning_rate": 0.0001, "loss": 0.7308, "mean_abs_error": 397.6492814517611, "mean_abs_error_last_10": 245.79776674343006, "mean_abs_error_last_25": 215.17176864729313, "mean_abs_error_last_50": 250.6547890399714, "mean_pred_prob": 0.04294331350829452, "mean_pred_prob_last_10": 0.1809941265033558, "mean_pred_prob_last_25": 0.10688411032315344, "mean_pred_prob_last_50": 0.0697006911272183, "mean_token_accuracy": 0.8799808859825134, "step": 42990 }, { "epoch": 0.7644036762483779, "grad_norm": 2.0745146235068277, "learning_rate": 0.0001, "loss": 0.6412, "mean_abs_error": 333.8137995128938, "mean_abs_error_last_10": 45.5406731571632, "mean_abs_error_last_25": 76.18029001677016, "mean_abs_error_last_50": 155.32413849806352, "mean_pred_prob": 0.030476158880628646, "mean_pred_prob_last_10": 0.16342706847935914, "mean_pred_prob_last_25": 0.08998429886996746, "mean_pred_prob_last_50": 0.05448954571038485, "mean_token_accuracy": 0.8872572183609009, "step": 43000 }, { "epoch": 0.7645814445451798, "grad_norm": 1.9679855997628026, "learning_rate": 0.0001, "loss": 0.666, "mean_abs_error": 602.171262069811, "mean_abs_error_last_10": 267.34767500263786, "mean_abs_error_last_25": 421.78556295522867, "mean_abs_error_last_50": 477.8157028922342, "mean_pred_prob": 0.0432126010506181, "mean_pred_prob_last_10": 0.2150739451753907, "mean_pred_prob_last_25": 0.12016084846691229, "mean_pred_prob_last_50": 0.07237922556232661, "mean_token_accuracy": 0.8692938566207886, "step": 43010 }, { "epoch": 0.7647592128419818, "grad_norm": 1.3779802439663673, "learning_rate": 0.0001, "loss": 0.5979, "mean_abs_error": 184.47708147887624, "mean_abs_error_last_10": 35.93892410332249, "mean_abs_error_last_25": 59.27200037216867, "mean_abs_error_last_50": 97.72511861890166, "mean_pred_prob": 0.045922596799209715, "mean_pred_prob_last_10": 0.2136787936091423, "mean_pred_prob_last_25": 0.12227177619934082, "mean_pred_prob_last_50": 0.07599280411377549, "mean_token_accuracy": 0.8834059655666351, "step": 43020 }, { "epoch": 0.7649369811387837, "grad_norm": 1.0192296466390232, "learning_rate": 0.0001, "loss": 0.7548, "mean_abs_error": 575.0275892631406, "mean_abs_error_last_10": 114.06095405862229, "mean_abs_error_last_25": 191.2020411760731, "mean_abs_error_last_50": 334.47886858760694, "mean_pred_prob": 0.03020692865829915, "mean_pred_prob_last_10": 0.16435054432367907, "mean_pred_prob_last_25": 0.08689609846333042, "mean_pred_prob_last_50": 0.050716496916720644, "mean_token_accuracy": 0.8800638139247894, "step": 43030 }, { "epoch": 0.7651147494355857, "grad_norm": 2.0539243013978536, "learning_rate": 0.0001, "loss": 0.6755, "mean_abs_error": 477.73798092183944, "mean_abs_error_last_10": 79.77371957548226, "mean_abs_error_last_25": 121.8918797676586, "mean_abs_error_last_50": 242.18752918241535, "mean_pred_prob": 0.04595279537606985, "mean_pred_prob_last_10": 0.224972559325397, "mean_pred_prob_last_25": 0.12743744449689984, "mean_pred_prob_last_50": 0.07900431551970541, "mean_token_accuracy": 0.8846216857433319, "step": 43040 }, { "epoch": 0.7652925177323876, "grad_norm": 1.8716825846836558, "learning_rate": 0.0001, "loss": 0.7146, "mean_abs_error": 830.1931469071667, "mean_abs_error_last_10": 472.1572458190602, "mean_abs_error_last_25": 537.537582617638, "mean_abs_error_last_50": 653.0611432569883, "mean_pred_prob": 0.04486944614618551, "mean_pred_prob_last_10": 0.21396115954266862, "mean_pred_prob_last_25": 0.11980257334944326, "mean_pred_prob_last_50": 0.07450419058441185, "mean_token_accuracy": 0.867380028963089, "step": 43050 }, { "epoch": 0.7654702860291895, "grad_norm": 3.3388190553068378, "learning_rate": 0.0001, "loss": 0.5842, "mean_abs_error": 314.9767731222615, "mean_abs_error_last_10": 124.07788754628352, "mean_abs_error_last_25": 138.47546755787238, "mean_abs_error_last_50": 191.96438111070668, "mean_pred_prob": 0.0439292200608179, "mean_pred_prob_last_10": 0.22156150303781033, "mean_pred_prob_last_25": 0.12362609999254345, "mean_pred_prob_last_50": 0.07441069227643311, "mean_token_accuracy": 0.8757641732692718, "step": 43060 }, { "epoch": 0.7656480543259915, "grad_norm": 1.4518560710641586, "learning_rate": 0.0001, "loss": 0.6351, "mean_abs_error": 1248.037084759607, "mean_abs_error_last_10": 776.3665760909081, "mean_abs_error_last_25": 846.5326618775364, "mean_abs_error_last_50": 992.141685115196, "mean_pred_prob": 0.022295864089392124, "mean_pred_prob_last_10": 0.12359756285586627, "mean_pred_prob_last_25": 0.0653795790130971, "mean_pred_prob_last_50": 0.038851145710214044, "mean_token_accuracy": 0.8770124793052674, "step": 43070 }, { "epoch": 0.7658258226227934, "grad_norm": 1.8397939812750068, "learning_rate": 0.0001, "loss": 0.5459, "mean_abs_error": 371.0226173222082, "mean_abs_error_last_10": 46.16967273670256, "mean_abs_error_last_25": 83.76953473204938, "mean_abs_error_last_50": 174.91511508059972, "mean_pred_prob": 0.05517515253741294, "mean_pred_prob_last_10": 0.2562232181429863, "mean_pred_prob_last_25": 0.1495795856229961, "mean_pred_prob_last_50": 0.09370761215686799, "mean_token_accuracy": 0.8848819196224212, "step": 43080 }, { "epoch": 0.7660035909195954, "grad_norm": 1.8826631972269106, "learning_rate": 0.0001, "loss": 0.7003, "mean_abs_error": 670.0854881837553, "mean_abs_error_last_10": 205.43598995477532, "mean_abs_error_last_25": 239.2113714437885, "mean_abs_error_last_50": 398.93392686978206, "mean_pred_prob": 0.04091142532415688, "mean_pred_prob_last_10": 0.19496048537548633, "mean_pred_prob_last_25": 0.10875007524155081, "mean_pred_prob_last_50": 0.06769088476430625, "mean_token_accuracy": 0.8738707184791565, "step": 43090 }, { "epoch": 0.7661813592163973, "grad_norm": 1.858991345266767, "learning_rate": 0.0001, "loss": 0.8167, "mean_abs_error": 796.2385954775739, "mean_abs_error_last_10": 208.4168354189792, "mean_abs_error_last_25": 293.9966415545307, "mean_abs_error_last_50": 489.6949871791133, "mean_pred_prob": 0.03710734284832142, "mean_pred_prob_last_10": 0.16729376756120473, "mean_pred_prob_last_25": 0.09678932125680148, "mean_pred_prob_last_50": 0.061526974738808346, "mean_token_accuracy": 0.863806688785553, "step": 43100 }, { "epoch": 0.7663591275131993, "grad_norm": 1.8066302508162373, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 597.3508335930625, "mean_abs_error_last_10": 164.10703554012093, "mean_abs_error_last_25": 225.49569010576678, "mean_abs_error_last_50": 338.45759521015265, "mean_pred_prob": 0.045157002937048676, "mean_pred_prob_last_10": 0.204222956544254, "mean_pred_prob_last_25": 0.11944992641219869, "mean_pred_prob_last_50": 0.07325667267432437, "mean_token_accuracy": 0.86964630484581, "step": 43110 }, { "epoch": 0.7665368958100013, "grad_norm": 0.8728178845308214, "learning_rate": 0.0001, "loss": 0.8337, "mean_abs_error": 1058.18940770812, "mean_abs_error_last_10": 527.944032937379, "mean_abs_error_last_25": 588.2925297537421, "mean_abs_error_last_50": 725.7927776495684, "mean_pred_prob": 0.03867123598611215, "mean_pred_prob_last_10": 0.20605863435775973, "mean_pred_prob_last_25": 0.10750546145718545, "mean_pred_prob_last_50": 0.06484347881487337, "mean_token_accuracy": 0.8753463447093963, "step": 43120 }, { "epoch": 0.7667146641068032, "grad_norm": 1.3946110040693924, "learning_rate": 0.0001, "loss": 0.5725, "mean_abs_error": 338.3556874604819, "mean_abs_error_last_10": 135.6305710826662, "mean_abs_error_last_25": 151.70996112429805, "mean_abs_error_last_50": 218.48824489111718, "mean_pred_prob": 0.06453000174369664, "mean_pred_prob_last_10": 0.2511221442837268, "mean_pred_prob_last_25": 0.15650329194031656, "mean_pred_prob_last_50": 0.10332361886976286, "mean_token_accuracy": 0.8814705193042756, "step": 43130 }, { "epoch": 0.7668924324036052, "grad_norm": 1.5141081510789738, "learning_rate": 0.0001, "loss": 0.6619, "mean_abs_error": 166.80408399560923, "mean_abs_error_last_10": 47.755462321514116, "mean_abs_error_last_25": 61.06518766575217, "mean_abs_error_last_50": 87.11202646432615, "mean_pred_prob": 0.04820036794990301, "mean_pred_prob_last_10": 0.2231625460088253, "mean_pred_prob_last_25": 0.13245576489716768, "mean_pred_prob_last_50": 0.08232635725289583, "mean_token_accuracy": 0.87642040848732, "step": 43140 }, { "epoch": 0.7670702007004071, "grad_norm": 2.502482349424724, "learning_rate": 0.0001, "loss": 0.6885, "mean_abs_error": 119.45089649250053, "mean_abs_error_last_10": 22.35737878181126, "mean_abs_error_last_25": 32.834376328887686, "mean_abs_error_last_50": 46.92293791146243, "mean_pred_prob": 0.06795746386051178, "mean_pred_prob_last_10": 0.31134285889565944, "mean_pred_prob_last_25": 0.17959684301167728, "mean_pred_prob_last_50": 0.11246472774073482, "mean_token_accuracy": 0.8705994129180908, "step": 43150 }, { "epoch": 0.767247968997209, "grad_norm": 1.9725093404814253, "learning_rate": 0.0001, "loss": 0.7631, "mean_abs_error": 284.6782163913374, "mean_abs_error_last_10": 106.37223944852317, "mean_abs_error_last_25": 128.91519036551557, "mean_abs_error_last_50": 199.94806036134258, "mean_pred_prob": 0.032657048432156444, "mean_pred_prob_last_10": 0.1625521583482623, "mean_pred_prob_last_25": 0.08748932024464011, "mean_pred_prob_last_50": 0.053850222006440164, "mean_token_accuracy": 0.8563326239585877, "step": 43160 }, { "epoch": 0.767425737294011, "grad_norm": 1.4563914267227787, "learning_rate": 0.0001, "loss": 0.7098, "mean_abs_error": 897.0049308381747, "mean_abs_error_last_10": 491.2172962319108, "mean_abs_error_last_25": 570.2800069614934, "mean_abs_error_last_50": 669.5236567980016, "mean_pred_prob": 0.023253655881853776, "mean_pred_prob_last_10": 0.11527987128356472, "mean_pred_prob_last_25": 0.06300596013315954, "mean_pred_prob_last_50": 0.03860094216652214, "mean_token_accuracy": 0.8631658971309661, "step": 43170 }, { "epoch": 0.7676035055908129, "grad_norm": 3.403357523044466, "learning_rate": 0.0001, "loss": 0.6906, "mean_abs_error": 818.6743022789169, "mean_abs_error_last_10": 315.60038247582054, "mean_abs_error_last_25": 321.96938976116553, "mean_abs_error_last_50": 478.5685297598069, "mean_pred_prob": 0.032004654727643356, "mean_pred_prob_last_10": 0.14994185928953813, "mean_pred_prob_last_25": 0.08600640233489684, "mean_pred_prob_last_50": 0.05329296740237623, "mean_token_accuracy": 0.8718173325061798, "step": 43180 }, { "epoch": 0.7677812738876149, "grad_norm": 3.107786820283394, "learning_rate": 0.0001, "loss": 0.7091, "mean_abs_error": 643.8416005795249, "mean_abs_error_last_10": 354.105836795837, "mean_abs_error_last_25": 404.01108828572563, "mean_abs_error_last_50": 479.05339165896055, "mean_pred_prob": 0.04356463913281914, "mean_pred_prob_last_10": 0.2133233025611844, "mean_pred_prob_last_25": 0.12249876219430007, "mean_pred_prob_last_50": 0.07321535652154125, "mean_token_accuracy": 0.872965544462204, "step": 43190 }, { "epoch": 0.7679590421844168, "grad_norm": 1.1155079925993132, "learning_rate": 0.0001, "loss": 0.671, "mean_abs_error": 135.8927858529068, "mean_abs_error_last_10": 41.03481557711963, "mean_abs_error_last_25": 62.68227870857388, "mean_abs_error_last_50": 86.69489955393621, "mean_pred_prob": 0.046877911500632766, "mean_pred_prob_last_10": 0.22629865109920502, "mean_pred_prob_last_25": 0.12568943202495575, "mean_pred_prob_last_50": 0.07826923485845327, "mean_token_accuracy": 0.8735768318176269, "step": 43200 }, { "epoch": 0.7681368104812187, "grad_norm": 1.669579525209196, "learning_rate": 0.0001, "loss": 0.8173, "mean_abs_error": 1395.4682126964938, "mean_abs_error_last_10": 831.3969137149476, "mean_abs_error_last_25": 844.8163778122323, "mean_abs_error_last_50": 1002.3196438076463, "mean_pred_prob": 0.030120279420225417, "mean_pred_prob_last_10": 0.14803303569206036, "mean_pred_prob_last_25": 0.08616521505027777, "mean_pred_prob_last_50": 0.051399663895426786, "mean_token_accuracy": 0.8627020001411438, "step": 43210 }, { "epoch": 0.7683145787780207, "grad_norm": 1.0848037600570302, "learning_rate": 0.0001, "loss": 0.661, "mean_abs_error": 529.846312451024, "mean_abs_error_last_10": 145.00275583800612, "mean_abs_error_last_25": 235.9426255703754, "mean_abs_error_last_50": 390.5838568607206, "mean_pred_prob": 0.025171793857589365, "mean_pred_prob_last_10": 0.14148743115365506, "mean_pred_prob_last_25": 0.07432950399816037, "mean_pred_prob_last_50": 0.043694398878142235, "mean_token_accuracy": 0.8797486603260041, "step": 43220 }, { "epoch": 0.7684923470748227, "grad_norm": 2.5707456454491515, "learning_rate": 0.0001, "loss": 0.6296, "mean_abs_error": 766.1895596803708, "mean_abs_error_last_10": 236.56302120637378, "mean_abs_error_last_25": 298.64913338023143, "mean_abs_error_last_50": 460.6397999502519, "mean_pred_prob": 0.034535644273273644, "mean_pred_prob_last_10": 0.14485018285922707, "mean_pred_prob_last_25": 0.08404757998650894, "mean_pred_prob_last_50": 0.05564372717635706, "mean_token_accuracy": 0.8724278748035431, "step": 43230 }, { "epoch": 0.7686701153716247, "grad_norm": 1.6238262295409829, "learning_rate": 0.0001, "loss": 0.8352, "mean_abs_error": 190.4271150008291, "mean_abs_error_last_10": 62.26452354463849, "mean_abs_error_last_25": 66.26794181267863, "mean_abs_error_last_50": 105.68085230008077, "mean_pred_prob": 0.054302584705874325, "mean_pred_prob_last_10": 0.26273629255592823, "mean_pred_prob_last_25": 0.14873399389907718, "mean_pred_prob_last_50": 0.09063822729513049, "mean_token_accuracy": 0.8723559975624084, "step": 43240 }, { "epoch": 0.7688478836684266, "grad_norm": 1.5615462734406653, "learning_rate": 0.0001, "loss": 0.6821, "mean_abs_error": 364.30891495690474, "mean_abs_error_last_10": 196.99507562575587, "mean_abs_error_last_25": 238.77322438727484, "mean_abs_error_last_50": 293.1312062741181, "mean_pred_prob": 0.03326992504298687, "mean_pred_prob_last_10": 0.1633101728744805, "mean_pred_prob_last_25": 0.09195866915397347, "mean_pred_prob_last_50": 0.05579152710270137, "mean_token_accuracy": 0.8671492040157318, "step": 43250 }, { "epoch": 0.7690256519652285, "grad_norm": 1.0782059480913955, "learning_rate": 0.0001, "loss": 0.718, "mean_abs_error": 642.4803307823662, "mean_abs_error_last_10": 207.98394222530632, "mean_abs_error_last_25": 273.84846772640265, "mean_abs_error_last_50": 385.19821031028823, "mean_pred_prob": 0.04028661818301771, "mean_pred_prob_last_10": 0.19314704582793638, "mean_pred_prob_last_25": 0.10923206676379778, "mean_pred_prob_last_50": 0.06696822284138762, "mean_token_accuracy": 0.8714286267757416, "step": 43260 }, { "epoch": 0.7692034202620305, "grad_norm": 2.3029948829281284, "learning_rate": 0.0001, "loss": 0.7525, "mean_abs_error": 385.1357774216843, "mean_abs_error_last_10": 225.6199826153332, "mean_abs_error_last_25": 303.05366612699834, "mean_abs_error_last_50": 276.8265219057486, "mean_pred_prob": 0.05060402366798371, "mean_pred_prob_last_10": 0.2432322595268488, "mean_pred_prob_last_25": 0.1386377197690308, "mean_pred_prob_last_50": 0.08555663507431746, "mean_token_accuracy": 0.8690137386322021, "step": 43270 }, { "epoch": 0.7693811885588324, "grad_norm": 1.6502229671110245, "learning_rate": 0.0001, "loss": 0.6566, "mean_abs_error": 683.2916543607868, "mean_abs_error_last_10": 256.8719055698617, "mean_abs_error_last_25": 310.79125963106355, "mean_abs_error_last_50": 408.89005557705565, "mean_pred_prob": 0.031308347557205705, "mean_pred_prob_last_10": 0.1443306343862787, "mean_pred_prob_last_25": 0.08362474431050941, "mean_pred_prob_last_50": 0.051812892605084926, "mean_token_accuracy": 0.8776038229465485, "step": 43280 }, { "epoch": 0.7695589568556344, "grad_norm": 2.740163966431539, "learning_rate": 0.0001, "loss": 0.657, "mean_abs_error": 807.6016309389777, "mean_abs_error_last_10": 314.13704285766056, "mean_abs_error_last_25": 408.6855831691058, "mean_abs_error_last_50": 491.52072795786734, "mean_pred_prob": 0.03736699880391825, "mean_pred_prob_last_10": 0.19767994895810262, "mean_pred_prob_last_25": 0.10642679315642453, "mean_pred_prob_last_50": 0.06370267171878367, "mean_token_accuracy": 0.8750350117683411, "step": 43290 }, { "epoch": 0.7697367251524363, "grad_norm": 1.2975624259897085, "learning_rate": 0.0001, "loss": 0.7637, "mean_abs_error": 893.3411208305604, "mean_abs_error_last_10": 452.18213776429974, "mean_abs_error_last_25": 535.5506994177688, "mean_abs_error_last_50": 651.2759458241561, "mean_pred_prob": 0.049979044565407096, "mean_pred_prob_last_10": 0.2210267165559344, "mean_pred_prob_last_25": 0.1294089420058299, "mean_pred_prob_last_50": 0.07989268760720733, "mean_token_accuracy": 0.8716239273548126, "step": 43300 }, { "epoch": 0.7699144934492382, "grad_norm": 2.7044054298799667, "learning_rate": 0.0001, "loss": 0.948, "mean_abs_error": 780.0905991796983, "mean_abs_error_last_10": 338.14756108976917, "mean_abs_error_last_25": 429.9577309314677, "mean_abs_error_last_50": 586.211019164146, "mean_pred_prob": 0.03395884335914161, "mean_pred_prob_last_10": 0.16457673896802588, "mean_pred_prob_last_25": 0.09286727883736604, "mean_pred_prob_last_50": 0.057122707896633075, "mean_token_accuracy": 0.8567820370197297, "step": 43310 }, { "epoch": 0.7700922617460402, "grad_norm": 2.1944462949399717, "learning_rate": 0.0001, "loss": 0.7084, "mean_abs_error": 1401.731158157303, "mean_abs_error_last_10": 545.3644317703562, "mean_abs_error_last_25": 770.018210329679, "mean_abs_error_last_50": 1030.4883255511727, "mean_pred_prob": 0.02442924206261523, "mean_pred_prob_last_10": 0.13781383437744807, "mean_pred_prob_last_25": 0.0680011984630255, "mean_pred_prob_last_50": 0.041022024318226616, "mean_token_accuracy": 0.8721756637096405, "step": 43320 }, { "epoch": 0.7702700300428421, "grad_norm": 1.2729864994054692, "learning_rate": 0.0001, "loss": 0.677, "mean_abs_error": 468.467135660194, "mean_abs_error_last_10": 261.32430751900534, "mean_abs_error_last_25": 250.88263100976764, "mean_abs_error_last_50": 298.30312143087735, "mean_pred_prob": 0.03816886054701172, "mean_pred_prob_last_10": 0.1856307357957121, "mean_pred_prob_last_25": 0.1030898368335329, "mean_pred_prob_last_50": 0.06360919935395941, "mean_token_accuracy": 0.8768405437469482, "step": 43330 }, { "epoch": 0.7704477983396442, "grad_norm": 1.5677696069144127, "learning_rate": 0.0001, "loss": 0.6324, "mean_abs_error": 357.6289243077161, "mean_abs_error_last_10": 284.1116774829687, "mean_abs_error_last_25": 239.99352265678903, "mean_abs_error_last_50": 244.88238581301653, "mean_pred_prob": 0.07737748011713848, "mean_pred_prob_last_10": 0.31245406945236026, "mean_pred_prob_last_25": 0.19222079541068524, "mean_pred_prob_last_50": 0.1272578668082133, "mean_token_accuracy": 0.8724265992641449, "step": 43340 }, { "epoch": 0.7706255666364461, "grad_norm": 1.2523725450585934, "learning_rate": 0.0001, "loss": 0.8008, "mean_abs_error": 157.69164182821166, "mean_abs_error_last_10": 45.652879167761704, "mean_abs_error_last_25": 58.860827107121644, "mean_abs_error_last_50": 108.07795777703753, "mean_pred_prob": 0.05176115892827511, "mean_pred_prob_last_10": 0.2567249067127705, "mean_pred_prob_last_25": 0.1447651993483305, "mean_pred_prob_last_50": 0.08658974636346102, "mean_token_accuracy": 0.8763634562492371, "step": 43350 }, { "epoch": 0.770803334933248, "grad_norm": 2.131803238993105, "learning_rate": 0.0001, "loss": 0.739, "mean_abs_error": 343.6824537902126, "mean_abs_error_last_10": 101.5317573566035, "mean_abs_error_last_25": 167.78535082241524, "mean_abs_error_last_50": 222.67523655057713, "mean_pred_prob": 0.04064662016462535, "mean_pred_prob_last_10": 0.20284611992537976, "mean_pred_prob_last_25": 0.109640609100461, "mean_pred_prob_last_50": 0.06777622452937067, "mean_token_accuracy": 0.8682098507881164, "step": 43360 }, { "epoch": 0.77098110323005, "grad_norm": 2.2686583340578634, "learning_rate": 0.0001, "loss": 0.7239, "mean_abs_error": 838.7647573100128, "mean_abs_error_last_10": 298.98152803673025, "mean_abs_error_last_25": 367.2938586462137, "mean_abs_error_last_50": 537.5215076394154, "mean_pred_prob": 0.03656055269530043, "mean_pred_prob_last_10": 0.19035936982254498, "mean_pred_prob_last_25": 0.1042683640087489, "mean_pred_prob_last_50": 0.06302845476893708, "mean_token_accuracy": 0.8801862001419067, "step": 43370 }, { "epoch": 0.7711588715268519, "grad_norm": 2.1993009373269934, "learning_rate": 0.0001, "loss": 0.6963, "mean_abs_error": 1510.9436107539368, "mean_abs_error_last_10": 592.0909367402569, "mean_abs_error_last_25": 799.8613243677081, "mean_abs_error_last_50": 1097.7884619462272, "mean_pred_prob": 0.024805294661200607, "mean_pred_prob_last_10": 0.12276005302555859, "mean_pred_prob_last_25": 0.064961438375758, "mean_pred_prob_last_50": 0.04057394337432925, "mean_token_accuracy": 0.8753419101238251, "step": 43380 }, { "epoch": 0.7713366398236539, "grad_norm": 1.782142105472789, "learning_rate": 0.0001, "loss": 0.7065, "mean_abs_error": 433.7674382582404, "mean_abs_error_last_10": 172.63770601361938, "mean_abs_error_last_25": 280.4444224999724, "mean_abs_error_last_50": 375.94676876686765, "mean_pred_prob": 0.03239585410337895, "mean_pred_prob_last_10": 0.15520195960998534, "mean_pred_prob_last_25": 0.08678758852183818, "mean_pred_prob_last_50": 0.053252062480896714, "mean_token_accuracy": 0.8837361097335815, "step": 43390 }, { "epoch": 0.7715144081204558, "grad_norm": 1.623691716014941, "learning_rate": 0.0001, "loss": 0.7371, "mean_abs_error": 644.9565451870346, "mean_abs_error_last_10": 328.64911706194187, "mean_abs_error_last_25": 463.0881327650469, "mean_abs_error_last_50": 497.0207288818668, "mean_pred_prob": 0.044185127297532746, "mean_pred_prob_last_10": 0.2195503393013496, "mean_pred_prob_last_25": 0.12498086648993194, "mean_pred_prob_last_50": 0.0751261070894543, "mean_token_accuracy": 0.8651374280452728, "step": 43400 }, { "epoch": 0.7716921764172577, "grad_norm": 1.3479117180328748, "learning_rate": 0.0001, "loss": 0.7556, "mean_abs_error": 157.97702129688204, "mean_abs_error_last_10": 66.07479225147596, "mean_abs_error_last_25": 69.54961504295537, "mean_abs_error_last_50": 87.25249396432937, "mean_pred_prob": 0.0569922466063872, "mean_pred_prob_last_10": 0.2579731633886695, "mean_pred_prob_last_25": 0.1492016339674592, "mean_pred_prob_last_50": 0.09400220303796232, "mean_token_accuracy": 0.8645098268985748, "step": 43410 }, { "epoch": 0.7718699447140597, "grad_norm": 0.9869357062904096, "learning_rate": 0.0001, "loss": 0.5915, "mean_abs_error": 366.12424977502053, "mean_abs_error_last_10": 193.9125530033172, "mean_abs_error_last_25": 215.90104348097984, "mean_abs_error_last_50": 241.21103646665324, "mean_pred_prob": 0.04675279540824704, "mean_pred_prob_last_10": 0.2270266889827326, "mean_pred_prob_last_25": 0.12692090875934808, "mean_pred_prob_last_50": 0.07829414677107707, "mean_token_accuracy": 0.8771523118019104, "step": 43420 }, { "epoch": 0.7720477130108616, "grad_norm": 1.763465718928949, "learning_rate": 0.0001, "loss": 0.8656, "mean_abs_error": 1318.9789216553258, "mean_abs_error_last_10": 536.0308678174345, "mean_abs_error_last_25": 592.7207610449448, "mean_abs_error_last_50": 810.2586501085859, "mean_pred_prob": 0.015737017121864483, "mean_pred_prob_last_10": 0.0834527660976164, "mean_pred_prob_last_25": 0.044472436830983494, "mean_pred_prob_last_50": 0.026656441716477276, "mean_token_accuracy": 0.8679916560649872, "step": 43430 }, { "epoch": 0.7722254813076636, "grad_norm": 1.5273283980058985, "learning_rate": 0.0001, "loss": 0.7174, "mean_abs_error": 963.8373517165874, "mean_abs_error_last_10": 443.8774295467386, "mean_abs_error_last_25": 564.7654421293618, "mean_abs_error_last_50": 677.1717346439169, "mean_pred_prob": 0.020675417280290275, "mean_pred_prob_last_10": 0.10736143933027051, "mean_pred_prob_last_25": 0.05654886798001826, "mean_pred_prob_last_50": 0.03488824765663594, "mean_token_accuracy": 0.8768959522247315, "step": 43440 }, { "epoch": 0.7724032496044655, "grad_norm": 1.6838363341375195, "learning_rate": 0.0001, "loss": 0.7355, "mean_abs_error": 356.7636736410863, "mean_abs_error_last_10": 104.1461155340991, "mean_abs_error_last_25": 167.16419018529265, "mean_abs_error_last_50": 241.5829328237897, "mean_pred_prob": 0.05639906843425706, "mean_pred_prob_last_10": 0.2657623692415655, "mean_pred_prob_last_25": 0.1534413259010762, "mean_pred_prob_last_50": 0.09532576908823102, "mean_token_accuracy": 0.8692495286464691, "step": 43450 }, { "epoch": 0.7725810179012675, "grad_norm": 2.6153246918090405, "learning_rate": 0.0001, "loss": 0.7708, "mean_abs_error": 390.879426102927, "mean_abs_error_last_10": 89.0581667792727, "mean_abs_error_last_25": 123.89338660299468, "mean_abs_error_last_50": 192.65916734800734, "mean_pred_prob": 0.03842905219644308, "mean_pred_prob_last_10": 0.17969177812337875, "mean_pred_prob_last_25": 0.09929796382784843, "mean_pred_prob_last_50": 0.06229362371377647, "mean_token_accuracy": 0.8716466128826141, "step": 43460 }, { "epoch": 0.7727587861980695, "grad_norm": 2.179227408243703, "learning_rate": 0.0001, "loss": 0.7714, "mean_abs_error": 657.7416995696424, "mean_abs_error_last_10": 246.71372912237058, "mean_abs_error_last_25": 338.6674690376082, "mean_abs_error_last_50": 504.05470982344275, "mean_pred_prob": 0.04538462603231892, "mean_pred_prob_last_10": 0.22935291834292001, "mean_pred_prob_last_25": 0.12394589929026552, "mean_pred_prob_last_50": 0.07600125249009579, "mean_token_accuracy": 0.8743020117282867, "step": 43470 }, { "epoch": 0.7729365544948714, "grad_norm": 1.0864682122550184, "learning_rate": 0.0001, "loss": 0.881, "mean_abs_error": 621.8831618619344, "mean_abs_error_last_10": 285.2647736581732, "mean_abs_error_last_25": 303.063253933675, "mean_abs_error_last_50": 443.21768608685386, "mean_pred_prob": 0.040650079882470894, "mean_pred_prob_last_10": 0.18669359923806042, "mean_pred_prob_last_25": 0.10548514841939323, "mean_pred_prob_last_50": 0.06728699541999958, "mean_token_accuracy": 0.8653947949409485, "step": 43480 }, { "epoch": 0.7731143227916734, "grad_norm": 2.3917981000983044, "learning_rate": 0.0001, "loss": 0.6984, "mean_abs_error": 380.2984981179928, "mean_abs_error_last_10": 148.9565863615797, "mean_abs_error_last_25": 184.79020725161655, "mean_abs_error_last_50": 248.21006627961506, "mean_pred_prob": 0.037777182483114305, "mean_pred_prob_last_10": 0.191022396273911, "mean_pred_prob_last_25": 0.10402599023655057, "mean_pred_prob_last_50": 0.06255940161645412, "mean_token_accuracy": 0.870656567811966, "step": 43490 }, { "epoch": 0.7732920910884753, "grad_norm": 2.8398023070139526, "learning_rate": 0.0001, "loss": 0.7494, "mean_abs_error": 493.52705703266037, "mean_abs_error_last_10": 93.50611415297311, "mean_abs_error_last_25": 187.80949572202246, "mean_abs_error_last_50": 288.9323588665702, "mean_pred_prob": 0.026838229037821293, "mean_pred_prob_last_10": 0.1366190414875746, "mean_pred_prob_last_25": 0.07431443445384503, "mean_pred_prob_last_50": 0.043869066424667834, "mean_token_accuracy": 0.8729504346847534, "step": 43500 }, { "epoch": 0.7734698593852772, "grad_norm": 1.5798619999291696, "learning_rate": 0.0001, "loss": 0.6954, "mean_abs_error": 263.1349282585593, "mean_abs_error_last_10": 56.66612629097506, "mean_abs_error_last_25": 62.51624481284729, "mean_abs_error_last_50": 105.12555766440983, "mean_pred_prob": 0.06424930007196963, "mean_pred_prob_last_10": 0.27370581272989514, "mean_pred_prob_last_25": 0.16246691793203355, "mean_pred_prob_last_50": 0.10466783451847732, "mean_token_accuracy": 0.8721863150596618, "step": 43510 }, { "epoch": 0.7736476276820792, "grad_norm": 1.5059175188078313, "learning_rate": 0.0001, "loss": 0.9519, "mean_abs_error": 158.7908128988737, "mean_abs_error_last_10": 26.320196199308118, "mean_abs_error_last_25": 65.30168974418994, "mean_abs_error_last_50": 96.09940716199861, "mean_pred_prob": 0.051307656709104775, "mean_pred_prob_last_10": 0.2566014889627695, "mean_pred_prob_last_25": 0.14262505378574133, "mean_pred_prob_last_50": 0.08596227951347828, "mean_token_accuracy": 0.8684118092060089, "step": 43520 }, { "epoch": 0.7738253959788811, "grad_norm": 2.1517324600277226, "learning_rate": 0.0001, "loss": 0.6392, "mean_abs_error": 599.8538844497547, "mean_abs_error_last_10": 291.8730147101237, "mean_abs_error_last_25": 317.64179780786037, "mean_abs_error_last_50": 405.3880356307461, "mean_pred_prob": 0.03238641703792382, "mean_pred_prob_last_10": 0.17075102882226928, "mean_pred_prob_last_25": 0.09050209426786751, "mean_pred_prob_last_50": 0.05467331110266969, "mean_token_accuracy": 0.8732677876949311, "step": 43530 }, { "epoch": 0.774003164275683, "grad_norm": 2.5413286532063046, "learning_rate": 0.0001, "loss": 0.7747, "mean_abs_error": 507.69626219289137, "mean_abs_error_last_10": 142.364642908313, "mean_abs_error_last_25": 183.46455885924735, "mean_abs_error_last_50": 296.61742512590956, "mean_pred_prob": 0.017528370115906, "mean_pred_prob_last_10": 0.09704926889389753, "mean_pred_prob_last_25": 0.05150779243558645, "mean_pred_prob_last_50": 0.03048888882622123, "mean_token_accuracy": 0.873019814491272, "step": 43540 }, { "epoch": 0.774180932572485, "grad_norm": 1.217210773073106, "learning_rate": 0.0001, "loss": 0.7487, "mean_abs_error": 489.52244224063617, "mean_abs_error_last_10": 92.95522282607782, "mean_abs_error_last_25": 163.68503054902277, "mean_abs_error_last_50": 275.57082012786043, "mean_pred_prob": 0.041946543293306604, "mean_pred_prob_last_10": 0.23549735799897462, "mean_pred_prob_last_25": 0.12193795967614278, "mean_pred_prob_last_50": 0.07132785326102749, "mean_token_accuracy": 0.8699371457099915, "step": 43550 }, { "epoch": 0.7743587008692869, "grad_norm": 2.6236967976753673, "learning_rate": 0.0001, "loss": 0.7184, "mean_abs_error": 173.34754548561378, "mean_abs_error_last_10": 55.32261109874931, "mean_abs_error_last_25": 66.87640393279744, "mean_abs_error_last_50": 110.02949864117564, "mean_pred_prob": 0.05301863644272089, "mean_pred_prob_last_10": 0.2543090393766761, "mean_pred_prob_last_25": 0.14304851032793522, "mean_pred_prob_last_50": 0.08719023652374744, "mean_token_accuracy": 0.8806261479854584, "step": 43560 }, { "epoch": 0.7745364691660889, "grad_norm": 1.3633480866364627, "learning_rate": 0.0001, "loss": 0.7491, "mean_abs_error": 471.8677172302122, "mean_abs_error_last_10": 132.72918675007526, "mean_abs_error_last_25": 193.18934184352833, "mean_abs_error_last_50": 311.77537894021395, "mean_pred_prob": 0.038520955113926905, "mean_pred_prob_last_10": 0.1912955828011036, "mean_pred_prob_last_25": 0.1073939795838669, "mean_pred_prob_last_50": 0.06498351083137095, "mean_token_accuracy": 0.869662094116211, "step": 43570 }, { "epoch": 0.7747142374628909, "grad_norm": 1.1059791298959483, "learning_rate": 0.0001, "loss": 0.7909, "mean_abs_error": 648.8652683226414, "mean_abs_error_last_10": 57.62576845922789, "mean_abs_error_last_25": 271.2412772194756, "mean_abs_error_last_50": 516.4026986460088, "mean_pred_prob": 0.03301647764164954, "mean_pred_prob_last_10": 0.16456064023077488, "mean_pred_prob_last_25": 0.09362515909597277, "mean_pred_prob_last_50": 0.056318632885813714, "mean_token_accuracy": 0.8700106799602508, "step": 43580 }, { "epoch": 0.7748920057596929, "grad_norm": 1.695304964760285, "learning_rate": 0.0001, "loss": 0.6344, "mean_abs_error": 563.9474526393808, "mean_abs_error_last_10": 205.88553780331367, "mean_abs_error_last_25": 273.95766214269844, "mean_abs_error_last_50": 334.15926133006406, "mean_pred_prob": 0.0488355500070611, "mean_pred_prob_last_10": 0.22042807845864446, "mean_pred_prob_last_25": 0.1283456770470366, "mean_pred_prob_last_50": 0.08034518381464295, "mean_token_accuracy": 0.8738784909248352, "step": 43590 }, { "epoch": 0.7750697740564948, "grad_norm": 1.4208478986750783, "learning_rate": 0.0001, "loss": 0.7503, "mean_abs_error": 1160.2288033688767, "mean_abs_error_last_10": 716.3223662426428, "mean_abs_error_last_25": 854.4649269804574, "mean_abs_error_last_50": 947.96571232879, "mean_pred_prob": 0.04738778696046211, "mean_pred_prob_last_10": 0.22503174427401973, "mean_pred_prob_last_25": 0.13092327476770152, "mean_pred_prob_last_50": 0.0800398200044583, "mean_token_accuracy": 0.8789269864559174, "step": 43600 }, { "epoch": 0.7752475423532967, "grad_norm": 1.681021545133306, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 315.1667421773655, "mean_abs_error_last_10": 88.50356358330146, "mean_abs_error_last_25": 147.45117881700784, "mean_abs_error_last_50": 191.73532392395762, "mean_pred_prob": 0.04680235201958567, "mean_pred_prob_last_10": 0.23033871734514832, "mean_pred_prob_last_25": 0.12852626703679562, "mean_pred_prob_last_50": 0.07879101530415937, "mean_token_accuracy": 0.8715314865112305, "step": 43610 }, { "epoch": 0.7754253106500987, "grad_norm": 2.03987035745571, "learning_rate": 0.0001, "loss": 0.8313, "mean_abs_error": 347.572127745124, "mean_abs_error_last_10": 197.60725579220633, "mean_abs_error_last_25": 191.75529766945158, "mean_abs_error_last_50": 231.66981715717742, "mean_pred_prob": 0.05175584198441356, "mean_pred_prob_last_10": 0.2564394935965538, "mean_pred_prob_last_25": 0.14185293698683382, "mean_pred_prob_last_50": 0.08503015022724866, "mean_token_accuracy": 0.869525671005249, "step": 43620 }, { "epoch": 0.7756030789469006, "grad_norm": 1.7612190165440458, "learning_rate": 0.0001, "loss": 0.6676, "mean_abs_error": 383.07232119946036, "mean_abs_error_last_10": 61.00679312564434, "mean_abs_error_last_25": 102.33757305418045, "mean_abs_error_last_50": 253.31238293579622, "mean_pred_prob": 0.037875051330775025, "mean_pred_prob_last_10": 0.18342136908322573, "mean_pred_prob_last_25": 0.10340542485937476, "mean_pred_prob_last_50": 0.06292618056759239, "mean_token_accuracy": 0.882051807641983, "step": 43630 }, { "epoch": 0.7757808472437026, "grad_norm": 1.088760837190216, "learning_rate": 0.0001, "loss": 0.6183, "mean_abs_error": 185.00300998386922, "mean_abs_error_last_10": 32.58661236103016, "mean_abs_error_last_25": 71.96201939937262, "mean_abs_error_last_50": 118.33676630223673, "mean_pred_prob": 0.05087850634008646, "mean_pred_prob_last_10": 0.24686064906418323, "mean_pred_prob_last_25": 0.13810848630964756, "mean_pred_prob_last_50": 0.08454436967149377, "mean_token_accuracy": 0.8676614999771118, "step": 43640 }, { "epoch": 0.7759586155405045, "grad_norm": 0.9746254756732493, "learning_rate": 0.0001, "loss": 0.7681, "mean_abs_error": 372.74819986474824, "mean_abs_error_last_10": 73.5710993246806, "mean_abs_error_last_25": 122.31888222240585, "mean_abs_error_last_50": 212.13382467316183, "mean_pred_prob": 0.03189303590916097, "mean_pred_prob_last_10": 0.15376560166478156, "mean_pred_prob_last_25": 0.08476471435278654, "mean_pred_prob_last_50": 0.05287410486489534, "mean_token_accuracy": 0.8794097125530242, "step": 43650 }, { "epoch": 0.7761363838373064, "grad_norm": 2.504693473148216, "learning_rate": 0.0001, "loss": 0.6799, "mean_abs_error": 229.9456741180838, "mean_abs_error_last_10": 58.17463831332043, "mean_abs_error_last_25": 93.0869832356515, "mean_abs_error_last_50": 170.72354591147172, "mean_pred_prob": 0.04763993518427014, "mean_pred_prob_last_10": 0.2206827223300934, "mean_pred_prob_last_25": 0.12778510823845862, "mean_pred_prob_last_50": 0.07984675038605929, "mean_token_accuracy": 0.8787375748157501, "step": 43660 }, { "epoch": 0.7763141521341084, "grad_norm": 1.1140110691321652, "learning_rate": 0.0001, "loss": 0.6209, "mean_abs_error": 494.4869274938601, "mean_abs_error_last_10": 121.08320668951117, "mean_abs_error_last_25": 223.3037435625234, "mean_abs_error_last_50": 317.4873922928824, "mean_pred_prob": 0.03701201523654163, "mean_pred_prob_last_10": 0.18932172730565072, "mean_pred_prob_last_25": 0.1001671186182648, "mean_pred_prob_last_50": 0.06077296119183302, "mean_token_accuracy": 0.8709457099437714, "step": 43670 }, { "epoch": 0.7764919204309103, "grad_norm": 0.9918309929578132, "learning_rate": 0.0001, "loss": 0.5139, "mean_abs_error": 346.9990061119262, "mean_abs_error_last_10": 64.8768215389823, "mean_abs_error_last_25": 123.8447520749755, "mean_abs_error_last_50": 228.3875506920703, "mean_pred_prob": 0.04091992354951799, "mean_pred_prob_last_10": 0.19302769601345063, "mean_pred_prob_last_25": 0.10989474542438984, "mean_pred_prob_last_50": 0.06766021568328143, "mean_token_accuracy": 0.8861292064189911, "step": 43680 }, { "epoch": 0.7766696887277122, "grad_norm": 1.675689858126071, "learning_rate": 0.0001, "loss": 0.6577, "mean_abs_error": 243.87861693604532, "mean_abs_error_last_10": 117.38402452810655, "mean_abs_error_last_25": 123.61927308083537, "mean_abs_error_last_50": 157.70611903478218, "mean_pred_prob": 0.03464194932021201, "mean_pred_prob_last_10": 0.1780337080359459, "mean_pred_prob_last_25": 0.09854750838130713, "mean_pred_prob_last_50": 0.05902014896273613, "mean_token_accuracy": 0.8798850774765015, "step": 43690 }, { "epoch": 0.7768474570245143, "grad_norm": 2.2661320936579106, "learning_rate": 0.0001, "loss": 0.578, "mean_abs_error": 1084.3411163951455, "mean_abs_error_last_10": 557.7257971884503, "mean_abs_error_last_25": 667.6578286352375, "mean_abs_error_last_50": 828.3666956547273, "mean_pred_prob": 0.03555409062973922, "mean_pred_prob_last_10": 0.15521444244659505, "mean_pred_prob_last_25": 0.09393657481996343, "mean_pred_prob_last_50": 0.05859287650819169, "mean_token_accuracy": 0.8789550483226776, "step": 43700 }, { "epoch": 0.7770252253213162, "grad_norm": 2.054378455358, "learning_rate": 0.0001, "loss": 0.7914, "mean_abs_error": 1261.2242017866138, "mean_abs_error_last_10": 863.2205274382626, "mean_abs_error_last_25": 942.2730425736532, "mean_abs_error_last_50": 1100.81269711309, "mean_pred_prob": 0.037157073315029265, "mean_pred_prob_last_10": 0.18757453956786777, "mean_pred_prob_last_25": 0.10363369051701739, "mean_pred_prob_last_50": 0.06359581257638638, "mean_token_accuracy": 0.8656445860862731, "step": 43710 }, { "epoch": 0.7772029936181182, "grad_norm": 2.5571730854390458, "learning_rate": 0.0001, "loss": 0.7239, "mean_abs_error": 334.4129274244672, "mean_abs_error_last_10": 47.927325782354856, "mean_abs_error_last_25": 114.2616014399628, "mean_abs_error_last_50": 209.8037402578906, "mean_pred_prob": 0.03598084864206612, "mean_pred_prob_last_10": 0.17918490748852492, "mean_pred_prob_last_25": 0.10194425322115422, "mean_pred_prob_last_50": 0.061049981787800786, "mean_token_accuracy": 0.8770432531833648, "step": 43720 }, { "epoch": 0.7773807619149201, "grad_norm": 1.437033938347606, "learning_rate": 0.0001, "loss": 0.7853, "mean_abs_error": 578.4383665141512, "mean_abs_error_last_10": 237.57348136532974, "mean_abs_error_last_25": 316.6647325878378, "mean_abs_error_last_50": 410.67996208073976, "mean_pred_prob": 0.03838514109957032, "mean_pred_prob_last_10": 0.18861494194716216, "mean_pred_prob_last_25": 0.10088336623157375, "mean_pred_prob_last_50": 0.062216731277294456, "mean_token_accuracy": 0.872089558839798, "step": 43730 }, { "epoch": 0.777558530211722, "grad_norm": 1.3043891108894425, "learning_rate": 0.0001, "loss": 0.7718, "mean_abs_error": 317.3913386190641, "mean_abs_error_last_10": 88.84204348695116, "mean_abs_error_last_25": 101.35025129789697, "mean_abs_error_last_50": 147.67556183127817, "mean_pred_prob": 0.05176780300680548, "mean_pred_prob_last_10": 0.23777121156454087, "mean_pred_prob_last_25": 0.137458564247936, "mean_pred_prob_last_50": 0.08555306210182607, "mean_token_accuracy": 0.8687732100486756, "step": 43740 }, { "epoch": 0.777736298508524, "grad_norm": 1.4370925582685843, "learning_rate": 0.0001, "loss": 0.6537, "mean_abs_error": 268.98226715679846, "mean_abs_error_last_10": 97.60891911651511, "mean_abs_error_last_25": 152.77845278393846, "mean_abs_error_last_50": 177.49348148131548, "mean_pred_prob": 0.05493577921297401, "mean_pred_prob_last_10": 0.24037498594261705, "mean_pred_prob_last_25": 0.13850676699075848, "mean_pred_prob_last_50": 0.0889517369796522, "mean_token_accuracy": 0.8673795998096466, "step": 43750 }, { "epoch": 0.7779140668053259, "grad_norm": 1.3018668217574678, "learning_rate": 0.0001, "loss": 0.6928, "mean_abs_error": 232.19152461992402, "mean_abs_error_last_10": 41.738880860210266, "mean_abs_error_last_25": 67.19877981931857, "mean_abs_error_last_50": 120.44028438351741, "mean_pred_prob": 0.04903013035655022, "mean_pred_prob_last_10": 0.22217419706285, "mean_pred_prob_last_25": 0.12699711602181196, "mean_pred_prob_last_50": 0.08038861574605108, "mean_token_accuracy": 0.866493558883667, "step": 43760 }, { "epoch": 0.7780918351021279, "grad_norm": 1.6536685353284515, "learning_rate": 0.0001, "loss": 0.6911, "mean_abs_error": 574.3903120790666, "mean_abs_error_last_10": 207.40271786550866, "mean_abs_error_last_25": 289.596844492354, "mean_abs_error_last_50": 385.7872891547269, "mean_pred_prob": 0.05315822858246975, "mean_pred_prob_last_10": 0.24347143997438253, "mean_pred_prob_last_25": 0.1365159425884485, "mean_pred_prob_last_50": 0.0869757249543909, "mean_token_accuracy": 0.8682888805866241, "step": 43770 }, { "epoch": 0.7782696033989298, "grad_norm": 1.7997393357934954, "learning_rate": 0.0001, "loss": 0.7829, "mean_abs_error": 507.6606319077376, "mean_abs_error_last_10": 208.32864860064242, "mean_abs_error_last_25": 279.0860569807734, "mean_abs_error_last_50": 335.3944782786948, "mean_pred_prob": 0.032458117115311325, "mean_pred_prob_last_10": 0.1732815783470869, "mean_pred_prob_last_25": 0.09523110343143344, "mean_pred_prob_last_50": 0.056028415216133, "mean_token_accuracy": 0.8704795718193055, "step": 43780 }, { "epoch": 0.7784473716957317, "grad_norm": 2.4241890926398653, "learning_rate": 0.0001, "loss": 0.6791, "mean_abs_error": 363.52654632844224, "mean_abs_error_last_10": 82.02034469193062, "mean_abs_error_last_25": 115.38729433259252, "mean_abs_error_last_50": 212.44211659914035, "mean_pred_prob": 0.0420262445230037, "mean_pred_prob_last_10": 0.20083742181304842, "mean_pred_prob_last_25": 0.11122933754231781, "mean_pred_prob_last_50": 0.06997601633192971, "mean_token_accuracy": 0.8651186227798462, "step": 43790 }, { "epoch": 0.7786251399925337, "grad_norm": 2.428024236980982, "learning_rate": 0.0001, "loss": 0.8174, "mean_abs_error": 777.7886562750303, "mean_abs_error_last_10": 202.50287144528772, "mean_abs_error_last_25": 238.0566574664227, "mean_abs_error_last_50": 363.95003225457947, "mean_pred_prob": 0.020817279390757903, "mean_pred_prob_last_10": 0.12315030174795538, "mean_pred_prob_last_25": 0.06187085580313578, "mean_pred_prob_last_50": 0.03563846093602478, "mean_token_accuracy": 0.8649589061737061, "step": 43800 }, { "epoch": 0.7788029082893356, "grad_norm": 1.8207531050058956, "learning_rate": 0.0001, "loss": 0.6718, "mean_abs_error": 702.4202890623935, "mean_abs_error_last_10": 194.62244714246873, "mean_abs_error_last_25": 332.15772705801203, "mean_abs_error_last_50": 437.07479801169904, "mean_pred_prob": 0.03128804953594226, "mean_pred_prob_last_10": 0.1736786325694993, "mean_pred_prob_last_25": 0.08726031764526851, "mean_pred_prob_last_50": 0.05244783673551865, "mean_token_accuracy": 0.8724665701389313, "step": 43810 }, { "epoch": 0.7789806765861377, "grad_norm": 1.8859142661645032, "learning_rate": 0.0001, "loss": 0.6395, "mean_abs_error": 369.95960810704753, "mean_abs_error_last_10": 103.82171660052889, "mean_abs_error_last_25": 176.8341844562758, "mean_abs_error_last_50": 260.9071097977478, "mean_pred_prob": 0.03491940881940536, "mean_pred_prob_last_10": 0.16812615746166557, "mean_pred_prob_last_25": 0.09465008898405358, "mean_pred_prob_last_50": 0.05809792978689075, "mean_token_accuracy": 0.8685160338878631, "step": 43820 }, { "epoch": 0.7791584448829396, "grad_norm": 1.829724460057238, "learning_rate": 0.0001, "loss": 0.6809, "mean_abs_error": 1158.721456388461, "mean_abs_error_last_10": 565.5387825332889, "mean_abs_error_last_25": 679.0698253211046, "mean_abs_error_last_50": 820.7142517898717, "mean_pred_prob": 0.014065605564974248, "mean_pred_prob_last_10": 0.0816559205413796, "mean_pred_prob_last_25": 0.04177419766201638, "mean_pred_prob_last_50": 0.024799372043344193, "mean_token_accuracy": 0.874182116985321, "step": 43830 }, { "epoch": 0.7793362131797416, "grad_norm": 1.2462836726612054, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 167.68070181926305, "mean_abs_error_last_10": 19.444271766612253, "mean_abs_error_last_25": 46.925390100782444, "mean_abs_error_last_50": 87.22525132194568, "mean_pred_prob": 0.053443236462771895, "mean_pred_prob_last_10": 0.24458549059927465, "mean_pred_prob_last_25": 0.13844338171184062, "mean_pred_prob_last_50": 0.08712468231096863, "mean_token_accuracy": 0.8792523682117462, "step": 43840 }, { "epoch": 0.7795139814765435, "grad_norm": 1.2166276412512986, "learning_rate": 0.0001, "loss": 0.6471, "mean_abs_error": 550.5697642643445, "mean_abs_error_last_10": 233.17845871900892, "mean_abs_error_last_25": 275.08743720015786, "mean_abs_error_last_50": 363.0905372463334, "mean_pred_prob": 0.044166519396821965, "mean_pred_prob_last_10": 0.21504461176809855, "mean_pred_prob_last_25": 0.11988330013118684, "mean_pred_prob_last_50": 0.07341315961675718, "mean_token_accuracy": 0.8740524351596832, "step": 43850 }, { "epoch": 0.7796917497733454, "grad_norm": 2.0365581384865474, "learning_rate": 0.0001, "loss": 0.8848, "mean_abs_error": 199.08459410542986, "mean_abs_error_last_10": 26.082824326103882, "mean_abs_error_last_25": 83.60814224417967, "mean_abs_error_last_50": 138.11936036139895, "mean_pred_prob": 0.04864245429635048, "mean_pred_prob_last_10": 0.24978899508714675, "mean_pred_prob_last_25": 0.13548336625099183, "mean_pred_prob_last_50": 0.08219579923897982, "mean_token_accuracy": 0.8743194103240967, "step": 43860 }, { "epoch": 0.7798695180701474, "grad_norm": 2.567599655539568, "learning_rate": 0.0001, "loss": 0.6596, "mean_abs_error": 976.7622427266327, "mean_abs_error_last_10": 424.9272175309792, "mean_abs_error_last_25": 503.27024704222094, "mean_abs_error_last_50": 669.2369747992351, "mean_pred_prob": 0.018353183317231014, "mean_pred_prob_last_10": 0.10419300110661425, "mean_pred_prob_last_25": 0.05346737775253132, "mean_pred_prob_last_50": 0.031052090774755924, "mean_token_accuracy": 0.869224625825882, "step": 43870 }, { "epoch": 0.7800472863669493, "grad_norm": 1.981143010026451, "learning_rate": 0.0001, "loss": 0.8397, "mean_abs_error": 399.8550476509536, "mean_abs_error_last_10": 71.0199902685175, "mean_abs_error_last_25": 123.52019575084148, "mean_abs_error_last_50": 228.98855485158, "mean_pred_prob": 0.05268898094072938, "mean_pred_prob_last_10": 0.23959406437352299, "mean_pred_prob_last_25": 0.13793100202456116, "mean_pred_prob_last_50": 0.08544845301657915, "mean_token_accuracy": 0.8725817084312439, "step": 43880 }, { "epoch": 0.7802250546637512, "grad_norm": 1.2157428055400044, "learning_rate": 0.0001, "loss": 0.7108, "mean_abs_error": 365.5332746852663, "mean_abs_error_last_10": 107.75901178525605, "mean_abs_error_last_25": 209.57334100589028, "mean_abs_error_last_50": 334.73192135337615, "mean_pred_prob": 0.05221898518502712, "mean_pred_prob_last_10": 0.24481371603906155, "mean_pred_prob_last_25": 0.1440620081499219, "mean_pred_prob_last_50": 0.08853012043982744, "mean_token_accuracy": 0.863887733221054, "step": 43890 }, { "epoch": 0.7804028229605532, "grad_norm": 1.8323396448791973, "learning_rate": 0.0001, "loss": 0.7533, "mean_abs_error": 786.1427488038028, "mean_abs_error_last_10": 233.33041248617056, "mean_abs_error_last_25": 279.06180560869143, "mean_abs_error_last_50": 409.2332567098082, "mean_pred_prob": 0.044499204127350825, "mean_pred_prob_last_10": 0.2212141668307595, "mean_pred_prob_last_25": 0.1229942747624591, "mean_pred_prob_last_50": 0.07463005148456432, "mean_token_accuracy": 0.8789998948574066, "step": 43900 }, { "epoch": 0.7805805912573551, "grad_norm": 1.2052519257787533, "learning_rate": 0.0001, "loss": 0.7116, "mean_abs_error": 255.633836277488, "mean_abs_error_last_10": 45.9335424249803, "mean_abs_error_last_25": 60.77548049186049, "mean_abs_error_last_50": 132.64227517307918, "mean_pred_prob": 0.04987354525364936, "mean_pred_prob_last_10": 0.23400658182799816, "mean_pred_prob_last_25": 0.13322906494140624, "mean_pred_prob_last_50": 0.08196633541956544, "mean_token_accuracy": 0.8618029236793519, "step": 43910 }, { "epoch": 0.7807583595541571, "grad_norm": 1.1300683166772119, "learning_rate": 0.0001, "loss": 0.6793, "mean_abs_error": 714.5526241288463, "mean_abs_error_last_10": 383.7048021955651, "mean_abs_error_last_25": 408.70272046457234, "mean_abs_error_last_50": 518.2288496400417, "mean_pred_prob": 0.04415749196195975, "mean_pred_prob_last_10": 0.21657508939970285, "mean_pred_prob_last_25": 0.12206289568566717, "mean_pred_prob_last_50": 0.07479854461271315, "mean_token_accuracy": 0.8600495636463166, "step": 43920 }, { "epoch": 0.780936127850959, "grad_norm": 1.7073657150802783, "learning_rate": 0.0001, "loss": 0.7964, "mean_abs_error": 259.3823898689156, "mean_abs_error_last_10": 61.892245368036505, "mean_abs_error_last_25": 89.47201173140255, "mean_abs_error_last_50": 131.554121272377, "mean_pred_prob": 0.0326383929233998, "mean_pred_prob_last_10": 0.17335103414952754, "mean_pred_prob_last_25": 0.09467256870120763, "mean_pred_prob_last_50": 0.05588081916794181, "mean_token_accuracy": 0.8824446678161622, "step": 43930 }, { "epoch": 0.781113896147761, "grad_norm": 1.5807913187547478, "learning_rate": 0.0001, "loss": 0.6938, "mean_abs_error": 992.8615684573315, "mean_abs_error_last_10": 656.5418995255495, "mean_abs_error_last_25": 696.55545082455, "mean_abs_error_last_50": 811.5892999396177, "mean_pred_prob": 0.04062438512919471, "mean_pred_prob_last_10": 0.2146040815510787, "mean_pred_prob_last_25": 0.11941119901603088, "mean_pred_prob_last_50": 0.06966589994117385, "mean_token_accuracy": 0.8682407200336456, "step": 43940 }, { "epoch": 0.781291664444563, "grad_norm": 1.7351376245142442, "learning_rate": 0.0001, "loss": 0.6838, "mean_abs_error": 879.4254845286616, "mean_abs_error_last_10": 462.42525455120256, "mean_abs_error_last_25": 520.7866741697466, "mean_abs_error_last_50": 620.7992734928717, "mean_pred_prob": 0.040653775262762794, "mean_pred_prob_last_10": 0.21349891447171104, "mean_pred_prob_last_25": 0.1112814126216108, "mean_pred_prob_last_50": 0.06728539168543649, "mean_token_accuracy": 0.8686584055423736, "step": 43950 }, { "epoch": 0.7814694327413649, "grad_norm": 1.427542260492813, "learning_rate": 0.0001, "loss": 1.0795, "mean_abs_error": 482.19775702842554, "mean_abs_error_last_10": 234.1820919969135, "mean_abs_error_last_25": 251.21800574013167, "mean_abs_error_last_50": 313.65920784627116, "mean_pred_prob": 0.03105370157863945, "mean_pred_prob_last_10": 0.15740645844489337, "mean_pred_prob_last_25": 0.08701436715200543, "mean_pred_prob_last_50": 0.05257939393632114, "mean_token_accuracy": 0.8756879687309265, "step": 43960 }, { "epoch": 0.7816472010381669, "grad_norm": 1.5365289080225062, "learning_rate": 0.0001, "loss": 0.6135, "mean_abs_error": 246.61724970645213, "mean_abs_error_last_10": 81.14827129185764, "mean_abs_error_last_25": 105.88213927850113, "mean_abs_error_last_50": 163.9153465307026, "mean_pred_prob": 0.05021456340327859, "mean_pred_prob_last_10": 0.2555313499644399, "mean_pred_prob_last_25": 0.13925867034122347, "mean_pred_prob_last_50": 0.08447063625790178, "mean_token_accuracy": 0.8807041823863984, "step": 43970 }, { "epoch": 0.7818249693349688, "grad_norm": 1.4794804888628876, "learning_rate": 0.0001, "loss": 0.6653, "mean_abs_error": 291.32013208716944, "mean_abs_error_last_10": 78.84188589935415, "mean_abs_error_last_25": 89.0982548903444, "mean_abs_error_last_50": 147.0033402365821, "mean_pred_prob": 0.047707238933071494, "mean_pred_prob_last_10": 0.22383275516331197, "mean_pred_prob_last_25": 0.12886911490932107, "mean_pred_prob_last_50": 0.0805025513167493, "mean_token_accuracy": 0.871901524066925, "step": 43980 }, { "epoch": 0.7820027376317708, "grad_norm": 1.4195180453149376, "learning_rate": 0.0001, "loss": 0.7322, "mean_abs_error": 616.9513026075845, "mean_abs_error_last_10": 299.7290343031366, "mean_abs_error_last_25": 273.60849146129874, "mean_abs_error_last_50": 357.01224834629636, "mean_pred_prob": 0.04416530405287631, "mean_pred_prob_last_10": 0.1992750491015613, "mean_pred_prob_last_25": 0.1148447171493899, "mean_pred_prob_last_50": 0.07222619247622789, "mean_token_accuracy": 0.8784521460533142, "step": 43990 }, { "epoch": 0.7821805059285727, "grad_norm": 3.3015461905237036, "learning_rate": 0.0001, "loss": 0.7378, "mean_abs_error": 230.6463194163447, "mean_abs_error_last_10": 68.90458363591486, "mean_abs_error_last_25": 178.22497714988194, "mean_abs_error_last_50": 204.4780398966022, "mean_pred_prob": 0.053643981972709295, "mean_pred_prob_last_10": 0.25971273984760046, "mean_pred_prob_last_25": 0.14927405016496778, "mean_pred_prob_last_50": 0.09214109708555043, "mean_token_accuracy": 0.8707619488239289, "step": 44000 }, { "epoch": 0.7823582742253746, "grad_norm": 1.532145356317716, "learning_rate": 0.0001, "loss": 0.7432, "mean_abs_error": 416.0136914030625, "mean_abs_error_last_10": 162.49013705969116, "mean_abs_error_last_25": 183.0958750993886, "mean_abs_error_last_50": 263.2794886701027, "mean_pred_prob": 0.038356041914084926, "mean_pred_prob_last_10": 0.18460690109059213, "mean_pred_prob_last_25": 0.10674874099204316, "mean_pred_prob_last_50": 0.06513838978717104, "mean_token_accuracy": 0.8695235192775727, "step": 44010 }, { "epoch": 0.7825360425221766, "grad_norm": 1.764687276946791, "learning_rate": 0.0001, "loss": 0.8956, "mean_abs_error": 420.15018403591165, "mean_abs_error_last_10": 100.05095354795982, "mean_abs_error_last_25": 133.33355314201518, "mean_abs_error_last_50": 193.09927048421932, "mean_pred_prob": 0.025225920137017965, "mean_pred_prob_last_10": 0.14028445594012737, "mean_pred_prob_last_25": 0.07261873111128807, "mean_pred_prob_last_50": 0.04283146345987916, "mean_token_accuracy": 0.8702303946018219, "step": 44020 }, { "epoch": 0.7827138108189785, "grad_norm": 2.2613092983872605, "learning_rate": 0.0001, "loss": 0.794, "mean_abs_error": 733.8647984322525, "mean_abs_error_last_10": 261.8891735502717, "mean_abs_error_last_25": 313.32556303246974, "mean_abs_error_last_50": 463.68575501360965, "mean_pred_prob": 0.01250565967638977, "mean_pred_prob_last_10": 0.07425241568125784, "mean_pred_prob_last_25": 0.0369319602381438, "mean_pred_prob_last_50": 0.02093738941475749, "mean_token_accuracy": 0.8680135428905487, "step": 44030 }, { "epoch": 0.7828915791157804, "grad_norm": 1.6845761152843914, "learning_rate": 0.0001, "loss": 0.6653, "mean_abs_error": 265.7854094156854, "mean_abs_error_last_10": 95.70462047647467, "mean_abs_error_last_25": 202.49799875626263, "mean_abs_error_last_50": 264.41305102265306, "mean_pred_prob": 0.06047238386236131, "mean_pred_prob_last_10": 0.2700514174997807, "mean_pred_prob_last_25": 0.16307755233719945, "mean_pred_prob_last_50": 0.1008630565367639, "mean_token_accuracy": 0.8811734557151795, "step": 44040 }, { "epoch": 0.7830693474125825, "grad_norm": 1.2385742225520076, "learning_rate": 0.0001, "loss": 0.7678, "mean_abs_error": 743.8352045460011, "mean_abs_error_last_10": 464.2973329649897, "mean_abs_error_last_25": 490.9768316962578, "mean_abs_error_last_50": 536.601243724151, "mean_pred_prob": 0.03170947517792229, "mean_pred_prob_last_10": 0.17600875456701032, "mean_pred_prob_last_25": 0.09319028354948386, "mean_pred_prob_last_50": 0.05464577225793619, "mean_token_accuracy": 0.8710994064807892, "step": 44050 }, { "epoch": 0.7832471157093844, "grad_norm": 2.1857645977795928, "learning_rate": 0.0001, "loss": 0.7143, "mean_abs_error": 1437.5390560776268, "mean_abs_error_last_10": 809.6226237366412, "mean_abs_error_last_25": 917.3116335781162, "mean_abs_error_last_50": 1065.3233475531301, "mean_pred_prob": 0.0240870090899989, "mean_pred_prob_last_10": 0.12228386766801122, "mean_pred_prob_last_25": 0.06516993885015836, "mean_pred_prob_last_50": 0.039493694731208964, "mean_token_accuracy": 0.8749868214130402, "step": 44060 }, { "epoch": 0.7834248840061864, "grad_norm": 0.7754077531300755, "learning_rate": 0.0001, "loss": 0.6214, "mean_abs_error": 430.9736348110247, "mean_abs_error_last_10": 127.90089156572908, "mean_abs_error_last_25": 163.5873971086383, "mean_abs_error_last_50": 247.09520634592894, "mean_pred_prob": 0.027762128080939873, "mean_pred_prob_last_10": 0.1531131518771872, "mean_pred_prob_last_25": 0.08103482205187902, "mean_pred_prob_last_50": 0.04808608681196347, "mean_token_accuracy": 0.8789400517940521, "step": 44070 }, { "epoch": 0.7836026523029883, "grad_norm": 1.0299570603582584, "learning_rate": 0.0001, "loss": 0.6764, "mean_abs_error": 224.07092931189294, "mean_abs_error_last_10": 73.93379887247701, "mean_abs_error_last_25": 99.01273040089272, "mean_abs_error_last_50": 149.1001681895462, "mean_pred_prob": 0.05083843450993299, "mean_pred_prob_last_10": 0.23777105659246445, "mean_pred_prob_last_25": 0.1333853740245104, "mean_pred_prob_last_50": 0.08352747363969684, "mean_token_accuracy": 0.8805388271808624, "step": 44080 }, { "epoch": 0.7837804205997903, "grad_norm": 1.3810366754763257, "learning_rate": 0.0001, "loss": 0.7882, "mean_abs_error": 556.1888842380902, "mean_abs_error_last_10": 228.18427214253057, "mean_abs_error_last_25": 300.3065909269555, "mean_abs_error_last_50": 384.4099196177357, "mean_pred_prob": 0.06044254002044909, "mean_pred_prob_last_10": 0.26340065475087615, "mean_pred_prob_last_25": 0.15984032916021534, "mean_pred_prob_last_50": 0.10090431330027058, "mean_token_accuracy": 0.8646576881408692, "step": 44090 }, { "epoch": 0.7839581888965922, "grad_norm": 1.6406357290308835, "learning_rate": 0.0001, "loss": 0.6958, "mean_abs_error": 421.3685123080504, "mean_abs_error_last_10": 214.52508358312997, "mean_abs_error_last_25": 230.01367140739345, "mean_abs_error_last_50": 206.08710270090313, "mean_pred_prob": 0.05783780222991482, "mean_pred_prob_last_10": 0.2717044404707849, "mean_pred_prob_last_25": 0.15709187870379537, "mean_pred_prob_last_50": 0.09747121552936733, "mean_token_accuracy": 0.8829809546470642, "step": 44100 }, { "epoch": 0.7841359571933941, "grad_norm": 1.645320874488311, "learning_rate": 0.0001, "loss": 0.696, "mean_abs_error": 636.4312173748524, "mean_abs_error_last_10": 176.50420910947534, "mean_abs_error_last_25": 253.90543279929565, "mean_abs_error_last_50": 316.16053804763925, "mean_pred_prob": 0.03526709530269727, "mean_pred_prob_last_10": 0.17308815240394324, "mean_pred_prob_last_25": 0.0975498222745955, "mean_pred_prob_last_50": 0.05871563428081572, "mean_token_accuracy": 0.8764632344245911, "step": 44110 }, { "epoch": 0.7843137254901961, "grad_norm": 1.93571982178581, "learning_rate": 0.0001, "loss": 0.7602, "mean_abs_error": 1109.038682280605, "mean_abs_error_last_10": 467.6835545481707, "mean_abs_error_last_25": 568.0207086278974, "mean_abs_error_last_50": 735.6081789827772, "mean_pred_prob": 0.03541634313296527, "mean_pred_prob_last_10": 0.15357137814280578, "mean_pred_prob_last_25": 0.09188226165133528, "mean_pred_prob_last_50": 0.05863130584475584, "mean_token_accuracy": 0.8675100982189179, "step": 44120 }, { "epoch": 0.784491493786998, "grad_norm": 1.8051641470035713, "learning_rate": 0.0001, "loss": 0.8564, "mean_abs_error": 132.10449380734636, "mean_abs_error_last_10": 29.4645409845187, "mean_abs_error_last_25": 40.69313819616582, "mean_abs_error_last_50": 70.7678911774045, "mean_pred_prob": 0.04666990227997303, "mean_pred_prob_last_10": 0.2440688494592905, "mean_pred_prob_last_25": 0.13359047025442122, "mean_pred_prob_last_50": 0.08005126388743519, "mean_token_accuracy": 0.8841565907001495, "step": 44130 }, { "epoch": 0.7846692620838, "grad_norm": 2.2037076083466123, "learning_rate": 0.0001, "loss": 0.7737, "mean_abs_error": 473.80358589830695, "mean_abs_error_last_10": 156.1622091222079, "mean_abs_error_last_25": 165.68026299467388, "mean_abs_error_last_50": 240.85563212551952, "mean_pred_prob": 0.034926981362514195, "mean_pred_prob_last_10": 0.19116698522120715, "mean_pred_prob_last_25": 0.10184132354333997, "mean_pred_prob_last_50": 0.0602799245622009, "mean_token_accuracy": 0.8648221611976623, "step": 44140 }, { "epoch": 0.7848470303806019, "grad_norm": 1.7238382072306895, "learning_rate": 0.0001, "loss": 0.6953, "mean_abs_error": 536.3762801751288, "mean_abs_error_last_10": 161.06517287532176, "mean_abs_error_last_25": 196.23111303011075, "mean_abs_error_last_50": 297.5834014678825, "mean_pred_prob": 0.03548866736819036, "mean_pred_prob_last_10": 0.17016391622601076, "mean_pred_prob_last_25": 0.0964732117485255, "mean_pred_prob_last_50": 0.060104507149662825, "mean_token_accuracy": 0.8722867488861084, "step": 44150 }, { "epoch": 0.7850247986774038, "grad_norm": 1.4620978289047708, "learning_rate": 0.0001, "loss": 0.8061, "mean_abs_error": 545.849905768207, "mean_abs_error_last_10": 150.74958940261178, "mean_abs_error_last_25": 225.116338206478, "mean_abs_error_last_50": 385.85826604726816, "mean_pred_prob": 0.02309809266589582, "mean_pred_prob_last_10": 0.11149226166307927, "mean_pred_prob_last_25": 0.06237557232379913, "mean_pred_prob_last_50": 0.038466285169124606, "mean_token_accuracy": 0.8721761524677276, "step": 44160 }, { "epoch": 0.7852025669742059, "grad_norm": 1.9734127698968063, "learning_rate": 0.0001, "loss": 0.8626, "mean_abs_error": 559.8440692376005, "mean_abs_error_last_10": 118.38228464028096, "mean_abs_error_last_25": 152.31960672912265, "mean_abs_error_last_50": 290.3003809642299, "mean_pred_prob": 0.03610571506433189, "mean_pred_prob_last_10": 0.17260246761143208, "mean_pred_prob_last_25": 0.09598239520564675, "mean_pred_prob_last_50": 0.05939840758219361, "mean_token_accuracy": 0.8711705029010772, "step": 44170 }, { "epoch": 0.7853803352710078, "grad_norm": 2.7098209042233052, "learning_rate": 0.0001, "loss": 0.7602, "mean_abs_error": 817.2594009162063, "mean_abs_error_last_10": 307.4343297785303, "mean_abs_error_last_25": 403.8806130949459, "mean_abs_error_last_50": 539.3663077859712, "mean_pred_prob": 0.0350997970730532, "mean_pred_prob_last_10": 0.16075118634616956, "mean_pred_prob_last_25": 0.0946253779809922, "mean_pred_prob_last_50": 0.0603304744057823, "mean_token_accuracy": 0.8745448410511016, "step": 44180 }, { "epoch": 0.7855581035678098, "grad_norm": 2.4961944318361993, "learning_rate": 0.0001, "loss": 0.7783, "mean_abs_error": 630.0602187969963, "mean_abs_error_last_10": 317.7751846612327, "mean_abs_error_last_25": 381.19512196736616, "mean_abs_error_last_50": 440.5867964341207, "mean_pred_prob": 0.02771375421434641, "mean_pred_prob_last_10": 0.14401100864633917, "mean_pred_prob_last_25": 0.0768509010784328, "mean_pred_prob_last_50": 0.046116834692657, "mean_token_accuracy": 0.8728971481323242, "step": 44190 }, { "epoch": 0.7857358718646117, "grad_norm": 1.331592035411485, "learning_rate": 0.0001, "loss": 0.7248, "mean_abs_error": 530.2816236823841, "mean_abs_error_last_10": 174.02417657906068, "mean_abs_error_last_25": 188.7071281738367, "mean_abs_error_last_50": 256.4120792352376, "mean_pred_prob": 0.03964654988376424, "mean_pred_prob_last_10": 0.2058986126445234, "mean_pred_prob_last_25": 0.118037040065974, "mean_pred_prob_last_50": 0.06946357858832926, "mean_token_accuracy": 0.8725185096263885, "step": 44200 }, { "epoch": 0.7859136401614136, "grad_norm": 1.6737547556477483, "learning_rate": 0.0001, "loss": 0.7795, "mean_abs_error": 339.64436622108485, "mean_abs_error_last_10": 146.05763796936944, "mean_abs_error_last_25": 193.23283430038043, "mean_abs_error_last_50": 236.20869100125591, "mean_pred_prob": 0.03416730568278581, "mean_pred_prob_last_10": 0.1750348987057805, "mean_pred_prob_last_25": 0.09399096835404634, "mean_pred_prob_last_50": 0.057133049704134464, "mean_token_accuracy": 0.8685612142086029, "step": 44210 }, { "epoch": 0.7860914084582156, "grad_norm": 1.1786315106409226, "learning_rate": 0.0001, "loss": 0.6294, "mean_abs_error": 235.92075946655783, "mean_abs_error_last_10": 36.10045683633352, "mean_abs_error_last_25": 61.302294970033756, "mean_abs_error_last_50": 148.19112501662534, "mean_pred_prob": 0.05393266803584993, "mean_pred_prob_last_10": 0.23848208785057068, "mean_pred_prob_last_25": 0.14146595243364574, "mean_pred_prob_last_50": 0.08769528055563569, "mean_token_accuracy": 0.8743261158466339, "step": 44220 }, { "epoch": 0.7862691767550175, "grad_norm": 1.3248956148702968, "learning_rate": 0.0001, "loss": 0.7244, "mean_abs_error": 174.60435311710313, "mean_abs_error_last_10": 102.50062580522237, "mean_abs_error_last_25": 137.09284919131682, "mean_abs_error_last_50": 146.71185778729205, "mean_pred_prob": 0.04896402731537819, "mean_pred_prob_last_10": 0.2229712914675474, "mean_pred_prob_last_25": 0.1291394834406674, "mean_pred_prob_last_50": 0.07979751680977643, "mean_token_accuracy": 0.8694291293621064, "step": 44230 }, { "epoch": 0.7864469450518194, "grad_norm": 1.680458023634306, "learning_rate": 0.0001, "loss": 0.7646, "mean_abs_error": 421.2177986796422, "mean_abs_error_last_10": 129.2922481386764, "mean_abs_error_last_25": 182.85210849718197, "mean_abs_error_last_50": 247.91925141447018, "mean_pred_prob": 0.034837009361945094, "mean_pred_prob_last_10": 0.1936056974926032, "mean_pred_prob_last_25": 0.10419625896029175, "mean_pred_prob_last_50": 0.06058981460519135, "mean_token_accuracy": 0.8708047747612, "step": 44240 }, { "epoch": 0.7866247133486214, "grad_norm": 1.1296410292304349, "learning_rate": 0.0001, "loss": 0.6857, "mean_abs_error": 413.3201602775772, "mean_abs_error_last_10": 243.58191463328663, "mean_abs_error_last_25": 247.02181514015666, "mean_abs_error_last_50": 269.6916168067663, "mean_pred_prob": 0.038236432382836937, "mean_pred_prob_last_10": 0.1974895542487502, "mean_pred_prob_last_25": 0.10798983946442604, "mean_pred_prob_last_50": 0.06469289939850568, "mean_token_accuracy": 0.8792676031589508, "step": 44250 }, { "epoch": 0.7868024816454233, "grad_norm": 0.9280860164602095, "learning_rate": 0.0001, "loss": 0.6716, "mean_abs_error": 374.70277809853076, "mean_abs_error_last_10": 90.75701842797532, "mean_abs_error_last_25": 156.69218357431504, "mean_abs_error_last_50": 216.0616347118079, "mean_pred_prob": 0.041964441328309475, "mean_pred_prob_last_10": 0.19961290135979654, "mean_pred_prob_last_25": 0.11133922711014747, "mean_pred_prob_last_50": 0.06951592033728957, "mean_token_accuracy": 0.8717343747615814, "step": 44260 }, { "epoch": 0.7869802499422253, "grad_norm": 1.1770146158556438, "learning_rate": 0.0001, "loss": 0.5774, "mean_abs_error": 1272.4116594057393, "mean_abs_error_last_10": 479.7842957313802, "mean_abs_error_last_25": 607.5062169598784, "mean_abs_error_last_50": 822.227063284691, "mean_pred_prob": 0.02279581450857222, "mean_pred_prob_last_10": 0.12326199268572964, "mean_pred_prob_last_25": 0.06471571160363965, "mean_pred_prob_last_50": 0.03907211436307989, "mean_token_accuracy": 0.8774596869945526, "step": 44270 }, { "epoch": 0.7871580182390272, "grad_norm": 2.0991143232496814, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 175.69419195659611, "mean_abs_error_last_10": 36.187699282860194, "mean_abs_error_last_25": 69.70307830966185, "mean_abs_error_last_50": 133.97066109144922, "mean_pred_prob": 0.052070093154907224, "mean_pred_prob_last_10": 0.2534601129591465, "mean_pred_prob_last_25": 0.14256719574332238, "mean_pred_prob_last_50": 0.08679581983014942, "mean_token_accuracy": 0.8831535398960113, "step": 44280 }, { "epoch": 0.7873357865358293, "grad_norm": 1.5170117090740107, "learning_rate": 0.0001, "loss": 0.6929, "mean_abs_error": 286.00093108430656, "mean_abs_error_last_10": 68.62803036940373, "mean_abs_error_last_25": 90.1486601299182, "mean_abs_error_last_50": 155.91476300786445, "mean_pred_prob": 0.04808002226054668, "mean_pred_prob_last_10": 0.22859422527253628, "mean_pred_prob_last_25": 0.12984897149726748, "mean_pred_prob_last_50": 0.0795375742483884, "mean_token_accuracy": 0.8770711362361908, "step": 44290 }, { "epoch": 0.7875135548326312, "grad_norm": 1.4281000399005346, "learning_rate": 0.0001, "loss": 0.6634, "mean_abs_error": 448.7379901801108, "mean_abs_error_last_10": 62.62314180311476, "mean_abs_error_last_25": 160.74262894526586, "mean_abs_error_last_50": 270.88947082848904, "mean_pred_prob": 0.02418058426119387, "mean_pred_prob_last_10": 0.12139258850365878, "mean_pred_prob_last_25": 0.06602729381993414, "mean_pred_prob_last_50": 0.04013203037902713, "mean_token_accuracy": 0.8752898514270783, "step": 44300 }, { "epoch": 0.7876913231294331, "grad_norm": 2.1775768753698292, "learning_rate": 0.0001, "loss": 0.6952, "mean_abs_error": 425.3941617631264, "mean_abs_error_last_10": 81.73586150193744, "mean_abs_error_last_25": 135.1668070147293, "mean_abs_error_last_50": 235.17451691626016, "mean_pred_prob": 0.04824261809699237, "mean_pred_prob_last_10": 0.19544502831995486, "mean_pred_prob_last_25": 0.1164352093823254, "mean_pred_prob_last_50": 0.0757141615729779, "mean_token_accuracy": 0.8652046084403991, "step": 44310 }, { "epoch": 0.7878690914262351, "grad_norm": 2.0120907905016177, "learning_rate": 0.0001, "loss": 0.6825, "mean_abs_error": 141.21986858676428, "mean_abs_error_last_10": 43.95952701106164, "mean_abs_error_last_25": 49.029568106836685, "mean_abs_error_last_50": 71.184554287418, "mean_pred_prob": 0.05607940843328833, "mean_pred_prob_last_10": 0.23041593953967093, "mean_pred_prob_last_25": 0.14035650826990603, "mean_pred_prob_last_50": 0.0912861816585064, "mean_token_accuracy": 0.8626393377780914, "step": 44320 }, { "epoch": 0.788046859723037, "grad_norm": 2.409852917726788, "learning_rate": 0.0001, "loss": 0.6691, "mean_abs_error": 295.34048448341434, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 257.0882880620571, "mean_pred_prob": 0.08306180839426816, "mean_pred_prob_last_10": 0.21512055583298206, "mean_pred_prob_last_25": 0.1436059636063874, "mean_pred_prob_last_50": 0.11387057146057486, "mean_token_accuracy": 0.8834147095680237, "step": 44330 }, { "epoch": 0.788224628019839, "grad_norm": 2.360569169715839, "learning_rate": 0.0001, "loss": 0.5958, "mean_abs_error": 545.5764759355231, "mean_abs_error_last_10": 131.00048228382713, "mean_abs_error_last_25": 173.69652749009083, "mean_abs_error_last_50": 282.55287476632617, "mean_pred_prob": 0.045683064899640156, "mean_pred_prob_last_10": 0.2181913150823675, "mean_pred_prob_last_25": 0.13132854219293222, "mean_pred_prob_last_50": 0.0780878983438015, "mean_token_accuracy": 0.8747664809226989, "step": 44340 }, { "epoch": 0.7884023963166409, "grad_norm": 1.9222411973088676, "learning_rate": 0.0001, "loss": 0.7284, "mean_abs_error": 462.0005007287402, "mean_abs_error_last_10": 123.7546230527328, "mean_abs_error_last_25": 247.46370556959255, "mean_abs_error_last_50": 293.833960590205, "mean_pred_prob": 0.040688462322577834, "mean_pred_prob_last_10": 0.18501241691410542, "mean_pred_prob_last_25": 0.10549258757382632, "mean_pred_prob_last_50": 0.06579753309488297, "mean_token_accuracy": 0.8775418758392334, "step": 44350 }, { "epoch": 0.7885801646134428, "grad_norm": 1.391399998613614, "learning_rate": 0.0001, "loss": 0.7021, "mean_abs_error": 375.2587260745054, "mean_abs_error_last_10": 55.469514870711485, "mean_abs_error_last_25": 111.28707089134964, "mean_abs_error_last_50": 166.88375922494808, "mean_pred_prob": 0.044018656329717486, "mean_pred_prob_last_10": 0.21633939389139414, "mean_pred_prob_last_25": 0.12315770988352596, "mean_pred_prob_last_50": 0.07494963463395835, "mean_token_accuracy": 0.8729353189468384, "step": 44360 }, { "epoch": 0.7887579329102448, "grad_norm": 2.3576877102039675, "learning_rate": 0.0001, "loss": 0.6383, "mean_abs_error": 757.7579351385376, "mean_abs_error_last_10": 426.3176859135844, "mean_abs_error_last_25": 497.13290188575246, "mean_abs_error_last_50": 606.5813895224892, "mean_pred_prob": 0.046139605893404226, "mean_pred_prob_last_10": 0.20707505915197544, "mean_pred_prob_last_25": 0.11789499233127572, "mean_pred_prob_last_50": 0.07451909705996514, "mean_token_accuracy": 0.8767560064792633, "step": 44370 }, { "epoch": 0.7889357012070467, "grad_norm": 1.4322088549479375, "learning_rate": 0.0001, "loss": 0.8464, "mean_abs_error": 210.91588680496972, "mean_abs_error_last_10": 63.72047589539871, "mean_abs_error_last_25": 94.90156199131943, "mean_abs_error_last_50": 166.5073847277778, "mean_pred_prob": 0.05088130780495703, "mean_pred_prob_last_10": 0.24876610599458218, "mean_pred_prob_last_25": 0.14030010160058737, "mean_pred_prob_last_50": 0.08598773814737797, "mean_token_accuracy": 0.8767605006694794, "step": 44380 }, { "epoch": 0.7891134695038486, "grad_norm": 2.071357285428462, "learning_rate": 0.0001, "loss": 0.6687, "mean_abs_error": 127.48223560680046, "mean_abs_error_last_10": 33.84367625122618, "mean_abs_error_last_25": 41.27792596093961, "mean_abs_error_last_50": 94.64873795477448, "mean_pred_prob": 0.06290998016484081, "mean_pred_prob_last_10": 0.2919737040996552, "mean_pred_prob_last_25": 0.17222009506076574, "mean_pred_prob_last_50": 0.10679521411657333, "mean_token_accuracy": 0.8733813107013703, "step": 44390 }, { "epoch": 0.7892912378006506, "grad_norm": 3.12938306001094, "learning_rate": 0.0001, "loss": 0.6919, "mean_abs_error": 985.949631541396, "mean_abs_error_last_10": 426.8877440870833, "mean_abs_error_last_25": 623.9172395367566, "mean_abs_error_last_50": 721.3566606378254, "mean_pred_prob": 0.03612614485027734, "mean_pred_prob_last_10": 0.16270101021509617, "mean_pred_prob_last_25": 0.09630639225943013, "mean_pred_prob_last_50": 0.060795536841033025, "mean_token_accuracy": 0.8750736534595489, "step": 44400 }, { "epoch": 0.7894690060974526, "grad_norm": 1.0424311523697212, "learning_rate": 0.0001, "loss": 0.7265, "mean_abs_error": 811.3846141382054, "mean_abs_error_last_10": 305.27671328324794, "mean_abs_error_last_25": 424.3300329237289, "mean_abs_error_last_50": 540.3113289754881, "mean_pred_prob": 0.0446886034013005, "mean_pred_prob_last_10": 0.22667513092746958, "mean_pred_prob_last_25": 0.12573833918140737, "mean_pred_prob_last_50": 0.07557283416390419, "mean_token_accuracy": 0.8811841011047363, "step": 44410 }, { "epoch": 0.7896467743942546, "grad_norm": 1.7100145120640713, "learning_rate": 0.0001, "loss": 0.7655, "mean_abs_error": 1024.974246611263, "mean_abs_error_last_10": 594.1868867744081, "mean_abs_error_last_25": 753.1878883588276, "mean_abs_error_last_50": 767.396565695811, "mean_pred_prob": 0.037451639107894154, "mean_pred_prob_last_10": 0.16493312722304837, "mean_pred_prob_last_25": 0.09904198633157649, "mean_pred_prob_last_50": 0.061580834139022045, "mean_token_accuracy": 0.8677610874176025, "step": 44420 }, { "epoch": 0.7898245426910565, "grad_norm": 1.6248077679980308, "learning_rate": 0.0001, "loss": 0.7645, "mean_abs_error": 1328.4077512517244, "mean_abs_error_last_10": 732.4403491304167, "mean_abs_error_last_25": 810.8518465808231, "mean_abs_error_last_50": 1006.942310870099, "mean_pred_prob": 0.03416281535246526, "mean_pred_prob_last_10": 0.18583451853628502, "mean_pred_prob_last_25": 0.09843377906945534, "mean_pred_prob_last_50": 0.05830913257086649, "mean_token_accuracy": 0.8697512030601502, "step": 44430 }, { "epoch": 0.7900023109878584, "grad_norm": 1.8911397691886376, "learning_rate": 0.0001, "loss": 0.6018, "mean_abs_error": 691.1299056823623, "mean_abs_error_last_10": 330.60806562474033, "mean_abs_error_last_25": 312.2160970115067, "mean_abs_error_last_50": 390.7095561779983, "mean_pred_prob": 0.04758846377662849, "mean_pred_prob_last_10": 0.22790856591309422, "mean_pred_prob_last_25": 0.13088572352426125, "mean_pred_prob_last_50": 0.07969279661774635, "mean_token_accuracy": 0.8761205971240997, "step": 44440 }, { "epoch": 0.7901800792846604, "grad_norm": 1.5407722214864432, "learning_rate": 0.0001, "loss": 0.661, "mean_abs_error": 501.0312652493388, "mean_abs_error_last_10": 355.79952463227903, "mean_abs_error_last_25": 387.21953338689804, "mean_abs_error_last_50": 400.95168820817673, "mean_pred_prob": 0.026815003913361578, "mean_pred_prob_last_10": 0.14269245869945735, "mean_pred_prob_last_25": 0.07549372181529179, "mean_pred_prob_last_50": 0.04444030275335535, "mean_token_accuracy": 0.8757969737052917, "step": 44450 }, { "epoch": 0.7903578475814623, "grad_norm": 1.8082172089900963, "learning_rate": 0.0001, "loss": 0.7969, "mean_abs_error": 330.61871617164803, "mean_abs_error_last_10": 86.78821389250496, "mean_abs_error_last_25": 104.65422301920069, "mean_abs_error_last_50": 163.6965178156275, "mean_pred_prob": 0.04844164215028286, "mean_pred_prob_last_10": 0.20743632558733224, "mean_pred_prob_last_25": 0.12454064264893532, "mean_pred_prob_last_50": 0.07884680982679129, "mean_token_accuracy": 0.8675242960453033, "step": 44460 }, { "epoch": 0.7905356158782643, "grad_norm": 1.7245065211095925, "learning_rate": 0.0001, "loss": 0.6403, "mean_abs_error": 137.35567990568387, "mean_abs_error_last_10": 43.25080064285465, "mean_abs_error_last_25": 58.77198291578186, "mean_abs_error_last_50": 81.34537951315684, "mean_pred_prob": 0.05752798402681947, "mean_pred_prob_last_10": 0.2752516034990549, "mean_pred_prob_last_25": 0.15306773278862237, "mean_pred_prob_last_50": 0.09599662637338043, "mean_token_accuracy": 0.8715398132801055, "step": 44470 }, { "epoch": 0.7907133841750662, "grad_norm": 1.2825182539218252, "learning_rate": 0.0001, "loss": 0.6819, "mean_abs_error": 296.9390394201755, "mean_abs_error_last_10": 70.70055980415535, "mean_abs_error_last_25": 176.13112184619092, "mean_abs_error_last_50": 195.10573088904573, "mean_pred_prob": 0.04370583738200366, "mean_pred_prob_last_10": 0.21683866679668426, "mean_pred_prob_last_25": 0.12041452620178461, "mean_pred_prob_last_50": 0.07186925867572427, "mean_token_accuracy": 0.8725839257240295, "step": 44480 }, { "epoch": 0.7908911524718681, "grad_norm": 2.0149016230412307, "learning_rate": 0.0001, "loss": 0.6598, "mean_abs_error": 351.01161739029874, "mean_abs_error_last_10": 322.7785572618169, "mean_abs_error_last_25": 304.99199368573005, "mean_abs_error_last_50": 342.5873761953052, "mean_pred_prob": 0.036430251272395256, "mean_pred_prob_last_10": 0.16925723385065794, "mean_pred_prob_last_25": 0.09738448224961757, "mean_pred_prob_last_50": 0.060515902983024714, "mean_token_accuracy": 0.8737480759620666, "step": 44490 }, { "epoch": 0.7910689207686701, "grad_norm": 2.719225717524873, "learning_rate": 0.0001, "loss": 0.6234, "mean_abs_error": 361.3366981619931, "mean_abs_error_last_10": 51.796022576594645, "mean_abs_error_last_25": 102.03123031657478, "mean_abs_error_last_50": 175.79418622597845, "mean_pred_prob": 0.052963826432824136, "mean_pred_prob_last_10": 0.25194094702601433, "mean_pred_prob_last_25": 0.14226504638791085, "mean_pred_prob_last_50": 0.08793362146243452, "mean_token_accuracy": 0.889264976978302, "step": 44500 }, { "epoch": 0.791246689065472, "grad_norm": 1.9881530721706968, "learning_rate": 0.0001, "loss": 0.613, "mean_abs_error": 493.3403093872406, "mean_abs_error_last_10": 96.56416711695569, "mean_abs_error_last_25": 173.04060711231546, "mean_abs_error_last_50": 278.90359316450133, "mean_pred_prob": 0.045027651078999045, "mean_pred_prob_last_10": 0.20614196602255105, "mean_pred_prob_last_25": 0.11957535203546285, "mean_pred_prob_last_50": 0.0746681449469179, "mean_token_accuracy": 0.8783509254455566, "step": 44510 }, { "epoch": 0.791424457362274, "grad_norm": 0.9762086023419624, "learning_rate": 0.0001, "loss": 0.7952, "mean_abs_error": 478.3167788396683, "mean_abs_error_last_10": 251.38277367532365, "mean_abs_error_last_25": 301.87396018186917, "mean_abs_error_last_50": 292.1649059845191, "mean_pred_prob": 0.03257007047068328, "mean_pred_prob_last_10": 0.16378984590992332, "mean_pred_prob_last_25": 0.09143641730770469, "mean_pred_prob_last_50": 0.05558150266297161, "mean_token_accuracy": 0.8664612412452698, "step": 44520 }, { "epoch": 0.791602225659076, "grad_norm": 0.7922042926439488, "learning_rate": 0.0001, "loss": 0.6287, "mean_abs_error": 255.5703784503446, "mean_abs_error_last_10": 182.70848780600403, "mean_abs_error_last_25": 248.3191965344281, "mean_abs_error_last_50": 231.5815465533341, "mean_pred_prob": 0.06299370442284272, "mean_pred_prob_last_10": 0.2682681088335812, "mean_pred_prob_last_25": 0.15900083642918617, "mean_pred_prob_last_50": 0.10137193334521725, "mean_token_accuracy": 0.87009596824646, "step": 44530 }, { "epoch": 0.791779993955878, "grad_norm": 1.77635722807572, "learning_rate": 0.0001, "loss": 0.6046, "mean_abs_error": 585.0237312308502, "mean_abs_error_last_10": 162.7717612573054, "mean_abs_error_last_25": 206.83743905019992, "mean_abs_error_last_50": 320.0908970801743, "mean_pred_prob": 0.036126039043301715, "mean_pred_prob_last_10": 0.17818405928555875, "mean_pred_prob_last_25": 0.0996706428588368, "mean_pred_prob_last_50": 0.060716449306346475, "mean_token_accuracy": 0.8773134589195252, "step": 44540 }, { "epoch": 0.7919577622526799, "grad_norm": 1.3914313660290705, "learning_rate": 0.0001, "loss": 0.7385, "mean_abs_error": 901.9939047381546, "mean_abs_error_last_10": 350.2631295258815, "mean_abs_error_last_25": 441.9627129128984, "mean_abs_error_last_50": 574.507548472573, "mean_pred_prob": 0.03817246552789584, "mean_pred_prob_last_10": 0.19160187223460526, "mean_pred_prob_last_25": 0.1072007748822216, "mean_pred_prob_last_50": 0.06615058835013769, "mean_token_accuracy": 0.8751151025295257, "step": 44550 }, { "epoch": 0.7921355305494818, "grad_norm": 1.500474148757468, "learning_rate": 0.0001, "loss": 0.7753, "mean_abs_error": 228.9473705760281, "mean_abs_error_last_10": 106.26761894483562, "mean_abs_error_last_25": 104.40492155009755, "mean_abs_error_last_50": 125.1714219280957, "mean_pred_prob": 0.05744016545359045, "mean_pred_prob_last_10": 0.26888482985086737, "mean_pred_prob_last_25": 0.15687222036067397, "mean_pred_prob_last_50": 0.09600405253004282, "mean_token_accuracy": 0.868863296508789, "step": 44560 }, { "epoch": 0.7923132988462838, "grad_norm": 2.244973626307455, "learning_rate": 0.0001, "loss": 0.656, "mean_abs_error": 409.0780808302341, "mean_abs_error_last_10": 88.77327334418581, "mean_abs_error_last_25": 166.47837307698302, "mean_abs_error_last_50": 257.8739818776641, "mean_pred_prob": 0.024471658864058554, "mean_pred_prob_last_10": 0.1264157511293888, "mean_pred_prob_last_25": 0.06710887616500258, "mean_pred_prob_last_50": 0.041039282968267796, "mean_token_accuracy": 0.8807132363319397, "step": 44570 }, { "epoch": 0.7924910671430857, "grad_norm": 2.0700641299727307, "learning_rate": 0.0001, "loss": 0.6622, "mean_abs_error": 102.6050153157498, "mean_abs_error_last_10": 21.866546184978144, "mean_abs_error_last_25": 42.698380782932965, "mean_abs_error_last_50": 74.93876744438337, "mean_pred_prob": 0.04858974954113364, "mean_pred_prob_last_10": 0.2275795992463827, "mean_pred_prob_last_25": 0.12703354246914386, "mean_pred_prob_last_50": 0.07929490078240634, "mean_token_accuracy": 0.88487508893013, "step": 44580 }, { "epoch": 0.7926688354398876, "grad_norm": 2.3779405380984806, "learning_rate": 0.0001, "loss": 0.853, "mean_abs_error": 777.2430689118413, "mean_abs_error_last_10": 143.6821019403435, "mean_abs_error_last_25": 251.2677813683944, "mean_abs_error_last_50": 417.19966433926874, "mean_pred_prob": 0.03520617975154892, "mean_pred_prob_last_10": 0.18177154189907013, "mean_pred_prob_last_25": 0.10013751721708104, "mean_pred_prob_last_50": 0.06002595481695607, "mean_token_accuracy": 0.8733462929725647, "step": 44590 }, { "epoch": 0.7928466037366896, "grad_norm": 1.6309076516971877, "learning_rate": 0.0001, "loss": 0.7409, "mean_abs_error": 343.6411837041103, "mean_abs_error_last_10": 84.37551592338758, "mean_abs_error_last_25": 169.76492611786207, "mean_abs_error_last_50": 226.5621524590243, "mean_pred_prob": 0.04110330129042268, "mean_pred_prob_last_10": 0.2064372193068266, "mean_pred_prob_last_25": 0.11287199473008513, "mean_pred_prob_last_50": 0.06825034492649137, "mean_token_accuracy": 0.8677526593208313, "step": 44600 }, { "epoch": 0.7930243720334915, "grad_norm": 0.9165621662628809, "learning_rate": 0.0001, "loss": 0.7572, "mean_abs_error": 1572.0998326318236, "mean_abs_error_last_10": 611.0854749817943, "mean_abs_error_last_25": 724.2694651518221, "mean_abs_error_last_50": 1033.254748413727, "mean_pred_prob": 0.025598760855791623, "mean_pred_prob_last_10": 0.12179276464739815, "mean_pred_prob_last_25": 0.06833361189346761, "mean_pred_prob_last_50": 0.04251053406333085, "mean_token_accuracy": 0.8768813729286193, "step": 44610 }, { "epoch": 0.7932021403302935, "grad_norm": 0.951825761715941, "learning_rate": 0.0001, "loss": 0.6488, "mean_abs_error": 158.49236284215286, "mean_abs_error_last_10": 53.6254881087237, "mean_abs_error_last_25": 71.02709347549343, "mean_abs_error_last_50": 88.49765587816967, "mean_pred_prob": 0.05648630480282009, "mean_pred_prob_last_10": 0.2795213395729661, "mean_pred_prob_last_25": 0.15566661898046733, "mean_pred_prob_last_50": 0.09569233721122146, "mean_token_accuracy": 0.888526439666748, "step": 44620 }, { "epoch": 0.7933799086270954, "grad_norm": 1.4367438547532505, "learning_rate": 0.0001, "loss": 0.6506, "mean_abs_error": 71.82786450699041, "mean_abs_error_last_10": 23.42819181323013, "mean_abs_error_last_25": 35.18269619029523, "mean_abs_error_last_50": 44.277634939174774, "mean_pred_prob": 0.06447847373783588, "mean_pred_prob_last_10": 0.30170725360512735, "mean_pred_prob_last_25": 0.17575752697885036, "mean_pred_prob_last_50": 0.10876112151890993, "mean_token_accuracy": 0.8742218017578125, "step": 44630 }, { "epoch": 0.7935576769238973, "grad_norm": 2.711135367071229, "learning_rate": 0.0001, "loss": 0.5909, "mean_abs_error": 480.2467640235903, "mean_abs_error_last_10": 146.02494480263445, "mean_abs_error_last_25": 217.8142672647407, "mean_abs_error_last_50": 315.86332032008045, "mean_pred_prob": 0.04706004820764065, "mean_pred_prob_last_10": 0.21635989462956787, "mean_pred_prob_last_25": 0.12609245185740292, "mean_pred_prob_last_50": 0.07829717871500179, "mean_token_accuracy": 0.8841392159461975, "step": 44640 }, { "epoch": 0.7937354452206994, "grad_norm": 1.6203443997565783, "learning_rate": 0.0001, "loss": 0.7822, "mean_abs_error": 858.9947045845513, "mean_abs_error_last_10": 305.8755664976073, "mean_abs_error_last_25": 347.22984363330033, "mean_abs_error_last_50": 471.6316848059381, "mean_pred_prob": 0.04156591033679433, "mean_pred_prob_last_10": 0.2001689973345492, "mean_pred_prob_last_25": 0.11270484671113082, "mean_pred_prob_last_50": 0.06997698315535672, "mean_token_accuracy": 0.8721648693084717, "step": 44650 }, { "epoch": 0.7939132135175013, "grad_norm": 1.5229375473168212, "learning_rate": 0.0001, "loss": 0.6534, "mean_abs_error": 512.6982507400189, "mean_abs_error_last_10": 150.0184823577529, "mean_abs_error_last_25": 210.02522794499413, "mean_abs_error_last_50": 312.37027586554893, "mean_pred_prob": 0.046821981039829555, "mean_pred_prob_last_10": 0.21607048829318956, "mean_pred_prob_last_25": 0.12532811674755068, "mean_pred_prob_last_50": 0.07695918276440353, "mean_token_accuracy": 0.8793514251708985, "step": 44660 }, { "epoch": 0.7940909818143033, "grad_norm": 1.9531843399805768, "learning_rate": 0.0001, "loss": 0.7636, "mean_abs_error": 548.8921798185512, "mean_abs_error_last_10": 147.90792651332058, "mean_abs_error_last_25": 198.15203524483243, "mean_abs_error_last_50": 317.6320098154266, "mean_pred_prob": 0.033205989393172784, "mean_pred_prob_last_10": 0.1579433414619416, "mean_pred_prob_last_25": 0.08884970344370231, "mean_pred_prob_last_50": 0.054427063895855096, "mean_token_accuracy": 0.8727755904197693, "step": 44670 }, { "epoch": 0.7942687501111052, "grad_norm": 1.9816858041585281, "learning_rate": 0.0001, "loss": 0.8439, "mean_abs_error": 911.737538695844, "mean_abs_error_last_10": 549.7741433595869, "mean_abs_error_last_25": 632.6810364938739, "mean_abs_error_last_50": 715.0464255806346, "mean_pred_prob": 0.03734688111580908, "mean_pred_prob_last_10": 0.1701473049412016, "mean_pred_prob_last_25": 0.09864985158201307, "mean_pred_prob_last_50": 0.060963436280144376, "mean_token_accuracy": 0.8615249335765839, "step": 44680 }, { "epoch": 0.7944465184079071, "grad_norm": 1.3580774694287068, "learning_rate": 0.0001, "loss": 0.7483, "mean_abs_error": 445.72319435495217, "mean_abs_error_last_10": 137.22530592893682, "mean_abs_error_last_25": 167.20592232976347, "mean_abs_error_last_50": 211.70225412616264, "mean_pred_prob": 0.04582849554135464, "mean_pred_prob_last_10": 0.21352049320703373, "mean_pred_prob_last_25": 0.12407134450040758, "mean_pred_prob_last_50": 0.07692590825026854, "mean_token_accuracy": 0.8712096810340881, "step": 44690 }, { "epoch": 0.7946242867047091, "grad_norm": 2.8627555374760285, "learning_rate": 0.0001, "loss": 0.773, "mean_abs_error": 1176.7571603302938, "mean_abs_error_last_10": 635.0969279983672, "mean_abs_error_last_25": 644.1839661079136, "mean_abs_error_last_50": 832.8471689676019, "mean_pred_prob": 0.029805158750968987, "mean_pred_prob_last_10": 0.12889456176781094, "mean_pred_prob_last_25": 0.07823780197068117, "mean_pred_prob_last_50": 0.04848250561626628, "mean_token_accuracy": 0.8674505472183227, "step": 44700 }, { "epoch": 0.794802055001511, "grad_norm": 2.5509981199910796, "learning_rate": 0.0001, "loss": 0.6238, "mean_abs_error": 1036.6304695914791, "mean_abs_error_last_10": 410.33840234917295, "mean_abs_error_last_25": 559.9996847873999, "mean_abs_error_last_50": 743.060738236774, "mean_pred_prob": 0.03858384441991802, "mean_pred_prob_last_10": 0.16001097995904273, "mean_pred_prob_last_25": 0.09961441505001858, "mean_pred_prob_last_50": 0.06346263412851841, "mean_token_accuracy": 0.878759115934372, "step": 44710 }, { "epoch": 0.794979823298313, "grad_norm": 1.1431488687555411, "learning_rate": 0.0001, "loss": 0.5883, "mean_abs_error": 72.57275990368234, "mean_abs_error_last_10": 15.275334561701433, "mean_abs_error_last_25": 33.8328882554829, "mean_abs_error_last_50": 47.224111561352316, "mean_pred_prob": 0.058292390406131746, "mean_pred_prob_last_10": 0.270910382270813, "mean_pred_prob_last_25": 0.15382271371781825, "mean_pred_prob_last_50": 0.09646616205573082, "mean_token_accuracy": 0.881118792295456, "step": 44720 }, { "epoch": 0.7951575915951149, "grad_norm": 1.8586733956667747, "learning_rate": 0.0001, "loss": 0.5445, "mean_abs_error": 63.838895929678095, "mean_abs_error_last_10": 5.075691224200737, "mean_abs_error_last_25": 15.714637489081568, "mean_abs_error_last_50": 32.997498154777055, "mean_pred_prob": 0.06375913172960282, "mean_pred_prob_last_10": 0.3117729902267456, "mean_pred_prob_last_25": 0.17228840067982673, "mean_pred_prob_last_50": 0.10626938641071319, "mean_token_accuracy": 0.8749954104423523, "step": 44730 }, { "epoch": 0.7953353598919168, "grad_norm": 1.811784093618093, "learning_rate": 0.0001, "loss": 0.6933, "mean_abs_error": 541.2450832087501, "mean_abs_error_last_10": 188.24306967287362, "mean_abs_error_last_25": 201.46025720070702, "mean_abs_error_last_50": 279.48705023220657, "mean_pred_prob": 0.0503082008799538, "mean_pred_prob_last_10": 0.2122272617998533, "mean_pred_prob_last_25": 0.1280061430297792, "mean_pred_prob_last_50": 0.08184930949937552, "mean_token_accuracy": 0.8747044026851654, "step": 44740 }, { "epoch": 0.7955131281887188, "grad_norm": 2.6490110226697534, "learning_rate": 0.0001, "loss": 0.7886, "mean_abs_error": 700.858251835523, "mean_abs_error_last_10": 135.85359943889213, "mean_abs_error_last_25": 306.62272243962167, "mean_abs_error_last_50": 488.797254268785, "mean_pred_prob": 0.045302821265067905, "mean_pred_prob_last_10": 0.21373590881703422, "mean_pred_prob_last_25": 0.11963928323239088, "mean_pred_prob_last_50": 0.07574694019858726, "mean_token_accuracy": 0.8702381730079651, "step": 44750 }, { "epoch": 0.7956908964855208, "grad_norm": 1.3572024619380294, "learning_rate": 0.0001, "loss": 0.7406, "mean_abs_error": 222.47671211861115, "mean_abs_error_last_10": 70.61589521494378, "mean_abs_error_last_25": 112.16597069607728, "mean_abs_error_last_50": 168.61166378526664, "mean_pred_prob": 0.03539975668536499, "mean_pred_prob_last_10": 0.1838637484703213, "mean_pred_prob_last_25": 0.09919339083135129, "mean_pred_prob_last_50": 0.059446351043879984, "mean_token_accuracy": 0.8710637271404267, "step": 44760 }, { "epoch": 0.7958686647823228, "grad_norm": 4.846624008171482, "learning_rate": 0.0001, "loss": 0.6875, "mean_abs_error": 647.1265522944763, "mean_abs_error_last_10": 230.72316093568824, "mean_abs_error_last_25": 348.51595998247, "mean_abs_error_last_50": 469.5968930308559, "mean_pred_prob": 0.03837815227161627, "mean_pred_prob_last_10": 0.18064553079893814, "mean_pred_prob_last_25": 0.10224902386544273, "mean_pred_prob_last_50": 0.0634190823417157, "mean_token_accuracy": 0.8674846708774566, "step": 44770 }, { "epoch": 0.7960464330791247, "grad_norm": 1.4880592958272403, "learning_rate": 0.0001, "loss": 0.7714, "mean_abs_error": 1829.9829302353912, "mean_abs_error_last_10": 964.9147978864934, "mean_abs_error_last_25": 1074.6886829936516, "mean_abs_error_last_50": 1322.3068335157982, "mean_pred_prob": 0.04438139730555122, "mean_pred_prob_last_10": 0.19958782326139043, "mean_pred_prob_last_25": 0.11773378800717182, "mean_pred_prob_last_50": 0.07404624662449351, "mean_token_accuracy": 0.882365757226944, "step": 44780 }, { "epoch": 0.7962242013759266, "grad_norm": 1.3824584803813307, "learning_rate": 0.0001, "loss": 0.7857, "mean_abs_error": 841.0540246370231, "mean_abs_error_last_10": 100.92566105104686, "mean_abs_error_last_25": 205.41682919236808, "mean_abs_error_last_50": 420.97716452052043, "mean_pred_prob": 0.04326465490739793, "mean_pred_prob_last_10": 0.18582147350534797, "mean_pred_prob_last_25": 0.1137778487522155, "mean_pred_prob_last_50": 0.07195843299850821, "mean_token_accuracy": 0.8662971198558808, "step": 44790 }, { "epoch": 0.7964019696727286, "grad_norm": 1.6847301751836472, "learning_rate": 0.0001, "loss": 0.6911, "mean_abs_error": 169.32275230359738, "mean_abs_error_last_10": 36.89452919648535, "mean_abs_error_last_25": 65.30895999275853, "mean_abs_error_last_50": 136.13270278733336, "mean_pred_prob": 0.05986058604903519, "mean_pred_prob_last_10": 0.27724353559315207, "mean_pred_prob_last_25": 0.159693000651896, "mean_pred_prob_last_50": 0.09836165979504585, "mean_token_accuracy": 0.8802783906459808, "step": 44800 }, { "epoch": 0.7965797379695305, "grad_norm": 1.7147909900981304, "learning_rate": 0.0001, "loss": 0.6897, "mean_abs_error": 376.3032435382962, "mean_abs_error_last_10": 101.68157032688417, "mean_abs_error_last_25": 134.2551905042188, "mean_abs_error_last_50": 187.66624094048913, "mean_pred_prob": 0.03542537081521004, "mean_pred_prob_last_10": 0.18426947765983642, "mean_pred_prob_last_25": 0.10024339512456208, "mean_pred_prob_last_50": 0.05970601920271292, "mean_token_accuracy": 0.8732890963554383, "step": 44810 }, { "epoch": 0.7967575062663325, "grad_norm": 1.5614350960409151, "learning_rate": 0.0001, "loss": 0.7637, "mean_abs_error": 177.6710982313021, "mean_abs_error_last_10": 39.88692686904327, "mean_abs_error_last_25": 89.45012836550531, "mean_abs_error_last_50": 114.24599179746181, "mean_pred_prob": 0.04632381573319435, "mean_pred_prob_last_10": 0.23866075947880744, "mean_pred_prob_last_25": 0.1299609549343586, "mean_pred_prob_last_50": 0.07773297131061555, "mean_token_accuracy": 0.864241772890091, "step": 44820 }, { "epoch": 0.7969352745631344, "grad_norm": 2.230999150174112, "learning_rate": 0.0001, "loss": 0.7399, "mean_abs_error": 374.18955164010447, "mean_abs_error_last_10": 86.69702187337468, "mean_abs_error_last_25": 176.57769677629207, "mean_abs_error_last_50": 247.71421634752613, "mean_pred_prob": 0.02983468126039952, "mean_pred_prob_last_10": 0.14776026643812656, "mean_pred_prob_last_25": 0.08096821773797273, "mean_pred_prob_last_50": 0.04899006900377571, "mean_token_accuracy": 0.8729858100414276, "step": 44830 }, { "epoch": 0.7971130428599363, "grad_norm": 2.2294781422395196, "learning_rate": 0.0001, "loss": 0.684, "mean_abs_error": 727.7181833665343, "mean_abs_error_last_10": 293.48529144886777, "mean_abs_error_last_25": 356.3722570727567, "mean_abs_error_last_50": 464.0376468448747, "mean_pred_prob": 0.02087593004689552, "mean_pred_prob_last_10": 0.11559533412801101, "mean_pred_prob_last_25": 0.05699874756392091, "mean_pred_prob_last_50": 0.033913929533446206, "mean_token_accuracy": 0.8673468470573426, "step": 44840 }, { "epoch": 0.7972908111567383, "grad_norm": 0.8283080560776268, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 580.3953329443249, "mean_abs_error_last_10": 153.01546234767298, "mean_abs_error_last_25": 199.47210163660378, "mean_abs_error_last_50": 337.4409914443781, "mean_pred_prob": 0.023256822366965933, "mean_pred_prob_last_10": 0.12100041350349784, "mean_pred_prob_last_25": 0.06364471636479721, "mean_pred_prob_last_50": 0.039084493729751556, "mean_token_accuracy": 0.8780130684375763, "step": 44850 }, { "epoch": 0.7974685794535402, "grad_norm": 1.361155950958593, "learning_rate": 0.0001, "loss": 0.7687, "mean_abs_error": 255.192744885653, "mean_abs_error_last_10": 95.48015309815064, "mean_abs_error_last_25": 173.61699764316066, "mean_abs_error_last_50": 178.21219268127433, "mean_pred_prob": 0.034083450818434356, "mean_pred_prob_last_10": 0.1686256930232048, "mean_pred_prob_last_25": 0.09251335225999355, "mean_pred_prob_last_50": 0.056809703912585975, "mean_token_accuracy": 0.8731161475181579, "step": 44860 }, { "epoch": 0.7976463477503422, "grad_norm": 1.5247195529001476, "learning_rate": 0.0001, "loss": 0.7718, "mean_abs_error": 669.9088047693191, "mean_abs_error_last_10": 383.9253849238668, "mean_abs_error_last_25": 492.7035690809677, "mean_abs_error_last_50": 487.9362994934319, "mean_pred_prob": 0.01247858750866726, "mean_pred_prob_last_10": 0.05541651889216155, "mean_pred_prob_last_25": 0.03086933174636215, "mean_pred_prob_last_50": 0.019945769954938443, "mean_token_accuracy": 0.8717777490615845, "step": 44870 }, { "epoch": 0.7978241160471442, "grad_norm": 1.6792363682792206, "learning_rate": 0.0001, "loss": 0.7, "mean_abs_error": 326.68715626611777, "mean_abs_error_last_10": 73.36255212751209, "mean_abs_error_last_25": 96.28450376032234, "mean_abs_error_last_50": 139.78001380104703, "mean_pred_prob": 0.030820524389855563, "mean_pred_prob_last_10": 0.1519043054431677, "mean_pred_prob_last_25": 0.0834784229286015, "mean_pred_prob_last_50": 0.05121942316181958, "mean_token_accuracy": 0.8743878304958344, "step": 44880 }, { "epoch": 0.7980018843439461, "grad_norm": 1.2723820810705564, "learning_rate": 0.0001, "loss": 0.6261, "mean_abs_error": 324.5707803604983, "mean_abs_error_last_10": 85.03035273259611, "mean_abs_error_last_25": 95.62745822298596, "mean_abs_error_last_50": 202.38614798783124, "mean_pred_prob": 0.04113689525984228, "mean_pred_prob_last_10": 0.21058576926589012, "mean_pred_prob_last_25": 0.11243654154241085, "mean_pred_prob_last_50": 0.06933890925720335, "mean_token_accuracy": 0.8755627930164337, "step": 44890 }, { "epoch": 0.7981796526407481, "grad_norm": 1.5109210648893692, "learning_rate": 0.0001, "loss": 0.821, "mean_abs_error": 452.7068697785556, "mean_abs_error_last_10": 289.79855302086264, "mean_abs_error_last_25": 302.1178296255597, "mean_abs_error_last_50": 332.4300268209105, "mean_pred_prob": 0.02598233022727072, "mean_pred_prob_last_10": 0.11383678037673235, "mean_pred_prob_last_25": 0.06912797912955285, "mean_pred_prob_last_50": 0.04251088285818696, "mean_token_accuracy": 0.8675342559814453, "step": 44900 }, { "epoch": 0.79835742093755, "grad_norm": 2.584071313102383, "learning_rate": 0.0001, "loss": 0.7679, "mean_abs_error": 1128.9910298939574, "mean_abs_error_last_10": 471.81706756834694, "mean_abs_error_last_25": 568.9921743877824, "mean_abs_error_last_50": 765.4170800926595, "mean_pred_prob": 0.03256764759134967, "mean_pred_prob_last_10": 0.16240518493286799, "mean_pred_prob_last_25": 0.08612888507777824, "mean_pred_prob_last_50": 0.05420054747228278, "mean_token_accuracy": 0.8868139386177063, "step": 44910 }, { "epoch": 0.798535189234352, "grad_norm": 1.2788390779931147, "learning_rate": 0.0001, "loss": 0.7207, "mean_abs_error": 991.0268593306242, "mean_abs_error_last_10": 556.8728526661522, "mean_abs_error_last_25": 648.616380899919, "mean_abs_error_last_50": 763.3187958977173, "mean_pred_prob": 0.023854421242140232, "mean_pred_prob_last_10": 0.12541222096187993, "mean_pred_prob_last_25": 0.06615598711650819, "mean_pred_prob_last_50": 0.03993489944259636, "mean_token_accuracy": 0.865682864189148, "step": 44920 }, { "epoch": 0.7987129575311539, "grad_norm": 1.1662585069546543, "learning_rate": 0.0001, "loss": 0.8581, "mean_abs_error": 1342.3596455264885, "mean_abs_error_last_10": 454.3464890627373, "mean_abs_error_last_25": 684.2377514066283, "mean_abs_error_last_50": 939.2267725604622, "mean_pred_prob": 0.03056585964222904, "mean_pred_prob_last_10": 0.1202177645813208, "mean_pred_prob_last_25": 0.08044497074733954, "mean_pred_prob_last_50": 0.050194362353067845, "mean_token_accuracy": 0.874772709608078, "step": 44930 }, { "epoch": 0.7988907258279558, "grad_norm": 3.113492261635662, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 178.85354246059507, "mean_abs_error_last_10": 12.305737550216282, "mean_abs_error_last_25": 34.67091186247217, "mean_abs_error_last_50": 70.11524217466147, "mean_pred_prob": 0.04698549290187657, "mean_pred_prob_last_10": 0.24268031530082226, "mean_pred_prob_last_25": 0.13629597723484038, "mean_pred_prob_last_50": 0.0803834238089621, "mean_token_accuracy": 0.87530597448349, "step": 44940 }, { "epoch": 0.7990684941247578, "grad_norm": 2.221200800849684, "learning_rate": 0.0001, "loss": 1.0994, "mean_abs_error": 534.6353013371938, "mean_abs_error_last_10": 187.00476101072584, "mean_abs_error_last_25": 332.54198638827216, "mean_abs_error_last_50": 371.23763774985105, "mean_pred_prob": 0.04797220570035279, "mean_pred_prob_last_10": 0.19648701287806034, "mean_pred_prob_last_25": 0.1219428949058056, "mean_pred_prob_last_50": 0.0762365348637104, "mean_token_accuracy": 0.8712182223796845, "step": 44950 }, { "epoch": 0.7992462624215597, "grad_norm": 1.1349051147754623, "learning_rate": 0.0001, "loss": 0.6547, "mean_abs_error": 712.2652727329781, "mean_abs_error_last_10": 210.2467482926661, "mean_abs_error_last_25": 272.08611487570744, "mean_abs_error_last_50": 425.0645116028526, "mean_pred_prob": 0.04889134698314592, "mean_pred_prob_last_10": 0.2344837619632017, "mean_pred_prob_last_25": 0.1328496073372662, "mean_pred_prob_last_50": 0.08266129539697431, "mean_token_accuracy": 0.8728669822216034, "step": 44960 }, { "epoch": 0.7994240307183617, "grad_norm": 1.0922660101047752, "learning_rate": 0.0001, "loss": 0.6085, "mean_abs_error": 794.5762058243038, "mean_abs_error_last_10": 346.9274700067631, "mean_abs_error_last_25": 407.7806850368412, "mean_abs_error_last_50": 523.2756696994264, "mean_pred_prob": 0.037276392517378555, "mean_pred_prob_last_10": 0.19605450259405188, "mean_pred_prob_last_25": 0.10350342454039492, "mean_pred_prob_last_50": 0.06262461923179216, "mean_token_accuracy": 0.8748183429241181, "step": 44970 }, { "epoch": 0.7996017990151636, "grad_norm": 0.8452390343818061, "learning_rate": 0.0001, "loss": 0.5997, "mean_abs_error": 552.5755006156744, "mean_abs_error_last_10": 240.39578454497934, "mean_abs_error_last_25": 267.43798389048754, "mean_abs_error_last_50": 343.7319233832054, "mean_pred_prob": 0.037749057402834296, "mean_pred_prob_last_10": 0.18889775704592465, "mean_pred_prob_last_25": 0.10389658610802144, "mean_pred_prob_last_50": 0.06278451569378377, "mean_token_accuracy": 0.8745712757110595, "step": 44980 }, { "epoch": 0.7997795673119655, "grad_norm": 2.1352084891220855, "learning_rate": 0.0001, "loss": 0.6073, "mean_abs_error": 312.2870800633407, "mean_abs_error_last_10": 60.83060818182861, "mean_abs_error_last_25": 105.0791089996881, "mean_abs_error_last_50": 139.91296817056894, "mean_pred_prob": 0.03318536020815373, "mean_pred_prob_last_10": 0.1665725488215685, "mean_pred_prob_last_25": 0.09284813143312931, "mean_pred_prob_last_50": 0.05662021208554506, "mean_token_accuracy": 0.8829913020133973, "step": 44990 }, { "epoch": 0.7999573356087676, "grad_norm": 2.3698073747215176, "learning_rate": 0.0001, "loss": 0.7155, "mean_abs_error": 357.19722690662127, "mean_abs_error_last_10": 37.20352716661824, "mean_abs_error_last_25": 79.62411523213797, "mean_abs_error_last_50": 180.4831150344704, "mean_pred_prob": 0.05191383210476488, "mean_pred_prob_last_10": 0.2687845489010215, "mean_pred_prob_last_25": 0.1440277243964374, "mean_pred_prob_last_50": 0.08718019789084792, "mean_token_accuracy": 0.879491925239563, "step": 45000 }, { "epoch": 0.8001351039055695, "grad_norm": 1.7533067718669004, "learning_rate": 0.0001, "loss": 0.7839, "mean_abs_error": 1872.589870621523, "mean_abs_error_last_10": 1046.3064133547102, "mean_abs_error_last_25": 1206.556516699608, "mean_abs_error_last_50": 1473.6941168053368, "mean_pred_prob": 0.04185362469725078, "mean_pred_prob_last_10": 0.18679668494150975, "mean_pred_prob_last_25": 0.11124783281993586, "mean_pred_prob_last_50": 0.06950609822670231, "mean_token_accuracy": 0.8695224761962891, "step": 45010 }, { "epoch": 0.8003128722023715, "grad_norm": 0.8721897749689492, "learning_rate": 0.0001, "loss": 0.6321, "mean_abs_error": 719.3521318046967, "mean_abs_error_last_10": 362.2063178990439, "mean_abs_error_last_25": 353.445433449628, "mean_abs_error_last_50": 405.7602548208625, "mean_pred_prob": 0.04137900990317576, "mean_pred_prob_last_10": 0.19787169728660955, "mean_pred_prob_last_25": 0.11071269382955506, "mean_pred_prob_last_50": 0.06884456092957407, "mean_token_accuracy": 0.8728957533836365, "step": 45020 }, { "epoch": 0.8004906404991734, "grad_norm": 1.8459715198428588, "learning_rate": 0.0001, "loss": 0.7733, "mean_abs_error": 480.29619739911425, "mean_abs_error_last_10": 233.91937264428552, "mean_abs_error_last_25": 307.9271685142156, "mean_abs_error_last_50": 362.8383668639361, "mean_pred_prob": 0.03627127150539309, "mean_pred_prob_last_10": 0.18288086634129286, "mean_pred_prob_last_25": 0.1020451981574297, "mean_pred_prob_last_50": 0.06216298122890294, "mean_token_accuracy": 0.8662541091442109, "step": 45030 }, { "epoch": 0.8006684087959753, "grad_norm": 1.340707452472809, "learning_rate": 0.0001, "loss": 0.6837, "mean_abs_error": 940.7614438655974, "mean_abs_error_last_10": 481.4307145726451, "mean_abs_error_last_25": 563.6104059622705, "mean_abs_error_last_50": 681.6312116055328, "mean_pred_prob": 0.029969198642356788, "mean_pred_prob_last_10": 0.16101433816365898, "mean_pred_prob_last_25": 0.08441721379931551, "mean_pred_prob_last_50": 0.050306898567941974, "mean_token_accuracy": 0.8642734587192535, "step": 45040 }, { "epoch": 0.8008461770927773, "grad_norm": 1.5688824373703625, "learning_rate": 0.0001, "loss": 0.5988, "mean_abs_error": 275.95557224532575, "mean_abs_error_last_10": 98.71092203449903, "mean_abs_error_last_25": 128.66736595368138, "mean_abs_error_last_50": 190.03213956333843, "mean_pred_prob": 0.06013110593194142, "mean_pred_prob_last_10": 0.2627888503135182, "mean_pred_prob_last_25": 0.15200519376667215, "mean_pred_prob_last_50": 0.09752972835558467, "mean_token_accuracy": 0.872653192281723, "step": 45050 }, { "epoch": 0.8010239453895792, "grad_norm": 1.8682022928250277, "learning_rate": 0.0001, "loss": 0.6052, "mean_abs_error": 171.162530729347, "mean_abs_error_last_10": 48.378058915291135, "mean_abs_error_last_25": 62.16669106288357, "mean_abs_error_last_50": 96.48302278985621, "mean_pred_prob": 0.04717511544004083, "mean_pred_prob_last_10": 0.22472903765738012, "mean_pred_prob_last_25": 0.13007343448698522, "mean_pred_prob_last_50": 0.07965058507397771, "mean_token_accuracy": 0.876415878534317, "step": 45060 }, { "epoch": 0.8012017136863812, "grad_norm": 1.2019033856262977, "learning_rate": 0.0001, "loss": 0.6049, "mean_abs_error": 483.1197622232113, "mean_abs_error_last_10": 152.69476774144837, "mean_abs_error_last_25": 286.0076620175165, "mean_abs_error_last_50": 373.87752335848234, "mean_pred_prob": 0.05500028035603464, "mean_pred_prob_last_10": 0.21817947370000185, "mean_pred_prob_last_25": 0.1381363856839016, "mean_pred_prob_last_50": 0.08857747132424265, "mean_token_accuracy": 0.8722898662090302, "step": 45070 }, { "epoch": 0.8013794819831831, "grad_norm": 2.016290973894753, "learning_rate": 0.0001, "loss": 0.8611, "mean_abs_error": 589.9914415690662, "mean_abs_error_last_10": 178.24078116230885, "mean_abs_error_last_25": 233.1799054071226, "mean_abs_error_last_50": 327.3110383315427, "mean_pred_prob": 0.035717458097497004, "mean_pred_prob_last_10": 0.1860906891291961, "mean_pred_prob_last_25": 0.10006765854777769, "mean_pred_prob_last_50": 0.060656523163197563, "mean_token_accuracy": 0.8659489810466766, "step": 45080 }, { "epoch": 0.801557250279985, "grad_norm": 1.129130470812853, "learning_rate": 0.0001, "loss": 0.8111, "mean_abs_error": 993.2501238781966, "mean_abs_error_last_10": 364.2165863777993, "mean_abs_error_last_25": 496.31407267808993, "mean_abs_error_last_50": 678.3479640678318, "mean_pred_prob": 0.030113606512895786, "mean_pred_prob_last_10": 0.15692368975142018, "mean_pred_prob_last_25": 0.08585121918586083, "mean_pred_prob_last_50": 0.051604147144826126, "mean_token_accuracy": 0.8713627636432648, "step": 45090 }, { "epoch": 0.801735018576787, "grad_norm": 0.9668997057871296, "learning_rate": 0.0001, "loss": 0.7178, "mean_abs_error": 956.0567879424686, "mean_abs_error_last_10": 211.24014803623763, "mean_abs_error_last_25": 339.1028031680403, "mean_abs_error_last_50": 538.3876451560525, "mean_pred_prob": 0.03205604510731064, "mean_pred_prob_last_10": 0.15582521910546349, "mean_pred_prob_last_25": 0.08642757015768439, "mean_pred_prob_last_50": 0.054725432174745944, "mean_token_accuracy": 0.8757422268390656, "step": 45100 }, { "epoch": 0.8019127868735889, "grad_norm": 2.5414701246395515, "learning_rate": 0.0001, "loss": 0.7284, "mean_abs_error": 391.5594184223648, "mean_abs_error_last_10": 167.48808484363377, "mean_abs_error_last_25": 224.5595886427472, "mean_abs_error_last_50": 295.7824068601729, "mean_pred_prob": 0.03602837361395359, "mean_pred_prob_last_10": 0.18650652449578048, "mean_pred_prob_last_25": 0.10275683077052236, "mean_pred_prob_last_50": 0.06138401497155428, "mean_token_accuracy": 0.8782472014427185, "step": 45110 }, { "epoch": 0.802090555170391, "grad_norm": 2.986360771707115, "learning_rate": 0.0001, "loss": 0.8155, "mean_abs_error": 674.3737394416737, "mean_abs_error_last_10": 152.9981356497148, "mean_abs_error_last_25": 187.1607859017516, "mean_abs_error_last_50": 310.0073966469014, "mean_pred_prob": 0.03496244241250679, "mean_pred_prob_last_10": 0.16071288739331066, "mean_pred_prob_last_25": 0.09394175773486495, "mean_pred_prob_last_50": 0.0576928069582209, "mean_token_accuracy": 0.8722395360469818, "step": 45120 }, { "epoch": 0.8022683234671929, "grad_norm": 2.846255708732809, "learning_rate": 0.0001, "loss": 0.6872, "mean_abs_error": 735.8892384531443, "mean_abs_error_last_10": 394.2710197347, "mean_abs_error_last_25": 453.6562367010218, "mean_abs_error_last_50": 533.36542988169, "mean_pred_prob": 0.040169526170939204, "mean_pred_prob_last_10": 0.19091594821074978, "mean_pred_prob_last_25": 0.10553195532993413, "mean_pred_prob_last_50": 0.06590598286129534, "mean_token_accuracy": 0.8709259688854217, "step": 45130 }, { "epoch": 0.8024460917639948, "grad_norm": 1.6301018022516915, "learning_rate": 0.0001, "loss": 0.7005, "mean_abs_error": 519.0371479986211, "mean_abs_error_last_10": 213.2305544430473, "mean_abs_error_last_25": 240.06114294514094, "mean_abs_error_last_50": 309.61519979236635, "mean_pred_prob": 0.03308536814292893, "mean_pred_prob_last_10": 0.17482734918594361, "mean_pred_prob_last_25": 0.093492373614572, "mean_pred_prob_last_50": 0.05679995849495754, "mean_token_accuracy": 0.8653128266334533, "step": 45140 }, { "epoch": 0.8026238600607968, "grad_norm": 1.0241385811038546, "learning_rate": 0.0001, "loss": 0.6285, "mean_abs_error": 420.57700888681956, "mean_abs_error_last_10": 204.67708343494624, "mean_abs_error_last_25": 219.3217975811027, "mean_abs_error_last_50": 275.1022202134858, "mean_pred_prob": 0.04702249219408259, "mean_pred_prob_last_10": 0.22865426195785404, "mean_pred_prob_last_25": 0.1291300522047095, "mean_pred_prob_last_50": 0.07912635012762621, "mean_token_accuracy": 0.8772432327270507, "step": 45150 }, { "epoch": 0.8028016283575987, "grad_norm": 1.2982069118918067, "learning_rate": 0.0001, "loss": 0.8795, "mean_abs_error": 439.41764693722854, "mean_abs_error_last_10": 204.69021608926556, "mean_abs_error_last_25": 251.08102220133992, "mean_abs_error_last_50": 388.9093192265953, "mean_pred_prob": 0.023153366823680698, "mean_pred_prob_last_10": 0.13151178455445917, "mean_pred_prob_last_25": 0.06619567696470767, "mean_pred_prob_last_50": 0.038508747098967434, "mean_token_accuracy": 0.8650484561920166, "step": 45160 }, { "epoch": 0.8029793966544007, "grad_norm": 1.6516186554691383, "learning_rate": 0.0001, "loss": 0.6387, "mean_abs_error": 228.97794762437607, "mean_abs_error_last_10": 67.83928545452417, "mean_abs_error_last_25": 89.582445805004, "mean_abs_error_last_50": 126.33100702681618, "mean_pred_prob": 0.036039538541808726, "mean_pred_prob_last_10": 0.1816621631383896, "mean_pred_prob_last_25": 0.09758358411490917, "mean_pred_prob_last_50": 0.05998315410688519, "mean_token_accuracy": 0.8756658256053924, "step": 45170 }, { "epoch": 0.8031571649512026, "grad_norm": 1.8344791404415879, "learning_rate": 0.0001, "loss": 0.6425, "mean_abs_error": 265.93322661315494, "mean_abs_error_last_10": 95.98157969011972, "mean_abs_error_last_25": 139.89872130631198, "mean_abs_error_last_50": 192.80253457657855, "mean_pred_prob": 0.03256926718167961, "mean_pred_prob_last_10": 0.16816166006028652, "mean_pred_prob_last_25": 0.09110158793628216, "mean_pred_prob_last_50": 0.05614266106858849, "mean_token_accuracy": 0.8775304615497589, "step": 45180 }, { "epoch": 0.8033349332480045, "grad_norm": 2.564689643961249, "learning_rate": 0.0001, "loss": 0.7015, "mean_abs_error": 464.1273144035087, "mean_abs_error_last_10": 171.25308283773137, "mean_abs_error_last_25": 248.46652416096418, "mean_abs_error_last_50": 313.64263106438045, "mean_pred_prob": 0.03685547385830432, "mean_pred_prob_last_10": 0.17352060303092004, "mean_pred_prob_last_25": 0.09851797372102737, "mean_pred_prob_last_50": 0.061097540287300946, "mean_token_accuracy": 0.8746736347675323, "step": 45190 }, { "epoch": 0.8035127015448065, "grad_norm": 2.552936584365315, "learning_rate": 0.0001, "loss": 0.8305, "mean_abs_error": 611.0310667043897, "mean_abs_error_last_10": 184.64390737319258, "mean_abs_error_last_25": 287.27736238098686, "mean_abs_error_last_50": 388.0762490822313, "mean_pred_prob": 0.037008053669705984, "mean_pred_prob_last_10": 0.17387467107037083, "mean_pred_prob_last_25": 0.09921023227507249, "mean_pred_prob_last_50": 0.06076698045362718, "mean_token_accuracy": 0.868878960609436, "step": 45200 }, { "epoch": 0.8036904698416084, "grad_norm": 1.6234464472785255, "learning_rate": 0.0001, "loss": 0.7648, "mean_abs_error": 375.1869429402306, "mean_abs_error_last_10": 107.3178464669156, "mean_abs_error_last_25": 149.45444185738216, "mean_abs_error_last_50": 198.4960890715049, "mean_pred_prob": 0.02858646879903972, "mean_pred_prob_last_10": 0.14287773370742798, "mean_pred_prob_last_25": 0.07823141096159816, "mean_pred_prob_last_50": 0.04754605069756508, "mean_token_accuracy": 0.8655394732952117, "step": 45210 }, { "epoch": 0.8038682381384104, "grad_norm": 2.2066656924096377, "learning_rate": 0.0001, "loss": 0.6464, "mean_abs_error": 232.72829658089373, "mean_abs_error_last_10": 90.64728118018141, "mean_abs_error_last_25": 88.84344099536384, "mean_abs_error_last_50": 144.64523299721475, "mean_pred_prob": 0.044858642481267454, "mean_pred_prob_last_10": 0.21192607581615447, "mean_pred_prob_last_25": 0.12207407560199499, "mean_pred_prob_last_50": 0.07598786409944296, "mean_token_accuracy": 0.8714126169681549, "step": 45220 }, { "epoch": 0.8040460064352123, "grad_norm": 1.505333441486871, "learning_rate": 0.0001, "loss": 0.7445, "mean_abs_error": 948.0215941408017, "mean_abs_error_last_10": 337.6025618343853, "mean_abs_error_last_25": 487.7670942951495, "mean_abs_error_last_50": 583.1052531786656, "mean_pred_prob": 0.013625563710229471, "mean_pred_prob_last_10": 0.07440647789044305, "mean_pred_prob_last_25": 0.03818671882618219, "mean_pred_prob_last_50": 0.02305480927461758, "mean_token_accuracy": 0.8746055722236633, "step": 45230 }, { "epoch": 0.8042237747320143, "grad_norm": 1.65060247701727, "learning_rate": 0.0001, "loss": 0.824, "mean_abs_error": 355.6143252113042, "mean_abs_error_last_10": 299.4065477905891, "mean_abs_error_last_25": 279.84784201054106, "mean_abs_error_last_50": 272.8095744455591, "mean_pred_prob": 0.035697324713692066, "mean_pred_prob_last_10": 0.1696860113297589, "mean_pred_prob_last_25": 0.09569759886944666, "mean_pred_prob_last_50": 0.05949234026484192, "mean_token_accuracy": 0.8782578706741333, "step": 45240 }, { "epoch": 0.8044015430288163, "grad_norm": 2.4619153030267866, "learning_rate": 0.0001, "loss": 0.6742, "mean_abs_error": 1377.1173689911784, "mean_abs_error_last_10": 741.6555688621702, "mean_abs_error_last_25": 878.633975814472, "mean_abs_error_last_50": 1055.167742240203, "mean_pred_prob": 0.03356637506767583, "mean_pred_prob_last_10": 0.1642119796051702, "mean_pred_prob_last_25": 0.09168721096502849, "mean_pred_prob_last_50": 0.05636025205894839, "mean_token_accuracy": 0.8856021940708161, "step": 45250 }, { "epoch": 0.8045793113256182, "grad_norm": 1.6069028767364246, "learning_rate": 0.0001, "loss": 0.739, "mean_abs_error": 237.22870384622564, "mean_abs_error_last_10": 78.58982572925932, "mean_abs_error_last_25": 96.93073117582709, "mean_abs_error_last_50": 131.5184319655936, "mean_pred_prob": 0.05563844214193523, "mean_pred_prob_last_10": 0.2530530489981174, "mean_pred_prob_last_25": 0.14735452281311154, "mean_pred_prob_last_50": 0.09087716392241418, "mean_token_accuracy": 0.8636976540088653, "step": 45260 }, { "epoch": 0.8047570796224202, "grad_norm": 1.6436236106065785, "learning_rate": 0.0001, "loss": 0.5645, "mean_abs_error": 120.38718171262155, "mean_abs_error_last_10": 40.82853551101125, "mean_abs_error_last_25": 65.63611290429932, "mean_abs_error_last_50": 80.50437686864895, "mean_pred_prob": 0.06826449264772236, "mean_pred_prob_last_10": 0.3221726410090923, "mean_pred_prob_last_25": 0.18215996492654085, "mean_pred_prob_last_50": 0.112730857077986, "mean_token_accuracy": 0.8805957853794097, "step": 45270 }, { "epoch": 0.8049348479192221, "grad_norm": 2.7661182845542966, "learning_rate": 0.0001, "loss": 0.7828, "mean_abs_error": 715.0866000318025, "mean_abs_error_last_10": 310.93178308863753, "mean_abs_error_last_25": 344.06176691668196, "mean_abs_error_last_50": 471.72105170539925, "mean_pred_prob": 0.06245426377281547, "mean_pred_prob_last_10": 0.2665279674693011, "mean_pred_prob_last_25": 0.16631706779589878, "mean_pred_prob_last_50": 0.10420086410013027, "mean_token_accuracy": 0.8725690186023712, "step": 45280 }, { "epoch": 0.805112616216024, "grad_norm": 2.400578047174149, "learning_rate": 0.0001, "loss": 0.6984, "mean_abs_error": 305.7954971427298, "mean_abs_error_last_10": 79.48672277335217, "mean_abs_error_last_25": 93.59516509513233, "mean_abs_error_last_50": 167.56060093044127, "mean_pred_prob": 0.05599093911005184, "mean_pred_prob_last_10": 0.262179713556543, "mean_pred_prob_last_25": 0.15033775040647016, "mean_pred_prob_last_50": 0.0922409558785148, "mean_token_accuracy": 0.8775438606739044, "step": 45290 }, { "epoch": 0.805290384512826, "grad_norm": 1.1745860422342167, "learning_rate": 0.0001, "loss": 0.7953, "mean_abs_error": 839.0606030338515, "mean_abs_error_last_10": 502.53865012301185, "mean_abs_error_last_25": 572.6074459253434, "mean_abs_error_last_50": 659.5954783754872, "mean_pred_prob": 0.03786201251205057, "mean_pred_prob_last_10": 0.17673571245977654, "mean_pred_prob_last_25": 0.09989383981446735, "mean_pred_prob_last_50": 0.06169806277612224, "mean_token_accuracy": 0.8766317069530487, "step": 45300 }, { "epoch": 0.8054681528096279, "grad_norm": 1.8865112352426294, "learning_rate": 0.0001, "loss": 0.6213, "mean_abs_error": 223.24445667881864, "mean_abs_error_last_10": 72.81971934638727, "mean_abs_error_last_25": 132.15298244899572, "mean_abs_error_last_50": 178.34721182844046, "mean_pred_prob": 0.055926207639276984, "mean_pred_prob_last_10": 0.24863873608410358, "mean_pred_prob_last_25": 0.1444373787380755, "mean_pred_prob_last_50": 0.09147508963942527, "mean_token_accuracy": 0.8783478438854218, "step": 45310 }, { "epoch": 0.8056459211064299, "grad_norm": 1.4349338001211396, "learning_rate": 0.0001, "loss": 0.6769, "mean_abs_error": 109.98886671186774, "mean_abs_error_last_10": 37.43936718827437, "mean_abs_error_last_25": 55.0774948860603, "mean_abs_error_last_50": 73.62963350382402, "mean_pred_prob": 0.04821389755234122, "mean_pred_prob_last_10": 0.22864588871598243, "mean_pred_prob_last_25": 0.12857256159186364, "mean_pred_prob_last_50": 0.07937102746218443, "mean_token_accuracy": 0.8737179040908813, "step": 45320 }, { "epoch": 0.8058236894032318, "grad_norm": 1.6153153614696698, "learning_rate": 0.0001, "loss": 0.7699, "mean_abs_error": 695.7710308787942, "mean_abs_error_last_10": 314.0579525347715, "mean_abs_error_last_25": 368.1362848245943, "mean_abs_error_last_50": 462.8688447610035, "mean_pred_prob": 0.0345359570463188, "mean_pred_prob_last_10": 0.18664660321664997, "mean_pred_prob_last_25": 0.1028939948126208, "mean_pred_prob_last_50": 0.060368123778607695, "mean_token_accuracy": 0.8713938236236572, "step": 45330 }, { "epoch": 0.8060014577000337, "grad_norm": 0.9974496143294007, "learning_rate": 0.0001, "loss": 0.7405, "mean_abs_error": 835.7902419175091, "mean_abs_error_last_10": 305.6794210926004, "mean_abs_error_last_25": 409.4276916257929, "mean_abs_error_last_50": 545.0069115047693, "mean_pred_prob": 0.038454740546876566, "mean_pred_prob_last_10": 0.18998700261581689, "mean_pred_prob_last_25": 0.1011066172417486, "mean_pred_prob_last_50": 0.062235878710635004, "mean_token_accuracy": 0.8678404211997985, "step": 45340 }, { "epoch": 0.8061792259968357, "grad_norm": 0.9921173013709439, "learning_rate": 0.0001, "loss": 0.7803, "mean_abs_error": 296.2565090052659, "mean_abs_error_last_10": 102.19185239595853, "mean_abs_error_last_25": 133.81776093108454, "mean_abs_error_last_50": 197.46698039392422, "mean_pred_prob": 0.0350844684522599, "mean_pred_prob_last_10": 0.17259598560631276, "mean_pred_prob_last_25": 0.09424990694969893, "mean_pred_prob_last_50": 0.058446255140006544, "mean_token_accuracy": 0.8686314702033997, "step": 45350 }, { "epoch": 0.8063569942936377, "grad_norm": 1.139647031310032, "learning_rate": 0.0001, "loss": 0.8023, "mean_abs_error": 1129.1760642798345, "mean_abs_error_last_10": 482.4239018831721, "mean_abs_error_last_25": 594.2175501919244, "mean_abs_error_last_50": 774.2421168261837, "mean_pred_prob": 0.0312343008510652, "mean_pred_prob_last_10": 0.16211832025437617, "mean_pred_prob_last_25": 0.08709730826958548, "mean_pred_prob_last_50": 0.052693207102129234, "mean_token_accuracy": 0.867614084482193, "step": 45360 }, { "epoch": 0.8065347625904397, "grad_norm": 1.7280882684611378, "learning_rate": 0.0001, "loss": 0.7689, "mean_abs_error": 203.78031302962088, "mean_abs_error_last_10": 88.75998467129385, "mean_abs_error_last_25": 117.52043184488605, "mean_abs_error_last_50": 125.69719412487045, "mean_pred_prob": 0.05094579556025565, "mean_pred_prob_last_10": 0.22739496622234584, "mean_pred_prob_last_25": 0.131710631493479, "mean_pred_prob_last_50": 0.08371448046527803, "mean_token_accuracy": 0.8652218759059906, "step": 45370 }, { "epoch": 0.8067125308872416, "grad_norm": 1.496265591560427, "learning_rate": 0.0001, "loss": 0.6993, "mean_abs_error": 1355.3958467505495, "mean_abs_error_last_10": 740.0920706934942, "mean_abs_error_last_25": 779.9778377558061, "mean_abs_error_last_50": 1049.4719981374371, "mean_pred_prob": 0.04806735062593361, "mean_pred_prob_last_10": 0.20460632138419896, "mean_pred_prob_last_25": 0.11856482948496705, "mean_pred_prob_last_50": 0.07848953893990256, "mean_token_accuracy": 0.8682559609413147, "step": 45380 }, { "epoch": 0.8068902991840435, "grad_norm": 1.084034777639146, "learning_rate": 0.0001, "loss": 0.782, "mean_abs_error": 336.3843258641729, "mean_abs_error_last_10": 199.6627356389123, "mean_abs_error_last_25": 218.5727158430659, "mean_abs_error_last_50": 297.3286925321248, "mean_pred_prob": 0.035426920279860497, "mean_pred_prob_last_10": 0.17658172212541104, "mean_pred_prob_last_25": 0.09939049249514938, "mean_pred_prob_last_50": 0.05899640079587698, "mean_token_accuracy": 0.8667628169059753, "step": 45390 }, { "epoch": 0.8070680674808455, "grad_norm": 1.818581395753498, "learning_rate": 0.0001, "loss": 0.735, "mean_abs_error": 442.11685805266916, "mean_abs_error_last_10": 156.07353000013936, "mean_abs_error_last_25": 216.00195366032116, "mean_abs_error_last_50": 285.4071982460242, "mean_pred_prob": 0.016063048969954252, "mean_pred_prob_last_10": 0.0908315647393465, "mean_pred_prob_last_25": 0.04775607492774725, "mean_pred_prob_last_50": 0.02787390062585473, "mean_token_accuracy": 0.8744026899337769, "step": 45400 }, { "epoch": 0.8072458357776474, "grad_norm": 1.6222129301980912, "learning_rate": 0.0001, "loss": 0.4995, "mean_abs_error": 295.5386405496889, "mean_abs_error_last_10": 104.80324122892337, "mean_abs_error_last_25": 163.75389270075593, "mean_abs_error_last_50": 270.9110942990454, "mean_pred_prob": 0.03874934311024845, "mean_pred_prob_last_10": 0.20178310927003623, "mean_pred_prob_last_25": 0.10831314595416189, "mean_pred_prob_last_50": 0.0646908204536885, "mean_token_accuracy": 0.8861068546772003, "step": 45410 }, { "epoch": 0.8074236040744494, "grad_norm": 1.9133396553757924, "learning_rate": 0.0001, "loss": 0.6343, "mean_abs_error": 334.1478190504271, "mean_abs_error_last_10": 88.99668029777007, "mean_abs_error_last_25": 137.4016004397801, "mean_abs_error_last_50": 183.1976244866397, "mean_pred_prob": 0.040343893656972794, "mean_pred_prob_last_10": 0.20896482875104994, "mean_pred_prob_last_25": 0.11244674995541573, "mean_pred_prob_last_50": 0.06763300045859069, "mean_token_accuracy": 0.8803221583366394, "step": 45420 }, { "epoch": 0.8076013723712513, "grad_norm": 1.8492515644506073, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 266.40763615762546, "mean_abs_error_last_10": 335.66630544303143, "mean_abs_error_last_25": 289.4061599967142, "mean_abs_error_last_50": 290.7773055622805, "mean_pred_prob": 0.050186899537220594, "mean_pred_prob_last_10": 0.24260802026838063, "mean_pred_prob_last_25": 0.1375940337777138, "mean_pred_prob_last_50": 0.08394269449636341, "mean_token_accuracy": 0.8711890816688538, "step": 45430 }, { "epoch": 0.8077791406680532, "grad_norm": 1.6284091395730862, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 336.65310214782886, "mean_abs_error_last_10": 175.7978005988944, "mean_abs_error_last_25": 216.37799725234558, "mean_abs_error_last_50": 252.0649853688442, "mean_pred_prob": 0.044045423821080475, "mean_pred_prob_last_10": 0.18379952232935465, "mean_pred_prob_last_25": 0.10863124576862901, "mean_pred_prob_last_50": 0.07088890325394459, "mean_token_accuracy": 0.8803904831409455, "step": 45440 }, { "epoch": 0.8079569089648552, "grad_norm": 2.834363897985467, "learning_rate": 0.0001, "loss": 0.6949, "mean_abs_error": 460.42370121664334, "mean_abs_error_last_10": 217.13543932263278, "mean_abs_error_last_25": 233.3549134415943, "mean_abs_error_last_50": 295.8663254415127, "mean_pred_prob": 0.04713523057289422, "mean_pred_prob_last_10": 0.2152152605354786, "mean_pred_prob_last_25": 0.12242527194321155, "mean_pred_prob_last_50": 0.07714681825600564, "mean_token_accuracy": 0.8701710939407349, "step": 45450 }, { "epoch": 0.8081346772616571, "grad_norm": 2.2537120384290787, "learning_rate": 0.0001, "loss": 0.6958, "mean_abs_error": 177.99964186056164, "mean_abs_error_last_10": 60.28006318192797, "mean_abs_error_last_25": 75.07835445022212, "mean_abs_error_last_50": 128.96238022779033, "mean_pred_prob": 0.05232732370495796, "mean_pred_prob_last_10": 0.24064442217350007, "mean_pred_prob_last_25": 0.14134420752525328, "mean_pred_prob_last_50": 0.0867607519030571, "mean_token_accuracy": 0.8764967381954193, "step": 45460 }, { "epoch": 0.8083124455584592, "grad_norm": 1.6962418913825215, "learning_rate": 0.0001, "loss": 0.7739, "mean_abs_error": 199.5698036185201, "mean_abs_error_last_10": 37.606591947095495, "mean_abs_error_last_25": 55.34306837818461, "mean_abs_error_last_50": 108.8147043367881, "mean_pred_prob": 0.04670141888782382, "mean_pred_prob_last_10": 0.21620449982583523, "mean_pred_prob_last_25": 0.12542083840817214, "mean_pred_prob_last_50": 0.07825604397803546, "mean_token_accuracy": 0.8670346081256867, "step": 45470 }, { "epoch": 0.8084902138552611, "grad_norm": 1.6048468642912528, "learning_rate": 0.0001, "loss": 0.6267, "mean_abs_error": 986.7028723714544, "mean_abs_error_last_10": 266.63703312914674, "mean_abs_error_last_25": 413.5735423142197, "mean_abs_error_last_50": 594.8611236641513, "mean_pred_prob": 0.026118890516227112, "mean_pred_prob_last_10": 0.13590560076991096, "mean_pred_prob_last_25": 0.0722017289372161, "mean_pred_prob_last_50": 0.044832968118134885, "mean_token_accuracy": 0.8806899905204773, "step": 45480 }, { "epoch": 0.808667982152063, "grad_norm": 2.2079676547787845, "learning_rate": 0.0001, "loss": 0.7645, "mean_abs_error": 355.32073490481383, "mean_abs_error_last_10": 57.31491204497341, "mean_abs_error_last_25": 94.20659539212538, "mean_abs_error_last_50": 236.5084525799934, "mean_pred_prob": 0.04515372072346509, "mean_pred_prob_last_10": 0.22173907030373813, "mean_pred_prob_last_25": 0.12766100829467178, "mean_pred_prob_last_50": 0.07693404294550418, "mean_token_accuracy": 0.8667884945869446, "step": 45490 }, { "epoch": 0.808845750448865, "grad_norm": 1.1865580436292318, "learning_rate": 0.0001, "loss": 0.7229, "mean_abs_error": 356.09144717734836, "mean_abs_error_last_10": 62.66770736465355, "mean_abs_error_last_25": 139.68420995887212, "mean_abs_error_last_50": 212.72642467613332, "mean_pred_prob": 0.03578503667376935, "mean_pred_prob_last_10": 0.18974604606628417, "mean_pred_prob_last_25": 0.09901306070387364, "mean_pred_prob_last_50": 0.0599889725446701, "mean_token_accuracy": 0.876436573266983, "step": 45500 }, { "epoch": 0.8090235187456669, "grad_norm": 1.1261433909376322, "learning_rate": 0.0001, "loss": 0.7119, "mean_abs_error": 86.78542420015162, "mean_abs_error_last_10": 29.4196727436542, "mean_abs_error_last_25": 32.487085370991984, "mean_abs_error_last_50": 48.29871207971932, "mean_pred_prob": 0.047936899866908786, "mean_pred_prob_last_10": 0.22777350693941117, "mean_pred_prob_last_25": 0.12758548222482205, "mean_pred_prob_last_50": 0.07964143976569175, "mean_token_accuracy": 0.8757360577583313, "step": 45510 }, { "epoch": 0.8092012870424689, "grad_norm": 1.0701483308507165, "learning_rate": 0.0001, "loss": 0.6556, "mean_abs_error": 174.99336382596073, "mean_abs_error_last_10": 25.87198980852105, "mean_abs_error_last_25": 53.19678225719524, "mean_abs_error_last_50": 109.64362261356423, "mean_pred_prob": 0.05734078176319599, "mean_pred_prob_last_10": 0.27241928204894067, "mean_pred_prob_last_25": 0.1572370145469904, "mean_pred_prob_last_50": 0.09715204779058695, "mean_token_accuracy": 0.8811490476131439, "step": 45520 }, { "epoch": 0.8093790553392708, "grad_norm": 1.6000981531440726, "learning_rate": 0.0001, "loss": 0.7117, "mean_abs_error": 2049.4637920625855, "mean_abs_error_last_10": 799.7908083086267, "mean_abs_error_last_25": 1029.092698424168, "mean_abs_error_last_50": 1279.652943920898, "mean_pred_prob": 0.023838339193025603, "mean_pred_prob_last_10": 0.09394042059429922, "mean_pred_prob_last_25": 0.05622228490537964, "mean_pred_prob_last_50": 0.037105293059721586, "mean_token_accuracy": 0.8663863956928253, "step": 45530 }, { "epoch": 0.8095568236360727, "grad_norm": 1.8025113910031776, "learning_rate": 0.0001, "loss": 0.7329, "mean_abs_error": 208.94552763791594, "mean_abs_error_last_10": 32.548812253138365, "mean_abs_error_last_25": 83.79394274662587, "mean_abs_error_last_50": 112.71288323146527, "mean_pred_prob": 0.0546326485928148, "mean_pred_prob_last_10": 0.2612654287368059, "mean_pred_prob_last_25": 0.14671665392816066, "mean_pred_prob_last_50": 0.09089584313333035, "mean_token_accuracy": 0.8739933490753173, "step": 45540 }, { "epoch": 0.8097345919328747, "grad_norm": 1.2657621519570879, "learning_rate": 0.0001, "loss": 0.7049, "mean_abs_error": 203.54527719934268, "mean_abs_error_last_10": 75.5551464569611, "mean_abs_error_last_25": 122.91536691386682, "mean_abs_error_last_50": 165.06064240205143, "mean_pred_prob": 0.037624261993914845, "mean_pred_prob_last_10": 0.1796560836955905, "mean_pred_prob_last_25": 0.09933559158816933, "mean_pred_prob_last_50": 0.06123071713373065, "mean_token_accuracy": 0.8738402366638184, "step": 45550 }, { "epoch": 0.8099123602296766, "grad_norm": 2.6176155455074728, "learning_rate": 0.0001, "loss": 0.8306, "mean_abs_error": 432.6794884777881, "mean_abs_error_last_10": 105.17261365788104, "mean_abs_error_last_25": 131.85096446012565, "mean_abs_error_last_50": 230.0860687948416, "mean_pred_prob": 0.027241704519838095, "mean_pred_prob_last_10": 0.14092307537794113, "mean_pred_prob_last_25": 0.07529103355482221, "mean_pred_prob_last_50": 0.04610766209661961, "mean_token_accuracy": 0.8656176030635834, "step": 45560 }, { "epoch": 0.8100901285264785, "grad_norm": 1.411005867113829, "learning_rate": 0.0001, "loss": 0.7331, "mean_abs_error": 392.4038173402258, "mean_abs_error_last_10": 284.46339787095036, "mean_abs_error_last_25": 407.69138796215265, "mean_abs_error_last_50": 358.43441365495653, "mean_pred_prob": 0.04052090807817876, "mean_pred_prob_last_10": 0.19567830059677363, "mean_pred_prob_last_25": 0.1126850905828178, "mean_pred_prob_last_50": 0.06914948606863618, "mean_token_accuracy": 0.8656337022781372, "step": 45570 }, { "epoch": 0.8102678968232805, "grad_norm": 1.604788580459535, "learning_rate": 0.0001, "loss": 0.6263, "mean_abs_error": 341.9970686491671, "mean_abs_error_last_10": 55.7393473298362, "mean_abs_error_last_25": 80.49915547077815, "mean_abs_error_last_50": 158.78691762002387, "mean_pred_prob": 0.04775937139056623, "mean_pred_prob_last_10": 0.20855417884886265, "mean_pred_prob_last_25": 0.12064590957015753, "mean_pred_prob_last_50": 0.07674102568998933, "mean_token_accuracy": 0.867365562915802, "step": 45580 }, { "epoch": 0.8104456651200825, "grad_norm": 2.1402194698801282, "learning_rate": 0.0001, "loss": 0.7004, "mean_abs_error": 241.5412116036992, "mean_abs_error_last_10": 115.43061784226786, "mean_abs_error_last_25": 115.11071397641012, "mean_abs_error_last_50": 167.632097920055, "mean_pred_prob": 0.03779433169402182, "mean_pred_prob_last_10": 0.19203803576529027, "mean_pred_prob_last_25": 0.10662254486232996, "mean_pred_prob_last_50": 0.06357305096462369, "mean_token_accuracy": 0.8772362232208252, "step": 45590 }, { "epoch": 0.8106234334168845, "grad_norm": 2.6428298870481997, "learning_rate": 0.0001, "loss": 0.8289, "mean_abs_error": 948.4422032723985, "mean_abs_error_last_10": 502.48749189793546, "mean_abs_error_last_25": 570.3844304567585, "mean_abs_error_last_50": 672.6230410784106, "mean_pred_prob": 0.029159008667920715, "mean_pred_prob_last_10": 0.15158246636856348, "mean_pred_prob_last_25": 0.07982214946532622, "mean_pred_prob_last_50": 0.04841498409223277, "mean_token_accuracy": 0.8744734168052674, "step": 45600 }, { "epoch": 0.8108012017136864, "grad_norm": 0.8952168065095377, "learning_rate": 0.0001, "loss": 0.707, "mean_abs_error": 311.3552134211392, "mean_abs_error_last_10": 64.76395567972995, "mean_abs_error_last_25": 114.73100817393194, "mean_abs_error_last_50": 205.5585860087844, "mean_pred_prob": 0.04683367414399982, "mean_pred_prob_last_10": 0.22730425065383314, "mean_pred_prob_last_25": 0.1250816813088022, "mean_pred_prob_last_50": 0.07691262831212953, "mean_token_accuracy": 0.8703850269317627, "step": 45610 }, { "epoch": 0.8109789700104884, "grad_norm": 1.609399978663793, "learning_rate": 0.0001, "loss": 0.7476, "mean_abs_error": 430.75915667510765, "mean_abs_error_last_10": 147.4195825745174, "mean_abs_error_last_25": 255.50608853349678, "mean_abs_error_last_50": 313.71231032517034, "mean_pred_prob": 0.02209548125974834, "mean_pred_prob_last_10": 0.12497002799063921, "mean_pred_prob_last_25": 0.062214575614780186, "mean_pred_prob_last_50": 0.037147461669519544, "mean_token_accuracy": 0.8711305916309356, "step": 45620 }, { "epoch": 0.8111567383072903, "grad_norm": 2.108497698252975, "learning_rate": 0.0001, "loss": 0.743, "mean_abs_error": 974.83806015437, "mean_abs_error_last_10": 432.2468643194691, "mean_abs_error_last_25": 508.0005495512816, "mean_abs_error_last_50": 774.0317310854707, "mean_pred_prob": 0.03603502297482919, "mean_pred_prob_last_10": 0.19358162507705856, "mean_pred_prob_last_25": 0.10708439495938364, "mean_pred_prob_last_50": 0.06251713692618069, "mean_token_accuracy": 0.850987458229065, "step": 45630 }, { "epoch": 0.8113345066040922, "grad_norm": 1.811569022202129, "learning_rate": 0.0001, "loss": 0.9121, "mean_abs_error": 547.94772184693, "mean_abs_error_last_10": 211.5834882523318, "mean_abs_error_last_25": 282.2843531596189, "mean_abs_error_last_50": 408.14050633183604, "mean_pred_prob": 0.02202706059906632, "mean_pred_prob_last_10": 0.11522649936378002, "mean_pred_prob_last_25": 0.06270240917801857, "mean_pred_prob_last_50": 0.03700865344144404, "mean_token_accuracy": 0.8672204196453095, "step": 45640 }, { "epoch": 0.8115122749008942, "grad_norm": 1.1521177812765122, "learning_rate": 0.0001, "loss": 0.6695, "mean_abs_error": 819.2591583988608, "mean_abs_error_last_10": 371.86517341586415, "mean_abs_error_last_25": 418.7364693616326, "mean_abs_error_last_50": 477.70480940022543, "mean_pred_prob": 0.030504648201167585, "mean_pred_prob_last_10": 0.14486389460507781, "mean_pred_prob_last_25": 0.08139409220311791, "mean_pred_prob_last_50": 0.050975938106421384, "mean_token_accuracy": 0.868300837278366, "step": 45650 }, { "epoch": 0.8116900431976961, "grad_norm": 1.086740417484494, "learning_rate": 0.0001, "loss": 0.7857, "mean_abs_error": 713.91443016506, "mean_abs_error_last_10": 154.7535713951471, "mean_abs_error_last_25": 196.07071684011765, "mean_abs_error_last_50": 363.4121131031188, "mean_pred_prob": 0.04365851343027316, "mean_pred_prob_last_10": 0.1977277180645615, "mean_pred_prob_last_25": 0.11326381450053305, "mean_pred_prob_last_50": 0.07148432085523382, "mean_token_accuracy": 0.8709441125392914, "step": 45660 }, { "epoch": 0.811867811494498, "grad_norm": 0.8378945703496117, "learning_rate": 0.0001, "loss": 0.7052, "mean_abs_error": 1234.0545567236236, "mean_abs_error_last_10": 651.6345408625265, "mean_abs_error_last_25": 740.2978094413967, "mean_abs_error_last_50": 905.523418969478, "mean_pred_prob": 0.029407276207348333, "mean_pred_prob_last_10": 0.13900292104808615, "mean_pred_prob_last_25": 0.07583150344144088, "mean_pred_prob_last_50": 0.04789988721604459, "mean_token_accuracy": 0.8712549030780792, "step": 45670 }, { "epoch": 0.8120455797913, "grad_norm": 2.8508132667004857, "learning_rate": 0.0001, "loss": 0.7037, "mean_abs_error": 301.40539128270314, "mean_abs_error_last_10": 138.06887183804793, "mean_abs_error_last_25": 173.24875351997025, "mean_abs_error_last_50": 212.93503752152623, "mean_pred_prob": 0.04752985807135701, "mean_pred_prob_last_10": 0.2490295821800828, "mean_pred_prob_last_25": 0.13601826233789324, "mean_pred_prob_last_50": 0.08126812023110688, "mean_token_accuracy": 0.8829108774662018, "step": 45680 }, { "epoch": 0.8122233480881019, "grad_norm": 0.961423170721713, "learning_rate": 0.0001, "loss": 0.6774, "mean_abs_error": 409.8850412780979, "mean_abs_error_last_10": 134.1503052883456, "mean_abs_error_last_25": 161.37817297169158, "mean_abs_error_last_50": 256.40409921711847, "mean_pred_prob": 0.02197857997380197, "mean_pred_prob_last_10": 0.10796701144427061, "mean_pred_prob_last_25": 0.06089843595400453, "mean_pred_prob_last_50": 0.0369749731849879, "mean_token_accuracy": 0.8668783605098724, "step": 45690 }, { "epoch": 0.8124011163849039, "grad_norm": 1.3271926856685343, "learning_rate": 0.0001, "loss": 0.8276, "mean_abs_error": 795.0264261477612, "mean_abs_error_last_10": 546.6336106782861, "mean_abs_error_last_25": 640.2472255090315, "mean_abs_error_last_50": 616.1393857954889, "mean_pred_prob": 0.04332215881731827, "mean_pred_prob_last_10": 0.21711910099256784, "mean_pred_prob_last_25": 0.12463235620234628, "mean_pred_prob_last_50": 0.07450974646490068, "mean_token_accuracy": 0.8734924733638764, "step": 45700 }, { "epoch": 0.8125788846817059, "grad_norm": 1.3263337556753836, "learning_rate": 0.0001, "loss": 0.8098, "mean_abs_error": 714.3213880925109, "mean_abs_error_last_10": 303.4478004637127, "mean_abs_error_last_25": 399.1353804806219, "mean_abs_error_last_50": 457.1355498644859, "mean_pred_prob": 0.02308725398615934, "mean_pred_prob_last_10": 0.1101988397538662, "mean_pred_prob_last_25": 0.0622991802287288, "mean_pred_prob_last_50": 0.038589769351528955, "mean_token_accuracy": 0.8673660755157471, "step": 45710 }, { "epoch": 0.8127566529785079, "grad_norm": 3.030636177121049, "learning_rate": 0.0001, "loss": 0.7134, "mean_abs_error": 383.9619734641326, "mean_abs_error_last_10": 265.6123219432191, "mean_abs_error_last_25": 269.6437008191341, "mean_abs_error_last_50": 286.87810343847696, "mean_pred_prob": 0.02956003271974623, "mean_pred_prob_last_10": 0.15190452691167594, "mean_pred_prob_last_25": 0.08022530721500516, "mean_pred_prob_last_50": 0.04898452013731003, "mean_token_accuracy": 0.8614418625831604, "step": 45720 }, { "epoch": 0.8129344212753098, "grad_norm": 3.1293258133318202, "learning_rate": 0.0001, "loss": 0.7033, "mean_abs_error": 800.3515874570155, "mean_abs_error_last_10": 287.9530047975568, "mean_abs_error_last_25": 341.1883719320297, "mean_abs_error_last_50": 488.56787319776123, "mean_pred_prob": 0.049762941592780406, "mean_pred_prob_last_10": 0.23926684765610845, "mean_pred_prob_last_25": 0.13390602627187037, "mean_pred_prob_last_50": 0.08374086277908646, "mean_token_accuracy": 0.8729734599590302, "step": 45730 }, { "epoch": 0.8131121895721117, "grad_norm": 1.6871208570356586, "learning_rate": 0.0001, "loss": 0.7233, "mean_abs_error": 416.91148818285865, "mean_abs_error_last_10": 136.3087249459113, "mean_abs_error_last_25": 135.65067427678997, "mean_abs_error_last_50": 202.77355339394109, "mean_pred_prob": 0.026876265229657293, "mean_pred_prob_last_10": 0.13509959056973458, "mean_pred_prob_last_25": 0.06986998915672302, "mean_pred_prob_last_50": 0.04360146708786487, "mean_token_accuracy": 0.8740116477012634, "step": 45740 }, { "epoch": 0.8132899578689137, "grad_norm": 1.2789727413698782, "learning_rate": 0.0001, "loss": 0.7519, "mean_abs_error": 629.2542929247294, "mean_abs_error_last_10": 196.07702271175415, "mean_abs_error_last_25": 278.89674571593946, "mean_abs_error_last_50": 367.0633942319645, "mean_pred_prob": 0.02255008471547626, "mean_pred_prob_last_10": 0.13159274295903742, "mean_pred_prob_last_25": 0.06562164335045964, "mean_pred_prob_last_50": 0.038647831382695584, "mean_token_accuracy": 0.8750503301620484, "step": 45750 }, { "epoch": 0.8134677261657156, "grad_norm": 1.2509593449895726, "learning_rate": 0.0001, "loss": 0.6894, "mean_abs_error": 79.3211276005066, "mean_abs_error_last_10": 21.394478318086207, "mean_abs_error_last_25": 32.08093902672204, "mean_abs_error_last_50": 51.368017563068996, "mean_pred_prob": 0.05569311520084739, "mean_pred_prob_last_10": 0.27274750992655755, "mean_pred_prob_last_25": 0.15091382823884486, "mean_pred_prob_last_50": 0.09264227356761694, "mean_token_accuracy": 0.8801884531974793, "step": 45760 }, { "epoch": 0.8136454944625175, "grad_norm": 2.2702767048797936, "learning_rate": 0.0001, "loss": 0.8132, "mean_abs_error": 745.0502685229906, "mean_abs_error_last_10": 334.5800004517777, "mean_abs_error_last_25": 408.0524145178139, "mean_abs_error_last_50": 512.7116489260613, "mean_pred_prob": 0.033538408401363996, "mean_pred_prob_last_10": 0.16696827088599092, "mean_pred_prob_last_25": 0.08932935258781072, "mean_pred_prob_last_50": 0.054352018708596, "mean_token_accuracy": 0.8776547014713287, "step": 45770 }, { "epoch": 0.8138232627593195, "grad_norm": 1.1032047747107765, "learning_rate": 0.0001, "loss": 0.7653, "mean_abs_error": 928.1977704690082, "mean_abs_error_last_10": 619.1983285666223, "mean_abs_error_last_25": 721.8024966206224, "mean_abs_error_last_50": 784.4899341910625, "mean_pred_prob": 0.026963719527702778, "mean_pred_prob_last_10": 0.13294462368940002, "mean_pred_prob_last_25": 0.07329154663602822, "mean_pred_prob_last_50": 0.04476169031113386, "mean_token_accuracy": 0.8677705585956573, "step": 45780 }, { "epoch": 0.8140010310561214, "grad_norm": 1.328546740633954, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 1651.2618918072235, "mean_abs_error_last_10": 1151.813692442904, "mean_abs_error_last_25": 1268.8316213509402, "mean_abs_error_last_50": 1385.0431850879274, "mean_pred_prob": 0.05391590758954408, "mean_pred_prob_last_10": 0.26364143196115036, "mean_pred_prob_last_25": 0.14821698770538205, "mean_pred_prob_last_50": 0.08930919587000971, "mean_token_accuracy": 0.8767780542373658, "step": 45790 }, { "epoch": 0.8141787993529234, "grad_norm": 2.3647360543843505, "learning_rate": 0.0001, "loss": 0.8526, "mean_abs_error": 1035.4210969818048, "mean_abs_error_last_10": 470.47417866185276, "mean_abs_error_last_25": 629.6734141833073, "mean_abs_error_last_50": 801.2074856610665, "mean_pred_prob": 0.029218555640545672, "mean_pred_prob_last_10": 0.14217908032587728, "mean_pred_prob_last_25": 0.07754312010947614, "mean_pred_prob_last_50": 0.0481332995404955, "mean_token_accuracy": 0.8700487434864044, "step": 45800 }, { "epoch": 0.8143565676497253, "grad_norm": 1.1676888166476815, "learning_rate": 0.0001, "loss": 0.7195, "mean_abs_error": 582.7089232695655, "mean_abs_error_last_10": 148.4886041324713, "mean_abs_error_last_25": 193.8384594196977, "mean_abs_error_last_50": 310.8470002204118, "mean_pred_prob": 0.0519557909516152, "mean_pred_prob_last_10": 0.23435245242435485, "mean_pred_prob_last_25": 0.1345120221725665, "mean_pred_prob_last_50": 0.08421601221780292, "mean_token_accuracy": 0.8722255825996399, "step": 45810 }, { "epoch": 0.8145343359465272, "grad_norm": 2.2342442078466536, "learning_rate": 0.0001, "loss": 0.6521, "mean_abs_error": 304.52347175400314, "mean_abs_error_last_10": 86.90197023783362, "mean_abs_error_last_25": 105.6044172489409, "mean_abs_error_last_50": 161.6914224993309, "mean_pred_prob": 0.0375510897487402, "mean_pred_prob_last_10": 0.17769373320043086, "mean_pred_prob_last_25": 0.10017952397465706, "mean_pred_prob_last_50": 0.06252241190522909, "mean_token_accuracy": 0.874401992559433, "step": 45820 }, { "epoch": 0.8147121042433293, "grad_norm": 1.3679563180780303, "learning_rate": 0.0001, "loss": 0.6839, "mean_abs_error": 313.59896984967344, "mean_abs_error_last_10": 89.25945367318054, "mean_abs_error_last_25": 108.51712923425899, "mean_abs_error_last_50": 147.70405723140794, "mean_pred_prob": 0.053277097758837044, "mean_pred_prob_last_10": 0.2547607174143195, "mean_pred_prob_last_25": 0.14226425113156438, "mean_pred_prob_last_50": 0.08917346796952189, "mean_token_accuracy": 0.8796527564525605, "step": 45830 }, { "epoch": 0.8148898725401312, "grad_norm": 2.1552070523858613, "learning_rate": 0.0001, "loss": 0.6798, "mean_abs_error": 184.01028421565766, "mean_abs_error_last_10": 73.45588281707624, "mean_abs_error_last_25": 76.58010488595745, "mean_abs_error_last_50": 127.06752285403263, "mean_pred_prob": 0.04464162159711123, "mean_pred_prob_last_10": 0.23005081228911878, "mean_pred_prob_last_25": 0.12734086187556387, "mean_pred_prob_last_50": 0.07647975655272603, "mean_token_accuracy": 0.8744902431964874, "step": 45840 }, { "epoch": 0.8150676408369332, "grad_norm": 2.7201361444040293, "learning_rate": 0.0001, "loss": 0.8098, "mean_abs_error": 381.33349977119144, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 271.2871102330785, "mean_pred_prob": 0.08338912769104354, "mean_pred_prob_last_10": 0.24184338236227632, "mean_pred_prob_last_25": 0.15634823871077969, "mean_pred_prob_last_50": 0.12983715855516492, "mean_token_accuracy": 0.8722811877727509, "step": 45850 }, { "epoch": 0.8152454091337351, "grad_norm": 2.4037114956610357, "learning_rate": 0.0001, "loss": 0.691, "mean_abs_error": 243.18140044912062, "mean_abs_error_last_10": 121.30563081664131, "mean_abs_error_last_25": 147.50737059438433, "mean_abs_error_last_50": 157.8545216687336, "mean_pred_prob": 0.042534504737704995, "mean_pred_prob_last_10": 0.18773223981261253, "mean_pred_prob_last_25": 0.10859958194196224, "mean_pred_prob_last_50": 0.06882445383816957, "mean_token_accuracy": 0.8709231793880463, "step": 45860 }, { "epoch": 0.815423177430537, "grad_norm": 1.1125051078093628, "learning_rate": 0.0001, "loss": 0.7083, "mean_abs_error": 283.53937712726605, "mean_abs_error_last_10": 90.7556462026252, "mean_abs_error_last_25": 176.8936265550923, "mean_abs_error_last_50": 191.05322410101672, "mean_pred_prob": 0.04704543356783688, "mean_pred_prob_last_10": 0.21335068568587304, "mean_pred_prob_last_25": 0.12283762451261282, "mean_pred_prob_last_50": 0.07721153479069472, "mean_token_accuracy": 0.8667733132839203, "step": 45870 }, { "epoch": 0.815600945727339, "grad_norm": 3.2737165025779253, "learning_rate": 0.0001, "loss": 0.6788, "mean_abs_error": 380.2463996908612, "mean_abs_error_last_10": 196.42880700123396, "mean_abs_error_last_25": 229.46528581730846, "mean_abs_error_last_50": 271.2140778834058, "mean_pred_prob": 0.03391888022888452, "mean_pred_prob_last_10": 0.174965650588274, "mean_pred_prob_last_25": 0.09412465831264853, "mean_pred_prob_last_50": 0.05714915110729635, "mean_token_accuracy": 0.8769092321395874, "step": 45880 }, { "epoch": 0.8157787140241409, "grad_norm": 1.8022433083695846, "learning_rate": 0.0001, "loss": 0.6917, "mean_abs_error": 768.8230699494501, "mean_abs_error_last_10": 225.13174927196692, "mean_abs_error_last_25": 283.46344828038497, "mean_abs_error_last_50": 425.79288078208594, "mean_pred_prob": 0.029595294522005135, "mean_pred_prob_last_10": 0.15415288789663464, "mean_pred_prob_last_25": 0.08478521530050784, "mean_pred_prob_last_50": 0.050559635640820486, "mean_token_accuracy": 0.8752465128898621, "step": 45890 }, { "epoch": 0.8159564823209429, "grad_norm": 1.597844133816905, "learning_rate": 0.0001, "loss": 0.7299, "mean_abs_error": 306.51313147314147, "mean_abs_error_last_10": 143.90649588802313, "mean_abs_error_last_25": 159.58400369132673, "mean_abs_error_last_50": 240.09550107446339, "mean_pred_prob": 0.05257205236703157, "mean_pred_prob_last_10": 0.24674614612013102, "mean_pred_prob_last_25": 0.13952369038015605, "mean_pred_prob_last_50": 0.08697071201168001, "mean_token_accuracy": 0.8730957329273223, "step": 45900 }, { "epoch": 0.8161342506177448, "grad_norm": 1.6479375761070294, "learning_rate": 0.0001, "loss": 0.651, "mean_abs_error": 192.98523245414702, "mean_abs_error_last_10": 64.52254906836068, "mean_abs_error_last_25": 78.70206666862757, "mean_abs_error_last_50": 105.15635429043246, "mean_pred_prob": 0.04171504583209753, "mean_pred_prob_last_10": 0.20412289276719092, "mean_pred_prob_last_25": 0.11536384979262948, "mean_pred_prob_last_50": 0.07033556634560227, "mean_token_accuracy": 0.8836390912532807, "step": 45910 }, { "epoch": 0.8163120189145467, "grad_norm": 1.3853454091028592, "learning_rate": 0.0001, "loss": 0.6191, "mean_abs_error": 378.5064934982096, "mean_abs_error_last_10": 44.73883380708362, "mean_abs_error_last_25": 82.99404227957272, "mean_abs_error_last_50": 177.82850755775334, "mean_pred_prob": 0.05851995379198342, "mean_pred_prob_last_10": 0.27355139665305617, "mean_pred_prob_last_25": 0.15420658802613615, "mean_pred_prob_last_50": 0.09499352495186031, "mean_token_accuracy": 0.8683610618114471, "step": 45920 }, { "epoch": 0.8164897872113487, "grad_norm": 2.0908486735801737, "learning_rate": 0.0001, "loss": 0.8128, "mean_abs_error": 692.3236847948995, "mean_abs_error_last_10": 209.72301199520848, "mean_abs_error_last_25": 253.89307935140718, "mean_abs_error_last_50": 404.9232862986872, "mean_pred_prob": 0.03813398331403732, "mean_pred_prob_last_10": 0.1827692549326457, "mean_pred_prob_last_25": 0.10591983014019206, "mean_pred_prob_last_50": 0.06477165535907262, "mean_token_accuracy": 0.867612224817276, "step": 45930 }, { "epoch": 0.8166675555081506, "grad_norm": 1.8804448813490693, "learning_rate": 0.0001, "loss": 0.8516, "mean_abs_error": 686.8624450101594, "mean_abs_error_last_10": 211.5801789682117, "mean_abs_error_last_25": 279.0056125431325, "mean_abs_error_last_50": 538.2262151166476, "mean_pred_prob": 0.04595333986799233, "mean_pred_prob_last_10": 0.2305279608175624, "mean_pred_prob_last_25": 0.12744076036615298, "mean_pred_prob_last_50": 0.07664308551466092, "mean_token_accuracy": 0.8686337053775788, "step": 45940 }, { "epoch": 0.8168453238049527, "grad_norm": 1.3978573454063743, "learning_rate": 0.0001, "loss": 0.8993, "mean_abs_error": 1109.9295989711072, "mean_abs_error_last_10": 359.88791924457706, "mean_abs_error_last_25": 448.4219773484636, "mean_abs_error_last_50": 624.942644198453, "mean_pred_prob": 0.021847695048199965, "mean_pred_prob_last_10": 0.11579869756824338, "mean_pred_prob_last_25": 0.06401055759633892, "mean_pred_prob_last_50": 0.0379767285019625, "mean_token_accuracy": 0.8503281533718109, "step": 45950 }, { "epoch": 0.8170230921017546, "grad_norm": 1.2040557447695122, "learning_rate": 0.0001, "loss": 0.6817, "mean_abs_error": 372.1007791231833, "mean_abs_error_last_10": 123.16478567876042, "mean_abs_error_last_25": 148.41496574077632, "mean_abs_error_last_50": 220.29671928237832, "mean_pred_prob": 0.029760644864290954, "mean_pred_prob_last_10": 0.14978148750960826, "mean_pred_prob_last_25": 0.08156022131443023, "mean_pred_prob_last_50": 0.050205194298177956, "mean_token_accuracy": 0.869655305147171, "step": 45960 }, { "epoch": 0.8172008603985566, "grad_norm": 2.010996987925549, "learning_rate": 0.0001, "loss": 0.682, "mean_abs_error": 643.2892848874969, "mean_abs_error_last_10": 228.56441561104856, "mean_abs_error_last_25": 271.55240152249996, "mean_abs_error_last_50": 371.25619082693163, "mean_pred_prob": 0.020075646636541933, "mean_pred_prob_last_10": 0.10590764188673348, "mean_pred_prob_last_25": 0.05650435285642743, "mean_pred_prob_last_50": 0.03398685500724241, "mean_token_accuracy": 0.8659926533699036, "step": 45970 }, { "epoch": 0.8173786286953585, "grad_norm": 2.4737175615121703, "learning_rate": 0.0001, "loss": 0.7359, "mean_abs_error": 384.61108555483816, "mean_abs_error_last_10": 183.22239371361053, "mean_abs_error_last_25": 217.98527865033913, "mean_abs_error_last_50": 250.34677702191303, "mean_pred_prob": 0.03391725462861359, "mean_pred_prob_last_10": 0.17346464470028877, "mean_pred_prob_last_25": 0.09605036629363894, "mean_pred_prob_last_50": 0.05896553262136876, "mean_token_accuracy": 0.871499902009964, "step": 45980 }, { "epoch": 0.8175563969921604, "grad_norm": 1.4117536028619597, "learning_rate": 0.0001, "loss": 0.6204, "mean_abs_error": 383.7098519974134, "mean_abs_error_last_10": 170.8788891457664, "mean_abs_error_last_25": 168.94264435303378, "mean_abs_error_last_50": 223.49013698742615, "mean_pred_prob": 0.029763069096952678, "mean_pred_prob_last_10": 0.1461557958740741, "mean_pred_prob_last_25": 0.08384005336556584, "mean_pred_prob_last_50": 0.050363276689313355, "mean_token_accuracy": 0.8847737908363342, "step": 45990 }, { "epoch": 0.8177341652889624, "grad_norm": 1.112516596924884, "learning_rate": 0.0001, "loss": 0.8035, "mean_abs_error": 617.4785282716724, "mean_abs_error_last_10": 96.13066354267347, "mean_abs_error_last_25": 162.5491882729921, "mean_abs_error_last_50": 353.1882141975782, "mean_pred_prob": 0.03291140699875541, "mean_pred_prob_last_10": 0.1765028340741992, "mean_pred_prob_last_25": 0.09526810313109309, "mean_pred_prob_last_50": 0.056476177042350174, "mean_token_accuracy": 0.8700801014900208, "step": 46000 }, { "epoch": 0.8179119335857643, "grad_norm": 1.4432170007017708, "learning_rate": 0.0001, "loss": 0.6224, "mean_abs_error": 330.8335243855389, "mean_abs_error_last_10": 158.5327495415329, "mean_abs_error_last_25": 177.65733922749547, "mean_abs_error_last_50": 219.76293570415856, "mean_pred_prob": 0.047516685980372134, "mean_pred_prob_last_10": 0.22782808113843203, "mean_pred_prob_last_25": 0.12525707818567752, "mean_pred_prob_last_50": 0.07819464313797653, "mean_token_accuracy": 0.877192211151123, "step": 46010 }, { "epoch": 0.8180897018825662, "grad_norm": 1.1033302777131677, "learning_rate": 0.0001, "loss": 0.6623, "mean_abs_error": 536.906887079088, "mean_abs_error_last_10": 211.23852255597404, "mean_abs_error_last_25": 291.25945398929883, "mean_abs_error_last_50": 356.58684970763704, "mean_pred_prob": 0.03923909881850705, "mean_pred_prob_last_10": 0.18460415489971638, "mean_pred_prob_last_25": 0.10594709941651673, "mean_pred_prob_last_50": 0.06600923364749178, "mean_token_accuracy": 0.8742689847946167, "step": 46020 }, { "epoch": 0.8182674701793682, "grad_norm": 1.2434704418682148, "learning_rate": 0.0001, "loss": 0.678, "mean_abs_error": 1119.3428698043601, "mean_abs_error_last_10": 634.8755195973586, "mean_abs_error_last_25": 722.7092717100245, "mean_abs_error_last_50": 856.2687874530999, "mean_pred_prob": 0.0346247917987057, "mean_pred_prob_last_10": 0.14330341058084742, "mean_pred_prob_last_25": 0.08616695613891352, "mean_pred_prob_last_50": 0.0549968062347034, "mean_token_accuracy": 0.8747921466827393, "step": 46030 }, { "epoch": 0.8184452384761701, "grad_norm": 1.3151901525096221, "learning_rate": 0.0001, "loss": 0.7445, "mean_abs_error": 1320.801538444503, "mean_abs_error_last_10": 741.5145438659787, "mean_abs_error_last_25": 842.8010662396022, "mean_abs_error_last_50": 1007.693379112344, "mean_pred_prob": 0.02410783403684036, "mean_pred_prob_last_10": 0.12605757229903247, "mean_pred_prob_last_25": 0.06818336884971359, "mean_pred_prob_last_50": 0.041225267659319796, "mean_token_accuracy": 0.8764040768146515, "step": 46040 }, { "epoch": 0.8186230067729721, "grad_norm": 1.5486506662024557, "learning_rate": 0.0001, "loss": 0.6413, "mean_abs_error": 2287.0042079563887, "mean_abs_error_last_10": 1447.33108801186, "mean_abs_error_last_25": 1612.5983517088664, "mean_abs_error_last_50": 1752.1572393461545, "mean_pred_prob": 0.041837694489368006, "mean_pred_prob_last_10": 0.1911146398168057, "mean_pred_prob_last_25": 0.10670050782209728, "mean_pred_prob_last_50": 0.06723694373067701, "mean_token_accuracy": 0.8831586360931396, "step": 46050 }, { "epoch": 0.8188007750697741, "grad_norm": 0.9330032294561739, "learning_rate": 0.0001, "loss": 0.5645, "mean_abs_error": 343.98966843796745, "mean_abs_error_last_10": 196.4503271384649, "mean_abs_error_last_25": 305.6242024870265, "mean_abs_error_last_50": 312.8035992527265, "mean_pred_prob": 0.04766022339463234, "mean_pred_prob_last_10": 0.22565532773733138, "mean_pred_prob_last_25": 0.13287132177501917, "mean_pred_prob_last_50": 0.07948380215093494, "mean_token_accuracy": 0.8758054554462433, "step": 46060 }, { "epoch": 0.818978543366576, "grad_norm": 1.4348921265421828, "learning_rate": 0.0001, "loss": 0.7873, "mean_abs_error": 570.3996972583057, "mean_abs_error_last_10": 183.5118590941487, "mean_abs_error_last_25": 228.20297025629253, "mean_abs_error_last_50": 318.5411424488516, "mean_pred_prob": 0.037881872162688526, "mean_pred_prob_last_10": 0.1750322552281432, "mean_pred_prob_last_25": 0.09887871604878455, "mean_pred_prob_last_50": 0.06125280827982351, "mean_token_accuracy": 0.8636854529380799, "step": 46070 }, { "epoch": 0.819156311663378, "grad_norm": 1.4788460369502792, "learning_rate": 0.0001, "loss": 0.6778, "mean_abs_error": 565.6441902176691, "mean_abs_error_last_10": 170.41734959881768, "mean_abs_error_last_25": 304.8820607637041, "mean_abs_error_last_50": 456.3412032418234, "mean_pred_prob": 0.044860017183236775, "mean_pred_prob_last_10": 0.2184773998335004, "mean_pred_prob_last_25": 0.12245024191215634, "mean_pred_prob_last_50": 0.07413172996602953, "mean_token_accuracy": 0.8757430672645569, "step": 46080 }, { "epoch": 0.8193340799601799, "grad_norm": 0.9021797790876507, "learning_rate": 0.0001, "loss": 0.6697, "mean_abs_error": 1020.1221205635211, "mean_abs_error_last_10": 467.1913597009626, "mean_abs_error_last_25": 588.0696713075905, "mean_abs_error_last_50": 737.3630952158418, "mean_pred_prob": 0.021363834719522856, "mean_pred_prob_last_10": 0.11311646430403925, "mean_pred_prob_last_25": 0.062371587791130875, "mean_pred_prob_last_50": 0.036599617853062226, "mean_token_accuracy": 0.8702824294567109, "step": 46090 }, { "epoch": 0.8195118482569819, "grad_norm": 1.161998817615259, "learning_rate": 0.0001, "loss": 0.7339, "mean_abs_error": 375.44420073149456, "mean_abs_error_last_10": 163.21569118219392, "mean_abs_error_last_25": 149.21051777025224, "mean_abs_error_last_50": 202.79113806149098, "mean_pred_prob": 0.02228727042675018, "mean_pred_prob_last_10": 0.1049378739669919, "mean_pred_prob_last_25": 0.05872143004089594, "mean_pred_prob_last_50": 0.036700611980631945, "mean_token_accuracy": 0.8669384956359864, "step": 46100 }, { "epoch": 0.8196896165537838, "grad_norm": 1.4738836737959833, "learning_rate": 0.0001, "loss": 0.5099, "mean_abs_error": 696.0723916517204, "mean_abs_error_last_10": 250.5175096686947, "mean_abs_error_last_25": 323.6880423284641, "mean_abs_error_last_50": 476.3377483295534, "mean_pred_prob": 0.04125637130928226, "mean_pred_prob_last_10": 0.20191417148453183, "mean_pred_prob_last_25": 0.11105056224623695, "mean_pred_prob_last_50": 0.06854995672474615, "mean_token_accuracy": 0.8751844108104706, "step": 46110 }, { "epoch": 0.8198673848505857, "grad_norm": 1.8448601910792692, "learning_rate": 0.0001, "loss": 0.8057, "mean_abs_error": 1099.8519180850958, "mean_abs_error_last_10": 801.7429534127849, "mean_abs_error_last_25": 815.0145381192735, "mean_abs_error_last_50": 839.978823128287, "mean_pred_prob": 0.041407204464485405, "mean_pred_prob_last_10": 0.19187404241238254, "mean_pred_prob_last_25": 0.10895461011241422, "mean_pred_prob_last_50": 0.06747912826831452, "mean_token_accuracy": 0.8649363160133362, "step": 46120 }, { "epoch": 0.8200451531473877, "grad_norm": 1.1695381031380017, "learning_rate": 0.0001, "loss": 0.7621, "mean_abs_error": 495.49176095296343, "mean_abs_error_last_10": 259.36097989281683, "mean_abs_error_last_25": 378.33869191472206, "mean_abs_error_last_50": 414.11064845111304, "mean_pred_prob": 0.04033964825794101, "mean_pred_prob_last_10": 0.19370321596506984, "mean_pred_prob_last_25": 0.11040581597480922, "mean_pred_prob_last_50": 0.06784923794912175, "mean_token_accuracy": 0.8633211970329284, "step": 46130 }, { "epoch": 0.8202229214441896, "grad_norm": 1.0193516866458587, "learning_rate": 0.0001, "loss": 0.5643, "mean_abs_error": 508.63235934672923, "mean_abs_error_last_10": 39.250440676637204, "mean_abs_error_last_25": 96.17005295460646, "mean_abs_error_last_50": 238.17845362365802, "mean_pred_prob": 0.043479490024037656, "mean_pred_prob_last_10": 0.20981792733073235, "mean_pred_prob_last_25": 0.12157925749197603, "mean_pred_prob_last_50": 0.07299205125309527, "mean_token_accuracy": 0.8837122738361358, "step": 46140 }, { "epoch": 0.8204006897409916, "grad_norm": 1.4941398402810024, "learning_rate": 0.0001, "loss": 0.663, "mean_abs_error": 279.66979686254444, "mean_abs_error_last_10": 59.230159249277605, "mean_abs_error_last_25": 85.54553057633798, "mean_abs_error_last_50": 159.95430862993788, "mean_pred_prob": 0.055641696439124644, "mean_pred_prob_last_10": 0.25050998851656914, "mean_pred_prob_last_25": 0.14898416996002198, "mean_pred_prob_last_50": 0.09253835040144623, "mean_token_accuracy": 0.8754188537597656, "step": 46150 }, { "epoch": 0.8205784580377935, "grad_norm": 0.9674103504203563, "learning_rate": 0.0001, "loss": 0.7045, "mean_abs_error": 469.24508184861804, "mean_abs_error_last_10": 179.42076397700737, "mean_abs_error_last_25": 176.31887810815448, "mean_abs_error_last_50": 248.6446427339834, "mean_pred_prob": 0.045898329885676506, "mean_pred_prob_last_10": 0.1862791668623686, "mean_pred_prob_last_25": 0.11638447288423777, "mean_pred_prob_last_50": 0.07559220180846751, "mean_token_accuracy": 0.8742380321025849, "step": 46160 }, { "epoch": 0.8207562263345954, "grad_norm": 1.5939248045786052, "learning_rate": 0.0001, "loss": 0.583, "mean_abs_error": 253.48852812724053, "mean_abs_error_last_10": 53.56331305248882, "mean_abs_error_last_25": 76.21609406948554, "mean_abs_error_last_50": 161.42584177211612, "mean_pred_prob": 0.04217949833255261, "mean_pred_prob_last_10": 0.21080711148679257, "mean_pred_prob_last_25": 0.11345553109422327, "mean_pred_prob_last_50": 0.06946241771802306, "mean_token_accuracy": 0.8805929303169251, "step": 46170 }, { "epoch": 0.8209339946313975, "grad_norm": 2.348268744081063, "learning_rate": 0.0001, "loss": 0.7169, "mean_abs_error": 916.9914162247347, "mean_abs_error_last_10": 485.20186907445014, "mean_abs_error_last_25": 488.3935082489719, "mean_abs_error_last_50": 546.7052071900041, "mean_pred_prob": 0.019154783175326883, "mean_pred_prob_last_10": 0.10226934367092326, "mean_pred_prob_last_25": 0.05451333360979334, "mean_pred_prob_last_50": 0.032621392945293336, "mean_token_accuracy": 0.8757753252983094, "step": 46180 }, { "epoch": 0.8211117629281994, "grad_norm": 2.4613931551844375, "learning_rate": 0.0001, "loss": 0.7397, "mean_abs_error": 357.98282135438296, "mean_abs_error_last_10": 100.74167777275238, "mean_abs_error_last_25": 153.90015656830408, "mean_abs_error_last_50": 217.71948058603894, "mean_pred_prob": 0.03740774721372873, "mean_pred_prob_last_10": 0.18382773883640766, "mean_pred_prob_last_25": 0.1012724919244647, "mean_pred_prob_last_50": 0.06208833535201848, "mean_token_accuracy": 0.8684222400188446, "step": 46190 }, { "epoch": 0.8212895312250014, "grad_norm": 1.6031212935504946, "learning_rate": 0.0001, "loss": 0.8254, "mean_abs_error": 311.6740551901236, "mean_abs_error_last_10": 51.25459941523799, "mean_abs_error_last_25": 139.57076244177296, "mean_abs_error_last_50": 225.7685198438549, "mean_pred_prob": 0.045845066430047154, "mean_pred_prob_last_10": 0.22696651592850686, "mean_pred_prob_last_25": 0.12489449232816696, "mean_pred_prob_last_50": 0.07666011434048414, "mean_token_accuracy": 0.883981567621231, "step": 46200 }, { "epoch": 0.8214672995218033, "grad_norm": 1.0936568501506334, "learning_rate": 0.0001, "loss": 0.8319, "mean_abs_error": 396.50052253849026, "mean_abs_error_last_10": 143.6159934699271, "mean_abs_error_last_25": 159.5165690189874, "mean_abs_error_last_50": 206.71358107364244, "mean_pred_prob": 0.036224669846706095, "mean_pred_prob_last_10": 0.18108082767575978, "mean_pred_prob_last_25": 0.10465859724208712, "mean_pred_prob_last_50": 0.06325712325051427, "mean_token_accuracy": 0.8697278916835784, "step": 46210 }, { "epoch": 0.8216450678186052, "grad_norm": 0.9727581892410513, "learning_rate": 0.0001, "loss": 0.6984, "mean_abs_error": 453.71882764164485, "mean_abs_error_last_10": 211.55655837475493, "mean_abs_error_last_25": 240.71549166072282, "mean_abs_error_last_50": 323.03976971955717, "mean_pred_prob": 0.056604922443511896, "mean_pred_prob_last_10": 0.26041178631130607, "mean_pred_prob_last_25": 0.15043759712716565, "mean_pred_prob_last_50": 0.09409383901511319, "mean_token_accuracy": 0.8694219887256622, "step": 46220 }, { "epoch": 0.8218228361154072, "grad_norm": 2.711692388963983, "learning_rate": 0.0001, "loss": 0.7179, "mean_abs_error": 412.57331977444284, "mean_abs_error_last_10": 76.87652190005413, "mean_abs_error_last_25": 135.27049788691232, "mean_abs_error_last_50": 238.50284077394272, "mean_pred_prob": 0.04668339932686649, "mean_pred_prob_last_10": 0.2334500856231898, "mean_pred_prob_last_25": 0.1259425880154595, "mean_pred_prob_last_50": 0.07609889024170116, "mean_token_accuracy": 0.8783876478672028, "step": 46230 }, { "epoch": 0.8220006044122091, "grad_norm": 1.1444190075281098, "learning_rate": 0.0001, "loss": 0.686, "mean_abs_error": 291.54596957067963, "mean_abs_error_last_10": 56.591482726050984, "mean_abs_error_last_25": 84.43011863722816, "mean_abs_error_last_50": 149.62716565640477, "mean_pred_prob": 0.04806545446626842, "mean_pred_prob_last_10": 0.20939926151186228, "mean_pred_prob_last_25": 0.1263705429621041, "mean_pred_prob_last_50": 0.07989759454503656, "mean_token_accuracy": 0.8789696514606475, "step": 46240 }, { "epoch": 0.8221783727090111, "grad_norm": 1.35872102753537, "learning_rate": 0.0001, "loss": 0.6911, "mean_abs_error": 209.31903153096013, "mean_abs_error_last_10": 73.48347542080123, "mean_abs_error_last_25": 107.30779362880983, "mean_abs_error_last_50": 145.00090854427734, "mean_pred_prob": 0.04725130619481206, "mean_pred_prob_last_10": 0.22765399068593978, "mean_pred_prob_last_25": 0.12847744226455687, "mean_pred_prob_last_50": 0.07876715874299407, "mean_token_accuracy": 0.8766707062721253, "step": 46250 }, { "epoch": 0.822356141005813, "grad_norm": 1.2873723992854023, "learning_rate": 0.0001, "loss": 0.7013, "mean_abs_error": 374.76791039009055, "mean_abs_error_last_10": 161.24892101398405, "mean_abs_error_last_25": 150.37026006844272, "mean_abs_error_last_50": 225.82984391317245, "mean_pred_prob": 0.031152597937034444, "mean_pred_prob_last_10": 0.14227615657728165, "mean_pred_prob_last_25": 0.080920891801361, "mean_pred_prob_last_50": 0.05071916755987331, "mean_token_accuracy": 0.8675854861736297, "step": 46260 }, { "epoch": 0.8225339093026149, "grad_norm": 0.7540437163131335, "learning_rate": 0.0001, "loss": 0.5397, "mean_abs_error": 1026.0774709045659, "mean_abs_error_last_10": 577.9129468048803, "mean_abs_error_last_25": 692.6745846886547, "mean_abs_error_last_50": 814.2921130409013, "mean_pred_prob": 0.03327014769602101, "mean_pred_prob_last_10": 0.1842276327253785, "mean_pred_prob_last_25": 0.09606444837118033, "mean_pred_prob_last_50": 0.05652167365769856, "mean_token_accuracy": 0.8788053810596466, "step": 46270 }, { "epoch": 0.8227116775994169, "grad_norm": 1.2487836750658212, "learning_rate": 0.0001, "loss": 0.6801, "mean_abs_error": 642.9239278242227, "mean_abs_error_last_10": 424.70998841149714, "mean_abs_error_last_25": 427.9267554134026, "mean_abs_error_last_50": 469.6463194250808, "mean_pred_prob": 0.02154076562728733, "mean_pred_prob_last_10": 0.1010380367282778, "mean_pred_prob_last_25": 0.05778212531004101, "mean_pred_prob_last_50": 0.03606520236935466, "mean_token_accuracy": 0.8784983575344085, "step": 46280 }, { "epoch": 0.8228894458962188, "grad_norm": 0.8814271595843478, "learning_rate": 0.0001, "loss": 0.7545, "mean_abs_error": 1502.025874259535, "mean_abs_error_last_10": 818.660030815237, "mean_abs_error_last_25": 888.9595041467294, "mean_abs_error_last_50": 1169.3393218228014, "mean_pred_prob": 0.026799500825291034, "mean_pred_prob_last_10": 0.13905063427228015, "mean_pred_prob_last_25": 0.0760253171029035, "mean_pred_prob_last_50": 0.045533324329881, "mean_token_accuracy": 0.8741171419620514, "step": 46290 }, { "epoch": 0.8230672141930209, "grad_norm": 1.226462940189782, "learning_rate": 0.0001, "loss": 0.803, "mean_abs_error": 589.843807003557, "mean_abs_error_last_10": 161.34387016512102, "mean_abs_error_last_25": 189.93216796772307, "mean_abs_error_last_50": 288.80081631957194, "mean_pred_prob": 0.0466700630960986, "mean_pred_prob_last_10": 0.2100966228172183, "mean_pred_prob_last_25": 0.12529064686968922, "mean_pred_prob_last_50": 0.07769362244289368, "mean_token_accuracy": 0.8710837483406066, "step": 46300 }, { "epoch": 0.8232449824898228, "grad_norm": 1.346258499648781, "learning_rate": 0.0001, "loss": 0.7517, "mean_abs_error": 335.80637351370865, "mean_abs_error_last_10": 178.7619929317043, "mean_abs_error_last_25": 218.77617536737495, "mean_abs_error_last_50": 242.28943765203084, "mean_pred_prob": 0.04510143012739718, "mean_pred_prob_last_10": 0.19282463882118464, "mean_pred_prob_last_25": 0.11643749023787678, "mean_pred_prob_last_50": 0.0731528891948983, "mean_token_accuracy": 0.86531702876091, "step": 46310 }, { "epoch": 0.8234227507866247, "grad_norm": 2.244523643487953, "learning_rate": 0.0001, "loss": 0.7346, "mean_abs_error": 776.2213951783411, "mean_abs_error_last_10": 538.3218112409741, "mean_abs_error_last_25": 579.5596500359609, "mean_abs_error_last_50": 644.1143833423969, "mean_pred_prob": 0.04961696122772992, "mean_pred_prob_last_10": 0.24820965589024127, "mean_pred_prob_last_25": 0.137607910236693, "mean_pred_prob_last_50": 0.08336454326054081, "mean_token_accuracy": 0.8722777247428894, "step": 46320 }, { "epoch": 0.8236005190834267, "grad_norm": 1.621169639602692, "learning_rate": 0.0001, "loss": 0.6426, "mean_abs_error": 222.5064635506887, "mean_abs_error_last_10": 53.387647495237175, "mean_abs_error_last_25": 88.08966918151299, "mean_abs_error_last_50": 137.09373401062254, "mean_pred_prob": 0.04653141461312771, "mean_pred_prob_last_10": 0.23524980135262014, "mean_pred_prob_last_25": 0.12850775104016066, "mean_pred_prob_last_50": 0.07783705433830619, "mean_token_accuracy": 0.876783549785614, "step": 46330 }, { "epoch": 0.8237782873802286, "grad_norm": 2.366792965943807, "learning_rate": 0.0001, "loss": 0.7645, "mean_abs_error": 345.7894355891925, "mean_abs_error_last_10": 137.2151589748221, "mean_abs_error_last_25": 195.32071379166305, "mean_abs_error_last_50": 274.06453288766755, "mean_pred_prob": 0.060194851085543635, "mean_pred_prob_last_10": 0.26847403440624473, "mean_pred_prob_last_25": 0.1600918537005782, "mean_pred_prob_last_50": 0.10173483854159712, "mean_token_accuracy": 0.8776701927185059, "step": 46340 }, { "epoch": 0.8239560556770306, "grad_norm": 1.8490263572365528, "learning_rate": 0.0001, "loss": 0.7466, "mean_abs_error": 917.0462270947139, "mean_abs_error_last_10": 499.0775450506576, "mean_abs_error_last_25": 486.3625844078456, "mean_abs_error_last_50": 610.5596800972475, "mean_pred_prob": 0.046947902886313383, "mean_pred_prob_last_10": 0.18787704953865614, "mean_pred_prob_last_25": 0.11643555357004516, "mean_pred_prob_last_50": 0.07567093274265062, "mean_token_accuracy": 0.8722607970237732, "step": 46350 }, { "epoch": 0.8241338239738325, "grad_norm": 2.000078317313813, "learning_rate": 0.0001, "loss": 0.7432, "mean_abs_error": 253.7009523751805, "mean_abs_error_last_10": 43.16282818642387, "mean_abs_error_last_25": 126.99072403718917, "mean_abs_error_last_50": 204.76317555428372, "mean_pred_prob": 0.04939864510670304, "mean_pred_prob_last_10": 0.24497779160737992, "mean_pred_prob_last_25": 0.13462175615131855, "mean_pred_prob_last_50": 0.08210410550236702, "mean_token_accuracy": 0.8720116794109345, "step": 46360 }, { "epoch": 0.8243115922706344, "grad_norm": 1.173681609031793, "learning_rate": 0.0001, "loss": 0.6173, "mean_abs_error": 395.84672799877865, "mean_abs_error_last_10": 71.4184478001205, "mean_abs_error_last_25": 115.58518311931098, "mean_abs_error_last_50": 302.35967180882943, "mean_pred_prob": 0.06129908352158964, "mean_pred_prob_last_10": 0.2850224972702563, "mean_pred_prob_last_25": 0.16711012069135905, "mean_pred_prob_last_50": 0.10166311534121633, "mean_token_accuracy": 0.8897397994995118, "step": 46370 }, { "epoch": 0.8244893605674364, "grad_norm": 2.082785718969149, "learning_rate": 0.0001, "loss": 0.8082, "mean_abs_error": 1063.8240093416482, "mean_abs_error_last_10": 699.2473536993178, "mean_abs_error_last_25": 784.6397355792617, "mean_abs_error_last_50": 857.5413903502533, "mean_pred_prob": 0.04545949304010719, "mean_pred_prob_last_10": 0.20606881004059688, "mean_pred_prob_last_25": 0.12103611580678261, "mean_pred_prob_last_50": 0.07460027934284881, "mean_token_accuracy": 0.8643888652324676, "step": 46380 }, { "epoch": 0.8246671288642383, "grad_norm": 2.1281632175812937, "learning_rate": 0.0001, "loss": 0.6677, "mean_abs_error": 716.3546583451674, "mean_abs_error_last_10": 295.7138956040904, "mean_abs_error_last_25": 290.55391303008616, "mean_abs_error_last_50": 403.1917106122122, "mean_pred_prob": 0.03084241371252574, "mean_pred_prob_last_10": 0.14508465928956865, "mean_pred_prob_last_25": 0.0815466211119201, "mean_pred_prob_last_50": 0.05165246643591672, "mean_token_accuracy": 0.8668428659439087, "step": 46390 }, { "epoch": 0.8248448971610403, "grad_norm": 1.2478075319511233, "learning_rate": 0.0001, "loss": 0.5987, "mean_abs_error": 531.0969097689992, "mean_abs_error_last_10": 204.9544509483907, "mean_abs_error_last_25": 243.06441492020025, "mean_abs_error_last_50": 381.5977365545449, "mean_pred_prob": 0.047744155424879864, "mean_pred_prob_last_10": 0.20867307841544971, "mean_pred_prob_last_25": 0.12400703952880576, "mean_pred_prob_last_50": 0.07598534458084032, "mean_token_accuracy": 0.860520988702774, "step": 46400 }, { "epoch": 0.8250226654578422, "grad_norm": 1.225172698791675, "learning_rate": 0.0001, "loss": 0.7169, "mean_abs_error": 691.2415934871035, "mean_abs_error_last_10": 384.97397864413944, "mean_abs_error_last_25": 433.269998895633, "mean_abs_error_last_50": 529.2204714168013, "mean_pred_prob": 0.0286783333925996, "mean_pred_prob_last_10": 0.1354700783500448, "mean_pred_prob_last_25": 0.07537179624196141, "mean_pred_prob_last_50": 0.046901997656095776, "mean_token_accuracy": 0.8648913085460663, "step": 46410 }, { "epoch": 0.8252004337546442, "grad_norm": 1.4351334075041258, "learning_rate": 0.0001, "loss": 0.7719, "mean_abs_error": 904.9636535865004, "mean_abs_error_last_10": 302.3642523539133, "mean_abs_error_last_25": 355.639860314384, "mean_abs_error_last_50": 533.1679051280614, "mean_pred_prob": 0.034694125948590226, "mean_pred_prob_last_10": 0.15958282798528672, "mean_pred_prob_last_25": 0.09240165870287456, "mean_pred_prob_last_50": 0.05761587696615607, "mean_token_accuracy": 0.8708460330963135, "step": 46420 }, { "epoch": 0.8253782020514462, "grad_norm": 2.8744823695567536, "learning_rate": 0.0001, "loss": 0.6476, "mean_abs_error": 420.45379064745987, "mean_abs_error_last_10": 272.0141208383378, "mean_abs_error_last_25": 250.21640984921206, "mean_abs_error_last_50": 280.5690827602011, "mean_pred_prob": 0.03242725735763088, "mean_pred_prob_last_10": 0.17581827400717884, "mean_pred_prob_last_25": 0.09183404694776982, "mean_pred_prob_last_50": 0.055549439135938884, "mean_token_accuracy": 0.875899749994278, "step": 46430 }, { "epoch": 0.8255559703482481, "grad_norm": 3.097609730636624, "learning_rate": 0.0001, "loss": 0.7045, "mean_abs_error": 372.21218060929937, "mean_abs_error_last_10": 96.08069752728976, "mean_abs_error_last_25": 157.7788120901094, "mean_abs_error_last_50": 219.93956331316053, "mean_pred_prob": 0.04254391109570861, "mean_pred_prob_last_10": 0.209535401314497, "mean_pred_prob_last_25": 0.11437352849170565, "mean_pred_prob_last_50": 0.07093142159283161, "mean_token_accuracy": 0.8703187346458435, "step": 46440 }, { "epoch": 0.8257337386450501, "grad_norm": 1.3171388289568653, "learning_rate": 0.0001, "loss": 0.7797, "mean_abs_error": 1366.012051809673, "mean_abs_error_last_10": 707.0055049042619, "mean_abs_error_last_25": 795.4091644093788, "mean_abs_error_last_50": 973.5824805547594, "mean_pred_prob": 0.02774488089926308, "mean_pred_prob_last_10": 0.14005635287903714, "mean_pred_prob_last_25": 0.07637002985284198, "mean_pred_prob_last_50": 0.04658299755537883, "mean_token_accuracy": 0.867436808347702, "step": 46450 }, { "epoch": 0.825911506941852, "grad_norm": 1.3737702179277669, "learning_rate": 0.0001, "loss": 0.6334, "mean_abs_error": 1259.474698326247, "mean_abs_error_last_10": 710.1352609085187, "mean_abs_error_last_25": 791.8675688626819, "mean_abs_error_last_50": 974.8632746100477, "mean_pred_prob": 0.0321576258240384, "mean_pred_prob_last_10": 0.1631897272920469, "mean_pred_prob_last_25": 0.08629519811656791, "mean_pred_prob_last_50": 0.05310163021058543, "mean_token_accuracy": 0.8711519598960876, "step": 46460 }, { "epoch": 0.8260892752386539, "grad_norm": 1.3083050749013938, "learning_rate": 0.0001, "loss": 0.7058, "mean_abs_error": 124.98812331808638, "mean_abs_error_last_10": 12.538333556953795, "mean_abs_error_last_25": 25.674664341912955, "mean_abs_error_last_50": 53.901279013211436, "mean_pred_prob": 0.05605619521811604, "mean_pred_prob_last_10": 0.2722946159541607, "mean_pred_prob_last_25": 0.15279224514961243, "mean_pred_prob_last_50": 0.09399535991251469, "mean_token_accuracy": 0.8609601020812988, "step": 46470 }, { "epoch": 0.8262670435354559, "grad_norm": 1.0873781615473472, "learning_rate": 0.0001, "loss": 0.6971, "mean_abs_error": 283.49298539228, "mean_abs_error_last_10": 71.97787977181285, "mean_abs_error_last_25": 181.4347246535777, "mean_abs_error_last_50": 203.0654874241065, "mean_pred_prob": 0.045189452916383745, "mean_pred_prob_last_10": 0.21856985352933406, "mean_pred_prob_last_25": 0.12377690244466066, "mean_pred_prob_last_50": 0.07544447239488364, "mean_token_accuracy": 0.8677614152431488, "step": 46480 }, { "epoch": 0.8264448118322578, "grad_norm": 1.5139353516922647, "learning_rate": 0.0001, "loss": 0.7683, "mean_abs_error": 371.0240316365556, "mean_abs_error_last_10": 125.22545134332806, "mean_abs_error_last_25": 142.74787282685884, "mean_abs_error_last_50": 217.23901784115338, "mean_pred_prob": 0.028829669393599034, "mean_pred_prob_last_10": 0.13616380393505095, "mean_pred_prob_last_25": 0.08319058883935213, "mean_pred_prob_last_50": 0.04920195518061519, "mean_token_accuracy": 0.869471651315689, "step": 46490 }, { "epoch": 0.8266225801290598, "grad_norm": 3.0549734474948997, "learning_rate": 0.0001, "loss": 0.6703, "mean_abs_error": 1434.4728345634269, "mean_abs_error_last_10": 810.6131873225193, "mean_abs_error_last_25": 858.5287381947646, "mean_abs_error_last_50": 1008.5738796676696, "mean_pred_prob": 0.015238258047611452, "mean_pred_prob_last_10": 0.08865202466258779, "mean_pred_prob_last_25": 0.04481163122400176, "mean_pred_prob_last_50": 0.026085887491353786, "mean_token_accuracy": 0.8712300419807434, "step": 46500 }, { "epoch": 0.8268003484258617, "grad_norm": 1.4776249969033135, "learning_rate": 0.0001, "loss": 0.8401, "mean_abs_error": 897.9900877084635, "mean_abs_error_last_10": 481.5504395938573, "mean_abs_error_last_25": 589.3469623005328, "mean_abs_error_last_50": 644.9572225090186, "mean_pred_prob": 0.0265655927330954, "mean_pred_prob_last_10": 0.139785081933951, "mean_pred_prob_last_25": 0.07250710229855031, "mean_pred_prob_last_50": 0.04391370625817217, "mean_token_accuracy": 0.8673732817173004, "step": 46510 }, { "epoch": 0.8269781167226636, "grad_norm": 2.219371371951356, "learning_rate": 0.0001, "loss": 0.62, "mean_abs_error": 245.2957675919768, "mean_abs_error_last_10": 145.32711160331337, "mean_abs_error_last_25": 112.7685740066814, "mean_abs_error_last_50": 146.50626380445232, "mean_pred_prob": 0.05436624643625691, "mean_pred_prob_last_10": 0.24687920918222517, "mean_pred_prob_last_25": 0.14495134844910354, "mean_pred_prob_last_50": 0.08973153696861118, "mean_token_accuracy": 0.8825834929943085, "step": 46520 }, { "epoch": 0.8271558850194656, "grad_norm": 1.827431365940119, "learning_rate": 0.0001, "loss": 0.8793, "mean_abs_error": 1566.943177904875, "mean_abs_error_last_10": 1025.7597039387447, "mean_abs_error_last_25": 1127.8358260099772, "mean_abs_error_last_50": 1231.5124017985308, "mean_pred_prob": 0.05626835323128034, "mean_pred_prob_last_10": 0.2541339162045915, "mean_pred_prob_last_25": 0.15221656571156927, "mean_pred_prob_last_50": 0.09485575121871079, "mean_token_accuracy": 0.8805483520030976, "step": 46530 }, { "epoch": 0.8273336533162676, "grad_norm": 0.964043333901378, "learning_rate": 0.0001, "loss": 0.6456, "mean_abs_error": 619.0548517209234, "mean_abs_error_last_10": 151.86992277385883, "mean_abs_error_last_25": 212.10845312481615, "mean_abs_error_last_50": 375.32443645711237, "mean_pred_prob": 0.0348214668687433, "mean_pred_prob_last_10": 0.16772005085367708, "mean_pred_prob_last_25": 0.0933846562053077, "mean_pred_prob_last_50": 0.05752329883398488, "mean_token_accuracy": 0.8701912999153137, "step": 46540 }, { "epoch": 0.8275114216130696, "grad_norm": 1.2673856079135375, "learning_rate": 0.0001, "loss": 0.672, "mean_abs_error": 233.05653950581592, "mean_abs_error_last_10": 150.7311844329417, "mean_abs_error_last_25": 149.31046273282328, "mean_abs_error_last_50": 150.07076664601954, "mean_pred_prob": 0.049854652816429736, "mean_pred_prob_last_10": 0.2544049682095647, "mean_pred_prob_last_25": 0.14159567449241878, "mean_pred_prob_last_50": 0.0849422256462276, "mean_token_accuracy": 0.8704861342906952, "step": 46550 }, { "epoch": 0.8276891899098715, "grad_norm": 2.028904212222653, "learning_rate": 0.0001, "loss": 0.6872, "mean_abs_error": 426.0062192614826, "mean_abs_error_last_10": 89.57945041882161, "mean_abs_error_last_25": 123.72790891345967, "mean_abs_error_last_50": 233.04540396175054, "mean_pred_prob": 0.06865693726576864, "mean_pred_prob_last_10": 0.28139013652689754, "mean_pred_prob_last_25": 0.17077665211400017, "mean_pred_prob_last_50": 0.11050544798490591, "mean_token_accuracy": 0.8759443640708924, "step": 46560 }, { "epoch": 0.8278669582066734, "grad_norm": 1.686659467081087, "learning_rate": 0.0001, "loss": 0.6882, "mean_abs_error": 201.18963560040362, "mean_abs_error_last_10": 27.57678163237167, "mean_abs_error_last_25": 48.47840566198875, "mean_abs_error_last_50": 115.39982530783841, "mean_pred_prob": 0.05106284110806882, "mean_pred_prob_last_10": 0.23794704880565404, "mean_pred_prob_last_25": 0.13864660784602165, "mean_pred_prob_last_50": 0.0854705898091197, "mean_token_accuracy": 0.8728136122226715, "step": 46570 }, { "epoch": 0.8280447265034754, "grad_norm": 0.8747344595370847, "learning_rate": 0.0001, "loss": 0.6481, "mean_abs_error": 339.06771739974204, "mean_abs_error_last_10": 78.85845457250056, "mean_abs_error_last_25": 115.40902759347959, "mean_abs_error_last_50": 184.73314506762148, "mean_pred_prob": 0.048239509435370566, "mean_pred_prob_last_10": 0.20702928826212882, "mean_pred_prob_last_25": 0.12498142961412669, "mean_pred_prob_last_50": 0.07946462212130427, "mean_token_accuracy": 0.8812678456306458, "step": 46580 }, { "epoch": 0.8282224948002773, "grad_norm": 1.7999913593414423, "learning_rate": 0.0001, "loss": 0.7015, "mean_abs_error": 673.5766671733528, "mean_abs_error_last_10": 316.28463862404294, "mean_abs_error_last_25": 295.9299877201884, "mean_abs_error_last_50": 424.27993195147565, "mean_pred_prob": 0.03688152108516078, "mean_pred_prob_last_10": 0.18215402635978534, "mean_pred_prob_last_25": 0.10206919113988988, "mean_pred_prob_last_50": 0.0619327662163414, "mean_token_accuracy": 0.8673097312450408, "step": 46590 }, { "epoch": 0.8284002630970793, "grad_norm": 1.4001417984080728, "learning_rate": 0.0001, "loss": 0.7696, "mean_abs_error": 618.7269399723112, "mean_abs_error_last_10": 179.21802124281712, "mean_abs_error_last_25": 252.88689894216992, "mean_abs_error_last_50": 396.2064770276346, "mean_pred_prob": 0.029596543268417007, "mean_pred_prob_last_10": 0.15208435871172696, "mean_pred_prob_last_25": 0.08105477298377081, "mean_pred_prob_last_50": 0.04870883216499351, "mean_token_accuracy": 0.8658196926116943, "step": 46600 }, { "epoch": 0.8285780313938812, "grad_norm": 2.3640527411192713, "learning_rate": 0.0001, "loss": 0.7735, "mean_abs_error": 789.3141737645599, "mean_abs_error_last_10": 482.9365923415059, "mean_abs_error_last_25": 487.9573200483248, "mean_abs_error_last_50": 573.8458689691931, "mean_pred_prob": 0.04542533456115052, "mean_pred_prob_last_10": 0.20154907353280577, "mean_pred_prob_last_25": 0.11959223567973823, "mean_pred_prob_last_50": 0.07515976822178345, "mean_token_accuracy": 0.8766468584537506, "step": 46610 }, { "epoch": 0.8287557996906831, "grad_norm": 1.6162834715062198, "learning_rate": 0.0001, "loss": 0.6426, "mean_abs_error": 1784.4897434816496, "mean_abs_error_last_10": 1005.6953526620913, "mean_abs_error_last_25": 1216.426464273574, "mean_abs_error_last_50": 1435.3501774459055, "mean_pred_prob": 0.028861977696215034, "mean_pred_prob_last_10": 0.15218249522004043, "mean_pred_prob_last_25": 0.08343846971329186, "mean_pred_prob_last_50": 0.04941133721731603, "mean_token_accuracy": 0.8842769682407379, "step": 46620 }, { "epoch": 0.8289335679874851, "grad_norm": 1.6755334460658124, "learning_rate": 0.0001, "loss": 0.6397, "mean_abs_error": 318.1797214222768, "mean_abs_error_last_10": 101.98881689519399, "mean_abs_error_last_25": 117.52065884442126, "mean_abs_error_last_50": 195.65465467687633, "mean_pred_prob": 0.052149705868214366, "mean_pred_prob_last_10": 0.24909884724766015, "mean_pred_prob_last_25": 0.1440909151919186, "mean_pred_prob_last_50": 0.08770694364793599, "mean_token_accuracy": 0.8764067173004151, "step": 46630 }, { "epoch": 0.829111336284287, "grad_norm": 1.2582976864661772, "learning_rate": 0.0001, "loss": 0.6947, "mean_abs_error": 830.8903390373194, "mean_abs_error_last_10": 236.23980291617, "mean_abs_error_last_25": 315.4768319852989, "mean_abs_error_last_50": 513.393495479706, "mean_pred_prob": 0.02609052627813071, "mean_pred_prob_last_10": 0.1139522946672514, "mean_pred_prob_last_25": 0.06455622968496755, "mean_pred_prob_last_50": 0.041388340655248615, "mean_token_accuracy": 0.8694732904434204, "step": 46640 }, { "epoch": 0.829289104581089, "grad_norm": 2.5589553332616317, "learning_rate": 0.0001, "loss": 0.7849, "mean_abs_error": 429.58032841942315, "mean_abs_error_last_10": 223.89493094187623, "mean_abs_error_last_25": 252.915723524761, "mean_abs_error_last_50": 339.43521502963233, "mean_pred_prob": 0.04491893148515373, "mean_pred_prob_last_10": 0.19135435968637465, "mean_pred_prob_last_25": 0.11227683871984481, "mean_pred_prob_last_50": 0.07151757981628179, "mean_token_accuracy": 0.8713320851325989, "step": 46650 }, { "epoch": 0.829466872877891, "grad_norm": 2.0573700707806704, "learning_rate": 0.0001, "loss": 0.6344, "mean_abs_error": 590.471507499081, "mean_abs_error_last_10": 182.39812390182092, "mean_abs_error_last_25": 281.43015347944055, "mean_abs_error_last_50": 377.3536976906412, "mean_pred_prob": 0.03146490285289474, "mean_pred_prob_last_10": 0.16312347025377677, "mean_pred_prob_last_25": 0.08720346018089913, "mean_pred_prob_last_50": 0.05235550697543658, "mean_token_accuracy": 0.874653023481369, "step": 46660 }, { "epoch": 0.8296446411746929, "grad_norm": 0.820136096626506, "learning_rate": 0.0001, "loss": 0.6733, "mean_abs_error": 536.8819665909273, "mean_abs_error_last_10": 240.79549767302052, "mean_abs_error_last_25": 244.59896180285267, "mean_abs_error_last_50": 274.6030548841669, "mean_pred_prob": 0.021377013344317675, "mean_pred_prob_last_10": 0.11521052340976894, "mean_pred_prob_last_25": 0.057360009360127154, "mean_pred_prob_last_50": 0.03454752285033465, "mean_token_accuracy": 0.8775351941585541, "step": 46670 }, { "epoch": 0.8298224094714949, "grad_norm": 2.2895281447137643, "learning_rate": 0.0001, "loss": 0.7449, "mean_abs_error": 236.68855745694205, "mean_abs_error_last_10": 39.70673866705393, "mean_abs_error_last_25": 69.15520285908698, "mean_abs_error_last_50": 123.11590684463923, "mean_pred_prob": 0.04660043264739215, "mean_pred_prob_last_10": 0.2321096196770668, "mean_pred_prob_last_25": 0.1288283420726657, "mean_pred_prob_last_50": 0.07900359919294715, "mean_token_accuracy": 0.8685580849647522, "step": 46680 }, { "epoch": 0.8300001777682968, "grad_norm": 1.500710940738399, "learning_rate": 0.0001, "loss": 0.6827, "mean_abs_error": 886.9067662190761, "mean_abs_error_last_10": 280.7090262465517, "mean_abs_error_last_25": 322.00403285038044, "mean_abs_error_last_50": 456.33531420934116, "mean_pred_prob": 0.0247105524817016, "mean_pred_prob_last_10": 0.13269757003290578, "mean_pred_prob_last_25": 0.06766317358706146, "mean_pred_prob_last_50": 0.04128531735623255, "mean_token_accuracy": 0.8788325548171997, "step": 46690 }, { "epoch": 0.8301779460650988, "grad_norm": 1.9058559928857801, "learning_rate": 0.0001, "loss": 0.7616, "mean_abs_error": 1117.5630920265774, "mean_abs_error_last_10": 930.7610607377176, "mean_abs_error_last_25": 1025.1390437145487, "mean_abs_error_last_50": 1042.7131423982105, "mean_pred_prob": 0.03804805094696349, "mean_pred_prob_last_10": 0.19669941994361578, "mean_pred_prob_last_25": 0.10609230580448639, "mean_pred_prob_last_50": 0.06375146713398863, "mean_token_accuracy": 0.863256698846817, "step": 46700 }, { "epoch": 0.8303557143619007, "grad_norm": 1.9827073853220119, "learning_rate": 0.0001, "loss": 0.6061, "mean_abs_error": 261.1591380851606, "mean_abs_error_last_10": 30.119544985142017, "mean_abs_error_last_25": 86.32873426329044, "mean_abs_error_last_50": 172.35068418757515, "mean_pred_prob": 0.046990069933235644, "mean_pred_prob_last_10": 0.24684708714485168, "mean_pred_prob_last_25": 0.13314032927155495, "mean_pred_prob_last_50": 0.07968363435938955, "mean_token_accuracy": 0.8834032893180848, "step": 46710 }, { "epoch": 0.8305334826587026, "grad_norm": 1.2943181654992946, "learning_rate": 0.0001, "loss": 0.672, "mean_abs_error": 513.0576027048154, "mean_abs_error_last_10": 277.32761763906035, "mean_abs_error_last_25": 331.25318459007275, "mean_abs_error_last_50": 405.8031111688375, "mean_pred_prob": 0.04215328019636218, "mean_pred_prob_last_10": 0.20059963923995383, "mean_pred_prob_last_25": 0.11367399703594856, "mean_pred_prob_last_50": 0.06978003960684873, "mean_token_accuracy": 0.8715523660182953, "step": 46720 }, { "epoch": 0.8307112509555046, "grad_norm": 1.9131759331668334, "learning_rate": 0.0001, "loss": 0.7159, "mean_abs_error": 1434.3302214092869, "mean_abs_error_last_10": 887.6442542618986, "mean_abs_error_last_25": 999.2206738361881, "mean_abs_error_last_50": 1171.8540813602717, "mean_pred_prob": 0.03082255927583901, "mean_pred_prob_last_10": 0.15847813115397003, "mean_pred_prob_last_25": 0.08809872747951886, "mean_pred_prob_last_50": 0.052593968181463426, "mean_token_accuracy": 0.8683502018451691, "step": 46730 }, { "epoch": 0.8308890192523065, "grad_norm": 1.5762420479239685, "learning_rate": 0.0001, "loss": 0.591, "mean_abs_error": 682.4486611908554, "mean_abs_error_last_10": 370.4572561870385, "mean_abs_error_last_25": 400.4323515613971, "mean_abs_error_last_50": 445.1010767172567, "mean_pred_prob": 0.03722051334916614, "mean_pred_prob_last_10": 0.1847689270740375, "mean_pred_prob_last_25": 0.10177213459974155, "mean_pred_prob_last_50": 0.0611299796088133, "mean_token_accuracy": 0.8775481700897216, "step": 46740 }, { "epoch": 0.8310667875491085, "grad_norm": 1.2624633524716338, "learning_rate": 0.0001, "loss": 0.6938, "mean_abs_error": 750.7297076421686, "mean_abs_error_last_10": 397.5025031868043, "mean_abs_error_last_25": 435.54333980088825, "mean_abs_error_last_50": 524.7248315466567, "mean_pred_prob": 0.053971972982981245, "mean_pred_prob_last_10": 0.24635621297347826, "mean_pred_prob_last_25": 0.14578784592449665, "mean_pred_prob_last_50": 0.0903860413847724, "mean_token_accuracy": 0.8639892578125, "step": 46750 }, { "epoch": 0.8312445558459104, "grad_norm": 1.1624785347871864, "learning_rate": 0.0001, "loss": 0.7843, "mean_abs_error": 629.8609739397201, "mean_abs_error_last_10": 174.3605692308419, "mean_abs_error_last_25": 273.83248742475723, "mean_abs_error_last_50": 448.66924952764583, "mean_pred_prob": 0.046596365043660624, "mean_pred_prob_last_10": 0.22875420404598118, "mean_pred_prob_last_25": 0.12586118850158529, "mean_pred_prob_last_50": 0.07815287407720461, "mean_token_accuracy": 0.8678127944469451, "step": 46760 }, { "epoch": 0.8314223241427124, "grad_norm": 1.3359205410155264, "learning_rate": 0.0001, "loss": 0.7354, "mean_abs_error": 354.54590169151703, "mean_abs_error_last_10": 101.16961052334831, "mean_abs_error_last_25": 217.58466527752694, "mean_abs_error_last_50": 266.1089760576345, "mean_pred_prob": 0.04710317342542112, "mean_pred_prob_last_10": 0.2184290697798133, "mean_pred_prob_last_25": 0.12313656825572253, "mean_pred_prob_last_50": 0.07650387473404408, "mean_token_accuracy": 0.8718859553337097, "step": 46770 }, { "epoch": 0.8316000924395144, "grad_norm": 1.5207141241847058, "learning_rate": 0.0001, "loss": 0.6963, "mean_abs_error": 967.8356589573689, "mean_abs_error_last_10": 639.1658295427018, "mean_abs_error_last_25": 674.6643555165814, "mean_abs_error_last_50": 770.7452024660043, "mean_pred_prob": 0.04048734593670815, "mean_pred_prob_last_10": 0.1890715713292593, "mean_pred_prob_last_25": 0.10902089017035906, "mean_pred_prob_last_50": 0.06672945355530828, "mean_token_accuracy": 0.8717612564563751, "step": 46780 }, { "epoch": 0.8317778607363163, "grad_norm": 5.298931657887578, "learning_rate": 0.0001, "loss": 0.6788, "mean_abs_error": 173.8788700620388, "mean_abs_error_last_10": 40.980724096461984, "mean_abs_error_last_25": 71.89835505920695, "mean_abs_error_last_50": 97.11945920907618, "mean_pred_prob": 0.04800689313560724, "mean_pred_prob_last_10": 0.2505651548504829, "mean_pred_prob_last_25": 0.13749122731387614, "mean_pred_prob_last_50": 0.08197645209729672, "mean_token_accuracy": 0.8803182959556579, "step": 46790 }, { "epoch": 0.8319556290331183, "grad_norm": 1.361128756096562, "learning_rate": 0.0001, "loss": 0.687, "mean_abs_error": 456.6986180171428, "mean_abs_error_last_10": 259.5491034029089, "mean_abs_error_last_25": 327.11006487910106, "mean_abs_error_last_50": 375.38571733599036, "mean_pred_prob": 0.0403202832385432, "mean_pred_prob_last_10": 0.19909074120223522, "mean_pred_prob_last_25": 0.11005074614658952, "mean_pred_prob_last_50": 0.0679202702071052, "mean_token_accuracy": 0.8727169930934906, "step": 46800 }, { "epoch": 0.8321333973299202, "grad_norm": 1.0864574410608014, "learning_rate": 0.0001, "loss": 0.9071, "mean_abs_error": 246.9176152894291, "mean_abs_error_last_10": 148.27767219156183, "mean_abs_error_last_25": 179.07778778982967, "mean_abs_error_last_50": 177.5302622859969, "mean_pred_prob": 0.034516709588933735, "mean_pred_prob_last_10": 0.18906366978771985, "mean_pred_prob_last_25": 0.09862023612949997, "mean_pred_prob_last_50": 0.05885855255182833, "mean_token_accuracy": 0.8657624542713165, "step": 46810 }, { "epoch": 0.8323111656267221, "grad_norm": 2.584568597230507, "learning_rate": 0.0001, "loss": 0.7463, "mean_abs_error": 279.1719669309039, "mean_abs_error_last_10": 76.9831122199842, "mean_abs_error_last_25": 126.81601718422576, "mean_abs_error_last_50": 167.61576105280835, "mean_pred_prob": 0.028861330077052116, "mean_pred_prob_last_10": 0.15295051112771035, "mean_pred_prob_last_25": 0.0807393491268158, "mean_pred_prob_last_50": 0.04906336162239313, "mean_token_accuracy": 0.877720856666565, "step": 46820 }, { "epoch": 0.8324889339235241, "grad_norm": 1.2261527174476408, "learning_rate": 0.0001, "loss": 0.6606, "mean_abs_error": 539.3750962544188, "mean_abs_error_last_10": 147.45768215421762, "mean_abs_error_last_25": 208.0034742861983, "mean_abs_error_last_50": 311.1441445808509, "mean_pred_prob": 0.029727793810889126, "mean_pred_prob_last_10": 0.14633633038029076, "mean_pred_prob_last_25": 0.08071607295423747, "mean_pred_prob_last_50": 0.04961561476811767, "mean_token_accuracy": 0.8745625913143158, "step": 46830 }, { "epoch": 0.832666702220326, "grad_norm": 0.8761817221867277, "learning_rate": 0.0001, "loss": 0.7053, "mean_abs_error": 382.4423104432143, "mean_abs_error_last_10": 107.36582604415212, "mean_abs_error_last_25": 192.39751538287118, "mean_abs_error_last_50": 257.74035183587114, "mean_pred_prob": 0.048416727758012715, "mean_pred_prob_last_10": 0.2050717351026833, "mean_pred_prob_last_25": 0.125010935170576, "mean_pred_prob_last_50": 0.07892719181254507, "mean_token_accuracy": 0.875465589761734, "step": 46840 }, { "epoch": 0.832844470517128, "grad_norm": 3.782014163200293, "learning_rate": 0.0001, "loss": 0.7507, "mean_abs_error": 273.1548320096943, "mean_abs_error_last_10": 94.17374926224736, "mean_abs_error_last_25": 123.91780213315303, "mean_abs_error_last_50": 184.2034176329281, "mean_pred_prob": 0.04723593653179705, "mean_pred_prob_last_10": 0.23668214455246925, "mean_pred_prob_last_25": 0.13166534770280122, "mean_pred_prob_last_50": 0.08034996045753359, "mean_token_accuracy": 0.8682510554790497, "step": 46850 }, { "epoch": 0.8330222388139299, "grad_norm": 2.071474037375685, "learning_rate": 0.0001, "loss": 0.6901, "mean_abs_error": 463.53215552783814, "mean_abs_error_last_10": 141.08416181007502, "mean_abs_error_last_25": 190.81811583607708, "mean_abs_error_last_50": 276.13219342622097, "mean_pred_prob": 0.05763969704275951, "mean_pred_prob_last_10": 0.2493366375216283, "mean_pred_prob_last_25": 0.14882186835166067, "mean_pred_prob_last_50": 0.09301484584575519, "mean_token_accuracy": 0.8677745938301087, "step": 46860 }, { "epoch": 0.8332000071107318, "grad_norm": 1.5247058916113678, "learning_rate": 0.0001, "loss": 0.6121, "mean_abs_error": 362.13425205917326, "mean_abs_error_last_10": 66.05945963800808, "mean_abs_error_last_25": 103.18692347855213, "mean_abs_error_last_50": 192.3668008079265, "mean_pred_prob": 0.051860569836571814, "mean_pred_prob_last_10": 0.2451938234269619, "mean_pred_prob_last_25": 0.14171089325100183, "mean_pred_prob_last_50": 0.08730579307302833, "mean_token_accuracy": 0.8861622154712677, "step": 46870 }, { "epoch": 0.8333777754075338, "grad_norm": 1.7074834103084118, "learning_rate": 0.0001, "loss": 0.7065, "mean_abs_error": 800.9178904152382, "mean_abs_error_last_10": 315.38310574379244, "mean_abs_error_last_25": 369.2146763128782, "mean_abs_error_last_50": 475.88469421829905, "mean_pred_prob": 0.03394079270656221, "mean_pred_prob_last_10": 0.17067515185335652, "mean_pred_prob_last_25": 0.09527306100353598, "mean_pred_prob_last_50": 0.05779607546282932, "mean_token_accuracy": 0.8776892066001892, "step": 46880 }, { "epoch": 0.8335555437043358, "grad_norm": 1.5117166031359923, "learning_rate": 0.0001, "loss": 0.5579, "mean_abs_error": 127.52203989647845, "mean_abs_error_last_10": 22.358984230295068, "mean_abs_error_last_25": 38.69471400080642, "mean_abs_error_last_50": 67.30577829136695, "mean_pred_prob": 0.04887376008555293, "mean_pred_prob_last_10": 0.23525103516876697, "mean_pred_prob_last_25": 0.1328006846830249, "mean_pred_prob_last_50": 0.08172151427716016, "mean_token_accuracy": 0.8834918856620788, "step": 46890 }, { "epoch": 0.8337333120011378, "grad_norm": 1.1896713724497194, "learning_rate": 0.0001, "loss": 0.6528, "mean_abs_error": 109.37539897997374, "mean_abs_error_last_10": 27.685248801996938, "mean_abs_error_last_25": 43.83427843131484, "mean_abs_error_last_50": 64.34553375562216, "mean_pred_prob": 0.05091366833075881, "mean_pred_prob_last_10": 0.256320421397686, "mean_pred_prob_last_25": 0.1391101025044918, "mean_pred_prob_last_50": 0.08494333233684301, "mean_token_accuracy": 0.8786693036556243, "step": 46900 }, { "epoch": 0.8339110802979397, "grad_norm": 1.5586098437555038, "learning_rate": 0.0001, "loss": 0.5741, "mean_abs_error": 1143.8818827648715, "mean_abs_error_last_10": 752.2403451943695, "mean_abs_error_last_25": 846.5859047927968, "mean_abs_error_last_50": 940.1133022302072, "mean_pred_prob": 0.0419532236737723, "mean_pred_prob_last_10": 0.20723588795153774, "mean_pred_prob_last_25": 0.11251223977305927, "mean_pred_prob_last_50": 0.06885805434867506, "mean_token_accuracy": 0.8784340679645538, "step": 46910 }, { "epoch": 0.8340888485947416, "grad_norm": 1.492559465634305, "learning_rate": 0.0001, "loss": 0.6053, "mean_abs_error": 206.57059832912842, "mean_abs_error_last_10": 126.11497393775406, "mean_abs_error_last_25": 107.8383535959056, "mean_abs_error_last_50": 142.10619794206886, "mean_pred_prob": 0.05346770517062396, "mean_pred_prob_last_10": 0.23107028398662804, "mean_pred_prob_last_25": 0.1366839822381735, "mean_pred_prob_last_50": 0.08835776187479497, "mean_token_accuracy": 0.8675707638263702, "step": 46920 }, { "epoch": 0.8342666168915436, "grad_norm": 2.7150893809219343, "learning_rate": 0.0001, "loss": 0.7772, "mean_abs_error": 236.97573517658014, "mean_abs_error_last_10": 118.64035180915138, "mean_abs_error_last_25": 168.21049103965393, "mean_abs_error_last_50": 226.81991319655316, "mean_pred_prob": 0.04764020019210875, "mean_pred_prob_last_10": 0.2424001580104232, "mean_pred_prob_last_25": 0.1300182480365038, "mean_pred_prob_last_50": 0.07930435808375477, "mean_token_accuracy": 0.869169807434082, "step": 46930 }, { "epoch": 0.8344443851883455, "grad_norm": 2.5869162067114737, "learning_rate": 0.0001, "loss": 0.7921, "mean_abs_error": 388.6548880096484, "mean_abs_error_last_10": 102.15101742564741, "mean_abs_error_last_25": 157.01213952517497, "mean_abs_error_last_50": 236.21052061663494, "mean_pred_prob": 0.04822271976154298, "mean_pred_prob_last_10": 0.23876071218401193, "mean_pred_prob_last_25": 0.13465059362351894, "mean_pred_prob_last_50": 0.08091353899799288, "mean_token_accuracy": 0.8730327188968658, "step": 46940 }, { "epoch": 0.8346221534851475, "grad_norm": 1.4188066288265215, "learning_rate": 0.0001, "loss": 0.6984, "mean_abs_error": 160.86389383250952, "mean_abs_error_last_10": 43.22280870753095, "mean_abs_error_last_25": 63.10207148779951, "mean_abs_error_last_50": 97.82552495771932, "mean_pred_prob": 0.04874847284518182, "mean_pred_prob_last_10": 0.24666492827236652, "mean_pred_prob_last_25": 0.13156711123883724, "mean_pred_prob_last_50": 0.07967343358322979, "mean_token_accuracy": 0.8792544186115265, "step": 46950 }, { "epoch": 0.8347999217819494, "grad_norm": 1.3529237073980986, "learning_rate": 0.0001, "loss": 0.6235, "mean_abs_error": 76.71318922573822, "mean_abs_error_last_10": 17.920328854454528, "mean_abs_error_last_25": 38.90037940813111, "mean_abs_error_last_50": 50.91801271240022, "mean_pred_prob": 0.05847016787156463, "mean_pred_prob_last_10": 0.30787088945508, "mean_pred_prob_last_25": 0.1640725776553154, "mean_pred_prob_last_50": 0.09918704088777304, "mean_token_accuracy": 0.8717122614383698, "step": 46960 }, { "epoch": 0.8349776900787513, "grad_norm": 1.143089764661113, "learning_rate": 0.0001, "loss": 0.8236, "mean_abs_error": 89.55838752236453, "mean_abs_error_last_10": 8.568346277334598, "mean_abs_error_last_25": 21.912258837878376, "mean_abs_error_last_50": 50.53935450233527, "mean_pred_prob": 0.06857008785009384, "mean_pred_prob_last_10": 0.3194973237812519, "mean_pred_prob_last_25": 0.18489756360650061, "mean_pred_prob_last_50": 0.11488834153860808, "mean_token_accuracy": 0.8697066485881806, "step": 46970 }, { "epoch": 0.8351554583755533, "grad_norm": 0.883061066521874, "learning_rate": 0.0001, "loss": 0.7402, "mean_abs_error": 280.40704362623995, "mean_abs_error_last_10": 43.95304173667213, "mean_abs_error_last_25": 84.40147652750747, "mean_abs_error_last_50": 162.00545624379978, "mean_pred_prob": 0.038616288593038915, "mean_pred_prob_last_10": 0.19734530672430992, "mean_pred_prob_last_25": 0.10653556641191245, "mean_pred_prob_last_50": 0.06490633385255933, "mean_token_accuracy": 0.8714741110801697, "step": 46980 }, { "epoch": 0.8353332266723552, "grad_norm": 1.357493032797626, "learning_rate": 0.0001, "loss": 0.645, "mean_abs_error": 218.55934370598533, "mean_abs_error_last_10": 39.44665559993723, "mean_abs_error_last_25": 116.94448927662586, "mean_abs_error_last_50": 152.31855572776507, "mean_pred_prob": 0.04279647506773472, "mean_pred_prob_last_10": 0.22407870106399058, "mean_pred_prob_last_25": 0.11576222013682128, "mean_pred_prob_last_50": 0.07019685320556164, "mean_token_accuracy": 0.8764226794242859, "step": 46990 }, { "epoch": 0.8355109949691572, "grad_norm": 1.3558345171238138, "learning_rate": 0.0001, "loss": 0.628, "mean_abs_error": 145.25876430284822, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 96.7899546929698, "mean_pred_prob": 0.09184112823568284, "mean_pred_prob_last_10": 0.29493190050125123, "mean_pred_prob_last_25": 0.18486655317246914, "mean_pred_prob_last_50": 0.1375075730495155, "mean_token_accuracy": 0.8798230707645416, "step": 47000 }, { "epoch": 0.8356887632659592, "grad_norm": 1.0067969239061785, "learning_rate": 0.0001, "loss": 0.7297, "mean_abs_error": 1512.2954597515072, "mean_abs_error_last_10": 547.6490769436525, "mean_abs_error_last_25": 614.8708896919643, "mean_abs_error_last_50": 976.4958334035485, "mean_pred_prob": 0.023253459032275714, "mean_pred_prob_last_10": 0.1291298123949673, "mean_pred_prob_last_25": 0.06720438146730885, "mean_pred_prob_last_50": 0.0395921170420479, "mean_token_accuracy": 0.8690775454044342, "step": 47010 }, { "epoch": 0.8358665315627611, "grad_norm": 3.0375859292763487, "learning_rate": 0.0001, "loss": 0.685, "mean_abs_error": 899.566165289755, "mean_abs_error_last_10": 425.52573429041456, "mean_abs_error_last_25": 505.24434544116457, "mean_abs_error_last_50": 634.7303361578184, "mean_pred_prob": 0.03350314947019797, "mean_pred_prob_last_10": 0.16637056790059432, "mean_pred_prob_last_25": 0.09289249686407856, "mean_pred_prob_last_50": 0.05683690392761491, "mean_token_accuracy": 0.8681500732898713, "step": 47020 }, { "epoch": 0.8360442998595631, "grad_norm": 1.2603568326550074, "learning_rate": 0.0001, "loss": 0.6674, "mean_abs_error": 483.7304071288697, "mean_abs_error_last_10": 83.7027170242772, "mean_abs_error_last_25": 132.85403964659858, "mean_abs_error_last_50": 245.16672128912714, "mean_pred_prob": 0.05695836202357896, "mean_pred_prob_last_10": 0.26705651711672546, "mean_pred_prob_last_25": 0.15663928419817238, "mean_pred_prob_last_50": 0.09414906682213768, "mean_token_accuracy": 0.8717009961605072, "step": 47030 }, { "epoch": 0.836222068156365, "grad_norm": 2.4774497285355244, "learning_rate": 0.0001, "loss": 0.6638, "mean_abs_error": 820.8104551727413, "mean_abs_error_last_10": 335.9293503966851, "mean_abs_error_last_25": 414.5397120739907, "mean_abs_error_last_50": 594.5800630168004, "mean_pred_prob": 0.026770864153513686, "mean_pred_prob_last_10": 0.14317517750314437, "mean_pred_prob_last_25": 0.07490995801053942, "mean_pred_prob_last_50": 0.044413829542463645, "mean_token_accuracy": 0.8803654909133911, "step": 47040 }, { "epoch": 0.836399836453167, "grad_norm": 2.0114993641771646, "learning_rate": 0.0001, "loss": 0.6668, "mean_abs_error": 674.6745005881672, "mean_abs_error_last_10": 204.43715057112374, "mean_abs_error_last_25": 279.6535217808165, "mean_abs_error_last_50": 419.4275972793327, "mean_pred_prob": 0.03451393217837904, "mean_pred_prob_last_10": 0.16531520577846096, "mean_pred_prob_last_25": 0.0926933531765826, "mean_pred_prob_last_50": 0.05719435783685185, "mean_token_accuracy": 0.8608809769153595, "step": 47050 }, { "epoch": 0.8365776047499689, "grad_norm": 2.648841409562673, "learning_rate": 0.0001, "loss": 0.7366, "mean_abs_error": 395.6444462252044, "mean_abs_error_last_10": 152.64200538688345, "mean_abs_error_last_25": 206.49594345994268, "mean_abs_error_last_50": 325.50351484312, "mean_pred_prob": 0.03131251239683479, "mean_pred_prob_last_10": 0.17102075200527905, "mean_pred_prob_last_25": 0.09083451982587576, "mean_pred_prob_last_50": 0.0549111838452518, "mean_token_accuracy": 0.8779043614864349, "step": 47060 }, { "epoch": 0.8367553730467708, "grad_norm": 1.4439755612449772, "learning_rate": 0.0001, "loss": 0.7289, "mean_abs_error": 269.41410832241456, "mean_abs_error_last_10": 82.39383695298429, "mean_abs_error_last_25": 139.90543527185804, "mean_abs_error_last_50": 177.6709248308175, "mean_pred_prob": 0.040108619211241606, "mean_pred_prob_last_10": 0.18336102180182934, "mean_pred_prob_last_25": 0.10285601504147053, "mean_pred_prob_last_50": 0.06401106314733625, "mean_token_accuracy": 0.8697649300098419, "step": 47070 }, { "epoch": 0.8369331413435728, "grad_norm": 1.7594175406595443, "learning_rate": 0.0001, "loss": 0.7495, "mean_abs_error": 638.927499527772, "mean_abs_error_last_10": 125.27449286778315, "mean_abs_error_last_25": 223.04754648852304, "mean_abs_error_last_50": 337.053269304354, "mean_pred_prob": 0.045453517662826924, "mean_pred_prob_last_10": 0.24223022332880645, "mean_pred_prob_last_25": 0.1288716406095773, "mean_pred_prob_last_50": 0.07781246468657628, "mean_token_accuracy": 0.8660419762134552, "step": 47080 }, { "epoch": 0.8371109096403747, "grad_norm": 1.1369800561340937, "learning_rate": 0.0001, "loss": 0.6323, "mean_abs_error": 246.71007130798708, "mean_abs_error_last_10": 54.06806843917197, "mean_abs_error_last_25": 87.9901563305514, "mean_abs_error_last_50": 137.31582852386475, "mean_pred_prob": 0.04244763730093837, "mean_pred_prob_last_10": 0.20030318945646286, "mean_pred_prob_last_25": 0.11339423805475235, "mean_pred_prob_last_50": 0.06965308133512735, "mean_token_accuracy": 0.8723830282688141, "step": 47090 }, { "epoch": 0.8372886779371767, "grad_norm": 1.1880946107812442, "learning_rate": 0.0001, "loss": 0.8068, "mean_abs_error": 558.8745204257777, "mean_abs_error_last_10": 202.8885627586659, "mean_abs_error_last_25": 208.6551008809557, "mean_abs_error_last_50": 344.7804943235234, "mean_pred_prob": 0.020779901114292443, "mean_pred_prob_last_10": 0.11385597996413707, "mean_pred_prob_last_25": 0.05972624495625496, "mean_pred_prob_last_50": 0.03567493809387088, "mean_token_accuracy": 0.8613830745220185, "step": 47100 }, { "epoch": 0.8374664462339786, "grad_norm": 1.9599422796359829, "learning_rate": 0.0001, "loss": 0.7432, "mean_abs_error": 195.35765792016082, "mean_abs_error_last_10": 134.43456510430838, "mean_abs_error_last_25": 144.74912741819315, "mean_abs_error_last_50": 146.88940252705612, "mean_pred_prob": 0.04126889128237963, "mean_pred_prob_last_10": 0.1892193913459778, "mean_pred_prob_last_25": 0.11124776732176542, "mean_pred_prob_last_50": 0.06992626897990703, "mean_token_accuracy": 0.8687856316566467, "step": 47110 }, { "epoch": 0.8376442145307805, "grad_norm": 1.9058550776450542, "learning_rate": 0.0001, "loss": 0.7177, "mean_abs_error": 597.5387657643419, "mean_abs_error_last_10": 140.00803138152145, "mean_abs_error_last_25": 206.89921072393048, "mean_abs_error_last_50": 377.39736764535746, "mean_pred_prob": 0.03618307773722336, "mean_pred_prob_last_10": 0.18164334129542112, "mean_pred_prob_last_25": 0.09629582830239088, "mean_pred_prob_last_50": 0.05985093372873962, "mean_token_accuracy": 0.8706838667392731, "step": 47120 }, { "epoch": 0.8378219828275826, "grad_norm": 1.6095772165145021, "learning_rate": 0.0001, "loss": 0.719, "mean_abs_error": 592.1830817445409, "mean_abs_error_last_10": 103.45081487197797, "mean_abs_error_last_25": 185.44282152577674, "mean_abs_error_last_50": 336.6791116979626, "mean_pred_prob": 0.041782800923101604, "mean_pred_prob_last_10": 0.1835338667500764, "mean_pred_prob_last_25": 0.10912660245085135, "mean_pred_prob_last_50": 0.06879524195101112, "mean_token_accuracy": 0.8602587401866912, "step": 47130 }, { "epoch": 0.8379997511243845, "grad_norm": 2.6737316251829575, "learning_rate": 0.0001, "loss": 0.6414, "mean_abs_error": 116.16797436140294, "mean_abs_error_last_10": 24.30628688518147, "mean_abs_error_last_25": 56.83612479762079, "mean_abs_error_last_50": 89.45072634987653, "mean_pred_prob": 0.055440330877900124, "mean_pred_prob_last_10": 0.2704104110598564, "mean_pred_prob_last_25": 0.1478762509301305, "mean_pred_prob_last_50": 0.09215734861791133, "mean_token_accuracy": 0.8769096255302429, "step": 47140 }, { "epoch": 0.8381775194211865, "grad_norm": 1.3266628179588702, "learning_rate": 0.0001, "loss": 0.6811, "mean_abs_error": 579.8418260759023, "mean_abs_error_last_10": 159.44628681286036, "mean_abs_error_last_25": 191.60241206006958, "mean_abs_error_last_50": 293.73846124480576, "mean_pred_prob": 0.04212125919293612, "mean_pred_prob_last_10": 0.21114562368020415, "mean_pred_prob_last_25": 0.12170909424312412, "mean_pred_prob_last_50": 0.07291955761611461, "mean_token_accuracy": 0.8780083179473877, "step": 47150 }, { "epoch": 0.8383552877179884, "grad_norm": 1.3510169977862718, "learning_rate": 0.0001, "loss": 0.6397, "mean_abs_error": 433.0114007855157, "mean_abs_error_last_10": 71.0743217807748, "mean_abs_error_last_25": 115.13086939483374, "mean_abs_error_last_50": 209.99586950325775, "mean_pred_prob": 0.04448165751527995, "mean_pred_prob_last_10": 0.20838851910084485, "mean_pred_prob_last_25": 0.11526723429560662, "mean_pred_prob_last_50": 0.0725901531521231, "mean_token_accuracy": 0.8722157299518585, "step": 47160 }, { "epoch": 0.8385330560147903, "grad_norm": 3.3956439557256823, "learning_rate": 0.0001, "loss": 0.7, "mean_abs_error": 401.87283790025407, "mean_abs_error_last_10": 135.24377144567785, "mean_abs_error_last_25": 209.47244810012404, "mean_abs_error_last_50": 277.0978458525651, "mean_pred_prob": 0.03506859731860459, "mean_pred_prob_last_10": 0.16491690394468606, "mean_pred_prob_last_25": 0.09051261744461954, "mean_pred_prob_last_50": 0.05642633696552366, "mean_token_accuracy": 0.8798739790916443, "step": 47170 }, { "epoch": 0.8387108243115923, "grad_norm": 1.2361169875379103, "learning_rate": 0.0001, "loss": 0.808, "mean_abs_error": 489.7363519534555, "mean_abs_error_last_10": 77.15284485650588, "mean_abs_error_last_25": 114.35801752734923, "mean_abs_error_last_50": 216.8294088144558, "mean_pred_prob": 0.04537314450717531, "mean_pred_prob_last_10": 0.22968551144003868, "mean_pred_prob_last_25": 0.12411347962915897, "mean_pred_prob_last_50": 0.07545962905278429, "mean_token_accuracy": 0.8683060526847839, "step": 47180 }, { "epoch": 0.8388885926083942, "grad_norm": 1.9475871638192388, "learning_rate": 0.0001, "loss": 0.839, "mean_abs_error": 85.64065593982639, "mean_abs_error_last_10": 18.444046455574266, "mean_abs_error_last_25": 34.73047886132173, "mean_abs_error_last_50": 56.41880835010102, "mean_pred_prob": 0.051220659352838996, "mean_pred_prob_last_10": 0.24762318655848503, "mean_pred_prob_last_25": 0.13723761402070522, "mean_pred_prob_last_50": 0.08489529751241207, "mean_token_accuracy": 0.8697348475456238, "step": 47190 }, { "epoch": 0.8390663609051962, "grad_norm": 1.386466816474513, "learning_rate": 0.0001, "loss": 0.6721, "mean_abs_error": 522.4879187061615, "mean_abs_error_last_10": 229.71554168286556, "mean_abs_error_last_25": 287.06520623394465, "mean_abs_error_last_50": 357.91460958879463, "mean_pred_prob": 0.04033854162262287, "mean_pred_prob_last_10": 0.20278852120391094, "mean_pred_prob_last_25": 0.1075007442268543, "mean_pred_prob_last_50": 0.06692821693141013, "mean_token_accuracy": 0.8678613841533661, "step": 47200 }, { "epoch": 0.8392441292019981, "grad_norm": 1.452601362476325, "learning_rate": 0.0001, "loss": 0.7553, "mean_abs_error": 443.3368191024898, "mean_abs_error_last_10": 421.2841770286761, "mean_abs_error_last_25": 437.5139012491553, "mean_abs_error_last_50": 416.6608379527356, "mean_pred_prob": 0.044719053449807686, "mean_pred_prob_last_10": 0.20333298798650504, "mean_pred_prob_last_25": 0.12085558855324052, "mean_pred_prob_last_50": 0.07469729115255178, "mean_token_accuracy": 0.8625005424022675, "step": 47210 }, { "epoch": 0.8394218974988, "grad_norm": 1.2306937169924181, "learning_rate": 0.0001, "loss": 0.7456, "mean_abs_error": 292.71524416912337, "mean_abs_error_last_10": 110.37616264054122, "mean_abs_error_last_25": 132.56938415603406, "mean_abs_error_last_50": 172.59287585966447, "mean_pred_prob": 0.02725315047428012, "mean_pred_prob_last_10": 0.13698883522301913, "mean_pred_prob_last_25": 0.0773901374079287, "mean_pred_prob_last_50": 0.045989014022052285, "mean_token_accuracy": 0.8692876935005188, "step": 47220 }, { "epoch": 0.839599665795602, "grad_norm": 0.9990445546583665, "learning_rate": 0.0001, "loss": 0.64, "mean_abs_error": 229.27459945260517, "mean_abs_error_last_10": 96.68293411781299, "mean_abs_error_last_25": 162.93842835089853, "mean_abs_error_last_50": 237.33733448604707, "mean_pred_prob": 0.04793750336393714, "mean_pred_prob_last_10": 0.2285642433911562, "mean_pred_prob_last_25": 0.12797964811325074, "mean_pred_prob_last_50": 0.0791201057843864, "mean_token_accuracy": 0.8789935410022736, "step": 47230 }, { "epoch": 0.8397774340924039, "grad_norm": 2.411849767374026, "learning_rate": 0.0001, "loss": 0.6534, "mean_abs_error": 897.969819621927, "mean_abs_error_last_10": 377.2148780134207, "mean_abs_error_last_25": 465.0291309086383, "mean_abs_error_last_50": 626.8682910826058, "mean_pred_prob": 0.03650705167237902, "mean_pred_prob_last_10": 0.17369700966519303, "mean_pred_prob_last_25": 0.10002086288004648, "mean_pred_prob_last_50": 0.061396563815651464, "mean_token_accuracy": 0.8724661767482758, "step": 47240 }, { "epoch": 0.839955202389206, "grad_norm": 1.4943390250978854, "learning_rate": 0.0001, "loss": 0.7183, "mean_abs_error": 1318.2897063004007, "mean_abs_error_last_10": 740.7995052081279, "mean_abs_error_last_25": 796.5262193639121, "mean_abs_error_last_50": 984.6236519716791, "mean_pred_prob": 0.02693309574169689, "mean_pred_prob_last_10": 0.1383571549115004, "mean_pred_prob_last_25": 0.07408414041856304, "mean_pred_prob_last_50": 0.044533291383413595, "mean_token_accuracy": 0.876813805103302, "step": 47250 }, { "epoch": 0.8401329706860079, "grad_norm": 1.3260116499395351, "learning_rate": 0.0001, "loss": 0.6526, "mean_abs_error": 787.811951900202, "mean_abs_error_last_10": 302.4138352034296, "mean_abs_error_last_25": 421.59873715029033, "mean_abs_error_last_50": 553.0222838993001, "mean_pred_prob": 0.030156730528688058, "mean_pred_prob_last_10": 0.1518357463995926, "mean_pred_prob_last_25": 0.0832246373523958, "mean_pred_prob_last_50": 0.05026689532678574, "mean_token_accuracy": 0.878184336423874, "step": 47260 }, { "epoch": 0.8403107389828098, "grad_norm": 1.2158430491834864, "learning_rate": 0.0001, "loss": 0.6938, "mean_abs_error": 732.3329483555018, "mean_abs_error_last_10": 202.65222041942022, "mean_abs_error_last_25": 260.6699288995623, "mean_abs_error_last_50": 383.18669669803137, "mean_pred_prob": 0.03999161524698138, "mean_pred_prob_last_10": 0.20477938083931804, "mean_pred_prob_last_25": 0.1153887230437249, "mean_pred_prob_last_50": 0.06858718784060329, "mean_token_accuracy": 0.8739303827285767, "step": 47270 }, { "epoch": 0.8404885072796118, "grad_norm": 2.0709076115452403, "learning_rate": 0.0001, "loss": 0.6126, "mean_abs_error": 159.5951504969787, "mean_abs_error_last_10": 37.0132080149076, "mean_abs_error_last_25": 86.69357302248072, "mean_abs_error_last_50": 126.73438505119896, "mean_pred_prob": 0.05609509348869324, "mean_pred_prob_last_10": 0.26518064104020594, "mean_pred_prob_last_25": 0.1559093963354826, "mean_pred_prob_last_50": 0.0964952887967229, "mean_token_accuracy": 0.8786443829536438, "step": 47280 }, { "epoch": 0.8406662755764137, "grad_norm": 1.304394440277931, "learning_rate": 0.0001, "loss": 0.7085, "mean_abs_error": 411.6634073700844, "mean_abs_error_last_10": 176.36815735647812, "mean_abs_error_last_25": 176.79454116168472, "mean_abs_error_last_50": 275.0851379777269, "mean_pred_prob": 0.02207673005759716, "mean_pred_prob_last_10": 0.10853171795606613, "mean_pred_prob_last_25": 0.06122967209666967, "mean_pred_prob_last_50": 0.037194952555000785, "mean_token_accuracy": 0.8760845720767975, "step": 47290 }, { "epoch": 0.8408440438732157, "grad_norm": 1.794713350035221, "learning_rate": 0.0001, "loss": 0.8278, "mean_abs_error": 304.1488634171094, "mean_abs_error_last_10": 56.431386205613286, "mean_abs_error_last_25": 103.81121116997383, "mean_abs_error_last_50": 169.25404268990698, "mean_pred_prob": 0.04413838728796691, "mean_pred_prob_last_10": 0.22259479723870754, "mean_pred_prob_last_25": 0.12099113147705794, "mean_pred_prob_last_50": 0.07407385413534939, "mean_token_accuracy": 0.8698744475841522, "step": 47300 }, { "epoch": 0.8410218121700176, "grad_norm": 1.3589695116331302, "learning_rate": 0.0001, "loss": 0.6401, "mean_abs_error": 512.9109503166165, "mean_abs_error_last_10": 185.87299061116533, "mean_abs_error_last_25": 229.98546867457216, "mean_abs_error_last_50": 330.4567809372901, "mean_pred_prob": 0.045750663889339196, "mean_pred_prob_last_10": 0.21637900349451228, "mean_pred_prob_last_25": 0.12048131680348888, "mean_pred_prob_last_50": 0.07482260812539607, "mean_token_accuracy": 0.8802396357059479, "step": 47310 }, { "epoch": 0.8411995804668195, "grad_norm": 1.7780203295027288, "learning_rate": 0.0001, "loss": 0.5682, "mean_abs_error": 507.94784025602985, "mean_abs_error_last_10": 224.78039520805265, "mean_abs_error_last_25": 228.92328402760177, "mean_abs_error_last_50": 296.5320281897972, "mean_pred_prob": 0.03128710806486197, "mean_pred_prob_last_10": 0.1584831677377224, "mean_pred_prob_last_25": 0.08475916768657044, "mean_pred_prob_last_50": 0.05250692968256772, "mean_token_accuracy": 0.8856671452522278, "step": 47320 }, { "epoch": 0.8413773487636215, "grad_norm": 1.3790546774953185, "learning_rate": 0.0001, "loss": 0.7007, "mean_abs_error": 691.0571930408603, "mean_abs_error_last_10": 413.27802459259374, "mean_abs_error_last_25": 448.7054526460958, "mean_abs_error_last_50": 471.3838452721633, "mean_pred_prob": 0.02315930024487898, "mean_pred_prob_last_10": 0.11922318949364126, "mean_pred_prob_last_25": 0.06517995222238823, "mean_pred_prob_last_50": 0.03933409950695932, "mean_token_accuracy": 0.8763334512710571, "step": 47330 }, { "epoch": 0.8415551170604234, "grad_norm": 1.1689647228274054, "learning_rate": 0.0001, "loss": 0.7977, "mean_abs_error": 223.97159424388687, "mean_abs_error_last_10": 74.29646242465388, "mean_abs_error_last_25": 89.48440403361255, "mean_abs_error_last_50": 125.10450620939261, "mean_pred_prob": 0.048403639858588574, "mean_pred_prob_last_10": 0.2477023173123598, "mean_pred_prob_last_25": 0.13856613393872977, "mean_pred_prob_last_50": 0.08149746097624302, "mean_token_accuracy": 0.8806884169578553, "step": 47340 }, { "epoch": 0.8417328853572253, "grad_norm": 2.1241932579991527, "learning_rate": 0.0001, "loss": 0.7271, "mean_abs_error": 311.73112148820115, "mean_abs_error_last_10": 182.001937172823, "mean_abs_error_last_25": 189.14459589748358, "mean_abs_error_last_50": 236.50651599660705, "mean_pred_prob": 0.050794820813462135, "mean_pred_prob_last_10": 0.24307681079953908, "mean_pred_prob_last_25": 0.13982409071177243, "mean_pred_prob_last_50": 0.08514427188783884, "mean_token_accuracy": 0.8617946386337281, "step": 47350 }, { "epoch": 0.8419106536540273, "grad_norm": 1.5915526380358793, "learning_rate": 0.0001, "loss": 0.7782, "mean_abs_error": 448.01940923545044, "mean_abs_error_last_10": 147.67702551477572, "mean_abs_error_last_25": 192.89653906407403, "mean_abs_error_last_50": 265.6278804974124, "mean_pred_prob": 0.027365457103587686, "mean_pred_prob_last_10": 0.14203079361468554, "mean_pred_prob_last_25": 0.07872293684631586, "mean_pred_prob_last_50": 0.04731623129919171, "mean_token_accuracy": 0.8722911238670349, "step": 47360 }, { "epoch": 0.8420884219508293, "grad_norm": 1.5636432251436707, "learning_rate": 0.0001, "loss": 0.7135, "mean_abs_error": 174.7605581976997, "mean_abs_error_last_10": 81.85991676171736, "mean_abs_error_last_25": 119.48820352166926, "mean_abs_error_last_50": 186.02676851683617, "mean_pred_prob": 0.05063039227388799, "mean_pred_prob_last_10": 0.23041931986808778, "mean_pred_prob_last_25": 0.13201964208856226, "mean_pred_prob_last_50": 0.08306112317368389, "mean_token_accuracy": 0.867792284488678, "step": 47370 }, { "epoch": 0.8422661902476313, "grad_norm": 1.3876410064240132, "learning_rate": 0.0001, "loss": 0.6617, "mean_abs_error": 364.1930960311865, "mean_abs_error_last_10": 223.78729878209555, "mean_abs_error_last_25": 281.0719974616243, "mean_abs_error_last_50": 276.13384538811243, "mean_pred_prob": 0.05788329563802108, "mean_pred_prob_last_10": 0.25843773630913347, "mean_pred_prob_last_25": 0.15407385190483183, "mean_pred_prob_last_50": 0.09516509511158802, "mean_token_accuracy": 0.8715495109558106, "step": 47380 }, { "epoch": 0.8424439585444332, "grad_norm": 1.777597554742186, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 574.7996736123412, "mean_abs_error_last_10": 177.58902069729257, "mean_abs_error_last_25": 257.6384615725452, "mean_abs_error_last_50": 341.4183308780049, "mean_pred_prob": 0.02670592609792948, "mean_pred_prob_last_10": 0.15561118349432945, "mean_pred_prob_last_25": 0.0804320984520018, "mean_pred_prob_last_50": 0.046564257610589264, "mean_token_accuracy": 0.8670737981796265, "step": 47390 }, { "epoch": 0.8426217268412352, "grad_norm": 1.5364413984830385, "learning_rate": 0.0001, "loss": 0.7943, "mean_abs_error": 339.07839445844013, "mean_abs_error_last_10": 139.77780199334282, "mean_abs_error_last_25": 157.50463931100003, "mean_abs_error_last_50": 184.48561640839984, "mean_pred_prob": 0.04854263475863263, "mean_pred_prob_last_10": 0.24384244135580957, "mean_pred_prob_last_25": 0.13567629021126776, "mean_pred_prob_last_50": 0.08089396513532847, "mean_token_accuracy": 0.8757696926593781, "step": 47400 }, { "epoch": 0.8427994951380371, "grad_norm": 1.5162398638313421, "learning_rate": 0.0001, "loss": 0.933, "mean_abs_error": 668.0290881169469, "mean_abs_error_last_10": 331.9894856984419, "mean_abs_error_last_25": 459.6104175901377, "mean_abs_error_last_50": 619.3156126503762, "mean_pred_prob": 0.03977220400702208, "mean_pred_prob_last_10": 0.2035125156864524, "mean_pred_prob_last_25": 0.11255999775603413, "mean_pred_prob_last_50": 0.06806159266270698, "mean_token_accuracy": 0.8623327493667603, "step": 47410 }, { "epoch": 0.842977263434839, "grad_norm": 2.0415121257587328, "learning_rate": 0.0001, "loss": 0.7924, "mean_abs_error": 375.7209394313486, "mean_abs_error_last_10": 176.52411903990784, "mean_abs_error_last_25": 212.959577570569, "mean_abs_error_last_50": 253.82117198732368, "mean_pred_prob": 0.028468757087830455, "mean_pred_prob_last_10": 0.13428076308919118, "mean_pred_prob_last_25": 0.07511722015915438, "mean_pred_prob_last_50": 0.04698800485348329, "mean_token_accuracy": 0.8581636846065521, "step": 47420 }, { "epoch": 0.843155031731641, "grad_norm": 1.0846621477815106, "learning_rate": 0.0001, "loss": 0.774, "mean_abs_error": 365.27452963087455, "mean_abs_error_last_10": 118.05522833094298, "mean_abs_error_last_25": 145.113058010815, "mean_abs_error_last_50": 216.43723815843904, "mean_pred_prob": 0.04517733945394866, "mean_pred_prob_last_10": 0.22999844616279005, "mean_pred_prob_last_25": 0.12752096556359901, "mean_pred_prob_last_50": 0.07574362037703394, "mean_token_accuracy": 0.8779507756233216, "step": 47430 }, { "epoch": 0.8433328000284429, "grad_norm": 2.1906048554743807, "learning_rate": 0.0001, "loss": 0.7042, "mean_abs_error": 570.2646190435212, "mean_abs_error_last_10": 156.64091859891022, "mean_abs_error_last_25": 218.09817907510728, "mean_abs_error_last_50": 367.5797003835061, "mean_pred_prob": 0.024722430779365822, "mean_pred_prob_last_10": 0.12406552828615532, "mean_pred_prob_last_25": 0.06725980332121254, "mean_pred_prob_last_50": 0.04115839507430792, "mean_token_accuracy": 0.8690466046333313, "step": 47440 }, { "epoch": 0.8435105683252448, "grad_norm": 1.4076838132441811, "learning_rate": 0.0001, "loss": 0.635, "mean_abs_error": 139.43098085236537, "mean_abs_error_last_10": 47.058547292424535, "mean_abs_error_last_25": 82.53350316137158, "mean_abs_error_last_50": 102.12204858394057, "mean_pred_prob": 0.04642470180988312, "mean_pred_prob_last_10": 0.22188302613794803, "mean_pred_prob_last_25": 0.12560438178479671, "mean_pred_prob_last_50": 0.07756828758865594, "mean_token_accuracy": 0.8743105232715607, "step": 47450 }, { "epoch": 0.8436883366220468, "grad_norm": 2.5293531197766668, "learning_rate": 0.0001, "loss": 0.6489, "mean_abs_error": 932.8368461699711, "mean_abs_error_last_10": 456.5626933205861, "mean_abs_error_last_25": 455.4465922653052, "mean_abs_error_last_50": 654.7723106735377, "mean_pred_prob": 0.014157307514688, "mean_pred_prob_last_10": 0.07957034795545041, "mean_pred_prob_last_25": 0.03843514517648146, "mean_pred_prob_last_50": 0.023285780375590547, "mean_token_accuracy": 0.8721913576126099, "step": 47460 }, { "epoch": 0.8438661049188487, "grad_norm": 0.8448854309122833, "learning_rate": 0.0001, "loss": 0.6031, "mean_abs_error": 214.4618040762694, "mean_abs_error_last_10": 25.33099203854871, "mean_abs_error_last_25": 45.39229314301087, "mean_abs_error_last_50": 102.43431143033872, "mean_pred_prob": 0.04749212292954326, "mean_pred_prob_last_10": 0.22596071921288968, "mean_pred_prob_last_25": 0.1268058679997921, "mean_pred_prob_last_50": 0.07856816686689853, "mean_token_accuracy": 0.8754377067089081, "step": 47470 }, { "epoch": 0.8440438732156508, "grad_norm": 2.201444912786207, "learning_rate": 0.0001, "loss": 0.6715, "mean_abs_error": 189.84102459508608, "mean_abs_error_last_10": 113.130114807093, "mean_abs_error_last_25": 124.87681103995627, "mean_abs_error_last_50": 156.66337869065265, "mean_pred_prob": 0.0604911871952936, "mean_pred_prob_last_10": 0.25397396872285755, "mean_pred_prob_last_25": 0.15637308378936723, "mean_pred_prob_last_50": 0.09887212680187077, "mean_token_accuracy": 0.8787929832935333, "step": 47480 }, { "epoch": 0.8442216415124527, "grad_norm": 1.1092132060442228, "learning_rate": 0.0001, "loss": 0.6412, "mean_abs_error": 979.8665437371031, "mean_abs_error_last_10": 599.0575696663419, "mean_abs_error_last_25": 672.139461867554, "mean_abs_error_last_50": 754.3538827226587, "mean_pred_prob": 0.0460795795879676, "mean_pred_prob_last_10": 0.2302750888164155, "mean_pred_prob_last_25": 0.13104719182592817, "mean_pred_prob_last_50": 0.07960391978267581, "mean_token_accuracy": 0.8732766032218933, "step": 47490 }, { "epoch": 0.8443994098092547, "grad_norm": 1.2751889428761123, "learning_rate": 0.0001, "loss": 0.7339, "mean_abs_error": 438.5780987046246, "mean_abs_error_last_10": 206.41660866887796, "mean_abs_error_last_25": 244.38842242302744, "mean_abs_error_last_50": 316.7338008472831, "mean_pred_prob": 0.05364248870755546, "mean_pred_prob_last_10": 0.2343231688835658, "mean_pred_prob_last_25": 0.1392166554753203, "mean_pred_prob_last_50": 0.08667174467700534, "mean_token_accuracy": 0.8828213512897491, "step": 47500 }, { "epoch": 0.8445771781060566, "grad_norm": 1.1314823877474753, "learning_rate": 0.0001, "loss": 0.642, "mean_abs_error": 780.5417519412414, "mean_abs_error_last_10": 292.73170902517313, "mean_abs_error_last_25": 358.13493862959893, "mean_abs_error_last_50": 511.69009569340506, "mean_pred_prob": 0.04773137374722865, "mean_pred_prob_last_10": 0.22807469082181342, "mean_pred_prob_last_25": 0.12779686396825127, "mean_pred_prob_last_50": 0.07926147556281649, "mean_token_accuracy": 0.8783976137638092, "step": 47510 }, { "epoch": 0.8447549464028585, "grad_norm": 1.0031408494410425, "learning_rate": 0.0001, "loss": 0.6509, "mean_abs_error": 1331.0007271331178, "mean_abs_error_last_10": 774.4382090548951, "mean_abs_error_last_25": 901.8213949935629, "mean_abs_error_last_50": 1083.7737136912624, "mean_pred_prob": 0.034168624227459074, "mean_pred_prob_last_10": 0.15566435085784178, "mean_pred_prob_last_25": 0.09229638567121583, "mean_pred_prob_last_50": 0.057185706675227266, "mean_token_accuracy": 0.8669344186782837, "step": 47520 }, { "epoch": 0.8449327146996605, "grad_norm": 1.319618490704679, "learning_rate": 0.0001, "loss": 0.6392, "mean_abs_error": 611.7369921791603, "mean_abs_error_last_10": 163.29239069254544, "mean_abs_error_last_25": 191.7341038573436, "mean_abs_error_last_50": 366.31409286431983, "mean_pred_prob": 0.040256550291087476, "mean_pred_prob_last_10": 0.18453364978777245, "mean_pred_prob_last_25": 0.10632223298307508, "mean_pred_prob_last_50": 0.06622629010817035, "mean_token_accuracy": 0.8694228053092956, "step": 47530 }, { "epoch": 0.8451104829964624, "grad_norm": 1.6137689078262871, "learning_rate": 0.0001, "loss": 0.6515, "mean_abs_error": 510.12523079196563, "mean_abs_error_last_10": 124.66260490742886, "mean_abs_error_last_25": 156.93324529979054, "mean_abs_error_last_50": 295.06632908047266, "mean_pred_prob": 0.037188758095726374, "mean_pred_prob_last_10": 0.18415514342486858, "mean_pred_prob_last_25": 0.10038459179922939, "mean_pred_prob_last_50": 0.06182051878422499, "mean_token_accuracy": 0.8771957933902741, "step": 47540 }, { "epoch": 0.8452882512932643, "grad_norm": 1.3540327784072073, "learning_rate": 0.0001, "loss": 0.6667, "mean_abs_error": 809.026136453796, "mean_abs_error_last_10": 474.0995587702606, "mean_abs_error_last_25": 511.5472583639783, "mean_abs_error_last_50": 597.8123210386206, "mean_pred_prob": 0.040475499034801035, "mean_pred_prob_last_10": 0.18358892702963203, "mean_pred_prob_last_25": 0.1060760002117604, "mean_pred_prob_last_50": 0.0661721426731674, "mean_token_accuracy": 0.8709444165229797, "step": 47550 }, { "epoch": 0.8454660195900663, "grad_norm": 1.3354686958575124, "learning_rate": 0.0001, "loss": 0.5597, "mean_abs_error": 435.77040228075657, "mean_abs_error_last_10": 126.96394633132654, "mean_abs_error_last_25": 147.97330170318585, "mean_abs_error_last_50": 244.75942959083142, "mean_pred_prob": 0.04566715093096718, "mean_pred_prob_last_10": 0.22890503332018852, "mean_pred_prob_last_25": 0.1300579355331138, "mean_pred_prob_last_50": 0.07757778086233884, "mean_token_accuracy": 0.8805470943450928, "step": 47560 }, { "epoch": 0.8456437878868682, "grad_norm": 1.3570243404526843, "learning_rate": 0.0001, "loss": 0.7318, "mean_abs_error": 675.0212674454399, "mean_abs_error_last_10": 346.2526052251096, "mean_abs_error_last_25": 402.971996244258, "mean_abs_error_last_50": 502.44559998779084, "mean_pred_prob": 0.0275700167985633, "mean_pred_prob_last_10": 0.13177784737199544, "mean_pred_prob_last_25": 0.07710641073063015, "mean_pred_prob_last_50": 0.047407654020935296, "mean_token_accuracy": 0.8660261571407318, "step": 47570 }, { "epoch": 0.8458215561836702, "grad_norm": 1.1441158486801652, "learning_rate": 0.0001, "loss": 0.6308, "mean_abs_error": 825.1275455549501, "mean_abs_error_last_10": 463.1854268186633, "mean_abs_error_last_25": 407.90573342485624, "mean_abs_error_last_50": 533.3346463719001, "mean_pred_prob": 0.025974815656081774, "mean_pred_prob_last_10": 0.1284147010417655, "mean_pred_prob_last_25": 0.07312295024748891, "mean_pred_prob_last_50": 0.043590616708388555, "mean_token_accuracy": 0.8685145914554596, "step": 47580 }, { "epoch": 0.8459993244804721, "grad_norm": 1.9035630204390805, "learning_rate": 0.0001, "loss": 0.614, "mean_abs_error": 356.03311652001133, "mean_abs_error_last_10": 66.30114370758871, "mean_abs_error_last_25": 164.95535776740059, "mean_abs_error_last_50": 211.53827219694045, "mean_pred_prob": 0.041226077103056015, "mean_pred_prob_last_10": 0.2135022647678852, "mean_pred_prob_last_25": 0.11889680661261082, "mean_pred_prob_last_50": 0.070096904784441, "mean_token_accuracy": 0.8822640955448151, "step": 47590 }, { "epoch": 0.8461770927772742, "grad_norm": 2.0015241726537147, "learning_rate": 0.0001, "loss": 0.6633, "mean_abs_error": 598.5313683303445, "mean_abs_error_last_10": 270.4441440083268, "mean_abs_error_last_25": 266.28970283469357, "mean_abs_error_last_50": 338.58342405399594, "mean_pred_prob": 0.05030372865730896, "mean_pred_prob_last_10": 0.22775923631852493, "mean_pred_prob_last_25": 0.13535874179797247, "mean_pred_prob_last_50": 0.0847011226520408, "mean_token_accuracy": 0.8735277831554413, "step": 47600 }, { "epoch": 0.8463548610740761, "grad_norm": 2.2807019252809, "learning_rate": 0.0001, "loss": 0.6894, "mean_abs_error": 482.9126832215309, "mean_abs_error_last_10": 176.2579067451419, "mean_abs_error_last_25": 196.21277065244627, "mean_abs_error_last_50": 327.860304493409, "mean_pred_prob": 0.033443780289962886, "mean_pred_prob_last_10": 0.16399070322513581, "mean_pred_prob_last_25": 0.09343541590496898, "mean_pred_prob_last_50": 0.05696433437988162, "mean_token_accuracy": 0.8775048613548279, "step": 47610 }, { "epoch": 0.846532629370878, "grad_norm": 1.7498789793649026, "learning_rate": 0.0001, "loss": 0.6736, "mean_abs_error": 443.29311627589396, "mean_abs_error_last_10": 83.79246553424169, "mean_abs_error_last_25": 122.56108407030763, "mean_abs_error_last_50": 248.86257203111342, "mean_pred_prob": 0.04219548337860033, "mean_pred_prob_last_10": 0.20939410345163195, "mean_pred_prob_last_25": 0.11932913449127228, "mean_pred_prob_last_50": 0.0719446683418937, "mean_token_accuracy": 0.8635236740112304, "step": 47620 }, { "epoch": 0.84671039766768, "grad_norm": 2.4717741581392323, "learning_rate": 0.0001, "loss": 0.6036, "mean_abs_error": 357.9282656558119, "mean_abs_error_last_10": 111.322100434156, "mean_abs_error_last_25": 129.50888675725793, "mean_abs_error_last_50": 193.29136580084946, "mean_pred_prob": 0.05364260688656941, "mean_pred_prob_last_10": 0.24684224855154752, "mean_pred_prob_last_25": 0.14135804600082338, "mean_pred_prob_last_50": 0.08750234110048041, "mean_token_accuracy": 0.8868961930274963, "step": 47630 }, { "epoch": 0.8468881659644819, "grad_norm": 0.9872391720455714, "learning_rate": 0.0001, "loss": 0.6375, "mean_abs_error": 561.704445244542, "mean_abs_error_last_10": 87.9208276906163, "mean_abs_error_last_25": 196.3156766930951, "mean_abs_error_last_50": 372.31159739181555, "mean_pred_prob": 0.036607570061460135, "mean_pred_prob_last_10": 0.19014521054923533, "mean_pred_prob_last_25": 0.09970621392130852, "mean_pred_prob_last_50": 0.060839876811951396, "mean_token_accuracy": 0.8751621007919311, "step": 47640 }, { "epoch": 0.8470659342612838, "grad_norm": 1.2736232975314807, "learning_rate": 0.0001, "loss": 0.6432, "mean_abs_error": 206.39967025830066, "mean_abs_error_last_10": 57.73261324980981, "mean_abs_error_last_25": 75.16637862359006, "mean_abs_error_last_50": 150.29855109276568, "mean_pred_prob": 0.06506284419447184, "mean_pred_prob_last_10": 0.292458089068532, "mean_pred_prob_last_25": 0.17034896407276393, "mean_pred_prob_last_50": 0.107198241725564, "mean_token_accuracy": 0.8808294832706451, "step": 47650 }, { "epoch": 0.8472437025580858, "grad_norm": 0.93197902815047, "learning_rate": 0.0001, "loss": 0.6763, "mean_abs_error": 1239.1715395619733, "mean_abs_error_last_10": 655.7472124426392, "mean_abs_error_last_25": 726.2743846598639, "mean_abs_error_last_50": 915.2389148134122, "mean_pred_prob": 0.03688255706729251, "mean_pred_prob_last_10": 0.18841364192194304, "mean_pred_prob_last_25": 0.10421521680837031, "mean_pred_prob_last_50": 0.06267413788154955, "mean_token_accuracy": 0.8711202681064606, "step": 47660 }, { "epoch": 0.8474214708548877, "grad_norm": 0.8392885826098436, "learning_rate": 0.0001, "loss": 0.6885, "mean_abs_error": 596.3085635765223, "mean_abs_error_last_10": 287.51569646676603, "mean_abs_error_last_25": 348.48362491720104, "mean_abs_error_last_50": 374.8212763234571, "mean_pred_prob": 0.050540062331128864, "mean_pred_prob_last_10": 0.24085422521457076, "mean_pred_prob_last_25": 0.13891916690627112, "mean_pred_prob_last_50": 0.08522484079003334, "mean_token_accuracy": 0.8694127440452576, "step": 47670 }, { "epoch": 0.8475992391516897, "grad_norm": 1.2799970339913982, "learning_rate": 0.0001, "loss": 0.7263, "mean_abs_error": 246.12782709408816, "mean_abs_error_last_10": 81.37679376644014, "mean_abs_error_last_25": 92.88658151281626, "mean_abs_error_last_50": 167.95037488529033, "mean_pred_prob": 0.0466442296281457, "mean_pred_prob_last_10": 0.22138429433107376, "mean_pred_prob_last_25": 0.1245477817952633, "mean_pred_prob_last_50": 0.07716999286785722, "mean_token_accuracy": 0.8748880445957183, "step": 47680 }, { "epoch": 0.8477770074484916, "grad_norm": 1.0210104324675862, "learning_rate": 0.0001, "loss": 0.6359, "mean_abs_error": 594.754279383749, "mean_abs_error_last_10": 204.73795048205693, "mean_abs_error_last_25": 263.4396791116461, "mean_abs_error_last_50": 379.5488903074371, "mean_pred_prob": 0.04156526552978903, "mean_pred_prob_last_10": 0.20284313815645874, "mean_pred_prob_last_25": 0.109931093105115, "mean_pred_prob_last_50": 0.0683420708635822, "mean_token_accuracy": 0.8663171350955963, "step": 47690 }, { "epoch": 0.8479547757452935, "grad_norm": 1.1799400038961476, "learning_rate": 0.0001, "loss": 0.5987, "mean_abs_error": 439.61337175296694, "mean_abs_error_last_10": 182.9187795679903, "mean_abs_error_last_25": 170.7363470839241, "mean_abs_error_last_50": 225.07362211727246, "mean_pred_prob": 0.03988237185403705, "mean_pred_prob_last_10": 0.19120236532762647, "mean_pred_prob_last_25": 0.10679538091644644, "mean_pred_prob_last_50": 0.06607318012975157, "mean_token_accuracy": 0.8774991273880005, "step": 47700 }, { "epoch": 0.8481325440420955, "grad_norm": 2.1901731170444005, "learning_rate": 0.0001, "loss": 0.7403, "mean_abs_error": 242.15955838993426, "mean_abs_error_last_10": 73.8558548437899, "mean_abs_error_last_25": 89.74125698222943, "mean_abs_error_last_50": 152.48304738178314, "mean_pred_prob": 0.03360438905656338, "mean_pred_prob_last_10": 0.17327525038272143, "mean_pred_prob_last_25": 0.09270253609865904, "mean_pred_prob_last_50": 0.05552759962156415, "mean_token_accuracy": 0.8709235429763794, "step": 47710 }, { "epoch": 0.8483103123388975, "grad_norm": 1.815204041309049, "learning_rate": 0.0001, "loss": 0.6469, "mean_abs_error": 305.4014113568525, "mean_abs_error_last_10": 91.50631001275363, "mean_abs_error_last_25": 92.77449292785431, "mean_abs_error_last_50": 151.95266305505294, "mean_pred_prob": 0.0467175881145522, "mean_pred_prob_last_10": 0.2326239168876782, "mean_pred_prob_last_25": 0.13062915999908, "mean_pred_prob_last_50": 0.07872287550708279, "mean_token_accuracy": 0.8693989336490631, "step": 47720 }, { "epoch": 0.8484880806356995, "grad_norm": 1.719342848777462, "learning_rate": 0.0001, "loss": 0.6942, "mean_abs_error": 476.4176437766273, "mean_abs_error_last_10": 206.38551999437908, "mean_abs_error_last_25": 202.494229973151, "mean_abs_error_last_50": 276.9984892621786, "mean_pred_prob": 0.04628344384254888, "mean_pred_prob_last_10": 0.19803994102403522, "mean_pred_prob_last_25": 0.11987941712141037, "mean_pred_prob_last_50": 0.07496976683614776, "mean_token_accuracy": 0.8619233965873718, "step": 47730 }, { "epoch": 0.8486658489325014, "grad_norm": 1.6850115654819169, "learning_rate": 0.0001, "loss": 0.8477, "mean_abs_error": 398.1619771528609, "mean_abs_error_last_10": 112.82508702021805, "mean_abs_error_last_25": 210.58177881780017, "mean_abs_error_last_50": 268.11252911275625, "mean_pred_prob": 0.031581923831254244, "mean_pred_prob_last_10": 0.16176742296665908, "mean_pred_prob_last_25": 0.08773799603804947, "mean_pred_prob_last_50": 0.05352709759026766, "mean_token_accuracy": 0.8683532476425171, "step": 47740 }, { "epoch": 0.8488436172293033, "grad_norm": 2.09691527986936, "learning_rate": 0.0001, "loss": 0.6109, "mean_abs_error": 245.69889286215684, "mean_abs_error_last_10": 58.60772045582953, "mean_abs_error_last_25": 94.43810701250155, "mean_abs_error_last_50": 171.35982925000687, "mean_pred_prob": 0.04504388067871332, "mean_pred_prob_last_10": 0.22389016319066285, "mean_pred_prob_last_25": 0.12194750271737576, "mean_pred_prob_last_50": 0.07520922762341797, "mean_token_accuracy": 0.877136892080307, "step": 47750 }, { "epoch": 0.8490213855261053, "grad_norm": 2.2675098849066, "learning_rate": 0.0001, "loss": 0.7127, "mean_abs_error": 474.89015841929705, "mean_abs_error_last_10": 118.07950707386696, "mean_abs_error_last_25": 201.7540059947234, "mean_abs_error_last_50": 284.1666623778591, "mean_pred_prob": 0.04932693463051692, "mean_pred_prob_last_10": 0.22709836512804032, "mean_pred_prob_last_25": 0.12784017794765531, "mean_pred_prob_last_50": 0.07940450778696687, "mean_token_accuracy": 0.8847114980220795, "step": 47760 }, { "epoch": 0.8491991538229072, "grad_norm": 2.0325135778609575, "learning_rate": 0.0001, "loss": 0.7549, "mean_abs_error": 484.93942843770145, "mean_abs_error_last_10": 145.3417549237717, "mean_abs_error_last_25": 233.54138838814384, "mean_abs_error_last_50": 322.39972862039406, "mean_pred_prob": 0.03547010070178658, "mean_pred_prob_last_10": 0.1738590289838612, "mean_pred_prob_last_25": 0.0968040746403858, "mean_pred_prob_last_50": 0.05845599116291851, "mean_token_accuracy": 0.8767182171344757, "step": 47770 }, { "epoch": 0.8493769221197092, "grad_norm": 1.590820764429073, "learning_rate": 0.0001, "loss": 0.7425, "mean_abs_error": 378.27404108346366, "mean_abs_error_last_10": 70.40504415370744, "mean_abs_error_last_25": 91.32096612134922, "mean_abs_error_last_50": 164.01190793749504, "mean_pred_prob": 0.04228012757375836, "mean_pred_prob_last_10": 0.1930541429668665, "mean_pred_prob_last_25": 0.11290288101881743, "mean_pred_prob_last_50": 0.06934837242588401, "mean_token_accuracy": 0.8694046378135681, "step": 47780 }, { "epoch": 0.8495546904165111, "grad_norm": 2.368879131741055, "learning_rate": 0.0001, "loss": 0.8301, "mean_abs_error": 805.1203428596765, "mean_abs_error_last_10": 267.84501652562903, "mean_abs_error_last_25": 429.37494259305697, "mean_abs_error_last_50": 505.74705182552424, "mean_pred_prob": 0.024293678608955815, "mean_pred_prob_last_10": 0.12413036501966417, "mean_pred_prob_last_25": 0.06832010763464495, "mean_pred_prob_last_50": 0.041093259840272366, "mean_token_accuracy": 0.8698191344738007, "step": 47790 }, { "epoch": 0.849732458713313, "grad_norm": 1.2128828403481726, "learning_rate": 0.0001, "loss": 0.6768, "mean_abs_error": 606.0411644518618, "mean_abs_error_last_10": 262.8097361140327, "mean_abs_error_last_25": 287.6114921510668, "mean_abs_error_last_50": 381.71742362702946, "mean_pred_prob": 0.034251388348639014, "mean_pred_prob_last_10": 0.16122226307634263, "mean_pred_prob_last_25": 0.09298737635836005, "mean_pred_prob_last_50": 0.055966137570794675, "mean_token_accuracy": 0.8776836216449737, "step": 47800 }, { "epoch": 0.849910227010115, "grad_norm": 1.5788755652978033, "learning_rate": 0.0001, "loss": 0.6406, "mean_abs_error": 500.05974592270184, "mean_abs_error_last_10": 376.6037452204647, "mean_abs_error_last_25": 396.27639462148557, "mean_abs_error_last_50": 470.36697018207803, "mean_pred_prob": 0.05942927697906271, "mean_pred_prob_last_10": 0.2514563343953341, "mean_pred_prob_last_25": 0.15371697963564657, "mean_pred_prob_last_50": 0.09704188029281795, "mean_token_accuracy": 0.8621874153614044, "step": 47810 }, { "epoch": 0.8500879953069169, "grad_norm": 1.3225183863426544, "learning_rate": 0.0001, "loss": 0.7731, "mean_abs_error": 257.7957797793494, "mean_abs_error_last_10": 76.49072155207564, "mean_abs_error_last_25": 111.32500817364803, "mean_abs_error_last_50": 132.45387125615167, "mean_pred_prob": 0.04948546546511352, "mean_pred_prob_last_10": 0.2148717038333416, "mean_pred_prob_last_25": 0.12803924102336167, "mean_pred_prob_last_50": 0.07985080601647496, "mean_token_accuracy": 0.8773311972618103, "step": 47820 }, { "epoch": 0.8502657636037189, "grad_norm": 1.826952214514179, "learning_rate": 0.0001, "loss": 0.752, "mean_abs_error": 586.2985880025592, "mean_abs_error_last_10": 154.3849668891149, "mean_abs_error_last_25": 256.32124170414346, "mean_abs_error_last_50": 357.92154090464817, "mean_pred_prob": 0.03549567842273973, "mean_pred_prob_last_10": 0.16868508412735536, "mean_pred_prob_last_25": 0.09569844958605245, "mean_pred_prob_last_50": 0.05885469393688254, "mean_token_accuracy": 0.8749501347541809, "step": 47830 }, { "epoch": 0.8504435319005209, "grad_norm": 1.8397763520539203, "learning_rate": 0.0001, "loss": 0.702, "mean_abs_error": 367.56541592937555, "mean_abs_error_last_10": 85.85882942775709, "mean_abs_error_last_25": 118.35696942317054, "mean_abs_error_last_50": 192.1079432125568, "mean_pred_prob": 0.04459818124305457, "mean_pred_prob_last_10": 0.19680096562951804, "mean_pred_prob_last_25": 0.10797641891986132, "mean_pred_prob_last_50": 0.07143209944479167, "mean_token_accuracy": 0.877961415052414, "step": 47840 }, { "epoch": 0.8506213001973228, "grad_norm": 2.6542081953142205, "learning_rate": 0.0001, "loss": 0.5934, "mean_abs_error": 98.23890097687224, "mean_abs_error_last_10": 12.523100179204507, "mean_abs_error_last_25": 26.4813670351214, "mean_abs_error_last_50": 47.36195394003976, "mean_pred_prob": 0.053289673570543525, "mean_pred_prob_last_10": 0.2550282783806324, "mean_pred_prob_last_25": 0.14456411339342595, "mean_pred_prob_last_50": 0.08956111203879118, "mean_token_accuracy": 0.8876476526260376, "step": 47850 }, { "epoch": 0.8507990684941248, "grad_norm": 1.7683858722296795, "learning_rate": 0.0001, "loss": 0.6689, "mean_abs_error": 276.53275384510744, "mean_abs_error_last_10": 91.65658256848397, "mean_abs_error_last_25": 145.21626284678385, "mean_abs_error_last_50": 193.13559543255832, "mean_pred_prob": 0.048275716695934535, "mean_pred_prob_last_10": 0.22858908362686634, "mean_pred_prob_last_25": 0.13214534427970648, "mean_pred_prob_last_50": 0.0806240675970912, "mean_token_accuracy": 0.8673648476600647, "step": 47860 }, { "epoch": 0.8509768367909267, "grad_norm": 1.4260666651587224, "learning_rate": 0.0001, "loss": 0.7807, "mean_abs_error": 821.1337241421022, "mean_abs_error_last_10": 254.3627408433851, "mean_abs_error_last_25": 414.19986955138313, "mean_abs_error_last_50": 579.5902344961845, "mean_pred_prob": 0.02265455677988939, "mean_pred_prob_last_10": 0.11752675415482372, "mean_pred_prob_last_25": 0.06267503534909338, "mean_pred_prob_last_50": 0.037970145139843224, "mean_token_accuracy": 0.8715987980365754, "step": 47870 }, { "epoch": 0.8511546050877287, "grad_norm": 1.76630831759342, "learning_rate": 0.0001, "loss": 0.8, "mean_abs_error": 533.0787124232154, "mean_abs_error_last_10": 202.7311212030923, "mean_abs_error_last_25": 289.3613470152233, "mean_abs_error_last_50": 395.3532343831952, "mean_pred_prob": 0.04613430691533722, "mean_pred_prob_last_10": 0.22389736394397913, "mean_pred_prob_last_25": 0.12486993091297335, "mean_pred_prob_last_50": 0.07630297761643305, "mean_token_accuracy": 0.8676418185234069, "step": 47880 }, { "epoch": 0.8513323733845306, "grad_norm": 1.6387964587162918, "learning_rate": 0.0001, "loss": 0.6307, "mean_abs_error": 45.82723374448492, "mean_abs_error_last_10": 9.082852029070601, "mean_abs_error_last_25": 21.67538691016515, "mean_abs_error_last_50": 32.23330325909976, "mean_pred_prob": 0.06907125357538461, "mean_pred_prob_last_10": 0.31517752110958097, "mean_pred_prob_last_25": 0.17923821732401848, "mean_pred_prob_last_50": 0.11365954577922821, "mean_token_accuracy": 0.8735856056213379, "step": 47890 }, { "epoch": 0.8515101416813325, "grad_norm": 1.2015494539192728, "learning_rate": 0.0001, "loss": 0.6344, "mean_abs_error": 225.80879564536417, "mean_abs_error_last_10": 95.0332540869558, "mean_abs_error_last_25": 90.78299195175445, "mean_abs_error_last_50": 150.14898945133965, "mean_pred_prob": 0.030716108018532397, "mean_pred_prob_last_10": 0.15348283424973488, "mean_pred_prob_last_25": 0.08668428808450698, "mean_pred_prob_last_50": 0.05196894006803632, "mean_token_accuracy": 0.8696674108505249, "step": 47900 }, { "epoch": 0.8516879099781345, "grad_norm": 1.148655289019222, "learning_rate": 0.0001, "loss": 0.6966, "mean_abs_error": 526.30081145406, "mean_abs_error_last_10": 130.18944198118896, "mean_abs_error_last_25": 254.03348988725747, "mean_abs_error_last_50": 331.7834248714887, "mean_pred_prob": 0.04179478387814015, "mean_pred_prob_last_10": 0.17756573855876923, "mean_pred_prob_last_25": 0.10888721328228712, "mean_pred_prob_last_50": 0.06865386264398694, "mean_token_accuracy": 0.8755277395248413, "step": 47910 }, { "epoch": 0.8518656782749364, "grad_norm": 0.9530202762997836, "learning_rate": 0.0001, "loss": 0.6306, "mean_abs_error": 378.9884723477113, "mean_abs_error_last_10": 58.196144839427234, "mean_abs_error_last_25": 138.89092380264776, "mean_abs_error_last_50": 254.47609459415315, "mean_pred_prob": 0.043422030145302416, "mean_pred_prob_last_10": 0.23210697583854198, "mean_pred_prob_last_25": 0.12628776198253036, "mean_pred_prob_last_50": 0.07283465401269495, "mean_token_accuracy": 0.874262136220932, "step": 47920 }, { "epoch": 0.8520434465717384, "grad_norm": 1.0252489672648497, "learning_rate": 0.0001, "loss": 0.6869, "mean_abs_error": 382.93482693776014, "mean_abs_error_last_10": 71.46368623367673, "mean_abs_error_last_25": 114.19569654400316, "mean_abs_error_last_50": 202.05279713927803, "mean_pred_prob": 0.05227375838439911, "mean_pred_prob_last_10": 0.23567389631643892, "mean_pred_prob_last_25": 0.14015394817106425, "mean_pred_prob_last_50": 0.08674144402612001, "mean_token_accuracy": 0.8788108885288238, "step": 47930 }, { "epoch": 0.8522212148685403, "grad_norm": 1.0534256207012327, "learning_rate": 0.0001, "loss": 0.7998, "mean_abs_error": 207.85684457204394, "mean_abs_error_last_10": 66.74125789965558, "mean_abs_error_last_25": 147.99825970153375, "mean_abs_error_last_50": 187.61189444844632, "mean_pred_prob": 0.06456688302569091, "mean_pred_prob_last_10": 0.3136673400178552, "mean_pred_prob_last_25": 0.1754255869425833, "mean_pred_prob_last_50": 0.10675364178605377, "mean_token_accuracy": 0.8797492444515228, "step": 47940 }, { "epoch": 0.8523989831653422, "grad_norm": 1.5562571669192695, "learning_rate": 0.0001, "loss": 0.7145, "mean_abs_error": 1753.2028027381689, "mean_abs_error_last_10": 769.0745454686094, "mean_abs_error_last_25": 1005.1146671581475, "mean_abs_error_last_50": 1269.702885420425, "mean_pred_prob": 0.017375919544429054, "mean_pred_prob_last_10": 0.10221253744675778, "mean_pred_prob_last_25": 0.04828454530215822, "mean_pred_prob_last_50": 0.028580271906685085, "mean_token_accuracy": 0.8694225788116455, "step": 47950 }, { "epoch": 0.8525767514621443, "grad_norm": 2.532537766799472, "learning_rate": 0.0001, "loss": 0.7549, "mean_abs_error": 306.95286416297705, "mean_abs_error_last_10": 36.612507202675324, "mean_abs_error_last_25": 58.91087424580809, "mean_abs_error_last_50": 137.5183618266285, "mean_pred_prob": 0.05545771117322147, "mean_pred_prob_last_10": 0.2514088263735175, "mean_pred_prob_last_25": 0.15177424689754843, "mean_pred_prob_last_50": 0.09333546063862741, "mean_token_accuracy": 0.8782503008842468, "step": 47960 }, { "epoch": 0.8527545197589462, "grad_norm": 1.332950353563076, "learning_rate": 0.0001, "loss": 0.8586, "mean_abs_error": 490.1258705239349, "mean_abs_error_last_10": 288.9644664030102, "mean_abs_error_last_25": 299.32677703639536, "mean_abs_error_last_50": 357.9457561560989, "mean_pred_prob": 0.044297985709272326, "mean_pred_prob_last_10": 0.21432276902487502, "mean_pred_prob_last_25": 0.11523710393230431, "mean_pred_prob_last_50": 0.07212671018205583, "mean_token_accuracy": 0.8746812403202057, "step": 47970 }, { "epoch": 0.8529322880557482, "grad_norm": 1.0154453808528212, "learning_rate": 0.0001, "loss": 0.6733, "mean_abs_error": 499.5096690628981, "mean_abs_error_last_10": 49.91079620890504, "mean_abs_error_last_25": 223.29949563361674, "mean_abs_error_last_50": 352.4559293929775, "mean_pred_prob": 0.03325424734503031, "mean_pred_prob_last_10": 0.1516309179365635, "mean_pred_prob_last_25": 0.08675942681729794, "mean_pred_prob_last_50": 0.05438048038631678, "mean_token_accuracy": 0.8725951373577118, "step": 47980 }, { "epoch": 0.8531100563525501, "grad_norm": 0.9148308321628486, "learning_rate": 0.0001, "loss": 0.6183, "mean_abs_error": 139.2744688429564, "mean_abs_error_last_10": 74.96830220694105, "mean_abs_error_last_25": 98.64910155210873, "mean_abs_error_last_50": 109.1868979252589, "mean_pred_prob": 0.046869309945032, "mean_pred_prob_last_10": 0.21334163509309292, "mean_pred_prob_last_25": 0.12091570235788822, "mean_pred_prob_last_50": 0.07658241307362915, "mean_token_accuracy": 0.8782031834125519, "step": 47990 }, { "epoch": 0.853287824649352, "grad_norm": 3.1681427418587016, "learning_rate": 0.0001, "loss": 0.6631, "mean_abs_error": 204.21962717673824, "mean_abs_error_last_10": 40.1557278816357, "mean_abs_error_last_25": 79.3447071140009, "mean_abs_error_last_50": 127.71876515175836, "mean_pred_prob": 0.037770038936287165, "mean_pred_prob_last_10": 0.18675218150019646, "mean_pred_prob_last_25": 0.10303123649209738, "mean_pred_prob_last_50": 0.06252398919314146, "mean_token_accuracy": 0.874099737405777, "step": 48000 }, { "epoch": 0.853465592946154, "grad_norm": 2.244825154096273, "learning_rate": 0.0001, "loss": 0.6765, "mean_abs_error": 401.5808075981604, "mean_abs_error_last_10": 358.78816703918454, "mean_abs_error_last_25": 364.65271250432613, "mean_abs_error_last_50": 373.09588804059183, "mean_pred_prob": 0.03929772374685854, "mean_pred_prob_last_10": 0.20124309891834855, "mean_pred_prob_last_25": 0.10840046578086912, "mean_pred_prob_last_50": 0.06488919979892671, "mean_token_accuracy": 0.8770433902740479, "step": 48010 }, { "epoch": 0.8536433612429559, "grad_norm": 1.691658707793114, "learning_rate": 0.0001, "loss": 0.8831, "mean_abs_error": 611.9455180120151, "mean_abs_error_last_10": 289.68483889085644, "mean_abs_error_last_25": 303.8845981207249, "mean_abs_error_last_50": 402.8724590114821, "mean_pred_prob": 0.03770398750784807, "mean_pred_prob_last_10": 0.16175693923723883, "mean_pred_prob_last_25": 0.0968928226735443, "mean_pred_prob_last_50": 0.062112878542393446, "mean_token_accuracy": 0.8609725594520569, "step": 48020 }, { "epoch": 0.8538211295397579, "grad_norm": 0.9479626239469907, "learning_rate": 0.0001, "loss": 0.7252, "mean_abs_error": 1585.7371157593493, "mean_abs_error_last_10": 744.9860498950804, "mean_abs_error_last_25": 826.0997142449426, "mean_abs_error_last_50": 1093.92472057381, "mean_pred_prob": 0.03961501158773899, "mean_pred_prob_last_10": 0.18109674956358504, "mean_pred_prob_last_25": 0.10766930539102759, "mean_pred_prob_last_50": 0.0655261148524005, "mean_token_accuracy": 0.8794802069664002, "step": 48030 }, { "epoch": 0.8539988978365598, "grad_norm": 2.3208596438613878, "learning_rate": 0.0001, "loss": 0.6681, "mean_abs_error": 656.3629516107185, "mean_abs_error_last_10": 250.8727120904242, "mean_abs_error_last_25": 314.43457830144655, "mean_abs_error_last_50": 438.8580384109243, "mean_pred_prob": 0.025087936647469177, "mean_pred_prob_last_10": 0.11597181453835219, "mean_pred_prob_last_25": 0.06559982955805026, "mean_pred_prob_last_50": 0.04182001265580766, "mean_token_accuracy": 0.8722387433052063, "step": 48040 }, { "epoch": 0.8541766661333617, "grad_norm": 1.2905817987680634, "learning_rate": 0.0001, "loss": 0.7701, "mean_abs_error": 349.76430017727427, "mean_abs_error_last_10": 155.97052127740795, "mean_abs_error_last_25": 242.44067343573775, "mean_abs_error_last_50": 274.6155784685612, "mean_pred_prob": 0.0429083869792521, "mean_pred_prob_last_10": 0.21252079568803312, "mean_pred_prob_last_25": 0.12093536332249641, "mean_pred_prob_last_50": 0.07167448811233043, "mean_token_accuracy": 0.8738030016422271, "step": 48050 }, { "epoch": 0.8543544344301637, "grad_norm": 2.9308221536337804, "learning_rate": 0.0001, "loss": 0.7955, "mean_abs_error": 139.77173838263076, "mean_abs_error_last_10": 25.76437938243991, "mean_abs_error_last_25": 45.65424062110821, "mean_abs_error_last_50": 76.73451117361225, "mean_pred_prob": 0.04639782523736358, "mean_pred_prob_last_10": 0.21887581311166288, "mean_pred_prob_last_25": 0.1262496927753091, "mean_pred_prob_last_50": 0.07702759644016624, "mean_token_accuracy": 0.8766775250434875, "step": 48060 }, { "epoch": 0.8545322027269657, "grad_norm": 1.440403206269295, "learning_rate": 0.0001, "loss": 0.6862, "mean_abs_error": 609.4458898342897, "mean_abs_error_last_10": 268.87101048412416, "mean_abs_error_last_25": 276.90738305928255, "mean_abs_error_last_50": 323.16218327359644, "mean_pred_prob": 0.03974694577627815, "mean_pred_prob_last_10": 0.18878810793394224, "mean_pred_prob_last_25": 0.10901089484686963, "mean_pred_prob_last_50": 0.0658079755725339, "mean_token_accuracy": 0.8611660957336426, "step": 48070 }, { "epoch": 0.8547099710237677, "grad_norm": 1.4967635280880156, "learning_rate": 0.0001, "loss": 0.6614, "mean_abs_error": 514.4553709424889, "mean_abs_error_last_10": 137.9395660341562, "mean_abs_error_last_25": 282.3530438536106, "mean_abs_error_last_50": 339.5934253062673, "mean_pred_prob": 0.029963424103334545, "mean_pred_prob_last_10": 0.1462361991405487, "mean_pred_prob_last_25": 0.08068346008658409, "mean_pred_prob_last_50": 0.04972315980121493, "mean_token_accuracy": 0.8751821756362915, "step": 48080 }, { "epoch": 0.8548877393205696, "grad_norm": 2.044499338275669, "learning_rate": 0.0001, "loss": 0.6505, "mean_abs_error": 369.3232367111815, "mean_abs_error_last_10": 65.801909963834, "mean_abs_error_last_25": 139.43741773099327, "mean_abs_error_last_50": 229.32655256017978, "mean_pred_prob": 0.04099090304225683, "mean_pred_prob_last_10": 0.21422164961695672, "mean_pred_prob_last_25": 0.11548994518816472, "mean_pred_prob_last_50": 0.06886511920019985, "mean_token_accuracy": 0.8733742833137512, "step": 48090 }, { "epoch": 0.8550655076173715, "grad_norm": 0.905500527172346, "learning_rate": 0.0001, "loss": 0.6469, "mean_abs_error": 158.34514068484174, "mean_abs_error_last_10": 50.63706679211667, "mean_abs_error_last_25": 71.18598236860107, "mean_abs_error_last_50": 112.19492441926816, "mean_pred_prob": 0.050600971281528476, "mean_pred_prob_last_10": 0.2467450775206089, "mean_pred_prob_last_25": 0.13624504376202823, "mean_pred_prob_last_50": 0.0839383989572525, "mean_token_accuracy": 0.8710749089717865, "step": 48100 }, { "epoch": 0.8552432759141735, "grad_norm": 1.6239563013893055, "learning_rate": 0.0001, "loss": 0.7777, "mean_abs_error": 155.8803082950254, "mean_abs_error_last_10": 29.749653753543345, "mean_abs_error_last_25": 49.21699191505194, "mean_abs_error_last_50": 86.70736240355461, "mean_pred_prob": 0.05901360297575593, "mean_pred_prob_last_10": 0.29278172068297864, "mean_pred_prob_last_25": 0.16279945559799672, "mean_pred_prob_last_50": 0.09938793182373047, "mean_token_accuracy": 0.8662309288978577, "step": 48110 }, { "epoch": 0.8554210442109754, "grad_norm": 1.4125790827578055, "learning_rate": 0.0001, "loss": 0.8124, "mean_abs_error": 1034.8005957798498, "mean_abs_error_last_10": 200.89225153250436, "mean_abs_error_last_25": 353.31793091610245, "mean_abs_error_last_50": 625.1500457512548, "mean_pred_prob": 0.03391497316188179, "mean_pred_prob_last_10": 0.1583570290938951, "mean_pred_prob_last_25": 0.08779548341990448, "mean_pred_prob_last_50": 0.055398114904528485, "mean_token_accuracy": 0.8623010575771332, "step": 48120 }, { "epoch": 0.8555988125077774, "grad_norm": 1.17550649000444, "learning_rate": 0.0001, "loss": 0.6891, "mean_abs_error": 829.1450065647912, "mean_abs_error_last_10": 208.2304946554396, "mean_abs_error_last_25": 361.9756813183248, "mean_abs_error_last_50": 526.853123515055, "mean_pred_prob": 0.015512707480229437, "mean_pred_prob_last_10": 0.08615476184058934, "mean_pred_prob_last_25": 0.04582823378732428, "mean_pred_prob_last_50": 0.02600163402967155, "mean_token_accuracy": 0.8705945551395416, "step": 48130 }, { "epoch": 0.8557765808045793, "grad_norm": 1.311713182125655, "learning_rate": 0.0001, "loss": 0.8536, "mean_abs_error": 438.9685007439824, "mean_abs_error_last_10": 151.69139523674545, "mean_abs_error_last_25": 201.65250015053488, "mean_abs_error_last_50": 287.823174920742, "mean_pred_prob": 0.04113074260530993, "mean_pred_prob_last_10": 0.20735166823724285, "mean_pred_prob_last_25": 0.11088041227776557, "mean_pred_prob_last_50": 0.06733681414625607, "mean_token_accuracy": 0.8723885536193847, "step": 48140 }, { "epoch": 0.8559543491013812, "grad_norm": 1.6562923151050377, "learning_rate": 0.0001, "loss": 0.7545, "mean_abs_error": 563.9508371414482, "mean_abs_error_last_10": 228.10994827398673, "mean_abs_error_last_25": 256.05717657253723, "mean_abs_error_last_50": 364.8451018343673, "mean_pred_prob": 0.03555097973439843, "mean_pred_prob_last_10": 0.17339837849140166, "mean_pred_prob_last_25": 0.09682787470519542, "mean_pred_prob_last_50": 0.05939707821235061, "mean_token_accuracy": 0.878932923078537, "step": 48150 }, { "epoch": 0.8561321173981832, "grad_norm": 1.5718218320603135, "learning_rate": 0.0001, "loss": 0.8429, "mean_abs_error": 320.8635970716393, "mean_abs_error_last_10": 147.0150491106093, "mean_abs_error_last_25": 248.14171334056067, "mean_abs_error_last_50": 277.113007459165, "mean_pred_prob": 0.03618096876889467, "mean_pred_prob_last_10": 0.17529782075434924, "mean_pred_prob_last_25": 0.09779872810468078, "mean_pred_prob_last_50": 0.05984411570243538, "mean_token_accuracy": 0.8667130053043366, "step": 48160 }, { "epoch": 0.8563098856949851, "grad_norm": 1.9778125820240073, "learning_rate": 0.0001, "loss": 0.8485, "mean_abs_error": 736.1513935734913, "mean_abs_error_last_10": 323.50345799996524, "mean_abs_error_last_25": 386.2449473647116, "mean_abs_error_last_50": 544.5230307155001, "mean_pred_prob": 0.045050308978534306, "mean_pred_prob_last_10": 0.21796637613442726, "mean_pred_prob_last_25": 0.12483467293204739, "mean_pred_prob_last_50": 0.07670491132594179, "mean_token_accuracy": 0.871317857503891, "step": 48170 }, { "epoch": 0.8564876539917871, "grad_norm": 1.5289974622142612, "learning_rate": 0.0001, "loss": 0.7333, "mean_abs_error": 1114.8452857193968, "mean_abs_error_last_10": 546.6564038969004, "mean_abs_error_last_25": 665.1623414033844, "mean_abs_error_last_50": 803.7104916549295, "mean_pred_prob": 0.032240299278055315, "mean_pred_prob_last_10": 0.16523511041887104, "mean_pred_prob_last_25": 0.08686784761957825, "mean_pred_prob_last_50": 0.05341391547699459, "mean_token_accuracy": 0.8720191895961762, "step": 48180 }, { "epoch": 0.8566654222885891, "grad_norm": 0.9059051274341852, "learning_rate": 0.0001, "loss": 0.553, "mean_abs_error": 96.69708695728133, "mean_abs_error_last_10": 28.9849503696254, "mean_abs_error_last_25": 45.43868520983854, "mean_abs_error_last_50": 66.13046478793264, "mean_pred_prob": 0.042963721137493846, "mean_pred_prob_last_10": 0.22374809496104717, "mean_pred_prob_last_25": 0.1206888573244214, "mean_pred_prob_last_50": 0.07174377292394638, "mean_token_accuracy": 0.8771430313587188, "step": 48190 }, { "epoch": 0.856843190585391, "grad_norm": 1.3361061262375717, "learning_rate": 0.0001, "loss": 0.6277, "mean_abs_error": 114.09468010836981, "mean_abs_error_last_10": 14.331573325677619, "mean_abs_error_last_25": 26.12037474981917, "mean_abs_error_last_50": 49.99989303675292, "mean_pred_prob": 0.06224873326718807, "mean_pred_prob_last_10": 0.3001825213432312, "mean_pred_prob_last_25": 0.16810479238629342, "mean_pred_prob_last_50": 0.10447407383471727, "mean_token_accuracy": 0.8735039949417114, "step": 48200 }, { "epoch": 0.857020958882193, "grad_norm": 0.7853785434406328, "learning_rate": 0.0001, "loss": 0.6118, "mean_abs_error": 776.2213910990101, "mean_abs_error_last_10": 430.0352794903274, "mean_abs_error_last_25": 507.4043764275101, "mean_abs_error_last_50": 601.1786326887513, "mean_pred_prob": 0.044136924587655814, "mean_pred_prob_last_10": 0.21280079785501585, "mean_pred_prob_last_25": 0.12134609261702281, "mean_pred_prob_last_50": 0.0747494387702318, "mean_token_accuracy": 0.8705349802970886, "step": 48210 }, { "epoch": 0.8571987271789949, "grad_norm": 2.5861521475040217, "learning_rate": 0.0001, "loss": 0.6919, "mean_abs_error": 1179.6429016851366, "mean_abs_error_last_10": 451.1692068352651, "mean_abs_error_last_25": 465.45324626822486, "mean_abs_error_last_50": 717.0051105024927, "mean_pred_prob": 0.04325149463256821, "mean_pred_prob_last_10": 0.20427965841081458, "mean_pred_prob_last_25": 0.1153898706252221, "mean_pred_prob_last_50": 0.07181546290812549, "mean_token_accuracy": 0.8720011830329895, "step": 48220 }, { "epoch": 0.8573764954757969, "grad_norm": 1.594287962276266, "learning_rate": 0.0001, "loss": 1.0226, "mean_abs_error": 424.0405111737074, "mean_abs_error_last_10": 267.5798234160222, "mean_abs_error_last_25": 236.9652553924108, "mean_abs_error_last_50": 250.15114912582652, "mean_pred_prob": 0.03718933077761903, "mean_pred_prob_last_10": 0.15363996885716916, "mean_pred_prob_last_25": 0.09636096600443125, "mean_pred_prob_last_50": 0.06105667403899133, "mean_token_accuracy": 0.8664190590381622, "step": 48230 }, { "epoch": 0.8575542637725988, "grad_norm": 1.1903118020708532, "learning_rate": 0.0001, "loss": 0.7235, "mean_abs_error": 873.0265310132218, "mean_abs_error_last_10": 320.6589605578968, "mean_abs_error_last_25": 359.9295987604925, "mean_abs_error_last_50": 534.7952099162717, "mean_pred_prob": 0.030929260162520223, "mean_pred_prob_last_10": 0.1610778473666869, "mean_pred_prob_last_25": 0.09073685058392585, "mean_pred_prob_last_50": 0.0534475133055821, "mean_token_accuracy": 0.8727677047252655, "step": 48240 }, { "epoch": 0.8577320320694007, "grad_norm": 0.8079376171586379, "learning_rate": 0.0001, "loss": 0.6856, "mean_abs_error": 494.1310984021693, "mean_abs_error_last_10": 218.09779083318898, "mean_abs_error_last_25": 258.5387888754311, "mean_abs_error_last_50": 303.4877759007295, "mean_pred_prob": 0.03157572557101958, "mean_pred_prob_last_10": 0.1675727572874166, "mean_pred_prob_last_25": 0.08752182003809139, "mean_pred_prob_last_50": 0.05232288691913709, "mean_token_accuracy": 0.876678079366684, "step": 48250 }, { "epoch": 0.8579098003662027, "grad_norm": 1.4468581152366846, "learning_rate": 0.0001, "loss": 0.6651, "mean_abs_error": 255.9633701610121, "mean_abs_error_last_10": 33.26770972953966, "mean_abs_error_last_25": 63.55929856169678, "mean_abs_error_last_50": 126.81900441684971, "mean_pred_prob": 0.043942243419587614, "mean_pred_prob_last_10": 0.23530972376465797, "mean_pred_prob_last_25": 0.1275153500959277, "mean_pred_prob_last_50": 0.0757939888164401, "mean_token_accuracy": 0.8711565256118774, "step": 48260 }, { "epoch": 0.8580875686630046, "grad_norm": 1.4346629228713892, "learning_rate": 0.0001, "loss": 0.658, "mean_abs_error": 196.60443080538124, "mean_abs_error_last_10": 75.71764243182862, "mean_abs_error_last_25": 118.43586819845072, "mean_abs_error_last_50": 143.2074588587661, "mean_pred_prob": 0.0629153671208769, "mean_pred_prob_last_10": 0.2942177776247263, "mean_pred_prob_last_25": 0.17456822674721478, "mean_pred_prob_last_50": 0.10685833059251308, "mean_token_accuracy": 0.8720624446868896, "step": 48270 }, { "epoch": 0.8582653369598066, "grad_norm": 1.2157501864493365, "learning_rate": 0.0001, "loss": 0.6801, "mean_abs_error": 386.416484700541, "mean_abs_error_last_10": 83.41002482820633, "mean_abs_error_last_25": 169.53505429068215, "mean_abs_error_last_50": 261.28631631085267, "mean_pred_prob": 0.03280827249400318, "mean_pred_prob_last_10": 0.171784033998847, "mean_pred_prob_last_25": 0.09288897635415197, "mean_pred_prob_last_50": 0.054888574732467534, "mean_token_accuracy": 0.8725342571735382, "step": 48280 }, { "epoch": 0.8584431052566085, "grad_norm": 1.7808821830698522, "learning_rate": 0.0001, "loss": 0.6253, "mean_abs_error": 67.60592912784146, "mean_abs_error_last_10": 18.796693780150605, "mean_abs_error_last_25": 24.481269097199466, "mean_abs_error_last_50": 40.969954747779425, "mean_pred_prob": 0.06390233421698213, "mean_pred_prob_last_10": 0.2911556474864483, "mean_pred_prob_last_25": 0.17000200524926184, "mean_pred_prob_last_50": 0.10468312837183476, "mean_token_accuracy": 0.8728149592876434, "step": 48290 }, { "epoch": 0.8586208735534104, "grad_norm": 1.563517863371899, "learning_rate": 0.0001, "loss": 0.7725, "mean_abs_error": 1100.1941004715677, "mean_abs_error_last_10": 621.728435250156, "mean_abs_error_last_25": 684.0137457484727, "mean_abs_error_last_50": 794.4799407693732, "mean_pred_prob": 0.03565214380214456, "mean_pred_prob_last_10": 0.16658310843922663, "mean_pred_prob_last_25": 0.09516862348828, "mean_pred_prob_last_50": 0.059000494360225274, "mean_token_accuracy": 0.8766561806201935, "step": 48300 }, { "epoch": 0.8587986418502125, "grad_norm": 1.946189703868253, "learning_rate": 0.0001, "loss": 0.6996, "mean_abs_error": 1813.1722191711876, "mean_abs_error_last_10": 750.1618855110862, "mean_abs_error_last_25": 869.3712533829842, "mean_abs_error_last_50": 1193.4218405652127, "mean_pred_prob": 0.013260554886073805, "mean_pred_prob_last_10": 0.07765350384288468, "mean_pred_prob_last_25": 0.03919696152152028, "mean_pred_prob_last_50": 0.022775245111552066, "mean_token_accuracy": 0.8825593173503876, "step": 48310 }, { "epoch": 0.8589764101470144, "grad_norm": 1.3222295590232724, "learning_rate": 0.0001, "loss": 0.7627, "mean_abs_error": 550.418910016914, "mean_abs_error_last_10": 163.15098766955467, "mean_abs_error_last_25": 206.90762423954297, "mean_abs_error_last_50": 377.37599899593454, "mean_pred_prob": 0.06304591533262283, "mean_pred_prob_last_10": 0.27623083812650295, "mean_pred_prob_last_25": 0.16621754806838, "mean_pred_prob_last_50": 0.10495116837555543, "mean_token_accuracy": 0.8783969938755035, "step": 48320 }, { "epoch": 0.8591541784438164, "grad_norm": 1.4631866641559272, "learning_rate": 0.0001, "loss": 0.6648, "mean_abs_error": 534.0057048558473, "mean_abs_error_last_10": 170.5272649358071, "mean_abs_error_last_25": 258.9894637447457, "mean_abs_error_last_50": 313.4561552764246, "mean_pred_prob": 0.04257078697555698, "mean_pred_prob_last_10": 0.20861945030046627, "mean_pred_prob_last_25": 0.11546355340396985, "mean_pred_prob_last_50": 0.070835034531774, "mean_token_accuracy": 0.8697665274143219, "step": 48330 }, { "epoch": 0.8593319467406183, "grad_norm": 1.4656763690922292, "learning_rate": 0.0001, "loss": 0.574, "mean_abs_error": 478.6796976149661, "mean_abs_error_last_10": 92.94259526534269, "mean_abs_error_last_25": 153.06288141757324, "mean_abs_error_last_50": 270.3974122045439, "mean_pred_prob": 0.03718021656386554, "mean_pred_prob_last_10": 0.19739917991682887, "mean_pred_prob_last_25": 0.10901833451353013, "mean_pred_prob_last_50": 0.06432717265561223, "mean_token_accuracy": 0.877711421251297, "step": 48340 }, { "epoch": 0.8595097150374202, "grad_norm": 1.2103535714190035, "learning_rate": 0.0001, "loss": 0.709, "mean_abs_error": 387.8850267028077, "mean_abs_error_last_10": 100.5936807541946, "mean_abs_error_last_25": 219.85364567195302, "mean_abs_error_last_50": 253.6048153722426, "mean_pred_prob": 0.042511390056461096, "mean_pred_prob_last_10": 0.2066940560936928, "mean_pred_prob_last_25": 0.11786704212427139, "mean_pred_prob_last_50": 0.07289695348590612, "mean_token_accuracy": 0.8668331503868103, "step": 48350 }, { "epoch": 0.8596874833342222, "grad_norm": 1.9662802586742214, "learning_rate": 0.0001, "loss": 0.7476, "mean_abs_error": 246.76163798098514, "mean_abs_error_last_10": 67.66960463015265, "mean_abs_error_last_25": 145.4884331343252, "mean_abs_error_last_50": 170.24829553394846, "mean_pred_prob": 0.046548183728009465, "mean_pred_prob_last_10": 0.21500876266509295, "mean_pred_prob_last_25": 0.12193943271413446, "mean_pred_prob_last_50": 0.07737028244882822, "mean_token_accuracy": 0.8744481682777405, "step": 48360 }, { "epoch": 0.8598652516310241, "grad_norm": 1.5833257253354818, "learning_rate": 0.0001, "loss": 0.6705, "mean_abs_error": 737.6618428757766, "mean_abs_error_last_10": 373.35759736592087, "mean_abs_error_last_25": 418.81614406106326, "mean_abs_error_last_50": 491.11278170539816, "mean_pred_prob": 0.0470696223230334, "mean_pred_prob_last_10": 0.22484000293770806, "mean_pred_prob_last_25": 0.1295981359027792, "mean_pred_prob_last_50": 0.07965070133795962, "mean_token_accuracy": 0.8662404298782349, "step": 48370 }, { "epoch": 0.8600430199278261, "grad_norm": 2.262649291653432, "learning_rate": 0.0001, "loss": 0.685, "mean_abs_error": 737.2871686346177, "mean_abs_error_last_10": 297.99611839790003, "mean_abs_error_last_25": 418.5402361277812, "mean_abs_error_last_50": 509.9594880389942, "mean_pred_prob": 0.05417365581961349, "mean_pred_prob_last_10": 0.23342937045381404, "mean_pred_prob_last_25": 0.14138746780226938, "mean_pred_prob_last_50": 0.08963917613727972, "mean_token_accuracy": 0.8846044778823853, "step": 48380 }, { "epoch": 0.860220788224628, "grad_norm": 2.7545393372437954, "learning_rate": 0.0001, "loss": 0.6906, "mean_abs_error": 163.34814608891492, "mean_abs_error_last_10": 40.76469221338941, "mean_abs_error_last_25": 60.44807409401575, "mean_abs_error_last_50": 121.70445507852419, "mean_pred_prob": 0.05660230666399002, "mean_pred_prob_last_10": 0.2567556321620941, "mean_pred_prob_last_25": 0.15126058049499988, "mean_pred_prob_last_50": 0.09522143565118313, "mean_token_accuracy": 0.8703159630298615, "step": 48390 }, { "epoch": 0.8603985565214299, "grad_norm": 1.4630366121743963, "learning_rate": 0.0001, "loss": 0.9554, "mean_abs_error": 231.0391878206139, "mean_abs_error_last_10": 54.29831917940826, "mean_abs_error_last_25": 83.49564600944127, "mean_abs_error_last_50": 125.43978096802012, "mean_pred_prob": 0.047754052036907524, "mean_pred_prob_last_10": 0.22731834747828544, "mean_pred_prob_last_25": 0.12975252505857499, "mean_pred_prob_last_50": 0.07949526733718812, "mean_token_accuracy": 0.8633116543292999, "step": 48400 }, { "epoch": 0.8605763248182319, "grad_norm": 2.0211430691400603, "learning_rate": 0.0001, "loss": 0.7615, "mean_abs_error": 401.8314854110614, "mean_abs_error_last_10": 92.77462303266728, "mean_abs_error_last_25": 148.51047653749657, "mean_abs_error_last_50": 203.61506286132504, "mean_pred_prob": 0.05147567399544641, "mean_pred_prob_last_10": 0.23535501449368895, "mean_pred_prob_last_25": 0.13675795816816388, "mean_pred_prob_last_50": 0.08565979981794954, "mean_token_accuracy": 0.8754973173141479, "step": 48410 }, { "epoch": 0.8607540931150338, "grad_norm": 1.55175761854901, "learning_rate": 0.0001, "loss": 1.0835, "mean_abs_error": 493.0549412445286, "mean_abs_error_last_10": 222.60078830242043, "mean_abs_error_last_25": 302.7417568680487, "mean_abs_error_last_50": 424.6224734887895, "mean_pred_prob": 0.026826814166270196, "mean_pred_prob_last_10": 0.15052391570061446, "mean_pred_prob_last_25": 0.08158357394859195, "mean_pred_prob_last_50": 0.046453763358294965, "mean_token_accuracy": 0.8709803104400635, "step": 48420 }, { "epoch": 0.8609318614118359, "grad_norm": 1.9570401782863618, "learning_rate": 0.0001, "loss": 0.7848, "mean_abs_error": 273.7449136740335, "mean_abs_error_last_10": 39.161415408815095, "mean_abs_error_last_25": 76.50651377803084, "mean_abs_error_last_50": 136.72601410059926, "mean_pred_prob": 0.0376521815545857, "mean_pred_prob_last_10": 0.2082362711429596, "mean_pred_prob_last_25": 0.11054886933416128, "mean_pred_prob_last_50": 0.06547465929761528, "mean_token_accuracy": 0.8775015234947204, "step": 48430 }, { "epoch": 0.8611096297086378, "grad_norm": 1.5684217671412397, "learning_rate": 0.0001, "loss": 0.8259, "mean_abs_error": 847.9304013985181, "mean_abs_error_last_10": 537.9838988192909, "mean_abs_error_last_25": 513.5420290669708, "mean_abs_error_last_50": 608.8039605358179, "mean_pred_prob": 0.036366932559758425, "mean_pred_prob_last_10": 0.17398057539830916, "mean_pred_prob_last_25": 0.09315545863937587, "mean_pred_prob_last_50": 0.05836850568593945, "mean_token_accuracy": 0.8702484786510467, "step": 48440 }, { "epoch": 0.8612873980054397, "grad_norm": 1.3498886026238952, "learning_rate": 0.0001, "loss": 0.8206, "mean_abs_error": 516.0475026494536, "mean_abs_error_last_10": 108.27991755432583, "mean_abs_error_last_25": 142.97896089714882, "mean_abs_error_last_50": 296.40775939149205, "mean_pred_prob": 0.03598065766273066, "mean_pred_prob_last_10": 0.19494314570911228, "mean_pred_prob_last_25": 0.105492536816746, "mean_pred_prob_last_50": 0.06058446913957596, "mean_token_accuracy": 0.8635039746761322, "step": 48450 }, { "epoch": 0.8614651663022417, "grad_norm": 1.561900113990972, "learning_rate": 0.0001, "loss": 0.8022, "mean_abs_error": 604.4869190133038, "mean_abs_error_last_10": 368.0284907437018, "mean_abs_error_last_25": 339.94642579328445, "mean_abs_error_last_50": 395.89734702005524, "mean_pred_prob": 0.04070772771956399, "mean_pred_prob_last_10": 0.19634819498169237, "mean_pred_prob_last_25": 0.10750445563462563, "mean_pred_prob_last_50": 0.06676138018374331, "mean_token_accuracy": 0.8746982038021087, "step": 48460 }, { "epoch": 0.8616429345990436, "grad_norm": 1.54382055766285, "learning_rate": 0.0001, "loss": 0.8223, "mean_abs_error": 420.08070187784494, "mean_abs_error_last_10": 97.51693148863153, "mean_abs_error_last_25": 170.4727662848432, "mean_abs_error_last_50": 287.0468654747392, "mean_pred_prob": 0.033335523831192405, "mean_pred_prob_last_10": 0.16836124078836293, "mean_pred_prob_last_25": 0.09012454183539376, "mean_pred_prob_last_50": 0.05520496341050603, "mean_token_accuracy": 0.884836345911026, "step": 48470 }, { "epoch": 0.8618207028958456, "grad_norm": 1.2733668366606319, "learning_rate": 0.0001, "loss": 0.6933, "mean_abs_error": 369.9128953288708, "mean_abs_error_last_10": 78.66835252523882, "mean_abs_error_last_25": 134.74487727066722, "mean_abs_error_last_50": 214.33540410211475, "mean_pred_prob": 0.04965777893085033, "mean_pred_prob_last_10": 0.2367715634405613, "mean_pred_prob_last_25": 0.13230120660737157, "mean_pred_prob_last_50": 0.08285255418159068, "mean_token_accuracy": 0.8756543338298798, "step": 48480 }, { "epoch": 0.8619984711926475, "grad_norm": 4.2370542295846185, "learning_rate": 0.0001, "loss": 1.0929, "mean_abs_error": 595.2542810728684, "mean_abs_error_last_10": 270.214501071041, "mean_abs_error_last_25": 276.8446059945882, "mean_abs_error_last_50": 343.3995247644145, "mean_pred_prob": 0.0449302232649643, "mean_pred_prob_last_10": 0.23024537140736356, "mean_pred_prob_last_25": 0.12463164759101346, "mean_pred_prob_last_50": 0.07635253941407427, "mean_token_accuracy": 0.8702726781368255, "step": 48490 }, { "epoch": 0.8621762394894494, "grad_norm": 1.5899347433072972, "learning_rate": 0.0001, "loss": 0.6486, "mean_abs_error": 475.83135769697026, "mean_abs_error_last_10": 91.88411324448825, "mean_abs_error_last_25": 184.09842660936482, "mean_abs_error_last_50": 298.515150139091, "mean_pred_prob": 0.034540949203073976, "mean_pred_prob_last_10": 0.17637949604541064, "mean_pred_prob_last_25": 0.09207873828709126, "mean_pred_prob_last_50": 0.05612954599782825, "mean_token_accuracy": 0.8713792204856873, "step": 48500 }, { "epoch": 0.8623540077862514, "grad_norm": 1.1357377964138198, "learning_rate": 0.0001, "loss": 0.7941, "mean_abs_error": 506.8299205956875, "mean_abs_error_last_10": 190.661920160289, "mean_abs_error_last_25": 240.4731028242962, "mean_abs_error_last_50": 345.0473894104923, "mean_pred_prob": 0.03671670368057676, "mean_pred_prob_last_10": 0.18217573495348915, "mean_pred_prob_last_25": 0.10014640633016825, "mean_pred_prob_last_50": 0.06100209042197093, "mean_token_accuracy": 0.8695998966693879, "step": 48510 }, { "epoch": 0.8625317760830533, "grad_norm": 1.484774154035663, "learning_rate": 0.0001, "loss": 0.5853, "mean_abs_error": 200.7615308818586, "mean_abs_error_last_10": 42.40097413432667, "mean_abs_error_last_25": 74.15225260353935, "mean_abs_error_last_50": 116.62124774331983, "mean_pred_prob": 0.0516062839422375, "mean_pred_prob_last_10": 0.2544844552874565, "mean_pred_prob_last_25": 0.14139352459460497, "mean_pred_prob_last_50": 0.08700097557157278, "mean_token_accuracy": 0.8836840450763702, "step": 48520 }, { "epoch": 0.8627095443798553, "grad_norm": 1.5340825365028026, "learning_rate": 0.0001, "loss": 0.7251, "mean_abs_error": 283.16074945670505, "mean_abs_error_last_10": 76.7936307485362, "mean_abs_error_last_25": 151.2141799402179, "mean_abs_error_last_50": 215.13302148188, "mean_pred_prob": 0.0387135973200202, "mean_pred_prob_last_10": 0.1986363086849451, "mean_pred_prob_last_25": 0.10740483961999417, "mean_pred_prob_last_50": 0.06519752265885473, "mean_token_accuracy": 0.8700062036514282, "step": 48530 }, { "epoch": 0.8628873126766572, "grad_norm": 1.6323766367993224, "learning_rate": 0.0001, "loss": 0.5936, "mean_abs_error": 335.20905136882044, "mean_abs_error_last_10": 64.36240203648026, "mean_abs_error_last_25": 176.11758419907724, "mean_abs_error_last_50": 247.3627047654969, "mean_pred_prob": 0.0347115324344486, "mean_pred_prob_last_10": 0.16581089254468678, "mean_pred_prob_last_25": 0.09242264702916145, "mean_pred_prob_last_50": 0.05755211291834712, "mean_token_accuracy": 0.8805688321590424, "step": 48540 }, { "epoch": 0.8630650809734592, "grad_norm": 0.9053783172788956, "learning_rate": 0.0001, "loss": 0.73, "mean_abs_error": 449.1499100340032, "mean_abs_error_last_10": 158.62432941252067, "mean_abs_error_last_25": 190.42313540545337, "mean_abs_error_last_50": 254.59208336900716, "mean_pred_prob": 0.04615223702276126, "mean_pred_prob_last_10": 0.2282858308404684, "mean_pred_prob_last_25": 0.12760556909488513, "mean_pred_prob_last_50": 0.07784443915588782, "mean_token_accuracy": 0.8734091937541961, "step": 48550 }, { "epoch": 0.8632428492702612, "grad_norm": 1.1694373221592986, "learning_rate": 0.0001, "loss": 0.7446, "mean_abs_error": 655.9106852327743, "mean_abs_error_last_10": 172.0381856705222, "mean_abs_error_last_25": 227.81833253981685, "mean_abs_error_last_50": 367.8441050896356, "mean_pred_prob": 0.025858895148849113, "mean_pred_prob_last_10": 0.1390794776729308, "mean_pred_prob_last_25": 0.07233030963689088, "mean_pred_prob_last_50": 0.04368732261355035, "mean_token_accuracy": 0.8795597553253174, "step": 48560 }, { "epoch": 0.8634206175670631, "grad_norm": 1.4579722697486146, "learning_rate": 0.0001, "loss": 0.8427, "mean_abs_error": 274.15352613580546, "mean_abs_error_last_10": 201.70287466475753, "mean_abs_error_last_25": 230.42162288620722, "mean_abs_error_last_50": 223.1065858312351, "mean_pred_prob": 0.03442396274767816, "mean_pred_prob_last_10": 0.17963796034455298, "mean_pred_prob_last_25": 0.09463073667138815, "mean_pred_prob_last_50": 0.05716476417146623, "mean_token_accuracy": 0.871703964471817, "step": 48570 }, { "epoch": 0.8635983858638651, "grad_norm": 1.3658228172989635, "learning_rate": 0.0001, "loss": 0.6305, "mean_abs_error": 280.68256859020676, "mean_abs_error_last_10": 67.40328424514868, "mean_abs_error_last_25": 93.19783977623713, "mean_abs_error_last_50": 142.64949485384543, "mean_pred_prob": 0.046212812140583995, "mean_pred_prob_last_10": 0.22971427738666533, "mean_pred_prob_last_25": 0.13026322461664677, "mean_pred_prob_last_50": 0.07722393907606602, "mean_token_accuracy": 0.8771945357322692, "step": 48580 }, { "epoch": 0.863776154160667, "grad_norm": 2.113131325591479, "learning_rate": 0.0001, "loss": 0.7045, "mean_abs_error": 271.1526746278133, "mean_abs_error_last_10": 54.07790169330169, "mean_abs_error_last_25": 115.49971531819888, "mean_abs_error_last_50": 166.37543512390448, "mean_pred_prob": 0.04346667299978435, "mean_pred_prob_last_10": 0.20728153139352798, "mean_pred_prob_last_25": 0.11670306157320738, "mean_pred_prob_last_50": 0.0707840546965599, "mean_token_accuracy": 0.8774046957492828, "step": 48590 }, { "epoch": 0.8639539224574689, "grad_norm": 1.631372601194896, "learning_rate": 0.0001, "loss": 0.8879, "mean_abs_error": 215.55659316850324, "mean_abs_error_last_10": 104.56186254720728, "mean_abs_error_last_25": 156.75240515398787, "mean_abs_error_last_50": 209.4186946473747, "mean_pred_prob": 0.056632675463333726, "mean_pred_prob_last_10": 0.2584264390170574, "mean_pred_prob_last_25": 0.15136358207091688, "mean_pred_prob_last_50": 0.0927619569003582, "mean_token_accuracy": 0.859340500831604, "step": 48600 }, { "epoch": 0.8641316907542709, "grad_norm": 2.3622673419699103, "learning_rate": 0.0001, "loss": 0.7352, "mean_abs_error": 355.74896892155977, "mean_abs_error_last_10": 223.67611429972771, "mean_abs_error_last_25": 291.99635740931456, "mean_abs_error_last_50": 278.9304582209251, "mean_pred_prob": 0.05151095769833773, "mean_pred_prob_last_10": 0.21579293413087725, "mean_pred_prob_last_25": 0.13166353199630976, "mean_pred_prob_last_50": 0.08366133975796401, "mean_token_accuracy": 0.8718060374259948, "step": 48610 }, { "epoch": 0.8643094590510728, "grad_norm": 2.7185060338137874, "learning_rate": 0.0001, "loss": 0.705, "mean_abs_error": 1426.7033409643632, "mean_abs_error_last_10": 884.6079052203761, "mean_abs_error_last_25": 1023.6604390566987, "mean_abs_error_last_50": 1153.667389227898, "mean_pred_prob": 0.04746959793665155, "mean_pred_prob_last_10": 0.22714970078232, "mean_pred_prob_last_25": 0.12470436927251285, "mean_pred_prob_last_50": 0.07825289136017091, "mean_token_accuracy": 0.8765345931053161, "step": 48620 }, { "epoch": 0.8644872273478748, "grad_norm": 1.5738148343542058, "learning_rate": 0.0001, "loss": 0.7276, "mean_abs_error": 910.8320792357806, "mean_abs_error_last_10": 401.50591791971607, "mean_abs_error_last_25": 455.4957991028362, "mean_abs_error_last_50": 610.9758144674508, "mean_pred_prob": 0.04793942933320068, "mean_pred_prob_last_10": 0.23821098199114205, "mean_pred_prob_last_25": 0.13239538334310055, "mean_pred_prob_last_50": 0.0795209213916678, "mean_token_accuracy": 0.8741752803325653, "step": 48630 }, { "epoch": 0.8646649956446767, "grad_norm": 1.058277517508725, "learning_rate": 0.0001, "loss": 0.7613, "mean_abs_error": 319.467854642019, "mean_abs_error_last_10": 22.599319896497427, "mean_abs_error_last_25": 57.07433399009062, "mean_abs_error_last_50": 164.76751605958472, "mean_pred_prob": 0.060228678537532686, "mean_pred_prob_last_10": 0.26891932673752306, "mean_pred_prob_last_25": 0.16292238123714925, "mean_pred_prob_last_50": 0.09971627024933696, "mean_token_accuracy": 0.8613924562931061, "step": 48640 }, { "epoch": 0.8648427639414786, "grad_norm": 2.0298701575829687, "learning_rate": 0.0001, "loss": 1.0469, "mean_abs_error": 593.4364420464533, "mean_abs_error_last_10": 169.5995928365381, "mean_abs_error_last_25": 257.8127833702471, "mean_abs_error_last_50": 374.55759667114626, "mean_pred_prob": 0.033288907798123546, "mean_pred_prob_last_10": 0.18587361862882973, "mean_pred_prob_last_25": 0.09547841017483734, "mean_pred_prob_last_50": 0.056781650165794416, "mean_token_accuracy": 0.8650472640991211, "step": 48650 }, { "epoch": 0.8650205322382806, "grad_norm": 2.262976130841069, "learning_rate": 0.0001, "loss": 0.7057, "mean_abs_error": 144.10209294717282, "mean_abs_error_last_10": 24.113107360408275, "mean_abs_error_last_25": 44.706586904357934, "mean_abs_error_last_50": 79.5834164749631, "mean_pred_prob": 0.052882762625813484, "mean_pred_prob_last_10": 0.25023217871785164, "mean_pred_prob_last_25": 0.14262976087629795, "mean_pred_prob_last_50": 0.08709022589027882, "mean_token_accuracy": 0.8722202599048614, "step": 48660 }, { "epoch": 0.8651983005350826, "grad_norm": 1.0860098153023923, "learning_rate": 0.0001, "loss": 0.7622, "mean_abs_error": 1667.5605509643763, "mean_abs_error_last_10": 815.3576093134182, "mean_abs_error_last_25": 1090.6865096169859, "mean_abs_error_last_50": 1291.098930156045, "mean_pred_prob": 0.030826802153023893, "mean_pred_prob_last_10": 0.14207927323877811, "mean_pred_prob_last_25": 0.07822411907254719, "mean_pred_prob_last_50": 0.05004309728101362, "mean_token_accuracy": 0.8688883125782013, "step": 48670 }, { "epoch": 0.8653760688318846, "grad_norm": 1.4716214416078837, "learning_rate": 0.0001, "loss": 0.9208, "mean_abs_error": 426.9096428178774, "mean_abs_error_last_10": 143.14413126480912, "mean_abs_error_last_25": 193.33873304077417, "mean_abs_error_last_50": 299.6539598955801, "mean_pred_prob": 0.025695634447038172, "mean_pred_prob_last_10": 0.1407470464706421, "mean_pred_prob_last_25": 0.0735841454938054, "mean_pred_prob_last_50": 0.043110182601958515, "mean_token_accuracy": 0.870667713880539, "step": 48680 }, { "epoch": 0.8655538371286865, "grad_norm": 0.9088519889574399, "learning_rate": 0.0001, "loss": 0.6934, "mean_abs_error": 307.5040874166747, "mean_abs_error_last_10": 188.42994169695288, "mean_abs_error_last_25": 230.62856430745637, "mean_abs_error_last_50": 243.56672620565837, "mean_pred_prob": 0.03960700617171824, "mean_pred_prob_last_10": 0.1845938267186284, "mean_pred_prob_last_25": 0.10426459275186062, "mean_pred_prob_last_50": 0.06524692615494132, "mean_token_accuracy": 0.8738320589065551, "step": 48690 }, { "epoch": 0.8657316054254884, "grad_norm": 2.1140456806909818, "learning_rate": 0.0001, "loss": 0.7373, "mean_abs_error": 157.62726080352883, "mean_abs_error_last_10": 26.112098900325712, "mean_abs_error_last_25": 65.7353279806282, "mean_abs_error_last_50": 100.83738240115663, "mean_pred_prob": 0.052326653711497785, "mean_pred_prob_last_10": 0.2643997944891453, "mean_pred_prob_last_25": 0.14104706197977065, "mean_pred_prob_last_50": 0.086744733620435, "mean_token_accuracy": 0.8739510416984558, "step": 48700 }, { "epoch": 0.8659093737222904, "grad_norm": 1.2086332282222263, "learning_rate": 0.0001, "loss": 0.5798, "mean_abs_error": 875.0996308780595, "mean_abs_error_last_10": 490.37508157578543, "mean_abs_error_last_25": 550.5703359634105, "mean_abs_error_last_50": 685.8918369883155, "mean_pred_prob": 0.04429832333262311, "mean_pred_prob_last_10": 0.20832753860304365, "mean_pred_prob_last_25": 0.11790743942110567, "mean_pred_prob_last_50": 0.07406250056665158, "mean_token_accuracy": 0.8615728318691254, "step": 48710 }, { "epoch": 0.8660871420190923, "grad_norm": 1.8086592180327339, "learning_rate": 0.0001, "loss": 0.8113, "mean_abs_error": 885.751792422558, "mean_abs_error_last_10": 275.15451671908295, "mean_abs_error_last_25": 388.538142775823, "mean_abs_error_last_50": 567.853532959997, "mean_pred_prob": 0.024924752849619834, "mean_pred_prob_last_10": 0.11496092753950507, "mean_pred_prob_last_25": 0.06405858823563904, "mean_pred_prob_last_50": 0.04073885411489755, "mean_token_accuracy": 0.8644120097160339, "step": 48720 }, { "epoch": 0.8662649103158943, "grad_norm": 1.2243505249347673, "learning_rate": 0.0001, "loss": 0.6634, "mean_abs_error": 181.22512441244564, "mean_abs_error_last_10": 31.819200517756553, "mean_abs_error_last_25": 67.31372203308096, "mean_abs_error_last_50": 122.8713032947863, "mean_pred_prob": 0.05267365416511893, "mean_pred_prob_last_10": 0.2560881871730089, "mean_pred_prob_last_25": 0.14314764812588693, "mean_pred_prob_last_50": 0.08823313592001795, "mean_token_accuracy": 0.8774236977100373, "step": 48730 }, { "epoch": 0.8664426786126962, "grad_norm": 2.34473389722806, "learning_rate": 0.0001, "loss": 0.6781, "mean_abs_error": 520.9105380430179, "mean_abs_error_last_10": 201.0844708878283, "mean_abs_error_last_25": 276.76076621973385, "mean_abs_error_last_50": 334.687252651968, "mean_pred_prob": 0.03304529439192265, "mean_pred_prob_last_10": 0.15901966807432472, "mean_pred_prob_last_25": 0.08986959210596979, "mean_pred_prob_last_50": 0.05529911888297647, "mean_token_accuracy": 0.8721203148365021, "step": 48740 }, { "epoch": 0.8666204469094981, "grad_norm": 2.2535423817634266, "learning_rate": 0.0001, "loss": 0.7774, "mean_abs_error": 590.3471001164613, "mean_abs_error_last_10": 368.8022230352287, "mean_abs_error_last_25": 387.4102134100786, "mean_abs_error_last_50": 491.61394477603255, "mean_pred_prob": 0.04607055183150806, "mean_pred_prob_last_10": 0.20845079076243564, "mean_pred_prob_last_25": 0.12346359432558529, "mean_pred_prob_last_50": 0.0763698211114388, "mean_token_accuracy": 0.8743105709552765, "step": 48750 }, { "epoch": 0.8667982152063001, "grad_norm": 2.037385882833306, "learning_rate": 0.0001, "loss": 0.7149, "mean_abs_error": 1643.3077720579354, "mean_abs_error_last_10": 1140.133313025447, "mean_abs_error_last_25": 1237.7612574760792, "mean_abs_error_last_50": 1335.385469197052, "mean_pred_prob": 0.017551969902706334, "mean_pred_prob_last_10": 0.09673790294473292, "mean_pred_prob_last_25": 0.048105103602574674, "mean_pred_prob_last_50": 0.028956224651483353, "mean_token_accuracy": 0.8786300957202912, "step": 48760 }, { "epoch": 0.866975983503102, "grad_norm": 1.6042260712230685, "learning_rate": 0.0001, "loss": 0.7207, "mean_abs_error": 1277.541619410622, "mean_abs_error_last_10": 565.6737314869706, "mean_abs_error_last_25": 725.3393217808509, "mean_abs_error_last_50": 918.8975541728848, "mean_pred_prob": 0.04159113381319912, "mean_pred_prob_last_10": 0.21006077685160562, "mean_pred_prob_last_25": 0.11998297493846621, "mean_pred_prob_last_50": 0.07055064406886231, "mean_token_accuracy": 0.884103512763977, "step": 48770 }, { "epoch": 0.8671537517999041, "grad_norm": 1.3861984526744866, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 1321.1745140900523, "mean_abs_error_last_10": 800.1890731697946, "mean_abs_error_last_25": 753.1177655900027, "mean_abs_error_last_50": 907.1655393464538, "mean_pred_prob": 0.028650810569524766, "mean_pred_prob_last_10": 0.1161715767957503, "mean_pred_prob_last_25": 0.07217185345944017, "mean_pred_prob_last_50": 0.04694349349010736, "mean_token_accuracy": 0.8624196231365204, "step": 48780 }, { "epoch": 0.867331520096706, "grad_norm": 1.61875642375569, "learning_rate": 0.0001, "loss": 0.7073, "mean_abs_error": 953.7330174384197, "mean_abs_error_last_10": 515.7031816632893, "mean_abs_error_last_25": 640.5658077474075, "mean_abs_error_last_50": 725.9584281106703, "mean_pred_prob": 0.05272451876226114, "mean_pred_prob_last_10": 0.2390298098733183, "mean_pred_prob_last_25": 0.13645703532674816, "mean_pred_prob_last_50": 0.08619930753484369, "mean_token_accuracy": 0.876851063966751, "step": 48790 }, { "epoch": 0.8675092883935079, "grad_norm": 3.047581467222702, "learning_rate": 0.0001, "loss": 0.6949, "mean_abs_error": 859.7874212256977, "mean_abs_error_last_10": 370.44276013175204, "mean_abs_error_last_25": 440.9041859749621, "mean_abs_error_last_50": 529.0145176506999, "mean_pred_prob": 0.04044090867391788, "mean_pred_prob_last_10": 0.2274389967031311, "mean_pred_prob_last_25": 0.1174337025207933, "mean_pred_prob_last_50": 0.06836940015200525, "mean_token_accuracy": 0.8697623312473297, "step": 48800 }, { "epoch": 0.8676870566903099, "grad_norm": 1.3962371366970001, "learning_rate": 0.0001, "loss": 0.7237, "mean_abs_error": 375.2576821281154, "mean_abs_error_last_10": 121.67033974652107, "mean_abs_error_last_25": 169.38219303079796, "mean_abs_error_last_50": 240.46840050804454, "mean_pred_prob": 0.04417135415133089, "mean_pred_prob_last_10": 0.19376373459817842, "mean_pred_prob_last_25": 0.11368915020721033, "mean_pred_prob_last_50": 0.07178339923266322, "mean_token_accuracy": 0.8661302864551544, "step": 48810 }, { "epoch": 0.8678648249871118, "grad_norm": 1.4219757431129145, "learning_rate": 0.0001, "loss": 0.5955, "mean_abs_error": 392.3338187552798, "mean_abs_error_last_10": 80.66318985881031, "mean_abs_error_last_25": 113.53577476263729, "mean_abs_error_last_50": 205.76233873396814, "mean_pred_prob": 0.043311932031065224, "mean_pred_prob_last_10": 0.2103613492101431, "mean_pred_prob_last_25": 0.11635723467916251, "mean_pred_prob_last_50": 0.07094031181186437, "mean_token_accuracy": 0.8807827472686768, "step": 48820 }, { "epoch": 0.8680425932839138, "grad_norm": 1.533288554341098, "learning_rate": 0.0001, "loss": 0.7815, "mean_abs_error": 85.57828403713178, "mean_abs_error_last_10": 7.34367944163035, "mean_abs_error_last_25": 23.057276806898564, "mean_abs_error_last_50": 49.0929953138496, "mean_pred_prob": 0.06475172284990549, "mean_pred_prob_last_10": 0.33844916298985483, "mean_pred_prob_last_25": 0.18354173861443995, "mean_pred_prob_last_50": 0.10816266592592001, "mean_token_accuracy": 0.8758591294288636, "step": 48830 }, { "epoch": 0.8682203615807157, "grad_norm": 1.6931808659168825, "learning_rate": 0.0001, "loss": 0.5727, "mean_abs_error": 467.7143509383118, "mean_abs_error_last_10": 197.47117836901012, "mean_abs_error_last_25": 221.55805554262488, "mean_abs_error_last_50": 270.72639292135966, "mean_pred_prob": 0.028701498173177242, "mean_pred_prob_last_10": 0.13529677425976844, "mean_pred_prob_last_25": 0.07369635283248499, "mean_pred_prob_last_50": 0.046813187340740116, "mean_token_accuracy": 0.8739204466342926, "step": 48840 }, { "epoch": 0.8683981298775176, "grad_norm": 1.4055989620003466, "learning_rate": 0.0001, "loss": 0.5621, "mean_abs_error": 263.335175073379, "mean_abs_error_last_10": 82.11467266469893, "mean_abs_error_last_25": 159.87880643729923, "mean_abs_error_last_50": 186.57183933716527, "mean_pred_prob": 0.04614643095992506, "mean_pred_prob_last_10": 0.23716446831822396, "mean_pred_prob_last_25": 0.12933729430660607, "mean_pred_prob_last_50": 0.07721089888364077, "mean_token_accuracy": 0.881420761346817, "step": 48850 }, { "epoch": 0.8685758981743196, "grad_norm": 1.6014278401423976, "learning_rate": 0.0001, "loss": 0.7101, "mean_abs_error": 687.8541948765487, "mean_abs_error_last_10": 318.6975360989586, "mean_abs_error_last_25": 328.9935859891398, "mean_abs_error_last_50": 402.9878934625118, "mean_pred_prob": 0.035459207370877266, "mean_pred_prob_last_10": 0.1837086146697402, "mean_pred_prob_last_25": 0.09894498866051435, "mean_pred_prob_last_50": 0.060154964402318004, "mean_token_accuracy": 0.8761734783649444, "step": 48860 }, { "epoch": 0.8687536664711215, "grad_norm": 0.8660487921690234, "learning_rate": 0.0001, "loss": 0.567, "mean_abs_error": 694.5513593942144, "mean_abs_error_last_10": 380.4974324290024, "mean_abs_error_last_25": 411.4713774795088, "mean_abs_error_last_50": 549.319970079759, "mean_pred_prob": 0.025733552771271206, "mean_pred_prob_last_10": 0.12853191341273487, "mean_pred_prob_last_25": 0.07186457251664251, "mean_pred_prob_last_50": 0.04349893078906462, "mean_token_accuracy": 0.8684845745563508, "step": 48870 }, { "epoch": 0.8689314347679234, "grad_norm": 1.153672224416644, "learning_rate": 0.0001, "loss": 0.5775, "mean_abs_error": 292.4120336665201, "mean_abs_error_last_10": 131.5526934755248, "mean_abs_error_last_25": 150.7752008750226, "mean_abs_error_last_50": 219.71867925905457, "mean_pred_prob": 0.04166234252043068, "mean_pred_prob_last_10": 0.21326855570077896, "mean_pred_prob_last_25": 0.11863444969058037, "mean_pred_prob_last_50": 0.07114766482263804, "mean_token_accuracy": 0.8739471554756164, "step": 48880 }, { "epoch": 0.8691092030647254, "grad_norm": 1.5953518163847233, "learning_rate": 0.0001, "loss": 0.8858, "mean_abs_error": 114.37400732967379, "mean_abs_error_last_10": 21.519268448469823, "mean_abs_error_last_25": 33.71459544322656, "mean_abs_error_last_50": 63.38172534893057, "mean_pred_prob": 0.0650197851471603, "mean_pred_prob_last_10": 0.2875329293310642, "mean_pred_prob_last_25": 0.17538470551371574, "mean_pred_prob_last_50": 0.10804283376783133, "mean_token_accuracy": 0.8819752335548401, "step": 48890 }, { "epoch": 0.8692869713615274, "grad_norm": 2.2751903589704963, "learning_rate": 0.0001, "loss": 0.8017, "mean_abs_error": 259.32619729564294, "mean_abs_error_last_10": 90.81140170768501, "mean_abs_error_last_25": 97.91726883928324, "mean_abs_error_last_50": 154.2169112481426, "mean_pred_prob": 0.0339384320192039, "mean_pred_prob_last_10": 0.14697448648512362, "mean_pred_prob_last_25": 0.08044403493404388, "mean_pred_prob_last_50": 0.05208587152883411, "mean_token_accuracy": 0.8718498170375824, "step": 48900 }, { "epoch": 0.8694647396583294, "grad_norm": 1.3161833028136931, "learning_rate": 0.0001, "loss": 0.7002, "mean_abs_error": 907.6962818407972, "mean_abs_error_last_10": 294.82998465186944, "mean_abs_error_last_25": 388.30083705971407, "mean_abs_error_last_50": 572.7053270976674, "mean_pred_prob": 0.016718156714341605, "mean_pred_prob_last_10": 0.09690431203343905, "mean_pred_prob_last_25": 0.05066109987674281, "mean_pred_prob_last_50": 0.02861294136964716, "mean_token_accuracy": 0.8729366362094879, "step": 48910 }, { "epoch": 0.8696425079551313, "grad_norm": 1.4617011633909145, "learning_rate": 0.0001, "loss": 0.5225, "mean_abs_error": 200.93240992395215, "mean_abs_error_last_10": 49.86602380435617, "mean_abs_error_last_25": 74.02033776410697, "mean_abs_error_last_50": 133.84200707076576, "mean_pred_prob": 0.058656653575599196, "mean_pred_prob_last_10": 0.27976321838796137, "mean_pred_prob_last_25": 0.15770764779299498, "mean_pred_prob_last_50": 0.09787425985559821, "mean_token_accuracy": 0.8795007050037384, "step": 48920 }, { "epoch": 0.8698202762519333, "grad_norm": 2.362997979197682, "learning_rate": 0.0001, "loss": 0.5789, "mean_abs_error": 610.1334547382198, "mean_abs_error_last_10": 235.1361894527028, "mean_abs_error_last_25": 322.37209102972156, "mean_abs_error_last_50": 449.9683385973008, "mean_pred_prob": 0.034845596746890806, "mean_pred_prob_last_10": 0.18744032932445406, "mean_pred_prob_last_25": 0.09734751367359422, "mean_pred_prob_last_50": 0.05858918655721936, "mean_token_accuracy": 0.881994080543518, "step": 48930 }, { "epoch": 0.8699980445487352, "grad_norm": 1.793168138601153, "learning_rate": 0.0001, "loss": 0.7066, "mean_abs_error": 308.47359843253787, "mean_abs_error_last_10": 76.6266129234247, "mean_abs_error_last_25": 112.37973232545251, "mean_abs_error_last_50": 158.31399395449463, "mean_pred_prob": 0.05731622795574367, "mean_pred_prob_last_10": 0.25196584183722737, "mean_pred_prob_last_25": 0.1498604747466743, "mean_pred_prob_last_50": 0.09490066994912923, "mean_token_accuracy": 0.8689453721046447, "step": 48940 }, { "epoch": 0.8701758128455371, "grad_norm": 2.013442396877959, "learning_rate": 0.0001, "loss": 0.7139, "mean_abs_error": 240.52038086604222, "mean_abs_error_last_10": 89.37890029241962, "mean_abs_error_last_25": 147.18233312297002, "mean_abs_error_last_50": 201.4372708136351, "mean_pred_prob": 0.03665279131382704, "mean_pred_prob_last_10": 0.18653056677430868, "mean_pred_prob_last_25": 0.09887505788356066, "mean_pred_prob_last_50": 0.059814196359366176, "mean_token_accuracy": 0.8703874409198761, "step": 48950 }, { "epoch": 0.8703535811423391, "grad_norm": 0.8174558401918169, "learning_rate": 0.0001, "loss": 0.7356, "mean_abs_error": 644.463584380286, "mean_abs_error_last_10": 207.1068711466043, "mean_abs_error_last_25": 303.6811501224569, "mean_abs_error_last_50": 414.57895014901726, "mean_pred_prob": 0.027210559070226736, "mean_pred_prob_last_10": 0.13838272026041523, "mean_pred_prob_last_25": 0.07585807557916269, "mean_pred_prob_last_50": 0.04546999329177197, "mean_token_accuracy": 0.877156388759613, "step": 48960 }, { "epoch": 0.870531349439141, "grad_norm": 1.5789124923142066, "learning_rate": 0.0001, "loss": 0.813, "mean_abs_error": 453.0823318572723, "mean_abs_error_last_10": 141.36449771959713, "mean_abs_error_last_25": 206.19205824120272, "mean_abs_error_last_50": 296.8575421723174, "mean_pred_prob": 0.0321617312845774, "mean_pred_prob_last_10": 0.16804245174862445, "mean_pred_prob_last_25": 0.08857032393570989, "mean_pred_prob_last_50": 0.052804987638955934, "mean_token_accuracy": 0.8666785955429077, "step": 48970 }, { "epoch": 0.870709117735943, "grad_norm": 2.6133017391320204, "learning_rate": 0.0001, "loss": 0.6588, "mean_abs_error": 521.0173350196102, "mean_abs_error_last_10": 251.18344609958535, "mean_abs_error_last_25": 401.67578666463595, "mean_abs_error_last_50": 428.16009079253234, "mean_pred_prob": 0.0524339611234609, "mean_pred_prob_last_10": 0.25296529157785697, "mean_pred_prob_last_25": 0.14494514632388017, "mean_pred_prob_last_50": 0.08793774045770988, "mean_token_accuracy": 0.8736226677894592, "step": 48980 }, { "epoch": 0.8708868860327449, "grad_norm": 2.1042213220990016, "learning_rate": 0.0001, "loss": 0.9008, "mean_abs_error": 963.9337466486732, "mean_abs_error_last_10": 562.0844708974445, "mean_abs_error_last_25": 553.5897479679743, "mean_abs_error_last_50": 633.6178537242346, "mean_pred_prob": 0.05058178481122013, "mean_pred_prob_last_10": 0.22496622036851477, "mean_pred_prob_last_25": 0.1341567577852402, "mean_pred_prob_last_50": 0.08366698347381316, "mean_token_accuracy": 0.8634852707386017, "step": 48990 }, { "epoch": 0.8710646543295468, "grad_norm": 0.8756676037368288, "learning_rate": 0.0001, "loss": 0.7912, "mean_abs_error": 321.20726638497, "mean_abs_error_last_10": 94.35076104120574, "mean_abs_error_last_25": 119.87580704768861, "mean_abs_error_last_50": 192.71431630855594, "mean_pred_prob": 0.042092310590669514, "mean_pred_prob_last_10": 0.1925553472712636, "mean_pred_prob_last_25": 0.10770972147583961, "mean_pred_prob_last_50": 0.06832439498975873, "mean_token_accuracy": 0.8683166563510895, "step": 49000 }, { "epoch": 0.8712424226263488, "grad_norm": 1.0471221311130285, "learning_rate": 0.0001, "loss": 0.6347, "mean_abs_error": 241.36373060801506, "mean_abs_error_last_10": 45.67553319034644, "mean_abs_error_last_25": 62.52604312255185, "mean_abs_error_last_50": 115.85126246221887, "mean_pred_prob": 0.06134555763565004, "mean_pred_prob_last_10": 0.2949649067595601, "mean_pred_prob_last_25": 0.16823599245399237, "mean_pred_prob_last_50": 0.1025239915587008, "mean_token_accuracy": 0.884977114200592, "step": 49010 }, { "epoch": 0.8714201909231508, "grad_norm": 1.810814280401662, "learning_rate": 0.0001, "loss": 0.6193, "mean_abs_error": 255.1167429451619, "mean_abs_error_last_10": 61.75383047190077, "mean_abs_error_last_25": 81.65750117754818, "mean_abs_error_last_50": 145.47581322970217, "mean_pred_prob": 0.041832706239074466, "mean_pred_prob_last_10": 0.2151654500514269, "mean_pred_prob_last_25": 0.1177466543391347, "mean_pred_prob_last_50": 0.07049129363149405, "mean_token_accuracy": 0.8838575124740601, "step": 49020 }, { "epoch": 0.8715979592199528, "grad_norm": 2.061190482730143, "learning_rate": 0.0001, "loss": 0.7017, "mean_abs_error": 517.273432081359, "mean_abs_error_last_10": 128.10415686578722, "mean_abs_error_last_25": 198.610417036013, "mean_abs_error_last_50": 271.3929592021612, "mean_pred_prob": 0.03773468090221286, "mean_pred_prob_last_10": 0.19506641738116742, "mean_pred_prob_last_25": 0.10682616172125563, "mean_pred_prob_last_50": 0.06560133955208584, "mean_token_accuracy": 0.8791176617145539, "step": 49030 }, { "epoch": 0.8717757275167547, "grad_norm": 3.034937413666082, "learning_rate": 0.0001, "loss": 0.8352, "mean_abs_error": 1736.1162108509204, "mean_abs_error_last_10": 999.6431851197033, "mean_abs_error_last_25": 1055.0080814476007, "mean_abs_error_last_50": 1274.363484884027, "mean_pred_prob": 0.024562649193103425, "mean_pred_prob_last_10": 0.10645997735846321, "mean_pred_prob_last_25": 0.06253811849164777, "mean_pred_prob_last_50": 0.04024374590953812, "mean_token_accuracy": 0.8680279672145843, "step": 49040 }, { "epoch": 0.8719534958135566, "grad_norm": 1.1365777941000599, "learning_rate": 0.0001, "loss": 0.6588, "mean_abs_error": 240.2702263129308, "mean_abs_error_last_10": 95.99205059321477, "mean_abs_error_last_25": 123.19570460634809, "mean_abs_error_last_50": 129.48420961714672, "mean_pred_prob": 0.05992840377148241, "mean_pred_prob_last_10": 0.27720718216150997, "mean_pred_prob_last_25": 0.15420202389359475, "mean_pred_prob_last_50": 0.09701760117895901, "mean_token_accuracy": 0.8713878095149994, "step": 49050 }, { "epoch": 0.8721312641103586, "grad_norm": 1.758399422506767, "learning_rate": 0.0001, "loss": 0.8312, "mean_abs_error": 1915.9035634907352, "mean_abs_error_last_10": 952.912426830155, "mean_abs_error_last_25": 1185.592407381207, "mean_abs_error_last_50": 1355.4811272453458, "mean_pred_prob": 0.028358430054504426, "mean_pred_prob_last_10": 0.12932264630217105, "mean_pred_prob_last_25": 0.07273216255707667, "mean_pred_prob_last_50": 0.04508202942961361, "mean_token_accuracy": 0.8605612218379974, "step": 49060 }, { "epoch": 0.8723090324071605, "grad_norm": 1.7689831594262158, "learning_rate": 0.0001, "loss": 0.7653, "mean_abs_error": 1239.4674505931023, "mean_abs_error_last_10": 408.5258867772426, "mean_abs_error_last_25": 604.7437391730873, "mean_abs_error_last_50": 818.4782027798625, "mean_pred_prob": 0.019458928768290206, "mean_pred_prob_last_10": 0.10123767980840057, "mean_pred_prob_last_25": 0.05428185412893072, "mean_pred_prob_last_50": 0.032610726507846265, "mean_token_accuracy": 0.8707459211349488, "step": 49070 }, { "epoch": 0.8724868007039625, "grad_norm": 1.3157867352134502, "learning_rate": 0.0001, "loss": 0.742, "mean_abs_error": 1247.9397673338015, "mean_abs_error_last_10": 700.8023069402439, "mean_abs_error_last_25": 824.0639902775614, "mean_abs_error_last_50": 972.7296491239415, "mean_pred_prob": 0.03261581112310523, "mean_pred_prob_last_10": 0.15277814165601739, "mean_pred_prob_last_25": 0.08584192081325455, "mean_pred_prob_last_50": 0.053511946951039134, "mean_token_accuracy": 0.8762890100479126, "step": 49080 }, { "epoch": 0.8726645690007644, "grad_norm": 1.2515710997205636, "learning_rate": 0.0001, "loss": 0.7896, "mean_abs_error": 411.97142348832705, "mean_abs_error_last_10": 153.33270086709945, "mean_abs_error_last_25": 175.21057105567155, "mean_abs_error_last_50": 187.65903161950217, "mean_pred_prob": 0.05578891695477069, "mean_pred_prob_last_10": 0.23828904626425357, "mean_pred_prob_last_25": 0.13991629937663674, "mean_pred_prob_last_50": 0.08964499721769244, "mean_token_accuracy": 0.8700356423854828, "step": 49090 }, { "epoch": 0.8728423372975663, "grad_norm": 1.918400250127702, "learning_rate": 0.0001, "loss": 0.6938, "mean_abs_error": 129.80675069637488, "mean_abs_error_last_10": 36.968885614152796, "mean_abs_error_last_25": 87.09112386364816, "mean_abs_error_last_50": 104.07930878161797, "mean_pred_prob": 0.04665757147595286, "mean_pred_prob_last_10": 0.2292628489434719, "mean_pred_prob_last_25": 0.12653320152312517, "mean_pred_prob_last_50": 0.07961130980402231, "mean_token_accuracy": 0.8857791423797607, "step": 49100 }, { "epoch": 0.8730201055943683, "grad_norm": 1.1200115346694193, "learning_rate": 0.0001, "loss": 0.6389, "mean_abs_error": 1140.9958142301741, "mean_abs_error_last_10": 643.9733727934289, "mean_abs_error_last_25": 758.3914751172649, "mean_abs_error_last_50": 893.0323548121476, "mean_pred_prob": 0.033195530228840656, "mean_pred_prob_last_10": 0.1591386213316582, "mean_pred_prob_last_25": 0.08897042046446586, "mean_pred_prob_last_50": 0.054921178135555235, "mean_token_accuracy": 0.8761420786380768, "step": 49110 }, { "epoch": 0.8731978738911702, "grad_norm": 1.347731771180193, "learning_rate": 0.0001, "loss": 0.6212, "mean_abs_error": 1192.0305693030175, "mean_abs_error_last_10": 382.68489090738893, "mean_abs_error_last_25": 522.4441808272466, "mean_abs_error_last_50": 757.4117796251589, "mean_pred_prob": 0.028765968125662768, "mean_pred_prob_last_10": 0.14755896698334253, "mean_pred_prob_last_25": 0.0828352989454288, "mean_pred_prob_last_50": 0.049540668301051485, "mean_token_accuracy": 0.8696549475193024, "step": 49120 }, { "epoch": 0.8733756421879721, "grad_norm": 1.9878480373020218, "learning_rate": 0.0001, "loss": 0.767, "mean_abs_error": 248.53256218951574, "mean_abs_error_last_10": 57.784453929426874, "mean_abs_error_last_25": 94.5845769118926, "mean_abs_error_last_50": 155.05220501671388, "mean_pred_prob": 0.05297897658310831, "mean_pred_prob_last_10": 0.270838413387537, "mean_pred_prob_last_25": 0.14724835399538277, "mean_pred_prob_last_50": 0.08937240322120488, "mean_token_accuracy": 0.8793338119983674, "step": 49130 }, { "epoch": 0.8735534104847742, "grad_norm": 1.8952914660527438, "learning_rate": 0.0001, "loss": 0.6895, "mean_abs_error": 216.96613426527134, "mean_abs_error_last_10": 30.76667446105721, "mean_abs_error_last_25": 64.35053123117981, "mean_abs_error_last_50": 118.03166240217115, "mean_pred_prob": 0.05430632792413235, "mean_pred_prob_last_10": 0.2708949603140354, "mean_pred_prob_last_25": 0.146731267683208, "mean_pred_prob_last_50": 0.08963348837569356, "mean_token_accuracy": 0.8722290635108948, "step": 49140 }, { "epoch": 0.8737311787815761, "grad_norm": 1.483453560337024, "learning_rate": 0.0001, "loss": 0.6433, "mean_abs_error": 957.015394225619, "mean_abs_error_last_10": 471.31751584752857, "mean_abs_error_last_25": 493.7573899084964, "mean_abs_error_last_50": 616.7204581609362, "mean_pred_prob": 0.0329261042206781, "mean_pred_prob_last_10": 0.16027738537231925, "mean_pred_prob_last_25": 0.09066915811854415, "mean_pred_prob_last_50": 0.055221927681122905, "mean_token_accuracy": 0.8695876836776734, "step": 49150 }, { "epoch": 0.8739089470783781, "grad_norm": 0.9702340120899604, "learning_rate": 0.0001, "loss": 0.6828, "mean_abs_error": 508.1330934029359, "mean_abs_error_last_10": 285.3447547202694, "mean_abs_error_last_25": 290.6927407383233, "mean_abs_error_last_50": 372.5260735933891, "mean_pred_prob": 0.028455573198152707, "mean_pred_prob_last_10": 0.15009876744006762, "mean_pred_prob_last_25": 0.0810000118915923, "mean_pred_prob_last_50": 0.04738296124269255, "mean_token_accuracy": 0.8776415228843689, "step": 49160 }, { "epoch": 0.87408671537518, "grad_norm": 1.0315157629225182, "learning_rate": 0.0001, "loss": 0.6052, "mean_abs_error": 780.0276823291933, "mean_abs_error_last_10": 435.58955139643257, "mean_abs_error_last_25": 475.40745837311187, "mean_abs_error_last_50": 582.5453168185712, "mean_pred_prob": 0.04147474113269709, "mean_pred_prob_last_10": 0.2085705679928651, "mean_pred_prob_last_25": 0.11851776306284592, "mean_pred_prob_last_50": 0.07069615010404959, "mean_token_accuracy": 0.8809759080410003, "step": 49170 }, { "epoch": 0.874264483671982, "grad_norm": 1.1586959528846157, "learning_rate": 0.0001, "loss": 0.7277, "mean_abs_error": 373.6291007174995, "mean_abs_error_last_10": 54.45021445587317, "mean_abs_error_last_25": 103.41165066772169, "mean_abs_error_last_50": 172.35261266805884, "mean_pred_prob": 0.038068034779280426, "mean_pred_prob_last_10": 0.18871015794575213, "mean_pred_prob_last_25": 0.10303711332380772, "mean_pred_prob_last_50": 0.06284078564494848, "mean_token_accuracy": 0.8596903324127197, "step": 49180 }, { "epoch": 0.8744422519687839, "grad_norm": 0.9686303656395693, "learning_rate": 0.0001, "loss": 0.6382, "mean_abs_error": 427.3830011854975, "mean_abs_error_last_10": 163.7963799568474, "mean_abs_error_last_25": 199.11934989505812, "mean_abs_error_last_50": 261.47106973207735, "mean_pred_prob": 0.04573998481500894, "mean_pred_prob_last_10": 0.1881523837801069, "mean_pred_prob_last_25": 0.11371037923963741, "mean_pred_prob_last_50": 0.07476779699791222, "mean_token_accuracy": 0.8724831700325012, "step": 49190 }, { "epoch": 0.8746200202655858, "grad_norm": 1.2157977568627003, "learning_rate": 0.0001, "loss": 0.7802, "mean_abs_error": 380.44896935001475, "mean_abs_error_last_10": 139.04761548572543, "mean_abs_error_last_25": 233.97783742708697, "mean_abs_error_last_50": 311.8944214029051, "mean_pred_prob": 0.048129669926129284, "mean_pred_prob_last_10": 0.2546012560836971, "mean_pred_prob_last_25": 0.14046745281666517, "mean_pred_prob_last_50": 0.08259943379089237, "mean_token_accuracy": 0.8682918667793273, "step": 49200 }, { "epoch": 0.8747977885623878, "grad_norm": 1.1827609134419659, "learning_rate": 0.0001, "loss": 0.6894, "mean_abs_error": 363.50182633785283, "mean_abs_error_last_10": 79.41694603650028, "mean_abs_error_last_25": 152.93883220313154, "mean_abs_error_last_50": 244.92694098791839, "mean_pred_prob": 0.03398882457986474, "mean_pred_prob_last_10": 0.16084399521350862, "mean_pred_prob_last_25": 0.08878270043060184, "mean_pred_prob_last_50": 0.0556648351252079, "mean_token_accuracy": 0.8670653343200684, "step": 49210 }, { "epoch": 0.8749755568591897, "grad_norm": 1.5869442404553702, "learning_rate": 0.0001, "loss": 0.7479, "mean_abs_error": 175.04887556021828, "mean_abs_error_last_10": 63.36904966162874, "mean_abs_error_last_25": 66.66514392976273, "mean_abs_error_last_50": 112.20156586855369, "mean_pred_prob": 0.03637743103317916, "mean_pred_prob_last_10": 0.17189052514731884, "mean_pred_prob_last_25": 0.09782036952674389, "mean_pred_prob_last_50": 0.060061910189688206, "mean_token_accuracy": 0.8694652378559112, "step": 49220 }, { "epoch": 0.8751533251559916, "grad_norm": 1.1947025502529072, "learning_rate": 0.0001, "loss": 0.6923, "mean_abs_error": 262.6023078716313, "mean_abs_error_last_10": 62.83739118680146, "mean_abs_error_last_25": 76.37633095999398, "mean_abs_error_last_50": 148.2346099852044, "mean_pred_prob": 0.04872337377164513, "mean_pred_prob_last_10": 0.2238585686311126, "mean_pred_prob_last_25": 0.13067725356668233, "mean_pred_prob_last_50": 0.08121032477356493, "mean_token_accuracy": 0.8740718007087708, "step": 49230 }, { "epoch": 0.8753310934527936, "grad_norm": 1.74838517299852, "learning_rate": 0.0001, "loss": 0.6638, "mean_abs_error": 258.20266787972645, "mean_abs_error_last_10": 26.621149793820393, "mean_abs_error_last_25": 43.48396545842906, "mean_abs_error_last_50": 92.98158936651355, "mean_pred_prob": 0.07099634490441531, "mean_pred_prob_last_10": 0.3188346611335874, "mean_pred_prob_last_25": 0.19018983673304318, "mean_pred_prob_last_50": 0.11812005150131881, "mean_token_accuracy": 0.8806942939758301, "step": 49240 }, { "epoch": 0.8755088617495955, "grad_norm": 2.011959233009994, "learning_rate": 0.0001, "loss": 0.6793, "mean_abs_error": 348.6436342811998, "mean_abs_error_last_10": 52.56795743008612, "mean_abs_error_last_25": 92.11283400407086, "mean_abs_error_last_50": 203.27231025568574, "mean_pred_prob": 0.056077225832268596, "mean_pred_prob_last_10": 0.24653685316443444, "mean_pred_prob_last_25": 0.14310139566659927, "mean_pred_prob_last_50": 0.09112041369080544, "mean_token_accuracy": 0.8736327588558197, "step": 49250 }, { "epoch": 0.8756866300463976, "grad_norm": 1.189864508162408, "learning_rate": 0.0001, "loss": 0.6676, "mean_abs_error": 390.0740451224494, "mean_abs_error_last_10": 87.39551429921296, "mean_abs_error_last_25": 122.71151950213532, "mean_abs_error_last_50": 239.49773822431902, "mean_pred_prob": 0.04791841529658995, "mean_pred_prob_last_10": 0.242167704901658, "mean_pred_prob_last_25": 0.13536400971934198, "mean_pred_prob_last_50": 0.08184254625812173, "mean_token_accuracy": 0.87784823179245, "step": 49260 }, { "epoch": 0.8758643983431995, "grad_norm": 2.2323096106443256, "learning_rate": 0.0001, "loss": 1.0431, "mean_abs_error": 375.2957450773565, "mean_abs_error_last_10": 64.64446851525284, "mean_abs_error_last_25": 116.44074553463251, "mean_abs_error_last_50": 201.09454161786203, "mean_pred_prob": 0.04615991632454097, "mean_pred_prob_last_10": 0.21641059629619122, "mean_pred_prob_last_25": 0.1217860721051693, "mean_pred_prob_last_50": 0.07588121462613344, "mean_token_accuracy": 0.8738737106323242, "step": 49270 }, { "epoch": 0.8760421666400015, "grad_norm": 1.3397148812666484, "learning_rate": 0.0001, "loss": 0.8162, "mean_abs_error": 325.66125262890984, "mean_abs_error_last_10": 67.46849267265435, "mean_abs_error_last_25": 88.4770346023119, "mean_abs_error_last_50": 154.94327284525878, "mean_pred_prob": 0.04352125313598663, "mean_pred_prob_last_10": 0.21268589198589324, "mean_pred_prob_last_25": 0.11578014669939876, "mean_pred_prob_last_50": 0.07240232811309397, "mean_token_accuracy": 0.8679304778575897, "step": 49280 }, { "epoch": 0.8762199349368034, "grad_norm": 1.635979304039363, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 209.13623112080245, "mean_abs_error_last_10": 33.748025758393126, "mean_abs_error_last_25": 67.69149007660572, "mean_abs_error_last_50": 115.13006575916009, "mean_pred_prob": 0.03791981572285295, "mean_pred_prob_last_10": 0.19337474033236504, "mean_pred_prob_last_25": 0.10577004849910736, "mean_pred_prob_last_50": 0.06444425331428646, "mean_token_accuracy": 0.871832263469696, "step": 49290 }, { "epoch": 0.8763977032336053, "grad_norm": 1.1279854497316162, "learning_rate": 0.0001, "loss": 0.5971, "mean_abs_error": 471.83234069464953, "mean_abs_error_last_10": 126.551536727395, "mean_abs_error_last_25": 186.1299676214649, "mean_abs_error_last_50": 214.1936481492822, "mean_pred_prob": 0.04250917198369279, "mean_pred_prob_last_10": 0.20086237674113364, "mean_pred_prob_last_25": 0.12020779813174158, "mean_pred_prob_last_50": 0.07252701557008549, "mean_token_accuracy": 0.8740011155605316, "step": 49300 }, { "epoch": 0.8765754715304073, "grad_norm": 2.0342019519240475, "learning_rate": 0.0001, "loss": 0.6143, "mean_abs_error": 74.53722298493872, "mean_abs_error_last_10": 15.945308917176984, "mean_abs_error_last_25": 25.915094015198445, "mean_abs_error_last_50": 41.02554826532032, "mean_pred_prob": 0.07136736856773496, "mean_pred_prob_last_10": 0.3287318401038647, "mean_pred_prob_last_25": 0.19248356819152831, "mean_pred_prob_last_50": 0.11908518746495247, "mean_token_accuracy": 0.8805301725864411, "step": 49310 }, { "epoch": 0.8767532398272092, "grad_norm": 2.162433361760607, "learning_rate": 0.0001, "loss": 0.7012, "mean_abs_error": 751.6605124149787, "mean_abs_error_last_10": 341.8572053497222, "mean_abs_error_last_25": 440.3755616008606, "mean_abs_error_last_50": 580.3443816459593, "mean_pred_prob": 0.04801456827117363, "mean_pred_prob_last_10": 0.22745243194513023, "mean_pred_prob_last_25": 0.12999466524051967, "mean_pred_prob_last_50": 0.07996465621108655, "mean_token_accuracy": 0.8750877976417542, "step": 49320 }, { "epoch": 0.8769310081240111, "grad_norm": 1.2398008556398565, "learning_rate": 0.0001, "loss": 0.6803, "mean_abs_error": 301.18700575627287, "mean_abs_error_last_10": 90.58260873538168, "mean_abs_error_last_25": 126.45874228608963, "mean_abs_error_last_50": 202.5490081559865, "mean_pred_prob": 0.043494107038713994, "mean_pred_prob_last_10": 0.1981289116665721, "mean_pred_prob_last_25": 0.11359152821823955, "mean_pred_prob_last_50": 0.0707023283932358, "mean_token_accuracy": 0.8744416654109954, "step": 49330 }, { "epoch": 0.8771087764208131, "grad_norm": 0.991691566511437, "learning_rate": 0.0001, "loss": 0.7305, "mean_abs_error": 455.12400179561325, "mean_abs_error_last_10": 148.58389169332423, "mean_abs_error_last_25": 238.5487625530756, "mean_abs_error_last_50": 307.2978217326065, "mean_pred_prob": 0.04235697952099145, "mean_pred_prob_last_10": 0.19995358008891345, "mean_pred_prob_last_25": 0.11417852705344558, "mean_pred_prob_last_50": 0.07067962326109409, "mean_token_accuracy": 0.8645597517490387, "step": 49340 }, { "epoch": 0.877286544717615, "grad_norm": 1.4134699036732203, "learning_rate": 0.0001, "loss": 0.6811, "mean_abs_error": 981.1327673803464, "mean_abs_error_last_10": 288.427856063508, "mean_abs_error_last_25": 351.1464128567065, "mean_abs_error_last_50": 511.12117315632986, "mean_pred_prob": 0.01836076932377182, "mean_pred_prob_last_10": 0.0936489662504755, "mean_pred_prob_last_25": 0.04956089125480503, "mean_pred_prob_last_50": 0.030672928277635948, "mean_token_accuracy": 0.8682619214057923, "step": 49350 }, { "epoch": 0.877464313014417, "grad_norm": 1.777062188732499, "learning_rate": 0.0001, "loss": 0.6177, "mean_abs_error": 391.8568801156095, "mean_abs_error_last_10": 76.4908054300336, "mean_abs_error_last_25": 127.38904893595597, "mean_abs_error_last_50": 203.5782761987387, "mean_pred_prob": 0.03780140015296638, "mean_pred_prob_last_10": 0.1767196122556925, "mean_pred_prob_last_25": 0.09658857313916087, "mean_pred_prob_last_50": 0.06107981973327696, "mean_token_accuracy": 0.8659588515758514, "step": 49360 }, { "epoch": 0.8776420813112189, "grad_norm": 1.76979628280282, "learning_rate": 0.0001, "loss": 0.6167, "mean_abs_error": 621.327770015795, "mean_abs_error_last_10": 121.16111180421724, "mean_abs_error_last_25": 178.89905187011, "mean_abs_error_last_50": 353.6537418313494, "mean_pred_prob": 0.05618345815455541, "mean_pred_prob_last_10": 0.21886291997507215, "mean_pred_prob_last_25": 0.13216503052972256, "mean_pred_prob_last_50": 0.08912414426449686, "mean_token_accuracy": 0.8798310399055481, "step": 49370 }, { "epoch": 0.877819849608021, "grad_norm": 1.4352921634918994, "learning_rate": 0.0001, "loss": 0.6672, "mean_abs_error": 342.1737643047347, "mean_abs_error_last_10": 63.8572372269108, "mean_abs_error_last_25": 107.3575368936296, "mean_abs_error_last_50": 147.37896638559687, "mean_pred_prob": 0.04899939075112343, "mean_pred_prob_last_10": 0.2292110648006201, "mean_pred_prob_last_25": 0.14166377019137144, "mean_pred_prob_last_50": 0.085070097213611, "mean_token_accuracy": 0.8801519453525544, "step": 49380 }, { "epoch": 0.8779976179048229, "grad_norm": 1.8095881480978342, "learning_rate": 0.0001, "loss": 0.7855, "mean_abs_error": 164.83059100502078, "mean_abs_error_last_10": 32.69954636989298, "mean_abs_error_last_25": 52.9659704654718, "mean_abs_error_last_50": 82.9838103474049, "mean_pred_prob": 0.05892278151586652, "mean_pred_prob_last_10": 0.2688130956143141, "mean_pred_prob_last_25": 0.1547689510509372, "mean_pred_prob_last_50": 0.09768360573798418, "mean_token_accuracy": 0.8728603959083557, "step": 49390 }, { "epoch": 0.8781753862016248, "grad_norm": 1.6817513774471815, "learning_rate": 0.0001, "loss": 0.6589, "mean_abs_error": 428.4495347647511, "mean_abs_error_last_10": 112.65480368197223, "mean_abs_error_last_25": 185.77796516047565, "mean_abs_error_last_50": 265.79485343221864, "mean_pred_prob": 0.05000822394504212, "mean_pred_prob_last_10": 0.2336703364737332, "mean_pred_prob_last_25": 0.13207663936773315, "mean_pred_prob_last_50": 0.08149265989195556, "mean_token_accuracy": 0.8714479804039001, "step": 49400 }, { "epoch": 0.8783531544984268, "grad_norm": 1.204222419662372, "learning_rate": 0.0001, "loss": 0.7378, "mean_abs_error": 950.0715214322379, "mean_abs_error_last_10": 401.90503383501067, "mean_abs_error_last_25": 469.17925972985574, "mean_abs_error_last_50": 612.5213190120345, "mean_pred_prob": 0.037244029887369835, "mean_pred_prob_last_10": 0.18402774471032898, "mean_pred_prob_last_25": 0.09958885855448898, "mean_pred_prob_last_50": 0.0617216073704185, "mean_token_accuracy": 0.8666751444339752, "step": 49410 }, { "epoch": 0.8785309227952287, "grad_norm": 0.8318111377021511, "learning_rate": 0.0001, "loss": 0.6845, "mean_abs_error": 167.6913043391565, "mean_abs_error_last_10": 38.73033392602518, "mean_abs_error_last_25": 52.406845812153975, "mean_abs_error_last_50": 74.10485972860499, "mean_pred_prob": 0.055044400645419954, "mean_pred_prob_last_10": 0.2704041313380003, "mean_pred_prob_last_25": 0.14807700421661138, "mean_pred_prob_last_50": 0.09104763176292181, "mean_token_accuracy": 0.8818426728248596, "step": 49420 }, { "epoch": 0.8787086910920306, "grad_norm": 2.112552687148761, "learning_rate": 0.0001, "loss": 0.6021, "mean_abs_error": 450.37746942272605, "mean_abs_error_last_10": 72.65292683014842, "mean_abs_error_last_25": 118.43544793786066, "mean_abs_error_last_50": 217.79470678241006, "mean_pred_prob": 0.03664882816374302, "mean_pred_prob_last_10": 0.18590826578438283, "mean_pred_prob_last_25": 0.1003425981849432, "mean_pred_prob_last_50": 0.061538893077522513, "mean_token_accuracy": 0.8779554426670074, "step": 49430 }, { "epoch": 0.8788864593888326, "grad_norm": 1.0341557593800939, "learning_rate": 0.0001, "loss": 0.6732, "mean_abs_error": 208.7620000900084, "mean_abs_error_last_10": 60.31256123879807, "mean_abs_error_last_25": 99.52702923538688, "mean_abs_error_last_50": 146.33721636895623, "mean_pred_prob": 0.032588342158123854, "mean_pred_prob_last_10": 0.1775788299739361, "mean_pred_prob_last_25": 0.09601918458938599, "mean_pred_prob_last_50": 0.05649156011641025, "mean_token_accuracy": 0.8654727160930633, "step": 49440 }, { "epoch": 0.8790642276856345, "grad_norm": 0.9047328093754009, "learning_rate": 0.0001, "loss": 0.7575, "mean_abs_error": 743.0213099605276, "mean_abs_error_last_10": 388.4705702363157, "mean_abs_error_last_25": 425.25129615658017, "mean_abs_error_last_50": 519.0776544367666, "mean_pred_prob": 0.031823630636790765, "mean_pred_prob_last_10": 0.14825794199714437, "mean_pred_prob_last_25": 0.07879881066619418, "mean_pred_prob_last_50": 0.051198076753644274, "mean_token_accuracy": 0.8705695033073425, "step": 49450 }, { "epoch": 0.8792419959824365, "grad_norm": 2.2351949935827915, "learning_rate": 0.0001, "loss": 0.7052, "mean_abs_error": 770.9124844870913, "mean_abs_error_last_10": 307.55278132784287, "mean_abs_error_last_25": 355.9408557833484, "mean_abs_error_last_50": 442.3756462592234, "mean_pred_prob": 0.04035199075588025, "mean_pred_prob_last_10": 0.19815122028812765, "mean_pred_prob_last_25": 0.10855414116522297, "mean_pred_prob_last_50": 0.06652138059725984, "mean_token_accuracy": 0.867794007062912, "step": 49460 }, { "epoch": 0.8794197642792384, "grad_norm": 1.2888732800499236, "learning_rate": 0.0001, "loss": 0.7328, "mean_abs_error": 384.16697807399584, "mean_abs_error_last_10": 180.4214338001509, "mean_abs_error_last_25": 218.94710196009456, "mean_abs_error_last_50": 270.7579359906935, "mean_pred_prob": 0.031589662004262206, "mean_pred_prob_last_10": 0.16743736807256937, "mean_pred_prob_last_25": 0.08952887859195471, "mean_pred_prob_last_50": 0.05294578867033124, "mean_token_accuracy": 0.8884198188781738, "step": 49470 }, { "epoch": 0.8795975325760403, "grad_norm": 1.9561963427632094, "learning_rate": 0.0001, "loss": 0.7453, "mean_abs_error": 466.7696120488119, "mean_abs_error_last_10": 92.1867459520907, "mean_abs_error_last_25": 159.2447431862019, "mean_abs_error_last_50": 239.19871157320094, "mean_pred_prob": 0.022720760991796853, "mean_pred_prob_last_10": 0.12133254818618297, "mean_pred_prob_last_25": 0.06400477439165116, "mean_pred_prob_last_50": 0.03820706335827708, "mean_token_accuracy": 0.8716232776641846, "step": 49480 }, { "epoch": 0.8797753008728424, "grad_norm": 1.1172240029996308, "learning_rate": 0.0001, "loss": 0.6611, "mean_abs_error": 117.07428220592541, "mean_abs_error_last_10": 16.448083933854615, "mean_abs_error_last_25": 30.30934748286984, "mean_abs_error_last_50": 47.22920026318968, "mean_pred_prob": 0.06926201907917857, "mean_pred_prob_last_10": 0.3064995165914297, "mean_pred_prob_last_25": 0.1817447317764163, "mean_pred_prob_last_50": 0.11287467032670975, "mean_token_accuracy": 0.8767951488494873, "step": 49490 }, { "epoch": 0.8799530691696443, "grad_norm": 1.4102127042656816, "learning_rate": 0.0001, "loss": 0.6441, "mean_abs_error": 876.8829611969946, "mean_abs_error_last_10": 602.6035374529839, "mean_abs_error_last_25": 689.3717066996486, "mean_abs_error_last_50": 764.1735412731024, "mean_pred_prob": 0.057355849063606, "mean_pred_prob_last_10": 0.2780154002655763, "mean_pred_prob_last_25": 0.1557525976910256, "mean_pred_prob_last_50": 0.09572023378423182, "mean_token_accuracy": 0.8776543855667114, "step": 49500 }, { "epoch": 0.8801308374664463, "grad_norm": 2.283836973080284, "learning_rate": 0.0001, "loss": 0.6065, "mean_abs_error": 784.2897885181443, "mean_abs_error_last_10": 367.1553771455859, "mean_abs_error_last_25": 454.0880734351461, "mean_abs_error_last_50": 586.7491319389836, "mean_pred_prob": 0.041166326770326125, "mean_pred_prob_last_10": 0.19451423728605732, "mean_pred_prob_last_25": 0.111128619563533, "mean_pred_prob_last_50": 0.06865975004329812, "mean_token_accuracy": 0.8800680875778198, "step": 49510 }, { "epoch": 0.8803086057632482, "grad_norm": 4.828515150522525, "learning_rate": 0.0001, "loss": 0.654, "mean_abs_error": 655.8400344067672, "mean_abs_error_last_10": 161.06113679476408, "mean_abs_error_last_25": 307.85833707497034, "mean_abs_error_last_50": 395.2750485936805, "mean_pred_prob": 0.03449185451027006, "mean_pred_prob_last_10": 0.18439449525903912, "mean_pred_prob_last_25": 0.09808003919897601, "mean_pred_prob_last_50": 0.058350493980105965, "mean_token_accuracy": 0.8797452509403229, "step": 49520 }, { "epoch": 0.8804863740600501, "grad_norm": 2.397555047283242, "learning_rate": 0.0001, "loss": 0.8076, "mean_abs_error": 1802.0545153658065, "mean_abs_error_last_10": 651.1331882857567, "mean_abs_error_last_25": 855.6082691971664, "mean_abs_error_last_50": 1177.8717714616666, "mean_pred_prob": 0.017430353110830764, "mean_pred_prob_last_10": 0.09249251459841616, "mean_pred_prob_last_25": 0.04783253519999562, "mean_pred_prob_last_50": 0.028791627397004048, "mean_token_accuracy": 0.8627018511295319, "step": 49530 }, { "epoch": 0.8806641423568521, "grad_norm": 1.6484667985620134, "learning_rate": 0.0001, "loss": 0.7812, "mean_abs_error": 841.382332735136, "mean_abs_error_last_10": 367.1132346369822, "mean_abs_error_last_25": 425.85555418197754, "mean_abs_error_last_50": 466.63228312002974, "mean_pred_prob": 0.03635328913806006, "mean_pred_prob_last_10": 0.16229453202104197, "mean_pred_prob_last_25": 0.09477493243757636, "mean_pred_prob_last_50": 0.05961678519379347, "mean_token_accuracy": 0.8788870513439179, "step": 49540 }, { "epoch": 0.880841910653654, "grad_norm": 1.2243999931514584, "learning_rate": 0.0001, "loss": 0.6886, "mean_abs_error": 321.80651086562597, "mean_abs_error_last_10": 125.20124668322413, "mean_abs_error_last_25": 130.79103233128075, "mean_abs_error_last_50": 168.66872922680867, "mean_pred_prob": 0.04874995071440935, "mean_pred_prob_last_10": 0.23993961941450834, "mean_pred_prob_last_25": 0.13059616247192024, "mean_pred_prob_last_50": 0.07969211433082819, "mean_token_accuracy": 0.8923922300338745, "step": 49550 }, { "epoch": 0.881019678950456, "grad_norm": 1.3386509141595233, "learning_rate": 0.0001, "loss": 0.8083, "mean_abs_error": 1171.2114025801316, "mean_abs_error_last_10": 708.7345555751608, "mean_abs_error_last_25": 768.1164354816369, "mean_abs_error_last_50": 872.7072640659874, "mean_pred_prob": 0.037974608447984795, "mean_pred_prob_last_10": 0.18742166195370374, "mean_pred_prob_last_25": 0.10138117252645315, "mean_pred_prob_last_50": 0.061875022039021135, "mean_token_accuracy": 0.8627116739749908, "step": 49560 }, { "epoch": 0.8811974472472579, "grad_norm": 0.9654404903763689, "learning_rate": 0.0001, "loss": 0.651, "mean_abs_error": 651.2784905374116, "mean_abs_error_last_10": 167.15391558294723, "mean_abs_error_last_25": 344.5681302478089, "mean_abs_error_last_50": 468.45901125101574, "mean_pred_prob": 0.03143348508747294, "mean_pred_prob_last_10": 0.17331473495578392, "mean_pred_prob_last_25": 0.0873959862627089, "mean_pred_prob_last_50": 0.05291626706020906, "mean_token_accuracy": 0.8693287372589111, "step": 49570 }, { "epoch": 0.8813752155440598, "grad_norm": 2.1733516591993025, "learning_rate": 0.0001, "loss": 0.7291, "mean_abs_error": 683.2819316642283, "mean_abs_error_last_10": 126.32689119973034, "mean_abs_error_last_25": 203.82566994992723, "mean_abs_error_last_50": 380.6137651933908, "mean_pred_prob": 0.03514667180133983, "mean_pred_prob_last_10": 0.17724993337178602, "mean_pred_prob_last_25": 0.09600114568602294, "mean_pred_prob_last_50": 0.05937384132994339, "mean_token_accuracy": 0.8857232630252838, "step": 49580 }, { "epoch": 0.8815529838408618, "grad_norm": 2.833184731748237, "learning_rate": 0.0001, "loss": 0.608, "mean_abs_error": 303.00769209186274, "mean_abs_error_last_10": 40.37864629229814, "mean_abs_error_last_25": 66.43743055370172, "mean_abs_error_last_50": 139.27161972267933, "mean_pred_prob": 0.0651247677858919, "mean_pred_prob_last_10": 0.2961407122667879, "mean_pred_prob_last_25": 0.17306058681569994, "mean_pred_prob_last_50": 0.10717452047392725, "mean_token_accuracy": 0.8667809069156647, "step": 49590 }, { "epoch": 0.8817307521376637, "grad_norm": 1.5907233036216288, "learning_rate": 0.0001, "loss": 0.6036, "mean_abs_error": 1032.5034393122999, "mean_abs_error_last_10": 537.8935883491349, "mean_abs_error_last_25": 630.3396336334798, "mean_abs_error_last_50": 768.6324289907245, "mean_pred_prob": 0.04506386117136572, "mean_pred_prob_last_10": 0.20634808111062738, "mean_pred_prob_last_25": 0.1196785441308748, "mean_pred_prob_last_50": 0.07458823898050468, "mean_token_accuracy": 0.8755589127540588, "step": 49600 }, { "epoch": 0.8819085204344658, "grad_norm": 1.546513452878312, "learning_rate": 0.0001, "loss": 0.91, "mean_abs_error": 738.0288257070858, "mean_abs_error_last_10": 112.13802493659459, "mean_abs_error_last_25": 161.66286546180476, "mean_abs_error_last_50": 347.9036362130589, "mean_pred_prob": 0.029261929128551856, "mean_pred_prob_last_10": 0.13224470716668293, "mean_pred_prob_last_25": 0.07689327881671489, "mean_pred_prob_last_50": 0.048554065951611845, "mean_token_accuracy": 0.8716336131095886, "step": 49610 }, { "epoch": 0.8820862887312677, "grad_norm": 1.4443726425796755, "learning_rate": 0.0001, "loss": 0.8628, "mean_abs_error": 706.6613462032449, "mean_abs_error_last_10": 366.8280618108914, "mean_abs_error_last_25": 424.07620459924954, "mean_abs_error_last_50": 544.5387582145788, "mean_pred_prob": 0.05457208513107616, "mean_pred_prob_last_10": 0.26051008053473196, "mean_pred_prob_last_25": 0.1448753802513238, "mean_pred_prob_last_50": 0.08980312997009605, "mean_token_accuracy": 0.878465723991394, "step": 49620 }, { "epoch": 0.8822640570280696, "grad_norm": 1.0862580003442395, "learning_rate": 0.0001, "loss": 0.6632, "mean_abs_error": 324.8796008803985, "mean_abs_error_last_10": 94.31438296517877, "mean_abs_error_last_25": 102.78305752908685, "mean_abs_error_last_50": 164.3016837759057, "mean_pred_prob": 0.051860503200441596, "mean_pred_prob_last_10": 0.2403694786829874, "mean_pred_prob_last_25": 0.13654151428490877, "mean_pred_prob_last_50": 0.08511252782773226, "mean_token_accuracy": 0.8756618738174439, "step": 49630 }, { "epoch": 0.8824418253248716, "grad_norm": 2.570474110146917, "learning_rate": 0.0001, "loss": 0.7159, "mean_abs_error": 428.1265155017022, "mean_abs_error_last_10": 99.4379550209396, "mean_abs_error_last_25": 173.35140751852884, "mean_abs_error_last_50": 244.68684942307345, "mean_pred_prob": 0.033744972478598353, "mean_pred_prob_last_10": 0.18585924729704856, "mean_pred_prob_last_25": 0.1002526618540287, "mean_pred_prob_last_50": 0.05752032371237874, "mean_token_accuracy": 0.8823466300964355, "step": 49640 }, { "epoch": 0.8826195936216735, "grad_norm": 2.9477368374039603, "learning_rate": 0.0001, "loss": 0.6686, "mean_abs_error": 722.4315038596148, "mean_abs_error_last_10": 388.1170552997767, "mean_abs_error_last_25": 436.62055678808576, "mean_abs_error_last_50": 508.23727208409554, "mean_pred_prob": 0.044776448787888515, "mean_pred_prob_last_10": 0.2149822678999044, "mean_pred_prob_last_25": 0.12048707490030211, "mean_pred_prob_last_50": 0.07553711590007879, "mean_token_accuracy": 0.8888453304767608, "step": 49650 }, { "epoch": 0.8827973619184755, "grad_norm": 2.813394967995532, "learning_rate": 0.0001, "loss": 0.8083, "mean_abs_error": 576.7648630312146, "mean_abs_error_last_10": 74.79028930277013, "mean_abs_error_last_25": 123.84862330135441, "mean_abs_error_last_50": 319.0561380571163, "mean_pred_prob": 0.021993068000301718, "mean_pred_prob_last_10": 0.11903092637658119, "mean_pred_prob_last_25": 0.063131801225245, "mean_pred_prob_last_50": 0.037552237417548893, "mean_token_accuracy": 0.8674079895019531, "step": 49660 }, { "epoch": 0.8829751302152774, "grad_norm": 2.212606725839089, "learning_rate": 0.0001, "loss": 0.7256, "mean_abs_error": 316.2497832079723, "mean_abs_error_last_10": 62.71299841999824, "mean_abs_error_last_25": 144.77394591874616, "mean_abs_error_last_50": 263.50450772942247, "mean_pred_prob": 0.0471330483444035, "mean_pred_prob_last_10": 0.24443170949816703, "mean_pred_prob_last_25": 0.13204821590334176, "mean_pred_prob_last_50": 0.07901350185275077, "mean_token_accuracy": 0.8801490306854248, "step": 49670 }, { "epoch": 0.8831528985120793, "grad_norm": 1.239687420121232, "learning_rate": 0.0001, "loss": 0.6785, "mean_abs_error": 670.8191559071801, "mean_abs_error_last_10": 276.61875635655497, "mean_abs_error_last_25": 361.05257272713527, "mean_abs_error_last_50": 459.266458530391, "mean_pred_prob": 0.061112381098791954, "mean_pred_prob_last_10": 0.23518192154588177, "mean_pred_prob_last_25": 0.147721776922117, "mean_pred_prob_last_50": 0.09816483829636127, "mean_token_accuracy": 0.8804366171360016, "step": 49680 }, { "epoch": 0.8833306668088813, "grad_norm": 1.645831408582725, "learning_rate": 0.0001, "loss": 0.6613, "mean_abs_error": 162.69455646373686, "mean_abs_error_last_10": 58.89043076276117, "mean_abs_error_last_25": 72.76738264045407, "mean_abs_error_last_50": 96.23545262685859, "mean_pred_prob": 0.04814820517785847, "mean_pred_prob_last_10": 0.2258831724524498, "mean_pred_prob_last_25": 0.12688249368220567, "mean_pred_prob_last_50": 0.07940043238922953, "mean_token_accuracy": 0.8739695906639099, "step": 49690 }, { "epoch": 0.8835084351056832, "grad_norm": 2.0121735585285143, "learning_rate": 0.0001, "loss": 0.6751, "mean_abs_error": 407.1138413655218, "mean_abs_error_last_10": 86.67550846226165, "mean_abs_error_last_25": 161.7817814374602, "mean_abs_error_last_50": 257.26176635604827, "mean_pred_prob": 0.05386050140368752, "mean_pred_prob_last_10": 0.24004840593552218, "mean_pred_prob_last_25": 0.1400379837723449, "mean_pred_prob_last_50": 0.08905279561877251, "mean_token_accuracy": 0.8754796087741852, "step": 49700 }, { "epoch": 0.8836862034024852, "grad_norm": 1.680609158200222, "learning_rate": 0.0001, "loss": 0.6559, "mean_abs_error": 513.4859971777361, "mean_abs_error_last_10": 230.47172136183218, "mean_abs_error_last_25": 328.3293771688692, "mean_abs_error_last_50": 360.39979135698616, "mean_pred_prob": 0.03284234912134707, "mean_pred_prob_last_10": 0.15527602471411228, "mean_pred_prob_last_25": 0.08927723536617123, "mean_pred_prob_last_50": 0.054801345022860916, "mean_token_accuracy": 0.8650616347789765, "step": 49710 }, { "epoch": 0.8838639716992871, "grad_norm": 2.90087805202693, "learning_rate": 0.0001, "loss": 0.8259, "mean_abs_error": 194.91247286145344, "mean_abs_error_last_10": 54.42422702807041, "mean_abs_error_last_25": 96.76090862815127, "mean_abs_error_last_50": 123.09585941337218, "mean_pred_prob": 0.047240604367107156, "mean_pred_prob_last_10": 0.25183159224689006, "mean_pred_prob_last_25": 0.13367160260677338, "mean_pred_prob_last_50": 0.08062359970062971, "mean_token_accuracy": 0.8824976086616516, "step": 49720 }, { "epoch": 0.8840417399960891, "grad_norm": 2.0743307730412943, "learning_rate": 0.0001, "loss": 0.7349, "mean_abs_error": 477.16950754266827, "mean_abs_error_last_10": 108.48023704496116, "mean_abs_error_last_25": 177.27082808553027, "mean_abs_error_last_50": 340.65831700421177, "mean_pred_prob": 0.03704714284394868, "mean_pred_prob_last_10": 0.17635322210844606, "mean_pred_prob_last_25": 0.09785784678533674, "mean_pred_prob_last_50": 0.06127388095483184, "mean_token_accuracy": 0.8718631565570831, "step": 49730 }, { "epoch": 0.8842195082928911, "grad_norm": 0.9078897671375088, "learning_rate": 0.0001, "loss": 0.7759, "mean_abs_error": 594.9454732009242, "mean_abs_error_last_10": 182.2897049000157, "mean_abs_error_last_25": 274.3159575475789, "mean_abs_error_last_50": 369.33573281082846, "mean_pred_prob": 0.03352689706953242, "mean_pred_prob_last_10": 0.1676798836677335, "mean_pred_prob_last_25": 0.09293504818342627, "mean_pred_prob_last_50": 0.05583327006315812, "mean_token_accuracy": 0.8713033556938171, "step": 49740 }, { "epoch": 0.884397276589693, "grad_norm": 1.4176910605398283, "learning_rate": 0.0001, "loss": 0.6402, "mean_abs_error": 558.9406053664441, "mean_abs_error_last_10": 122.59598354589419, "mean_abs_error_last_25": 208.93373349236109, "mean_abs_error_last_50": 291.9442279418888, "mean_pred_prob": 0.018437073845416308, "mean_pred_prob_last_10": 0.10157396476715803, "mean_pred_prob_last_25": 0.05315637197345495, "mean_pred_prob_last_50": 0.032045629573985934, "mean_token_accuracy": 0.8760688364505768, "step": 49750 }, { "epoch": 0.884575044886495, "grad_norm": 2.24852619678596, "learning_rate": 0.0001, "loss": 0.7093, "mean_abs_error": 688.8239910329828, "mean_abs_error_last_10": 245.2134419659169, "mean_abs_error_last_25": 304.8823850698945, "mean_abs_error_last_50": 406.70014777759286, "mean_pred_prob": 0.030626073241000994, "mean_pred_prob_last_10": 0.1618233460234478, "mean_pred_prob_last_25": 0.08684829411213286, "mean_pred_prob_last_50": 0.05278116440749727, "mean_token_accuracy": 0.8770488381385804, "step": 49760 }, { "epoch": 0.8847528131832969, "grad_norm": 1.8480881491014203, "learning_rate": 0.0001, "loss": 0.6154, "mean_abs_error": 213.3075360916158, "mean_abs_error_last_10": 41.504659047849586, "mean_abs_error_last_25": 49.21026594981835, "mean_abs_error_last_50": 91.65882693788174, "mean_pred_prob": 0.0549098236951977, "mean_pred_prob_last_10": 0.2507826711982489, "mean_pred_prob_last_25": 0.14602254750207067, "mean_pred_prob_last_50": 0.09183836048468948, "mean_token_accuracy": 0.8777880072593689, "step": 49770 }, { "epoch": 0.8849305814800988, "grad_norm": 1.2735349837006975, "learning_rate": 0.0001, "loss": 0.591, "mean_abs_error": 1511.8885003493351, "mean_abs_error_last_10": 1009.1210707272555, "mean_abs_error_last_25": 1093.263471153077, "mean_abs_error_last_50": 1266.1429814791204, "mean_pred_prob": 0.04103499800730788, "mean_pred_prob_last_10": 0.20204306134110084, "mean_pred_prob_last_25": 0.11279573719148175, "mean_pred_prob_last_50": 0.06843581937355339, "mean_token_accuracy": 0.8889421641826629, "step": 49780 }, { "epoch": 0.8851083497769008, "grad_norm": 1.303001310390208, "learning_rate": 0.0001, "loss": 0.6079, "mean_abs_error": 1060.9487080177942, "mean_abs_error_last_10": 528.570814365543, "mean_abs_error_last_25": 613.3546697670739, "mean_abs_error_last_50": 735.9115657947046, "mean_pred_prob": 0.03744278634694638, "mean_pred_prob_last_10": 0.17921572617196943, "mean_pred_prob_last_25": 0.09687503584718797, "mean_pred_prob_last_50": 0.06056574431422632, "mean_token_accuracy": 0.8775706231594086, "step": 49790 }, { "epoch": 0.8852861180737027, "grad_norm": 2.954901170958341, "learning_rate": 0.0001, "loss": 0.8503, "mean_abs_error": 483.00238058544494, "mean_abs_error_last_10": 95.38533742202866, "mean_abs_error_last_25": 159.18974170132333, "mean_abs_error_last_50": 275.7392905083651, "mean_pred_prob": 0.03144780399743467, "mean_pred_prob_last_10": 0.15604035463184118, "mean_pred_prob_last_25": 0.08815961165819317, "mean_pred_prob_last_50": 0.052022771234624086, "mean_token_accuracy": 0.8676549851894378, "step": 49800 }, { "epoch": 0.8854638863705047, "grad_norm": 1.2507055548254187, "learning_rate": 0.0001, "loss": 0.7349, "mean_abs_error": 102.785965232377, "mean_abs_error_last_10": 24.601152243916502, "mean_abs_error_last_25": 61.382172321261535, "mean_abs_error_last_50": 84.22605774728355, "mean_pred_prob": 0.0733768210746348, "mean_pred_prob_last_10": 0.34970933347940447, "mean_pred_prob_last_25": 0.19738927222788333, "mean_pred_prob_last_50": 0.12169171255081893, "mean_token_accuracy": 0.8595299363136292, "step": 49810 }, { "epoch": 0.8856416546673066, "grad_norm": 1.247780059537711, "learning_rate": 0.0001, "loss": 0.5816, "mean_abs_error": 496.70951221085886, "mean_abs_error_last_10": 124.91903071112804, "mean_abs_error_last_25": 220.76404463714692, "mean_abs_error_last_50": 270.80682046961306, "mean_pred_prob": 0.044898901268607, "mean_pred_prob_last_10": 0.21566268858732657, "mean_pred_prob_last_25": 0.11941964688594453, "mean_pred_prob_last_50": 0.0746512376179453, "mean_token_accuracy": 0.8744695007801055, "step": 49820 }, { "epoch": 0.8858194229641085, "grad_norm": 1.4460295608212936, "learning_rate": 0.0001, "loss": 0.6829, "mean_abs_error": 279.1909671962436, "mean_abs_error_last_10": 92.12226305337116, "mean_abs_error_last_25": 129.7699198709838, "mean_abs_error_last_50": 174.71901803701812, "mean_pred_prob": 0.03620516932569444, "mean_pred_prob_last_10": 0.1821017436683178, "mean_pred_prob_last_25": 0.09699572250247002, "mean_pred_prob_last_50": 0.06075204564258456, "mean_token_accuracy": 0.8624700844287873, "step": 49830 }, { "epoch": 0.8859971912609105, "grad_norm": 2.0672617387960095, "learning_rate": 0.0001, "loss": 0.6676, "mean_abs_error": 342.47408899894356, "mean_abs_error_last_10": 109.92681170426438, "mean_abs_error_last_25": 236.23917055515494, "mean_abs_error_last_50": 316.01709436280817, "mean_pred_prob": 0.04209227510727942, "mean_pred_prob_last_10": 0.19014895744621754, "mean_pred_prob_last_25": 0.11172376703470946, "mean_pred_prob_last_50": 0.06893865708261729, "mean_token_accuracy": 0.8678506553173065, "step": 49840 }, { "epoch": 0.8861749595577125, "grad_norm": 1.3280487542582395, "learning_rate": 0.0001, "loss": 0.6887, "mean_abs_error": 440.2683214131013, "mean_abs_error_last_10": 189.91411845196177, "mean_abs_error_last_25": 215.6988029240077, "mean_abs_error_last_50": 268.85646170431943, "mean_pred_prob": 0.04276467408053577, "mean_pred_prob_last_10": 0.187518089893274, "mean_pred_prob_last_25": 0.11100220901425928, "mean_pred_prob_last_50": 0.06949426645878702, "mean_token_accuracy": 0.8726393640041351, "step": 49850 }, { "epoch": 0.8863527278545145, "grad_norm": 1.4446126267468355, "learning_rate": 0.0001, "loss": 0.7201, "mean_abs_error": 770.8246517792653, "mean_abs_error_last_10": 301.0925028524352, "mean_abs_error_last_25": 365.99954398851787, "mean_abs_error_last_50": 516.0315593754777, "mean_pred_prob": 0.03704735670471564, "mean_pred_prob_last_10": 0.17878025163081473, "mean_pred_prob_last_25": 0.10021395243238658, "mean_pred_prob_last_50": 0.06084034675150178, "mean_token_accuracy": 0.8746582388877868, "step": 49860 }, { "epoch": 0.8865304961513164, "grad_norm": 1.7769594269113722, "learning_rate": 0.0001, "loss": 0.6851, "mean_abs_error": 386.8999042076576, "mean_abs_error_last_10": 167.23734138464891, "mean_abs_error_last_25": 225.98051011557004, "mean_abs_error_last_50": 290.6499652765311, "mean_pred_prob": 0.03746310700662434, "mean_pred_prob_last_10": 0.17165072485804558, "mean_pred_prob_last_25": 0.09815702624619008, "mean_pred_prob_last_50": 0.061620882153511046, "mean_token_accuracy": 0.8808414757251739, "step": 49870 }, { "epoch": 0.8867082644481183, "grad_norm": 1.1043342419998587, "learning_rate": 0.0001, "loss": 0.7728, "mean_abs_error": 294.2064987337994, "mean_abs_error_last_10": 88.06167755316302, "mean_abs_error_last_25": 83.95331167966702, "mean_abs_error_last_50": 154.4733807881534, "mean_pred_prob": 0.04639084199443459, "mean_pred_prob_last_10": 0.21775911264121534, "mean_pred_prob_last_25": 0.12185788545757532, "mean_pred_prob_last_50": 0.076562016364187, "mean_token_accuracy": 0.8745233476161957, "step": 49880 }, { "epoch": 0.8868860327449203, "grad_norm": 1.8197316320216204, "learning_rate": 0.0001, "loss": 0.6533, "mean_abs_error": 288.21821887020207, "mean_abs_error_last_10": 69.03414074417893, "mean_abs_error_last_25": 133.04658738023386, "mean_abs_error_last_50": 166.75233794534012, "mean_pred_prob": 0.055118666309863326, "mean_pred_prob_last_10": 0.26429533660411836, "mean_pred_prob_last_25": 0.1470574291422963, "mean_pred_prob_last_50": 0.09194393884390592, "mean_token_accuracy": 0.8740281760692596, "step": 49890 }, { "epoch": 0.8870638010417222, "grad_norm": 0.9317503272429174, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 312.89075708215285, "mean_abs_error_last_10": 75.51120591046211, "mean_abs_error_last_25": 101.9177387559782, "mean_abs_error_last_50": 131.36400956178719, "mean_pred_prob": 0.02717376803047955, "mean_pred_prob_last_10": 0.13662802185863257, "mean_pred_prob_last_25": 0.07246294980868698, "mean_pred_prob_last_50": 0.045682527218014, "mean_token_accuracy": 0.8765471458435059, "step": 49900 }, { "epoch": 0.8872415693385242, "grad_norm": 1.11134402034574, "learning_rate": 0.0001, "loss": 0.7358, "mean_abs_error": 234.02633473376855, "mean_abs_error_last_10": 48.428358283495314, "mean_abs_error_last_25": 86.87081753982844, "mean_abs_error_last_50": 138.7461713493056, "mean_pred_prob": 0.039294778695330025, "mean_pred_prob_last_10": 0.18763581477105618, "mean_pred_prob_last_25": 0.10460124015808106, "mean_pred_prob_last_50": 0.06537835616618395, "mean_token_accuracy": 0.8680208265781403, "step": 49910 }, { "epoch": 0.8874193376353261, "grad_norm": 3.3485327077050098, "learning_rate": 0.0001, "loss": 0.6306, "mean_abs_error": 195.431032821798, "mean_abs_error_last_10": 85.39613407909304, "mean_abs_error_last_25": 141.19018786647433, "mean_abs_error_last_50": 174.37885492706766, "mean_pred_prob": 0.06382686365395784, "mean_pred_prob_last_10": 0.2798312410712242, "mean_pred_prob_last_25": 0.1652598419226706, "mean_pred_prob_last_50": 0.1035532958805561, "mean_token_accuracy": 0.8830708742141724, "step": 49920 }, { "epoch": 0.887597105932128, "grad_norm": 1.6863308182354058, "learning_rate": 0.0001, "loss": 0.6092, "mean_abs_error": 238.29321501553756, "mean_abs_error_last_10": 54.49671388623533, "mean_abs_error_last_25": 116.63872524436138, "mean_abs_error_last_50": 141.4896542828758, "mean_pred_prob": 0.055685430439189075, "mean_pred_prob_last_10": 0.26687442846596243, "mean_pred_prob_last_25": 0.14625119864940644, "mean_pred_prob_last_50": 0.09169793920591474, "mean_token_accuracy": 0.8733174443244934, "step": 49930 }, { "epoch": 0.88777487422893, "grad_norm": 3.8233170403115206, "learning_rate": 0.0001, "loss": 0.7692, "mean_abs_error": 615.6684744905626, "mean_abs_error_last_10": 279.9224055748078, "mean_abs_error_last_25": 330.5227981227801, "mean_abs_error_last_50": 448.73518575731015, "mean_pred_prob": 0.038186503062024715, "mean_pred_prob_last_10": 0.18736242003506048, "mean_pred_prob_last_25": 0.09976061572087928, "mean_pred_prob_last_50": 0.06285711948294193, "mean_token_accuracy": 0.873802763223648, "step": 49940 }, { "epoch": 0.8879526425257319, "grad_norm": 1.8346477763928717, "learning_rate": 0.0001, "loss": 0.7552, "mean_abs_error": 576.0680158589147, "mean_abs_error_last_10": 139.79929815949646, "mean_abs_error_last_25": 291.0897454060202, "mean_abs_error_last_50": 360.26773811935135, "mean_pred_prob": 0.03293436333187856, "mean_pred_prob_last_10": 0.1575589755899273, "mean_pred_prob_last_25": 0.08794549085432664, "mean_pred_prob_last_50": 0.05511268749833107, "mean_token_accuracy": 0.8712481796741486, "step": 49950 }, { "epoch": 0.8881304108225339, "grad_norm": 1.2844500136243155, "learning_rate": 0.0001, "loss": 0.7021, "mean_abs_error": 255.40568623878957, "mean_abs_error_last_10": 249.54951584011624, "mean_abs_error_last_25": 232.4415007933785, "mean_abs_error_last_50": 229.74834341495162, "mean_pred_prob": 0.04267369488370605, "mean_pred_prob_last_10": 0.19147235767450183, "mean_pred_prob_last_25": 0.11311121039325371, "mean_pred_prob_last_50": 0.07102961285272613, "mean_token_accuracy": 0.8712049007415772, "step": 49960 }, { "epoch": 0.8883081791193359, "grad_norm": 0.759925157311419, "learning_rate": 0.0001, "loss": 0.651, "mean_abs_error": 432.32039458960963, "mean_abs_error_last_10": 146.06884928719006, "mean_abs_error_last_25": 206.1985828683472, "mean_abs_error_last_50": 274.2181593409747, "mean_pred_prob": 0.04216353050433099, "mean_pred_prob_last_10": 0.18510418200166895, "mean_pred_prob_last_25": 0.10956507421797142, "mean_pred_prob_last_50": 0.06951284898677841, "mean_token_accuracy": 0.8648862183094025, "step": 49970 }, { "epoch": 0.8884859474161378, "grad_norm": 1.5375486209617526, "learning_rate": 0.0001, "loss": 0.7336, "mean_abs_error": 846.228505688044, "mean_abs_error_last_10": 516.6368708822637, "mean_abs_error_last_25": 592.926862203896, "mean_abs_error_last_50": 726.1814449740075, "mean_pred_prob": 0.028215650824131445, "mean_pred_prob_last_10": 0.1565926515904721, "mean_pred_prob_last_25": 0.07857183720043395, "mean_pred_prob_last_50": 0.047366166804567914, "mean_token_accuracy": 0.87038534283638, "step": 49980 }, { "epoch": 0.8886637157129398, "grad_norm": 1.4808719917646473, "learning_rate": 0.0001, "loss": 0.618, "mean_abs_error": 349.7099058736536, "mean_abs_error_last_10": 113.0381573865947, "mean_abs_error_last_25": 104.7761884219207, "mean_abs_error_last_50": 238.0533281756584, "mean_pred_prob": 0.05586049957200885, "mean_pred_prob_last_10": 0.24794819988310338, "mean_pred_prob_last_25": 0.1421395606826991, "mean_pred_prob_last_50": 0.09070738479495048, "mean_token_accuracy": 0.8766854286193848, "step": 49990 }, { "epoch": 0.8888414840097417, "grad_norm": 1.2307838831398743, "learning_rate": 0.0001, "loss": 0.7097, "mean_abs_error": 826.6279698005243, "mean_abs_error_last_10": 441.7494463481688, "mean_abs_error_last_25": 535.2268372131509, "mean_abs_error_last_50": 636.6567928498389, "mean_pred_prob": 0.061942750805610555, "mean_pred_prob_last_10": 0.28612336522783155, "mean_pred_prob_last_25": 0.16706433526269393, "mean_pred_prob_last_50": 0.10251166690868559, "mean_token_accuracy": 0.8771183669567109, "step": 50000 }, { "epoch": 0.8890192523065437, "grad_norm": 1.5085477717702402, "learning_rate": 0.0001, "loss": 0.6115, "mean_abs_error": 149.93857922773685, "mean_abs_error_last_10": 36.90807887763687, "mean_abs_error_last_25": 61.974516848711175, "mean_abs_error_last_50": 93.20736712941026, "mean_pred_prob": 0.05041876323521137, "mean_pred_prob_last_10": 0.2316451992839575, "mean_pred_prob_last_25": 0.13389519024640323, "mean_pred_prob_last_50": 0.08358632810413838, "mean_token_accuracy": 0.8782869696617126, "step": 50010 }, { "epoch": 0.8891970206033456, "grad_norm": 2.241930566473709, "learning_rate": 0.0001, "loss": 0.7894, "mean_abs_error": 675.6566936285077, "mean_abs_error_last_10": 410.95683082487113, "mean_abs_error_last_25": 454.34241717400465, "mean_abs_error_last_50": 532.3334040408242, "mean_pred_prob": 0.04896581381035503, "mean_pred_prob_last_10": 0.22685122760885862, "mean_pred_prob_last_25": 0.12396514556603506, "mean_pred_prob_last_50": 0.08032084559090436, "mean_token_accuracy": 0.8797349810600281, "step": 50020 }, { "epoch": 0.8893747889001475, "grad_norm": 1.2734318664690187, "learning_rate": 0.0001, "loss": 0.5566, "mean_abs_error": 371.87556473023045, "mean_abs_error_last_10": 256.95504950758243, "mean_abs_error_last_25": 233.11410366705218, "mean_abs_error_last_50": 252.08572111964654, "mean_pred_prob": 0.04156866169068962, "mean_pred_prob_last_10": 0.19802560582756995, "mean_pred_prob_last_25": 0.11005209758877754, "mean_pred_prob_last_50": 0.0683641015086323, "mean_token_accuracy": 0.8849923133850097, "step": 50030 }, { "epoch": 0.8895525571969495, "grad_norm": 1.9868635414023565, "learning_rate": 0.0001, "loss": 0.6989, "mean_abs_error": 478.2272875059005, "mean_abs_error_last_10": 170.457575406009, "mean_abs_error_last_25": 208.01796411290834, "mean_abs_error_last_50": 264.9958342281266, "mean_pred_prob": 0.02789183166460134, "mean_pred_prob_last_10": 0.14770795149961485, "mean_pred_prob_last_25": 0.07837350775953382, "mean_pred_prob_last_50": 0.04690639866166748, "mean_token_accuracy": 0.878298431634903, "step": 50040 }, { "epoch": 0.8897303254937514, "grad_norm": 1.2212541647804185, "learning_rate": 0.0001, "loss": 0.7079, "mean_abs_error": 658.2825503888132, "mean_abs_error_last_10": 84.47642874780729, "mean_abs_error_last_25": 135.994303709848, "mean_abs_error_last_50": 306.3862278851349, "mean_pred_prob": 0.03915555485291407, "mean_pred_prob_last_10": 0.19204773551318793, "mean_pred_prob_last_25": 0.10692703430540859, "mean_pred_prob_last_50": 0.06569263973506168, "mean_token_accuracy": 0.8750748753547668, "step": 50050 }, { "epoch": 0.8899080937905534, "grad_norm": 0.7701735281298137, "learning_rate": 0.0001, "loss": 0.6845, "mean_abs_error": 499.99325943167435, "mean_abs_error_last_10": 166.258502752459, "mean_abs_error_last_25": 154.7348891629389, "mean_abs_error_last_50": 234.2290724350492, "mean_pred_prob": 0.04449434612179175, "mean_pred_prob_last_10": 0.21080233048414812, "mean_pred_prob_last_25": 0.12172896374249831, "mean_pred_prob_last_50": 0.07384655810892582, "mean_token_accuracy": 0.8759336292743682, "step": 50060 }, { "epoch": 0.8900858620873553, "grad_norm": 1.2915719829467909, "learning_rate": 0.0001, "loss": 0.6758, "mean_abs_error": 604.9475001694088, "mean_abs_error_last_10": 333.0812899946565, "mean_abs_error_last_25": 352.92605464254746, "mean_abs_error_last_50": 438.87493462857117, "mean_pred_prob": 0.05083171990409028, "mean_pred_prob_last_10": 0.224477205198491, "mean_pred_prob_last_25": 0.13426672461791894, "mean_pred_prob_last_50": 0.0838511424663011, "mean_token_accuracy": 0.8790096402168274, "step": 50070 }, { "epoch": 0.8902636303841572, "grad_norm": 1.1101892120428225, "learning_rate": 0.0001, "loss": 0.6615, "mean_abs_error": 364.5195319318777, "mean_abs_error_last_10": 117.08243860509381, "mean_abs_error_last_25": 144.76471712546564, "mean_abs_error_last_50": 205.834168781456, "mean_pred_prob": 0.05417111093411222, "mean_pred_prob_last_10": 0.2579642516793683, "mean_pred_prob_last_25": 0.14768033679574727, "mean_pred_prob_last_50": 0.08989977875025942, "mean_token_accuracy": 0.8828332304954529, "step": 50080 }, { "epoch": 0.8904413986809593, "grad_norm": 0.9341206913500564, "learning_rate": 0.0001, "loss": 0.636, "mean_abs_error": 574.9225841131316, "mean_abs_error_last_10": 246.76133479050927, "mean_abs_error_last_25": 271.49695814514473, "mean_abs_error_last_50": 361.0466892619732, "mean_pred_prob": 0.04349358463950921, "mean_pred_prob_last_10": 0.2175370598619338, "mean_pred_prob_last_25": 0.11815702637541108, "mean_pred_prob_last_50": 0.07144564560730941, "mean_token_accuracy": 0.8679646730422974, "step": 50090 }, { "epoch": 0.8906191669777612, "grad_norm": 2.7037493160169066, "learning_rate": 0.0001, "loss": 0.7689, "mean_abs_error": 706.8167252936202, "mean_abs_error_last_10": 268.55477497879025, "mean_abs_error_last_25": 360.3973369575688, "mean_abs_error_last_50": 494.9798459965535, "mean_pred_prob": 0.03659981403034181, "mean_pred_prob_last_10": 0.19143662244314327, "mean_pred_prob_last_25": 0.10334201811929233, "mean_pred_prob_last_50": 0.06056581172160804, "mean_token_accuracy": 0.8765288233757019, "step": 50100 }, { "epoch": 0.8907969352745632, "grad_norm": 1.2858769220739406, "learning_rate": 0.0001, "loss": 0.8774, "mean_abs_error": 303.7955593308906, "mean_abs_error_last_10": 86.64252155230996, "mean_abs_error_last_25": 103.74815484851854, "mean_abs_error_last_50": 165.66787755602576, "mean_pred_prob": 0.032368888473138215, "mean_pred_prob_last_10": 0.15044876597821713, "mean_pred_prob_last_25": 0.08597676968201995, "mean_pred_prob_last_50": 0.05321624744683504, "mean_token_accuracy": 0.8784146010875702, "step": 50110 }, { "epoch": 0.8909747035713651, "grad_norm": 1.8237777741798429, "learning_rate": 0.0001, "loss": 0.6896, "mean_abs_error": 125.08244884254614, "mean_abs_error_last_10": 10.42008073519689, "mean_abs_error_last_25": 37.817004363824566, "mean_abs_error_last_50": 71.23428004992351, "mean_pred_prob": 0.06429350022226572, "mean_pred_prob_last_10": 0.3372812315821648, "mean_pred_prob_last_25": 0.18422363996505736, "mean_pred_prob_last_50": 0.11034051775932312, "mean_token_accuracy": 0.8763637185096741, "step": 50120 }, { "epoch": 0.891152471868167, "grad_norm": 1.7418859460304819, "learning_rate": 0.0001, "loss": 0.7071, "mean_abs_error": 263.4542148863099, "mean_abs_error_last_10": 67.3372628479562, "mean_abs_error_last_25": 74.82087683983754, "mean_abs_error_last_50": 144.0731771461637, "mean_pred_prob": 0.046621048264205454, "mean_pred_prob_last_10": 0.23620057962834834, "mean_pred_prob_last_25": 0.132137599773705, "mean_pred_prob_last_50": 0.0794101427309215, "mean_token_accuracy": 0.8722512245178222, "step": 50130 }, { "epoch": 0.891330240164969, "grad_norm": 2.439776262218735, "learning_rate": 0.0001, "loss": 0.7734, "mean_abs_error": 289.4617997098554, "mean_abs_error_last_10": 147.48877659918926, "mean_abs_error_last_25": 280.4110962584995, "mean_abs_error_last_50": 274.5251170884072, "mean_pred_prob": 0.034024896728806195, "mean_pred_prob_last_10": 0.1671324273571372, "mean_pred_prob_last_25": 0.0959146990440786, "mean_pred_prob_last_50": 0.058204060420393944, "mean_token_accuracy": 0.874569696187973, "step": 50140 }, { "epoch": 0.8915080084617709, "grad_norm": 1.2023572980389536, "learning_rate": 0.0001, "loss": 0.6806, "mean_abs_error": 708.9257018735957, "mean_abs_error_last_10": 101.90631679367422, "mean_abs_error_last_25": 169.30790869504278, "mean_abs_error_last_50": 349.5895414213674, "mean_pred_prob": 0.050222071309690364, "mean_pred_prob_last_10": 0.24495736856479197, "mean_pred_prob_last_25": 0.13738348467741163, "mean_pred_prob_last_50": 0.0845533199084457, "mean_token_accuracy": 0.8755490839481354, "step": 50150 }, { "epoch": 0.8916857767585729, "grad_norm": 1.1254295239799335, "learning_rate": 0.0001, "loss": 0.6761, "mean_abs_error": 629.9504946427397, "mean_abs_error_last_10": 341.28759694562825, "mean_abs_error_last_25": 340.95054827384627, "mean_abs_error_last_50": 438.16424462097564, "mean_pred_prob": 0.027126621443312616, "mean_pred_prob_last_10": 0.14687687730765903, "mean_pred_prob_last_25": 0.07568927686661482, "mean_pred_prob_last_50": 0.04528409247868694, "mean_token_accuracy": 0.8647985935211182, "step": 50160 }, { "epoch": 0.8918635450553748, "grad_norm": 5.5054545574476625, "learning_rate": 0.0001, "loss": 0.835, "mean_abs_error": 376.1043340706541, "mean_abs_error_last_10": 153.23825527505602, "mean_abs_error_last_25": 206.31701089826683, "mean_abs_error_last_50": 275.60728953774486, "mean_pred_prob": 0.033835761999944224, "mean_pred_prob_last_10": 0.15969766379566863, "mean_pred_prob_last_25": 0.08885373292723671, "mean_pred_prob_last_50": 0.05595562105299905, "mean_token_accuracy": 0.8656263411045074, "step": 50170 }, { "epoch": 0.8920413133521767, "grad_norm": 1.6539533709503127, "learning_rate": 0.0001, "loss": 0.7595, "mean_abs_error": 179.47912476674964, "mean_abs_error_last_10": 82.28707832576704, "mean_abs_error_last_25": 66.1430557259698, "mean_abs_error_last_50": 90.02022029170129, "mean_pred_prob": 0.05776720135472715, "mean_pred_prob_last_10": 0.25059201624244454, "mean_pred_prob_last_25": 0.15099704833701252, "mean_pred_prob_last_50": 0.09424727391451597, "mean_token_accuracy": 0.865681529045105, "step": 50180 }, { "epoch": 0.8922190816489787, "grad_norm": 1.2942229607942237, "learning_rate": 0.0001, "loss": 0.7961, "mean_abs_error": 513.5073084173271, "mean_abs_error_last_10": 126.76061820084037, "mean_abs_error_last_25": 167.5521972305883, "mean_abs_error_last_50": 280.04874268951914, "mean_pred_prob": 0.030020111514022573, "mean_pred_prob_last_10": 0.15936499201925472, "mean_pred_prob_last_25": 0.08471057346323504, "mean_pred_prob_last_50": 0.05025555592146702, "mean_token_accuracy": 0.8617364704608917, "step": 50190 }, { "epoch": 0.8923968499457807, "grad_norm": 2.097703635301131, "learning_rate": 0.0001, "loss": 0.6959, "mean_abs_error": 1266.6598901918082, "mean_abs_error_last_10": 718.602090290034, "mean_abs_error_last_25": 795.7590183632823, "mean_abs_error_last_50": 924.1981655176598, "mean_pred_prob": 0.04087238889042055, "mean_pred_prob_last_10": 0.18590770417795283, "mean_pred_prob_last_25": 0.11328438581258524, "mean_pred_prob_last_50": 0.06964989991683979, "mean_token_accuracy": 0.8732577383518219, "step": 50200 }, { "epoch": 0.8925746182425827, "grad_norm": 3.2969473150710797, "learning_rate": 0.0001, "loss": 0.8057, "mean_abs_error": 410.0643569326462, "mean_abs_error_last_10": 66.00443532712785, "mean_abs_error_last_25": 139.04207639206078, "mean_abs_error_last_50": 249.6515418015875, "mean_pred_prob": 0.027238332945853472, "mean_pred_prob_last_10": 0.1437148153781891, "mean_pred_prob_last_25": 0.07604350559413434, "mean_pred_prob_last_50": 0.0461946071125567, "mean_token_accuracy": 0.8673474550247192, "step": 50210 }, { "epoch": 0.8927523865393846, "grad_norm": 2.4036254121098306, "learning_rate": 0.0001, "loss": 0.7008, "mean_abs_error": 338.2482098630097, "mean_abs_error_last_10": 43.02739716341328, "mean_abs_error_last_25": 86.15420455372245, "mean_abs_error_last_50": 187.207906711572, "mean_pred_prob": 0.03564787786453962, "mean_pred_prob_last_10": 0.19606793150305749, "mean_pred_prob_last_25": 0.10810358058661222, "mean_pred_prob_last_50": 0.06231262926012278, "mean_token_accuracy": 0.8765773773193359, "step": 50220 }, { "epoch": 0.8929301548361865, "grad_norm": 2.7781604945738057, "learning_rate": 0.0001, "loss": 0.9083, "mean_abs_error": 773.0025918629298, "mean_abs_error_last_10": 442.60912498088254, "mean_abs_error_last_25": 487.12171522106553, "mean_abs_error_last_50": 588.1858192565568, "mean_pred_prob": 0.020195812871679663, "mean_pred_prob_last_10": 0.10712573722703382, "mean_pred_prob_last_25": 0.057652496924856675, "mean_pred_prob_last_50": 0.034110975911607964, "mean_token_accuracy": 0.8710870742797852, "step": 50230 }, { "epoch": 0.8931079231329885, "grad_norm": 1.4145379696880023, "learning_rate": 0.0001, "loss": 0.629, "mean_abs_error": 702.2406989254271, "mean_abs_error_last_10": 327.01696512245894, "mean_abs_error_last_25": 377.9448077060119, "mean_abs_error_last_50": 506.5690476397555, "mean_pred_prob": 0.04359848726599012, "mean_pred_prob_last_10": 0.20947212455212139, "mean_pred_prob_last_25": 0.11789354553911835, "mean_pred_prob_last_50": 0.07278409154969268, "mean_token_accuracy": 0.8845182836055756, "step": 50240 }, { "epoch": 0.8932856914297904, "grad_norm": 1.46728486417261, "learning_rate": 0.0001, "loss": 0.5914, "mean_abs_error": 128.61155675183076, "mean_abs_error_last_10": 53.00318927682889, "mean_abs_error_last_25": 68.38758096179, "mean_abs_error_last_50": 89.03517763195666, "mean_pred_prob": 0.040380124002695084, "mean_pred_prob_last_10": 0.2080768894404173, "mean_pred_prob_last_25": 0.1084380878135562, "mean_pred_prob_last_50": 0.06630324395373463, "mean_token_accuracy": 0.8780866801738739, "step": 50250 }, { "epoch": 0.8934634597265924, "grad_norm": 1.8664983227128678, "learning_rate": 0.0001, "loss": 0.6567, "mean_abs_error": 554.3500641979385, "mean_abs_error_last_10": 127.31197620826906, "mean_abs_error_last_25": 218.32619571020513, "mean_abs_error_last_50": 348.6144205740287, "mean_pred_prob": 0.03249359899200499, "mean_pred_prob_last_10": 0.16015201471745968, "mean_pred_prob_last_25": 0.089825848210603, "mean_pred_prob_last_50": 0.05455122776329517, "mean_token_accuracy": 0.8731098711490631, "step": 50260 }, { "epoch": 0.8936412280233943, "grad_norm": 1.1821773360591716, "learning_rate": 0.0001, "loss": 0.6493, "mean_abs_error": 292.7990503851286, "mean_abs_error_last_10": 47.544312489010224, "mean_abs_error_last_25": 85.81771369962617, "mean_abs_error_last_50": 158.3611656091027, "mean_pred_prob": 0.03326749256812036, "mean_pred_prob_last_10": 0.16851569823920726, "mean_pred_prob_last_25": 0.09179294481873512, "mean_pred_prob_last_50": 0.05482044294476509, "mean_token_accuracy": 0.87754585146904, "step": 50270 }, { "epoch": 0.8938189963201962, "grad_norm": 1.1014011427327655, "learning_rate": 0.0001, "loss": 0.7287, "mean_abs_error": 558.553173008485, "mean_abs_error_last_10": 206.26891218385109, "mean_abs_error_last_25": 309.7393087810831, "mean_abs_error_last_50": 361.7974036557478, "mean_pred_prob": 0.04512859428068623, "mean_pred_prob_last_10": 0.21501995036378502, "mean_pred_prob_last_25": 0.1185399592621252, "mean_pred_prob_last_50": 0.07470909878611565, "mean_token_accuracy": 0.8660681188106537, "step": 50280 }, { "epoch": 0.8939967646169982, "grad_norm": 1.2644496815515538, "learning_rate": 0.0001, "loss": 0.686, "mean_abs_error": 348.94768299773875, "mean_abs_error_last_10": 66.91877012620816, "mean_abs_error_last_25": 138.37795644859133, "mean_abs_error_last_50": 187.80517310855714, "mean_pred_prob": 0.043498222145717594, "mean_pred_prob_last_10": 0.19367423108778895, "mean_pred_prob_last_25": 0.11537508424371481, "mean_pred_prob_last_50": 0.0720956623321399, "mean_token_accuracy": 0.8686371088027954, "step": 50290 }, { "epoch": 0.8941745329138001, "grad_norm": 2.115097781189935, "learning_rate": 0.0001, "loss": 0.7299, "mean_abs_error": 447.0782431247174, "mean_abs_error_last_10": 69.56475546438506, "mean_abs_error_last_25": 138.7202412484333, "mean_abs_error_last_50": 213.07119737348916, "mean_pred_prob": 0.04368795303744264, "mean_pred_prob_last_10": 0.21440856095869093, "mean_pred_prob_last_25": 0.12065536270383745, "mean_pred_prob_last_50": 0.07377959099831059, "mean_token_accuracy": 0.8681550562381745, "step": 50300 }, { "epoch": 0.894352301210602, "grad_norm": 1.4848215818593908, "learning_rate": 0.0001, "loss": 0.6141, "mean_abs_error": 336.43236520087265, "mean_abs_error_last_10": 52.179117301572795, "mean_abs_error_last_25": 108.52302787044626, "mean_abs_error_last_50": 179.70869311296423, "mean_pred_prob": 0.05064727682620287, "mean_pred_prob_last_10": 0.22453794907778502, "mean_pred_prob_last_25": 0.13885911935940384, "mean_pred_prob_last_50": 0.08539507980458438, "mean_token_accuracy": 0.8757264256477356, "step": 50310 }, { "epoch": 0.8945300695074041, "grad_norm": 1.2687737908683514, "learning_rate": 0.0001, "loss": 0.6545, "mean_abs_error": 341.20603472891736, "mean_abs_error_last_10": 34.4644413234003, "mean_abs_error_last_25": 184.76092001990713, "mean_abs_error_last_50": 247.96898813091735, "mean_pred_prob": 0.05026721539907157, "mean_pred_prob_last_10": 0.23268004395067693, "mean_pred_prob_last_25": 0.1394685672596097, "mean_pred_prob_last_50": 0.08498534196987748, "mean_token_accuracy": 0.8805442750453949, "step": 50320 }, { "epoch": 0.894707837804206, "grad_norm": 1.3084739952784186, "learning_rate": 0.0001, "loss": 0.6722, "mean_abs_error": 582.2636477942133, "mean_abs_error_last_10": 272.42215320428227, "mean_abs_error_last_25": 287.0514965851865, "mean_abs_error_last_50": 359.11788518038594, "mean_pred_prob": 0.04509947827318683, "mean_pred_prob_last_10": 0.2319081814493984, "mean_pred_prob_last_25": 0.12502826940035447, "mean_pred_prob_last_50": 0.07505697787273675, "mean_token_accuracy": 0.8681577324867249, "step": 50330 }, { "epoch": 0.894885606101008, "grad_norm": 1.4435185658418137, "learning_rate": 0.0001, "loss": 0.931, "mean_abs_error": 244.85269310049912, "mean_abs_error_last_10": 62.53050875395284, "mean_abs_error_last_25": 112.57264171430911, "mean_abs_error_last_50": 141.23850950607303, "mean_pred_prob": 0.04651581854559481, "mean_pred_prob_last_10": 0.22263222970068455, "mean_pred_prob_last_25": 0.1245327565819025, "mean_pred_prob_last_50": 0.07647640202194453, "mean_token_accuracy": 0.8665130019187928, "step": 50340 }, { "epoch": 0.8950633743978099, "grad_norm": 1.9242155951335878, "learning_rate": 0.0001, "loss": 0.7931, "mean_abs_error": 448.97033139054736, "mean_abs_error_last_10": 102.98680974652689, "mean_abs_error_last_25": 125.25852011718828, "mean_abs_error_last_50": 189.34688571455087, "mean_pred_prob": 0.03889119749655947, "mean_pred_prob_last_10": 0.20604225967545062, "mean_pred_prob_last_25": 0.11030528682749718, "mean_pred_prob_last_50": 0.06574871735647321, "mean_token_accuracy": 0.8698725461959839, "step": 50350 }, { "epoch": 0.8952411426946119, "grad_norm": 1.5060465662899185, "learning_rate": 0.0001, "loss": 0.6253, "mean_abs_error": 353.5810664288323, "mean_abs_error_last_10": 47.41111028586979, "mean_abs_error_last_25": 77.98241367687078, "mean_abs_error_last_50": 173.342911192033, "mean_pred_prob": 0.055956721189431846, "mean_pred_prob_last_10": 0.28663118220865724, "mean_pred_prob_last_25": 0.1553691151086241, "mean_pred_prob_last_50": 0.09315473018214107, "mean_token_accuracy": 0.8752955973148346, "step": 50360 }, { "epoch": 0.8954189109914138, "grad_norm": 1.5683012338681608, "learning_rate": 0.0001, "loss": 0.7457, "mean_abs_error": 606.9288120741367, "mean_abs_error_last_10": 252.68701763043083, "mean_abs_error_last_25": 333.04232609078684, "mean_abs_error_last_50": 370.7450130929745, "mean_pred_prob": 0.02133204519050196, "mean_pred_prob_last_10": 0.11935195410624147, "mean_pred_prob_last_25": 0.060177582234609873, "mean_pred_prob_last_50": 0.03593410652829334, "mean_token_accuracy": 0.8646041631698609, "step": 50370 }, { "epoch": 0.8955966792882157, "grad_norm": 1.6475675439110635, "learning_rate": 0.0001, "loss": 0.6628, "mean_abs_error": 133.48396340317578, "mean_abs_error_last_10": 10.73102745176665, "mean_abs_error_last_25": 37.72560586220207, "mean_abs_error_last_50": 94.57335720572986, "mean_pred_prob": 0.06314294431358576, "mean_pred_prob_last_10": 0.3155749708414078, "mean_pred_prob_last_25": 0.1802846947684884, "mean_pred_prob_last_50": 0.10831964984536172, "mean_token_accuracy": 0.8659599125385284, "step": 50380 }, { "epoch": 0.8957744475850177, "grad_norm": 2.3962731830552464, "learning_rate": 0.0001, "loss": 0.815, "mean_abs_error": 506.94283955779866, "mean_abs_error_last_10": 289.5261333459484, "mean_abs_error_last_25": 366.7335691551033, "mean_abs_error_last_50": 357.6266629287667, "mean_pred_prob": 0.04845997203374282, "mean_pred_prob_last_10": 0.20930362606886774, "mean_pred_prob_last_25": 0.12520592510700226, "mean_pred_prob_last_50": 0.07867453808430583, "mean_token_accuracy": 0.8642622888088226, "step": 50390 }, { "epoch": 0.8959522158818196, "grad_norm": 1.4949635958306111, "learning_rate": 0.0001, "loss": 0.7605, "mean_abs_error": 428.1744761797119, "mean_abs_error_last_10": 127.01711716753644, "mean_abs_error_last_25": 152.83676929949132, "mean_abs_error_last_50": 221.67160754283572, "mean_pred_prob": 0.04598985152551904, "mean_pred_prob_last_10": 0.2142015689983964, "mean_pred_prob_last_25": 0.1229171009734273, "mean_pred_prob_last_50": 0.076587865059264, "mean_token_accuracy": 0.8785944104194641, "step": 50400 }, { "epoch": 0.8961299841786216, "grad_norm": 0.9717110530024407, "learning_rate": 0.0001, "loss": 0.8728, "mean_abs_error": 280.195583060485, "mean_abs_error_last_10": 45.91004592794981, "mean_abs_error_last_25": 71.44845268204762, "mean_abs_error_last_50": 124.78030897605245, "mean_pred_prob": 0.061980458442121744, "mean_pred_prob_last_10": 0.267961335927248, "mean_pred_prob_last_25": 0.15984779205173255, "mean_pred_prob_last_50": 0.10084877340123058, "mean_token_accuracy": 0.87755366563797, "step": 50410 }, { "epoch": 0.8963077524754235, "grad_norm": 0.9034824822760453, "learning_rate": 0.0001, "loss": 0.7214, "mean_abs_error": 321.13021014494745, "mean_abs_error_last_10": 126.93351176060887, "mean_abs_error_last_25": 142.928454647838, "mean_abs_error_last_50": 222.4322100062114, "mean_pred_prob": 0.04275940943043679, "mean_pred_prob_last_10": 0.2075270799919963, "mean_pred_prob_last_25": 0.11224651774391532, "mean_pred_prob_last_50": 0.06993114724755287, "mean_token_accuracy": 0.8725375950336456, "step": 50420 }, { "epoch": 0.8964855207722254, "grad_norm": 1.7176094571295188, "learning_rate": 0.0001, "loss": 0.6541, "mean_abs_error": 231.5117181925581, "mean_abs_error_last_10": 54.52235022977409, "mean_abs_error_last_25": 114.23531906464905, "mean_abs_error_last_50": 137.6035523854852, "mean_pred_prob": 0.04455462009645998, "mean_pred_prob_last_10": 0.22704444099217652, "mean_pred_prob_last_25": 0.12181086391210556, "mean_pred_prob_last_50": 0.07497427435591816, "mean_token_accuracy": 0.8715495288372039, "step": 50430 }, { "epoch": 0.8966632890690275, "grad_norm": 0.9161261126940226, "learning_rate": 0.0001, "loss": 0.6299, "mean_abs_error": 1149.4691226621821, "mean_abs_error_last_10": 534.6795210285591, "mean_abs_error_last_25": 680.6636246967437, "mean_abs_error_last_50": 865.5137705425786, "mean_pred_prob": 0.02041885346698109, "mean_pred_prob_last_10": 0.10493254386819899, "mean_pred_prob_last_25": 0.054251071458566, "mean_pred_prob_last_50": 0.03309271014295519, "mean_token_accuracy": 0.8744852721691132, "step": 50440 }, { "epoch": 0.8968410573658294, "grad_norm": 2.325970281702622, "learning_rate": 0.0001, "loss": 0.6723, "mean_abs_error": 349.62274316930603, "mean_abs_error_last_10": 85.95363596005305, "mean_abs_error_last_25": 141.1247013717794, "mean_abs_error_last_50": 186.74220558548058, "mean_pred_prob": 0.03640614205505699, "mean_pred_prob_last_10": 0.1840278532356024, "mean_pred_prob_last_25": 0.10450666132383049, "mean_pred_prob_last_50": 0.06315103692468256, "mean_token_accuracy": 0.8673092782497406, "step": 50450 }, { "epoch": 0.8970188256626314, "grad_norm": 2.060786882615215, "learning_rate": 0.0001, "loss": 0.7316, "mean_abs_error": 601.815744037239, "mean_abs_error_last_10": 203.16016995548384, "mean_abs_error_last_25": 318.6674163278201, "mean_abs_error_last_50": 440.20300978929697, "mean_pred_prob": 0.03390183919109404, "mean_pred_prob_last_10": 0.15425344203831629, "mean_pred_prob_last_25": 0.09194688542047516, "mean_pred_prob_last_50": 0.05588514003902674, "mean_token_accuracy": 0.8616739094257355, "step": 50460 }, { "epoch": 0.8971965939594333, "grad_norm": 2.2687959063519227, "learning_rate": 0.0001, "loss": 0.7618, "mean_abs_error": 244.1202663202816, "mean_abs_error_last_10": 47.843055902955896, "mean_abs_error_last_25": 89.42315300247158, "mean_abs_error_last_50": 145.94895309954458, "mean_pred_prob": 0.05089102070778608, "mean_pred_prob_last_10": 0.2260524094104767, "mean_pred_prob_last_25": 0.1347985382191837, "mean_pred_prob_last_50": 0.0832556439563632, "mean_token_accuracy": 0.8713031888008118, "step": 50470 }, { "epoch": 0.8973743622562352, "grad_norm": 1.6406855107186777, "learning_rate": 0.0001, "loss": 0.6265, "mean_abs_error": 254.7622425094588, "mean_abs_error_last_10": 95.3747880143269, "mean_abs_error_last_25": 139.62508245400872, "mean_abs_error_last_50": 196.75951160534146, "mean_pred_prob": 0.02655106191523373, "mean_pred_prob_last_10": 0.14181245379149915, "mean_pred_prob_last_25": 0.07333680279552937, "mean_pred_prob_last_50": 0.04389353971928358, "mean_token_accuracy": 0.8764877915382385, "step": 50480 }, { "epoch": 0.8975521305530372, "grad_norm": 1.277339122996495, "learning_rate": 0.0001, "loss": 0.6361, "mean_abs_error": 573.985299733362, "mean_abs_error_last_10": 153.26144033730577, "mean_abs_error_last_25": 205.4269035371446, "mean_abs_error_last_50": 341.7738561367025, "mean_pred_prob": 0.039916730689583346, "mean_pred_prob_last_10": 0.17905306807369925, "mean_pred_prob_last_25": 0.1044918700994458, "mean_pred_prob_last_50": 0.06571119402651675, "mean_token_accuracy": 0.8759285569190979, "step": 50490 }, { "epoch": 0.8977298988498391, "grad_norm": 1.3498227619243162, "learning_rate": 0.0001, "loss": 0.7428, "mean_abs_error": 310.2668248536432, "mean_abs_error_last_10": 87.46156832109496, "mean_abs_error_last_25": 97.78551659633993, "mean_abs_error_last_50": 140.04919587261648, "mean_pred_prob": 0.05110263987444341, "mean_pred_prob_last_10": 0.2361582987010479, "mean_pred_prob_last_25": 0.13669794853776693, "mean_pred_prob_last_50": 0.08504402237012983, "mean_token_accuracy": 0.8633095264434815, "step": 50500 }, { "epoch": 0.897907667146641, "grad_norm": 1.8259977851168097, "learning_rate": 0.0001, "loss": 0.7099, "mean_abs_error": 124.8209498354468, "mean_abs_error_last_10": 25.41568966582769, "mean_abs_error_last_25": 41.09438330809722, "mean_abs_error_last_50": 75.36986218138497, "mean_pred_prob": 0.05263575473800301, "mean_pred_prob_last_10": 0.2715245492756367, "mean_pred_prob_last_25": 0.1472278844565153, "mean_pred_prob_last_50": 0.08736093640327454, "mean_token_accuracy": 0.8680102407932282, "step": 50510 }, { "epoch": 0.898085435443443, "grad_norm": 1.9790458946900369, "learning_rate": 0.0001, "loss": 0.6119, "mean_abs_error": 581.5951115400574, "mean_abs_error_last_10": 239.601275623634, "mean_abs_error_last_25": 295.7712656669066, "mean_abs_error_last_50": 360.9885983786015, "mean_pred_prob": 0.026645169232506306, "mean_pred_prob_last_10": 0.13326563781592995, "mean_pred_prob_last_25": 0.07552665823604912, "mean_pred_prob_last_50": 0.04515223919879645, "mean_token_accuracy": 0.8729977965354919, "step": 50520 }, { "epoch": 0.8982632037402449, "grad_norm": 2.510191786238105, "learning_rate": 0.0001, "loss": 0.6739, "mean_abs_error": 1010.2122916249695, "mean_abs_error_last_10": 466.012262085774, "mean_abs_error_last_25": 624.3382183460665, "mean_abs_error_last_50": 783.1105703789617, "mean_pred_prob": 0.049957430025824576, "mean_pred_prob_last_10": 0.21028042901307345, "mean_pred_prob_last_25": 0.12768176380195656, "mean_pred_prob_last_50": 0.08228880277019926, "mean_token_accuracy": 0.8747627198696136, "step": 50530 }, { "epoch": 0.8984409720370469, "grad_norm": 1.0873279789975898, "learning_rate": 0.0001, "loss": 0.5953, "mean_abs_error": 232.07843270509835, "mean_abs_error_last_10": 56.4999019587523, "mean_abs_error_last_25": 110.5435937947133, "mean_abs_error_last_50": 154.2712373268026, "mean_pred_prob": 0.0494170596357435, "mean_pred_prob_last_10": 0.2148362984880805, "mean_pred_prob_last_25": 0.1299552947282791, "mean_pred_prob_last_50": 0.08123040748760105, "mean_token_accuracy": 0.8773203253746032, "step": 50540 }, { "epoch": 0.8986187403338488, "grad_norm": 1.8377225355827826, "learning_rate": 0.0001, "loss": 0.7599, "mean_abs_error": 238.48125213213666, "mean_abs_error_last_10": 67.14647960572697, "mean_abs_error_last_25": 98.8276083601509, "mean_abs_error_last_50": 157.7288141540416, "mean_pred_prob": 0.04108078377321363, "mean_pred_prob_last_10": 0.2202954389154911, "mean_pred_prob_last_25": 0.11927482914179563, "mean_pred_prob_last_50": 0.06955082034692169, "mean_token_accuracy": 0.8844235002994537, "step": 50550 }, { "epoch": 0.8987965086306509, "grad_norm": 1.354447977859944, "learning_rate": 0.0001, "loss": 0.7754, "mean_abs_error": 465.10797663854345, "mean_abs_error_last_10": 118.17487665628224, "mean_abs_error_last_25": 180.64451308924453, "mean_abs_error_last_50": 284.54351742315583, "mean_pred_prob": 0.030843862536130473, "mean_pred_prob_last_10": 0.15922086600912735, "mean_pred_prob_last_25": 0.08413714780472219, "mean_pred_prob_last_50": 0.050992060377029705, "mean_token_accuracy": 0.8735741078853607, "step": 50560 }, { "epoch": 0.8989742769274528, "grad_norm": 2.319472992018966, "learning_rate": 0.0001, "loss": 0.7357, "mean_abs_error": 739.6576335209386, "mean_abs_error_last_10": 659.543840492004, "mean_abs_error_last_25": 738.2329082186579, "mean_abs_error_last_50": 707.3904392133212, "mean_pred_prob": 0.030926217458909378, "mean_pred_prob_last_10": 0.13433670641388745, "mean_pred_prob_last_25": 0.08230210075853392, "mean_pred_prob_last_50": 0.04964828595984727, "mean_token_accuracy": 0.8720863699913025, "step": 50570 }, { "epoch": 0.8991520452242547, "grad_norm": 1.5693960483777807, "learning_rate": 0.0001, "loss": 0.733, "mean_abs_error": 1019.493384595489, "mean_abs_error_last_10": 569.5260520381379, "mean_abs_error_last_25": 640.9220278899268, "mean_abs_error_last_50": 734.927155439812, "mean_pred_prob": 0.02928053116920637, "mean_pred_prob_last_10": 0.14865543218911625, "mean_pred_prob_last_25": 0.0838766425877111, "mean_pred_prob_last_50": 0.04983120044053067, "mean_token_accuracy": 0.8687586188316345, "step": 50580 }, { "epoch": 0.8993298135210567, "grad_norm": 1.8406554243349176, "learning_rate": 0.0001, "loss": 0.8052, "mean_abs_error": 354.63726961110206, "mean_abs_error_last_10": 183.01108402997252, "mean_abs_error_last_25": 187.58431091931772, "mean_abs_error_last_50": 220.5732996587099, "mean_pred_prob": 0.03529298972571269, "mean_pred_prob_last_10": 0.15957392214331775, "mean_pred_prob_last_25": 0.09176016352139413, "mean_pred_prob_last_50": 0.058482035622000696, "mean_token_accuracy": 0.8709194779396057, "step": 50590 }, { "epoch": 0.8995075818178586, "grad_norm": 2.330110827628597, "learning_rate": 0.0001, "loss": 0.6529, "mean_abs_error": 217.53295969090647, "mean_abs_error_last_10": 30.455870209866372, "mean_abs_error_last_25": 56.59302613898866, "mean_abs_error_last_50": 92.68639460006546, "mean_pred_prob": 0.0519945663632825, "mean_pred_prob_last_10": 0.24485209677368402, "mean_pred_prob_last_25": 0.1381495429202914, "mean_pred_prob_last_50": 0.08689798205159605, "mean_token_accuracy": 0.8772820651531219, "step": 50600 }, { "epoch": 0.8996853501146606, "grad_norm": 1.1559841216546314, "learning_rate": 0.0001, "loss": 0.6782, "mean_abs_error": 273.5848044944049, "mean_abs_error_last_10": 84.5744281214975, "mean_abs_error_last_25": 149.43777379148153, "mean_abs_error_last_50": 204.1990677983154, "mean_pred_prob": 0.054171904316172005, "mean_pred_prob_last_10": 0.2220332046970725, "mean_pred_prob_last_25": 0.135381364217028, "mean_pred_prob_last_50": 0.08611497282981873, "mean_token_accuracy": 0.8763929784297944, "step": 50610 }, { "epoch": 0.8998631184114625, "grad_norm": 2.7251526094492706, "learning_rate": 0.0001, "loss": 0.6276, "mean_abs_error": 67.22246771870628, "mean_abs_error_last_10": 14.421843054131907, "mean_abs_error_last_25": 22.247288962192943, "mean_abs_error_last_50": 41.3446929941757, "mean_pred_prob": 0.06996214715763927, "mean_pred_prob_last_10": 0.3056946747004986, "mean_pred_prob_last_25": 0.1823767639696598, "mean_pred_prob_last_50": 0.11495415847748518, "mean_token_accuracy": 0.878241342306137, "step": 50620 }, { "epoch": 0.9000408867082644, "grad_norm": 2.1453099145956718, "learning_rate": 0.0001, "loss": 0.7238, "mean_abs_error": 939.9961201167914, "mean_abs_error_last_10": 315.4935832737616, "mean_abs_error_last_25": 426.0377156971398, "mean_abs_error_last_50": 568.3332258894889, "mean_pred_prob": 0.02686619790911209, "mean_pred_prob_last_10": 0.1354686495848, "mean_pred_prob_last_25": 0.07423293249448762, "mean_pred_prob_last_50": 0.0449274227314163, "mean_token_accuracy": 0.8649392902851105, "step": 50630 }, { "epoch": 0.9002186550050664, "grad_norm": 2.099113607431079, "learning_rate": 0.0001, "loss": 0.6597, "mean_abs_error": 547.675046458064, "mean_abs_error_last_10": 215.28915877684707, "mean_abs_error_last_25": 298.8378935945772, "mean_abs_error_last_50": 420.56789681131715, "mean_pred_prob": 0.03911022700485774, "mean_pred_prob_last_10": 0.17671194137074053, "mean_pred_prob_last_25": 0.10266975878621451, "mean_pred_prob_last_50": 0.06407717124675401, "mean_token_accuracy": 0.8708640575408936, "step": 50640 }, { "epoch": 0.9003964233018683, "grad_norm": 1.0579956450003478, "learning_rate": 0.0001, "loss": 0.6859, "mean_abs_error": 430.34772326520323, "mean_abs_error_last_10": 153.41775959222755, "mean_abs_error_last_25": 162.883341451435, "mean_abs_error_last_50": 257.35090546965176, "mean_pred_prob": 0.03973855290096253, "mean_pred_prob_last_10": 0.18630788438022136, "mean_pred_prob_last_25": 0.10802369145676494, "mean_pred_prob_last_50": 0.0659284770488739, "mean_token_accuracy": 0.8765370011329651, "step": 50650 }, { "epoch": 0.9005741915986702, "grad_norm": 2.410250026560324, "learning_rate": 0.0001, "loss": 0.6756, "mean_abs_error": 704.1102628375427, "mean_abs_error_last_10": 273.95797451825297, "mean_abs_error_last_25": 359.4444097305565, "mean_abs_error_last_50": 436.339876150734, "mean_pred_prob": 0.03469703797018155, "mean_pred_prob_last_10": 0.16190407504327595, "mean_pred_prob_last_25": 0.09483989827567711, "mean_pred_prob_last_50": 0.05791478713508695, "mean_token_accuracy": 0.8748893022537232, "step": 50660 }, { "epoch": 0.9007519598954722, "grad_norm": 1.142676200155839, "learning_rate": 0.0001, "loss": 0.7634, "mean_abs_error": 217.3510521112558, "mean_abs_error_last_10": 46.88979442582468, "mean_abs_error_last_25": 71.85001972850935, "mean_abs_error_last_50": 122.38372690487674, "mean_pred_prob": 0.061033785436302425, "mean_pred_prob_last_10": 0.28629602380096913, "mean_pred_prob_last_25": 0.16578016690909864, "mean_pred_prob_last_50": 0.10076094446703791, "mean_token_accuracy": 0.8729355335235596, "step": 50670 }, { "epoch": 0.9009297281922742, "grad_norm": 1.6836947159570588, "learning_rate": 0.0001, "loss": 0.6082, "mean_abs_error": 287.6802503427947, "mean_abs_error_last_10": 88.6948336973953, "mean_abs_error_last_25": 128.03518095146381, "mean_abs_error_last_50": 162.36000708023045, "mean_pred_prob": 0.04395751403644681, "mean_pred_prob_last_10": 0.2127227303571999, "mean_pred_prob_last_25": 0.12483332133851946, "mean_pred_prob_last_50": 0.0751732790376991, "mean_token_accuracy": 0.8700244903564454, "step": 50680 }, { "epoch": 0.9011074964890762, "grad_norm": 1.205339386871394, "learning_rate": 0.0001, "loss": 0.9142, "mean_abs_error": 672.8984520256296, "mean_abs_error_last_10": 99.40036430428611, "mean_abs_error_last_25": 189.70166978287432, "mean_abs_error_last_50": 360.5034080237596, "mean_pred_prob": 0.03884091316722334, "mean_pred_prob_last_10": 0.19887892827391623, "mean_pred_prob_last_25": 0.11401785202324391, "mean_pred_prob_last_50": 0.06844617193564773, "mean_token_accuracy": 0.8714790582656861, "step": 50690 }, { "epoch": 0.9012852647858781, "grad_norm": 1.5350228245348678, "learning_rate": 0.0001, "loss": 0.6612, "mean_abs_error": 643.5602355962804, "mean_abs_error_last_10": 221.04712862955407, "mean_abs_error_last_25": 301.7258574290894, "mean_abs_error_last_50": 421.74399245198236, "mean_pred_prob": 0.026391796534880996, "mean_pred_prob_last_10": 0.13869818691164254, "mean_pred_prob_last_25": 0.07318692919798195, "mean_pred_prob_last_50": 0.044711018726229665, "mean_token_accuracy": 0.8687493920326232, "step": 50700 }, { "epoch": 0.90146303308268, "grad_norm": 2.3387135808688275, "learning_rate": 0.0001, "loss": 0.7361, "mean_abs_error": 449.2887844733182, "mean_abs_error_last_10": 127.35647306687845, "mean_abs_error_last_25": 193.47221958796237, "mean_abs_error_last_50": 275.0159052493103, "mean_pred_prob": 0.02959357821382582, "mean_pred_prob_last_10": 0.14502689633518456, "mean_pred_prob_last_25": 0.08231543339788913, "mean_pred_prob_last_50": 0.049257423542439935, "mean_token_accuracy": 0.8753108501434326, "step": 50710 }, { "epoch": 0.901640801379482, "grad_norm": 1.569810842587197, "learning_rate": 0.0001, "loss": 0.8306, "mean_abs_error": 407.056844750104, "mean_abs_error_last_10": 117.13692364920776, "mean_abs_error_last_25": 158.3728666047593, "mean_abs_error_last_50": 249.17956453388086, "mean_pred_prob": 0.03081885785795748, "mean_pred_prob_last_10": 0.14518313817679881, "mean_pred_prob_last_25": 0.08124920036643743, "mean_pred_prob_last_50": 0.05087878229096532, "mean_token_accuracy": 0.8727608144283294, "step": 50720 }, { "epoch": 0.9018185696762839, "grad_norm": 1.623632816548815, "learning_rate": 0.0001, "loss": 0.7865, "mean_abs_error": 797.8806194660764, "mean_abs_error_last_10": 406.24817907286484, "mean_abs_error_last_25": 522.5384029059148, "mean_abs_error_last_50": 582.2525380480041, "mean_pred_prob": 0.024698892212472855, "mean_pred_prob_last_10": 0.1210723041149322, "mean_pred_prob_last_25": 0.06623655637376942, "mean_pred_prob_last_50": 0.04077174600679427, "mean_token_accuracy": 0.8684132695198059, "step": 50730 }, { "epoch": 0.9019963379730859, "grad_norm": 1.666183677451282, "learning_rate": 0.0001, "loss": 0.6657, "mean_abs_error": 367.85021719096153, "mean_abs_error_last_10": 75.11878303010519, "mean_abs_error_last_25": 120.88000280055819, "mean_abs_error_last_50": 216.4599516318744, "mean_pred_prob": 0.026796829933300615, "mean_pred_prob_last_10": 0.13165123090147973, "mean_pred_prob_last_25": 0.07274882774800062, "mean_pred_prob_last_50": 0.04470522543415427, "mean_token_accuracy": 0.8695530533790589, "step": 50740 }, { "epoch": 0.9021741062698878, "grad_norm": 1.3381125231704354, "learning_rate": 0.0001, "loss": 0.7366, "mean_abs_error": 648.5922963580119, "mean_abs_error_last_10": 195.5783013908802, "mean_abs_error_last_25": 248.8714154524674, "mean_abs_error_last_50": 378.10960134273785, "mean_pred_prob": 0.038439012179151175, "mean_pred_prob_last_10": 0.18868640454020352, "mean_pred_prob_last_25": 0.10260426534805447, "mean_pred_prob_last_50": 0.06257683848962188, "mean_token_accuracy": 0.8727829992771149, "step": 50750 }, { "epoch": 0.9023518745666897, "grad_norm": 1.354591512948065, "learning_rate": 0.0001, "loss": 0.7087, "mean_abs_error": 318.21366021683286, "mean_abs_error_last_10": 90.97982858289754, "mean_abs_error_last_25": 158.13717249104198, "mean_abs_error_last_50": 221.6090195014659, "mean_pred_prob": 0.05515357656404376, "mean_pred_prob_last_10": 0.24987288787961007, "mean_pred_prob_last_25": 0.14641189351677894, "mean_pred_prob_last_50": 0.0911654339171946, "mean_token_accuracy": 0.8569900035858155, "step": 50760 }, { "epoch": 0.9025296428634917, "grad_norm": 1.7731391347890753, "learning_rate": 0.0001, "loss": 0.6253, "mean_abs_error": 716.0547872432987, "mean_abs_error_last_10": 219.26440760160844, "mean_abs_error_last_25": 279.3283654004515, "mean_abs_error_last_50": 425.5821781217269, "mean_pred_prob": 0.0345092227042187, "mean_pred_prob_last_10": 0.14443927454994993, "mean_pred_prob_last_25": 0.08675622490700334, "mean_pred_prob_last_50": 0.05493959341547452, "mean_token_accuracy": 0.8738495945930481, "step": 50770 }, { "epoch": 0.9027074111602936, "grad_norm": 1.3301882169125803, "learning_rate": 0.0001, "loss": 0.695, "mean_abs_error": 295.16211346205546, "mean_abs_error_last_10": 62.80022796873487, "mean_abs_error_last_25": 77.57555499840049, "mean_abs_error_last_50": 148.13197656016328, "mean_pred_prob": 0.047733190143480896, "mean_pred_prob_last_10": 0.2205290637910366, "mean_pred_prob_last_25": 0.1312330319546163, "mean_pred_prob_last_50": 0.08122346471063793, "mean_token_accuracy": 0.8758995234966278, "step": 50780 }, { "epoch": 0.9028851794570957, "grad_norm": 2.153997674438065, "learning_rate": 0.0001, "loss": 0.7101, "mean_abs_error": 1439.4325681019118, "mean_abs_error_last_10": 527.279426454017, "mean_abs_error_last_25": 688.7357030113471, "mean_abs_error_last_50": 925.0380401403827, "mean_pred_prob": 0.03957906307914527, "mean_pred_prob_last_10": 0.18948777447512838, "mean_pred_prob_last_25": 0.10507723368355074, "mean_pred_prob_last_50": 0.06592254103452433, "mean_token_accuracy": 0.8732541620731353, "step": 50790 }, { "epoch": 0.9030629477538976, "grad_norm": 1.1007324750599972, "learning_rate": 0.0001, "loss": 0.7279, "mean_abs_error": 589.50958505521, "mean_abs_error_last_10": 217.5250322844603, "mean_abs_error_last_25": 289.8935163356244, "mean_abs_error_last_50": 463.2337296465891, "mean_pred_prob": 0.03736204069573432, "mean_pred_prob_last_10": 0.18553207200020552, "mean_pred_prob_last_25": 0.10235502747818828, "mean_pred_prob_last_50": 0.0631677526049316, "mean_token_accuracy": 0.8751447856426239, "step": 50800 }, { "epoch": 0.9032407160506996, "grad_norm": 2.0481079874679082, "learning_rate": 0.0001, "loss": 0.7156, "mean_abs_error": 175.24641241515744, "mean_abs_error_last_10": 38.83457091605302, "mean_abs_error_last_25": 66.42466844919778, "mean_abs_error_last_50": 117.04651408009548, "mean_pred_prob": 0.05996257178485394, "mean_pred_prob_last_10": 0.2667907830327749, "mean_pred_prob_last_25": 0.16004383210092782, "mean_pred_prob_last_50": 0.09959829589352012, "mean_token_accuracy": 0.8818376898765564, "step": 50810 }, { "epoch": 0.9034184843475015, "grad_norm": 1.176857307519184, "learning_rate": 0.0001, "loss": 0.7682, "mean_abs_error": 387.0257235069386, "mean_abs_error_last_10": 70.76533419558334, "mean_abs_error_last_25": 105.47907529735099, "mean_abs_error_last_50": 204.21108121104768, "mean_pred_prob": 0.04783112268196419, "mean_pred_prob_last_10": 0.23178676315583288, "mean_pred_prob_last_25": 0.1317256791051477, "mean_pred_prob_last_50": 0.07993379163090139, "mean_token_accuracy": 0.8637499451637268, "step": 50820 }, { "epoch": 0.9035962526443034, "grad_norm": 1.5863848524356186, "learning_rate": 0.0001, "loss": 0.712, "mean_abs_error": 358.00153468425566, "mean_abs_error_last_10": 195.83395071929212, "mean_abs_error_last_25": 207.9320030092491, "mean_abs_error_last_50": 221.51380152698516, "mean_pred_prob": 0.047736779088154435, "mean_pred_prob_last_10": 0.2407972775399685, "mean_pred_prob_last_25": 0.128958042524755, "mean_pred_prob_last_50": 0.07790856771171092, "mean_token_accuracy": 0.874071615934372, "step": 50830 }, { "epoch": 0.9037740209411054, "grad_norm": 1.6669152273719774, "learning_rate": 0.0001, "loss": 0.6924, "mean_abs_error": 950.6922331203701, "mean_abs_error_last_10": 577.0951545812143, "mean_abs_error_last_25": 618.1253537127942, "mean_abs_error_last_50": 722.3290832452301, "mean_pred_prob": 0.036286179412854835, "mean_pred_prob_last_10": 0.17137818837654778, "mean_pred_prob_last_25": 0.09710488351120147, "mean_pred_prob_last_50": 0.06005897658324102, "mean_token_accuracy": 0.8712578058242798, "step": 50840 }, { "epoch": 0.9039517892379073, "grad_norm": 1.759679951799492, "learning_rate": 0.0001, "loss": 0.7101, "mean_abs_error": 393.12671848299414, "mean_abs_error_last_10": 219.5826171663676, "mean_abs_error_last_25": 228.54750115421479, "mean_abs_error_last_50": 289.3552401861929, "mean_pred_prob": 0.04789633939508349, "mean_pred_prob_last_10": 0.22764574121683837, "mean_pred_prob_last_25": 0.1297186279669404, "mean_pred_prob_last_50": 0.07927275570109486, "mean_token_accuracy": 0.8707992672920227, "step": 50850 }, { "epoch": 0.9041295575347092, "grad_norm": 1.0954746913290068, "learning_rate": 0.0001, "loss": 0.6272, "mean_abs_error": 853.1959796009917, "mean_abs_error_last_10": 333.55652000797437, "mean_abs_error_last_25": 394.50677609760197, "mean_abs_error_last_50": 511.46668478384663, "mean_pred_prob": 0.03322412382403854, "mean_pred_prob_last_10": 0.15943258775514552, "mean_pred_prob_last_25": 0.08934546290547588, "mean_pred_prob_last_50": 0.055748881358886136, "mean_token_accuracy": 0.8772008359432221, "step": 50860 }, { "epoch": 0.9043073258315112, "grad_norm": 1.2775245376718551, "learning_rate": 0.0001, "loss": 0.7874, "mean_abs_error": 640.2976750683611, "mean_abs_error_last_10": 116.1469446927126, "mean_abs_error_last_25": 194.4061566798657, "mean_abs_error_last_50": 325.2590635812823, "mean_pred_prob": 0.02266111853532493, "mean_pred_prob_last_10": 0.10856394991278648, "mean_pred_prob_last_25": 0.06322593940421939, "mean_pred_prob_last_50": 0.039053648710250854, "mean_token_accuracy": 0.8669895648956298, "step": 50870 }, { "epoch": 0.9044850941283131, "grad_norm": 0.8874789241582854, "learning_rate": 0.0001, "loss": 0.6454, "mean_abs_error": 200.14817902605813, "mean_abs_error_last_10": 69.23972979268888, "mean_abs_error_last_25": 107.8693364557734, "mean_abs_error_last_50": 151.75600616549954, "mean_pred_prob": 0.05264224377460778, "mean_pred_prob_last_10": 0.26015731133520603, "mean_pred_prob_last_25": 0.14194664042443036, "mean_pred_prob_last_50": 0.08763981349766255, "mean_token_accuracy": 0.8843398928642273, "step": 50880 }, { "epoch": 0.9046628624251151, "grad_norm": 1.7961026409157022, "learning_rate": 0.0001, "loss": 0.7515, "mean_abs_error": 628.667653750828, "mean_abs_error_last_10": 347.3301841510033, "mean_abs_error_last_25": 457.1751707339066, "mean_abs_error_last_50": 523.5726582181067, "mean_pred_prob": 0.01851311548380181, "mean_pred_prob_last_10": 0.08643262316472829, "mean_pred_prob_last_25": 0.048747923341579735, "mean_pred_prob_last_50": 0.030016235751099886, "mean_token_accuracy": 0.8671968221664429, "step": 50890 }, { "epoch": 0.904840630721917, "grad_norm": 1.2228075534140135, "learning_rate": 0.0001, "loss": 0.8304, "mean_abs_error": 660.4348918355547, "mean_abs_error_last_10": 268.2644899619787, "mean_abs_error_last_25": 330.66637730058267, "mean_abs_error_last_50": 437.65101999238516, "mean_pred_prob": 0.05241038908425253, "mean_pred_prob_last_10": 0.25900833978666926, "mean_pred_prob_last_25": 0.1455583535193, "mean_pred_prob_last_50": 0.0886028068838641, "mean_token_accuracy": 0.8801218271255493, "step": 50900 }, { "epoch": 0.905018399018719, "grad_norm": 2.0039496006487525, "learning_rate": 0.0001, "loss": 0.6956, "mean_abs_error": 614.3008162283584, "mean_abs_error_last_10": 139.17041795945323, "mean_abs_error_last_25": 255.465745541022, "mean_abs_error_last_50": 402.2796953356499, "mean_pred_prob": 0.02553781138267368, "mean_pred_prob_last_10": 0.12685316167771815, "mean_pred_prob_last_25": 0.06801000367850066, "mean_pred_prob_last_50": 0.04193015233613551, "mean_token_accuracy": 0.8708633601665496, "step": 50910 }, { "epoch": 0.905196167315521, "grad_norm": 1.7170509163902046, "learning_rate": 0.0001, "loss": 0.7675, "mean_abs_error": 1339.5161363569428, "mean_abs_error_last_10": 682.0041373714529, "mean_abs_error_last_25": 897.8397046807855, "mean_abs_error_last_50": 1091.5846978597885, "mean_pred_prob": 0.03485165701713413, "mean_pred_prob_last_10": 0.1620658533356618, "mean_pred_prob_last_25": 0.09726478712400422, "mean_pred_prob_last_50": 0.05803141653595958, "mean_token_accuracy": 0.8792457222938538, "step": 50920 }, { "epoch": 0.9053739356123229, "grad_norm": 1.862921458304067, "learning_rate": 0.0001, "loss": 0.6202, "mean_abs_error": 599.3214927848826, "mean_abs_error_last_10": 240.0497095219996, "mean_abs_error_last_25": 322.1729247620062, "mean_abs_error_last_50": 437.56361946727094, "mean_pred_prob": 0.038502058284939265, "mean_pred_prob_last_10": 0.1859557416231837, "mean_pred_prob_last_25": 0.10421343937632628, "mean_pred_prob_last_50": 0.06426525714341551, "mean_token_accuracy": 0.8782886266708374, "step": 50930 }, { "epoch": 0.9055517039091249, "grad_norm": 1.0520831119995246, "learning_rate": 0.0001, "loss": 0.6107, "mean_abs_error": 225.58452783703038, "mean_abs_error_last_10": 70.43733233263671, "mean_abs_error_last_25": 134.75593956982817, "mean_abs_error_last_50": 177.07430099239596, "mean_pred_prob": 0.043260404467582704, "mean_pred_prob_last_10": 0.19729389138519765, "mean_pred_prob_last_25": 0.11277829464524984, "mean_pred_prob_last_50": 0.0706405290402472, "mean_token_accuracy": 0.8813311696052551, "step": 50940 }, { "epoch": 0.9057294722059268, "grad_norm": 1.385513322888621, "learning_rate": 0.0001, "loss": 0.7176, "mean_abs_error": 581.6889508081953, "mean_abs_error_last_10": 182.1126210443131, "mean_abs_error_last_25": 290.20857076945305, "mean_abs_error_last_50": 437.99415904273064, "mean_pred_prob": 0.02604307868168689, "mean_pred_prob_last_10": 0.13417256340617315, "mean_pred_prob_last_25": 0.07286170306615532, "mean_pred_prob_last_50": 0.043826652434654535, "mean_token_accuracy": 0.8699989080429077, "step": 50950 }, { "epoch": 0.9059072405027288, "grad_norm": 1.4365243275820094, "learning_rate": 0.0001, "loss": 0.6381, "mean_abs_error": 266.3183435489656, "mean_abs_error_last_10": 92.01038299433725, "mean_abs_error_last_25": 115.8114283212365, "mean_abs_error_last_50": 144.79300429228346, "mean_pred_prob": 0.04032102497294545, "mean_pred_prob_last_10": 0.1897557284682989, "mean_pred_prob_last_25": 0.10859299469739199, "mean_pred_prob_last_50": 0.06671499423682689, "mean_token_accuracy": 0.8770342350006104, "step": 50960 }, { "epoch": 0.9060850087995307, "grad_norm": 1.3515972486255359, "learning_rate": 0.0001, "loss": 0.6303, "mean_abs_error": 515.5931595070231, "mean_abs_error_last_10": 227.348428023241, "mean_abs_error_last_25": 253.71286338319322, "mean_abs_error_last_50": 301.7294533675732, "mean_pred_prob": 0.05002772472798824, "mean_pred_prob_last_10": 0.2227589674293995, "mean_pred_prob_last_25": 0.13834437649929895, "mean_pred_prob_last_50": 0.0841252934304066, "mean_token_accuracy": 0.8712055861949921, "step": 50970 }, { "epoch": 0.9062627770963326, "grad_norm": 1.5093168542638824, "learning_rate": 0.0001, "loss": 0.7035, "mean_abs_error": 720.1896890223292, "mean_abs_error_last_10": 198.39229788205316, "mean_abs_error_last_25": 279.8367700583807, "mean_abs_error_last_50": 442.94937222616056, "mean_pred_prob": 0.037987578587490134, "mean_pred_prob_last_10": 0.19300634042592718, "mean_pred_prob_last_25": 0.1070095238799695, "mean_pred_prob_last_50": 0.06405560175771825, "mean_token_accuracy": 0.8818853259086609, "step": 50980 }, { "epoch": 0.9064405453931346, "grad_norm": 0.8575221208050486, "learning_rate": 0.0001, "loss": 0.6489, "mean_abs_error": 421.4998188070833, "mean_abs_error_last_10": 244.00519628963406, "mean_abs_error_last_25": 240.15120262354952, "mean_abs_error_last_50": 302.05054760182855, "mean_pred_prob": 0.03952921699965373, "mean_pred_prob_last_10": 0.20137327659176663, "mean_pred_prob_last_25": 0.10991886029951274, "mean_pred_prob_last_50": 0.06663255639141426, "mean_token_accuracy": 0.8840280890464782, "step": 50990 }, { "epoch": 0.9066183136899365, "grad_norm": 2.2951217434417344, "learning_rate": 0.0001, "loss": 0.6781, "mean_abs_error": 222.6258382778987, "mean_abs_error_last_10": 58.68645414747059, "mean_abs_error_last_25": 64.98451524539232, "mean_abs_error_last_50": 104.73336054953752, "mean_pred_prob": 0.04752649120055139, "mean_pred_prob_last_10": 0.24476355612277984, "mean_pred_prob_last_25": 0.13288378361612557, "mean_pred_prob_last_50": 0.08065409725531936, "mean_token_accuracy": 0.8675595700740815, "step": 51000 }, { "epoch": 0.9067960819867384, "grad_norm": 0.8065193493320492, "learning_rate": 0.0001, "loss": 0.5808, "mean_abs_error": 258.54948208394126, "mean_abs_error_last_10": 103.87107134512924, "mean_abs_error_last_25": 148.0081263022553, "mean_abs_error_last_50": 161.89873769019877, "mean_pred_prob": 0.03586162980645895, "mean_pred_prob_last_10": 0.18659569919109345, "mean_pred_prob_last_25": 0.1007949311286211, "mean_pred_prob_last_50": 0.060693818517029284, "mean_token_accuracy": 0.8697472751140595, "step": 51010 }, { "epoch": 0.9069738502835404, "grad_norm": 1.5584786628915253, "learning_rate": 0.0001, "loss": 0.6715, "mean_abs_error": 470.7384011303402, "mean_abs_error_last_10": 233.2172969142724, "mean_abs_error_last_25": 275.7318733013487, "mean_abs_error_last_50": 312.77960160382617, "mean_pred_prob": 0.042658472125185656, "mean_pred_prob_last_10": 0.21013032798655332, "mean_pred_prob_last_25": 0.11581214210018516, "mean_pred_prob_last_50": 0.07073742194334046, "mean_token_accuracy": 0.8674613058567047, "step": 51020 }, { "epoch": 0.9071516185803424, "grad_norm": 1.7383293798352957, "learning_rate": 0.0001, "loss": 0.736, "mean_abs_error": 1373.9746773834283, "mean_abs_error_last_10": 592.2949511867489, "mean_abs_error_last_25": 660.4504552881228, "mean_abs_error_last_50": 907.4010359577987, "mean_pred_prob": 0.016807723534293474, "mean_pred_prob_last_10": 0.08822213093808387, "mean_pred_prob_last_25": 0.047636261416482736, "mean_pred_prob_last_50": 0.02890417140442878, "mean_token_accuracy": 0.8744429171085357, "step": 51030 }, { "epoch": 0.9073293868771444, "grad_norm": 2.1667450863601467, "learning_rate": 0.0001, "loss": 0.7481, "mean_abs_error": 275.2964052189776, "mean_abs_error_last_10": 64.62726994961947, "mean_abs_error_last_25": 113.8283055505319, "mean_abs_error_last_50": 228.5719391781066, "mean_pred_prob": 0.04597140364348888, "mean_pred_prob_last_10": 0.2220597133040428, "mean_pred_prob_last_25": 0.12499251682311296, "mean_pred_prob_last_50": 0.07564727924764156, "mean_token_accuracy": 0.8781964421272278, "step": 51040 }, { "epoch": 0.9075071551739463, "grad_norm": 1.3264250131929463, "learning_rate": 0.0001, "loss": 0.796, "mean_abs_error": 738.1232037532034, "mean_abs_error_last_10": 279.5806164864126, "mean_abs_error_last_25": 315.3198628197304, "mean_abs_error_last_50": 367.9698913287674, "mean_pred_prob": 0.034051496512256564, "mean_pred_prob_last_10": 0.1719806816894561, "mean_pred_prob_last_25": 0.092090286873281, "mean_pred_prob_last_50": 0.05642333481227979, "mean_token_accuracy": 0.869133597612381, "step": 51050 }, { "epoch": 0.9076849234707483, "grad_norm": 2.002888103655598, "learning_rate": 0.0001, "loss": 0.6437, "mean_abs_error": 583.5558000195458, "mean_abs_error_last_10": 188.80940757398247, "mean_abs_error_last_25": 282.40768006789216, "mean_abs_error_last_50": 377.4353994326725, "mean_pred_prob": 0.05502142100594938, "mean_pred_prob_last_10": 0.2599047321244143, "mean_pred_prob_last_25": 0.14855330286663956, "mean_pred_prob_last_50": 0.09179926507058553, "mean_token_accuracy": 0.8687551915645599, "step": 51060 }, { "epoch": 0.9078626917675502, "grad_norm": 1.1705978499767509, "learning_rate": 0.0001, "loss": 0.6672, "mean_abs_error": 377.62544578771224, "mean_abs_error_last_10": 149.22737931388104, "mean_abs_error_last_25": 215.8792658801432, "mean_abs_error_last_50": 297.461444980678, "mean_pred_prob": 0.03276338167488575, "mean_pred_prob_last_10": 0.1540049623697996, "mean_pred_prob_last_25": 0.0854410793632269, "mean_pred_prob_last_50": 0.05373084777966142, "mean_token_accuracy": 0.8760227143764496, "step": 51070 }, { "epoch": 0.9080404600643521, "grad_norm": 1.5032012327695303, "learning_rate": 0.0001, "loss": 0.8305, "mean_abs_error": 248.8992370145022, "mean_abs_error_last_10": 63.90137504953244, "mean_abs_error_last_25": 105.59462276417125, "mean_abs_error_last_50": 163.52061420028764, "mean_pred_prob": 0.05065157818607986, "mean_pred_prob_last_10": 0.23400439526885747, "mean_pred_prob_last_25": 0.13965280037373304, "mean_pred_prob_last_50": 0.08574669118970632, "mean_token_accuracy": 0.8704986870288849, "step": 51080 }, { "epoch": 0.9082182283611541, "grad_norm": 1.2647143654917181, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 154.63951512735554, "mean_abs_error_last_10": 54.31598030787277, "mean_abs_error_last_25": 88.89512254663863, "mean_abs_error_last_50": 110.42598902486773, "mean_pred_prob": 0.06174935712479055, "mean_pred_prob_last_10": 0.29708139691501856, "mean_pred_prob_last_25": 0.1672733413055539, "mean_pred_prob_last_50": 0.10376134999096394, "mean_token_accuracy": 0.8719251751899719, "step": 51090 }, { "epoch": 0.908395996657956, "grad_norm": 1.7161652631392932, "learning_rate": 0.0001, "loss": 0.6856, "mean_abs_error": 778.5271107535389, "mean_abs_error_last_10": 472.0194585877287, "mean_abs_error_last_25": 546.960846480998, "mean_abs_error_last_50": 609.4333123027924, "mean_pred_prob": 0.055306133022531866, "mean_pred_prob_last_10": 0.2724064690788509, "mean_pred_prob_last_25": 0.15304599439550656, "mean_pred_prob_last_50": 0.09188970000832342, "mean_token_accuracy": 0.882754123210907, "step": 51100 }, { "epoch": 0.908573764954758, "grad_norm": 1.1455416692575888, "learning_rate": 0.0001, "loss": 0.5915, "mean_abs_error": 57.358334588853346, "mean_abs_error_last_10": 7.2555990062955855, "mean_abs_error_last_25": 20.56472903693332, "mean_abs_error_last_50": 39.67354535403342, "mean_pred_prob": 0.07083251569420099, "mean_pred_prob_last_10": 0.3409458190202713, "mean_pred_prob_last_25": 0.18837452456355094, "mean_pred_prob_last_50": 0.11666331030428409, "mean_token_accuracy": 0.8744138181209564, "step": 51110 }, { "epoch": 0.9087515332515599, "grad_norm": 2.1067226808151425, "learning_rate": 0.0001, "loss": 0.6732, "mean_abs_error": 490.08831518484703, "mean_abs_error_last_10": 262.41730838046044, "mean_abs_error_last_25": 281.7760020841777, "mean_abs_error_last_50": 379.93581340116975, "mean_pred_prob": 0.06084984049666673, "mean_pred_prob_last_10": 0.2884475440718234, "mean_pred_prob_last_25": 0.1716358107049018, "mean_pred_prob_last_50": 0.1032019887585193, "mean_token_accuracy": 0.8888244509696961, "step": 51120 }, { "epoch": 0.9089293015483618, "grad_norm": 1.7101742456243916, "learning_rate": 0.0001, "loss": 0.658, "mean_abs_error": 506.76179560999424, "mean_abs_error_last_10": 107.935831914232, "mean_abs_error_last_25": 170.18946575521062, "mean_abs_error_last_50": 276.4304813967113, "mean_pred_prob": 0.03854854895034805, "mean_pred_prob_last_10": 0.19415688631124794, "mean_pred_prob_last_25": 0.1045436403946951, "mean_pred_prob_last_50": 0.06506222033058293, "mean_token_accuracy": 0.876698476076126, "step": 51130 }, { "epoch": 0.9091070698451638, "grad_norm": 2.1806295510907114, "learning_rate": 0.0001, "loss": 0.7942, "mean_abs_error": 1031.6217787934204, "mean_abs_error_last_10": 655.9621377354031, "mean_abs_error_last_25": 691.5087913780915, "mean_abs_error_last_50": 748.7462925185923, "mean_pred_prob": 0.029135072091594338, "mean_pred_prob_last_10": 0.1421391358366236, "mean_pred_prob_last_25": 0.07673719477170379, "mean_pred_prob_last_50": 0.04848252125811996, "mean_token_accuracy": 0.8702479898929596, "step": 51140 }, { "epoch": 0.9092848381419658, "grad_norm": 1.2406397264163818, "learning_rate": 0.0001, "loss": 0.7599, "mean_abs_error": 956.7221050250512, "mean_abs_error_last_10": 575.0151820944386, "mean_abs_error_last_25": 614.6215132058974, "mean_abs_error_last_50": 719.5071288128377, "mean_pred_prob": 0.03860899230639916, "mean_pred_prob_last_10": 0.19931509527086747, "mean_pred_prob_last_25": 0.10928519561130087, "mean_pred_prob_last_50": 0.06499660437402781, "mean_token_accuracy": 0.8683984994888305, "step": 51150 }, { "epoch": 0.9094626064387678, "grad_norm": 2.019592171003753, "learning_rate": 0.0001, "loss": 0.7109, "mean_abs_error": 496.0881281221564, "mean_abs_error_last_10": 241.44949874970226, "mean_abs_error_last_25": 251.72224154653813, "mean_abs_error_last_50": 332.82983836806693, "mean_pred_prob": 0.0288324860855937, "mean_pred_prob_last_10": 0.14109693598002196, "mean_pred_prob_last_25": 0.07491108402609825, "mean_pred_prob_last_50": 0.04720133570954203, "mean_token_accuracy": 0.8669462084770203, "step": 51160 }, { "epoch": 0.9096403747355697, "grad_norm": 1.76522399701, "learning_rate": 0.0001, "loss": 0.6316, "mean_abs_error": 746.8130608479505, "mean_abs_error_last_10": 275.97396524184484, "mean_abs_error_last_25": 371.29480738292114, "mean_abs_error_last_50": 475.27922993789025, "mean_pred_prob": 0.05033498545817565, "mean_pred_prob_last_10": 0.2088605605706107, "mean_pred_prob_last_25": 0.12462142137228512, "mean_pred_prob_last_50": 0.08021914617856965, "mean_token_accuracy": 0.8597575008869172, "step": 51170 }, { "epoch": 0.9098181430323716, "grad_norm": 1.4670763541021241, "learning_rate": 0.0001, "loss": 0.6908, "mean_abs_error": 1111.7078703072143, "mean_abs_error_last_10": 650.0018177966597, "mean_abs_error_last_25": 702.0014137428295, "mean_abs_error_last_50": 814.5516357258487, "mean_pred_prob": 0.02216596235812176, "mean_pred_prob_last_10": 0.10250636560376733, "mean_pred_prob_last_25": 0.056876441402710046, "mean_pred_prob_last_50": 0.03635294625419192, "mean_token_accuracy": 0.8540120482444763, "step": 51180 }, { "epoch": 0.9099959113291736, "grad_norm": 2.370225382535769, "learning_rate": 0.0001, "loss": 0.7148, "mean_abs_error": 497.7073530940581, "mean_abs_error_last_10": 312.4572453428308, "mean_abs_error_last_25": 306.19505051732506, "mean_abs_error_last_50": 302.6519722550273, "mean_pred_prob": 0.04435750728007406, "mean_pred_prob_last_10": 0.20828746855258942, "mean_pred_prob_last_25": 0.11648909635841846, "mean_pred_prob_last_50": 0.07362840226851404, "mean_token_accuracy": 0.8849582731723785, "step": 51190 }, { "epoch": 0.9101736796259755, "grad_norm": 1.3884268505176387, "learning_rate": 0.0001, "loss": 0.779, "mean_abs_error": 1290.0435393423177, "mean_abs_error_last_10": 828.7166111782293, "mean_abs_error_last_25": 1003.1833475063625, "mean_abs_error_last_50": 1085.3978170964342, "mean_pred_prob": 0.03353167733148439, "mean_pred_prob_last_10": 0.15777935642108787, "mean_pred_prob_last_25": 0.09158076021121815, "mean_pred_prob_last_50": 0.055098642685334195, "mean_token_accuracy": 0.8642694175243377, "step": 51200 }, { "epoch": 0.9103514479227774, "grad_norm": 2.8293674782104667, "learning_rate": 0.0001, "loss": 0.8714, "mean_abs_error": 868.3599304839245, "mean_abs_error_last_10": 442.0143512442992, "mean_abs_error_last_25": 569.839259478792, "mean_abs_error_last_50": 651.0708324705636, "mean_pred_prob": 0.03484569682914298, "mean_pred_prob_last_10": 0.17175177543540485, "mean_pred_prob_last_25": 0.09162945464777295, "mean_pred_prob_last_50": 0.05731699423631653, "mean_token_accuracy": 0.8664839744567872, "step": 51210 }, { "epoch": 0.9105292162195794, "grad_norm": 1.4613595516945304, "learning_rate": 0.0001, "loss": 0.6819, "mean_abs_error": 280.8136102779855, "mean_abs_error_last_10": 124.48547975629239, "mean_abs_error_last_25": 107.25986227846678, "mean_abs_error_last_50": 159.9999491400759, "mean_pred_prob": 0.05012470085639507, "mean_pred_prob_last_10": 0.2351003156043589, "mean_pred_prob_last_25": 0.12959017958492042, "mean_pred_prob_last_50": 0.0811831234022975, "mean_token_accuracy": 0.8686532318592072, "step": 51220 }, { "epoch": 0.9107069845163813, "grad_norm": 1.3654573528778933, "learning_rate": 0.0001, "loss": 0.6085, "mean_abs_error": 1131.3876759454392, "mean_abs_error_last_10": 741.5957161530866, "mean_abs_error_last_25": 842.2617402005847, "mean_abs_error_last_50": 889.4929178680246, "mean_pred_prob": 0.0481925912637962, "mean_pred_prob_last_10": 0.20740349893749227, "mean_pred_prob_last_25": 0.12081731445505284, "mean_pred_prob_last_50": 0.07709811902604997, "mean_token_accuracy": 0.8712657690048218, "step": 51230 }, { "epoch": 0.9108847528131833, "grad_norm": 1.9124323577712221, "learning_rate": 0.0001, "loss": 0.7799, "mean_abs_error": 518.7272467993729, "mean_abs_error_last_10": 116.14897770282775, "mean_abs_error_last_25": 168.63663049717056, "mean_abs_error_last_50": 290.50353967873934, "mean_pred_prob": 0.04602139033377171, "mean_pred_prob_last_10": 0.20954495649784805, "mean_pred_prob_last_25": 0.12256024349480868, "mean_pred_prob_last_50": 0.07589354212395846, "mean_token_accuracy": 0.8739831626415253, "step": 51240 }, { "epoch": 0.9110625211099852, "grad_norm": 1.2318975624152484, "learning_rate": 0.0001, "loss": 0.618, "mean_abs_error": 421.3585246211609, "mean_abs_error_last_10": 149.35992933043664, "mean_abs_error_last_25": 201.29283847664712, "mean_abs_error_last_50": 269.9243785145147, "mean_pred_prob": 0.04164021089673042, "mean_pred_prob_last_10": 0.20519629201153294, "mean_pred_prob_last_25": 0.11324200257658959, "mean_pred_prob_last_50": 0.06849396051838993, "mean_token_accuracy": 0.863268256187439, "step": 51250 }, { "epoch": 0.9112402894067871, "grad_norm": 1.4709586569486501, "learning_rate": 0.0001, "loss": 0.5509, "mean_abs_error": 277.6236630379127, "mean_abs_error_last_10": 132.13810951139098, "mean_abs_error_last_25": 112.04691402242364, "mean_abs_error_last_50": 125.72061662305293, "mean_pred_prob": 0.04903151215985417, "mean_pred_prob_last_10": 0.23370541948825121, "mean_pred_prob_last_25": 0.13009404605254532, "mean_pred_prob_last_50": 0.08133639870211481, "mean_token_accuracy": 0.8736862182617188, "step": 51260 }, { "epoch": 0.9114180577035892, "grad_norm": 1.35306118227047, "learning_rate": 0.0001, "loss": 0.5775, "mean_abs_error": 358.06472183574994, "mean_abs_error_last_10": 190.2853640519738, "mean_abs_error_last_25": 185.4252357255977, "mean_abs_error_last_50": 241.13203984630513, "mean_pred_prob": 0.05376577517017722, "mean_pred_prob_last_10": 0.2537016237387434, "mean_pred_prob_last_25": 0.14410409738775343, "mean_pred_prob_last_50": 0.08927449131151662, "mean_token_accuracy": 0.8711185038089753, "step": 51270 }, { "epoch": 0.9115958260003911, "grad_norm": 2.3244484059884596, "learning_rate": 0.0001, "loss": 0.6394, "mean_abs_error": 476.36065108149944, "mean_abs_error_last_10": 99.0853386324701, "mean_abs_error_last_25": 181.35348331003215, "mean_abs_error_last_50": 282.4405642498712, "mean_pred_prob": 0.046347837831126525, "mean_pred_prob_last_10": 0.18953689434565604, "mean_pred_prob_last_25": 0.1160631651058793, "mean_pred_prob_last_50": 0.07512335308128967, "mean_token_accuracy": 0.8737462639808655, "step": 51280 }, { "epoch": 0.9117735942971931, "grad_norm": 0.8652324334840494, "learning_rate": 0.0001, "loss": 0.7029, "mean_abs_error": 313.3172985086047, "mean_abs_error_last_10": 129.4601015594273, "mean_abs_error_last_25": 198.7637651529219, "mean_abs_error_last_50": 233.22043409542675, "mean_pred_prob": 0.053163998387753965, "mean_pred_prob_last_10": 0.25108974874019624, "mean_pred_prob_last_25": 0.14091448625549674, "mean_pred_prob_last_50": 0.08722242270596325, "mean_token_accuracy": 0.8796429514884949, "step": 51290 }, { "epoch": 0.911951362593995, "grad_norm": 2.226654882021768, "learning_rate": 0.0001, "loss": 0.7001, "mean_abs_error": 441.0660499416864, "mean_abs_error_last_10": 113.70337752233365, "mean_abs_error_last_25": 199.8605590569999, "mean_abs_error_last_50": 249.16865779315043, "mean_pred_prob": 0.03191833905875683, "mean_pred_prob_last_10": 0.16146550951525568, "mean_pred_prob_last_25": 0.08470607176423073, "mean_pred_prob_last_50": 0.05270764571614563, "mean_token_accuracy": 0.8735200583934783, "step": 51300 }, { "epoch": 0.912129130890797, "grad_norm": 2.0237532557865205, "learning_rate": 0.0001, "loss": 0.8176, "mean_abs_error": 722.6889077048463, "mean_abs_error_last_10": 207.08014179874925, "mean_abs_error_last_25": 242.11789076456975, "mean_abs_error_last_50": 466.2478453133434, "mean_pred_prob": 0.03426897320896387, "mean_pred_prob_last_10": 0.1804429312585853, "mean_pred_prob_last_25": 0.10030518788844348, "mean_pred_prob_last_50": 0.05966282688314095, "mean_token_accuracy": 0.8659809231758118, "step": 51310 }, { "epoch": 0.9123068991875989, "grad_norm": 1.0345689771515922, "learning_rate": 0.0001, "loss": 0.7256, "mean_abs_error": 333.1989967593451, "mean_abs_error_last_10": 81.05635915407797, "mean_abs_error_last_25": 146.78455057634488, "mean_abs_error_last_50": 269.4845925564873, "mean_pred_prob": 0.037720845406875016, "mean_pred_prob_last_10": 0.17958225775510073, "mean_pred_prob_last_25": 0.09847299959510565, "mean_pred_prob_last_50": 0.061361717199906705, "mean_token_accuracy": 0.8733828246593476, "step": 51320 }, { "epoch": 0.9124846674844008, "grad_norm": 3.6532182611697483, "learning_rate": 0.0001, "loss": 0.7742, "mean_abs_error": 1814.0901425041034, "mean_abs_error_last_10": 791.6549990881069, "mean_abs_error_last_25": 971.8941186624224, "mean_abs_error_last_50": 1237.5016232474807, "mean_pred_prob": 0.030469400044239592, "mean_pred_prob_last_10": 0.15712016248435248, "mean_pred_prob_last_25": 0.08815611908794381, "mean_pred_prob_last_50": 0.05271393726579845, "mean_token_accuracy": 0.8754456520080567, "step": 51330 }, { "epoch": 0.9126624357812028, "grad_norm": 2.914982818863111, "learning_rate": 0.0001, "loss": 0.7435, "mean_abs_error": 390.24864894836963, "mean_abs_error_last_10": 34.166644543904745, "mean_abs_error_last_25": 105.48297609519162, "mean_abs_error_last_50": 247.4306354189179, "mean_pred_prob": 0.04472694445867091, "mean_pred_prob_last_10": 0.22419887445867062, "mean_pred_prob_last_25": 0.12186522325500845, "mean_pred_prob_last_50": 0.07475338107906282, "mean_token_accuracy": 0.88272265791893, "step": 51340 }, { "epoch": 0.9128402040780047, "grad_norm": 0.903190538917091, "learning_rate": 0.0001, "loss": 0.6747, "mean_abs_error": 578.3544073414571, "mean_abs_error_last_10": 193.14028202743899, "mean_abs_error_last_25": 234.66763953112178, "mean_abs_error_last_50": 348.88350346478194, "mean_pred_prob": 0.03365057150949724, "mean_pred_prob_last_10": 0.1685379080940038, "mean_pred_prob_last_25": 0.09488755642669275, "mean_pred_prob_last_50": 0.056958001013845204, "mean_token_accuracy": 0.8682097256183624, "step": 51350 }, { "epoch": 0.9130179723748066, "grad_norm": 1.5195081547906166, "learning_rate": 0.0001, "loss": 0.6633, "mean_abs_error": 168.87135898309333, "mean_abs_error_last_10": 59.20386445242574, "mean_abs_error_last_25": 60.501490657686944, "mean_abs_error_last_50": 105.93957398709033, "mean_pred_prob": 0.04762478144839406, "mean_pred_prob_last_10": 0.22472582533955573, "mean_pred_prob_last_25": 0.12454022988677024, "mean_pred_prob_last_50": 0.07798155732452869, "mean_token_accuracy": 0.8634229063987732, "step": 51360 }, { "epoch": 0.9131957406716086, "grad_norm": 2.521925359604472, "learning_rate": 0.0001, "loss": 0.733, "mean_abs_error": 233.6239250738501, "mean_abs_error_last_10": 72.18335743852894, "mean_abs_error_last_25": 81.99693492513134, "mean_abs_error_last_50": 135.94950344293147, "mean_pred_prob": 0.04562127934768796, "mean_pred_prob_last_10": 0.22064133435487748, "mean_pred_prob_last_25": 0.1224522665143013, "mean_pred_prob_last_50": 0.07552280789241195, "mean_token_accuracy": 0.8690452992916107, "step": 51370 }, { "epoch": 0.9133735089684105, "grad_norm": 1.8418183364333267, "learning_rate": 0.0001, "loss": 0.6161, "mean_abs_error": 674.0828646546901, "mean_abs_error_last_10": 120.49469116815278, "mean_abs_error_last_25": 201.25818425168742, "mean_abs_error_last_50": 361.4913732394261, "mean_pred_prob": 0.048170742142247036, "mean_pred_prob_last_10": 0.21818712456151843, "mean_pred_prob_last_25": 0.12895468837814406, "mean_pred_prob_last_50": 0.07976016369066201, "mean_token_accuracy": 0.8786248981952667, "step": 51380 }, { "epoch": 0.9135512772652126, "grad_norm": 1.1517992136120756, "learning_rate": 0.0001, "loss": 0.638, "mean_abs_error": 330.4334456356181, "mean_abs_error_last_10": 186.8610586940776, "mean_abs_error_last_25": 255.413244721554, "mean_abs_error_last_50": 273.7628645331777, "mean_pred_prob": 0.04047477841377258, "mean_pred_prob_last_10": 0.18969320151954888, "mean_pred_prob_last_25": 0.10871948152780533, "mean_pred_prob_last_50": 0.06763078095391392, "mean_token_accuracy": 0.8788450539112092, "step": 51390 }, { "epoch": 0.9137290455620145, "grad_norm": 1.0560453222747073, "learning_rate": 0.0001, "loss": 0.6182, "mean_abs_error": 105.70384628664367, "mean_abs_error_last_10": 45.701587952596924, "mean_abs_error_last_25": 44.56969191053274, "mean_abs_error_last_50": 61.00534605571263, "mean_pred_prob": 0.060864991834387185, "mean_pred_prob_last_10": 0.26650297474116086, "mean_pred_prob_last_25": 0.15682798223569988, "mean_pred_prob_last_50": 0.10013191225007176, "mean_token_accuracy": 0.8757061302661896, "step": 51400 }, { "epoch": 0.9139068138588164, "grad_norm": 1.453159702179939, "learning_rate": 0.0001, "loss": 0.69, "mean_abs_error": 1483.5779296718006, "mean_abs_error_last_10": 646.7596673700522, "mean_abs_error_last_25": 685.5062880024244, "mean_abs_error_last_50": 893.9652155193453, "mean_pred_prob": 0.02373252597899409, "mean_pred_prob_last_10": 0.1067762323829811, "mean_pred_prob_last_25": 0.06127939070574939, "mean_pred_prob_last_50": 0.039249092843965626, "mean_token_accuracy": 0.8703098356723785, "step": 51410 }, { "epoch": 0.9140845821556184, "grad_norm": 1.528862757677043, "learning_rate": 0.0001, "loss": 0.5981, "mean_abs_error": 187.444318118205, "mean_abs_error_last_10": 67.22493783727779, "mean_abs_error_last_25": 88.89133084839415, "mean_abs_error_last_50": 117.89444015553651, "mean_pred_prob": 0.055923882266506554, "mean_pred_prob_last_10": 0.24783450923860073, "mean_pred_prob_last_25": 0.1442733235657215, "mean_pred_prob_last_50": 0.09238629275932908, "mean_token_accuracy": 0.8808993816375732, "step": 51420 }, { "epoch": 0.9142623504524203, "grad_norm": 1.778194762446048, "learning_rate": 0.0001, "loss": 0.8849, "mean_abs_error": 1492.7741090039613, "mean_abs_error_last_10": 850.0442098062392, "mean_abs_error_last_25": 987.0017024017125, "mean_abs_error_last_50": 1147.9354913404306, "mean_pred_prob": 0.019983590509218628, "mean_pred_prob_last_10": 0.11472195415408351, "mean_pred_prob_last_25": 0.056899672566214574, "mean_pred_prob_last_50": 0.033754093368770555, "mean_token_accuracy": 0.8743927955627442, "step": 51430 }, { "epoch": 0.9144401187492223, "grad_norm": 1.1927401571714262, "learning_rate": 0.0001, "loss": 0.6885, "mean_abs_error": 564.6992136211851, "mean_abs_error_last_10": 231.25020983333934, "mean_abs_error_last_25": 335.0580843925717, "mean_abs_error_last_50": 434.1879427170131, "mean_pred_prob": 0.028113793442025782, "mean_pred_prob_last_10": 0.132117260445375, "mean_pred_prob_last_25": 0.07569573250948451, "mean_pred_prob_last_50": 0.04658145239809528, "mean_token_accuracy": 0.8622451424598694, "step": 51440 }, { "epoch": 0.9146178870460242, "grad_norm": 0.9464638031285402, "learning_rate": 0.0001, "loss": 0.6068, "mean_abs_error": 737.0255085297041, "mean_abs_error_last_10": 205.70908584341632, "mean_abs_error_last_25": 284.8370778831881, "mean_abs_error_last_50": 427.6698861438954, "mean_pred_prob": 0.03787610034341924, "mean_pred_prob_last_10": 0.17682847301475704, "mean_pred_prob_last_25": 0.10253532296046615, "mean_pred_prob_last_50": 0.06283326221164316, "mean_token_accuracy": 0.8768582224845887, "step": 51450 }, { "epoch": 0.9147956553428261, "grad_norm": 1.4303609787762255, "learning_rate": 0.0001, "loss": 0.7814, "mean_abs_error": 384.24483959212006, "mean_abs_error_last_10": 118.85474416117036, "mean_abs_error_last_25": 129.3352151910619, "mean_abs_error_last_50": 203.4419817894724, "mean_pred_prob": 0.03759120292961597, "mean_pred_prob_last_10": 0.18491807635873556, "mean_pred_prob_last_25": 0.10127258170396089, "mean_pred_prob_last_50": 0.06016753604635596, "mean_token_accuracy": 0.867396605014801, "step": 51460 }, { "epoch": 0.9149734236396281, "grad_norm": 2.4537422802015345, "learning_rate": 0.0001, "loss": 0.6836, "mean_abs_error": 700.6867185411899, "mean_abs_error_last_10": 194.6109182202642, "mean_abs_error_last_25": 268.3553427975288, "mean_abs_error_last_50": 366.81195808520414, "mean_pred_prob": 0.0334236339549534, "mean_pred_prob_last_10": 0.16761742532253265, "mean_pred_prob_last_25": 0.09324777638539672, "mean_pred_prob_last_50": 0.056192865583579985, "mean_token_accuracy": 0.868983405828476, "step": 51470 }, { "epoch": 0.91515119193643, "grad_norm": 1.4140363736196284, "learning_rate": 0.0001, "loss": 0.6305, "mean_abs_error": 306.50411972295484, "mean_abs_error_last_10": 86.03083645314862, "mean_abs_error_last_25": 139.70649806260207, "mean_abs_error_last_50": 209.69615169818684, "mean_pred_prob": 0.04276695023290813, "mean_pred_prob_last_10": 0.21399569250643252, "mean_pred_prob_last_25": 0.12280173804610968, "mean_pred_prob_last_50": 0.07335507720708848, "mean_token_accuracy": 0.8825060188770294, "step": 51480 }, { "epoch": 0.915328960233232, "grad_norm": 2.7219129099441233, "learning_rate": 0.0001, "loss": 0.6225, "mean_abs_error": 415.894898680646, "mean_abs_error_last_10": 248.17580755787282, "mean_abs_error_last_25": 264.8829611119331, "mean_abs_error_last_50": 301.3533602121438, "mean_pred_prob": 0.04585050430614501, "mean_pred_prob_last_10": 0.21449081217870117, "mean_pred_prob_last_25": 0.12406307412311435, "mean_pred_prob_last_50": 0.07811591303907335, "mean_token_accuracy": 0.8785408794879913, "step": 51490 }, { "epoch": 0.915506728530034, "grad_norm": 1.9750680453110716, "learning_rate": 0.0001, "loss": 0.7399, "mean_abs_error": 639.5841797661201, "mean_abs_error_last_10": 232.2672171031251, "mean_abs_error_last_25": 299.52836457786805, "mean_abs_error_last_50": 420.6021493938488, "mean_pred_prob": 0.01832023118622601, "mean_pred_prob_last_10": 0.09589541850727983, "mean_pred_prob_last_25": 0.05250019686645828, "mean_pred_prob_last_50": 0.03116698076482862, "mean_token_accuracy": 0.8749646723270417, "step": 51500 }, { "epoch": 0.915684496826836, "grad_norm": 1.890841895932244, "learning_rate": 0.0001, "loss": 0.6826, "mean_abs_error": 681.2209191919213, "mean_abs_error_last_10": 157.67061137726242, "mean_abs_error_last_25": 193.19422044584329, "mean_abs_error_last_50": 387.9995046757066, "mean_pred_prob": 0.032713979587424544, "mean_pred_prob_last_10": 0.1659045555163175, "mean_pred_prob_last_25": 0.0883597885724157, "mean_pred_prob_last_50": 0.054087974736467, "mean_token_accuracy": 0.8816655933856964, "step": 51510 }, { "epoch": 0.9158622651236379, "grad_norm": 1.1756407813454306, "learning_rate": 0.0001, "loss": 0.6065, "mean_abs_error": 465.3042636481021, "mean_abs_error_last_10": 313.7963511602101, "mean_abs_error_last_25": 253.1625246167436, "mean_abs_error_last_50": 301.4068423072687, "mean_pred_prob": 0.0405429842940066, "mean_pred_prob_last_10": 0.18436988597968593, "mean_pred_prob_last_25": 0.10674219749635086, "mean_pred_prob_last_50": 0.06628533418988809, "mean_token_accuracy": 0.8767907559871674, "step": 51520 }, { "epoch": 0.9160400334204398, "grad_norm": 2.592298391001183, "learning_rate": 0.0001, "loss": 0.6137, "mean_abs_error": 378.82740194487224, "mean_abs_error_last_10": 89.01801179967636, "mean_abs_error_last_25": 165.7148070304757, "mean_abs_error_last_50": 276.89278180701547, "mean_pred_prob": 0.0441141074988991, "mean_pred_prob_last_10": 0.21042520864866673, "mean_pred_prob_last_25": 0.12096922614146025, "mean_pred_prob_last_50": 0.0734802589053288, "mean_token_accuracy": 0.8647126019001007, "step": 51530 }, { "epoch": 0.9162178017172418, "grad_norm": 1.2789645087081414, "learning_rate": 0.0001, "loss": 0.6965, "mean_abs_error": 444.9648419893463, "mean_abs_error_last_10": 167.56952033759626, "mean_abs_error_last_25": 269.18362361247307, "mean_abs_error_last_50": 358.7204198472956, "mean_pred_prob": 0.029784380982164294, "mean_pred_prob_last_10": 0.1512016760185361, "mean_pred_prob_last_25": 0.08286708019440994, "mean_pred_prob_last_50": 0.050262056686915454, "mean_token_accuracy": 0.8626811742782593, "step": 51540 }, { "epoch": 0.9163955700140437, "grad_norm": 1.7078931136875692, "learning_rate": 0.0001, "loss": 0.6873, "mean_abs_error": 180.48870559640358, "mean_abs_error_last_10": 24.260431355787226, "mean_abs_error_last_25": 59.14421505844316, "mean_abs_error_last_50": 103.20739103404014, "mean_pred_prob": 0.05586376851424575, "mean_pred_prob_last_10": 0.2716406028717756, "mean_pred_prob_last_25": 0.15341152288019658, "mean_pred_prob_last_50": 0.09333824384957552, "mean_token_accuracy": 0.8706318259239196, "step": 51550 }, { "epoch": 0.9165733383108456, "grad_norm": 1.1483364714350317, "learning_rate": 0.0001, "loss": 0.641, "mean_abs_error": 807.5447466104481, "mean_abs_error_last_10": 387.13731398030575, "mean_abs_error_last_25": 485.93983347291453, "mean_abs_error_last_50": 574.1475475328723, "mean_pred_prob": 0.03590671160782222, "mean_pred_prob_last_10": 0.18457898153574206, "mean_pred_prob_last_25": 0.10018640026100911, "mean_pred_prob_last_50": 0.06053731247375253, "mean_token_accuracy": 0.8763810932636261, "step": 51560 }, { "epoch": 0.9167511066076476, "grad_norm": 2.395940081902904, "learning_rate": 0.0001, "loss": 0.8099, "mean_abs_error": 365.6314034177896, "mean_abs_error_last_10": 110.42809085921763, "mean_abs_error_last_25": 228.33528616593193, "mean_abs_error_last_50": 326.86581309351914, "mean_pred_prob": 0.04503958271816373, "mean_pred_prob_last_10": 0.2174011941999197, "mean_pred_prob_last_25": 0.12048687655478715, "mean_pred_prob_last_50": 0.07441843543201684, "mean_token_accuracy": 0.8705804824829102, "step": 51570 }, { "epoch": 0.9169288749044495, "grad_norm": 1.6229355910959966, "learning_rate": 0.0001, "loss": 0.7176, "mean_abs_error": 471.906415933619, "mean_abs_error_last_10": 304.38653953242823, "mean_abs_error_last_25": 343.79986172802165, "mean_abs_error_last_50": 357.0184572529869, "mean_pred_prob": 0.02384470828110352, "mean_pred_prob_last_10": 0.1260537587106228, "mean_pred_prob_last_25": 0.06759289370384067, "mean_pred_prob_last_50": 0.04040577306877822, "mean_token_accuracy": 0.8560757458209991, "step": 51580 }, { "epoch": 0.9171066432012515, "grad_norm": 2.1769634183481372, "learning_rate": 0.0001, "loss": 0.6896, "mean_abs_error": 876.8547007725974, "mean_abs_error_last_10": 499.6289990897755, "mean_abs_error_last_25": 592.2774319407379, "mean_abs_error_last_50": 643.7897708234124, "mean_pred_prob": 0.062286294568912125, "mean_pred_prob_last_10": 0.2600373767025303, "mean_pred_prob_last_25": 0.15812262594117782, "mean_pred_prob_last_50": 0.10207510284089949, "mean_token_accuracy": 0.8754710733890534, "step": 51590 }, { "epoch": 0.9172844114980534, "grad_norm": 1.6798850560820604, "learning_rate": 0.0001, "loss": 0.6296, "mean_abs_error": 450.64345997619887, "mean_abs_error_last_10": 134.23509559080412, "mean_abs_error_last_25": 227.46117613420046, "mean_abs_error_last_50": 283.7962374679746, "mean_pred_prob": 0.03441826829221099, "mean_pred_prob_last_10": 0.17076789829879999, "mean_pred_prob_last_25": 0.08875974463298916, "mean_pred_prob_last_50": 0.056180513883009556, "mean_token_accuracy": 0.87364262342453, "step": 51600 }, { "epoch": 0.9174621797948553, "grad_norm": 1.8477103807242086, "learning_rate": 0.0001, "loss": 0.6564, "mean_abs_error": 218.75238879904674, "mean_abs_error_last_10": 174.2774078597635, "mean_abs_error_last_25": 176.50370303111748, "mean_abs_error_last_50": 193.00317787270063, "mean_pred_prob": 0.055367062240839, "mean_pred_prob_last_10": 0.26497381217777727, "mean_pred_prob_last_25": 0.14882964761927725, "mean_pred_prob_last_50": 0.09116730811074376, "mean_token_accuracy": 0.8705266714096069, "step": 51610 }, { "epoch": 0.9176399480916574, "grad_norm": 1.2019473608850229, "learning_rate": 0.0001, "loss": 0.6135, "mean_abs_error": 892.5224670019691, "mean_abs_error_last_10": 400.18208853808136, "mean_abs_error_last_25": 465.5861923317134, "mean_abs_error_last_50": 583.0544301485126, "mean_pred_prob": 0.0468811122700572, "mean_pred_prob_last_10": 0.22361839771037922, "mean_pred_prob_last_25": 0.12669227276055608, "mean_pred_prob_last_50": 0.07819111246790271, "mean_token_accuracy": 0.8785353660583496, "step": 51620 }, { "epoch": 0.9178177163884593, "grad_norm": 1.3918675197494996, "learning_rate": 0.0001, "loss": 0.7599, "mean_abs_error": 1121.1440894330312, "mean_abs_error_last_10": 466.47602310747914, "mean_abs_error_last_25": 607.0873291595474, "mean_abs_error_last_50": 813.6831043195839, "mean_pred_prob": 0.02646996455005137, "mean_pred_prob_last_10": 0.13488958289090078, "mean_pred_prob_last_25": 0.07172112427651882, "mean_pred_prob_last_50": 0.04414770818548277, "mean_token_accuracy": 0.8671217441558838, "step": 51630 }, { "epoch": 0.9179954846852613, "grad_norm": 1.116986613658657, "learning_rate": 0.0001, "loss": 0.6229, "mean_abs_error": 217.83207755644008, "mean_abs_error_last_10": 71.64916464291626, "mean_abs_error_last_25": 128.76277380857937, "mean_abs_error_last_50": 159.85859342993155, "mean_pred_prob": 0.049461237248033287, "mean_pred_prob_last_10": 0.2200948351994157, "mean_pred_prob_last_25": 0.12911236509680749, "mean_pred_prob_last_50": 0.08087084204889834, "mean_token_accuracy": 0.8729884505271912, "step": 51640 }, { "epoch": 0.9181732529820632, "grad_norm": 1.6159274793415077, "learning_rate": 0.0001, "loss": 0.7144, "mean_abs_error": 1129.2258312839872, "mean_abs_error_last_10": 801.8248745748517, "mean_abs_error_last_25": 846.0746110365237, "mean_abs_error_last_50": 860.7387947434821, "mean_pred_prob": 0.04524283342034323, "mean_pred_prob_last_10": 0.21880167608032935, "mean_pred_prob_last_25": 0.11666496214602376, "mean_pred_prob_last_50": 0.07393604476528708, "mean_token_accuracy": 0.8736737787723541, "step": 51650 }, { "epoch": 0.9183510212788651, "grad_norm": 1.2900025456514466, "learning_rate": 0.0001, "loss": 0.6021, "mean_abs_error": 286.49670920667154, "mean_abs_error_last_10": 148.59173199727826, "mean_abs_error_last_25": 134.5236288147605, "mean_abs_error_last_50": 182.1710230366726, "mean_pred_prob": 0.027861832920461894, "mean_pred_prob_last_10": 0.14713947027921676, "mean_pred_prob_last_25": 0.07569223949685693, "mean_pred_prob_last_50": 0.04632239481434226, "mean_token_accuracy": 0.8786074817180634, "step": 51660 }, { "epoch": 0.9185287895756671, "grad_norm": 2.36763084167971, "learning_rate": 0.0001, "loss": 0.7485, "mean_abs_error": 453.36185121666523, "mean_abs_error_last_10": 135.28675706216936, "mean_abs_error_last_25": 165.98292168715435, "mean_abs_error_last_50": 232.95433852255775, "mean_pred_prob": 0.032381089846603574, "mean_pred_prob_last_10": 0.1693099606782198, "mean_pred_prob_last_25": 0.09268473349511623, "mean_pred_prob_last_50": 0.055580787872895596, "mean_token_accuracy": 0.8759045720100402, "step": 51670 }, { "epoch": 0.918706557872469, "grad_norm": 1.214229937423311, "learning_rate": 0.0001, "loss": 0.642, "mean_abs_error": 399.1005218789602, "mean_abs_error_last_10": 49.82536422009398, "mean_abs_error_last_25": 105.10005242943494, "mean_abs_error_last_50": 196.5413075021735, "mean_pred_prob": 0.03523676344193518, "mean_pred_prob_last_10": 0.17648015450686216, "mean_pred_prob_last_25": 0.09610200496390461, "mean_pred_prob_last_50": 0.05818876908160746, "mean_token_accuracy": 0.8774022579193115, "step": 51680 }, { "epoch": 0.918884326169271, "grad_norm": 1.7793972701920897, "learning_rate": 0.0001, "loss": 0.6649, "mean_abs_error": 125.4811445756853, "mean_abs_error_last_10": 20.445743707992673, "mean_abs_error_last_25": 61.06842587306312, "mean_abs_error_last_50": 98.34481045920411, "mean_pred_prob": 0.05121485106647015, "mean_pred_prob_last_10": 0.2681680083274841, "mean_pred_prob_last_25": 0.14703062884509563, "mean_pred_prob_last_50": 0.08659938238561153, "mean_token_accuracy": 0.8765489220619201, "step": 51690 }, { "epoch": 0.9190620944660729, "grad_norm": 1.3816897485952742, "learning_rate": 0.0001, "loss": 0.7857, "mean_abs_error": 519.8492970615104, "mean_abs_error_last_10": 156.70012150105066, "mean_abs_error_last_25": 232.22544624360268, "mean_abs_error_last_50": 320.667010350876, "mean_pred_prob": 0.019758119899779557, "mean_pred_prob_last_10": 0.1039538448676467, "mean_pred_prob_last_25": 0.05569490408524871, "mean_pred_prob_last_50": 0.03297287807799876, "mean_token_accuracy": 0.8622296452522278, "step": 51700 }, { "epoch": 0.9192398627628748, "grad_norm": 1.4201185161103862, "learning_rate": 0.0001, "loss": 0.7019, "mean_abs_error": 421.51055725186586, "mean_abs_error_last_10": 323.93225782015026, "mean_abs_error_last_25": 413.34131371225476, "mean_abs_error_last_50": 383.29458621616584, "mean_pred_prob": 0.05725928557803854, "mean_pred_prob_last_10": 0.26686178862582893, "mean_pred_prob_last_25": 0.15584104822482914, "mean_pred_prob_last_50": 0.09652213074732571, "mean_token_accuracy": 0.8737931311130523, "step": 51710 }, { "epoch": 0.9194176310596768, "grad_norm": 2.174055359645286, "learning_rate": 0.0001, "loss": 0.6837, "mean_abs_error": 296.8227847495881, "mean_abs_error_last_10": 36.2014712013847, "mean_abs_error_last_25": 60.32418858031001, "mean_abs_error_last_50": 164.04985867462761, "mean_pred_prob": 0.07140426270198078, "mean_pred_prob_last_10": 0.33752233888953925, "mean_pred_prob_last_25": 0.19362456388771535, "mean_pred_prob_last_50": 0.11892379969358444, "mean_token_accuracy": 0.875173544883728, "step": 51720 }, { "epoch": 0.9195953993564787, "grad_norm": 1.4358156826007886, "learning_rate": 0.0001, "loss": 0.6438, "mean_abs_error": 467.9007078900865, "mean_abs_error_last_10": 244.72896318440343, "mean_abs_error_last_25": 392.18117160579106, "mean_abs_error_last_50": 404.68671184039334, "mean_pred_prob": 0.05167290936224163, "mean_pred_prob_last_10": 0.2341103171929717, "mean_pred_prob_last_25": 0.13267096187919378, "mean_pred_prob_last_50": 0.08446060176938772, "mean_token_accuracy": 0.875026285648346, "step": 51730 }, { "epoch": 0.9197731676532808, "grad_norm": 1.6694149773457079, "learning_rate": 0.0001, "loss": 0.5345, "mean_abs_error": 148.55077575934607, "mean_abs_error_last_10": 50.38197724959976, "mean_abs_error_last_25": 67.27151319797773, "mean_abs_error_last_50": 90.59361308665105, "mean_pred_prob": 0.05625748159363866, "mean_pred_prob_last_10": 0.28233177922666075, "mean_pred_prob_last_25": 0.15490064881742, "mean_pred_prob_last_50": 0.09431535182520748, "mean_token_accuracy": 0.8859222829341888, "step": 51740 }, { "epoch": 0.9199509359500827, "grad_norm": 1.8740263769633407, "learning_rate": 0.0001, "loss": 0.7617, "mean_abs_error": 538.5913744401684, "mean_abs_error_last_10": 267.51267339287324, "mean_abs_error_last_25": 319.06300311796804, "mean_abs_error_last_50": 405.43778046180677, "mean_pred_prob": 0.05059261425631121, "mean_pred_prob_last_10": 0.24377890896284954, "mean_pred_prob_last_25": 0.13634696223889478, "mean_pred_prob_last_50": 0.0840354340034537, "mean_token_accuracy": 0.8694670557975769, "step": 51750 }, { "epoch": 0.9201287042468846, "grad_norm": 1.5256026761265167, "learning_rate": 0.0001, "loss": 0.6005, "mean_abs_error": 601.6768489120166, "mean_abs_error_last_10": 171.43027452664393, "mean_abs_error_last_25": 239.35357650477113, "mean_abs_error_last_50": 356.49491320146615, "mean_pred_prob": 0.039144880068488416, "mean_pred_prob_last_10": 0.19456704133190214, "mean_pred_prob_last_25": 0.10895187922287733, "mean_pred_prob_last_50": 0.06518926298012957, "mean_token_accuracy": 0.8793963253498077, "step": 51760 }, { "epoch": 0.9203064725436866, "grad_norm": 3.424562189425041, "learning_rate": 0.0001, "loss": 0.6595, "mean_abs_error": 294.1244072708049, "mean_abs_error_last_10": 69.89492735274585, "mean_abs_error_last_25": 103.56837083771036, "mean_abs_error_last_50": 169.20402377149009, "mean_pred_prob": 0.047288561193272474, "mean_pred_prob_last_10": 0.22203980702906848, "mean_pred_prob_last_25": 0.12943917559459805, "mean_pred_prob_last_50": 0.0797593530267477, "mean_token_accuracy": 0.876367038488388, "step": 51770 }, { "epoch": 0.9204842408404885, "grad_norm": 2.211321266812542, "learning_rate": 0.0001, "loss": 0.7274, "mean_abs_error": 197.49433733540934, "mean_abs_error_last_10": 73.72467413482585, "mean_abs_error_last_25": 112.95819926368365, "mean_abs_error_last_50": 140.341237834996, "mean_pred_prob": 0.04624326913617551, "mean_pred_prob_last_10": 0.2169606124982238, "mean_pred_prob_last_25": 0.12167289992794394, "mean_pred_prob_last_50": 0.07633986431173981, "mean_token_accuracy": 0.876260507106781, "step": 51780 }, { "epoch": 0.9206620091372905, "grad_norm": 1.134569347555498, "learning_rate": 0.0001, "loss": 0.6939, "mean_abs_error": 323.5726408959623, "mean_abs_error_last_10": 152.418559544451, "mean_abs_error_last_25": 167.3567200241079, "mean_abs_error_last_50": 187.96237704975078, "mean_pred_prob": 0.02809410870540887, "mean_pred_prob_last_10": 0.1448662057518959, "mean_pred_prob_last_25": 0.0766436849720776, "mean_pred_prob_last_50": 0.04730289857834578, "mean_token_accuracy": 0.8757230043411255, "step": 51790 }, { "epoch": 0.9208397774340924, "grad_norm": 1.436020431745476, "learning_rate": 0.0001, "loss": 0.731, "mean_abs_error": 418.74601342758706, "mean_abs_error_last_10": 213.55412362335375, "mean_abs_error_last_25": 260.27243702654226, "mean_abs_error_last_50": 315.39992557864235, "mean_pred_prob": 0.03612756135407835, "mean_pred_prob_last_10": 0.18491013143211604, "mean_pred_prob_last_25": 0.10229313634335994, "mean_pred_prob_last_50": 0.06193094968330115, "mean_token_accuracy": 0.8647349953651429, "step": 51800 }, { "epoch": 0.9210175457308943, "grad_norm": 0.9130025540499066, "learning_rate": 0.0001, "loss": 0.6974, "mean_abs_error": 343.6964317437646, "mean_abs_error_last_10": 260.25353468511787, "mean_abs_error_last_25": 303.44012950443386, "mean_abs_error_last_50": 282.77623830156614, "mean_pred_prob": 0.04405109256040305, "mean_pred_prob_last_10": 0.21255198642611503, "mean_pred_prob_last_25": 0.12777968859300018, "mean_pred_prob_last_50": 0.07544981734827161, "mean_token_accuracy": 0.874146169424057, "step": 51810 }, { "epoch": 0.9211953140276963, "grad_norm": 1.5945780251765096, "learning_rate": 0.0001, "loss": 0.7173, "mean_abs_error": 432.3937919893489, "mean_abs_error_last_10": 170.91158461402864, "mean_abs_error_last_25": 188.20551855016524, "mean_abs_error_last_50": 246.03822071834833, "mean_pred_prob": 0.04386963851866312, "mean_pred_prob_last_10": 0.21267713580746203, "mean_pred_prob_last_25": 0.12320366139756515, "mean_pred_prob_last_50": 0.07399468262447044, "mean_token_accuracy": 0.8773027896881104, "step": 51820 }, { "epoch": 0.9213730823244982, "grad_norm": 1.0930844749506021, "learning_rate": 0.0001, "loss": 0.5837, "mean_abs_error": 148.64141170846136, "mean_abs_error_last_10": 35.61317206733755, "mean_abs_error_last_25": 50.33137950936849, "mean_abs_error_last_50": 81.94581442956311, "mean_pred_prob": 0.06205317254643887, "mean_pred_prob_last_10": 0.296945464797318, "mean_pred_prob_last_25": 0.16951301638036967, "mean_pred_prob_last_50": 0.10470940698869527, "mean_token_accuracy": 0.8789857923984528, "step": 51830 }, { "epoch": 0.9215508506213002, "grad_norm": 4.10343176657321, "learning_rate": 0.0001, "loss": 0.8223, "mean_abs_error": 606.6199889645354, "mean_abs_error_last_10": 230.0519065038114, "mean_abs_error_last_25": 284.02256631639796, "mean_abs_error_last_50": 430.8878951476978, "mean_pred_prob": 0.026806255744304508, "mean_pred_prob_last_10": 0.13461261736229063, "mean_pred_prob_last_25": 0.07614940180210397, "mean_pred_prob_last_50": 0.045187333784997466, "mean_token_accuracy": 0.8766743123531342, "step": 51840 }, { "epoch": 0.9217286189181021, "grad_norm": 1.1871857211334336, "learning_rate": 0.0001, "loss": 0.631, "mean_abs_error": 111.46988362409427, "mean_abs_error_last_10": 22.248915130045113, "mean_abs_error_last_25": 41.37985167141551, "mean_abs_error_last_50": 68.53790580873104, "mean_pred_prob": 0.05732927229255438, "mean_pred_prob_last_10": 0.27263310104608535, "mean_pred_prob_last_25": 0.15394015461206437, "mean_pred_prob_last_50": 0.0952291440218687, "mean_token_accuracy": 0.87691730260849, "step": 51850 }, { "epoch": 0.9219063872149041, "grad_norm": 2.5134044976855314, "learning_rate": 0.0001, "loss": 0.7096, "mean_abs_error": 415.9691577195878, "mean_abs_error_last_10": 70.8713726116885, "mean_abs_error_last_25": 114.85229628055826, "mean_abs_error_last_50": 296.3144480265509, "mean_pred_prob": 0.033010582672432065, "mean_pred_prob_last_10": 0.17416528947651386, "mean_pred_prob_last_25": 0.09088002499192953, "mean_pred_prob_last_50": 0.055817908700555564, "mean_token_accuracy": 0.8737098276615143, "step": 51860 }, { "epoch": 0.9220841555117061, "grad_norm": 0.8735320900941534, "learning_rate": 0.0001, "loss": 0.752, "mean_abs_error": 1160.6115322344062, "mean_abs_error_last_10": 317.1136040013401, "mean_abs_error_last_25": 502.44459173981056, "mean_abs_error_last_50": 703.846014124466, "mean_pred_prob": 0.04165881557273678, "mean_pred_prob_last_10": 0.1947946562198922, "mean_pred_prob_last_25": 0.1114563329028897, "mean_pred_prob_last_50": 0.06850187985692173, "mean_token_accuracy": 0.8655996322631836, "step": 51870 }, { "epoch": 0.922261923808508, "grad_norm": 2.3523892577215153, "learning_rate": 0.0001, "loss": 0.6433, "mean_abs_error": 228.57440781410384, "mean_abs_error_last_10": 110.02778474762174, "mean_abs_error_last_25": 134.2531129226358, "mean_abs_error_last_50": 135.90734842399092, "mean_pred_prob": 0.054548277892172335, "mean_pred_prob_last_10": 0.25475570000708103, "mean_pred_prob_last_25": 0.14381119534373282, "mean_pred_prob_last_50": 0.09003941658884287, "mean_token_accuracy": 0.8717492282390594, "step": 51880 }, { "epoch": 0.92243969210531, "grad_norm": 1.2978462657264025, "learning_rate": 0.0001, "loss": 0.7781, "mean_abs_error": 280.48947541947393, "mean_abs_error_last_10": 139.91479521384127, "mean_abs_error_last_25": 146.2653801492689, "mean_abs_error_last_50": 207.8344131033171, "mean_pred_prob": 0.047003052895888686, "mean_pred_prob_last_10": 0.21164658218622207, "mean_pred_prob_last_25": 0.12446873039007186, "mean_pred_prob_last_50": 0.07768809800036251, "mean_token_accuracy": 0.8706938862800598, "step": 51890 }, { "epoch": 0.9226174604021119, "grad_norm": 0.9916004827401785, "learning_rate": 0.0001, "loss": 0.6536, "mean_abs_error": 114.68227076785979, "mean_abs_error_last_10": 35.40100882482549, "mean_abs_error_last_25": 50.23669183479645, "mean_abs_error_last_50": 78.04069513495122, "mean_pred_prob": 0.052660133503377435, "mean_pred_prob_last_10": 0.26493143644183875, "mean_pred_prob_last_25": 0.14694625036790968, "mean_pred_prob_last_50": 0.08800892420113086, "mean_token_accuracy": 0.874691516160965, "step": 51900 }, { "epoch": 0.9227952286989138, "grad_norm": 2.446022863946622, "learning_rate": 0.0001, "loss": 0.787, "mean_abs_error": 1323.59939009196, "mean_abs_error_last_10": 640.1582004124101, "mean_abs_error_last_25": 779.337822091122, "mean_abs_error_last_50": 949.8616605150546, "mean_pred_prob": 0.031360053401294864, "mean_pred_prob_last_10": 0.1571177757155965, "mean_pred_prob_last_25": 0.08834681577864104, "mean_pred_prob_last_50": 0.05401703688548878, "mean_token_accuracy": 0.8777107834815979, "step": 51910 }, { "epoch": 0.9229729969957158, "grad_norm": 1.9011986289299916, "learning_rate": 0.0001, "loss": 0.7218, "mean_abs_error": 839.8613728103571, "mean_abs_error_last_10": 200.65657811497346, "mean_abs_error_last_25": 290.4745017786648, "mean_abs_error_last_50": 510.03689694785743, "mean_pred_prob": 0.0244288514892105, "mean_pred_prob_last_10": 0.11792502915486694, "mean_pred_prob_last_25": 0.06559308501891792, "mean_pred_prob_last_50": 0.03964787759468891, "mean_token_accuracy": 0.870699119567871, "step": 51920 }, { "epoch": 0.9231507652925177, "grad_norm": 1.6985775536091023, "learning_rate": 0.0001, "loss": 0.6433, "mean_abs_error": 528.8505443977683, "mean_abs_error_last_10": 139.15578274278445, "mean_abs_error_last_25": 182.73854338000262, "mean_abs_error_last_50": 313.8929484970569, "mean_pred_prob": 0.038679749821312726, "mean_pred_prob_last_10": 0.17701408676803113, "mean_pred_prob_last_25": 0.1038987667299807, "mean_pred_prob_last_50": 0.06335901985876262, "mean_token_accuracy": 0.8741781949996948, "step": 51930 }, { "epoch": 0.9233285335893197, "grad_norm": 1.6451931008734488, "learning_rate": 0.0001, "loss": 0.7313, "mean_abs_error": 598.6011290117177, "mean_abs_error_last_10": 162.2654621759762, "mean_abs_error_last_25": 287.16266301060483, "mean_abs_error_last_50": 332.4371141540238, "mean_pred_prob": 0.04043091328348965, "mean_pred_prob_last_10": 0.18210999947041273, "mean_pred_prob_last_25": 0.11106463437899947, "mean_pred_prob_last_50": 0.06841187044046819, "mean_token_accuracy": 0.8583912253379822, "step": 51940 }, { "epoch": 0.9235063018861216, "grad_norm": 1.9255998561643732, "learning_rate": 0.0001, "loss": 0.8044, "mean_abs_error": 695.9961943211877, "mean_abs_error_last_10": 178.9711737182548, "mean_abs_error_last_25": 260.8164835846766, "mean_abs_error_last_50": 400.1356412740137, "mean_pred_prob": 0.03160929794248659, "mean_pred_prob_last_10": 0.17053367006592451, "mean_pred_prob_last_25": 0.09322235621511936, "mean_pred_prob_last_50": 0.05439555948250927, "mean_token_accuracy": 0.8711919903755188, "step": 51950 }, { "epoch": 0.9236840701829235, "grad_norm": 2.1921727884345072, "learning_rate": 0.0001, "loss": 0.6184, "mean_abs_error": 191.49183486634925, "mean_abs_error_last_10": 121.99713764105641, "mean_abs_error_last_25": 137.55378264076268, "mean_abs_error_last_50": 131.22566620788956, "mean_pred_prob": 0.04437258079415187, "mean_pred_prob_last_10": 0.22307479700539262, "mean_pred_prob_last_25": 0.12213306599296629, "mean_pred_prob_last_50": 0.0746415976784192, "mean_token_accuracy": 0.8647661447525025, "step": 51960 }, { "epoch": 0.9238618384797255, "grad_norm": 2.065323793269368, "learning_rate": 0.0001, "loss": 0.747, "mean_abs_error": 423.47392016954444, "mean_abs_error_last_10": 118.80227711824486, "mean_abs_error_last_25": 292.24888193944105, "mean_abs_error_last_50": 393.00616189794283, "mean_pred_prob": 0.044397601345554, "mean_pred_prob_last_10": 0.19170073736459017, "mean_pred_prob_last_25": 0.1111207407899201, "mean_pred_prob_last_50": 0.07245292728766799, "mean_token_accuracy": 0.8732040882110595, "step": 51970 }, { "epoch": 0.9240396067765275, "grad_norm": 2.455702171816419, "learning_rate": 0.0001, "loss": 0.7916, "mean_abs_error": 376.774926590943, "mean_abs_error_last_10": 187.2135784201912, "mean_abs_error_last_25": 305.8267007232315, "mean_abs_error_last_50": 344.48415203515185, "mean_pred_prob": 0.035495204874314366, "mean_pred_prob_last_10": 0.17412631940096618, "mean_pred_prob_last_25": 0.094898397102952, "mean_pred_prob_last_50": 0.05731357070617378, "mean_token_accuracy": 0.8787756085395813, "step": 51980 }, { "epoch": 0.9242173750733295, "grad_norm": 1.1699389108521276, "learning_rate": 0.0001, "loss": 0.667, "mean_abs_error": 455.34327606498255, "mean_abs_error_last_10": 148.88695815382957, "mean_abs_error_last_25": 189.20886572125102, "mean_abs_error_last_50": 243.67831951721502, "mean_pred_prob": 0.03049654485657811, "mean_pred_prob_last_10": 0.15589949515415355, "mean_pred_prob_last_25": 0.08365311314119026, "mean_pred_prob_last_50": 0.05100282765924931, "mean_token_accuracy": 0.8805035293102265, "step": 51990 }, { "epoch": 0.9243951433701314, "grad_norm": 1.1019319516699888, "learning_rate": 0.0001, "loss": 0.6554, "mean_abs_error": 392.4104565019155, "mean_abs_error_last_10": 102.75278021804604, "mean_abs_error_last_25": 126.50012011410904, "mean_abs_error_last_50": 180.73232941041113, "mean_pred_prob": 0.043576968880370257, "mean_pred_prob_last_10": 0.1974420330952853, "mean_pred_prob_last_25": 0.11036189140286297, "mean_pred_prob_last_50": 0.06965319393202662, "mean_token_accuracy": 0.887911343574524, "step": 52000 }, { "epoch": 0.9245729116669333, "grad_norm": 1.6877446304107333, "learning_rate": 0.0001, "loss": 0.7203, "mean_abs_error": 853.882804150208, "mean_abs_error_last_10": 231.37364665552846, "mean_abs_error_last_25": 291.556233579488, "mean_abs_error_last_50": 512.7933266735351, "mean_pred_prob": 0.023129138606600465, "mean_pred_prob_last_10": 0.1288394982344471, "mean_pred_prob_last_25": 0.06898235490662045, "mean_pred_prob_last_50": 0.039619645447237416, "mean_token_accuracy": 0.8730705440044403, "step": 52010 }, { "epoch": 0.9247506799637353, "grad_norm": 1.3470425227791163, "learning_rate": 0.0001, "loss": 0.7885, "mean_abs_error": 375.4656986102636, "mean_abs_error_last_10": 55.89429305213942, "mean_abs_error_last_25": 100.44846937101647, "mean_abs_error_last_50": 196.4936578212633, "mean_pred_prob": 0.04036206089658663, "mean_pred_prob_last_10": 0.16474171001464127, "mean_pred_prob_last_25": 0.09780468516983092, "mean_pred_prob_last_50": 0.064352269587107, "mean_token_accuracy": 0.8670951426029205, "step": 52020 }, { "epoch": 0.9249284482605372, "grad_norm": 1.3985703622506513, "learning_rate": 0.0001, "loss": 0.6952, "mean_abs_error": 310.012008361103, "mean_abs_error_last_10": 72.15836860817868, "mean_abs_error_last_25": 107.06044945562239, "mean_abs_error_last_50": 188.9926691750841, "mean_pred_prob": 0.033429376501590016, "mean_pred_prob_last_10": 0.1564272329211235, "mean_pred_prob_last_25": 0.08979919692501426, "mean_pred_prob_last_50": 0.05551052857190371, "mean_token_accuracy": 0.8764356791973114, "step": 52030 }, { "epoch": 0.9251062165573392, "grad_norm": 1.100173397687635, "learning_rate": 0.0001, "loss": 0.8351, "mean_abs_error": 593.4219873252498, "mean_abs_error_last_10": 149.33001320996232, "mean_abs_error_last_25": 230.9108036020546, "mean_abs_error_last_50": 360.76424210657103, "mean_pred_prob": 0.0253178697952535, "mean_pred_prob_last_10": 0.13986421105219052, "mean_pred_prob_last_25": 0.07238200338324532, "mean_pred_prob_last_50": 0.04273195606074296, "mean_token_accuracy": 0.8636921107769012, "step": 52040 }, { "epoch": 0.9252839848541411, "grad_norm": 2.124403461352874, "learning_rate": 0.0001, "loss": 0.6562, "mean_abs_error": 327.1637350438677, "mean_abs_error_last_10": 87.7710676341639, "mean_abs_error_last_25": 237.04356997768818, "mean_abs_error_last_50": 283.124965904997, "mean_pred_prob": 0.04335904093459249, "mean_pred_prob_last_10": 0.19327099733054637, "mean_pred_prob_last_25": 0.11103266291320324, "mean_pred_prob_last_50": 0.06937957732006908, "mean_token_accuracy": 0.8788404047489167, "step": 52050 }, { "epoch": 0.925461753150943, "grad_norm": 0.9464130801889775, "learning_rate": 0.0001, "loss": 0.5945, "mean_abs_error": 433.92094874519, "mean_abs_error_last_10": 61.30144847600367, "mean_abs_error_last_25": 85.05712496190726, "mean_abs_error_last_50": 192.71901515396286, "mean_pred_prob": 0.040896265767514706, "mean_pred_prob_last_10": 0.19636612497270106, "mean_pred_prob_last_25": 0.1117160170339048, "mean_pred_prob_last_50": 0.06877341852523386, "mean_token_accuracy": 0.8771135628223419, "step": 52060 }, { "epoch": 0.925639521447745, "grad_norm": 1.9221344757067196, "learning_rate": 0.0001, "loss": 0.8251, "mean_abs_error": 486.52350371548874, "mean_abs_error_last_10": 248.11102405855073, "mean_abs_error_last_25": 233.37415273961102, "mean_abs_error_last_50": 314.680431458489, "mean_pred_prob": 0.02897978958208114, "mean_pred_prob_last_10": 0.14081385713070632, "mean_pred_prob_last_25": 0.07966421619057655, "mean_pred_prob_last_50": 0.0493480034172535, "mean_token_accuracy": 0.8673072695732117, "step": 52070 }, { "epoch": 0.9258172897445469, "grad_norm": 2.05894350373332, "learning_rate": 0.0001, "loss": 0.7385, "mean_abs_error": 354.22622816131747, "mean_abs_error_last_10": 122.83049732048819, "mean_abs_error_last_25": 148.59280840056255, "mean_abs_error_last_50": 208.22754759677218, "mean_pred_prob": 0.02682604044675827, "mean_pred_prob_last_10": 0.15179689638316632, "mean_pred_prob_last_25": 0.07718732003122568, "mean_pred_prob_last_50": 0.0457175949588418, "mean_token_accuracy": 0.8736386656761169, "step": 52080 }, { "epoch": 0.9259950580413489, "grad_norm": 1.3063961355774165, "learning_rate": 0.0001, "loss": 0.7423, "mean_abs_error": 1118.048046057893, "mean_abs_error_last_10": 592.8992393823797, "mean_abs_error_last_25": 726.0320856789991, "mean_abs_error_last_50": 876.918188966805, "mean_pred_prob": 0.03095698477118276, "mean_pred_prob_last_10": 0.1649069470295217, "mean_pred_prob_last_25": 0.08856508645112626, "mean_pred_prob_last_50": 0.052705549946404064, "mean_token_accuracy": 0.8745147705078125, "step": 52090 }, { "epoch": 0.9261728263381509, "grad_norm": 2.7685843515579855, "learning_rate": 0.0001, "loss": 0.6273, "mean_abs_error": 367.26117662146646, "mean_abs_error_last_10": 76.57246343960716, "mean_abs_error_last_25": 110.22498528815117, "mean_abs_error_last_50": 197.3464118775417, "mean_pred_prob": 0.044035155326128006, "mean_pred_prob_last_10": 0.22634585946798325, "mean_pred_prob_last_25": 0.1239632297307253, "mean_pred_prob_last_50": 0.07593666207976639, "mean_token_accuracy": 0.8803319752216339, "step": 52100 }, { "epoch": 0.9263505946349528, "grad_norm": 1.6196565470689552, "learning_rate": 0.0001, "loss": 0.6002, "mean_abs_error": 577.0827499737883, "mean_abs_error_last_10": 322.9367337589721, "mean_abs_error_last_25": 362.9723195315647, "mean_abs_error_last_50": 411.7057312553105, "mean_pred_prob": 0.02105787502368912, "mean_pred_prob_last_10": 0.09883919211570173, "mean_pred_prob_last_25": 0.05707328047137707, "mean_pred_prob_last_50": 0.03505168433766812, "mean_token_accuracy": 0.8787450015544891, "step": 52110 }, { "epoch": 0.9265283629317548, "grad_norm": 1.8390185057583253, "learning_rate": 0.0001, "loss": 0.7774, "mean_abs_error": 515.4838947262217, "mean_abs_error_last_10": 156.22544871585734, "mean_abs_error_last_25": 222.31199952491676, "mean_abs_error_last_50": 338.01598550944675, "mean_pred_prob": 0.0365405076299794, "mean_pred_prob_last_10": 0.1817286261706613, "mean_pred_prob_last_25": 0.10087884140666574, "mean_pred_prob_last_50": 0.06207927940995432, "mean_token_accuracy": 0.8635880172252655, "step": 52120 }, { "epoch": 0.9267061312285567, "grad_norm": 1.9416554616523596, "learning_rate": 0.0001, "loss": 0.8166, "mean_abs_error": 727.4423690668873, "mean_abs_error_last_10": 259.45343918175, "mean_abs_error_last_25": 279.3562783093938, "mean_abs_error_last_50": 437.22933739668133, "mean_pred_prob": 0.044499620626447724, "mean_pred_prob_last_10": 0.2034582715597935, "mean_pred_prob_last_25": 0.11585730358492582, "mean_pred_prob_last_50": 0.07327742530032992, "mean_token_accuracy": 0.8827414333820343, "step": 52130 }, { "epoch": 0.9268838995253587, "grad_norm": 1.1871415483841032, "learning_rate": 0.0001, "loss": 0.8104, "mean_abs_error": 638.1605152788956, "mean_abs_error_last_10": 306.51189336698565, "mean_abs_error_last_25": 457.59639147438554, "mean_abs_error_last_50": 575.1651926324064, "mean_pred_prob": 0.01822609871160239, "mean_pred_prob_last_10": 0.1035756440833211, "mean_pred_prob_last_25": 0.05164287704974413, "mean_pred_prob_last_50": 0.031015157140791415, "mean_token_accuracy": 0.8631169080734253, "step": 52140 }, { "epoch": 0.9270616678221606, "grad_norm": 1.8260325717598096, "learning_rate": 0.0001, "loss": 0.6264, "mean_abs_error": 373.86248930176515, "mean_abs_error_last_10": 164.8162623671568, "mean_abs_error_last_25": 192.1263251149419, "mean_abs_error_last_50": 267.31206124975927, "mean_pred_prob": 0.0356013702461496, "mean_pred_prob_last_10": 0.16747566014528276, "mean_pred_prob_last_25": 0.09364616423845291, "mean_pred_prob_last_50": 0.0591069505084306, "mean_token_accuracy": 0.8695590078830719, "step": 52150 }, { "epoch": 0.9272394361189625, "grad_norm": 1.3016891636876227, "learning_rate": 0.0001, "loss": 0.7903, "mean_abs_error": 1139.5487167511951, "mean_abs_error_last_10": 696.5004902883127, "mean_abs_error_last_25": 783.069209330454, "mean_abs_error_last_50": 928.0750560294257, "mean_pred_prob": 0.034854017994803144, "mean_pred_prob_last_10": 0.16812410123384325, "mean_pred_prob_last_25": 0.09303547144663753, "mean_pred_prob_last_50": 0.05786951431655325, "mean_token_accuracy": 0.8723235368728638, "step": 52160 }, { "epoch": 0.9274172044157645, "grad_norm": 1.5190966577675997, "learning_rate": 0.0001, "loss": 0.8185, "mean_abs_error": 692.533133186922, "mean_abs_error_last_10": 357.47268003381816, "mean_abs_error_last_25": 332.44002723054706, "mean_abs_error_last_50": 416.5072167743115, "mean_pred_prob": 0.04615704789175652, "mean_pred_prob_last_10": 0.19123886351590044, "mean_pred_prob_last_25": 0.11773361334344372, "mean_pred_prob_last_50": 0.07506492994725704, "mean_token_accuracy": 0.8687506675720215, "step": 52170 }, { "epoch": 0.9275949727125664, "grad_norm": 1.1707128046645892, "learning_rate": 0.0001, "loss": 0.7974, "mean_abs_error": 286.7421730331745, "mean_abs_error_last_10": 148.80277248014815, "mean_abs_error_last_25": 133.8039837046344, "mean_abs_error_last_50": 193.43880006486296, "mean_pred_prob": 0.04262738614343107, "mean_pred_prob_last_10": 0.2187979083508253, "mean_pred_prob_last_25": 0.12013321109116078, "mean_pred_prob_last_50": 0.07335309502668678, "mean_token_accuracy": 0.8673185765743255, "step": 52180 }, { "epoch": 0.9277727410093684, "grad_norm": 0.764932138419552, "learning_rate": 0.0001, "loss": 1.0073, "mean_abs_error": 375.4635597073274, "mean_abs_error_last_10": 209.65073950271525, "mean_abs_error_last_25": 272.5498336717012, "mean_abs_error_last_50": 307.4783496237868, "mean_pred_prob": 0.03543315627612174, "mean_pred_prob_last_10": 0.19991910960525275, "mean_pred_prob_last_25": 0.10188774140551686, "mean_pred_prob_last_50": 0.06032898365519941, "mean_token_accuracy": 0.8744879126548767, "step": 52190 }, { "epoch": 0.9279505093061703, "grad_norm": 1.7073166644990792, "learning_rate": 0.0001, "loss": 0.7805, "mean_abs_error": 317.4763975048864, "mean_abs_error_last_10": 134.10901838931454, "mean_abs_error_last_25": 137.04387480139897, "mean_abs_error_last_50": 197.05201890496056, "mean_pred_prob": 0.044363940879702565, "mean_pred_prob_last_10": 0.2097888259217143, "mean_pred_prob_last_25": 0.12097810329869389, "mean_pred_prob_last_50": 0.07344406498596072, "mean_token_accuracy": 0.8710229396820068, "step": 52200 }, { "epoch": 0.9281282776029723, "grad_norm": 1.301864449192264, "learning_rate": 0.0001, "loss": 0.7434, "mean_abs_error": 1067.953600795606, "mean_abs_error_last_10": 542.9738177546168, "mean_abs_error_last_25": 713.2615089108997, "mean_abs_error_last_50": 848.5939456013248, "mean_pred_prob": 0.0395253970673366, "mean_pred_prob_last_10": 0.19017015164718032, "mean_pred_prob_last_25": 0.10418233553209574, "mean_pred_prob_last_50": 0.06419833711988758, "mean_token_accuracy": 0.8722419679164887, "step": 52210 }, { "epoch": 0.9283060458997743, "grad_norm": 1.0494625484512619, "learning_rate": 0.0001, "loss": 0.6974, "mean_abs_error": 173.57841046482935, "mean_abs_error_last_10": 45.353578389508705, "mean_abs_error_last_25": 77.48654125007002, "mean_abs_error_last_50": 150.65963707468168, "mean_pred_prob": 0.06942470059730113, "mean_pred_prob_last_10": 0.30600000321865084, "mean_pred_prob_last_25": 0.17818298749625683, "mean_pred_prob_last_50": 0.11237146998755634, "mean_token_accuracy": 0.8559396088123321, "step": 52220 }, { "epoch": 0.9284838141965762, "grad_norm": 1.6076757764000293, "learning_rate": 0.0001, "loss": 0.6981, "mean_abs_error": 503.8137761049403, "mean_abs_error_last_10": 125.32291996516066, "mean_abs_error_last_25": 178.91379227824353, "mean_abs_error_last_50": 322.9808259189941, "mean_pred_prob": 0.04051290536299348, "mean_pred_prob_last_10": 0.2071070171892643, "mean_pred_prob_last_25": 0.11766668437048793, "mean_pred_prob_last_50": 0.07012152671813965, "mean_token_accuracy": 0.8818376123905182, "step": 52230 }, { "epoch": 0.9286615824933782, "grad_norm": 2.0435567386785687, "learning_rate": 0.0001, "loss": 0.6708, "mean_abs_error": 952.1975550259131, "mean_abs_error_last_10": 532.820308944262, "mean_abs_error_last_25": 618.9676176628093, "mean_abs_error_last_50": 736.8418860621247, "mean_pred_prob": 0.0489230834384216, "mean_pred_prob_last_10": 0.20881752816494553, "mean_pred_prob_last_25": 0.12976002804061865, "mean_pred_prob_last_50": 0.08035450994939311, "mean_token_accuracy": 0.8653956055641174, "step": 52240 }, { "epoch": 0.9288393507901801, "grad_norm": 1.6343759562251696, "learning_rate": 0.0001, "loss": 0.664, "mean_abs_error": 347.8566113241869, "mean_abs_error_last_10": 113.74331484903136, "mean_abs_error_last_25": 148.65918172555854, "mean_abs_error_last_50": 243.15562500095226, "mean_pred_prob": 0.02990094624692574, "mean_pred_prob_last_10": 0.14147184123285114, "mean_pred_prob_last_25": 0.08374346948694437, "mean_pred_prob_last_50": 0.05164124050643295, "mean_token_accuracy": 0.8761702358722687, "step": 52250 }, { "epoch": 0.929017119086982, "grad_norm": 2.1696818124131076, "learning_rate": 0.0001, "loss": 0.71, "mean_abs_error": 594.0155159587296, "mean_abs_error_last_10": 268.72521644415986, "mean_abs_error_last_25": 289.5804803733898, "mean_abs_error_last_50": 413.9188763700978, "mean_pred_prob": 0.04449728121398948, "mean_pred_prob_last_10": 0.21046036488842218, "mean_pred_prob_last_25": 0.11528312322916463, "mean_pred_prob_last_50": 0.07177990062627941, "mean_token_accuracy": 0.87305166721344, "step": 52260 }, { "epoch": 0.929194887383784, "grad_norm": 3.1358864299542453, "learning_rate": 0.0001, "loss": 0.7372, "mean_abs_error": 255.81755025675488, "mean_abs_error_last_10": 176.7218794281386, "mean_abs_error_last_25": 315.74657036720794, "mean_abs_error_last_50": 295.0898706057818, "mean_pred_prob": 0.07219528695568442, "mean_pred_prob_last_10": 0.2925959285348654, "mean_pred_prob_last_25": 0.18165533659048377, "mean_pred_prob_last_50": 0.1165852680336684, "mean_token_accuracy": 0.8798716962337494, "step": 52270 }, { "epoch": 0.9293726556805859, "grad_norm": 0.9195355426446895, "learning_rate": 0.0001, "loss": 0.5894, "mean_abs_error": 533.3882390499587, "mean_abs_error_last_10": 96.39012880886553, "mean_abs_error_last_25": 244.29370752528507, "mean_abs_error_last_50": 344.14695691125405, "mean_pred_prob": 0.02180572976358235, "mean_pred_prob_last_10": 0.12050214782357216, "mean_pred_prob_last_25": 0.06254591532051564, "mean_pred_prob_last_50": 0.0372709256131202, "mean_token_accuracy": 0.8775759994983673, "step": 52280 }, { "epoch": 0.9295504239773879, "grad_norm": 2.1196443874496853, "learning_rate": 0.0001, "loss": 0.7007, "mean_abs_error": 521.834899736941, "mean_abs_error_last_10": 84.90737658510093, "mean_abs_error_last_25": 174.74106573870066, "mean_abs_error_last_50": 269.3606734164355, "mean_pred_prob": 0.05114694544463418, "mean_pred_prob_last_10": 0.21930252651218324, "mean_pred_prob_last_25": 0.12758020891342312, "mean_pred_prob_last_50": 0.08291734667727724, "mean_token_accuracy": 0.875084125995636, "step": 52290 }, { "epoch": 0.9297281922741898, "grad_norm": 1.5907114231856077, "learning_rate": 0.0001, "loss": 0.7153, "mean_abs_error": 967.2264990052554, "mean_abs_error_last_10": 317.2292761703919, "mean_abs_error_last_25": 398.59534265194713, "mean_abs_error_last_50": 571.9855584732462, "mean_pred_prob": 0.02901447725016624, "mean_pred_prob_last_10": 0.13570332705858162, "mean_pred_prob_last_25": 0.07737438589101657, "mean_pred_prob_last_50": 0.048169792781118305, "mean_token_accuracy": 0.8724551796913147, "step": 52300 }, { "epoch": 0.9299059605709917, "grad_norm": 1.0136774780256284, "learning_rate": 0.0001, "loss": 0.7204, "mean_abs_error": 933.9152355245794, "mean_abs_error_last_10": 348.37002898922503, "mean_abs_error_last_25": 570.460851199812, "mean_abs_error_last_50": 730.4019980250644, "mean_pred_prob": 0.034155955244204964, "mean_pred_prob_last_10": 0.17270758988452145, "mean_pred_prob_last_25": 0.09637781908968464, "mean_pred_prob_last_50": 0.057274899882031605, "mean_token_accuracy": 0.87444908618927, "step": 52310 }, { "epoch": 0.9300837288677937, "grad_norm": 1.4337324197964256, "learning_rate": 0.0001, "loss": 0.7995, "mean_abs_error": 643.2937099905738, "mean_abs_error_last_10": 383.6573958213288, "mean_abs_error_last_25": 364.90689399215387, "mean_abs_error_last_50": 431.0328338963662, "mean_pred_prob": 0.04727090636151843, "mean_pred_prob_last_10": 0.2266756761644501, "mean_pred_prob_last_25": 0.12956132215913385, "mean_pred_prob_last_50": 0.07831288669840433, "mean_token_accuracy": 0.8642576992511749, "step": 52320 }, { "epoch": 0.9302614971645957, "grad_norm": 1.6287236418246944, "learning_rate": 0.0001, "loss": 0.7418, "mean_abs_error": 460.03134133663053, "mean_abs_error_last_10": 182.269103873346, "mean_abs_error_last_25": 211.1711678181311, "mean_abs_error_last_50": 251.73396544969333, "mean_pred_prob": 0.021004310250282286, "mean_pred_prob_last_10": 0.10927883479744196, "mean_pred_prob_last_25": 0.05748763801530003, "mean_pred_prob_last_50": 0.03513668705709279, "mean_token_accuracy": 0.8649936974048614, "step": 52330 }, { "epoch": 0.9304392654613977, "grad_norm": 1.5623705043274891, "learning_rate": 0.0001, "loss": 0.693, "mean_abs_error": 208.81239145348133, "mean_abs_error_last_10": 77.43199619013176, "mean_abs_error_last_25": 84.6193985198657, "mean_abs_error_last_50": 126.82669131196376, "mean_pred_prob": 0.04222666313871741, "mean_pred_prob_last_10": 0.20489412620663644, "mean_pred_prob_last_25": 0.11454091221094131, "mean_pred_prob_last_50": 0.06940978355705738, "mean_token_accuracy": 0.8786504626274109, "step": 52340 }, { "epoch": 0.9306170337581996, "grad_norm": 0.7935538329313296, "learning_rate": 0.0001, "loss": 0.7688, "mean_abs_error": 1026.8346351157413, "mean_abs_error_last_10": 308.7257445992172, "mean_abs_error_last_25": 471.38362262613543, "mean_abs_error_last_50": 650.4845775641703, "mean_pred_prob": 0.019205533131025732, "mean_pred_prob_last_10": 0.09998633469222114, "mean_pred_prob_last_25": 0.052945523528615014, "mean_pred_prob_last_50": 0.032203195139300075, "mean_token_accuracy": 0.8656736552715302, "step": 52350 }, { "epoch": 0.9307948020550015, "grad_norm": 1.6579066286894406, "learning_rate": 0.0001, "loss": 0.6149, "mean_abs_error": 204.4015110755094, "mean_abs_error_last_10": 56.75952591533663, "mean_abs_error_last_25": 101.4166280035796, "mean_abs_error_last_50": 150.22524790669013, "mean_pred_prob": 0.05348750208504498, "mean_pred_prob_last_10": 0.23387665227055549, "mean_pred_prob_last_25": 0.13584893234074116, "mean_pred_prob_last_50": 0.08625515596941113, "mean_token_accuracy": 0.876557570695877, "step": 52360 }, { "epoch": 0.9309725703518035, "grad_norm": 1.4590584811421274, "learning_rate": 0.0001, "loss": 0.6749, "mean_abs_error": 374.9936821900018, "mean_abs_error_last_10": 108.43023032399256, "mean_abs_error_last_25": 139.28031339489442, "mean_abs_error_last_50": 208.51316762663618, "mean_pred_prob": 0.03179513940121979, "mean_pred_prob_last_10": 0.15111673325300218, "mean_pred_prob_last_25": 0.08300851518288255, "mean_pred_prob_last_50": 0.05261999871581793, "mean_token_accuracy": 0.8733400642871857, "step": 52370 }, { "epoch": 0.9311503386486054, "grad_norm": 2.0548032031836616, "learning_rate": 0.0001, "loss": 0.589, "mean_abs_error": 97.4170689110355, "mean_abs_error_last_10": 16.410135460985746, "mean_abs_error_last_25": 36.11250352086925, "mean_abs_error_last_50": 46.71055350012339, "mean_pred_prob": 0.06289176326245069, "mean_pred_prob_last_10": 0.29488110467791556, "mean_pred_prob_last_25": 0.16518313214182853, "mean_pred_prob_last_50": 0.10409001782536506, "mean_token_accuracy": 0.8720053374767304, "step": 52380 }, { "epoch": 0.9313281069454074, "grad_norm": 1.917949845018901, "learning_rate": 0.0001, "loss": 0.6545, "mean_abs_error": 648.0169137776398, "mean_abs_error_last_10": 417.586697665663, "mean_abs_error_last_25": 427.5330356979446, "mean_abs_error_last_50": 520.3332166538569, "mean_pred_prob": 0.02945125524420291, "mean_pred_prob_last_10": 0.1476615346444305, "mean_pred_prob_last_25": 0.08183297645300627, "mean_pred_prob_last_50": 0.049297551915515214, "mean_token_accuracy": 0.8680002510547637, "step": 52390 }, { "epoch": 0.9315058752422093, "grad_norm": 1.9743031891637615, "learning_rate": 0.0001, "loss": 0.6746, "mean_abs_error": 425.9618103665419, "mean_abs_error_last_10": 139.4907637639132, "mean_abs_error_last_25": 180.77629017296022, "mean_abs_error_last_50": 256.90986551228883, "mean_pred_prob": 0.03989879600703716, "mean_pred_prob_last_10": 0.17871213767211885, "mean_pred_prob_last_25": 0.1038704715669155, "mean_pred_prob_last_50": 0.06535586446989328, "mean_token_accuracy": 0.874896889925003, "step": 52400 }, { "epoch": 0.9316836435390112, "grad_norm": 1.7003202879058856, "learning_rate": 0.0001, "loss": 0.7188, "mean_abs_error": 191.59849161376434, "mean_abs_error_last_10": 26.45084906613817, "mean_abs_error_last_25": 46.993508493102254, "mean_abs_error_last_50": 95.70700831976811, "mean_pred_prob": 0.043038972467184064, "mean_pred_prob_last_10": 0.2148567408323288, "mean_pred_prob_last_25": 0.11950770057737828, "mean_pred_prob_last_50": 0.07265608850866556, "mean_token_accuracy": 0.8692321240901947, "step": 52410 }, { "epoch": 0.9318614118358132, "grad_norm": 1.6237215970323446, "learning_rate": 0.0001, "loss": 0.7727, "mean_abs_error": 424.410998101291, "mean_abs_error_last_10": 243.66345808940773, "mean_abs_error_last_25": 226.65376299628764, "mean_abs_error_last_50": 280.17702741368805, "mean_pred_prob": 0.03226878722198308, "mean_pred_prob_last_10": 0.14917880073189735, "mean_pred_prob_last_25": 0.08597811628133059, "mean_pred_prob_last_50": 0.05348426476120949, "mean_token_accuracy": 0.8696795403957367, "step": 52420 }, { "epoch": 0.9320391801326151, "grad_norm": 2.336352138179837, "learning_rate": 0.0001, "loss": 0.6472, "mean_abs_error": 257.2535786497503, "mean_abs_error_last_10": 85.00462987409529, "mean_abs_error_last_25": 85.94253388042131, "mean_abs_error_last_50": 130.2819742790444, "mean_pred_prob": 0.05016481403727084, "mean_pred_prob_last_10": 0.22528104465454818, "mean_pred_prob_last_25": 0.13062636544927955, "mean_pred_prob_last_50": 0.08203562162816525, "mean_token_accuracy": 0.8797115623950958, "step": 52430 }, { "epoch": 0.932216948429417, "grad_norm": 1.8275665047854774, "learning_rate": 0.0001, "loss": 0.812, "mean_abs_error": 1485.0648269689673, "mean_abs_error_last_10": 894.1389909618135, "mean_abs_error_last_25": 1019.6400840205176, "mean_abs_error_last_50": 1181.8771615593205, "mean_pred_prob": 0.031194545893959, "mean_pred_prob_last_10": 0.1669749690030585, "mean_pred_prob_last_25": 0.089754583548347, "mean_pred_prob_last_50": 0.05300211590511026, "mean_token_accuracy": 0.8623385965824127, "step": 52440 }, { "epoch": 0.9323947167262191, "grad_norm": 1.470312989074876, "learning_rate": 0.0001, "loss": 0.6587, "mean_abs_error": 125.02867027319716, "mean_abs_error_last_10": 13.640769532419222, "mean_abs_error_last_25": 49.9423149970694, "mean_abs_error_last_50": 86.31796815114176, "mean_pred_prob": 0.055590875819325446, "mean_pred_prob_last_10": 0.2828943707048893, "mean_pred_prob_last_25": 0.14603686109185218, "mean_pred_prob_last_50": 0.09089985843747854, "mean_token_accuracy": 0.8842820048332214, "step": 52450 }, { "epoch": 0.932572485023021, "grad_norm": 1.9952527964945097, "learning_rate": 0.0001, "loss": 0.6031, "mean_abs_error": 951.1596757076934, "mean_abs_error_last_10": 499.8477128281376, "mean_abs_error_last_25": 583.9000562749604, "mean_abs_error_last_50": 724.4249787386677, "mean_pred_prob": 0.023942015104694292, "mean_pred_prob_last_10": 0.12733152582368348, "mean_pred_prob_last_25": 0.06926086790044791, "mean_pred_prob_last_50": 0.0410407215997111, "mean_token_accuracy": 0.8781299233436585, "step": 52460 }, { "epoch": 0.932750253319823, "grad_norm": 1.3898344334073731, "learning_rate": 0.0001, "loss": 0.7015, "mean_abs_error": 730.194626432768, "mean_abs_error_last_10": 228.48251479700426, "mean_abs_error_last_25": 259.44633870255905, "mean_abs_error_last_50": 362.5759607411268, "mean_pred_prob": 0.0293678147951141, "mean_pred_prob_last_10": 0.13688188451342284, "mean_pred_prob_last_25": 0.07586119172628969, "mean_pred_prob_last_50": 0.0480099958833307, "mean_token_accuracy": 0.8759670734405518, "step": 52470 }, { "epoch": 0.9329280216166249, "grad_norm": 1.3883845497529232, "learning_rate": 0.0001, "loss": 0.7708, "mean_abs_error": 358.34673927992696, "mean_abs_error_last_10": 88.49961581657651, "mean_abs_error_last_25": 123.60409322018143, "mean_abs_error_last_50": 204.51105322730402, "mean_pred_prob": 0.04691169185680337, "mean_pred_prob_last_10": 0.2380267939530313, "mean_pred_prob_last_25": 0.12772150069940835, "mean_pred_prob_last_50": 0.07806164257926866, "mean_token_accuracy": 0.8724275588989258, "step": 52480 }, { "epoch": 0.9331057899134269, "grad_norm": 2.29853352826959, "learning_rate": 0.0001, "loss": 0.6648, "mean_abs_error": 235.30636239292102, "mean_abs_error_last_10": 96.26157473042906, "mean_abs_error_last_25": 100.40119911901903, "mean_abs_error_last_50": 134.66869409469888, "mean_pred_prob": 0.04768908959813416, "mean_pred_prob_last_10": 0.19560646489262581, "mean_pred_prob_last_25": 0.11972853867337108, "mean_pred_prob_last_50": 0.07679995195940137, "mean_token_accuracy": 0.871781986951828, "step": 52490 }, { "epoch": 0.9332835582102288, "grad_norm": 1.2025776768657372, "learning_rate": 0.0001, "loss": 0.6666, "mean_abs_error": 186.8944785056396, "mean_abs_error_last_10": 69.56803196983165, "mean_abs_error_last_25": 85.41380466969659, "mean_abs_error_last_50": 132.53949991552435, "mean_pred_prob": 0.045184982009232044, "mean_pred_prob_last_10": 0.1908197969198227, "mean_pred_prob_last_25": 0.10790791139006614, "mean_pred_prob_last_50": 0.07142335902899503, "mean_token_accuracy": 0.8799140274524688, "step": 52500 }, { "epoch": 0.9334613265070307, "grad_norm": 1.1243662220385082, "learning_rate": 0.0001, "loss": 0.6035, "mean_abs_error": 1602.078496114555, "mean_abs_error_last_10": 985.6142979281855, "mean_abs_error_last_25": 1138.2751627839993, "mean_abs_error_last_50": 1298.8042250797832, "mean_pred_prob": 0.05018502365273889, "mean_pred_prob_last_10": 0.20894148956576827, "mean_pred_prob_last_25": 0.12785107870367937, "mean_pred_prob_last_50": 0.08172381407057401, "mean_token_accuracy": 0.8747966706752777, "step": 52510 }, { "epoch": 0.9336390948038327, "grad_norm": 1.2699142029142907, "learning_rate": 0.0001, "loss": 0.6934, "mean_abs_error": 447.50504256194097, "mean_abs_error_last_10": 281.45660164361027, "mean_abs_error_last_25": 313.06233147344517, "mean_abs_error_last_50": 347.75436529555384, "mean_pred_prob": 0.05671935623977333, "mean_pred_prob_last_10": 0.25021585561335086, "mean_pred_prob_last_25": 0.14767691474407912, "mean_pred_prob_last_50": 0.09121560389176012, "mean_token_accuracy": 0.8687150537967682, "step": 52520 }, { "epoch": 0.9338168631006346, "grad_norm": 1.9474346457322764, "learning_rate": 0.0001, "loss": 0.6225, "mean_abs_error": 523.1488140829239, "mean_abs_error_last_10": 92.10793475436935, "mean_abs_error_last_25": 149.39774757125082, "mean_abs_error_last_50": 279.9401436912194, "mean_pred_prob": 0.023754274658858775, "mean_pred_prob_last_10": 0.13545027896761894, "mean_pred_prob_last_25": 0.07066835071891546, "mean_pred_prob_last_50": 0.04042253303341568, "mean_token_accuracy": 0.865364956855774, "step": 52530 }, { "epoch": 0.9339946313974365, "grad_norm": 0.8200113510546078, "learning_rate": 0.0001, "loss": 0.7246, "mean_abs_error": 642.4381925613019, "mean_abs_error_last_10": 161.65145867359064, "mean_abs_error_last_25": 167.12598122139943, "mean_abs_error_last_50": 296.24380268728567, "mean_pred_prob": 0.03902892281766981, "mean_pred_prob_last_10": 0.18516902206465602, "mean_pred_prob_last_25": 0.10499841701239347, "mean_pred_prob_last_50": 0.06494919192045927, "mean_token_accuracy": 0.8659819006919861, "step": 52540 }, { "epoch": 0.9341723996942385, "grad_norm": 2.9056944206199886, "learning_rate": 0.0001, "loss": 0.7193, "mean_abs_error": 631.731022774566, "mean_abs_error_last_10": 256.78747458783346, "mean_abs_error_last_25": 308.42642912246635, "mean_abs_error_last_50": 420.8660158171666, "mean_pred_prob": 0.03069614421692677, "mean_pred_prob_last_10": 0.14303769170073793, "mean_pred_prob_last_25": 0.08161870571784675, "mean_pred_prob_last_50": 0.05014969787443988, "mean_token_accuracy": 0.8695140898227691, "step": 52550 }, { "epoch": 0.9343501679910404, "grad_norm": 1.2856421795964268, "learning_rate": 0.0001, "loss": 0.6181, "mean_abs_error": 152.36848865703348, "mean_abs_error_last_10": 22.996428676569188, "mean_abs_error_last_25": 44.30822183594305, "mean_abs_error_last_50": 88.23289800936273, "mean_pred_prob": 0.04768871432170272, "mean_pred_prob_last_10": 0.24083654582500458, "mean_pred_prob_last_25": 0.13038096204400063, "mean_pred_prob_last_50": 0.07888683192431926, "mean_token_accuracy": 0.8723736643791199, "step": 52560 }, { "epoch": 0.9345279362878425, "grad_norm": 1.5510204401436267, "learning_rate": 0.0001, "loss": 0.7974, "mean_abs_error": 365.5337000318476, "mean_abs_error_last_10": 85.8858741324993, "mean_abs_error_last_25": 132.99085288453023, "mean_abs_error_last_50": 245.41445763848066, "mean_pred_prob": 0.03341399668715894, "mean_pred_prob_last_10": 0.19321647174656392, "mean_pred_prob_last_25": 0.10252639977261424, "mean_pred_prob_last_50": 0.05828994526527822, "mean_token_accuracy": 0.8708197951316834, "step": 52570 }, { "epoch": 0.9347057045846444, "grad_norm": 1.3445074795729202, "learning_rate": 0.0001, "loss": 0.5367, "mean_abs_error": 810.5771234537526, "mean_abs_error_last_10": 333.58697049043155, "mean_abs_error_last_25": 377.1841354949704, "mean_abs_error_last_50": 523.9028564882053, "mean_pred_prob": 0.043503694684477526, "mean_pred_prob_last_10": 0.2012271507002879, "mean_pred_prob_last_25": 0.11412948664510622, "mean_pred_prob_last_50": 0.07204838651232422, "mean_token_accuracy": 0.8772378087043762, "step": 52580 }, { "epoch": 0.9348834728814464, "grad_norm": 2.6445511536008195, "learning_rate": 0.0001, "loss": 0.7489, "mean_abs_error": 730.2671758654855, "mean_abs_error_last_10": 343.14819116221383, "mean_abs_error_last_25": 391.3965534083492, "mean_abs_error_last_50": 513.704469192773, "mean_pred_prob": 0.041482424645801076, "mean_pred_prob_last_10": 0.18253843985148704, "mean_pred_prob_last_25": 0.10883928315597587, "mean_pred_prob_last_50": 0.06860997785697691, "mean_token_accuracy": 0.8786362767219543, "step": 52590 }, { "epoch": 0.9350612411782483, "grad_norm": 1.637370718086285, "learning_rate": 0.0001, "loss": 0.7796, "mean_abs_error": 349.98868613314335, "mean_abs_error_last_10": 86.33204459766998, "mean_abs_error_last_25": 147.33483575002225, "mean_abs_error_last_50": 237.3373055166422, "mean_pred_prob": 0.03795789752621204, "mean_pred_prob_last_10": 0.20321767423301934, "mean_pred_prob_last_25": 0.11436481000855565, "mean_pred_prob_last_50": 0.06661982000805437, "mean_token_accuracy": 0.8725276410579681, "step": 52600 }, { "epoch": 0.9352390094750502, "grad_norm": 1.7252096509482122, "learning_rate": 0.0001, "loss": 0.7158, "mean_abs_error": 1438.534475090158, "mean_abs_error_last_10": 970.6597728784775, "mean_abs_error_last_25": 1024.8328937232018, "mean_abs_error_last_50": 1146.291807512712, "mean_pred_prob": 0.03332149805282825, "mean_pred_prob_last_10": 0.15286471956060269, "mean_pred_prob_last_25": 0.08761601881997194, "mean_pred_prob_last_50": 0.05550106541486457, "mean_token_accuracy": 0.8723783612251281, "step": 52610 }, { "epoch": 0.9354167777718522, "grad_norm": 2.5126149303122487, "learning_rate": 0.0001, "loss": 0.6764, "mean_abs_error": 523.3110730621855, "mean_abs_error_last_10": 255.73853270003332, "mean_abs_error_last_25": 331.9903841858751, "mean_abs_error_last_50": 356.8735037138961, "mean_pred_prob": 0.03492119872244075, "mean_pred_prob_last_10": 0.16765024525811895, "mean_pred_prob_last_25": 0.09237631576834246, "mean_pred_prob_last_50": 0.057750880590174344, "mean_token_accuracy": 0.8680406332015991, "step": 52620 }, { "epoch": 0.9355945460686541, "grad_norm": 1.1639949273592034, "learning_rate": 0.0001, "loss": 0.6533, "mean_abs_error": 1101.2645780538649, "mean_abs_error_last_10": 633.475309124429, "mean_abs_error_last_25": 693.8341461170852, "mean_abs_error_last_50": 884.4384857515909, "mean_pred_prob": 0.021833088106359356, "mean_pred_prob_last_10": 0.10267034831922502, "mean_pred_prob_last_25": 0.05762885938165709, "mean_pred_prob_last_50": 0.03523166539089288, "mean_token_accuracy": 0.8650842607021332, "step": 52630 }, { "epoch": 0.935772314365456, "grad_norm": 2.0829965809299598, "learning_rate": 0.0001, "loss": 0.7323, "mean_abs_error": 1296.419837544076, "mean_abs_error_last_10": 753.3984435685314, "mean_abs_error_last_25": 900.0626176720264, "mean_abs_error_last_50": 1032.3857150676872, "mean_pred_prob": 0.03982782907842193, "mean_pred_prob_last_10": 0.13416587136161978, "mean_pred_prob_last_25": 0.0926385072816629, "mean_pred_prob_last_50": 0.06305268428986892, "mean_token_accuracy": 0.8598162233829498, "step": 52640 }, { "epoch": 0.935950082662258, "grad_norm": 0.8866511130991259, "learning_rate": 0.0001, "loss": 0.6214, "mean_abs_error": 550.6629427420777, "mean_abs_error_last_10": 115.31899406334564, "mean_abs_error_last_25": 153.2774420384469, "mean_abs_error_last_50": 281.3001672029958, "mean_pred_prob": 0.03793362466967665, "mean_pred_prob_last_10": 0.1884872333612293, "mean_pred_prob_last_25": 0.10163827739888802, "mean_pred_prob_last_50": 0.06214642166160047, "mean_token_accuracy": 0.8772833108901977, "step": 52650 }, { "epoch": 0.9361278509590599, "grad_norm": 2.842132837591492, "learning_rate": 0.0001, "loss": 0.6724, "mean_abs_error": 725.9920396580748, "mean_abs_error_last_10": 264.34621878790256, "mean_abs_error_last_25": 368.9888344946372, "mean_abs_error_last_50": 493.9258726773016, "mean_pred_prob": 0.017637849668972194, "mean_pred_prob_last_10": 0.09785787870641798, "mean_pred_prob_last_25": 0.05085939558921382, "mean_pred_prob_last_50": 0.030259103025309743, "mean_token_accuracy": 0.8700114607810974, "step": 52660 }, { "epoch": 0.9363056192558619, "grad_norm": 1.8120598304375166, "learning_rate": 0.0001, "loss": 0.7338, "mean_abs_error": 323.9129265158293, "mean_abs_error_last_10": 35.634616222646734, "mean_abs_error_last_25": 123.4229189399048, "mean_abs_error_last_50": 177.72636581066692, "mean_pred_prob": 0.04318308904767036, "mean_pred_prob_last_10": 0.19193430282175541, "mean_pred_prob_last_25": 0.11354064643383026, "mean_pred_prob_last_50": 0.07185306018218399, "mean_token_accuracy": 0.8716195285320282, "step": 52670 }, { "epoch": 0.9364833875526638, "grad_norm": 1.8295452864267994, "learning_rate": 0.0001, "loss": 0.9209, "mean_abs_error": 479.0791794029099, "mean_abs_error_last_10": 282.05820728023735, "mean_abs_error_last_25": 310.4649473029196, "mean_abs_error_last_50": 355.54301005958064, "mean_pred_prob": 0.034046314668376, "mean_pred_prob_last_10": 0.17053222404792906, "mean_pred_prob_last_25": 0.0937127479352057, "mean_pred_prob_last_50": 0.057277425983920695, "mean_token_accuracy": 0.8666980564594269, "step": 52680 }, { "epoch": 0.9366611558494659, "grad_norm": 2.0081099364491397, "learning_rate": 0.0001, "loss": 0.8374, "mean_abs_error": 766.0073621360425, "mean_abs_error_last_10": 165.8710687041268, "mean_abs_error_last_25": 163.0587353250388, "mean_abs_error_last_50": 307.2470317977389, "mean_pred_prob": 0.04453467393759638, "mean_pred_prob_last_10": 0.19962238487787545, "mean_pred_prob_last_25": 0.11742605164181441, "mean_pred_prob_last_50": 0.07337578542064875, "mean_token_accuracy": 0.8645504355430603, "step": 52690 }, { "epoch": 0.9368389241462678, "grad_norm": 1.4884234594074521, "learning_rate": 0.0001, "loss": 0.6524, "mean_abs_error": 997.2138465391438, "mean_abs_error_last_10": 565.5768047100037, "mean_abs_error_last_25": 570.3884496288945, "mean_abs_error_last_50": 703.3606010490637, "mean_pred_prob": 0.03411815977015067, "mean_pred_prob_last_10": 0.17117722292314283, "mean_pred_prob_last_25": 0.0944390351709444, "mean_pred_prob_last_50": 0.05807401058555115, "mean_token_accuracy": 0.8764729261398315, "step": 52700 }, { "epoch": 0.9370166924430697, "grad_norm": 2.4443011123362868, "learning_rate": 0.0001, "loss": 0.6326, "mean_abs_error": 587.7316731061165, "mean_abs_error_last_10": 221.24230674453906, "mean_abs_error_last_25": 357.3003454570968, "mean_abs_error_last_50": 475.2667196819605, "mean_pred_prob": 0.04137382532353513, "mean_pred_prob_last_10": 0.20081830619019456, "mean_pred_prob_last_25": 0.10478907696087844, "mean_pred_prob_last_50": 0.06567674400284887, "mean_token_accuracy": 0.8858836352825165, "step": 52710 }, { "epoch": 0.9371944607398717, "grad_norm": 1.1245363099751997, "learning_rate": 0.0001, "loss": 0.6661, "mean_abs_error": 209.4745207075911, "mean_abs_error_last_10": 31.140080246082352, "mean_abs_error_last_25": 63.0488258169652, "mean_abs_error_last_50": 101.9372385736995, "mean_pred_prob": 0.048989918734878304, "mean_pred_prob_last_10": 0.23971665576100348, "mean_pred_prob_last_25": 0.1338121986016631, "mean_pred_prob_last_50": 0.08201584350317717, "mean_token_accuracy": 0.8734806716442108, "step": 52720 }, { "epoch": 0.9373722290366736, "grad_norm": 1.847631774708207, "learning_rate": 0.0001, "loss": 0.6853, "mean_abs_error": 248.6336379569247, "mean_abs_error_last_10": 40.10144073626897, "mean_abs_error_last_25": 80.29774894906173, "mean_abs_error_last_50": 129.45839500872768, "mean_pred_prob": 0.05344263706356287, "mean_pred_prob_last_10": 0.2348103027790785, "mean_pred_prob_last_25": 0.13315740395337344, "mean_pred_prob_last_50": 0.08543412843719125, "mean_token_accuracy": 0.8884160816669464, "step": 52730 }, { "epoch": 0.9375499973334755, "grad_norm": 1.3935351341337154, "learning_rate": 0.0001, "loss": 0.6291, "mean_abs_error": 555.6323624930058, "mean_abs_error_last_10": 99.11471937984724, "mean_abs_error_last_25": 247.37054682956855, "mean_abs_error_last_50": 366.9799428100059, "mean_pred_prob": 0.047380965307820586, "mean_pred_prob_last_10": 0.21025430832523853, "mean_pred_prob_last_25": 0.12322365825530142, "mean_pred_prob_last_50": 0.07818095851689577, "mean_token_accuracy": 0.8816617608070374, "step": 52740 }, { "epoch": 0.9377277656302775, "grad_norm": 1.1374271210467908, "learning_rate": 0.0001, "loss": 0.5829, "mean_abs_error": 1791.432762250502, "mean_abs_error_last_10": 984.1865907293359, "mean_abs_error_last_25": 1117.8948796611935, "mean_abs_error_last_50": 1376.9559416444447, "mean_pred_prob": 0.03639984478140832, "mean_pred_prob_last_10": 0.17227014232921647, "mean_pred_prob_last_25": 0.09839676054834853, "mean_pred_prob_last_50": 0.06113565200357698, "mean_token_accuracy": 0.8848629355430603, "step": 52750 }, { "epoch": 0.9379055339270794, "grad_norm": 1.466100438938101, "learning_rate": 0.0001, "loss": 0.5806, "mean_abs_error": 210.68300761227118, "mean_abs_error_last_10": 37.149084610025525, "mean_abs_error_last_25": 85.80637396450848, "mean_abs_error_last_50": 101.92615098722999, "mean_pred_prob": 0.0577101016882807, "mean_pred_prob_last_10": 0.2722358115017414, "mean_pred_prob_last_25": 0.1552118444815278, "mean_pred_prob_last_50": 0.09608082789927722, "mean_token_accuracy": 0.876913434267044, "step": 52760 }, { "epoch": 0.9380833022238814, "grad_norm": 1.108878579150045, "learning_rate": 0.0001, "loss": 0.5117, "mean_abs_error": 378.9338534263592, "mean_abs_error_last_10": 118.06667993603189, "mean_abs_error_last_25": 171.9373118115206, "mean_abs_error_last_50": 266.2727901986983, "mean_pred_prob": 0.03467170521616936, "mean_pred_prob_last_10": 0.17294114790856838, "mean_pred_prob_last_25": 0.09395309481769801, "mean_pred_prob_last_50": 0.05798873463645578, "mean_token_accuracy": 0.8765658140182495, "step": 52770 }, { "epoch": 0.9382610705206833, "grad_norm": 1.937497271957663, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 949.2286636008964, "mean_abs_error_last_10": 506.45043507788495, "mean_abs_error_last_25": 582.6952674952694, "mean_abs_error_last_50": 721.6349883069199, "mean_pred_prob": 0.05261235203652177, "mean_pred_prob_last_10": 0.24325494510121642, "mean_pred_prob_last_25": 0.13840154847421218, "mean_pred_prob_last_50": 0.08716929713409627, "mean_token_accuracy": 0.8734292328357697, "step": 52780 }, { "epoch": 0.9384388388174852, "grad_norm": 1.987484956084699, "learning_rate": 0.0001, "loss": 0.7164, "mean_abs_error": 296.17292713294756, "mean_abs_error_last_10": 102.42909323822832, "mean_abs_error_last_25": 149.93018847463608, "mean_abs_error_last_50": 204.0379260911904, "mean_pred_prob": 0.05875006935093552, "mean_pred_prob_last_10": 0.2680581819266081, "mean_pred_prob_last_25": 0.16024043885990977, "mean_pred_prob_last_50": 0.09920515799894929, "mean_token_accuracy": 0.8806515514850617, "step": 52790 }, { "epoch": 0.9386166071142872, "grad_norm": 1.684261235541228, "learning_rate": 0.0001, "loss": 0.627, "mean_abs_error": 835.2349898169075, "mean_abs_error_last_10": 455.62643581106187, "mean_abs_error_last_25": 514.7735882222958, "mean_abs_error_last_50": 636.0416959934671, "mean_pred_prob": 0.03604625237639993, "mean_pred_prob_last_10": 0.17266526377934496, "mean_pred_prob_last_25": 0.10231233806407544, "mean_pred_prob_last_50": 0.06045458006265107, "mean_token_accuracy": 0.8749271929264069, "step": 52800 }, { "epoch": 0.9387943754110892, "grad_norm": 1.4441394347106749, "learning_rate": 0.0001, "loss": 0.6843, "mean_abs_error": 138.52117895035195, "mean_abs_error_last_10": 25.76694011154043, "mean_abs_error_last_25": 50.6921895279781, "mean_abs_error_last_50": 79.8645072816878, "mean_pred_prob": 0.053266117349267006, "mean_pred_prob_last_10": 0.2621541522443295, "mean_pred_prob_last_25": 0.14479017965495586, "mean_pred_prob_last_50": 0.08933410868048668, "mean_token_accuracy": 0.876880693435669, "step": 52810 }, { "epoch": 0.9389721437078912, "grad_norm": 1.852639460494709, "learning_rate": 0.0001, "loss": 0.6155, "mean_abs_error": 494.01457806264114, "mean_abs_error_last_10": 165.35676409504316, "mean_abs_error_last_25": 194.95414347802415, "mean_abs_error_last_50": 268.9758535747125, "mean_pred_prob": 0.03198615799192339, "mean_pred_prob_last_10": 0.1530312129529193, "mean_pred_prob_last_25": 0.0865549762151204, "mean_pred_prob_last_50": 0.05354122701101005, "mean_token_accuracy": 0.8834848642349243, "step": 52820 }, { "epoch": 0.9391499120046931, "grad_norm": 1.0830755551338798, "learning_rate": 0.0001, "loss": 0.6672, "mean_abs_error": 681.5066274071622, "mean_abs_error_last_10": 225.63454074369952, "mean_abs_error_last_25": 343.1698236300075, "mean_abs_error_last_50": 601.1418287988349, "mean_pred_prob": 0.03410746571316849, "mean_pred_prob_last_10": 0.1624752617208287, "mean_pred_prob_last_25": 0.08967881572316401, "mean_pred_prob_last_50": 0.05574081654194742, "mean_token_accuracy": 0.8710758447647095, "step": 52830 }, { "epoch": 0.939327680301495, "grad_norm": 1.7639191553070652, "learning_rate": 0.0001, "loss": 0.8059, "mean_abs_error": 276.33571840233213, "mean_abs_error_last_10": 94.27789319550324, "mean_abs_error_last_25": 132.1044631693701, "mean_abs_error_last_50": 176.8161036110961, "mean_pred_prob": 0.04959015515632927, "mean_pred_prob_last_10": 0.23392531797289848, "mean_pred_prob_last_25": 0.13238559737801553, "mean_pred_prob_last_50": 0.08205573633313179, "mean_token_accuracy": 0.8617779791355134, "step": 52840 }, { "epoch": 0.939505448598297, "grad_norm": 1.91280437788822, "learning_rate": 0.0001, "loss": 0.7243, "mean_abs_error": 139.779917283928, "mean_abs_error_last_10": 23.8999570577362, "mean_abs_error_last_25": 64.94819139227016, "mean_abs_error_last_50": 94.3965855850614, "mean_pred_prob": 0.059092590538784864, "mean_pred_prob_last_10": 0.26605063751339914, "mean_pred_prob_last_25": 0.1542732620611787, "mean_pred_prob_last_50": 0.0960747710429132, "mean_token_accuracy": 0.8671692907810211, "step": 52850 }, { "epoch": 0.9396832168950989, "grad_norm": 1.636644405840634, "learning_rate": 0.0001, "loss": 0.7136, "mean_abs_error": 614.4846257747877, "mean_abs_error_last_10": 199.3919637336532, "mean_abs_error_last_25": 344.1336871923777, "mean_abs_error_last_50": 488.3110679272957, "mean_pred_prob": 0.04301724459510296, "mean_pred_prob_last_10": 0.19394117249757983, "mean_pred_prob_last_25": 0.10960369110107422, "mean_pred_prob_last_50": 0.0693695057212608, "mean_token_accuracy": 0.8689754068851471, "step": 52860 }, { "epoch": 0.9398609851919009, "grad_norm": 0.8253284965361859, "learning_rate": 0.0001, "loss": 0.6556, "mean_abs_error": 167.23362922082566, "mean_abs_error_last_10": 60.178947301222934, "mean_abs_error_last_25": 82.84887912239806, "mean_abs_error_last_50": 106.89375307032472, "mean_pred_prob": 0.056153090810403226, "mean_pred_prob_last_10": 0.26235019583255054, "mean_pred_prob_last_25": 0.15124553041532635, "mean_pred_prob_last_50": 0.09550499022006989, "mean_token_accuracy": 0.8772248089313507, "step": 52870 }, { "epoch": 0.9400387534887028, "grad_norm": 1.1999107022653768, "learning_rate": 0.0001, "loss": 0.6644, "mean_abs_error": 344.26353296216524, "mean_abs_error_last_10": 198.71650178080571, "mean_abs_error_last_25": 239.19545409103412, "mean_abs_error_last_50": 278.2145704290027, "mean_pred_prob": 0.029210198973305523, "mean_pred_prob_last_10": 0.1749593226239085, "mean_pred_prob_last_25": 0.08870900142937899, "mean_pred_prob_last_50": 0.05094234137795865, "mean_token_accuracy": 0.8570034861564636, "step": 52880 }, { "epoch": 0.9402165217855047, "grad_norm": 1.1009271303559804, "learning_rate": 0.0001, "loss": 0.7141, "mean_abs_error": 542.3850044469377, "mean_abs_error_last_10": 109.36151401308182, "mean_abs_error_last_25": 170.03241806928878, "mean_abs_error_last_50": 303.3842885212716, "mean_pred_prob": 0.04469560525030829, "mean_pred_prob_last_10": 0.24404535903595387, "mean_pred_prob_last_25": 0.12829144924180583, "mean_pred_prob_last_50": 0.07622358608059585, "mean_token_accuracy": 0.8737267553806305, "step": 52890 }, { "epoch": 0.9403942900823067, "grad_norm": 1.5843217342165397, "learning_rate": 0.0001, "loss": 0.7274, "mean_abs_error": 570.2879551057013, "mean_abs_error_last_10": 327.71021704053777, "mean_abs_error_last_25": 510.49373985291356, "mean_abs_error_last_50": 498.0046943692111, "mean_pred_prob": 0.028980229608714582, "mean_pred_prob_last_10": 0.14627714231610298, "mean_pred_prob_last_25": 0.07832886558026075, "mean_pred_prob_last_50": 0.04777769856154919, "mean_token_accuracy": 0.8636776745319367, "step": 52900 }, { "epoch": 0.9405720583791086, "grad_norm": 1.5550455561516487, "learning_rate": 0.0001, "loss": 0.6531, "mean_abs_error": 377.97781250549053, "mean_abs_error_last_10": 89.12780603586413, "mean_abs_error_last_25": 173.02531269300226, "mean_abs_error_last_50": 211.00115494332687, "mean_pred_prob": 0.04899240350350738, "mean_pred_prob_last_10": 0.21790289469063281, "mean_pred_prob_last_25": 0.12781843431293965, "mean_pred_prob_last_50": 0.08004251373931766, "mean_token_accuracy": 0.867671936750412, "step": 52910 }, { "epoch": 0.9407498266759107, "grad_norm": 2.4099530815883035, "learning_rate": 0.0001, "loss": 0.7705, "mean_abs_error": 911.83054272807, "mean_abs_error_last_10": 516.0430478141103, "mean_abs_error_last_25": 582.4275961831855, "mean_abs_error_last_50": 726.4923880636488, "mean_pred_prob": 0.032175378105603156, "mean_pred_prob_last_10": 0.16122683150751982, "mean_pred_prob_last_25": 0.0897934096865356, "mean_pred_prob_last_50": 0.05427695308171678, "mean_token_accuracy": 0.8631025373935699, "step": 52920 }, { "epoch": 0.9409275949727126, "grad_norm": 0.9127305141434432, "learning_rate": 0.0001, "loss": 0.6301, "mean_abs_error": 158.85875012411, "mean_abs_error_last_10": 42.72723102563444, "mean_abs_error_last_25": 78.87825078892536, "mean_abs_error_last_50": 108.4128215651854, "mean_pred_prob": 0.04535272680222988, "mean_pred_prob_last_10": 0.21170850843191147, "mean_pred_prob_last_25": 0.12094232551753521, "mean_pred_prob_last_50": 0.07460063342005015, "mean_token_accuracy": 0.88158278465271, "step": 52930 }, { "epoch": 0.9411053632695145, "grad_norm": 0.8449052720126443, "learning_rate": 0.0001, "loss": 0.8299, "mean_abs_error": 419.77098630776146, "mean_abs_error_last_10": 206.07709040487117, "mean_abs_error_last_25": 274.14919030249547, "mean_abs_error_last_50": 349.192143118015, "mean_pred_prob": 0.03971260405378416, "mean_pred_prob_last_10": 0.19105702885426581, "mean_pred_prob_last_25": 0.11074498856905848, "mean_pred_prob_last_50": 0.06740006739855744, "mean_token_accuracy": 0.8676135241985321, "step": 52940 }, { "epoch": 0.9412831315663165, "grad_norm": 1.0630344403146044, "learning_rate": 0.0001, "loss": 0.683, "mean_abs_error": 510.7323598931569, "mean_abs_error_last_10": 193.7055515590943, "mean_abs_error_last_25": 251.72463453114028, "mean_abs_error_last_50": 353.7538125854914, "mean_pred_prob": 0.03482154096127488, "mean_pred_prob_last_10": 0.1808038561255671, "mean_pred_prob_last_25": 0.09651054010028019, "mean_pred_prob_last_50": 0.05939563038409688, "mean_token_accuracy": 0.8684463322162628, "step": 52950 }, { "epoch": 0.9414608998631184, "grad_norm": 2.0119220737230963, "learning_rate": 0.0001, "loss": 0.8692, "mean_abs_error": 231.25854864603252, "mean_abs_error_last_10": 52.95907281433949, "mean_abs_error_last_25": 103.93842894207448, "mean_abs_error_last_50": 143.54107366952138, "mean_pred_prob": 0.03490392151288688, "mean_pred_prob_last_10": 0.1831297107040882, "mean_pred_prob_last_25": 0.09817200768738985, "mean_pred_prob_last_50": 0.059529855754226445, "mean_token_accuracy": 0.8664694368839264, "step": 52960 }, { "epoch": 0.9416386681599204, "grad_norm": 1.4207205481427554, "learning_rate": 0.0001, "loss": 0.7165, "mean_abs_error": 418.5504549621936, "mean_abs_error_last_10": 131.4053104515275, "mean_abs_error_last_25": 124.81946942892723, "mean_abs_error_last_50": 264.3782135064929, "mean_pred_prob": 0.04766076963860542, "mean_pred_prob_last_10": 0.23211801815778016, "mean_pred_prob_last_25": 0.12731790393590928, "mean_pred_prob_last_50": 0.07871926971711218, "mean_token_accuracy": 0.8712081372737884, "step": 52970 }, { "epoch": 0.9418164364567223, "grad_norm": 2.5494645873330084, "learning_rate": 0.0001, "loss": 0.7079, "mean_abs_error": 819.190434570862, "mean_abs_error_last_10": 541.7590336119839, "mean_abs_error_last_25": 608.5469509108506, "mean_abs_error_last_50": 679.5945118665795, "mean_pred_prob": 0.05799407499434892, "mean_pred_prob_last_10": 0.2646054140277556, "mean_pred_prob_last_25": 0.15151064388774102, "mean_pred_prob_last_50": 0.09539378526533256, "mean_token_accuracy": 0.8655229568481445, "step": 52980 }, { "epoch": 0.9419942047535242, "grad_norm": 1.4080808522790458, "learning_rate": 0.0001, "loss": 0.6399, "mean_abs_error": 114.26460104206387, "mean_abs_error_last_10": 28.09223129081825, "mean_abs_error_last_25": 69.03338119488316, "mean_abs_error_last_50": 101.657156655066, "mean_pred_prob": 0.05533711370080709, "mean_pred_prob_last_10": 0.27598224133253096, "mean_pred_prob_last_25": 0.15231467150151728, "mean_pred_prob_last_50": 0.09166655726730824, "mean_token_accuracy": 0.8758316814899445, "step": 52990 }, { "epoch": 0.9421719730503262, "grad_norm": 4.456571764910584, "learning_rate": 0.0001, "loss": 0.7084, "mean_abs_error": 413.81890451194033, "mean_abs_error_last_10": 120.02980275497217, "mean_abs_error_last_25": 130.16024346058452, "mean_abs_error_last_50": 224.81143884459462, "mean_pred_prob": 0.03936033704667352, "mean_pred_prob_last_10": 0.20777850928716363, "mean_pred_prob_last_25": 0.11344026292208582, "mean_pred_prob_last_50": 0.0661227137898095, "mean_token_accuracy": 0.876415240764618, "step": 53000 }, { "epoch": 0.9423497413471281, "grad_norm": 1.7614149156491385, "learning_rate": 0.0001, "loss": 0.7236, "mean_abs_error": 233.62805148620902, "mean_abs_error_last_10": 97.88048891740458, "mean_abs_error_last_25": 119.0821737018413, "mean_abs_error_last_50": 146.9832353654192, "mean_pred_prob": 0.04403220573440194, "mean_pred_prob_last_10": 0.18441304005682468, "mean_pred_prob_last_25": 0.11199362184852361, "mean_pred_prob_last_50": 0.07151306057348847, "mean_token_accuracy": 0.8737333953380585, "step": 53010 }, { "epoch": 0.9425275096439301, "grad_norm": 0.8168258480408964, "learning_rate": 0.0001, "loss": 0.5629, "mean_abs_error": 555.0809999295203, "mean_abs_error_last_10": 122.3986174403705, "mean_abs_error_last_25": 294.24613372746205, "mean_abs_error_last_50": 403.76258554120477, "mean_pred_prob": 0.04483307609334588, "mean_pred_prob_last_10": 0.19609807506203653, "mean_pred_prob_last_25": 0.11696406025439501, "mean_pred_prob_last_50": 0.07494444958865643, "mean_token_accuracy": 0.8855299293994904, "step": 53020 }, { "epoch": 0.942705277940732, "grad_norm": 2.2785939754964435, "learning_rate": 0.0001, "loss": 0.8034, "mean_abs_error": 452.20526761465743, "mean_abs_error_last_10": 168.04081617484204, "mean_abs_error_last_25": 292.18051543636545, "mean_abs_error_last_50": 276.1642180906185, "mean_pred_prob": 0.035791000025346874, "mean_pred_prob_last_10": 0.18849359191954135, "mean_pred_prob_last_25": 0.10417220834642649, "mean_pred_prob_last_50": 0.06066949544474483, "mean_token_accuracy": 0.8680926859378815, "step": 53030 }, { "epoch": 0.942883046237534, "grad_norm": 2.200217395092037, "learning_rate": 0.0001, "loss": 0.6976, "mean_abs_error": 192.36598478221353, "mean_abs_error_last_10": 59.23711099522475, "mean_abs_error_last_25": 102.17320916415144, "mean_abs_error_last_50": 136.64342245332148, "mean_pred_prob": 0.04254777603782713, "mean_pred_prob_last_10": 0.21539078131318093, "mean_pred_prob_last_25": 0.11239103190600871, "mean_pred_prob_last_50": 0.06809640722349286, "mean_token_accuracy": 0.8746632158756256, "step": 53040 }, { "epoch": 0.943060814534336, "grad_norm": 1.7643187795942674, "learning_rate": 0.0001, "loss": 0.6681, "mean_abs_error": 174.31501372841055, "mean_abs_error_last_10": 22.758390259125722, "mean_abs_error_last_25": 55.71788666327026, "mean_abs_error_last_50": 94.75073068994473, "mean_pred_prob": 0.04961115322075784, "mean_pred_prob_last_10": 0.23846536949276925, "mean_pred_prob_last_25": 0.13463406935334205, "mean_pred_prob_last_50": 0.08202601419761776, "mean_token_accuracy": 0.8734089195728302, "step": 53050 }, { "epoch": 0.9432385828311379, "grad_norm": 1.7042604068703116, "learning_rate": 0.0001, "loss": 0.6309, "mean_abs_error": 1012.9661689771916, "mean_abs_error_last_10": 505.63725940070435, "mean_abs_error_last_25": 666.494046392265, "mean_abs_error_last_50": 755.0044417903744, "mean_pred_prob": 0.045950281201658075, "mean_pred_prob_last_10": 0.19872818349977023, "mean_pred_prob_last_25": 0.11923943489964586, "mean_pred_prob_last_50": 0.0754937477700878, "mean_token_accuracy": 0.8668420732021331, "step": 53060 }, { "epoch": 0.9434163511279399, "grad_norm": 1.2748525966152595, "learning_rate": 0.0001, "loss": 0.7445, "mean_abs_error": 294.3951898767891, "mean_abs_error_last_10": 91.72061649166389, "mean_abs_error_last_25": 99.75565473851356, "mean_abs_error_last_50": 157.83863470427332, "mean_pred_prob": 0.05680094435811043, "mean_pred_prob_last_10": 0.23138473220169545, "mean_pred_prob_last_25": 0.14203870790079237, "mean_pred_prob_last_50": 0.0912330309394747, "mean_token_accuracy": 0.8636643469333649, "step": 53070 }, { "epoch": 0.9435941194247418, "grad_norm": 2.2107489808495915, "learning_rate": 0.0001, "loss": 0.7397, "mean_abs_error": 348.98572460953164, "mean_abs_error_last_10": 103.06462989474092, "mean_abs_error_last_25": 99.27569976224868, "mean_abs_error_last_50": 209.84427366775563, "mean_pred_prob": 0.02993982145562768, "mean_pred_prob_last_10": 0.1608455153182149, "mean_pred_prob_last_25": 0.08714277418330311, "mean_pred_prob_last_50": 0.05134258610196411, "mean_token_accuracy": 0.8766298413276672, "step": 53080 }, { "epoch": 0.9437718877215437, "grad_norm": 1.1977387389004304, "learning_rate": 0.0001, "loss": 0.9148, "mean_abs_error": 715.2078164741276, "mean_abs_error_last_10": 320.4069374875119, "mean_abs_error_last_25": 389.51973751534905, "mean_abs_error_last_50": 492.48753942481414, "mean_pred_prob": 0.03250450462801382, "mean_pred_prob_last_10": 0.1584120413288474, "mean_pred_prob_last_25": 0.08694950570352375, "mean_pred_prob_last_50": 0.05345724151702598, "mean_token_accuracy": 0.8714947640895844, "step": 53090 }, { "epoch": 0.9439496560183457, "grad_norm": 1.6004755653668232, "learning_rate": 0.0001, "loss": 0.793, "mean_abs_error": 1448.570010926554, "mean_abs_error_last_10": 752.9713290105748, "mean_abs_error_last_25": 873.9929517887483, "mean_abs_error_last_50": 1094.6290286579986, "mean_pred_prob": 0.030310427020594943, "mean_pred_prob_last_10": 0.1533592658044654, "mean_pred_prob_last_25": 0.08073155868769391, "mean_pred_prob_last_50": 0.049940384984074625, "mean_token_accuracy": 0.8700929820537567, "step": 53100 }, { "epoch": 0.9441274243151476, "grad_norm": 0.9630133875930138, "learning_rate": 0.0001, "loss": 0.6859, "mean_abs_error": 110.42255767916349, "mean_abs_error_last_10": 25.22113583061455, "mean_abs_error_last_25": 34.64971556637894, "mean_abs_error_last_50": 59.434741544360705, "mean_pred_prob": 0.05113600948825479, "mean_pred_prob_last_10": 0.24617682993412018, "mean_pred_prob_last_25": 0.13670994006097317, "mean_pred_prob_last_50": 0.08496960084885359, "mean_token_accuracy": 0.8761510014533996, "step": 53110 }, { "epoch": 0.9443051926119496, "grad_norm": 1.395380046996993, "learning_rate": 0.0001, "loss": 0.6316, "mean_abs_error": 243.42132070946167, "mean_abs_error_last_10": 44.079944380857526, "mean_abs_error_last_25": 69.96603969876362, "mean_abs_error_last_50": 116.3682181224492, "mean_pred_prob": 0.052667011274024844, "mean_pred_prob_last_10": 0.2338475851342082, "mean_pred_prob_last_25": 0.1377042275853455, "mean_pred_prob_last_50": 0.08671793616376818, "mean_token_accuracy": 0.8760425329208374, "step": 53120 }, { "epoch": 0.9444829609087515, "grad_norm": 0.8218457292976173, "learning_rate": 0.0001, "loss": 0.7048, "mean_abs_error": 415.89014959464475, "mean_abs_error_last_10": 271.1915077900187, "mean_abs_error_last_25": 294.00423518018385, "mean_abs_error_last_50": 311.781003435469, "mean_pred_prob": 0.03493268826277927, "mean_pred_prob_last_10": 0.17805211020167916, "mean_pred_prob_last_25": 0.09965990252094344, "mean_pred_prob_last_50": 0.05978216140065342, "mean_token_accuracy": 0.8710773169994355, "step": 53130 }, { "epoch": 0.9446607292055534, "grad_norm": 0.9924207810328861, "learning_rate": 0.0001, "loss": 0.784, "mean_abs_error": 385.74920364618424, "mean_abs_error_last_10": 211.9677747367019, "mean_abs_error_last_25": 303.46390250525366, "mean_abs_error_last_50": 293.80252467202695, "mean_pred_prob": 0.03302674832521006, "mean_pred_prob_last_10": 0.16836257567629218, "mean_pred_prob_last_25": 0.08809800185263157, "mean_pred_prob_last_50": 0.05429181504296139, "mean_token_accuracy": 0.8734624266624451, "step": 53140 }, { "epoch": 0.9448384975023554, "grad_norm": 2.4975653740430626, "learning_rate": 0.0001, "loss": 0.7036, "mean_abs_error": 419.2976237864203, "mean_abs_error_last_10": 119.06257247899357, "mean_abs_error_last_25": 214.98933632920415, "mean_abs_error_last_50": 263.01578748712836, "mean_pred_prob": 0.041258975467644635, "mean_pred_prob_last_10": 0.2013787694275379, "mean_pred_prob_last_25": 0.11305177742615342, "mean_pred_prob_last_50": 0.06915382770821452, "mean_token_accuracy": 0.8727107524871827, "step": 53150 }, { "epoch": 0.9450162657991574, "grad_norm": 1.6660554912555683, "learning_rate": 0.0001, "loss": 0.6976, "mean_abs_error": 163.14789919598576, "mean_abs_error_last_10": 21.529790972874615, "mean_abs_error_last_25": 49.68330200812385, "mean_abs_error_last_50": 89.73172859283464, "mean_pred_prob": 0.04177229506894946, "mean_pred_prob_last_10": 0.20850137025117874, "mean_pred_prob_last_25": 0.11485767383128405, "mean_pred_prob_last_50": 0.07109400564804673, "mean_token_accuracy": 0.8781534254550933, "step": 53160 }, { "epoch": 0.9451940340959594, "grad_norm": 1.2837137683307491, "learning_rate": 0.0001, "loss": 0.6943, "mean_abs_error": 293.5205673580498, "mean_abs_error_last_10": 151.09151014390775, "mean_abs_error_last_25": 155.51159521127494, "mean_abs_error_last_50": 199.21762996247259, "mean_pred_prob": 0.03203071195166558, "mean_pred_prob_last_10": 0.15727170165628196, "mean_pred_prob_last_25": 0.08818820752203464, "mean_pred_prob_last_50": 0.053635295014828444, "mean_token_accuracy": 0.8691239953041077, "step": 53170 }, { "epoch": 0.9453718023927613, "grad_norm": 1.5208320343089743, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 100.66624964847738, "mean_abs_error_last_10": 39.54286737457235, "mean_abs_error_last_25": 44.48894514092248, "mean_abs_error_last_50": 68.61215089413629, "mean_pred_prob": 0.05580288646742702, "mean_pred_prob_last_10": 0.2751376435160637, "mean_pred_prob_last_25": 0.15332133360207081, "mean_pred_prob_last_50": 0.09384737098589539, "mean_token_accuracy": 0.8643050372600556, "step": 53180 }, { "epoch": 0.9455495706895632, "grad_norm": 1.3552592945264983, "learning_rate": 0.0001, "loss": 0.6271, "mean_abs_error": 292.55977280783634, "mean_abs_error_last_10": 170.70960280617018, "mean_abs_error_last_25": 176.9241056008024, "mean_abs_error_last_50": 226.71239273406468, "mean_pred_prob": 0.04031744679668918, "mean_pred_prob_last_10": 0.2017265069996938, "mean_pred_prob_last_25": 0.1078066486865282, "mean_pred_prob_last_50": 0.06627405104227364, "mean_token_accuracy": 0.8742642045021057, "step": 53190 }, { "epoch": 0.9457273389863652, "grad_norm": 1.0590973762070213, "learning_rate": 0.0001, "loss": 0.6243, "mean_abs_error": 242.17130572650507, "mean_abs_error_last_10": 38.102672843355435, "mean_abs_error_last_25": 67.29027764716932, "mean_abs_error_last_50": 146.18034060022666, "mean_pred_prob": 0.04695842727087438, "mean_pred_prob_last_10": 0.22874052301049233, "mean_pred_prob_last_25": 0.12479207888245583, "mean_pred_prob_last_50": 0.07623316105455161, "mean_token_accuracy": 0.8708303391933441, "step": 53200 }, { "epoch": 0.9459051072831671, "grad_norm": 1.3085175895495087, "learning_rate": 0.0001, "loss": 0.7714, "mean_abs_error": 224.66571290914743, "mean_abs_error_last_10": 37.11095795662923, "mean_abs_error_last_25": 79.23469441430912, "mean_abs_error_last_50": 147.26802910818748, "mean_pred_prob": 0.0347399995662272, "mean_pred_prob_last_10": 0.18993089459836482, "mean_pred_prob_last_25": 0.09987680856138467, "mean_pred_prob_last_50": 0.05945225870236755, "mean_token_accuracy": 0.877625697851181, "step": 53210 }, { "epoch": 0.9460828755799691, "grad_norm": 1.7495706430634481, "learning_rate": 0.0001, "loss": 0.6465, "mean_abs_error": 1331.4999577056174, "mean_abs_error_last_10": 370.2231430306292, "mean_abs_error_last_25": 687.4039149931741, "mean_abs_error_last_50": 910.6519783779452, "mean_pred_prob": 0.02283379887812771, "mean_pred_prob_last_10": 0.11254079104983247, "mean_pred_prob_last_25": 0.06100969192921184, "mean_pred_prob_last_50": 0.03815204149577767, "mean_token_accuracy": 0.8658448576927185, "step": 53220 }, { "epoch": 0.946260643876771, "grad_norm": 2.02104683756048, "learning_rate": 0.0001, "loss": 0.7492, "mean_abs_error": 886.0476920924591, "mean_abs_error_last_10": 587.229669284563, "mean_abs_error_last_25": 630.5395499806457, "mean_abs_error_last_50": 704.8109766916886, "mean_pred_prob": 0.03397957233100897, "mean_pred_prob_last_10": 0.16889751889102628, "mean_pred_prob_last_25": 0.09431959734647535, "mean_pred_prob_last_50": 0.05737983692670241, "mean_token_accuracy": 0.8829652845859528, "step": 53230 }, { "epoch": 0.9464384121735729, "grad_norm": 1.5277069205734548, "learning_rate": 0.0001, "loss": 0.7962, "mean_abs_error": 1105.3935386103403, "mean_abs_error_last_10": 718.0660566623562, "mean_abs_error_last_25": 803.5914174881535, "mean_abs_error_last_50": 904.8171236695491, "mean_pred_prob": 0.03770622434967663, "mean_pred_prob_last_10": 0.1847977054596413, "mean_pred_prob_last_25": 0.10205629655683879, "mean_pred_prob_last_50": 0.06240745682734996, "mean_token_accuracy": 0.8778432667255401, "step": 53240 }, { "epoch": 0.9466161804703749, "grad_norm": 2.575345890819308, "learning_rate": 0.0001, "loss": 0.5904, "mean_abs_error": 64.16142332421664, "mean_abs_error_last_10": 6.687365708160307, "mean_abs_error_last_25": 16.882251116913707, "mean_abs_error_last_50": 32.063433264048726, "mean_pred_prob": 0.0748607436195016, "mean_pred_prob_last_10": 0.33403370082378386, "mean_pred_prob_last_25": 0.20141934864223004, "mean_pred_prob_last_50": 0.1258180920034647, "mean_token_accuracy": 0.8786619663238525, "step": 53250 }, { "epoch": 0.9467939487671768, "grad_norm": 2.033053180335178, "learning_rate": 0.0001, "loss": 0.6687, "mean_abs_error": 507.13070466531207, "mean_abs_error_last_10": 130.60004117220836, "mean_abs_error_last_25": 167.56097408494796, "mean_abs_error_last_50": 326.02695450878304, "mean_pred_prob": 0.026118461950682105, "mean_pred_prob_last_10": 0.12923042138572782, "mean_pred_prob_last_25": 0.07066348176449537, "mean_pred_prob_last_50": 0.043599733978044244, "mean_token_accuracy": 0.8747955381870269, "step": 53260 }, { "epoch": 0.9469717170639788, "grad_norm": 1.3476563650730642, "learning_rate": 0.0001, "loss": 0.6581, "mean_abs_error": 525.2425081268643, "mean_abs_error_last_10": 149.2699159420528, "mean_abs_error_last_25": 216.80828604654283, "mean_abs_error_last_50": 310.33696857487377, "mean_pred_prob": 0.03946251331362873, "mean_pred_prob_last_10": 0.20300199181074277, "mean_pred_prob_last_25": 0.10879349575843662, "mean_pred_prob_last_50": 0.06434255031053908, "mean_token_accuracy": 0.8702628195285798, "step": 53270 }, { "epoch": 0.9471494853607808, "grad_norm": 1.6078078948423573, "learning_rate": 0.0001, "loss": 0.714, "mean_abs_error": 383.4814005611209, "mean_abs_error_last_10": 103.35987622816597, "mean_abs_error_last_25": 147.33664160132716, "mean_abs_error_last_50": 240.31242529101146, "mean_pred_prob": 0.0358624461106956, "mean_pred_prob_last_10": 0.17674681898206474, "mean_pred_prob_last_25": 0.09843664355576039, "mean_pred_prob_last_50": 0.0599459926597774, "mean_token_accuracy": 0.8806404232978821, "step": 53280 }, { "epoch": 0.9473272536575827, "grad_norm": 1.1899578083774838, "learning_rate": 0.0001, "loss": 0.6796, "mean_abs_error": 511.26136835730205, "mean_abs_error_last_10": 160.3545219242846, "mean_abs_error_last_25": 211.0812516357455, "mean_abs_error_last_50": 304.52749518962844, "mean_pred_prob": 0.029178386926651, "mean_pred_prob_last_10": 0.13778192792087793, "mean_pred_prob_last_25": 0.0818450435064733, "mean_pred_prob_last_50": 0.050129322707653044, "mean_token_accuracy": 0.8764696836471557, "step": 53290 }, { "epoch": 0.9475050219543847, "grad_norm": 1.5063531473029916, "learning_rate": 0.0001, "loss": 0.716, "mean_abs_error": 492.5108186301119, "mean_abs_error_last_10": 129.29710198327263, "mean_abs_error_last_25": 219.71694368571193, "mean_abs_error_last_50": 269.48546551553386, "mean_pred_prob": 0.026351140812039375, "mean_pred_prob_last_10": 0.13594805779866875, "mean_pred_prob_last_25": 0.07138552470132709, "mean_pred_prob_last_50": 0.04347184963990003, "mean_token_accuracy": 0.8750389575958252, "step": 53300 }, { "epoch": 0.9476827902511866, "grad_norm": 1.5794578180052097, "learning_rate": 0.0001, "loss": 0.7632, "mean_abs_error": 136.58267470294427, "mean_abs_error_last_10": 27.495166186067117, "mean_abs_error_last_25": 52.493423295733066, "mean_abs_error_last_50": 109.56831389202566, "mean_pred_prob": 0.0604375695809722, "mean_pred_prob_last_10": 0.2970812402665615, "mean_pred_prob_last_25": 0.16993784960359334, "mean_pred_prob_last_50": 0.10209417510777712, "mean_token_accuracy": 0.8765833437442779, "step": 53310 }, { "epoch": 0.9478605585479886, "grad_norm": 1.1019052237898679, "learning_rate": 0.0001, "loss": 0.7187, "mean_abs_error": 371.3114011000142, "mean_abs_error_last_10": 133.35313555935915, "mean_abs_error_last_25": 196.0938927976521, "mean_abs_error_last_50": 214.9915347310101, "mean_pred_prob": 0.03057742943055928, "mean_pred_prob_last_10": 0.1580137053504586, "mean_pred_prob_last_25": 0.08540523778647184, "mean_pred_prob_last_50": 0.051966573623940346, "mean_token_accuracy": 0.8671374917030334, "step": 53320 }, { "epoch": 0.9480383268447905, "grad_norm": 1.4079031163933864, "learning_rate": 0.0001, "loss": 0.6643, "mean_abs_error": 786.3433041184419, "mean_abs_error_last_10": 311.90257250373656, "mean_abs_error_last_25": 393.1199110797626, "mean_abs_error_last_50": 538.2637617351484, "mean_pred_prob": 0.03677216052165022, "mean_pred_prob_last_10": 0.17942607208970002, "mean_pred_prob_last_25": 0.09907488554599694, "mean_pred_prob_last_50": 0.0612443652091315, "mean_token_accuracy": 0.8744159162044525, "step": 53330 }, { "epoch": 0.9482160951415924, "grad_norm": 1.658648109439084, "learning_rate": 0.0001, "loss": 0.6702, "mean_abs_error": 225.49165312719424, "mean_abs_error_last_10": 98.3546735192501, "mean_abs_error_last_25": 202.4351110532861, "mean_abs_error_last_50": 200.66955864233327, "mean_pred_prob": 0.046169677097350356, "mean_pred_prob_last_10": 0.22942171469330788, "mean_pred_prob_last_25": 0.12423550868406892, "mean_pred_prob_last_50": 0.07631116891279817, "mean_token_accuracy": 0.8744646847248078, "step": 53340 }, { "epoch": 0.9483938634383944, "grad_norm": 1.2686554590951011, "learning_rate": 0.0001, "loss": 0.5755, "mean_abs_error": 296.7683638399436, "mean_abs_error_last_10": 116.19197995659405, "mean_abs_error_last_25": 117.2062455026819, "mean_abs_error_last_50": 166.33057393720767, "mean_pred_prob": 0.04014613644685596, "mean_pred_prob_last_10": 0.18587482068687677, "mean_pred_prob_last_25": 0.10613483600318432, "mean_pred_prob_last_50": 0.06590232769958675, "mean_token_accuracy": 0.8840035676956177, "step": 53350 }, { "epoch": 0.9485716317351963, "grad_norm": 1.3946016785362327, "learning_rate": 0.0001, "loss": 0.7165, "mean_abs_error": 1013.2062133010692, "mean_abs_error_last_10": 530.4721171831378, "mean_abs_error_last_25": 620.6541093423517, "mean_abs_error_last_50": 692.1254685998666, "mean_pred_prob": 0.02728692966629751, "mean_pred_prob_last_10": 0.140328580775531, "mean_pred_prob_last_25": 0.07492995444918052, "mean_pred_prob_last_50": 0.04555143345496617, "mean_token_accuracy": 0.8709835469722748, "step": 53360 }, { "epoch": 0.9487494000319983, "grad_norm": 2.364504923706111, "learning_rate": 0.0001, "loss": 0.6944, "mean_abs_error": 791.7876574260006, "mean_abs_error_last_10": 279.86680095181646, "mean_abs_error_last_25": 355.7111383009063, "mean_abs_error_last_50": 547.0475705511043, "mean_pred_prob": 0.01926820516237058, "mean_pred_prob_last_10": 0.09524998719571158, "mean_pred_prob_last_25": 0.05028432698454708, "mean_pred_prob_last_50": 0.031452721008099614, "mean_token_accuracy": 0.8760893166065216, "step": 53370 }, { "epoch": 0.9489271683288002, "grad_norm": 1.1597070161620004, "learning_rate": 0.0001, "loss": 0.7696, "mean_abs_error": 442.93916661171863, "mean_abs_error_last_10": 110.48687129790903, "mean_abs_error_last_25": 184.58971709428107, "mean_abs_error_last_50": 242.3500486478569, "mean_pred_prob": 0.021802205685526134, "mean_pred_prob_last_10": 0.12886314708739519, "mean_pred_prob_last_25": 0.06762362206354737, "mean_pred_prob_last_50": 0.0386349443346262, "mean_token_accuracy": 0.8681913197040558, "step": 53380 }, { "epoch": 0.9491049366256021, "grad_norm": 1.0578811832768233, "learning_rate": 0.0001, "loss": 0.762, "mean_abs_error": 1441.4204244301245, "mean_abs_error_last_10": 870.4011586911232, "mean_abs_error_last_25": 1002.405254074422, "mean_abs_error_last_50": 1188.8227854587572, "mean_pred_prob": 0.04059808179808897, "mean_pred_prob_last_10": 0.1905125846969895, "mean_pred_prob_last_25": 0.10878103239228949, "mean_pred_prob_last_50": 0.06813826659636106, "mean_token_accuracy": 0.8673587262630462, "step": 53390 }, { "epoch": 0.9492827049224042, "grad_norm": 2.011681614520965, "learning_rate": 0.0001, "loss": 0.6444, "mean_abs_error": 322.9477004536817, "mean_abs_error_last_10": 164.66391546893996, "mean_abs_error_last_25": 165.7670029704316, "mean_abs_error_last_50": 216.30337325651448, "mean_pred_prob": 0.0426543164299801, "mean_pred_prob_last_10": 0.20946224899962546, "mean_pred_prob_last_25": 0.11783732241019607, "mean_pred_prob_last_50": 0.0727369848638773, "mean_token_accuracy": 0.8717495143413544, "step": 53400 }, { "epoch": 0.9494604732192061, "grad_norm": 1.2479718810824718, "learning_rate": 0.0001, "loss": 0.6566, "mean_abs_error": 599.2255102554328, "mean_abs_error_last_10": 277.3874500637048, "mean_abs_error_last_25": 358.87817730042786, "mean_abs_error_last_50": 446.8952502639878, "mean_pred_prob": 0.05203054706798867, "mean_pred_prob_last_10": 0.26483628888963723, "mean_pred_prob_last_25": 0.1448247116699349, "mean_pred_prob_last_50": 0.08762168654357083, "mean_token_accuracy": 0.8721472322940826, "step": 53410 }, { "epoch": 0.9496382415160081, "grad_norm": 1.2524186252307707, "learning_rate": 0.0001, "loss": 0.6726, "mean_abs_error": 482.3778785864741, "mean_abs_error_last_10": 213.46712970949187, "mean_abs_error_last_25": 241.6967374739129, "mean_abs_error_last_50": 301.09111666603314, "mean_pred_prob": 0.03517111723776907, "mean_pred_prob_last_10": 0.16975229419767857, "mean_pred_prob_last_25": 0.09495765655301511, "mean_pred_prob_last_50": 0.058178938133642076, "mean_token_accuracy": 0.868551242351532, "step": 53420 }, { "epoch": 0.94981600981281, "grad_norm": 1.549237607045422, "learning_rate": 0.0001, "loss": 0.5789, "mean_abs_error": 240.11997635527254, "mean_abs_error_last_10": 96.87974678071994, "mean_abs_error_last_25": 114.68543891040486, "mean_abs_error_last_50": 148.89015811925245, "mean_pred_prob": 0.05008419892401435, "mean_pred_prob_last_10": 0.25666547666769474, "mean_pred_prob_last_25": 0.14237611389253288, "mean_pred_prob_last_50": 0.08664474941324443, "mean_token_accuracy": 0.8824544608592987, "step": 53430 }, { "epoch": 0.9499937781096119, "grad_norm": 0.9730444886532226, "learning_rate": 0.0001, "loss": 0.6583, "mean_abs_error": 327.05933720259515, "mean_abs_error_last_10": 110.01539061627314, "mean_abs_error_last_25": 145.12102575334757, "mean_abs_error_last_50": 205.0091053894177, "mean_pred_prob": 0.026132327411323784, "mean_pred_prob_last_10": 0.1273138852789998, "mean_pred_prob_last_25": 0.06920221606269479, "mean_pred_prob_last_50": 0.04328528437763453, "mean_token_accuracy": 0.8786717414855957, "step": 53440 }, { "epoch": 0.9501715464064139, "grad_norm": 0.9413542853053837, "learning_rate": 0.0001, "loss": 0.6751, "mean_abs_error": 549.2810287702647, "mean_abs_error_last_10": 191.94893389572417, "mean_abs_error_last_25": 216.60500325303065, "mean_abs_error_last_50": 325.4165044321393, "mean_pred_prob": 0.04689289575326257, "mean_pred_prob_last_10": 0.23063062883447855, "mean_pred_prob_last_25": 0.12426502282032743, "mean_pred_prob_last_50": 0.07671953382669017, "mean_token_accuracy": 0.8768605709075927, "step": 53450 }, { "epoch": 0.9503493147032158, "grad_norm": 0.8074670690312035, "learning_rate": 0.0001, "loss": 0.6369, "mean_abs_error": 178.8221216070888, "mean_abs_error_last_10": 31.049579495988105, "mean_abs_error_last_25": 52.023339700693384, "mean_abs_error_last_50": 94.01307892148627, "mean_pred_prob": 0.049505960242822764, "mean_pred_prob_last_10": 0.2412735890597105, "mean_pred_prob_last_25": 0.13347506225109101, "mean_pred_prob_last_50": 0.08115931674838066, "mean_token_accuracy": 0.8683453917503356, "step": 53460 }, { "epoch": 0.9505270830000178, "grad_norm": 1.891231111426997, "learning_rate": 0.0001, "loss": 0.7748, "mean_abs_error": 381.3786039095091, "mean_abs_error_last_10": 271.1431457679484, "mean_abs_error_last_25": 262.21996419916184, "mean_abs_error_last_50": 328.3325594471848, "mean_pred_prob": 0.04525228459388018, "mean_pred_prob_last_10": 0.2205303158611059, "mean_pred_prob_last_25": 0.12218349725008011, "mean_pred_prob_last_50": 0.07524688919074833, "mean_token_accuracy": 0.8727588534355164, "step": 53470 }, { "epoch": 0.9507048512968197, "grad_norm": 1.5609856742613015, "learning_rate": 0.0001, "loss": 0.7499, "mean_abs_error": 395.3928239906786, "mean_abs_error_last_10": 156.81948314388111, "mean_abs_error_last_25": 240.85937716334155, "mean_abs_error_last_50": 251.83329857350336, "mean_pred_prob": 0.03186354064382613, "mean_pred_prob_last_10": 0.1405833525583148, "mean_pred_prob_last_25": 0.07960048420354723, "mean_pred_prob_last_50": 0.050952296750620005, "mean_token_accuracy": 0.8748343050479889, "step": 53480 }, { "epoch": 0.9508826195936216, "grad_norm": 1.793314779852843, "learning_rate": 0.0001, "loss": 0.696, "mean_abs_error": 974.9291693392266, "mean_abs_error_last_10": 597.896908042498, "mean_abs_error_last_25": 686.4781252896685, "mean_abs_error_last_50": 802.7579081120455, "mean_pred_prob": 0.041773658552847336, "mean_pred_prob_last_10": 0.20735581180197188, "mean_pred_prob_last_25": 0.11396933575742878, "mean_pred_prob_last_50": 0.0701838695895276, "mean_token_accuracy": 0.8804826319217682, "step": 53490 }, { "epoch": 0.9510603878904236, "grad_norm": 3.167922421079378, "learning_rate": 0.0001, "loss": 0.6699, "mean_abs_error": 286.3721413512664, "mean_abs_error_last_10": 90.55245423891219, "mean_abs_error_last_25": 104.91180615341139, "mean_abs_error_last_50": 178.1968280752073, "mean_pred_prob": 0.05119325057603419, "mean_pred_prob_last_10": 0.2269717391580343, "mean_pred_prob_last_25": 0.135174710303545, "mean_pred_prob_last_50": 0.0835838452912867, "mean_token_accuracy": 0.862732207775116, "step": 53500 }, { "epoch": 0.9512381561872256, "grad_norm": 1.8593896218058124, "learning_rate": 0.0001, "loss": 0.625, "mean_abs_error": 378.28492886036065, "mean_abs_error_last_10": 95.3207160158089, "mean_abs_error_last_25": 144.7413247466624, "mean_abs_error_last_50": 195.2272962567627, "mean_pred_prob": 0.041107123531401155, "mean_pred_prob_last_10": 0.1858108475804329, "mean_pred_prob_last_25": 0.11038874192163348, "mean_pred_prob_last_50": 0.06933335680514574, "mean_token_accuracy": 0.8813934206962586, "step": 53510 }, { "epoch": 0.9514159244840276, "grad_norm": 1.3185588472022307, "learning_rate": 0.0001, "loss": 0.6177, "mean_abs_error": 1059.6415225734906, "mean_abs_error_last_10": 645.0334265306235, "mean_abs_error_last_25": 664.9636595560175, "mean_abs_error_last_50": 744.3169713673454, "mean_pred_prob": 0.03411908912821673, "mean_pred_prob_last_10": 0.17778693207073956, "mean_pred_prob_last_25": 0.09844716343795881, "mean_pred_prob_last_50": 0.05853076079802122, "mean_token_accuracy": 0.8683198690414429, "step": 53520 }, { "epoch": 0.9515936927808295, "grad_norm": 0.9937647133162805, "learning_rate": 0.0001, "loss": 0.6239, "mean_abs_error": 358.21766649988024, "mean_abs_error_last_10": 250.995459922017, "mean_abs_error_last_25": 270.44897273402563, "mean_abs_error_last_50": 279.8897226278898, "mean_pred_prob": 0.025547164701856674, "mean_pred_prob_last_10": 0.11508639570092782, "mean_pred_prob_last_25": 0.06533039006171748, "mean_pred_prob_last_50": 0.04094879296608269, "mean_token_accuracy": 0.8729295253753662, "step": 53530 }, { "epoch": 0.9517714610776314, "grad_norm": 1.5417886002736498, "learning_rate": 0.0001, "loss": 0.6181, "mean_abs_error": 464.36666203876166, "mean_abs_error_last_10": 106.4337082112417, "mean_abs_error_last_25": 143.54674222451644, "mean_abs_error_last_50": 250.65972993473142, "mean_pred_prob": 0.040424506517592815, "mean_pred_prob_last_10": 0.19783823672914877, "mean_pred_prob_last_25": 0.11502793856197968, "mean_pred_prob_last_50": 0.06931199722457677, "mean_token_accuracy": 0.8777325987815857, "step": 53540 }, { "epoch": 0.9519492293744334, "grad_norm": 0.7326128298576707, "learning_rate": 0.0001, "loss": 0.7083, "mean_abs_error": 550.0335086426132, "mean_abs_error_last_10": 97.38216388160988, "mean_abs_error_last_25": 160.6145180222254, "mean_abs_error_last_50": 312.3991733010112, "mean_pred_prob": 0.031679425982292744, "mean_pred_prob_last_10": 0.17827689519617707, "mean_pred_prob_last_25": 0.09301941861631349, "mean_pred_prob_last_50": 0.05485111712478101, "mean_token_accuracy": 0.8674405813217163, "step": 53550 }, { "epoch": 0.9521269976712353, "grad_norm": 1.2862392138178929, "learning_rate": 0.0001, "loss": 0.6224, "mean_abs_error": 609.885498911953, "mean_abs_error_last_10": 305.24161056270316, "mean_abs_error_last_25": 296.23484719654317, "mean_abs_error_last_50": 399.776698033584, "mean_pred_prob": 0.027698237530421464, "mean_pred_prob_last_10": 0.135449752304703, "mean_pred_prob_last_25": 0.07371658597839996, "mean_pred_prob_last_50": 0.04586633343133144, "mean_token_accuracy": 0.8641330182552338, "step": 53560 }, { "epoch": 0.9523047659680373, "grad_norm": 1.1699010990695287, "learning_rate": 0.0001, "loss": 0.569, "mean_abs_error": 448.6725653986094, "mean_abs_error_last_10": 115.65424865778623, "mean_abs_error_last_25": 182.92950059681388, "mean_abs_error_last_50": 254.08608136039578, "mean_pred_prob": 0.028770315856672824, "mean_pred_prob_last_10": 0.1527949849725701, "mean_pred_prob_last_25": 0.08156753967050463, "mean_pred_prob_last_50": 0.048571704974165186, "mean_token_accuracy": 0.8842590093612671, "step": 53570 }, { "epoch": 0.9524825342648392, "grad_norm": 2.142766282722855, "learning_rate": 0.0001, "loss": 0.6979, "mean_abs_error": 341.3128403159957, "mean_abs_error_last_10": 92.46235214917134, "mean_abs_error_last_25": 128.30792619674187, "mean_abs_error_last_50": 196.4589757903904, "mean_pred_prob": 0.04122574864304625, "mean_pred_prob_last_10": 0.1995885100448504, "mean_pred_prob_last_25": 0.11244433515239508, "mean_pred_prob_last_50": 0.06943147156853229, "mean_token_accuracy": 0.8805175602436066, "step": 53580 }, { "epoch": 0.9526603025616411, "grad_norm": 1.246895830104243, "learning_rate": 0.0001, "loss": 0.5848, "mean_abs_error": 144.01221311249503, "mean_abs_error_last_10": 30.47433191587652, "mean_abs_error_last_25": 48.874310880799044, "mean_abs_error_last_50": 87.27907401823244, "mean_pred_prob": 0.04762165257707238, "mean_pred_prob_last_10": 0.21472388096153736, "mean_pred_prob_last_25": 0.12498910799622535, "mean_pred_prob_last_50": 0.07833784222602844, "mean_token_accuracy": 0.871870219707489, "step": 53590 }, { "epoch": 0.9528380708584431, "grad_norm": 1.6508580845024947, "learning_rate": 0.0001, "loss": 0.7116, "mean_abs_error": 625.063755653377, "mean_abs_error_last_10": 148.95033430794578, "mean_abs_error_last_25": 191.0187841528506, "mean_abs_error_last_50": 354.6983930084405, "mean_pred_prob": 0.02893148995935917, "mean_pred_prob_last_10": 0.15788745788158848, "mean_pred_prob_last_25": 0.0858110069297254, "mean_pred_prob_last_50": 0.05055687838466838, "mean_token_accuracy": 0.868424779176712, "step": 53600 }, { "epoch": 0.953015839155245, "grad_norm": 2.5280173735470584, "learning_rate": 0.0001, "loss": 0.8445, "mean_abs_error": 1040.2527157344418, "mean_abs_error_last_10": 456.5451194537287, "mean_abs_error_last_25": 543.0612155383475, "mean_abs_error_last_50": 719.8132326573215, "mean_pred_prob": 0.028940473656984978, "mean_pred_prob_last_10": 0.12784386564744638, "mean_pred_prob_last_25": 0.07627854109450709, "mean_pred_prob_last_50": 0.04695492396713234, "mean_token_accuracy": 0.8684332072734833, "step": 53610 }, { "epoch": 0.953193607452047, "grad_norm": 3.0398823357160714, "learning_rate": 0.0001, "loss": 0.7821, "mean_abs_error": 1121.2909982321562, "mean_abs_error_last_10": 620.7258505888746, "mean_abs_error_last_25": 725.6636067732104, "mean_abs_error_last_50": 883.018220011325, "mean_pred_prob": 0.03499614500178723, "mean_pred_prob_last_10": 0.15953924533096142, "mean_pred_prob_last_25": 0.08791288342035841, "mean_pred_prob_last_50": 0.05616700279933866, "mean_token_accuracy": 0.8759231388568878, "step": 53620 }, { "epoch": 0.953371375748849, "grad_norm": 1.3930556473192555, "learning_rate": 0.0001, "loss": 0.7005, "mean_abs_error": 744.7457065004616, "mean_abs_error_last_10": 294.9509508844959, "mean_abs_error_last_25": 326.6268814627483, "mean_abs_error_last_50": 421.9910505737127, "mean_pred_prob": 0.019582168979104607, "mean_pred_prob_last_10": 0.09942083534551785, "mean_pred_prob_last_25": 0.05061459422577173, "mean_pred_prob_last_50": 0.03156376793049276, "mean_token_accuracy": 0.8758973956108094, "step": 53630 }, { "epoch": 0.9535491440456509, "grad_norm": 1.2661546974512616, "learning_rate": 0.0001, "loss": 0.8317, "mean_abs_error": 535.3564185439645, "mean_abs_error_last_10": 104.42209805927935, "mean_abs_error_last_25": 172.541815132672, "mean_abs_error_last_50": 290.18327571750945, "mean_pred_prob": 0.03728475690586493, "mean_pred_prob_last_10": 0.17931009365711362, "mean_pred_prob_last_25": 0.09989545093849302, "mean_pred_prob_last_50": 0.06127951222006232, "mean_token_accuracy": 0.8725330471992493, "step": 53640 }, { "epoch": 0.9537269123424529, "grad_norm": 1.4674422543909094, "learning_rate": 0.0001, "loss": 0.6209, "mean_abs_error": 301.0780505419655, "mean_abs_error_last_10": 139.64907113286966, "mean_abs_error_last_25": 185.14849041599786, "mean_abs_error_last_50": 264.41356203533195, "mean_pred_prob": 0.03481761170551181, "mean_pred_prob_last_10": 0.17187913320958614, "mean_pred_prob_last_25": 0.09123629275709391, "mean_pred_prob_last_50": 0.05640265177935362, "mean_token_accuracy": 0.8794193089008331, "step": 53650 }, { "epoch": 0.9539046806392548, "grad_norm": 1.8065066224429562, "learning_rate": 0.0001, "loss": 0.658, "mean_abs_error": 371.5392374466972, "mean_abs_error_last_10": 182.66028117036643, "mean_abs_error_last_25": 161.22893878050718, "mean_abs_error_last_50": 221.4724005267244, "mean_pred_prob": 0.055487057607388125, "mean_pred_prob_last_10": 0.2522819845238701, "mean_pred_prob_last_25": 0.14882946144207382, "mean_pred_prob_last_50": 0.0911983636149671, "mean_token_accuracy": 0.8696528673171997, "step": 53660 }, { "epoch": 0.9540824489360568, "grad_norm": 1.3446417456287756, "learning_rate": 0.0001, "loss": 0.6869, "mean_abs_error": 679.1933595845355, "mean_abs_error_last_10": 302.63393160941604, "mean_abs_error_last_25": 354.23881098614544, "mean_abs_error_last_50": 508.87027334368196, "mean_pred_prob": 0.02486230553477071, "mean_pred_prob_last_10": 0.13164755010511725, "mean_pred_prob_last_25": 0.07043102484894917, "mean_pred_prob_last_50": 0.04225082385819405, "mean_token_accuracy": 0.860759311914444, "step": 53670 }, { "epoch": 0.9542602172328587, "grad_norm": 2.853346493679441, "learning_rate": 0.0001, "loss": 0.782, "mean_abs_error": 501.8445148806328, "mean_abs_error_last_10": 178.9722826457208, "mean_abs_error_last_25": 243.90171952988968, "mean_abs_error_last_50": 346.26420231661484, "mean_pred_prob": 0.0346916557115037, "mean_pred_prob_last_10": 0.1607343253446743, "mean_pred_prob_last_25": 0.09213743184809572, "mean_pred_prob_last_50": 0.05666489223949611, "mean_token_accuracy": 0.870807409286499, "step": 53680 }, { "epoch": 0.9544379855296606, "grad_norm": 1.7787988937244383, "learning_rate": 0.0001, "loss": 0.8026, "mean_abs_error": 287.8320184462188, "mean_abs_error_last_10": 84.01726841733432, "mean_abs_error_last_25": 151.82438001253203, "mean_abs_error_last_50": 199.04142592755085, "mean_pred_prob": 0.038666658522561195, "mean_pred_prob_last_10": 0.19017384303733706, "mean_pred_prob_last_25": 0.10888561634346842, "mean_pred_prob_last_50": 0.06583106368780137, "mean_token_accuracy": 0.8728593230247498, "step": 53690 }, { "epoch": 0.9546157538264626, "grad_norm": 1.949425242477083, "learning_rate": 0.0001, "loss": 0.63, "mean_abs_error": 167.1732438832566, "mean_abs_error_last_10": 37.319296351277806, "mean_abs_error_last_25": 56.05846291062535, "mean_abs_error_last_50": 93.92988991643844, "mean_pred_prob": 0.04509039930999279, "mean_pred_prob_last_10": 0.23889820277690887, "mean_pred_prob_last_25": 0.1272789128124714, "mean_pred_prob_last_50": 0.07588704116642475, "mean_token_accuracy": 0.8839849531650543, "step": 53700 }, { "epoch": 0.9547935221232645, "grad_norm": 3.5038017978411466, "learning_rate": 0.0001, "loss": 0.6478, "mean_abs_error": 350.67652648489286, "mean_abs_error_last_10": 46.071819224848, "mean_abs_error_last_25": 78.76448966864315, "mean_abs_error_last_50": 166.11523888325067, "mean_pred_prob": 0.05142461904324591, "mean_pred_prob_last_10": 0.2535276226699352, "mean_pred_prob_last_25": 0.1432699934579432, "mean_pred_prob_last_50": 0.08809632891789079, "mean_token_accuracy": 0.8820353865623474, "step": 53710 }, { "epoch": 0.9549712904200665, "grad_norm": 1.5296548496986118, "learning_rate": 0.0001, "loss": 0.6067, "mean_abs_error": 159.11150131604296, "mean_abs_error_last_10": 45.780772982971115, "mean_abs_error_last_25": 96.8082706193101, "mean_abs_error_last_50": 132.75197201635805, "mean_pred_prob": 0.03979315827600658, "mean_pred_prob_last_10": 0.20708695910871028, "mean_pred_prob_last_25": 0.10875399522483349, "mean_pred_prob_last_50": 0.06506842514500022, "mean_token_accuracy": 0.8678942143917083, "step": 53720 }, { "epoch": 0.9551490587168684, "grad_norm": 1.860719914752315, "learning_rate": 0.0001, "loss": 0.6416, "mean_abs_error": 181.93693662332737, "mean_abs_error_last_10": 43.07044379147786, "mean_abs_error_last_25": 67.78880771391819, "mean_abs_error_last_50": 141.20432880113952, "mean_pred_prob": 0.05148799973540008, "mean_pred_prob_last_10": 0.24188262820243836, "mean_pred_prob_last_25": 0.13619303153827786, "mean_pred_prob_last_50": 0.08457192601636052, "mean_token_accuracy": 0.880197674036026, "step": 53730 }, { "epoch": 0.9553268270136703, "grad_norm": 2.201113661747975, "learning_rate": 0.0001, "loss": 0.6865, "mean_abs_error": 382.1183760908166, "mean_abs_error_last_10": 129.80999153758953, "mean_abs_error_last_25": 178.74177738442444, "mean_abs_error_last_50": 218.15287425560683, "mean_pred_prob": 0.04373869677074253, "mean_pred_prob_last_10": 0.19634785389062018, "mean_pred_prob_last_25": 0.11534508976619691, "mean_pred_prob_last_50": 0.07302214869996533, "mean_token_accuracy": 0.8725520133972168, "step": 53740 }, { "epoch": 0.9555045953104724, "grad_norm": 1.1253749709986085, "learning_rate": 0.0001, "loss": 0.6912, "mean_abs_error": 465.46534398157735, "mean_abs_error_last_10": 480.5822115716648, "mean_abs_error_last_25": 382.0439339539762, "mean_abs_error_last_50": 361.3827079454301, "mean_pred_prob": 0.040966891293646765, "mean_pred_prob_last_10": 0.19390918720746414, "mean_pred_prob_last_25": 0.11245106031419709, "mean_pred_prob_last_50": 0.06871385957929306, "mean_token_accuracy": 0.8631093800067902, "step": 53750 }, { "epoch": 0.9556823636072743, "grad_norm": 1.7765519350339827, "learning_rate": 0.0001, "loss": 0.7443, "mean_abs_error": 342.9316396207108, "mean_abs_error_last_10": 215.3706117230857, "mean_abs_error_last_25": 248.36017805713033, "mean_abs_error_last_50": 292.25708919007377, "mean_pred_prob": 0.03338288315571845, "mean_pred_prob_last_10": 0.16743288873694837, "mean_pred_prob_last_25": 0.09197045295732095, "mean_pred_prob_last_50": 0.055539865570608526, "mean_token_accuracy": 0.872582197189331, "step": 53760 }, { "epoch": 0.9558601319040763, "grad_norm": 1.3850108688288467, "learning_rate": 0.0001, "loss": 0.6738, "mean_abs_error": 781.1864036213701, "mean_abs_error_last_10": 232.73870614132116, "mean_abs_error_last_25": 292.92640286704415, "mean_abs_error_last_50": 456.31610634441705, "mean_pred_prob": 0.023144003248307853, "mean_pred_prob_last_10": 0.11293667189311236, "mean_pred_prob_last_25": 0.06251485794782638, "mean_pred_prob_last_50": 0.0391692406963557, "mean_token_accuracy": 0.8748797833919525, "step": 53770 }, { "epoch": 0.9560379002008782, "grad_norm": 1.8597675520036325, "learning_rate": 0.0001, "loss": 0.6094, "mean_abs_error": 271.1914012631505, "mean_abs_error_last_10": 67.23769867083845, "mean_abs_error_last_25": 82.9712662130005, "mean_abs_error_last_50": 136.19662399367513, "mean_pred_prob": 0.04400852164253592, "mean_pred_prob_last_10": 0.2078293412923813, "mean_pred_prob_last_25": 0.12104188036173583, "mean_pred_prob_last_50": 0.07512816963717342, "mean_token_accuracy": 0.8790005803108215, "step": 53780 }, { "epoch": 0.9562156684976801, "grad_norm": 1.0932403976819074, "learning_rate": 0.0001, "loss": 0.7248, "mean_abs_error": 324.5417606340376, "mean_abs_error_last_10": 152.06142252304326, "mean_abs_error_last_25": 159.36038517132587, "mean_abs_error_last_50": 184.9733168663402, "mean_pred_prob": 0.0359845639904961, "mean_pred_prob_last_10": 0.1653635261580348, "mean_pred_prob_last_25": 0.09174861172214151, "mean_pred_prob_last_50": 0.057789949141442774, "mean_token_accuracy": 0.867991304397583, "step": 53790 }, { "epoch": 0.9563934367944821, "grad_norm": 1.3518782581245978, "learning_rate": 0.0001, "loss": 0.808, "mean_abs_error": 230.54116799064877, "mean_abs_error_last_10": 89.57987782183423, "mean_abs_error_last_25": 119.60661142150704, "mean_abs_error_last_50": 165.7681750745549, "mean_pred_prob": 0.0433552669826895, "mean_pred_prob_last_10": 0.1880820695310831, "mean_pred_prob_last_25": 0.11852578055113554, "mean_pred_prob_last_50": 0.07479053679853678, "mean_token_accuracy": 0.8722662508487702, "step": 53800 }, { "epoch": 0.956571205091284, "grad_norm": 1.5212331317335013, "learning_rate": 0.0001, "loss": 0.6959, "mean_abs_error": 477.6142136591258, "mean_abs_error_last_10": 169.88242464053894, "mean_abs_error_last_25": 214.9492471549923, "mean_abs_error_last_50": 312.1174198708147, "mean_pred_prob": 0.03139003699179739, "mean_pred_prob_last_10": 0.15136673640226944, "mean_pred_prob_last_25": 0.08396390435518697, "mean_pred_prob_last_50": 0.05195486301090568, "mean_token_accuracy": 0.8581181645393372, "step": 53810 }, { "epoch": 0.956748973388086, "grad_norm": 1.3335748622517967, "learning_rate": 0.0001, "loss": 0.5016, "mean_abs_error": 294.989576753041, "mean_abs_error_last_10": 100.59286789546631, "mean_abs_error_last_25": 100.31765372429484, "mean_abs_error_last_50": 139.45987070757363, "mean_pred_prob": 0.058235592860728505, "mean_pred_prob_last_10": 0.26616831198334695, "mean_pred_prob_last_25": 0.15252879103645683, "mean_pred_prob_last_50": 0.09685816778801382, "mean_token_accuracy": 0.8886556982994079, "step": 53820 }, { "epoch": 0.9569267416848879, "grad_norm": 2.0438553440624427, "learning_rate": 0.0001, "loss": 0.6717, "mean_abs_error": 101.44490581402695, "mean_abs_error_last_10": 32.03187607521494, "mean_abs_error_last_25": 34.52517884188574, "mean_abs_error_last_50": 52.01113764067916, "mean_pred_prob": 0.06337877633050085, "mean_pred_prob_last_10": 0.28517217859625815, "mean_pred_prob_last_25": 0.169348450563848, "mean_pred_prob_last_50": 0.10568468607962131, "mean_token_accuracy": 0.8802948653697967, "step": 53830 }, { "epoch": 0.9571045099816898, "grad_norm": 1.617086739451048, "learning_rate": 0.0001, "loss": 0.5967, "mean_abs_error": 876.5533582991359, "mean_abs_error_last_10": 538.2207452147258, "mean_abs_error_last_25": 587.2435443900364, "mean_abs_error_last_50": 661.7845439501164, "mean_pred_prob": 0.046580506129248536, "mean_pred_prob_last_10": 0.19965281046461314, "mean_pred_prob_last_25": 0.11954349156294483, "mean_pred_prob_last_50": 0.07497911833052058, "mean_token_accuracy": 0.8773967206478119, "step": 53840 }, { "epoch": 0.9572822782784918, "grad_norm": 1.4990075103728797, "learning_rate": 0.0001, "loss": 0.6438, "mean_abs_error": 1043.193050657163, "mean_abs_error_last_10": 235.32722563091335, "mean_abs_error_last_25": 366.6225176079489, "mean_abs_error_last_50": 529.4130923309486, "mean_pred_prob": 0.02674337775679305, "mean_pred_prob_last_10": 0.12404130091890693, "mean_pred_prob_last_25": 0.06854330956703052, "mean_pred_prob_last_50": 0.04265777358668856, "mean_token_accuracy": 0.872847193479538, "step": 53850 }, { "epoch": 0.9574600465752937, "grad_norm": 1.613004729850552, "learning_rate": 0.0001, "loss": 0.612, "mean_abs_error": 129.91979257457322, "mean_abs_error_last_10": 35.02908217830591, "mean_abs_error_last_25": 54.17744767438984, "mean_abs_error_last_50": 78.07426287138729, "mean_pred_prob": 0.05538811129517853, "mean_pred_prob_last_10": 0.2627783242613077, "mean_pred_prob_last_25": 0.15103891361504793, "mean_pred_prob_last_50": 0.09275914561003447, "mean_token_accuracy": 0.8762876093387604, "step": 53860 }, { "epoch": 0.9576378148720958, "grad_norm": 1.2604780759258962, "learning_rate": 0.0001, "loss": 0.5936, "mean_abs_error": 292.8228653855191, "mean_abs_error_last_10": 85.94862892605471, "mean_abs_error_last_25": 192.20838189827694, "mean_abs_error_last_50": 214.44788937465796, "mean_pred_prob": 0.04339921521022916, "mean_pred_prob_last_10": 0.18304369710385798, "mean_pred_prob_last_25": 0.10687912749126553, "mean_pred_prob_last_50": 0.06948209046386182, "mean_token_accuracy": 0.8759011864662171, "step": 53870 }, { "epoch": 0.9578155831688977, "grad_norm": 1.3978825318893497, "learning_rate": 0.0001, "loss": 0.6993, "mean_abs_error": 382.6781104471308, "mean_abs_error_last_10": 86.43620074185256, "mean_abs_error_last_25": 145.2433827797068, "mean_abs_error_last_50": 185.8622330559883, "mean_pred_prob": 0.0497420575004071, "mean_pred_prob_last_10": 0.24785585635108873, "mean_pred_prob_last_25": 0.13726835665293038, "mean_pred_prob_last_50": 0.08434304100228474, "mean_token_accuracy": 0.8659790515899658, "step": 53880 }, { "epoch": 0.9579933514656996, "grad_norm": 2.169041472609504, "learning_rate": 0.0001, "loss": 0.6527, "mean_abs_error": 163.65582851530522, "mean_abs_error_last_10": 52.54661875386216, "mean_abs_error_last_25": 130.20209614032268, "mean_abs_error_last_50": 127.65115625771243, "mean_pred_prob": 0.06814528640825301, "mean_pred_prob_last_10": 0.29497375413775445, "mean_pred_prob_last_25": 0.17292805798351765, "mean_pred_prob_last_50": 0.11165381325408816, "mean_token_accuracy": 0.8725402474403381, "step": 53890 }, { "epoch": 0.9581711197625016, "grad_norm": 1.2863305921488888, "learning_rate": 0.0001, "loss": 0.5907, "mean_abs_error": 222.60716216872635, "mean_abs_error_last_10": 54.683593872983025, "mean_abs_error_last_25": 100.3916699423304, "mean_abs_error_last_50": 152.63356702211087, "mean_pred_prob": 0.0419897835701704, "mean_pred_prob_last_10": 0.20567948408424855, "mean_pred_prob_last_25": 0.1149121692404151, "mean_pred_prob_last_50": 0.06991463964805007, "mean_token_accuracy": 0.8690062761306763, "step": 53900 }, { "epoch": 0.9583488880593035, "grad_norm": 1.3287880980504228, "learning_rate": 0.0001, "loss": 0.787, "mean_abs_error": 685.1388380290509, "mean_abs_error_last_10": 218.7125658045947, "mean_abs_error_last_25": 476.46686267919597, "mean_abs_error_last_50": 548.1995195647129, "mean_pred_prob": 0.036402600636938585, "mean_pred_prob_last_10": 0.17516019589966164, "mean_pred_prob_last_25": 0.0987931122072041, "mean_pred_prob_last_50": 0.059660364175215364, "mean_token_accuracy": 0.8715901076793671, "step": 53910 }, { "epoch": 0.9585266563561055, "grad_norm": 0.8890703762672421, "learning_rate": 0.0001, "loss": 0.6283, "mean_abs_error": 270.1028450270598, "mean_abs_error_last_10": 67.99710364141744, "mean_abs_error_last_25": 118.09580735123295, "mean_abs_error_last_50": 177.2344056024679, "mean_pred_prob": 0.040046728774905206, "mean_pred_prob_last_10": 0.18905588202178478, "mean_pred_prob_last_25": 0.10837236493825912, "mean_pred_prob_last_50": 0.0668531246483326, "mean_token_accuracy": 0.8730068325996398, "step": 53920 }, { "epoch": 0.9587044246529074, "grad_norm": 2.0773221079667716, "learning_rate": 0.0001, "loss": 0.708, "mean_abs_error": 1014.3609543500825, "mean_abs_error_last_10": 439.4984099013576, "mean_abs_error_last_25": 624.572682858274, "mean_abs_error_last_50": 872.425440970763, "mean_pred_prob": 0.03530957758484874, "mean_pred_prob_last_10": 0.17888136254041456, "mean_pred_prob_last_25": 0.09701138552336488, "mean_pred_prob_last_50": 0.05832478849042673, "mean_token_accuracy": 0.8621422052383423, "step": 53930 }, { "epoch": 0.9588821929497093, "grad_norm": 2.5193865906180832, "learning_rate": 0.0001, "loss": 0.8312, "mean_abs_error": 638.652600890322, "mean_abs_error_last_10": 216.86918189579356, "mean_abs_error_last_25": 291.94630691671364, "mean_abs_error_last_50": 429.16156487714096, "mean_pred_prob": 0.04317475462448783, "mean_pred_prob_last_10": 0.19753331480314956, "mean_pred_prob_last_25": 0.11391461452003568, "mean_pred_prob_last_50": 0.07169421995058656, "mean_token_accuracy": 0.868390679359436, "step": 53940 }, { "epoch": 0.9590599612465113, "grad_norm": 1.417973072957544, "learning_rate": 0.0001, "loss": 0.7928, "mean_abs_error": 1273.3218595254968, "mean_abs_error_last_10": 633.1833073759243, "mean_abs_error_last_25": 717.4432536864675, "mean_abs_error_last_50": 927.1783121689621, "mean_pred_prob": 0.028962881381448823, "mean_pred_prob_last_10": 0.14557798132300376, "mean_pred_prob_last_25": 0.07816083183570299, "mean_pred_prob_last_50": 0.04820337941928301, "mean_token_accuracy": 0.8659505903720855, "step": 53950 }, { "epoch": 0.9592377295433132, "grad_norm": 2.2269076941980606, "learning_rate": 0.0001, "loss": 0.6451, "mean_abs_error": 430.57951674553794, "mean_abs_error_last_10": 140.72244991050033, "mean_abs_error_last_25": 144.64046734790287, "mean_abs_error_last_50": 207.8824902383023, "mean_pred_prob": 0.0579642889846582, "mean_pred_prob_last_10": 0.2664880821714178, "mean_pred_prob_last_25": 0.15439346903003753, "mean_pred_prob_last_50": 0.09607660063775256, "mean_token_accuracy": 0.8800108730792999, "step": 53960 }, { "epoch": 0.9594154978401152, "grad_norm": 2.122421693435703, "learning_rate": 0.0001, "loss": 0.8364, "mean_abs_error": 390.3465451923594, "mean_abs_error_last_10": 200.1255798766502, "mean_abs_error_last_25": 242.0904506096054, "mean_abs_error_last_50": 281.6848322302379, "mean_pred_prob": 0.03727998055983335, "mean_pred_prob_last_10": 0.18207956224214286, "mean_pred_prob_last_25": 0.10046321747358888, "mean_pred_prob_last_50": 0.06121948435902595, "mean_token_accuracy": 0.8672247648239135, "step": 53970 }, { "epoch": 0.9595932661369171, "grad_norm": 2.5566831665238867, "learning_rate": 0.0001, "loss": 0.6407, "mean_abs_error": 403.63278992100993, "mean_abs_error_last_10": 201.14927095188696, "mean_abs_error_last_25": 317.5907760731117, "mean_abs_error_last_50": 367.1821052562878, "mean_pred_prob": 0.045446519053075465, "mean_pred_prob_last_10": 0.20654415369499474, "mean_pred_prob_last_25": 0.12042438879143447, "mean_pred_prob_last_50": 0.07506681319791823, "mean_token_accuracy": 0.8713564574718475, "step": 53980 }, { "epoch": 0.9597710344337191, "grad_norm": 0.8710940488522616, "learning_rate": 0.0001, "loss": 0.6536, "mean_abs_error": 433.27005747241867, "mean_abs_error_last_10": 80.97248304592459, "mean_abs_error_last_25": 158.61028160503596, "mean_abs_error_last_50": 254.70663540166782, "mean_pred_prob": 0.024461865215562283, "mean_pred_prob_last_10": 0.12863594926893712, "mean_pred_prob_last_25": 0.0675746857188642, "mean_pred_prob_last_50": 0.04161751898936927, "mean_token_accuracy": 0.8693007588386535, "step": 53990 }, { "epoch": 0.9599488027305211, "grad_norm": 1.5583459131606692, "learning_rate": 0.0001, "loss": 0.6331, "mean_abs_error": 469.6029089325786, "mean_abs_error_last_10": 68.62485145773198, "mean_abs_error_last_25": 119.9683097439352, "mean_abs_error_last_50": 233.888959468372, "mean_pred_prob": 0.03375558448024094, "mean_pred_prob_last_10": 0.16588820051401854, "mean_pred_prob_last_25": 0.09024271918460727, "mean_pred_prob_last_50": 0.05598373264074326, "mean_token_accuracy": 0.8800158679485321, "step": 54000 }, { "epoch": 0.960126571027323, "grad_norm": 2.396439628900493, "learning_rate": 0.0001, "loss": 0.6215, "mean_abs_error": 927.8682749960844, "mean_abs_error_last_10": 622.8508373742832, "mean_abs_error_last_25": 689.802357084106, "mean_abs_error_last_50": 772.0473621861258, "mean_pred_prob": 0.0436039229069138, "mean_pred_prob_last_10": 0.2143145578185795, "mean_pred_prob_last_25": 0.12257341766380705, "mean_pred_prob_last_50": 0.07418904909864069, "mean_token_accuracy": 0.8723320722579956, "step": 54010 }, { "epoch": 0.960304339324125, "grad_norm": 1.208605365126251, "learning_rate": 0.0001, "loss": 0.6709, "mean_abs_error": 808.7765988352497, "mean_abs_error_last_10": 307.88025720137, "mean_abs_error_last_25": 383.90465157734116, "mean_abs_error_last_50": 549.7352134127925, "mean_pred_prob": 0.04629518413566984, "mean_pred_prob_last_10": 0.2058596407121513, "mean_pred_prob_last_25": 0.12087564283283428, "mean_pred_prob_last_50": 0.07664717476291116, "mean_token_accuracy": 0.8644052743911743, "step": 54020 }, { "epoch": 0.9604821076209269, "grad_norm": 1.246476623186556, "learning_rate": 0.0001, "loss": 0.6037, "mean_abs_error": 374.27099549469733, "mean_abs_error_last_10": 162.7703546069423, "mean_abs_error_last_25": 205.80533876393687, "mean_abs_error_last_50": 295.3531701406085, "mean_pred_prob": 0.03778156682383269, "mean_pred_prob_last_10": 0.18219058755785228, "mean_pred_prob_last_25": 0.10098132900893689, "mean_pred_prob_last_50": 0.06231512534432113, "mean_token_accuracy": 0.8779274582862854, "step": 54030 }, { "epoch": 0.9606598759177288, "grad_norm": 1.6649890408927288, "learning_rate": 0.0001, "loss": 0.5972, "mean_abs_error": 929.2840698461354, "mean_abs_error_last_10": 561.9460336157896, "mean_abs_error_last_25": 657.4233060901727, "mean_abs_error_last_50": 735.8143043213915, "mean_pred_prob": 0.04817700834828429, "mean_pred_prob_last_10": 0.24322531795478425, "mean_pred_prob_last_25": 0.13491072453034575, "mean_pred_prob_last_50": 0.08142176615947391, "mean_token_accuracy": 0.8805481672286988, "step": 54040 }, { "epoch": 0.9608376442145308, "grad_norm": 1.076756022411705, "learning_rate": 0.0001, "loss": 0.648, "mean_abs_error": 1013.1829177991569, "mean_abs_error_last_10": 527.6571429060228, "mean_abs_error_last_25": 595.6546239950002, "mean_abs_error_last_50": 739.1168657260414, "mean_pred_prob": 0.04194444249733351, "mean_pred_prob_last_10": 0.18289842618396507, "mean_pred_prob_last_25": 0.10884074259956833, "mean_pred_prob_last_50": 0.0691190888552228, "mean_token_accuracy": 0.8724493086338043, "step": 54050 }, { "epoch": 0.9610154125113327, "grad_norm": 1.5836208684296296, "learning_rate": 0.0001, "loss": 0.7009, "mean_abs_error": 136.82220140957912, "mean_abs_error_last_10": 35.26987424328207, "mean_abs_error_last_25": 64.75623190592351, "mean_abs_error_last_50": 104.29611046876975, "mean_pred_prob": 0.06576427044346929, "mean_pred_prob_last_10": 0.3156899832189083, "mean_pred_prob_last_25": 0.17531389687210322, "mean_pred_prob_last_50": 0.10745801171287894, "mean_token_accuracy": 0.8805680871009827, "step": 54060 }, { "epoch": 0.9611931808081347, "grad_norm": 1.4658172640486877, "learning_rate": 0.0001, "loss": 0.6182, "mean_abs_error": 833.9561410619615, "mean_abs_error_last_10": 321.4170569261218, "mean_abs_error_last_25": 436.6779646199017, "mean_abs_error_last_50": 574.565498205485, "mean_pred_prob": 0.016253698454238474, "mean_pred_prob_last_10": 0.08377101179212332, "mean_pred_prob_last_25": 0.045910977059975266, "mean_pred_prob_last_50": 0.027029172470793127, "mean_token_accuracy": 0.8747038602828979, "step": 54070 }, { "epoch": 0.9613709491049366, "grad_norm": 1.8158188093690948, "learning_rate": 0.0001, "loss": 0.8867, "mean_abs_error": 310.27422351464236, "mean_abs_error_last_10": 89.3638826107559, "mean_abs_error_last_25": 189.05696021325, "mean_abs_error_last_50": 217.79528380517814, "mean_pred_prob": 0.022863823082298042, "mean_pred_prob_last_10": 0.13048500679433345, "mean_pred_prob_last_25": 0.06661085207015276, "mean_pred_prob_last_50": 0.039546503033488986, "mean_token_accuracy": 0.8684915542602539, "step": 54080 }, { "epoch": 0.9615487174017385, "grad_norm": 1.1501461915781561, "learning_rate": 0.0001, "loss": 0.7735, "mean_abs_error": 1296.4140655867373, "mean_abs_error_last_10": 673.3840628745239, "mean_abs_error_last_25": 769.9248036761852, "mean_abs_error_last_50": 951.729637900053, "mean_pred_prob": 0.027948256870149634, "mean_pred_prob_last_10": 0.13946022408199496, "mean_pred_prob_last_25": 0.07639658069820143, "mean_pred_prob_last_50": 0.04672033866227139, "mean_token_accuracy": 0.8703623831272125, "step": 54090 }, { "epoch": 0.9617264856985405, "grad_norm": 1.274189068841685, "learning_rate": 0.0001, "loss": 0.7366, "mean_abs_error": 849.1304250151464, "mean_abs_error_last_10": 319.74310695711654, "mean_abs_error_last_25": 408.9777942561002, "mean_abs_error_last_50": 590.5376311643469, "mean_pred_prob": 0.03839252692414448, "mean_pred_prob_last_10": 0.161939002526924, "mean_pred_prob_last_25": 0.10025663439300843, "mean_pred_prob_last_50": 0.06352716206747573, "mean_token_accuracy": 0.8700256526470185, "step": 54100 }, { "epoch": 0.9619042539953425, "grad_norm": 1.6649698567965654, "learning_rate": 0.0001, "loss": 0.62, "mean_abs_error": 291.74593311001973, "mean_abs_error_last_10": 158.2582531091187, "mean_abs_error_last_25": 149.26815136626323, "mean_abs_error_last_50": 187.30071796145734, "mean_pred_prob": 0.037687028339132664, "mean_pred_prob_last_10": 0.19110842943191528, "mean_pred_prob_last_25": 0.10471493052318692, "mean_pred_prob_last_50": 0.06337018911726773, "mean_token_accuracy": 0.8811442017555237, "step": 54110 }, { "epoch": 0.9620820222921445, "grad_norm": 2.6838981455690862, "learning_rate": 0.0001, "loss": 0.6634, "mean_abs_error": 157.29709432344063, "mean_abs_error_last_10": 26.78286057825496, "mean_abs_error_last_25": 56.948113363847995, "mean_abs_error_last_50": 100.78355183091722, "mean_pred_prob": 0.052901994343847035, "mean_pred_prob_last_10": 0.28496633265167476, "mean_pred_prob_last_25": 0.14804906584322453, "mean_pred_prob_last_50": 0.08802811680361629, "mean_token_accuracy": 0.8675833463668823, "step": 54120 }, { "epoch": 0.9622597905889464, "grad_norm": 1.729413899528336, "learning_rate": 0.0001, "loss": 0.5806, "mean_abs_error": 510.3061920088224, "mean_abs_error_last_10": 150.4625024236928, "mean_abs_error_last_25": 173.8374270664619, "mean_abs_error_last_50": 281.72908622495515, "mean_pred_prob": 0.0473719121189788, "mean_pred_prob_last_10": 0.21037161884596572, "mean_pred_prob_last_25": 0.11751653088722377, "mean_pred_prob_last_50": 0.07540251773316413, "mean_token_accuracy": 0.8767327010631562, "step": 54130 }, { "epoch": 0.9624375588857483, "grad_norm": 1.7335923509278903, "learning_rate": 0.0001, "loss": 0.7716, "mean_abs_error": 301.9795372934155, "mean_abs_error_last_10": 120.01157491117795, "mean_abs_error_last_25": 157.91047984029677, "mean_abs_error_last_50": 192.78760834687088, "mean_pred_prob": 0.04980157387908548, "mean_pred_prob_last_10": 0.25321209076792, "mean_pred_prob_last_25": 0.14205690445378422, "mean_pred_prob_last_50": 0.08452341435477137, "mean_token_accuracy": 0.8716456711292266, "step": 54140 }, { "epoch": 0.9626153271825503, "grad_norm": 0.9691999677827828, "learning_rate": 0.0001, "loss": 0.6674, "mean_abs_error": 487.4188861896626, "mean_abs_error_last_10": NaN, "mean_abs_error_last_25": NaN, "mean_abs_error_last_50": 277.4453882607944, "mean_pred_prob": 0.08033505738712847, "mean_pred_prob_last_10": 0.1853226138278842, "mean_pred_prob_last_25": 0.12595905354246498, "mean_pred_prob_last_50": 0.1021905758883804, "mean_token_accuracy": 0.8810749053955078, "step": 54150 }, { "epoch": 0.9627930954793522, "grad_norm": 1.4279357623545759, "learning_rate": 0.0001, "loss": 0.6898, "mean_abs_error": 834.1387394776351, "mean_abs_error_last_10": 191.08529496418652, "mean_abs_error_last_25": 367.47914309945577, "mean_abs_error_last_50": 601.7683381288211, "mean_pred_prob": 0.027554859523661435, "mean_pred_prob_last_10": 0.1147549232468009, "mean_pred_prob_last_25": 0.07046168241649867, "mean_pred_prob_last_50": 0.04495589272119105, "mean_token_accuracy": 0.8624621510505677, "step": 54160 }, { "epoch": 0.9629708637761542, "grad_norm": 2.086300709467218, "learning_rate": 0.0001, "loss": 0.7323, "mean_abs_error": 816.2233948530126, "mean_abs_error_last_10": 321.59352074212296, "mean_abs_error_last_25": 360.76847477643116, "mean_abs_error_last_50": 483.1392500533424, "mean_pred_prob": 0.046092945890268314, "mean_pred_prob_last_10": 0.21556209268746898, "mean_pred_prob_last_25": 0.12953747166902757, "mean_pred_prob_last_50": 0.07850462425558362, "mean_token_accuracy": 0.8732089221477508, "step": 54170 }, { "epoch": 0.9631486320729561, "grad_norm": 2.996453350941959, "learning_rate": 0.0001, "loss": 0.8134, "mean_abs_error": 907.7351874022058, "mean_abs_error_last_10": 391.7330211413322, "mean_abs_error_last_25": 469.87081090345254, "mean_abs_error_last_50": 626.8061007501419, "mean_pred_prob": 0.022844406214426273, "mean_pred_prob_last_10": 0.10432692872127518, "mean_pred_prob_last_25": 0.057917632587486875, "mean_pred_prob_last_50": 0.03718070542090572, "mean_token_accuracy": 0.8622132956981658, "step": 54180 }, { "epoch": 0.963326400369758, "grad_norm": 1.305492932243655, "learning_rate": 0.0001, "loss": 0.6805, "mean_abs_error": 392.1119198391194, "mean_abs_error_last_10": 192.12653752565856, "mean_abs_error_last_25": 324.35090001215093, "mean_abs_error_last_50": 347.4798945066393, "mean_pred_prob": 0.04504426552448422, "mean_pred_prob_last_10": 0.2269307252019644, "mean_pred_prob_last_25": 0.12235499606467784, "mean_pred_prob_last_50": 0.07482237161602825, "mean_token_accuracy": 0.8860482335090637, "step": 54190 }, { "epoch": 0.96350416866656, "grad_norm": 0.9599680600276006, "learning_rate": 0.0001, "loss": 0.7497, "mean_abs_error": 366.55884327417476, "mean_abs_error_last_10": 147.56492223428762, "mean_abs_error_last_25": 171.23525770427688, "mean_abs_error_last_50": 239.84595051970146, "mean_pred_prob": 0.04362546154297888, "mean_pred_prob_last_10": 0.20835221670567988, "mean_pred_prob_last_25": 0.11912450073286891, "mean_pred_prob_last_50": 0.0741712694056332, "mean_token_accuracy": 0.8670591235160827, "step": 54200 }, { "epoch": 0.9636819369633619, "grad_norm": 3.263259199236934, "learning_rate": 0.0001, "loss": 0.6726, "mean_abs_error": 697.5500474219826, "mean_abs_error_last_10": 410.49232988988814, "mean_abs_error_last_25": 448.08631528341965, "mean_abs_error_last_50": 530.7062791240803, "mean_pred_prob": 0.05954464648384601, "mean_pred_prob_last_10": 0.2515235342027154, "mean_pred_prob_last_25": 0.1471099721151404, "mean_pred_prob_last_50": 0.0955675206700107, "mean_token_accuracy": 0.8728314399719238, "step": 54210 }, { "epoch": 0.963859705260164, "grad_norm": 2.494167516012632, "learning_rate": 0.0001, "loss": 0.7126, "mean_abs_error": 262.04670040641247, "mean_abs_error_last_10": 78.5732476262306, "mean_abs_error_last_25": 143.23878042560287, "mean_abs_error_last_50": 198.4678838279152, "mean_pred_prob": 0.034038195852190256, "mean_pred_prob_last_10": 0.1736841879785061, "mean_pred_prob_last_25": 0.0934387831017375, "mean_pred_prob_last_50": 0.05732111847028136, "mean_token_accuracy": 0.8829051315784454, "step": 54220 }, { "epoch": 0.9640374735569659, "grad_norm": 1.8744111215656722, "learning_rate": 0.0001, "loss": 0.6574, "mean_abs_error": 969.2050950231662, "mean_abs_error_last_10": 411.34388518426096, "mean_abs_error_last_25": 531.124613824905, "mean_abs_error_last_50": 716.1268972815808, "mean_pred_prob": 0.03114637963590212, "mean_pred_prob_last_10": 0.15115634937537833, "mean_pred_prob_last_25": 0.0853934375103563, "mean_pred_prob_last_50": 0.05234177955135237, "mean_token_accuracy": 0.8643112063407898, "step": 54230 }, { "epoch": 0.9642152418537678, "grad_norm": 1.4366043020594244, "learning_rate": 0.0001, "loss": 0.6354, "mean_abs_error": 489.7326061756996, "mean_abs_error_last_10": 129.2259528060356, "mean_abs_error_last_25": 191.82579832010566, "mean_abs_error_last_50": 312.6296441494548, "mean_pred_prob": 0.04067258364520967, "mean_pred_prob_last_10": 0.19548040591180324, "mean_pred_prob_last_25": 0.108140361122787, "mean_pred_prob_last_50": 0.06690499018877745, "mean_token_accuracy": 0.883676964044571, "step": 54240 }, { "epoch": 0.9643930101505698, "grad_norm": 1.3709129624875183, "learning_rate": 0.0001, "loss": 0.8062, "mean_abs_error": 328.36118499572575, "mean_abs_error_last_10": 114.60654003501054, "mean_abs_error_last_25": 201.87830227883128, "mean_abs_error_last_50": 271.715125246617, "mean_pred_prob": 0.04909843229688704, "mean_pred_prob_last_10": 0.2184584192931652, "mean_pred_prob_last_25": 0.13033751836046575, "mean_pred_prob_last_50": 0.0803976295515895, "mean_token_accuracy": 0.8723255932331085, "step": 54250 }, { "epoch": 0.9645707784473717, "grad_norm": 1.1851561147823577, "learning_rate": 0.0001, "loss": 0.6584, "mean_abs_error": 508.5748089357486, "mean_abs_error_last_10": 141.8369489198238, "mean_abs_error_last_25": 185.99455044218158, "mean_abs_error_last_50": 297.2535346691269, "mean_pred_prob": 0.03074869913398288, "mean_pred_prob_last_10": 0.15773382956394927, "mean_pred_prob_last_25": 0.08529433505027555, "mean_pred_prob_last_50": 0.05230134773883037, "mean_token_accuracy": 0.8814148247241974, "step": 54260 }, { "epoch": 0.9647485467441737, "grad_norm": 1.5262760719421005, "learning_rate": 0.0001, "loss": 0.6767, "mean_abs_error": 609.926590382265, "mean_abs_error_last_10": 248.4377257569046, "mean_abs_error_last_25": 303.38520728762035, "mean_abs_error_last_50": 358.3995318181054, "mean_pred_prob": 0.030533187021501363, "mean_pred_prob_last_10": 0.16466702870093286, "mean_pred_prob_last_25": 0.08572268838761374, "mean_pred_prob_last_50": 0.051462642522528766, "mean_token_accuracy": 0.8703594923019409, "step": 54270 }, { "epoch": 0.9649263150409756, "grad_norm": 1.9163285798247, "learning_rate": 0.0001, "loss": 0.5746, "mean_abs_error": 406.66888114319465, "mean_abs_error_last_10": 149.3901399180557, "mean_abs_error_last_25": 154.04422357770707, "mean_abs_error_last_50": 200.74128556291876, "mean_pred_prob": 0.03826616231817752, "mean_pred_prob_last_10": 0.18405436063185335, "mean_pred_prob_last_25": 0.10186807075515389, "mean_pred_prob_last_50": 0.06350764769595116, "mean_token_accuracy": 0.8820888519287109, "step": 54280 }, { "epoch": 0.9651040833377775, "grad_norm": 2.2383332853700417, "learning_rate": 0.0001, "loss": 0.6492, "mean_abs_error": 316.49881498206344, "mean_abs_error_last_10": 63.73627523528486, "mean_abs_error_last_25": 114.27125020385957, "mean_abs_error_last_50": 170.69286411332376, "mean_pred_prob": 0.04211923172697425, "mean_pred_prob_last_10": 0.21408427972346544, "mean_pred_prob_last_25": 0.11686206422746181, "mean_pred_prob_last_50": 0.07150857467204333, "mean_token_accuracy": 0.8746354103088378, "step": 54290 }, { "epoch": 0.9652818516345795, "grad_norm": 0.8936986382535661, "learning_rate": 0.0001, "loss": 0.6558, "mean_abs_error": 884.9680248501497, "mean_abs_error_last_10": 486.15112600889296, "mean_abs_error_last_25": 525.17845526149, "mean_abs_error_last_50": 646.8318634867711, "mean_pred_prob": 0.031145106346230023, "mean_pred_prob_last_10": 0.15459777628129814, "mean_pred_prob_last_25": 0.08377347363275475, "mean_pred_prob_last_50": 0.051313603777089156, "mean_token_accuracy": 0.8672117292881012, "step": 54300 }, { "epoch": 0.9654596199313814, "grad_norm": 1.1298844977135214, "learning_rate": 0.0001, "loss": 0.7698, "mean_abs_error": 375.92350625271, "mean_abs_error_last_10": 69.62285263462599, "mean_abs_error_last_25": 83.01675646993321, "mean_abs_error_last_50": 161.68791028883015, "mean_pred_prob": 0.05108400003518909, "mean_pred_prob_last_10": 0.2779536973685026, "mean_pred_prob_last_25": 0.14943371005356312, "mean_pred_prob_last_50": 0.08821215955540537, "mean_token_accuracy": 0.8675760090351105, "step": 54310 }, { "epoch": 0.9656373882281833, "grad_norm": 1.2806381596956122, "learning_rate": 0.0001, "loss": 0.6915, "mean_abs_error": 401.4851929541957, "mean_abs_error_last_10": 106.15257728002969, "mean_abs_error_last_25": 141.75168927968392, "mean_abs_error_last_50": 223.1459121004679, "mean_pred_prob": 0.0255496964789927, "mean_pred_prob_last_10": 0.1319402851164341, "mean_pred_prob_last_25": 0.07184715773910284, "mean_pred_prob_last_50": 0.04322336884215474, "mean_token_accuracy": 0.8745501875877381, "step": 54320 }, { "epoch": 0.9658151565249853, "grad_norm": 4.108539731200789, "learning_rate": 0.0001, "loss": 0.7579, "mean_abs_error": 571.5608001542784, "mean_abs_error_last_10": 238.00590655832607, "mean_abs_error_last_25": 305.7427413972776, "mean_abs_error_last_50": 419.3015212458225, "mean_pred_prob": 0.03476844018441625, "mean_pred_prob_last_10": 0.18976095013786107, "mean_pred_prob_last_25": 0.100712458754424, "mean_pred_prob_last_50": 0.058761155657703054, "mean_token_accuracy": 0.8798788011074066, "step": 54330 }, { "epoch": 0.9659929248217873, "grad_norm": 1.150414365999198, "learning_rate": 0.0001, "loss": 0.6434, "mean_abs_error": 1125.1950835151868, "mean_abs_error_last_10": 670.1966616907362, "mean_abs_error_last_25": 766.7676384335243, "mean_abs_error_last_50": 861.3935996365584, "mean_pred_prob": 0.02532857390178833, "mean_pred_prob_last_10": 0.1258865121693816, "mean_pred_prob_last_25": 0.06782265243818983, "mean_pred_prob_last_50": 0.041616646357579154, "mean_token_accuracy": 0.8751702189445496, "step": 54340 }, { "epoch": 0.9661706931185893, "grad_norm": 1.5882840133942613, "learning_rate": 0.0001, "loss": 0.6665, "mean_abs_error": 451.1421236116099, "mean_abs_error_last_10": 167.62445849098623, "mean_abs_error_last_25": 217.0808061906837, "mean_abs_error_last_50": 304.1527346105931, "mean_pred_prob": 0.039199091249611226, "mean_pred_prob_last_10": 0.19877003671135754, "mean_pred_prob_last_25": 0.1113373325788416, "mean_pred_prob_last_50": 0.06726795408176259, "mean_token_accuracy": 0.8728693902492524, "step": 54350 }, { "epoch": 0.9663484614153912, "grad_norm": 1.4772096551641103, "learning_rate": 0.0001, "loss": 0.6208, "mean_abs_error": 943.0723665107671, "mean_abs_error_last_10": 625.3440738902032, "mean_abs_error_last_25": 658.1194841177717, "mean_abs_error_last_50": 766.0689082565611, "mean_pred_prob": 0.03861385365453316, "mean_pred_prob_last_10": 0.18657887732551898, "mean_pred_prob_last_25": 0.10778976363944821, "mean_pred_prob_last_50": 0.06412288806895958, "mean_token_accuracy": 0.8677865445613862, "step": 54360 }, { "epoch": 0.9665262297121932, "grad_norm": 1.4174211616960273, "learning_rate": 0.0001, "loss": 0.6924, "mean_abs_error": 191.6788861526084, "mean_abs_error_last_10": 69.0417354412147, "mean_abs_error_last_25": 110.07808826053402, "mean_abs_error_last_50": 140.13102186514024, "mean_pred_prob": 0.05046499734744429, "mean_pred_prob_last_10": 0.21254800744354724, "mean_pred_prob_last_25": 0.12664955481886864, "mean_pred_prob_last_50": 0.0809860716573894, "mean_token_accuracy": 0.8716950058937073, "step": 54370 }, { "epoch": 0.9667039980089951, "grad_norm": 2.001069481188279, "learning_rate": 0.0001, "loss": 0.7005, "mean_abs_error": 829.5763696272063, "mean_abs_error_last_10": 287.56064055957756, "mean_abs_error_last_25": 424.6083933343604, "mean_abs_error_last_50": 589.5355947792248, "mean_pred_prob": 0.020665759511757642, "mean_pred_prob_last_10": 0.09778482521651313, "mean_pred_prob_last_25": 0.0532858932740055, "mean_pred_prob_last_50": 0.033962286659516394, "mean_token_accuracy": 0.8619140267372132, "step": 54380 }, { "epoch": 0.966881766305797, "grad_norm": 0.9966901968894171, "learning_rate": 0.0001, "loss": 0.6995, "mean_abs_error": 465.0922993959724, "mean_abs_error_last_10": 102.87295133791231, "mean_abs_error_last_25": 178.4326797464752, "mean_abs_error_last_50": 257.77264453198825, "mean_pred_prob": 0.033516608865465966, "mean_pred_prob_last_10": 0.16346451623830943, "mean_pred_prob_last_25": 0.09120080317370594, "mean_pred_prob_last_50": 0.05477561412844807, "mean_token_accuracy": 0.8649577677249909, "step": 54390 }, { "epoch": 0.967059534602599, "grad_norm": 2.284755337552472, "learning_rate": 0.0001, "loss": 0.6965, "mean_abs_error": 355.83488811174846, "mean_abs_error_last_10": 93.45787252206372, "mean_abs_error_last_25": 105.3263304027012, "mean_abs_error_last_50": 197.18624452468092, "mean_pred_prob": 0.03603896056301892, "mean_pred_prob_last_10": 0.18252507466822862, "mean_pred_prob_last_25": 0.10234466204419732, "mean_pred_prob_last_50": 0.06119589069858193, "mean_token_accuracy": 0.8736139476299286, "step": 54400 }, { "epoch": 0.9672373028994009, "grad_norm": 1.2218401038647937, "learning_rate": 0.0001, "loss": 0.5326, "mean_abs_error": 510.8582148969216, "mean_abs_error_last_10": 230.6007367250881, "mean_abs_error_last_25": 285.21005241945744, "mean_abs_error_last_50": 347.1227908245248, "mean_pred_prob": 0.04108712215675041, "mean_pred_prob_last_10": 0.1962871256750077, "mean_pred_prob_last_25": 0.10954495703335851, "mean_pred_prob_last_50": 0.06815895753679797, "mean_token_accuracy": 0.874880188703537, "step": 54410 }, { "epoch": 0.9674150711962028, "grad_norm": 2.294037366265863, "learning_rate": 0.0001, "loss": 0.6892, "mean_abs_error": 510.17168190711925, "mean_abs_error_last_10": 232.2004523778647, "mean_abs_error_last_25": 193.61592839908548, "mean_abs_error_last_50": 272.8331126261114, "mean_pred_prob": 0.03753901175223291, "mean_pred_prob_last_10": 0.17247092984616758, "mean_pred_prob_last_25": 0.10059313382953405, "mean_pred_prob_last_50": 0.06263804137706756, "mean_token_accuracy": 0.8729098975658417, "step": 54420 }, { "epoch": 0.9675928394930048, "grad_norm": 1.2889613463592229, "learning_rate": 0.0001, "loss": 0.6851, "mean_abs_error": 826.7433408946836, "mean_abs_error_last_10": 416.946862027676, "mean_abs_error_last_25": 451.4022105768692, "mean_abs_error_last_50": 627.9250903354574, "mean_pred_prob": 0.030618596021668054, "mean_pred_prob_last_10": 0.16390342265367508, "mean_pred_prob_last_25": 0.08647906247642823, "mean_pred_prob_last_50": 0.05149826929555275, "mean_token_accuracy": 0.8748079478740692, "step": 54430 }, { "epoch": 0.9677706077898067, "grad_norm": 1.949894225328559, "learning_rate": 0.0001, "loss": 0.7133, "mean_abs_error": 900.8535423877565, "mean_abs_error_last_10": 377.5185286975355, "mean_abs_error_last_25": 447.61742661594906, "mean_abs_error_last_50": 633.7928873369608, "mean_pred_prob": 0.04392551867495058, "mean_pred_prob_last_10": 0.21501542963669634, "mean_pred_prob_last_25": 0.11599688270071055, "mean_pred_prob_last_50": 0.07227905794861726, "mean_token_accuracy": 0.8668071091175079, "step": 54440 }, { "epoch": 0.9679483760866087, "grad_norm": 1.0202910718624638, "learning_rate": 0.0001, "loss": 0.6701, "mean_abs_error": 323.4879267444534, "mean_abs_error_last_10": 95.42167304427925, "mean_abs_error_last_25": 106.26656411661888, "mean_abs_error_last_50": 179.44181638994624, "mean_pred_prob": 0.04005969977006316, "mean_pred_prob_last_10": 0.1914044816046953, "mean_pred_prob_last_25": 0.10618597157299518, "mean_pred_prob_last_50": 0.06624825401231646, "mean_token_accuracy": 0.8724706947803498, "step": 54450 }, { "epoch": 0.9681261443834107, "grad_norm": 0.7129499238801511, "learning_rate": 0.0001, "loss": 0.702, "mean_abs_error": 359.10538816262283, "mean_abs_error_last_10": 80.19083973861521, "mean_abs_error_last_25": 104.04118424402502, "mean_abs_error_last_50": 193.50339420285255, "mean_pred_prob": 0.044579457002691926, "mean_pred_prob_last_10": 0.22097512800246477, "mean_pred_prob_last_25": 0.12771412055008113, "mean_pred_prob_last_50": 0.07669276434462517, "mean_token_accuracy": 0.8682020545005799, "step": 54460 }, { "epoch": 0.9683039126802127, "grad_norm": 2.959291832128537, "learning_rate": 0.0001, "loss": 0.7667, "mean_abs_error": 928.2814281228812, "mean_abs_error_last_10": 476.88799369258976, "mean_abs_error_last_25": 506.71126865328597, "mean_abs_error_last_50": 634.3537697153924, "mean_pred_prob": 0.034382216396625156, "mean_pred_prob_last_10": 0.16921881647431292, "mean_pred_prob_last_25": 0.09219475313730072, "mean_pred_prob_last_50": 0.05777186885825358, "mean_token_accuracy": 0.8661905646324157, "step": 54470 }, { "epoch": 0.9684816809770146, "grad_norm": 3.1212050094965273, "learning_rate": 0.0001, "loss": 0.6775, "mean_abs_error": 482.4062624801256, "mean_abs_error_last_10": 134.2013519350703, "mean_abs_error_last_25": 155.10178115701206, "mean_abs_error_last_50": 297.4230824475427, "mean_pred_prob": 0.045735430088825524, "mean_pred_prob_last_10": 0.2211216352880001, "mean_pred_prob_last_25": 0.1252474264241755, "mean_pred_prob_last_50": 0.0767418381292373, "mean_token_accuracy": 0.8811071276664734, "step": 54480 }, { "epoch": 0.9686594492738165, "grad_norm": 1.2016167118029886, "learning_rate": 0.0001, "loss": 0.7904, "mean_abs_error": 512.5051352518511, "mean_abs_error_last_10": 108.5643499494231, "mean_abs_error_last_25": 153.06223930612003, "mean_abs_error_last_50": 249.0616642496854, "mean_pred_prob": 0.03697152570821345, "mean_pred_prob_last_10": 0.17423115149140359, "mean_pred_prob_last_25": 0.10236929561942816, "mean_pred_prob_last_50": 0.06210983516648412, "mean_token_accuracy": 0.8643247425556183, "step": 54490 }, { "epoch": 0.9688372175706185, "grad_norm": 2.081237183818322, "learning_rate": 0.0001, "loss": 0.8294, "mean_abs_error": 671.4295172798527, "mean_abs_error_last_10": 163.80806045472605, "mean_abs_error_last_25": 283.19745430898377, "mean_abs_error_last_50": 378.7336842845652, "mean_pred_prob": 0.03292810680577531, "mean_pred_prob_last_10": 0.16906708221649752, "mean_pred_prob_last_25": 0.09115113933803513, "mean_pred_prob_last_50": 0.054899680137168616, "mean_token_accuracy": 0.8612428069114685, "step": 54500 }, { "epoch": 0.9690149858674204, "grad_norm": 1.1325252473612153, "learning_rate": 0.0001, "loss": 0.8155, "mean_abs_error": 578.3090941710796, "mean_abs_error_last_10": 189.82374445284094, "mean_abs_error_last_25": 237.7024863119339, "mean_abs_error_last_50": 298.8806001725064, "mean_pred_prob": 0.03764337485772558, "mean_pred_prob_last_10": 0.19511906356783584, "mean_pred_prob_last_25": 0.10469627459533512, "mean_pred_prob_last_50": 0.06274788138689473, "mean_token_accuracy": 0.8686266660690307, "step": 54510 }, { "epoch": 0.9691927541642223, "grad_norm": 1.2819166552630348, "learning_rate": 0.0001, "loss": 0.7618, "mean_abs_error": 370.90518591031406, "mean_abs_error_last_10": 84.23870203566928, "mean_abs_error_last_25": 107.11969711136405, "mean_abs_error_last_50": 226.85370265417873, "mean_pred_prob": 0.04839867018163204, "mean_pred_prob_last_10": 0.23426658604294062, "mean_pred_prob_last_25": 0.13158829752355813, "mean_pred_prob_last_50": 0.08193045249208808, "mean_token_accuracy": 0.8687040328979492, "step": 54520 }, { "epoch": 0.9693705224610243, "grad_norm": 1.3680614677076222, "learning_rate": 0.0001, "loss": 0.6958, "mean_abs_error": 773.8741634210761, "mean_abs_error_last_10": 295.2025337310444, "mean_abs_error_last_25": 403.9840759672643, "mean_abs_error_last_50": 463.446746172507, "mean_pred_prob": 0.0237130900262855, "mean_pred_prob_last_10": 0.1279691571369767, "mean_pred_prob_last_25": 0.06682576718740166, "mean_pred_prob_last_50": 0.04021480090450495, "mean_token_accuracy": 0.8661152124404907, "step": 54530 }, { "epoch": 0.9695482907578262, "grad_norm": 0.9570781878261557, "learning_rate": 0.0001, "loss": 0.7712, "mean_abs_error": 301.15910335130604, "mean_abs_error_last_10": 73.4938299480439, "mean_abs_error_last_25": 111.60778406278757, "mean_abs_error_last_50": 185.9796773985544, "mean_pred_prob": 0.03610336324200034, "mean_pred_prob_last_10": 0.19463268406689166, "mean_pred_prob_last_25": 0.10174045749008656, "mean_pred_prob_last_50": 0.06262342566624284, "mean_token_accuracy": 0.8712446331977844, "step": 54540 }, { "epoch": 0.9697260590546282, "grad_norm": 2.384949034937999, "learning_rate": 0.0001, "loss": 0.723, "mean_abs_error": 428.7824876580532, "mean_abs_error_last_10": 120.08467424962976, "mean_abs_error_last_25": 178.84175243964972, "mean_abs_error_last_50": 297.4850909774293, "mean_pred_prob": 0.02836533428635448, "mean_pred_prob_last_10": 0.15115723129129038, "mean_pred_prob_last_25": 0.07687885310733691, "mean_pred_prob_last_50": 0.04721147099044174, "mean_token_accuracy": 0.8700648784637451, "step": 54550 }, { "epoch": 0.9699038273514301, "grad_norm": 2.03052692971874, "learning_rate": 0.0001, "loss": 0.6301, "mean_abs_error": 73.42976287114055, "mean_abs_error_last_10": 17.696443206470196, "mean_abs_error_last_25": 30.561507760018003, "mean_abs_error_last_50": 47.15973531146882, "mean_pred_prob": 0.05993183068931103, "mean_pred_prob_last_10": 0.2845778353512287, "mean_pred_prob_last_25": 0.16201584711670874, "mean_pred_prob_last_50": 0.09903172608464957, "mean_token_accuracy": 0.880132257938385, "step": 54560 }, { "epoch": 0.970081595648232, "grad_norm": 2.307279469178391, "learning_rate": 0.0001, "loss": 0.7945, "mean_abs_error": 614.8852508408403, "mean_abs_error_last_10": 224.0588820923428, "mean_abs_error_last_25": 308.87014624157143, "mean_abs_error_last_50": 373.53445416424086, "mean_pred_prob": 0.03225745534873568, "mean_pred_prob_last_10": 0.16506385139655322, "mean_pred_prob_last_25": 0.08924485188908875, "mean_pred_prob_last_50": 0.05354667231440544, "mean_token_accuracy": 0.8630168557167053, "step": 54570 }, { "epoch": 0.9702593639450341, "grad_norm": 1.6255053787461287, "learning_rate": 0.0001, "loss": 0.5563, "mean_abs_error": 823.9104975744746, "mean_abs_error_last_10": 262.27192704782493, "mean_abs_error_last_25": 326.3930158269243, "mean_abs_error_last_50": 525.21819030208, "mean_pred_prob": 0.03127872648183257, "mean_pred_prob_last_10": 0.1558604803867638, "mean_pred_prob_last_25": 0.08284161015180871, "mean_pred_prob_last_50": 0.05157414485584013, "mean_token_accuracy": 0.877652645111084, "step": 54580 }, { "epoch": 0.970437132241836, "grad_norm": 1.3529862439812177, "learning_rate": 0.0001, "loss": 0.5961, "mean_abs_error": 582.3261772624995, "mean_abs_error_last_10": 268.4572832462592, "mean_abs_error_last_25": 275.7544076618784, "mean_abs_error_last_50": 378.9692274742491, "mean_pred_prob": 0.025331718428060413, "mean_pred_prob_last_10": 0.13147657527588308, "mean_pred_prob_last_25": 0.07056527939857915, "mean_pred_prob_last_50": 0.042722871352452785, "mean_token_accuracy": 0.8850701987743378, "step": 54590 }, { "epoch": 0.970614900538638, "grad_norm": 1.64878023777366, "learning_rate": 0.0001, "loss": 0.624, "mean_abs_error": 126.38839381184076, "mean_abs_error_last_10": 14.64946841821624, "mean_abs_error_last_25": 42.14537428407865, "mean_abs_error_last_50": 68.29823033686516, "mean_pred_prob": 0.07020971830934286, "mean_pred_prob_last_10": 0.3017460033297539, "mean_pred_prob_last_25": 0.17390788327902557, "mean_pred_prob_last_50": 0.1113979671150446, "mean_token_accuracy": 0.873027640581131, "step": 54600 }, { "epoch": 0.9707926688354399, "grad_norm": 1.123870505346169, "learning_rate": 0.0001, "loss": 0.6772, "mean_abs_error": 218.2217738408549, "mean_abs_error_last_10": 92.6352357314336, "mean_abs_error_last_25": 86.06360347571855, "mean_abs_error_last_50": 139.19744199329583, "mean_pred_prob": 0.04103325470350683, "mean_pred_prob_last_10": 0.20164379719644784, "mean_pred_prob_last_25": 0.11581925190985203, "mean_pred_prob_last_50": 0.06968209589831531, "mean_token_accuracy": 0.8698609173297882, "step": 54610 }, { "epoch": 0.9709704371322418, "grad_norm": 1.4622723509809787, "learning_rate": 0.0001, "loss": 0.7506, "mean_abs_error": 285.8367606631664, "mean_abs_error_last_10": 89.68594263499126, "mean_abs_error_last_25": 125.63137672845328, "mean_abs_error_last_50": 176.6605848119028, "mean_pred_prob": 0.04123603252228349, "mean_pred_prob_last_10": 0.20451283287256955, "mean_pred_prob_last_25": 0.1107387631200254, "mean_pred_prob_last_50": 0.06905522728338838, "mean_token_accuracy": 0.8623217582702637, "step": 54620 }, { "epoch": 0.9711482054290438, "grad_norm": 1.2464733867442677, "learning_rate": 0.0001, "loss": 0.6647, "mean_abs_error": 842.7402792686773, "mean_abs_error_last_10": 410.8401905744255, "mean_abs_error_last_25": 488.9552801801753, "mean_abs_error_last_50": 616.3438667577914, "mean_pred_prob": 0.04200767510337755, "mean_pred_prob_last_10": 0.189825716445921, "mean_pred_prob_last_25": 0.10752546858566348, "mean_pred_prob_last_50": 0.0674093301378889, "mean_token_accuracy": 0.8741302609443664, "step": 54630 }, { "epoch": 0.9713259737258457, "grad_norm": 3.178604384051333, "learning_rate": 0.0001, "loss": 0.6901, "mean_abs_error": 355.49210587686787, "mean_abs_error_last_10": 92.39161232343169, "mean_abs_error_last_25": 102.59402663029759, "mean_abs_error_last_50": 153.22982274963093, "mean_pred_prob": 0.059255730197764936, "mean_pred_prob_last_10": 0.257023892365396, "mean_pred_prob_last_25": 0.150279541593045, "mean_pred_prob_last_50": 0.09660429623909295, "mean_token_accuracy": 0.8715164661407471, "step": 54640 }, { "epoch": 0.9715037420226477, "grad_norm": 2.4489737540112753, "learning_rate": 0.0001, "loss": 0.7154, "mean_abs_error": 905.5562499054022, "mean_abs_error_last_10": 426.73335966785453, "mean_abs_error_last_25": 478.3414491224677, "mean_abs_error_last_50": 618.3169110489459, "mean_pred_prob": 0.027479839939041996, "mean_pred_prob_last_10": 0.13166880565695466, "mean_pred_prob_last_25": 0.0729326096305158, "mean_pred_prob_last_50": 0.04513655034825206, "mean_token_accuracy": 0.8690303862094879, "step": 54650 }, { "epoch": 0.9716815103194496, "grad_norm": 3.052053071747755, "learning_rate": 0.0001, "loss": 0.6343, "mean_abs_error": 132.23506425823228, "mean_abs_error_last_10": 19.7192565631429, "mean_abs_error_last_25": 53.57589115339706, "mean_abs_error_last_50": 84.06126218479443, "mean_pred_prob": 0.05233641993254423, "mean_pred_prob_last_10": 0.28512016385793687, "mean_pred_prob_last_25": 0.14545688666403295, "mean_pred_prob_last_50": 0.08782617021352053, "mean_token_accuracy": 0.8874667823314667, "step": 54660 }, { "epoch": 0.9718592786162515, "grad_norm": 1.0004765273599274, "learning_rate": 0.0001, "loss": 0.6427, "mean_abs_error": 1475.6185985299637, "mean_abs_error_last_10": 886.9102745813389, "mean_abs_error_last_25": 997.1028968322122, "mean_abs_error_last_50": 1102.6600115870729, "mean_pred_prob": 0.025507802322681527, "mean_pred_prob_last_10": 0.1256995903662755, "mean_pred_prob_last_25": 0.06679181651124963, "mean_pred_prob_last_50": 0.04235697567200987, "mean_token_accuracy": 0.8715957641601563, "step": 54670 }, { "epoch": 0.9720370469130535, "grad_norm": 1.7763125159855322, "learning_rate": 0.0001, "loss": 0.5959, "mean_abs_error": 214.24453576976128, "mean_abs_error_last_10": 21.229757605419703, "mean_abs_error_last_25": 56.029517181691446, "mean_abs_error_last_50": 94.55718439543897, "mean_pred_prob": 0.05592144727706909, "mean_pred_prob_last_10": 0.2801081668585539, "mean_pred_prob_last_25": 0.1524540306068957, "mean_pred_prob_last_50": 0.09413952415343374, "mean_token_accuracy": 0.8786443293094635, "step": 54680 }, { "epoch": 0.9722148152098554, "grad_norm": 1.872874419793438, "learning_rate": 0.0001, "loss": 0.6921, "mean_abs_error": 105.50464643738262, "mean_abs_error_last_10": 28.11887645033646, "mean_abs_error_last_25": 43.69362598191334, "mean_abs_error_last_50": 72.99493026504696, "mean_pred_prob": 0.058619917742908, "mean_pred_prob_last_10": 0.2525734592229128, "mean_pred_prob_last_25": 0.15214281883090736, "mean_pred_prob_last_50": 0.09792327899485827, "mean_token_accuracy": 0.8688015401363373, "step": 54690 }, { "epoch": 0.9723925835066575, "grad_norm": 1.8159809326500505, "learning_rate": 0.0001, "loss": 0.5983, "mean_abs_error": 454.93403870108784, "mean_abs_error_last_10": 129.22165486659156, "mean_abs_error_last_25": 183.14941583800683, "mean_abs_error_last_50": 270.62028648675636, "mean_pred_prob": 0.03898761817254126, "mean_pred_prob_last_10": 0.1972433367744088, "mean_pred_prob_last_25": 0.10959570929408073, "mean_pred_prob_last_50": 0.06688674897886812, "mean_token_accuracy": 0.8730570375919342, "step": 54700 }, { "epoch": 0.9725703518034594, "grad_norm": 1.402838507419228, "learning_rate": 0.0001, "loss": 0.7089, "mean_abs_error": 574.2354368394035, "mean_abs_error_last_10": 191.1550218807059, "mean_abs_error_last_25": 256.09866798930017, "mean_abs_error_last_50": 386.9387793333041, "mean_pred_prob": 0.027324992296053098, "mean_pred_prob_last_10": 0.13834248882485553, "mean_pred_prob_last_25": 0.07346583293983713, "mean_pred_prob_last_50": 0.04549390128231608, "mean_token_accuracy": 0.8730725288391114, "step": 54710 }, { "epoch": 0.9727481201002613, "grad_norm": 3.3668381375833754, "learning_rate": 0.0001, "loss": 0.6146, "mean_abs_error": 420.87392384827706, "mean_abs_error_last_10": 133.53270234220366, "mean_abs_error_last_25": 156.94009617675385, "mean_abs_error_last_50": 245.88886694200897, "mean_pred_prob": 0.051385826413752514, "mean_pred_prob_last_10": 0.22190073584206402, "mean_pred_prob_last_25": 0.13345147704239935, "mean_pred_prob_last_50": 0.08427935898071155, "mean_token_accuracy": 0.8667517244815827, "step": 54720 }, { "epoch": 0.9729258883970633, "grad_norm": 1.5932885813257935, "learning_rate": 0.0001, "loss": 0.8186, "mean_abs_error": 1514.0804380563404, "mean_abs_error_last_10": 752.4929507949362, "mean_abs_error_last_25": 806.0069645589554, "mean_abs_error_last_50": 1031.0366254641256, "mean_pred_prob": 0.0238523063424509, "mean_pred_prob_last_10": 0.11484688658383675, "mean_pred_prob_last_25": 0.06404889556579292, "mean_pred_prob_last_50": 0.040243782743345946, "mean_token_accuracy": 0.85759739279747, "step": 54730 }, { "epoch": 0.9731036566938652, "grad_norm": 1.8815736658892024, "learning_rate": 0.0001, "loss": 0.8162, "mean_abs_error": 794.340693709703, "mean_abs_error_last_10": 532.719990774083, "mean_abs_error_last_25": 560.3326633464637, "mean_abs_error_last_50": 672.0133060592085, "mean_pred_prob": 0.044422625261358914, "mean_pred_prob_last_10": 0.21248619153047912, "mean_pred_prob_last_25": 0.11836927696131169, "mean_pred_prob_last_50": 0.07322713049943559, "mean_token_accuracy": 0.8711847364902496, "step": 54740 }, { "epoch": 0.9732814249906672, "grad_norm": 2.453533594095384, "learning_rate": 0.0001, "loss": 0.7407, "mean_abs_error": 1179.0416644085242, "mean_abs_error_last_10": 706.6747860434758, "mean_abs_error_last_25": 684.963046040954, "mean_abs_error_last_50": 792.8406345941419, "mean_pred_prob": 0.04727658168121707, "mean_pred_prob_last_10": 0.18671576382184868, "mean_pred_prob_last_25": 0.11256036891718395, "mean_pred_prob_last_50": 0.0744160815665964, "mean_token_accuracy": 0.8706934154033661, "step": 54750 }, { "epoch": 0.9734591932874691, "grad_norm": 1.5984473616946222, "learning_rate": 0.0001, "loss": 0.5712, "mean_abs_error": 388.43127876935654, "mean_abs_error_last_10": 147.33377091727715, "mean_abs_error_last_25": 163.90211393031157, "mean_abs_error_last_50": 205.71039419914655, "mean_pred_prob": 0.03999087872216478, "mean_pred_prob_last_10": 0.20578050478361548, "mean_pred_prob_last_25": 0.10939916463103146, "mean_pred_prob_last_50": 0.06587821682915092, "mean_token_accuracy": 0.8778265297412873, "step": 54760 }, { "epoch": 0.973636961584271, "grad_norm": 1.300630264712187, "learning_rate": 0.0001, "loss": 0.7353, "mean_abs_error": 987.6564285265471, "mean_abs_error_last_10": 686.3468082429021, "mean_abs_error_last_25": 773.1619802575578, "mean_abs_error_last_50": 903.5560312748064, "mean_pred_prob": 0.040185456893232184, "mean_pred_prob_last_10": 0.19744180038833292, "mean_pred_prob_last_25": 0.11204741674882826, "mean_pred_prob_last_50": 0.06754029827570776, "mean_token_accuracy": 0.878651601076126, "step": 54770 }, { "epoch": 0.973814729881073, "grad_norm": 2.7871851072869887, "learning_rate": 0.0001, "loss": 0.6578, "mean_abs_error": 906.0047142306188, "mean_abs_error_last_10": 354.959625154615, "mean_abs_error_last_25": 474.57219646264105, "mean_abs_error_last_50": 545.1530110893113, "mean_pred_prob": 0.03142249812954105, "mean_pred_prob_last_10": 0.15494003043277188, "mean_pred_prob_last_25": 0.09004231448052451, "mean_pred_prob_last_50": 0.05458470629528165, "mean_token_accuracy": 0.8686747610569, "step": 54780 }, { "epoch": 0.9739924981778749, "grad_norm": 2.26164868449028, "learning_rate": 0.0001, "loss": 0.6021, "mean_abs_error": 1032.3167878650584, "mean_abs_error_last_10": 364.82943707708125, "mean_abs_error_last_25": 422.2661063681856, "mean_abs_error_last_50": 612.1147848911194, "mean_pred_prob": 0.04250149971048813, "mean_pred_prob_last_10": 0.19607710746349766, "mean_pred_prob_last_25": 0.11514081992791034, "mean_pred_prob_last_50": 0.0701097145822132, "mean_token_accuracy": 0.8728300094604492, "step": 54790 }, { "epoch": 0.9741702664746769, "grad_norm": 1.0129531440644801, "learning_rate": 0.0001, "loss": 0.6247, "mean_abs_error": 498.1405370262364, "mean_abs_error_last_10": 166.87354949081845, "mean_abs_error_last_25": 228.2951302095774, "mean_abs_error_last_50": 305.85539051303004, "mean_pred_prob": 0.058355106975068335, "mean_pred_prob_last_10": 0.26797128543257714, "mean_pred_prob_last_25": 0.15893938288791104, "mean_pred_prob_last_50": 0.09954204757232218, "mean_token_accuracy": 0.8717031717300415, "step": 54800 }, { "epoch": 0.9743480347714788, "grad_norm": 1.8272963840324707, "learning_rate": 0.0001, "loss": 0.7324, "mean_abs_error": 1056.1783571049862, "mean_abs_error_last_10": 789.3759047933135, "mean_abs_error_last_25": 814.8415804338789, "mean_abs_error_last_50": 926.4512081841615, "mean_pred_prob": 0.019366942744818517, "mean_pred_prob_last_10": 0.09814599280070979, "mean_pred_prob_last_25": 0.050631504235207105, "mean_pred_prob_last_50": 0.03086753445386421, "mean_token_accuracy": 0.8632143855094909, "step": 54810 }, { "epoch": 0.9745258030682808, "grad_norm": 1.0541545039824245, "learning_rate": 0.0001, "loss": 0.5971, "mean_abs_error": 347.54537042482434, "mean_abs_error_last_10": 247.4088858148886, "mean_abs_error_last_25": 290.18156910596474, "mean_abs_error_last_50": 296.13839727315235, "mean_pred_prob": 0.04979072022251785, "mean_pred_prob_last_10": 0.20351529493927956, "mean_pred_prob_last_25": 0.12575442818924784, "mean_pred_prob_last_50": 0.08057931466028094, "mean_token_accuracy": 0.8770725548267364, "step": 54820 }, { "epoch": 0.9747035713650828, "grad_norm": 2.4326429082287433, "learning_rate": 0.0001, "loss": 0.7005, "mean_abs_error": 917.966248431416, "mean_abs_error_last_10": 496.8843911342459, "mean_abs_error_last_25": 621.9084240977155, "mean_abs_error_last_50": 720.9583915092026, "mean_pred_prob": 0.03929111383913551, "mean_pred_prob_last_10": 0.16892210088844878, "mean_pred_prob_last_25": 0.10127114968490787, "mean_pred_prob_last_50": 0.06408102210843936, "mean_token_accuracy": 0.8652283310890198, "step": 54830 }, { "epoch": 0.9748813396618847, "grad_norm": 1.9199918346918134, "learning_rate": 0.0001, "loss": 0.7991, "mean_abs_error": 1111.1928260834652, "mean_abs_error_last_10": 403.9731723111193, "mean_abs_error_last_25": 614.097726000525, "mean_abs_error_last_50": 832.9198883215207, "mean_pred_prob": 0.011920456716325134, "mean_pred_prob_last_10": 0.06968964945990593, "mean_pred_prob_last_25": 0.03396744719939306, "mean_pred_prob_last_50": 0.020025518484180792, "mean_token_accuracy": 0.8668503642082215, "step": 54840 }, { "epoch": 0.9750591079586867, "grad_norm": 1.428273028616013, "learning_rate": 0.0001, "loss": 0.58, "mean_abs_error": 587.8604608022199, "mean_abs_error_last_10": 247.34574924075363, "mean_abs_error_last_25": 230.84682864979578, "mean_abs_error_last_50": 361.0836745354915, "mean_pred_prob": 0.04399352449690923, "mean_pred_prob_last_10": 0.2140939073404297, "mean_pred_prob_last_25": 0.11731608543777838, "mean_pred_prob_last_50": 0.07058189067756757, "mean_token_accuracy": 0.8849092543125152, "step": 54850 }, { "epoch": 0.9752368762554886, "grad_norm": 1.2198796014946571, "learning_rate": 0.0001, "loss": 0.5663, "mean_abs_error": 534.2099032534969, "mean_abs_error_last_10": 134.07763858834122, "mean_abs_error_last_25": 209.83766365486798, "mean_abs_error_last_50": 342.96838382124463, "mean_pred_prob": 0.045045683393254876, "mean_pred_prob_last_10": 0.22545690956758335, "mean_pred_prob_last_25": 0.1252953812771011, "mean_pred_prob_last_50": 0.07686498951516114, "mean_token_accuracy": 0.8880869150161743, "step": 54860 }, { "epoch": 0.9754146445522905, "grad_norm": 1.2070125752378198, "learning_rate": 0.0001, "loss": 0.5819, "mean_abs_error": 713.1941715768986, "mean_abs_error_last_10": 227.87022589838315, "mean_abs_error_last_25": 309.49919270860437, "mean_abs_error_last_50": 437.88925066635954, "mean_pred_prob": 0.04100266271270812, "mean_pred_prob_last_10": 0.20119748861761763, "mean_pred_prob_last_25": 0.11252163622993976, "mean_pred_prob_last_50": 0.0693005607521627, "mean_token_accuracy": 0.8690194606781005, "step": 54870 }, { "epoch": 0.9755924128490925, "grad_norm": 1.8244892408261915, "learning_rate": 0.0001, "loss": 0.761, "mean_abs_error": 136.91353259874944, "mean_abs_error_last_10": 39.256730538811766, "mean_abs_error_last_25": 60.12923284825665, "mean_abs_error_last_50": 100.27353349184254, "mean_pred_prob": 0.06482681268826126, "mean_pred_prob_last_10": 0.29803238809108734, "mean_pred_prob_last_25": 0.17109342906624078, "mean_pred_prob_last_50": 0.10496458476409316, "mean_token_accuracy": 0.8760958790779114, "step": 54880 }, { "epoch": 0.9757701811458944, "grad_norm": 1.6539008580788785, "learning_rate": 0.0001, "loss": 0.58, "mean_abs_error": 153.76147229431848, "mean_abs_error_last_10": 49.29440603821712, "mean_abs_error_last_25": 61.64897053441693, "mean_abs_error_last_50": 106.36747191086697, "mean_pred_prob": 0.06454470111057162, "mean_pred_prob_last_10": 0.2765213791280985, "mean_pred_prob_last_25": 0.1620199851691723, "mean_pred_prob_last_50": 0.10351362312212586, "mean_token_accuracy": 0.8826433420181274, "step": 54890 }, { "epoch": 0.9759479494426964, "grad_norm": 2.3618099694909382, "learning_rate": 0.0001, "loss": 0.6368, "mean_abs_error": 227.78663235405983, "mean_abs_error_last_10": 35.333395434052704, "mean_abs_error_last_25": 59.64067862662265, "mean_abs_error_last_50": 131.97334597775358, "mean_pred_prob": 0.06654343148693442, "mean_pred_prob_last_10": 0.2889366015791893, "mean_pred_prob_last_25": 0.1634559690952301, "mean_pred_prob_last_50": 0.10588802034035324, "mean_token_accuracy": 0.8834408700466156, "step": 54900 }, { "epoch": 0.9761257177394983, "grad_norm": 1.5701325126328476, "learning_rate": 0.0001, "loss": 0.7139, "mean_abs_error": 417.0928857231996, "mean_abs_error_last_10": 219.81753233129638, "mean_abs_error_last_25": 218.95552254743262, "mean_abs_error_last_50": 258.86933130079035, "mean_pred_prob": 0.06489774804213085, "mean_pred_prob_last_10": 0.2929106328636408, "mean_pred_prob_last_25": 0.1754805857432075, "mean_pred_prob_last_50": 0.10591352735646069, "mean_token_accuracy": 0.8595995426177978, "step": 54910 }, { "epoch": 0.9763034860363002, "grad_norm": 1.4230527219968394, "learning_rate": 0.0001, "loss": 0.6829, "mean_abs_error": 1315.240554626535, "mean_abs_error_last_10": 763.2386308689274, "mean_abs_error_last_25": 823.9215230835828, "mean_abs_error_last_50": 977.9364173567153, "mean_pred_prob": 0.04464370237546973, "mean_pred_prob_last_10": 0.20875991325010546, "mean_pred_prob_last_25": 0.11175107431190553, "mean_pred_prob_last_50": 0.07168636436836096, "mean_token_accuracy": 0.8682389438152314, "step": 54920 }, { "epoch": 0.9764812543331023, "grad_norm": 1.8159382853067867, "learning_rate": 0.0001, "loss": 0.9317, "mean_abs_error": 204.5032267945216, "mean_abs_error_last_10": 44.25751006946953, "mean_abs_error_last_25": 88.61230178095411, "mean_abs_error_last_50": 149.96796563899207, "mean_pred_prob": 0.032898972183465956, "mean_pred_prob_last_10": 0.18019561544060708, "mean_pred_prob_last_25": 0.09283280186355114, "mean_pred_prob_last_50": 0.05474438574165106, "mean_token_accuracy": 0.8705341398715973, "step": 54930 }, { "epoch": 0.9766590226299042, "grad_norm": 1.229473742989002, "learning_rate": 0.0001, "loss": 0.6434, "mean_abs_error": 887.1629897423558, "mean_abs_error_last_10": 238.4226810710922, "mean_abs_error_last_25": 352.9230739953262, "mean_abs_error_last_50": 497.87982333621767, "mean_pred_prob": 0.018829712504521014, "mean_pred_prob_last_10": 0.1035637303488329, "mean_pred_prob_last_25": 0.05219270440284163, "mean_pred_prob_last_50": 0.030875981028657407, "mean_token_accuracy": 0.878024446964264, "step": 54940 }, { "epoch": 0.9768367909267062, "grad_norm": 1.4656930274920954, "learning_rate": 0.0001, "loss": 0.7152, "mean_abs_error": 324.6417622392256, "mean_abs_error_last_10": 99.09476836481961, "mean_abs_error_last_25": 145.36557118146274, "mean_abs_error_last_50": 197.79475929995345, "mean_pred_prob": 0.035457942122593525, "mean_pred_prob_last_10": 0.18779919017106295, "mean_pred_prob_last_25": 0.10120626939460635, "mean_pred_prob_last_50": 0.05996294468641281, "mean_token_accuracy": 0.8764920890331268, "step": 54950 }, { "epoch": 0.9770145592235081, "grad_norm": 1.8049107268198221, "learning_rate": 0.0001, "loss": 0.6864, "mean_abs_error": 295.8883100482352, "mean_abs_error_last_10": 108.3774168588191, "mean_abs_error_last_25": 114.3497482980707, "mean_abs_error_last_50": 128.08882458143265, "mean_pred_prob": 0.04532983966637403, "mean_pred_prob_last_10": 0.2143271178007126, "mean_pred_prob_last_25": 0.12263507295865565, "mean_pred_prob_last_50": 0.07553479897323996, "mean_token_accuracy": 0.8786589801311493, "step": 54960 }, { "epoch": 0.97719232752031, "grad_norm": 1.0085790627033815, "learning_rate": 0.0001, "loss": 0.6744, "mean_abs_error": 595.2310353438717, "mean_abs_error_last_10": 173.04403331491068, "mean_abs_error_last_25": 272.6936501688184, "mean_abs_error_last_50": 361.9990336324502, "mean_pred_prob": 0.03572198656620458, "mean_pred_prob_last_10": 0.182133384142071, "mean_pred_prob_last_25": 0.09835713919019326, "mean_pred_prob_last_50": 0.05968433769303374, "mean_token_accuracy": 0.8682502746582031, "step": 54970 }, { "epoch": 0.977370095817112, "grad_norm": 1.3673579055229306, "learning_rate": 0.0001, "loss": 0.6098, "mean_abs_error": 361.9615309109996, "mean_abs_error_last_10": 101.38406777622141, "mean_abs_error_last_25": 131.47315519256236, "mean_abs_error_last_50": 175.4857389246207, "mean_pred_prob": 0.05984939909540117, "mean_pred_prob_last_10": 0.26688528815284374, "mean_pred_prob_last_25": 0.1590725603629835, "mean_pred_prob_last_50": 0.10161749375984072, "mean_token_accuracy": 0.8718946754932404, "step": 54980 }, { "epoch": 0.9775478641139139, "grad_norm": 0.9865053447539861, "learning_rate": 0.0001, "loss": 0.6161, "mean_abs_error": 40.98513571490331, "mean_abs_error_last_10": 4.6367367136488715, "mean_abs_error_last_25": 12.448447877097143, "mean_abs_error_last_50": 25.293783997959686, "mean_pred_prob": 0.07711398154497147, "mean_pred_prob_last_10": 0.35609982907772064, "mean_pred_prob_last_25": 0.20468688905239105, "mean_pred_prob_last_50": 0.12611324042081834, "mean_token_accuracy": 0.8725808918476105, "step": 54990 }, { "epoch": 0.9777256324107159, "grad_norm": 2.096757483440168, "learning_rate": 0.0001, "loss": 0.7208, "mean_abs_error": 402.4769912101127, "mean_abs_error_last_10": 50.607323979092534, "mean_abs_error_last_25": 97.44774333477105, "mean_abs_error_last_50": 180.77387115280436, "mean_pred_prob": 0.03270748150534928, "mean_pred_prob_last_10": 0.16559212915599347, "mean_pred_prob_last_25": 0.08761713579297066, "mean_pred_prob_last_50": 0.05400424664840102, "mean_token_accuracy": 0.8719270467758179, "step": 55000 }, { "epoch": 0.9779034007075178, "grad_norm": 1.6856626839650914, "learning_rate": 0.0001, "loss": 0.6955, "mean_abs_error": 328.23905744985956, "mean_abs_error_last_10": 133.2117003478417, "mean_abs_error_last_25": 155.88650712467364, "mean_abs_error_last_50": 212.88583674346197, "mean_pred_prob": 0.036384580796584486, "mean_pred_prob_last_10": 0.18091646693646907, "mean_pred_prob_last_25": 0.09952869396656752, "mean_pred_prob_last_50": 0.0608321875333786, "mean_token_accuracy": 0.8634537339210511, "step": 55010 }, { "epoch": 0.9780811690043197, "grad_norm": 0.9625441101903449, "learning_rate": 0.0001, "loss": 0.9438, "mean_abs_error": 630.5148476849514, "mean_abs_error_last_10": 199.90095430478294, "mean_abs_error_last_25": 299.9649253775015, "mean_abs_error_last_50": 411.64968060823367, "mean_pred_prob": 0.02351748289074749, "mean_pred_prob_last_10": 0.13000808362849056, "mean_pred_prob_last_25": 0.06966725105885416, "mean_pred_prob_last_50": 0.039861301449127494, "mean_token_accuracy": 0.8742401659488678, "step": 55020 }, { "epoch": 0.9782589373011217, "grad_norm": 1.9644378766883495, "learning_rate": 0.0001, "loss": 0.6578, "mean_abs_error": 421.77572256568544, "mean_abs_error_last_10": 206.14547867956088, "mean_abs_error_last_25": 267.1011879204782, "mean_abs_error_last_50": 325.8639893773885, "mean_pred_prob": 0.03342927551711909, "mean_pred_prob_last_10": 0.1784255885751918, "mean_pred_prob_last_25": 0.09694043205818162, "mean_pred_prob_last_50": 0.05695803323760629, "mean_token_accuracy": 0.8713433742523193, "step": 55030 }, { "epoch": 0.9784367055979236, "grad_norm": 2.3644815138211763, "learning_rate": 0.0001, "loss": 0.7527, "mean_abs_error": 159.81381456898234, "mean_abs_error_last_10": 53.79544971375424, "mean_abs_error_last_25": 82.17716527547181, "mean_abs_error_last_50": 122.7598124447178, "mean_pred_prob": 0.04856470944359899, "mean_pred_prob_last_10": 0.2193967316299677, "mean_pred_prob_last_25": 0.12545590912923216, "mean_pred_prob_last_50": 0.0790418646298349, "mean_token_accuracy": 0.8646250903606415, "step": 55040 }, { "epoch": 0.9786144738947257, "grad_norm": 1.8207599236690886, "learning_rate": 0.0001, "loss": 0.762, "mean_abs_error": 145.55972270583678, "mean_abs_error_last_10": 64.85502784019408, "mean_abs_error_last_25": 87.0532375490415, "mean_abs_error_last_50": 103.12678379931344, "mean_pred_prob": 0.04514416088350117, "mean_pred_prob_last_10": 0.20989830009639263, "mean_pred_prob_last_25": 0.12090705633163452, "mean_pred_prob_last_50": 0.07485336698591709, "mean_token_accuracy": 0.8623886048793793, "step": 55050 }, { "epoch": 0.9787922421915276, "grad_norm": 1.0708721317208263, "learning_rate": 0.0001, "loss": 0.7696, "mean_abs_error": 835.8472239249737, "mean_abs_error_last_10": 177.98932467908094, "mean_abs_error_last_25": 292.3087345751312, "mean_abs_error_last_50": 538.167361736684, "mean_pred_prob": 0.01461533546098508, "mean_pred_prob_last_10": 0.09371649883687497, "mean_pred_prob_last_25": 0.045288956386502835, "mean_pred_prob_last_50": 0.026095919008366764, "mean_token_accuracy": 0.8762013792991639, "step": 55060 }, { "epoch": 0.9789700104883295, "grad_norm": 1.963726117984887, "learning_rate": 0.0001, "loss": 0.6479, "mean_abs_error": 141.35370431816025, "mean_abs_error_last_10": 86.926371287693, "mean_abs_error_last_25": 109.79087758373585, "mean_abs_error_last_50": 113.98410974913972, "mean_pred_prob": 0.055345260491594675, "mean_pred_prob_last_10": 0.2729828327894211, "mean_pred_prob_last_25": 0.14798111766576766, "mean_pred_prob_last_50": 0.09135985411703587, "mean_token_accuracy": 0.8687571167945862, "step": 55070 }, { "epoch": 0.9791477787851315, "grad_norm": 2.0081680969852433, "learning_rate": 0.0001, "loss": 0.7206, "mean_abs_error": 188.2650645325037, "mean_abs_error_last_10": 40.28516615960335, "mean_abs_error_last_25": 72.7959787884707, "mean_abs_error_last_50": 123.5997595866254, "mean_pred_prob": 0.06116227498278022, "mean_pred_prob_last_10": 0.27936029210686686, "mean_pred_prob_last_25": 0.16273266151547433, "mean_pred_prob_last_50": 0.10107874209061266, "mean_token_accuracy": 0.8718311965465546, "step": 55080 }, { "epoch": 0.9793255470819334, "grad_norm": 1.6598876925162387, "learning_rate": 0.0001, "loss": 0.7709, "mean_abs_error": 489.22462724811305, "mean_abs_error_last_10": 98.65429554266571, "mean_abs_error_last_25": 110.52687016417553, "mean_abs_error_last_50": 227.83291292374594, "mean_pred_prob": 0.040652682655490935, "mean_pred_prob_last_10": 0.20199102694168686, "mean_pred_prob_last_25": 0.11672377828508615, "mean_pred_prob_last_50": 0.06834728801622987, "mean_token_accuracy": 0.8697416961193085, "step": 55090 }, { "epoch": 0.9795033153787354, "grad_norm": 2.066975228539755, "learning_rate": 0.0001, "loss": 0.7334, "mean_abs_error": 438.8410330115327, "mean_abs_error_last_10": 171.84170259216953, "mean_abs_error_last_25": 262.9095701881902, "mean_abs_error_last_50": 357.9372498354665, "mean_pred_prob": 0.02174917752854526, "mean_pred_prob_last_10": 0.10291302166879177, "mean_pred_prob_last_25": 0.057195376604795456, "mean_pred_prob_last_50": 0.034759722743183376, "mean_token_accuracy": 0.869869488477707, "step": 55100 }, { "epoch": 0.9796810836755373, "grad_norm": 1.3519085158399307, "learning_rate": 0.0001, "loss": 0.8, "mean_abs_error": 2480.301361186407, "mean_abs_error_last_10": 1509.1067780649555, "mean_abs_error_last_25": 1666.9480326575394, "mean_abs_error_last_50": 1916.99835465369, "mean_pred_prob": 0.016282245821639663, "mean_pred_prob_last_10": 0.07447696459130385, "mean_pred_prob_last_25": 0.04199690996756544, "mean_pred_prob_last_50": 0.026260493362497073, "mean_token_accuracy": 0.8716907501220703, "step": 55110 }, { "epoch": 0.9798588519723392, "grad_norm": 1.6662025145513006, "learning_rate": 0.0001, "loss": 0.7729, "mean_abs_error": 430.38128342713287, "mean_abs_error_last_10": 124.6572307974072, "mean_abs_error_last_25": 211.8839051981751, "mean_abs_error_last_50": 341.37000764427637, "mean_pred_prob": 0.045338389463722706, "mean_pred_prob_last_10": 0.20134758669883013, "mean_pred_prob_last_25": 0.11940733259543776, "mean_pred_prob_last_50": 0.07506179194897414, "mean_token_accuracy": 0.8669414401054383, "step": 55120 }, { "epoch": 0.9800366202691412, "grad_norm": 2.8578641402928135, "learning_rate": 0.0001, "loss": 0.7375, "mean_abs_error": 344.7836760821009, "mean_abs_error_last_10": 102.31262846469741, "mean_abs_error_last_25": 127.48982277135269, "mean_abs_error_last_50": 201.16843470026484, "mean_pred_prob": 0.03436205030884594, "mean_pred_prob_last_10": 0.16785918176174164, "mean_pred_prob_last_25": 0.09799785418435931, "mean_pred_prob_last_50": 0.058071808656677604, "mean_token_accuracy": 0.8764634013175965, "step": 55130 }, { "epoch": 0.9802143885659431, "grad_norm": 1.5860034232766935, "learning_rate": 0.0001, "loss": 0.7395, "mean_abs_error": 253.07998841045168, "mean_abs_error_last_10": 23.420784194412192, "mean_abs_error_last_25": 86.29032969371262, "mean_abs_error_last_50": 167.4744968887827, "mean_pred_prob": 0.04662805297411978, "mean_pred_prob_last_10": 0.2538915790617466, "mean_pred_prob_last_25": 0.13345744386315345, "mean_pred_prob_last_50": 0.07923395568504929, "mean_token_accuracy": 0.8687142074108124, "step": 55140 }, { "epoch": 0.9803921568627451, "grad_norm": 0.8578990823585521, "learning_rate": 0.0001, "loss": 0.6945, "mean_abs_error": 340.1397628284652, "mean_abs_error_last_10": 77.5680451037782, "mean_abs_error_last_25": 112.71913718785129, "mean_abs_error_last_50": 195.43336562891298, "mean_pred_prob": 0.03924860106781125, "mean_pred_prob_last_10": 0.20122335460036994, "mean_pred_prob_last_25": 0.10948310690000654, "mean_pred_prob_last_50": 0.06546787372790278, "mean_token_accuracy": 0.8739788293838501, "step": 55150 }, { "epoch": 0.980569925159547, "grad_norm": 1.4280993467584002, "learning_rate": 0.0001, "loss": 0.74, "mean_abs_error": 481.78807631089984, "mean_abs_error_last_10": 202.41013319676554, "mean_abs_error_last_25": 174.93483983080193, "mean_abs_error_last_50": 259.1635233800411, "mean_pred_prob": 0.03718940643593669, "mean_pred_prob_last_10": 0.1741916005499661, "mean_pred_prob_last_25": 0.09554322012700141, "mean_pred_prob_last_50": 0.059726286912336946, "mean_token_accuracy": 0.8747877717018128, "step": 55160 }, { "epoch": 0.980747693456349, "grad_norm": 0.9116519529691853, "learning_rate": 0.0001, "loss": 0.5622, "mean_abs_error": 265.5235934918707, "mean_abs_error_last_10": 60.547332194105046, "mean_abs_error_last_25": 95.52357168817912, "mean_abs_error_last_50": 139.14141063670937, "mean_pred_prob": 0.049372084671631455, "mean_pred_prob_last_10": 0.24303704425692557, "mean_pred_prob_last_25": 0.13728643730282783, "mean_pred_prob_last_50": 0.08305442426353693, "mean_token_accuracy": 0.8773529052734375, "step": 55170 }, { "epoch": 0.980925461753151, "grad_norm": 1.1187619541753866, "learning_rate": 0.0001, "loss": 0.7505, "mean_abs_error": 806.7864002621877, "mean_abs_error_last_10": 169.5213604079355, "mean_abs_error_last_25": 269.0127477131808, "mean_abs_error_last_50": 447.40287638065456, "mean_pred_prob": 0.029878910828847437, "mean_pred_prob_last_10": 0.13180482625029982, "mean_pred_prob_last_25": 0.07472356194630266, "mean_pred_prob_last_50": 0.04772768720285967, "mean_token_accuracy": 0.8665643274784088, "step": 55180 }, { "epoch": 0.9811032300499529, "grad_norm": 1.6919708545139687, "learning_rate": 0.0001, "loss": 0.7303, "mean_abs_error": 554.3000912317755, "mean_abs_error_last_10": 288.0296041976284, "mean_abs_error_last_25": 362.25383909980326, "mean_abs_error_last_50": 417.63242332305737, "mean_pred_prob": 0.08076541645859833, "mean_pred_prob_last_10": 0.3316578020399902, "mean_pred_prob_last_25": 0.2054895328328712, "mean_pred_prob_last_50": 0.13092755717225374, "mean_token_accuracy": 0.8705776274204254, "step": 55190 }, { "epoch": 0.9812809983467549, "grad_norm": 1.3604343122468765, "learning_rate": 0.0001, "loss": 0.633, "mean_abs_error": 204.8593538950416, "mean_abs_error_last_10": 45.820600883922715, "mean_abs_error_last_25": 68.42024708361308, "mean_abs_error_last_50": 121.43615916881978, "mean_pred_prob": 0.059528531460091474, "mean_pred_prob_last_10": 0.28012192733585833, "mean_pred_prob_last_25": 0.15627092104405166, "mean_pred_prob_last_50": 0.09834877029061317, "mean_token_accuracy": 0.8808742821216583, "step": 55200 }, { "epoch": 0.9814587666435568, "grad_norm": 1.5189436403178265, "learning_rate": 0.0001, "loss": 0.5967, "mean_abs_error": 57.518702333048324, "mean_abs_error_last_10": 24.284873441547035, "mean_abs_error_last_25": 25.446246413499473, "mean_abs_error_last_50": 41.06068188718176, "mean_pred_prob": 0.06357546243816614, "mean_pred_prob_last_10": 0.29262676984071734, "mean_pred_prob_last_25": 0.16946327202022077, "mean_pred_prob_last_50": 0.1038402272388339, "mean_token_accuracy": 0.8704573273658752, "step": 55210 }, { "epoch": 0.9816365349403587, "grad_norm": 1.4969876352550988, "learning_rate": 0.0001, "loss": 0.7184, "mean_abs_error": 770.5157727915545, "mean_abs_error_last_10": 110.00057398148098, "mean_abs_error_last_25": 217.7007690207792, "mean_abs_error_last_50": 440.05699826644303, "mean_pred_prob": 0.03577800377388485, "mean_pred_prob_last_10": 0.1774241697159596, "mean_pred_prob_last_25": 0.09943163429852575, "mean_pred_prob_last_50": 0.059416361240437254, "mean_token_accuracy": 0.8663921535015107, "step": 55220 }, { "epoch": 0.9818143032371607, "grad_norm": 1.2532195291691126, "learning_rate": 0.0001, "loss": 0.6784, "mean_abs_error": 723.7809359540612, "mean_abs_error_last_10": 436.1845385329768, "mean_abs_error_last_25": 512.3061170708138, "mean_abs_error_last_50": 564.212873760271, "mean_pred_prob": 0.03993183165293886, "mean_pred_prob_last_10": 0.1948077636800008, "mean_pred_prob_last_25": 0.10553816783649381, "mean_pred_prob_last_50": 0.06582321021705866, "mean_token_accuracy": 0.8746975600719452, "step": 55230 }, { "epoch": 0.9819920715339626, "grad_norm": 1.2885530258476088, "learning_rate": 0.0001, "loss": 0.6555, "mean_abs_error": 363.6147258033091, "mean_abs_error_last_10": 75.45529537750048, "mean_abs_error_last_25": 98.31038955918795, "mean_abs_error_last_50": 152.53993502843534, "mean_pred_prob": 0.05130032405722886, "mean_pred_prob_last_10": 0.2203288719058037, "mean_pred_prob_last_25": 0.13437578035518527, "mean_pred_prob_last_50": 0.08537016292102635, "mean_token_accuracy": 0.8769514203071594, "step": 55240 }, { "epoch": 0.9821698398307646, "grad_norm": 1.7117048103812535, "learning_rate": 0.0001, "loss": 1.1329, "mean_abs_error": 339.05538138245925, "mean_abs_error_last_10": 81.06402612991113, "mean_abs_error_last_25": 104.97121093461908, "mean_abs_error_last_50": 192.5334815354213, "mean_pred_prob": 0.03130182782188058, "mean_pred_prob_last_10": 0.17086071223020555, "mean_pred_prob_last_25": 0.08819173034280539, "mean_pred_prob_last_50": 0.05311912931501865, "mean_token_accuracy": 0.8763943612575531, "step": 55250 }, { "epoch": 0.9823476081275665, "grad_norm": 2.5904384893176586, "learning_rate": 0.0001, "loss": 0.6921, "mean_abs_error": 673.776009652292, "mean_abs_error_last_10": 112.97932940907315, "mean_abs_error_last_25": 191.52783224201536, "mean_abs_error_last_50": 363.1008275764784, "mean_pred_prob": 0.04059230212005786, "mean_pred_prob_last_10": 0.1859559619333595, "mean_pred_prob_last_25": 0.1062178359599784, "mean_pred_prob_last_50": 0.06669699586927891, "mean_token_accuracy": 0.8790282547473908, "step": 55260 }, { "epoch": 0.9825253764243684, "grad_norm": 1.0956893233660343, "learning_rate": 0.0001, "loss": 0.7463, "mean_abs_error": 461.60188897793006, "mean_abs_error_last_10": 301.89080683925533, "mean_abs_error_last_25": 377.5708821111985, "mean_abs_error_last_50": 389.5653277040916, "mean_pred_prob": 0.031777407578192654, "mean_pred_prob_last_10": 0.15214192625135184, "mean_pred_prob_last_25": 0.08286412516608835, "mean_pred_prob_last_50": 0.051969268219545485, "mean_token_accuracy": 0.8710491836071015, "step": 55270 }, { "epoch": 0.9827031447211704, "grad_norm": 1.1883116869857064, "learning_rate": 0.0001, "loss": 0.7321, "mean_abs_error": 355.496101518418, "mean_abs_error_last_10": 120.77501623957387, "mean_abs_error_last_25": 181.19092228530207, "mean_abs_error_last_50": 238.0030362047922, "mean_pred_prob": 0.02176028788089752, "mean_pred_prob_last_10": 0.12475795820355415, "mean_pred_prob_last_25": 0.061365464515984056, "mean_pred_prob_last_50": 0.036086806934326886, "mean_token_accuracy": 0.8671697795391082, "step": 55280 }, { "epoch": 0.9828809130179724, "grad_norm": 2.2515817310908224, "learning_rate": 0.0001, "loss": 0.6322, "mean_abs_error": 85.29054523218718, "mean_abs_error_last_10": 16.10115819845626, "mean_abs_error_last_25": 32.44718180073308, "mean_abs_error_last_50": 49.49038581172136, "mean_pred_prob": 0.0691941157914698, "mean_pred_prob_last_10": 0.3172345541417599, "mean_pred_prob_last_25": 0.18436567652970554, "mean_pred_prob_last_50": 0.11540698623284698, "mean_token_accuracy": 0.8757193326950073, "step": 55290 }, { "epoch": 0.9830586813147744, "grad_norm": 1.1550439394334917, "learning_rate": 0.0001, "loss": 0.6268, "mean_abs_error": 923.4236163556295, "mean_abs_error_last_10": 527.2051983711251, "mean_abs_error_last_25": 584.0210069571202, "mean_abs_error_last_50": 686.4809892568234, "mean_pred_prob": 0.03679157820442924, "mean_pred_prob_last_10": 0.17216693905647845, "mean_pred_prob_last_25": 0.10051233586273156, "mean_pred_prob_last_50": 0.06185209906834643, "mean_token_accuracy": 0.8735696613788605, "step": 55300 }, { "epoch": 0.9832364496115763, "grad_norm": 2.0426124309140605, "learning_rate": 0.0001, "loss": 0.734, "mean_abs_error": 858.2449193702105, "mean_abs_error_last_10": 288.87667333045295, "mean_abs_error_last_25": 395.08174620493435, "mean_abs_error_last_50": 585.5058022181968, "mean_pred_prob": 0.035742254671640696, "mean_pred_prob_last_10": 0.16992965784156694, "mean_pred_prob_last_25": 0.10037411983939819, "mean_pred_prob_last_50": 0.06122484825900756, "mean_token_accuracy": 0.8709902465343475, "step": 55310 }, { "epoch": 0.9834142179083782, "grad_norm": 2.2977114852379104, "learning_rate": 0.0001, "loss": 0.6159, "mean_abs_error": 992.0880730789373, "mean_abs_error_last_10": 633.2663139520736, "mean_abs_error_last_25": 692.2609318819455, "mean_abs_error_last_50": 781.6905237277604, "mean_pred_prob": 0.050637484283652154, "mean_pred_prob_last_10": 0.2516710716066882, "mean_pred_prob_last_25": 0.13721789172268473, "mean_pred_prob_last_50": 0.08404475289571564, "mean_token_accuracy": 0.8773254930973053, "step": 55320 }, { "epoch": 0.9835919862051802, "grad_norm": 1.8479239322639245, "learning_rate": 0.0001, "loss": 0.5602, "mean_abs_error": 518.8152446014266, "mean_abs_error_last_10": 352.27362097371923, "mean_abs_error_last_25": 397.78127219743817, "mean_abs_error_last_50": 404.37661825167237, "mean_pred_prob": 0.05085562241147272, "mean_pred_prob_last_10": 0.23381354375742375, "mean_pred_prob_last_25": 0.13598038136260585, "mean_pred_prob_last_50": 0.08256749506108463, "mean_token_accuracy": 0.8803342521190644, "step": 55330 }, { "epoch": 0.9837697545019821, "grad_norm": 0.9677818162926926, "learning_rate": 0.0001, "loss": 0.6434, "mean_abs_error": 589.9292621562579, "mean_abs_error_last_10": 115.76267764015843, "mean_abs_error_last_25": 181.46338784155574, "mean_abs_error_last_50": 376.4940775677884, "mean_pred_prob": 0.048749893766944295, "mean_pred_prob_last_10": 0.23493122787913306, "mean_pred_prob_last_25": 0.13360389935551212, "mean_pred_prob_last_50": 0.08168949363171123, "mean_token_accuracy": 0.8731561601161957, "step": 55340 }, { "epoch": 0.9839475227987841, "grad_norm": 1.293161793787521, "learning_rate": 0.0001, "loss": 0.6076, "mean_abs_error": 704.0108031182775, "mean_abs_error_last_10": 322.10310853495344, "mean_abs_error_last_25": 353.8115694353356, "mean_abs_error_last_50": 427.16643373234376, "mean_pred_prob": 0.03521464927471243, "mean_pred_prob_last_10": 0.17640921301790513, "mean_pred_prob_last_25": 0.0970148503605742, "mean_pred_prob_last_50": 0.059649301820900294, "mean_token_accuracy": 0.8673830687999725, "step": 55350 }, { "epoch": 0.984125291095586, "grad_norm": 1.5942056925372288, "learning_rate": 0.0001, "loss": 0.7444, "mean_abs_error": 428.12129666077044, "mean_abs_error_last_10": 241.85121231193347, "mean_abs_error_last_25": 235.63781008985688, "mean_abs_error_last_50": 281.3633500198045, "mean_pred_prob": 0.0464275119593367, "mean_pred_prob_last_10": 0.2324644439155236, "mean_pred_prob_last_25": 0.13111091583268716, "mean_pred_prob_last_50": 0.07902603019028902, "mean_token_accuracy": 0.8659987688064575, "step": 55360 }, { "epoch": 0.9843030593923879, "grad_norm": 2.138770366499151, "learning_rate": 0.0001, "loss": 0.707, "mean_abs_error": 406.92345217532386, "mean_abs_error_last_10": 137.03807851903179, "mean_abs_error_last_25": 163.52163205556442, "mean_abs_error_last_50": 306.2774059132201, "mean_pred_prob": 0.03804532792419195, "mean_pred_prob_last_10": 0.18871879111975431, "mean_pred_prob_last_25": 0.10643200669437647, "mean_pred_prob_last_50": 0.06496322751045228, "mean_token_accuracy": 0.869506710767746, "step": 55370 }, { "epoch": 0.9844808276891899, "grad_norm": 1.417780396423552, "learning_rate": 0.0001, "loss": 0.6107, "mean_abs_error": 326.0390802875272, "mean_abs_error_last_10": 95.75538276291894, "mean_abs_error_last_25": 140.04921320395073, "mean_abs_error_last_50": 202.33628437491532, "mean_pred_prob": 0.04242167808115482, "mean_pred_prob_last_10": 0.17081465888768435, "mean_pred_prob_last_25": 0.10628392184153199, "mean_pred_prob_last_50": 0.06767286234535277, "mean_token_accuracy": 0.8799494326114654, "step": 55380 }, { "epoch": 0.9846585959859918, "grad_norm": 1.7683853179193836, "learning_rate": 0.0001, "loss": 1.0612, "mean_abs_error": 193.0210800951236, "mean_abs_error_last_10": 99.49397227547249, "mean_abs_error_last_25": 138.21682397985902, "mean_abs_error_last_50": 146.91282294729507, "mean_pred_prob": 0.04156074947677553, "mean_pred_prob_last_10": 0.2059070346876979, "mean_pred_prob_last_25": 0.11495682084932923, "mean_pred_prob_last_50": 0.06946808481588959, "mean_token_accuracy": 0.8708068788051605, "step": 55390 }, { "epoch": 0.9848363642827938, "grad_norm": 1.3637784985358374, "learning_rate": 0.0001, "loss": 0.7202, "mean_abs_error": 1224.0427621731299, "mean_abs_error_last_10": 581.7362194568833, "mean_abs_error_last_25": 634.2212297189292, "mean_abs_error_last_50": 853.5271114931529, "mean_pred_prob": 0.023643522738711908, "mean_pred_prob_last_10": 0.11060557602904737, "mean_pred_prob_last_25": 0.06360684463288635, "mean_pred_prob_last_50": 0.03963092628982849, "mean_token_accuracy": 0.8670257687568664, "step": 55400 }, { "epoch": 0.9850141325795958, "grad_norm": 3.936816775717256, "learning_rate": 0.0001, "loss": 0.8118, "mean_abs_error": 534.6804202965219, "mean_abs_error_last_10": 94.99230470074221, "mean_abs_error_last_25": 198.1067566354928, "mean_abs_error_last_50": 317.4360741736631, "mean_pred_prob": 0.02785548600368202, "mean_pred_prob_last_10": 0.14726345986127853, "mean_pred_prob_last_25": 0.07975486535578966, "mean_pred_prob_last_50": 0.04781641652807593, "mean_token_accuracy": 0.8807014048099517, "step": 55410 }, { "epoch": 0.9851919008763977, "grad_norm": 1.3911634872357292, "learning_rate": 0.0001, "loss": 0.8734, "mean_abs_error": 637.8050142412183, "mean_abs_error_last_10": 102.81556344560254, "mean_abs_error_last_25": 220.6542784315588, "mean_abs_error_last_50": 373.34611138707254, "mean_pred_prob": 0.027209305413998665, "mean_pred_prob_last_10": 0.1446033362299204, "mean_pred_prob_last_25": 0.07823103722184896, "mean_pred_prob_last_50": 0.047234267462044954, "mean_token_accuracy": 0.881013298034668, "step": 55420 }, { "epoch": 0.9853696691731997, "grad_norm": 1.43102993630547, "learning_rate": 0.0001, "loss": 0.6382, "mean_abs_error": 715.565845625439, "mean_abs_error_last_10": 111.91077842813543, "mean_abs_error_last_25": 184.28094721978675, "mean_abs_error_last_50": 350.5206956360067, "mean_pred_prob": 0.04639323069131933, "mean_pred_prob_last_10": 0.22948169526644052, "mean_pred_prob_last_25": 0.12503823905717582, "mean_pred_prob_last_50": 0.07502788294805214, "mean_token_accuracy": 0.8623909890651703, "step": 55430 }, { "epoch": 0.9855474374700016, "grad_norm": 1.788367618688345, "learning_rate": 0.0001, "loss": 0.7394, "mean_abs_error": 520.2000503015113, "mean_abs_error_last_10": 221.55717839460254, "mean_abs_error_last_25": 220.08011377241564, "mean_abs_error_last_50": 310.98907697174224, "mean_pred_prob": 0.05298054409213364, "mean_pred_prob_last_10": 0.18329365167301148, "mean_pred_prob_last_25": 0.1247578376205638, "mean_pred_prob_last_50": 0.08319697544211521, "mean_token_accuracy": 0.8731280922889709, "step": 55440 }, { "epoch": 0.9857252057668036, "grad_norm": 1.9397742944464216, "learning_rate": 0.0001, "loss": 0.6504, "mean_abs_error": 739.7940545853672, "mean_abs_error_last_10": 391.1678693913369, "mean_abs_error_last_25": 443.74748441588616, "mean_abs_error_last_50": 551.8577417053502, "mean_pred_prob": 0.03672280798200518, "mean_pred_prob_last_10": 0.1844437736494001, "mean_pred_prob_last_25": 0.09960015841643326, "mean_pred_prob_last_50": 0.06091295840451494, "mean_token_accuracy": 0.870910507440567, "step": 55450 }, { "epoch": 0.9859029740636055, "grad_norm": 1.11982919191667, "learning_rate": 0.0001, "loss": 0.5828, "mean_abs_error": 431.3501651111618, "mean_abs_error_last_10": 84.67276589673611, "mean_abs_error_last_25": 125.1691399664322, "mean_abs_error_last_50": 232.6014420149001, "mean_pred_prob": 0.036683963146060705, "mean_pred_prob_last_10": 0.18305718712508678, "mean_pred_prob_last_25": 0.0991760746575892, "mean_pred_prob_last_50": 0.06051291977055371, "mean_token_accuracy": 0.8700975179672241, "step": 55460 }, { "epoch": 0.9860807423604074, "grad_norm": 1.5928335657098391, "learning_rate": 0.0001, "loss": 0.69, "mean_abs_error": 727.364114076729, "mean_abs_error_last_10": 288.3667941975433, "mean_abs_error_last_25": 348.8155551595628, "mean_abs_error_last_50": 496.5219596128445, "mean_pred_prob": 0.03880838418554049, "mean_pred_prob_last_10": 0.18590079209534452, "mean_pred_prob_last_25": 0.11016328094992786, "mean_pred_prob_last_50": 0.06489611584693193, "mean_token_accuracy": 0.8740898370742798, "step": 55470 }, { "epoch": 0.9862585106572094, "grad_norm": 1.9062307578956488, "learning_rate": 0.0001, "loss": 0.6209, "mean_abs_error": 698.9734560381701, "mean_abs_error_last_10": 273.373639714574, "mean_abs_error_last_25": 333.2717952038224, "mean_abs_error_last_50": 484.32919758455427, "mean_pred_prob": 0.05316374341782648, "mean_pred_prob_last_10": 0.23308248824905603, "mean_pred_prob_last_25": 0.1396272938756738, "mean_pred_prob_last_50": 0.08861882343771868, "mean_token_accuracy": 0.8775170862674713, "step": 55480 }, { "epoch": 0.9864362789540113, "grad_norm": 2.9574529419954025, "learning_rate": 0.0001, "loss": 0.7066, "mean_abs_error": 1598.9347626790866, "mean_abs_error_last_10": 826.4208263517669, "mean_abs_error_last_25": 1049.631620787354, "mean_abs_error_last_50": 1190.4004551242704, "mean_pred_prob": 0.03004093438066775, "mean_pred_prob_last_10": 0.14891480358783155, "mean_pred_prob_last_25": 0.08284440739371349, "mean_pred_prob_last_50": 0.05033574965200387, "mean_token_accuracy": 0.8657932579517365, "step": 55490 }, { "epoch": 0.9866140472508133, "grad_norm": 1.0962483801448564, "learning_rate": 0.0001, "loss": 0.6605, "mean_abs_error": 989.83536555878, "mean_abs_error_last_10": 401.8171780731973, "mean_abs_error_last_25": 530.7555648667877, "mean_abs_error_last_50": 671.6068385394154, "mean_pred_prob": 0.036899349917075594, "mean_pred_prob_last_10": 0.17506381475832314, "mean_pred_prob_last_25": 0.10014027142897249, "mean_pred_prob_last_50": 0.06156936196493916, "mean_token_accuracy": 0.868459039926529, "step": 55500 }, { "epoch": 0.9867918155476152, "grad_norm": 1.9919383993439694, "learning_rate": 0.0001, "loss": 0.6128, "mean_abs_error": 419.9927486964847, "mean_abs_error_last_10": 98.06114503055628, "mean_abs_error_last_25": 181.35418166924623, "mean_abs_error_last_50": 264.22160285829017, "mean_pred_prob": 0.036394555238075554, "mean_pred_prob_last_10": 0.18767663957551123, "mean_pred_prob_last_25": 0.0982915022643283, "mean_pred_prob_last_50": 0.059483429201645774, "mean_token_accuracy": 0.872265076637268, "step": 55510 }, { "epoch": 0.9869695838444172, "grad_norm": 2.2311954334474153, "learning_rate": 0.0001, "loss": 0.9654, "mean_abs_error": 391.03072712697065, "mean_abs_error_last_10": 92.64162578570446, "mean_abs_error_last_25": 123.53125924561535, "mean_abs_error_last_50": 213.47370376660496, "mean_pred_prob": 0.05729152210406028, "mean_pred_prob_last_10": 0.28632065705023707, "mean_pred_prob_last_25": 0.16467231032438576, "mean_pred_prob_last_50": 0.09817275664536282, "mean_token_accuracy": 0.8703178644180298, "step": 55520 }, { "epoch": 0.9871473521412192, "grad_norm": 2.3521371964384303, "learning_rate": 0.0001, "loss": 0.7376, "mean_abs_error": 650.5465489472018, "mean_abs_error_last_10": 91.54057131222623, "mean_abs_error_last_25": 251.7848504564102, "mean_abs_error_last_50": 403.9117890731203, "mean_pred_prob": 0.04850468525546603, "mean_pred_prob_last_10": 0.2220187244238332, "mean_pred_prob_last_25": 0.12809043279848992, "mean_pred_prob_last_50": 0.08078506760066376, "mean_token_accuracy": 0.8754313170909882, "step": 55530 }, { "epoch": 0.9873251204380211, "grad_norm": 1.633176709780271, "learning_rate": 0.0001, "loss": 0.6942, "mean_abs_error": 599.0408372467132, "mean_abs_error_last_10": 188.3682652375594, "mean_abs_error_last_25": 227.4739160031244, "mean_abs_error_last_50": 323.61496282464805, "mean_pred_prob": 0.0477081680088304, "mean_pred_prob_last_10": 0.23701027521165088, "mean_pred_prob_last_25": 0.13321556833107023, "mean_pred_prob_last_50": 0.08045412058709189, "mean_token_accuracy": 0.8689979135990142, "step": 55540 }, { "epoch": 0.9875028887348231, "grad_norm": 1.456191099820555, "learning_rate": 0.0001, "loss": 0.6347, "mean_abs_error": 770.1087892688188, "mean_abs_error_last_10": 275.92863831786036, "mean_abs_error_last_25": 318.8149875635871, "mean_abs_error_last_50": 415.82749094510746, "mean_pred_prob": 0.04438038439839147, "mean_pred_prob_last_10": 0.21433303821831942, "mean_pred_prob_last_25": 0.1173935582570266, "mean_pred_prob_last_50": 0.07244600679841824, "mean_token_accuracy": 0.8798250079154968, "step": 55550 }, { "epoch": 0.987680657031625, "grad_norm": 1.3140436474381998, "learning_rate": 0.0001, "loss": 0.8164, "mean_abs_error": 373.222560030041, "mean_abs_error_last_10": 63.913978318840805, "mean_abs_error_last_25": 106.21220737383831, "mean_abs_error_last_50": 185.2748317520771, "mean_pred_prob": 0.04842105736024678, "mean_pred_prob_last_10": 0.23775961715728045, "mean_pred_prob_last_25": 0.13316473001614212, "mean_pred_prob_last_50": 0.08099002526141703, "mean_token_accuracy": 0.8702433943748474, "step": 55560 }, { "epoch": 0.9878584253284269, "grad_norm": 2.2873459181484583, "learning_rate": 0.0001, "loss": 0.678, "mean_abs_error": 588.1958404000576, "mean_abs_error_last_10": 190.97892459321525, "mean_abs_error_last_25": 284.9374931197127, "mean_abs_error_last_50": 444.2312789764584, "mean_pred_prob": 0.03049046457745135, "mean_pred_prob_last_10": 0.13563285116106272, "mean_pred_prob_last_25": 0.08271136996336281, "mean_pred_prob_last_50": 0.05050942692905665, "mean_token_accuracy": 0.876572984457016, "step": 55570 }, { "epoch": 0.9880361936252289, "grad_norm": 1.052068346820478, "learning_rate": 0.0001, "loss": 0.6636, "mean_abs_error": 442.32541136134415, "mean_abs_error_last_10": 110.18688946021766, "mean_abs_error_last_25": 188.15800873735304, "mean_abs_error_last_50": 266.99445142471393, "mean_pred_prob": 0.04311767022591084, "mean_pred_prob_last_10": 0.2077388269128278, "mean_pred_prob_last_25": 0.1131708052707836, "mean_pred_prob_last_50": 0.07030710740946233, "mean_token_accuracy": 0.8698115766048431, "step": 55580 }, { "epoch": 0.9882139619220308, "grad_norm": 1.7815698466727057, "learning_rate": 0.0001, "loss": 0.7599, "mean_abs_error": 624.8325716308821, "mean_abs_error_last_10": 145.9620661496929, "mean_abs_error_last_25": 195.8168430907141, "mean_abs_error_last_50": 328.22813396631807, "mean_pred_prob": 0.04752634964534082, "mean_pred_prob_last_10": 0.21773761233780534, "mean_pred_prob_last_25": 0.12594168909126893, "mean_pred_prob_last_50": 0.0800097439030651, "mean_token_accuracy": 0.8720624268054962, "step": 55590 }, { "epoch": 0.9883917302188328, "grad_norm": 1.891090325947936, "learning_rate": 0.0001, "loss": 0.6543, "mean_abs_error": 788.6078653585417, "mean_abs_error_last_10": 271.13578731879164, "mean_abs_error_last_25": 363.0268520763491, "mean_abs_error_last_50": 490.02506411179877, "mean_pred_prob": 0.036050838648225184, "mean_pred_prob_last_10": 0.1850065845588688, "mean_pred_prob_last_25": 0.10057294003199786, "mean_pred_prob_last_50": 0.060501004138495774, "mean_token_accuracy": 0.8726225018501281, "step": 55600 }, { "epoch": 0.9885694985156347, "grad_norm": 2.0017741872598105, "learning_rate": 0.0001, "loss": 0.7405, "mean_abs_error": 1142.4105874396778, "mean_abs_error_last_10": 731.4546493270387, "mean_abs_error_last_25": 774.9705652109317, "mean_abs_error_last_50": 894.2643395666237, "mean_pred_prob": 0.047134169265336825, "mean_pred_prob_last_10": 0.21937601473910034, "mean_pred_prob_last_25": 0.12934459684183822, "mean_pred_prob_last_50": 0.08000108361156891, "mean_token_accuracy": 0.8797617673873901, "step": 55610 }, { "epoch": 0.9887472668124366, "grad_norm": 1.7891313930554167, "learning_rate": 0.0001, "loss": 0.6514, "mean_abs_error": 836.881595219769, "mean_abs_error_last_10": 442.735181672474, "mean_abs_error_last_25": 531.661838545728, "mean_abs_error_last_50": 589.0747269224117, "mean_pred_prob": 0.04314979411137756, "mean_pred_prob_last_10": 0.18110744710429572, "mean_pred_prob_last_25": 0.10875126310274937, "mean_pred_prob_last_50": 0.06941679838637356, "mean_token_accuracy": 0.8813241004943848, "step": 55620 }, { "epoch": 0.9889250351092386, "grad_norm": 2.296278055593944, "learning_rate": 0.0001, "loss": 0.6513, "mean_abs_error": 215.4948668022268, "mean_abs_error_last_10": 177.75576206834913, "mean_abs_error_last_25": 185.51925912557166, "mean_abs_error_last_50": 187.13530596128322, "mean_pred_prob": 0.04574442834127694, "mean_pred_prob_last_10": 0.23327715396881105, "mean_pred_prob_last_25": 0.12445035213604569, "mean_pred_prob_last_50": 0.07483451585285365, "mean_token_accuracy": 0.8684004843235016, "step": 55630 }, { "epoch": 0.9891028034060406, "grad_norm": 1.185950940626648, "learning_rate": 0.0001, "loss": 0.6536, "mean_abs_error": 219.62278427525376, "mean_abs_error_last_10": 88.8961649592632, "mean_abs_error_last_25": 124.55142963768063, "mean_abs_error_last_50": 153.16874827999143, "mean_pred_prob": 0.05519039775244892, "mean_pred_prob_last_10": 0.26368751749396324, "mean_pred_prob_last_25": 0.14763124100863934, "mean_pred_prob_last_50": 0.09098075535148382, "mean_token_accuracy": 0.8662730634212494, "step": 55640 }, { "epoch": 0.9892805717028426, "grad_norm": 3.270825334890544, "learning_rate": 0.0001, "loss": 0.7075, "mean_abs_error": 633.0467189014096, "mean_abs_error_last_10": 188.2410511236334, "mean_abs_error_last_25": 300.38484625919955, "mean_abs_error_last_50": 379.1606989188309, "mean_pred_prob": 0.028036135202273726, "mean_pred_prob_last_10": 0.12320213634520769, "mean_pred_prob_last_25": 0.07369797965511679, "mean_pred_prob_last_50": 0.04602322448045015, "mean_token_accuracy": 0.8730533361434937, "step": 55650 }, { "epoch": 0.9894583399996445, "grad_norm": 1.3153739940015017, "learning_rate": 0.0001, "loss": 0.7133, "mean_abs_error": 286.1688447033126, "mean_abs_error_last_10": 49.60670897176395, "mean_abs_error_last_25": 79.2773664023629, "mean_abs_error_last_50": 213.1330929114818, "mean_pred_prob": 0.03217923152260482, "mean_pred_prob_last_10": 0.15492329187691212, "mean_pred_prob_last_25": 0.0867011547088623, "mean_pred_prob_last_50": 0.05356452837586403, "mean_token_accuracy": 0.8641385853290557, "step": 55660 }, { "epoch": 0.9896361082964464, "grad_norm": 1.8456087900813274, "learning_rate": 0.0001, "loss": 0.7452, "mean_abs_error": 503.7297549452901, "mean_abs_error_last_10": 269.3605265296086, "mean_abs_error_last_25": 369.52324419176836, "mean_abs_error_last_50": 431.4760374165545, "mean_pred_prob": 0.021503261150792242, "mean_pred_prob_last_10": 0.11955251675099135, "mean_pred_prob_last_25": 0.06322384690865875, "mean_pred_prob_last_50": 0.03748271563090384, "mean_token_accuracy": 0.875749933719635, "step": 55670 }, { "epoch": 0.9898138765932484, "grad_norm": 2.189895615597641, "learning_rate": 0.0001, "loss": 0.8163, "mean_abs_error": 836.3640766475667, "mean_abs_error_last_10": 276.0189812897722, "mean_abs_error_last_25": 299.37446288275606, "mean_abs_error_last_50": 411.8100958328543, "mean_pred_prob": 0.039674992504296826, "mean_pred_prob_last_10": 0.18792051541386173, "mean_pred_prob_last_25": 0.10910930876852945, "mean_pred_prob_last_50": 0.0679710883880034, "mean_token_accuracy": 0.8671825230121613, "step": 55680 }, { "epoch": 0.9899916448900503, "grad_norm": 1.405054595234334, "learning_rate": 0.0001, "loss": 0.7133, "mean_abs_error": 254.08790850093138, "mean_abs_error_last_10": 147.84572754912932, "mean_abs_error_last_25": 131.90172210733922, "mean_abs_error_last_50": 149.48483459421269, "mean_pred_prob": 0.036305532418191434, "mean_pred_prob_last_10": 0.1418167296797037, "mean_pred_prob_last_25": 0.08659528782591223, "mean_pred_prob_last_50": 0.05807679020799696, "mean_token_accuracy": 0.8733326315879821, "step": 55690 }, { "epoch": 0.9901694131868523, "grad_norm": 0.8162307724270307, "learning_rate": 0.0001, "loss": 0.5518, "mean_abs_error": 792.5243923056557, "mean_abs_error_last_10": 444.75808049549124, "mean_abs_error_last_25": 482.5393629286024, "mean_abs_error_last_50": 551.1792453627556, "mean_pred_prob": 0.041334848778205924, "mean_pred_prob_last_10": 0.19308696987573057, "mean_pred_prob_last_25": 0.10628867558552883, "mean_pred_prob_last_50": 0.06783337404485792, "mean_token_accuracy": 0.8762192785739898, "step": 55700 }, { "epoch": 0.9903471814836542, "grad_norm": 2.405107012363628, "learning_rate": 0.0001, "loss": 0.6807, "mean_abs_error": 370.1789066914566, "mean_abs_error_last_10": 176.90392258947776, "mean_abs_error_last_25": 229.7553775768975, "mean_abs_error_last_50": 264.70366140378394, "mean_pred_prob": 0.05106450663879514, "mean_pred_prob_last_10": 0.23343982519581913, "mean_pred_prob_last_25": 0.13417683099396527, "mean_pred_prob_last_50": 0.0840740010375157, "mean_token_accuracy": 0.8659768044948578, "step": 55710 }, { "epoch": 0.9905249497804561, "grad_norm": 1.1653616163438372, "learning_rate": 0.0001, "loss": 0.7859, "mean_abs_error": 883.9969829864183, "mean_abs_error_last_10": 233.87660476074228, "mean_abs_error_last_25": 389.34049902808545, "mean_abs_error_last_50": 515.6258434133376, "mean_pred_prob": 0.03770704871276394, "mean_pred_prob_last_10": 0.17349710018606856, "mean_pred_prob_last_25": 0.1014248730672989, "mean_pred_prob_last_50": 0.06218515789951198, "mean_token_accuracy": 0.8777244627475739, "step": 55720 }, { "epoch": 0.9907027180772581, "grad_norm": 1.837968444214177, "learning_rate": 0.0001, "loss": 0.7411, "mean_abs_error": 229.58928531731652, "mean_abs_error_last_10": 28.268598296663935, "mean_abs_error_last_25": 44.086059365500695, "mean_abs_error_last_50": 98.48012842281501, "mean_pred_prob": 0.048337291460484266, "mean_pred_prob_last_10": 0.24809953086078168, "mean_pred_prob_last_25": 0.1324781646952033, "mean_pred_prob_last_50": 0.08123030476272106, "mean_token_accuracy": 0.8738956689834595, "step": 55730 }, { "epoch": 0.99088048637406, "grad_norm": 1.1029535580739447, "learning_rate": 0.0001, "loss": 0.7768, "mean_abs_error": 658.0201168132269, "mean_abs_error_last_10": 268.7719267264968, "mean_abs_error_last_25": 470.1786605870594, "mean_abs_error_last_50": 539.3539989120383, "mean_pred_prob": 0.021730174036929385, "mean_pred_prob_last_10": 0.10572455213405192, "mean_pred_prob_last_25": 0.055717118992470206, "mean_pred_prob_last_50": 0.03430600658757612, "mean_token_accuracy": 0.8745780169963837, "step": 55740 }, { "epoch": 0.991058254670862, "grad_norm": 1.4244098229157878, "learning_rate": 0.0001, "loss": 0.6865, "mean_abs_error": 308.26183506141194, "mean_abs_error_last_10": 135.23476599679822, "mean_abs_error_last_25": 127.9406750284156, "mean_abs_error_last_50": 170.52171171025643, "mean_pred_prob": 0.03260380779393017, "mean_pred_prob_last_10": 0.19287977572530507, "mean_pred_prob_last_25": 0.09961794801056385, "mean_pred_prob_last_50": 0.05780468676239252, "mean_token_accuracy": 0.8774498462677002, "step": 55750 }, { "epoch": 0.991236022967664, "grad_norm": 1.5736758060931981, "learning_rate": 0.0001, "loss": 0.6583, "mean_abs_error": 1005.0312121728732, "mean_abs_error_last_10": 433.1407783328191, "mean_abs_error_last_25": 531.9678690959124, "mean_abs_error_last_50": 748.5905367726896, "mean_pred_prob": 0.051700591098051515, "mean_pred_prob_last_10": 0.2432277313870145, "mean_pred_prob_last_25": 0.13846068064449354, "mean_pred_prob_last_50": 0.08673395514342701, "mean_token_accuracy": 0.8779081404209137, "step": 55760 }, { "epoch": 0.9914137912644659, "grad_norm": 1.2345605412408054, "learning_rate": 0.0001, "loss": 0.6365, "mean_abs_error": 272.1130156291765, "mean_abs_error_last_10": 131.43223502941896, "mean_abs_error_last_25": 147.0718701969695, "mean_abs_error_last_50": 182.44541166774016, "mean_pred_prob": 0.043334467755630615, "mean_pred_prob_last_10": 0.2275644838809967, "mean_pred_prob_last_25": 0.11749123074114323, "mean_pred_prob_last_50": 0.07169749829918146, "mean_token_accuracy": 0.8845381200313568, "step": 55770 }, { "epoch": 0.9915915595612679, "grad_norm": 0.7544921168985163, "learning_rate": 0.0001, "loss": 0.924, "mean_abs_error": 1023.2694173584975, "mean_abs_error_last_10": 498.52051058430635, "mean_abs_error_last_25": 661.11705477249, "mean_abs_error_last_50": 784.0943606964399, "mean_pred_prob": 0.02773815058171749, "mean_pred_prob_last_10": 0.15059398770099505, "mean_pred_prob_last_25": 0.07895133295387495, "mean_pred_prob_last_50": 0.04774726660689339, "mean_token_accuracy": 0.8774309277534484, "step": 55780 }, { "epoch": 0.9917693278580698, "grad_norm": 4.588218472217507, "learning_rate": 0.0001, "loss": 0.6107, "mean_abs_error": 94.13625785664873, "mean_abs_error_last_10": 15.607278884592342, "mean_abs_error_last_25": 34.27336094872256, "mean_abs_error_last_50": 50.90382141699685, "mean_pred_prob": 0.05860122153535485, "mean_pred_prob_last_10": 0.2921150289475918, "mean_pred_prob_last_25": 0.1580196239054203, "mean_pred_prob_last_50": 0.09833779074251651, "mean_token_accuracy": 0.8732366979122161, "step": 55790 }, { "epoch": 0.9919470961548718, "grad_norm": 1.2689554696497665, "learning_rate": 0.0001, "loss": 0.7093, "mean_abs_error": 116.35067624229025, "mean_abs_error_last_10": 33.60210799047199, "mean_abs_error_last_25": 80.28430034803117, "mean_abs_error_last_50": 104.15490969525517, "mean_pred_prob": 0.05023932922631502, "mean_pred_prob_last_10": 0.24844025298953057, "mean_pred_prob_last_25": 0.13850609660148622, "mean_pred_prob_last_50": 0.08447937685996294, "mean_token_accuracy": 0.8769434511661529, "step": 55800 }, { "epoch": 0.9921248644516737, "grad_norm": 1.7139441417880121, "learning_rate": 0.0001, "loss": 0.6918, "mean_abs_error": 532.5946253998524, "mean_abs_error_last_10": 194.764040669601, "mean_abs_error_last_25": 279.41669040525505, "mean_abs_error_last_50": 365.3180385474933, "mean_pred_prob": 0.02697002579807304, "mean_pred_prob_last_10": 0.13502721838885917, "mean_pred_prob_last_25": 0.07624371174024418, "mean_pred_prob_last_50": 0.045601434033596885, "mean_token_accuracy": 0.8745131850242615, "step": 55810 }, { "epoch": 0.9923026327484756, "grad_norm": 1.4670365603535287, "learning_rate": 0.0001, "loss": 0.6023, "mean_abs_error": 508.23002903025446, "mean_abs_error_last_10": 246.28054480287216, "mean_abs_error_last_25": 244.32311115746333, "mean_abs_error_last_50": 295.9946235329875, "mean_pred_prob": 0.04417090895585716, "mean_pred_prob_last_10": 0.20065009742975234, "mean_pred_prob_last_25": 0.11383071367163211, "mean_pred_prob_last_50": 0.07258541237097234, "mean_token_accuracy": 0.8739913463592529, "step": 55820 }, { "epoch": 0.9924804010452776, "grad_norm": 2.0860537808349346, "learning_rate": 0.0001, "loss": 0.6023, "mean_abs_error": 276.2423585847868, "mean_abs_error_last_10": 107.04850302800858, "mean_abs_error_last_25": 111.00232507466849, "mean_abs_error_last_50": 140.53923747209313, "mean_pred_prob": 0.02920181113295257, "mean_pred_prob_last_10": 0.1593833990395069, "mean_pred_prob_last_25": 0.0827180559746921, "mean_pred_prob_last_50": 0.04904027725569904, "mean_token_accuracy": 0.8798873722553253, "step": 55830 }, { "epoch": 0.9926581693420795, "grad_norm": 1.4488480878760561, "learning_rate": 0.0001, "loss": 0.7113, "mean_abs_error": 304.2476649511951, "mean_abs_error_last_10": 87.95378022994137, "mean_abs_error_last_25": 110.77991202364039, "mean_abs_error_last_50": 174.95789835571844, "mean_pred_prob": 0.043378267844673246, "mean_pred_prob_last_10": 0.21846693488769234, "mean_pred_prob_last_25": 0.1204710598103702, "mean_pred_prob_last_50": 0.07346046245656908, "mean_token_accuracy": 0.871652489900589, "step": 55840 }, { "epoch": 0.9928359376388814, "grad_norm": 1.9960196065007572, "learning_rate": 0.0001, "loss": 0.7176, "mean_abs_error": 337.0657781962297, "mean_abs_error_last_10": 139.926318924709, "mean_abs_error_last_25": 165.89537447863694, "mean_abs_error_last_50": 190.6725373190872, "mean_pred_prob": 0.041524217603728174, "mean_pred_prob_last_10": 0.20347310174256564, "mean_pred_prob_last_25": 0.11355618201196194, "mean_pred_prob_last_50": 0.07001650868915021, "mean_token_accuracy": 0.8769641578197479, "step": 55850 }, { "epoch": 0.9930137059356834, "grad_norm": 1.3429172683456518, "learning_rate": 0.0001, "loss": 0.6134, "mean_abs_error": 541.021084346083, "mean_abs_error_last_10": 143.91151481433135, "mean_abs_error_last_25": 314.5214031889219, "mean_abs_error_last_50": 449.0576813340963, "mean_pred_prob": 0.027304257778450847, "mean_pred_prob_last_10": 0.1328602435067296, "mean_pred_prob_last_25": 0.07208368349820375, "mean_pred_prob_last_50": 0.04478150540962815, "mean_token_accuracy": 0.8648464620113373, "step": 55860 }, { "epoch": 0.9931914742324853, "grad_norm": 1.4658039758863757, "learning_rate": 0.0001, "loss": 0.7209, "mean_abs_error": 335.80341019935565, "mean_abs_error_last_10": 66.32989625423772, "mean_abs_error_last_25": 214.93929482493076, "mean_abs_error_last_50": 333.8287737843349, "mean_pred_prob": 0.05936596130486578, "mean_pred_prob_last_10": 0.2666809784248471, "mean_pred_prob_last_25": 0.15649164197966456, "mean_pred_prob_last_50": 0.09857393507845699, "mean_token_accuracy": 0.8693992257118225, "step": 55870 }, { "epoch": 0.9933692425292874, "grad_norm": 1.0159612387817365, "learning_rate": 0.0001, "loss": 0.6059, "mean_abs_error": 842.4748651325665, "mean_abs_error_last_10": 569.0254996182391, "mean_abs_error_last_25": 604.8540024364225, "mean_abs_error_last_50": 673.6769778964552, "mean_pred_prob": 0.035234229150228204, "mean_pred_prob_last_10": 0.1818700424511917, "mean_pred_prob_last_25": 0.09564113215892575, "mean_pred_prob_last_50": 0.05887206502375193, "mean_token_accuracy": 0.8721172213554382, "step": 55880 }, { "epoch": 0.9935470108260893, "grad_norm": 1.2059961199344658, "learning_rate": 0.0001, "loss": 0.5605, "mean_abs_error": 277.443566366768, "mean_abs_error_last_10": 110.54121802119607, "mean_abs_error_last_25": 91.72000190378841, "mean_abs_error_last_50": 123.94091491421916, "mean_pred_prob": 0.04385313098318875, "mean_pred_prob_last_10": 0.20609409641474485, "mean_pred_prob_last_25": 0.116966394148767, "mean_pred_prob_last_50": 0.0727033356204629, "mean_token_accuracy": 0.8835173130035401, "step": 55890 }, { "epoch": 0.9937247791228913, "grad_norm": 1.5040185293567998, "learning_rate": 0.0001, "loss": 0.6589, "mean_abs_error": 549.948290027439, "mean_abs_error_last_10": 264.23397034516654, "mean_abs_error_last_25": 347.97293655088515, "mean_abs_error_last_50": 366.4960300958554, "mean_pred_prob": 0.04164550467394292, "mean_pred_prob_last_10": 0.18371566840214654, "mean_pred_prob_last_25": 0.10754779763519765, "mean_pred_prob_last_50": 0.06775204935693183, "mean_token_accuracy": 0.8682462751865387, "step": 55900 }, { "epoch": 0.9939025474196932, "grad_norm": 1.6894192320331316, "learning_rate": 0.0001, "loss": 0.7514, "mean_abs_error": 580.2250272300201, "mean_abs_error_last_10": 273.94223044862053, "mean_abs_error_last_25": 332.16964897379245, "mean_abs_error_last_50": 460.4625713026172, "mean_pred_prob": 0.03538948122295551, "mean_pred_prob_last_10": 0.16459821342723444, "mean_pred_prob_last_25": 0.09271092138951645, "mean_pred_prob_last_50": 0.05785285869496874, "mean_token_accuracy": 0.8719727218151092, "step": 55910 }, { "epoch": 0.9940803157164951, "grad_norm": 1.135897780831395, "learning_rate": 0.0001, "loss": 0.6211, "mean_abs_error": 220.7077735616499, "mean_abs_error_last_10": 38.314486106055405, "mean_abs_error_last_25": 67.00378958616392, "mean_abs_error_last_50": 110.01627968178475, "mean_pred_prob": 0.057224634615704414, "mean_pred_prob_last_10": 0.25981470234692094, "mean_pred_prob_last_25": 0.1534401759505272, "mean_pred_prob_last_50": 0.0945913284085691, "mean_token_accuracy": 0.8818686544895172, "step": 55920 }, { "epoch": 0.9942580840132971, "grad_norm": 0.9525070929336407, "learning_rate": 0.0001, "loss": 0.6498, "mean_abs_error": 88.5564849265444, "mean_abs_error_last_10": 11.167254994408083, "mean_abs_error_last_25": 24.76253063515977, "mean_abs_error_last_50": 51.71599136984646, "mean_pred_prob": 0.046189846284687516, "mean_pred_prob_last_10": 0.23804354593157767, "mean_pred_prob_last_25": 0.1307202696800232, "mean_pred_prob_last_50": 0.07779923621565103, "mean_token_accuracy": 0.8855368733406067, "step": 55930 }, { "epoch": 0.994435852310099, "grad_norm": 1.5114127097811574, "learning_rate": 0.0001, "loss": 0.7486, "mean_abs_error": 373.39461956893166, "mean_abs_error_last_10": 138.64770214224131, "mean_abs_error_last_25": 159.60894797108875, "mean_abs_error_last_50": 201.8554871253521, "mean_pred_prob": 0.04449320377316326, "mean_pred_prob_last_10": 0.21412809044122696, "mean_pred_prob_last_25": 0.12384291067719459, "mean_pred_prob_last_50": 0.07471955348737538, "mean_token_accuracy": 0.8805300712585449, "step": 55940 }, { "epoch": 0.994613620606901, "grad_norm": 2.175056946475986, "learning_rate": 0.0001, "loss": 0.7523, "mean_abs_error": 308.88170111147775, "mean_abs_error_last_10": 137.96068989244958, "mean_abs_error_last_25": 194.8497501317229, "mean_abs_error_last_50": 213.31573681182613, "mean_pred_prob": 0.037441257806494835, "mean_pred_prob_last_10": 0.18530678264796735, "mean_pred_prob_last_25": 0.09905043747276068, "mean_pred_prob_last_50": 0.06125080091878772, "mean_token_accuracy": 0.8703319609165192, "step": 55950 }, { "epoch": 0.9947913889037029, "grad_norm": 2.451234388098083, "learning_rate": 0.0001, "loss": 0.6566, "mean_abs_error": 222.1081890360715, "mean_abs_error_last_10": 116.4603184292923, "mean_abs_error_last_25": 153.36683648196384, "mean_abs_error_last_50": 182.8062934605606, "mean_pred_prob": 0.04038865687325597, "mean_pred_prob_last_10": 0.1837055990472436, "mean_pred_prob_last_25": 0.1066306178458035, "mean_pred_prob_last_50": 0.06628323886543512, "mean_token_accuracy": 0.8820860981941223, "step": 55960 }, { "epoch": 0.9949691572005048, "grad_norm": 1.260442174850975, "learning_rate": 0.0001, "loss": 0.6481, "mean_abs_error": 211.49676483258722, "mean_abs_error_last_10": 53.519289081560245, "mean_abs_error_last_25": 63.11299307486506, "mean_abs_error_last_50": 98.82310942597974, "mean_pred_prob": 0.05367966503836215, "mean_pred_prob_last_10": 0.25023403242230413, "mean_pred_prob_last_25": 0.1446898218244314, "mean_pred_prob_last_50": 0.09021557783707976, "mean_token_accuracy": 0.8713148057460784, "step": 55970 }, { "epoch": 0.9951469254973068, "grad_norm": 1.7932586233461187, "learning_rate": 0.0001, "loss": 0.7283, "mean_abs_error": 216.68578534937222, "mean_abs_error_last_10": 55.794718590766514, "mean_abs_error_last_25": 75.86548431562929, "mean_abs_error_last_50": 112.29243202629978, "mean_pred_prob": 0.0434591471683234, "mean_pred_prob_last_10": 0.21504642479121686, "mean_pred_prob_last_25": 0.11762610822916031, "mean_pred_prob_last_50": 0.07296893382444977, "mean_token_accuracy": 0.8674332082271576, "step": 55980 }, { "epoch": 0.9953246937941087, "grad_norm": 1.0135722861604106, "learning_rate": 0.0001, "loss": 0.6631, "mean_abs_error": 373.46576195560186, "mean_abs_error_last_10": 148.1923618565655, "mean_abs_error_last_25": 129.98975986917142, "mean_abs_error_last_50": 166.48771148532512, "mean_pred_prob": 0.03952067121863365, "mean_pred_prob_last_10": 0.19037694549188017, "mean_pred_prob_last_25": 0.10619728895835578, "mean_pred_prob_last_50": 0.06554765640757979, "mean_token_accuracy": 0.868331778049469, "step": 55990 }, { "epoch": 0.9955024620909108, "grad_norm": 0.9942592609616713, "learning_rate": 0.0001, "loss": 0.6383, "mean_abs_error": 176.08643846181477, "mean_abs_error_last_10": 34.36156381584229, "mean_abs_error_last_25": 59.667815382382514, "mean_abs_error_last_50": 135.6865462076132, "mean_pred_prob": 0.05049250111915171, "mean_pred_prob_last_10": 0.24748337045311927, "mean_pred_prob_last_25": 0.13807844538241626, "mean_pred_prob_last_50": 0.08461882849223912, "mean_token_accuracy": 0.8769512355327607, "step": 56000 }, { "epoch": 0.9956802303877127, "grad_norm": 1.4908531174411974, "learning_rate": 0.0001, "loss": 0.6578, "mean_abs_error": 543.7950646042416, "mean_abs_error_last_10": 202.13497130728805, "mean_abs_error_last_25": 224.10176137440848, "mean_abs_error_last_50": 361.26212703394805, "mean_pred_prob": 0.04549458616529591, "mean_pred_prob_last_10": 0.2119755137595348, "mean_pred_prob_last_25": 0.11550148109672591, "mean_pred_prob_last_50": 0.07343695910531096, "mean_token_accuracy": 0.8782258450984954, "step": 56010 }, { "epoch": 0.9958579986845146, "grad_norm": 2.559357671570466, "learning_rate": 0.0001, "loss": 0.798, "mean_abs_error": 409.2189795233525, "mean_abs_error_last_10": 142.1515225561323, "mean_abs_error_last_25": 188.74946206874998, "mean_abs_error_last_50": 326.9871962807212, "mean_pred_prob": 0.02233042542356998, "mean_pred_prob_last_10": 0.10781967714428901, "mean_pred_prob_last_25": 0.058188346773386, "mean_pred_prob_last_50": 0.03601821647025645, "mean_token_accuracy": 0.8713536143302918, "step": 56020 }, { "epoch": 0.9960357669813166, "grad_norm": 1.1632199308516942, "learning_rate": 0.0001, "loss": 0.5673, "mean_abs_error": 340.630897956952, "mean_abs_error_last_10": 128.09529125593468, "mean_abs_error_last_25": 171.71389394928715, "mean_abs_error_last_50": 243.00542266439078, "mean_pred_prob": 0.036676240246742965, "mean_pred_prob_last_10": 0.1879533503204584, "mean_pred_prob_last_25": 0.10515638515353203, "mean_pred_prob_last_50": 0.062278645718470214, "mean_token_accuracy": 0.8722637593746185, "step": 56030 }, { "epoch": 0.9962135352781185, "grad_norm": 1.4484827967522527, "learning_rate": 0.0001, "loss": 0.7468, "mean_abs_error": 115.70386183213971, "mean_abs_error_last_10": 40.89724805085517, "mean_abs_error_last_25": 64.71827963327557, "mean_abs_error_last_50": 77.59511772393287, "mean_pred_prob": 0.05602940544486046, "mean_pred_prob_last_10": 0.2473519254475832, "mean_pred_prob_last_25": 0.1421194091439247, "mean_pred_prob_last_50": 0.09027358889579773, "mean_token_accuracy": 0.8684240639209747, "step": 56040 }, { "epoch": 0.9963913035749205, "grad_norm": 1.2351802522308148, "learning_rate": 0.0001, "loss": 0.7457, "mean_abs_error": 316.6157275316662, "mean_abs_error_last_10": 152.77597152240665, "mean_abs_error_last_25": 245.04863403771486, "mean_abs_error_last_50": 244.10602784972622, "mean_pred_prob": 0.05271236402913928, "mean_pred_prob_last_10": 0.24142485121265053, "mean_pred_prob_last_25": 0.14145074314437805, "mean_pred_prob_last_50": 0.08704299251548946, "mean_token_accuracy": 0.8691452205181122, "step": 56050 }, { "epoch": 0.9965690718717224, "grad_norm": 1.6471781990310708, "learning_rate": 0.0001, "loss": 0.6342, "mean_abs_error": 1015.5390282327523, "mean_abs_error_last_10": 486.4232653501273, "mean_abs_error_last_25": 610.1264506929958, "mean_abs_error_last_50": 770.3938948774583, "mean_pred_prob": 0.029203631496056915, "mean_pred_prob_last_10": 0.15519600165716838, "mean_pred_prob_last_25": 0.08334568105929066, "mean_pred_prob_last_50": 0.05022618098737439, "mean_token_accuracy": 0.8683955729007721, "step": 56060 }, { "epoch": 0.9967468401685243, "grad_norm": 2.31514693336999, "learning_rate": 0.0001, "loss": 0.7346, "mean_abs_error": 474.66284393175977, "mean_abs_error_last_10": 120.63869631184612, "mean_abs_error_last_25": 152.95041356584792, "mean_abs_error_last_50": 252.43109426708634, "mean_pred_prob": 0.03558952243765816, "mean_pred_prob_last_10": 0.1778268282767385, "mean_pred_prob_last_25": 0.09674775823950768, "mean_pred_prob_last_50": 0.05954768166411668, "mean_token_accuracy": 0.8686587631702423, "step": 56070 }, { "epoch": 0.9969246084653263, "grad_norm": 2.191260785039266, "learning_rate": 0.0001, "loss": 0.8207, "mean_abs_error": 526.4040885465067, "mean_abs_error_last_10": 182.99552614104138, "mean_abs_error_last_25": 236.5776265402007, "mean_abs_error_last_50": 325.8052248296727, "mean_pred_prob": 0.030291430337820203, "mean_pred_prob_last_10": 0.15633297613821923, "mean_pred_prob_last_25": 0.08211476891301572, "mean_pred_prob_last_50": 0.05021727317944169, "mean_token_accuracy": 0.8692237615585328, "step": 56080 }, { "epoch": 0.9971023767621282, "grad_norm": 3.0797696469808886, "learning_rate": 0.0001, "loss": 0.7826, "mean_abs_error": 587.7334944225732, "mean_abs_error_last_10": 214.6629340620669, "mean_abs_error_last_25": 314.3533054664482, "mean_abs_error_last_50": 365.9296867839848, "mean_pred_prob": 0.035168441451969554, "mean_pred_prob_last_10": 0.18498895366792567, "mean_pred_prob_last_25": 0.0966703478305135, "mean_pred_prob_last_50": 0.05892350981594063, "mean_token_accuracy": 0.868229192495346, "step": 56090 }, { "epoch": 0.9972801450589301, "grad_norm": 1.3807281391089505, "learning_rate": 0.0001, "loss": 0.6797, "mean_abs_error": 935.3036517937514, "mean_abs_error_last_10": 540.6976353505537, "mean_abs_error_last_25": 590.5461236656586, "mean_abs_error_last_50": 738.5206631976264, "mean_pred_prob": 0.03882309415348573, "mean_pred_prob_last_10": 0.1866425555519527, "mean_pred_prob_last_25": 0.1013220313412603, "mean_pred_prob_last_50": 0.06293331132619642, "mean_token_accuracy": 0.8692958414554596, "step": 56100 }, { "epoch": 0.9974579133557321, "grad_norm": 1.1352887644959357, "learning_rate": 0.0001, "loss": 0.6077, "mean_abs_error": 404.79529455559606, "mean_abs_error_last_10": 120.79718687373614, "mean_abs_error_last_25": 194.19398385108525, "mean_abs_error_last_50": 258.5939319137757, "mean_pred_prob": 0.040562090871389955, "mean_pred_prob_last_10": 0.18544509168714285, "mean_pred_prob_last_25": 0.10856622352730483, "mean_pred_prob_last_50": 0.06662361666094511, "mean_token_accuracy": 0.8667716205120086, "step": 56110 }, { "epoch": 0.9976356816525341, "grad_norm": 0.9841517087207073, "learning_rate": 0.0001, "loss": 0.6594, "mean_abs_error": 999.3031616296827, "mean_abs_error_last_10": 576.7244080637842, "mean_abs_error_last_25": 674.0682927398454, "mean_abs_error_last_50": 714.015924829928, "mean_pred_prob": 0.04097628544695908, "mean_pred_prob_last_10": 0.2056979996006703, "mean_pred_prob_last_25": 0.11153166391595733, "mean_pred_prob_last_50": 0.06818959592201282, "mean_token_accuracy": 0.8762257397174835, "step": 56120 }, { "epoch": 0.9978134499493361, "grad_norm": 1.793665416474786, "learning_rate": 0.0001, "loss": 0.5836, "mean_abs_error": 233.7680474622373, "mean_abs_error_last_10": 50.70637089183447, "mean_abs_error_last_25": 100.79774756668449, "mean_abs_error_last_50": 140.67146370108134, "mean_pred_prob": 0.04130490156821907, "mean_pred_prob_last_10": 0.20211503840982914, "mean_pred_prob_last_25": 0.10945849819108844, "mean_pred_prob_last_50": 0.06785739324986935, "mean_token_accuracy": 0.8880408346652985, "step": 56130 }, { "epoch": 0.997991218246138, "grad_norm": 1.0382528430498699, "learning_rate": 0.0001, "loss": 0.7887, "mean_abs_error": 262.9249454501473, "mean_abs_error_last_10": 152.78241020842145, "mean_abs_error_last_25": 189.43631521254892, "mean_abs_error_last_50": 188.18042712704548, "mean_pred_prob": 0.04364963504485786, "mean_pred_prob_last_10": 0.20620311219245196, "mean_pred_prob_last_25": 0.11804764177650214, "mean_pred_prob_last_50": 0.07284828349947929, "mean_token_accuracy": 0.8657076954841614, "step": 56140 }, { "epoch": 0.99816898654294, "grad_norm": 1.2499343647675136, "learning_rate": 0.0001, "loss": 0.6658, "mean_abs_error": 1039.2060986943202, "mean_abs_error_last_10": 724.8230657298764, "mean_abs_error_last_25": 791.7211570165842, "mean_abs_error_last_50": 882.3751657924971, "mean_pred_prob": 0.047023139867815186, "mean_pred_prob_last_10": 0.2313991832517786, "mean_pred_prob_last_25": 0.12735780136863467, "mean_pred_prob_last_50": 0.07883884785114788, "mean_token_accuracy": 0.8663348197937012, "step": 56150 }, { "epoch": 0.9983467548397419, "grad_norm": 1.1991750572383513, "learning_rate": 0.0001, "loss": 0.7364, "mean_abs_error": 1128.434935575863, "mean_abs_error_last_10": 621.4146796520706, "mean_abs_error_last_25": 759.415603252394, "mean_abs_error_last_50": 916.067952029508, "mean_pred_prob": 0.03281776617513969, "mean_pred_prob_last_10": 0.18288478656613733, "mean_pred_prob_last_25": 0.09544739041302819, "mean_pred_prob_last_50": 0.0559018930158345, "mean_token_accuracy": 0.8693328201770782, "step": 56160 }, { "epoch": 0.9985245231365438, "grad_norm": 1.3081059284125087, "learning_rate": 0.0001, "loss": 0.6851, "mean_abs_error": 394.310166873512, "mean_abs_error_last_10": 121.88477229038884, "mean_abs_error_last_25": 178.3583754664442, "mean_abs_error_last_50": 297.53701632448406, "mean_pred_prob": 0.03436267958022654, "mean_pred_prob_last_10": 0.15235477443784476, "mean_pred_prob_last_25": 0.09063049415126442, "mean_pred_prob_last_50": 0.05787891447544098, "mean_token_accuracy": 0.8767144620418549, "step": 56170 }, { "epoch": 0.9987022914333458, "grad_norm": 0.7390693820027313, "learning_rate": 0.0001, "loss": 0.654, "mean_abs_error": 1050.9872368235458, "mean_abs_error_last_10": 513.4756842556014, "mean_abs_error_last_25": 640.9777512882492, "mean_abs_error_last_50": 765.6212223796804, "mean_pred_prob": 0.039695391058921815, "mean_pred_prob_last_10": 0.18978227006737142, "mean_pred_prob_last_25": 0.10768737011821941, "mean_pred_prob_last_50": 0.06556678401248064, "mean_token_accuracy": 0.8742628395557404, "step": 56180 }, { "epoch": 0.9988800597301477, "grad_norm": 1.4328587302889215, "learning_rate": 0.0001, "loss": 0.7443, "mean_abs_error": 357.95917485087205, "mean_abs_error_last_10": 93.09880967386293, "mean_abs_error_last_25": 177.20061474811442, "mean_abs_error_last_50": 267.6855565216791, "mean_pred_prob": 0.04180976375937462, "mean_pred_prob_last_10": 0.1722336158156395, "mean_pred_prob_last_25": 0.10632064417004586, "mean_pred_prob_last_50": 0.06864749118685723, "mean_token_accuracy": 0.8742541015148163, "step": 56190 }, { "epoch": 0.9990578280269496, "grad_norm": 1.212695357770276, "learning_rate": 0.0001, "loss": 0.7864, "mean_abs_error": 641.5667498898049, "mean_abs_error_last_10": 166.867762323495, "mean_abs_error_last_25": 290.1710674118041, "mean_abs_error_last_50": 463.4881063472623, "mean_pred_prob": 0.03471718456130475, "mean_pred_prob_last_10": 0.17025079713203012, "mean_pred_prob_last_25": 0.09380275749135762, "mean_pred_prob_last_50": 0.056567150948103516, "mean_token_accuracy": 0.8661380887031556, "step": 56200 }, { "epoch": 0.9992355963237516, "grad_norm": 1.8842010386612429, "learning_rate": 0.0001, "loss": 0.7701, "mean_abs_error": 467.3395649653909, "mean_abs_error_last_10": 88.54434951032734, "mean_abs_error_last_25": 117.88115426314769, "mean_abs_error_last_50": 216.50791181755963, "mean_pred_prob": 0.03860812203492969, "mean_pred_prob_last_10": 0.19223604947328568, "mean_pred_prob_last_25": 0.11075801835395396, "mean_pred_prob_last_50": 0.06584827210754156, "mean_token_accuracy": 0.8605342984199524, "step": 56210 }, { "epoch": 0.9994133646205535, "grad_norm": 1.364256461465098, "learning_rate": 0.0001, "loss": 0.6145, "mean_abs_error": 169.09581343203533, "mean_abs_error_last_10": 41.03650367399915, "mean_abs_error_last_25": 78.23367022776127, "mean_abs_error_last_50": 114.28634050117554, "mean_pred_prob": 0.0397070461884141, "mean_pred_prob_last_10": 0.21521846503019332, "mean_pred_prob_last_25": 0.11351452581584454, "mean_pred_prob_last_50": 0.06723501868546009, "mean_token_accuracy": 0.8748152792453766, "step": 56220 }, { "epoch": 0.9995911329173556, "grad_norm": 1.8787083632278554, "learning_rate": 0.0001, "loss": 0.6194, "mean_abs_error": 835.7689955142563, "mean_abs_error_last_10": 465.69396528251445, "mean_abs_error_last_25": 473.3071063814485, "mean_abs_error_last_50": 641.2541006704245, "mean_pred_prob": 0.034942608239362016, "mean_pred_prob_last_10": 0.16921068756491878, "mean_pred_prob_last_25": 0.09400002747715916, "mean_pred_prob_last_50": 0.05761166386655532, "mean_token_accuracy": 0.8767816960811615, "step": 56230 }, { "epoch": 0.9997689012141575, "grad_norm": 1.5754519736993098, "learning_rate": 0.0001, "loss": 0.6757, "mean_abs_error": 1353.0353345022831, "mean_abs_error_last_10": 667.7671573006495, "mean_abs_error_last_25": 727.35399270842, "mean_abs_error_last_50": 920.2000621956504, "mean_pred_prob": 0.0322530660458142, "mean_pred_prob_last_10": 0.14851999710372182, "mean_pred_prob_last_25": 0.08533726086316165, "mean_pred_prob_last_50": 0.0531075672712177, "mean_token_accuracy": 0.8817672669887543, "step": 56240 }, { "epoch": 0.9999466695109595, "grad_norm": 1.4394392755703813, "learning_rate": 0.0001, "loss": 0.8408, "mean_abs_error": 508.71003266104196, "mean_abs_error_last_10": 147.82608695834125, "mean_abs_error_last_25": 219.89941674989328, "mean_abs_error_last_50": 315.4142177975562, "mean_pred_prob": 0.023806779086589812, "mean_pred_prob_last_10": 0.12659046798944473, "mean_pred_prob_last_25": 0.0684992884285748, "mean_pred_prob_last_50": 0.039704419765621425, "mean_token_accuracy": 0.8782657265663147, "step": 56250 }, { "epoch": 1.0, "mean_abs_error": 352.95204934079084, "mean_abs_error_last_10": 73.31935663198732, "mean_abs_error_last_25": 90.48814334252829, "mean_abs_error_last_50": 150.62643507684675, "mean_pred_prob": 0.02727323646346728, "mean_pred_prob_last_10": 0.1499374471604824, "mean_pred_prob_last_25": 0.07899452062944572, "mean_pred_prob_last_50": 0.047673165798187256, "mean_token_accuracy": 0.870428204536438, "step": 56253, "total_flos": 3707122466086912.0, "train_loss": 0.8589762968989311, "train_runtime": 56498.3641, "train_samples_per_second": 7.965, "train_steps_per_second": 0.996 } ], "logging_steps": 10, "max_steps": 56253, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3707122466086912.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }