diff --git "a/checkpoint-1500/trainer_state.json" "b/checkpoint-1500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1500/trainer_state.json" @@ -0,0 +1,50134 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4313753684664606, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "ce_ib": 65.99971008300781, + "ce_orig": 0.8247115612030029, + "epoch": 0, + "kl_loss": 3969.01025390625, + "loss_ib": 39.69670104980469, + "step": 0 + }, + { + "ce_ib": 61.875301361083984, + "ce_orig": 0.3094598948955536, + "epoch": 0, + "kl_loss": 1816.435302734375, + "loss_ib": 18.17053985595703, + "step": 0 + }, + { + "ce_ib": 65.33805084228516, + "ce_orig": 1.0820972919464111, + "epoch": 0, + "kl_loss": 4051.13818359375, + "loss_ib": 40.517913818359375, + "step": 0 + }, + { + "ce_ib": 65.36083221435547, + "ce_orig": 0.8601827025413513, + "epoch": 0, + "kl_loss": 3727.80126953125, + "loss_ib": 37.2845458984375, + "step": 0 + }, + { + "ce_ib": 64.40461730957031, + "ce_orig": 1.3601988554000854, + "epoch": 0.00028758357897764035, + "kl_loss": 3548.660888671875, + "loss_ib": 35.49304962158203, + "step": 1 + }, + { + "ce_ib": 66.136474609375, + "ce_orig": 0.9451982975006104, + "epoch": 0.00028758357897764035, + "kl_loss": 4003.119140625, + "loss_ib": 40.037803649902344, + "step": 1 + }, + { + "ce_ib": 65.30732727050781, + "ce_orig": 1.3611608743667603, + "epoch": 0.00028758357897764035, + "kl_loss": 3076.302490234375, + "loss_ib": 30.769554138183594, + "step": 1 + }, + { + "ce_ib": 63.613216400146484, + "ce_orig": 0.5681392550468445, + "epoch": 0.00028758357897764035, + "kl_loss": 3922.22265625, + "loss_ib": 39.22858810424805, + "step": 1 + }, + { + "ce_ib": 65.20169067382812, + "ce_orig": 0.9869711399078369, + "epoch": 0.0005751671579552807, + "kl_loss": 4010.333251953125, + "loss_ib": 40.1098518371582, + "step": 2 + }, + { + "ce_ib": 64.6613540649414, + "ce_orig": 1.0124142169952393, + "epoch": 0.0005751671579552807, + "kl_loss": 3416.4658203125, + "loss_ib": 34.17112350463867, + "step": 2 + }, + { + "ce_ib": 64.3924560546875, + "ce_orig": 0.825140118598938, + "epoch": 0.0005751671579552807, + "kl_loss": 3954.5244140625, + "loss_ib": 39.55168533325195, + "step": 2 + }, + { + "ce_ib": 66.31563568115234, + "ce_orig": 1.6114795207977295, + "epoch": 0.0005751671579552807, + "kl_loss": 3360.53955078125, + "loss_ib": 33.61202621459961, + "step": 2 + }, + { + "ce_ib": 63.97846603393555, + "ce_orig": 1.0248628854751587, + "epoch": 0.0008627507369329212, + "kl_loss": 3866.74462890625, + "loss_ib": 38.67384338378906, + "step": 3 + }, + { + "ce_ib": 64.94669342041016, + "ce_orig": 0.7158174514770508, + "epoch": 0.0008627507369329212, + "kl_loss": 3586.52783203125, + "loss_ib": 35.87177276611328, + "step": 3 + }, + { + "ce_ib": 66.78568267822266, + "ce_orig": 1.1728931665420532, + "epoch": 0.0008627507369329212, + "kl_loss": 3981.269775390625, + "loss_ib": 39.81937789916992, + "step": 3 + }, + { + "ce_ib": 66.30445861816406, + "ce_orig": 0.9273799657821655, + "epoch": 0.0008627507369329212, + "kl_loss": 3999.728271484375, + "loss_ib": 40.00391387939453, + "step": 3 + }, + { + "ce_ib": 63.22294616699219, + "ce_orig": 0.6721798181533813, + "epoch": 0.0011503343159105614, + "kl_loss": 3434.2626953125, + "loss_ib": 34.34894943237305, + "step": 4 + }, + { + "ce_ib": 65.629150390625, + "ce_orig": 0.851636528968811, + "epoch": 0.0011503343159105614, + "kl_loss": 3777.80029296875, + "loss_ib": 37.78456497192383, + "step": 4 + }, + { + "ce_ib": 65.70416259765625, + "ce_orig": 0.8407150506973267, + "epoch": 0.0011503343159105614, + "kl_loss": 3663.44775390625, + "loss_ib": 36.641048431396484, + "step": 4 + }, + { + "ce_ib": 65.25149536132812, + "ce_orig": 0.8431562781333923, + "epoch": 0.0011503343159105614, + "kl_loss": 4073.102783203125, + "loss_ib": 40.737552642822266, + "step": 4 + }, + { + "epoch": 0.0014379178948882019, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 37.6069, + "step": 5 + }, + { + "ce_ib": 63.31033706665039, + "ce_orig": 0.5193647146224976, + "epoch": 0.0014379178948882019, + "kl_loss": 3829.75732421875, + "loss_ib": 38.30390167236328, + "step": 5 + }, + { + "ce_ib": 64.82113647460938, + "ce_orig": 0.9080048203468323, + "epoch": 0.0014379178948882019, + "kl_loss": 4034.60400390625, + "loss_ib": 40.35251998901367, + "step": 5 + }, + { + "ce_ib": 67.75746154785156, + "ce_orig": 1.7583141326904297, + "epoch": 0.0014379178948882019, + "kl_loss": 3362.895751953125, + "loss_ib": 33.6357307434082, + "step": 5 + }, + { + "ce_ib": 65.55052947998047, + "ce_orig": 1.0019645690917969, + "epoch": 0.0014379178948882019, + "kl_loss": 3561.7119140625, + "loss_ib": 35.62367248535156, + "step": 5 + }, + { + "ce_ib": 65.5093765258789, + "ce_orig": 1.2022827863693237, + "epoch": 0.0017255014738658423, + "kl_loss": 3854.793212890625, + "loss_ib": 38.554481506347656, + "step": 6 + }, + { + "ce_ib": 63.95633316040039, + "ce_orig": 0.5561846494674683, + "epoch": 0.0017255014738658423, + "kl_loss": 3231.163818359375, + "loss_ib": 32.31803512573242, + "step": 6 + }, + { + "ce_ib": 66.91143798828125, + "ce_orig": 1.007911205291748, + "epoch": 0.0017255014738658423, + "kl_loss": 3694.936767578125, + "loss_ib": 36.956058502197266, + "step": 6 + }, + { + "ce_ib": 65.86326599121094, + "ce_orig": 1.1325939893722534, + "epoch": 0.0017255014738658423, + "kl_loss": 3653.87255859375, + "loss_ib": 36.545310974121094, + "step": 6 + }, + { + "ce_ib": 61.93317794799805, + "ce_orig": 0.3588999807834625, + "epoch": 0.0020130850528434826, + "kl_loss": 2617.40478515625, + "loss_ib": 26.180240631103516, + "step": 7 + }, + { + "ce_ib": 66.48375701904297, + "ce_orig": 0.9551417231559753, + "epoch": 0.0020130850528434826, + "kl_loss": 4009.37158203125, + "loss_ib": 40.100364685058594, + "step": 7 + }, + { + "ce_ib": 64.68529510498047, + "ce_orig": 1.3479645252227783, + "epoch": 0.0020130850528434826, + "kl_loss": 3682.17919921875, + "loss_ib": 36.82826232910156, + "step": 7 + }, + { + "ce_ib": 65.71565246582031, + "ce_orig": 1.4119635820388794, + "epoch": 0.0020130850528434826, + "kl_loss": 3543.86865234375, + "loss_ib": 35.44525909423828, + "step": 7 + }, + { + "ce_ib": 64.79589080810547, + "ce_orig": 1.1264829635620117, + "epoch": 0.002300668631821123, + "kl_loss": 3802.966552734375, + "loss_ib": 38.0361442565918, + "step": 8 + }, + { + "ce_ib": 64.57544708251953, + "ce_orig": 0.8281353712081909, + "epoch": 0.002300668631821123, + "kl_loss": 4064.0234375, + "loss_ib": 40.646690368652344, + "step": 8 + }, + { + "ce_ib": 64.70269012451172, + "ce_orig": 0.8244958519935608, + "epoch": 0.002300668631821123, + "kl_loss": 3695.80029296875, + "loss_ib": 36.964473724365234, + "step": 8 + }, + { + "ce_ib": 66.23006439208984, + "ce_orig": 0.7464499473571777, + "epoch": 0.002300668631821123, + "kl_loss": 3909.509765625, + "loss_ib": 39.10171890258789, + "step": 8 + }, + { + "ce_ib": 66.00849151611328, + "ce_orig": 1.2234286069869995, + "epoch": 0.0025882522107987635, + "kl_loss": 3269.261962890625, + "loss_ib": 32.69921875, + "step": 9 + }, + { + "ce_ib": 61.78355407714844, + "ce_orig": 0.6015470623970032, + "epoch": 0.0025882522107987635, + "kl_loss": 3815.06103515625, + "loss_ib": 38.15678787231445, + "step": 9 + }, + { + "ce_ib": 61.84153747558594, + "ce_orig": 0.6827983856201172, + "epoch": 0.0025882522107987635, + "kl_loss": 3885.240966796875, + "loss_ib": 38.85859298706055, + "step": 9 + }, + { + "ce_ib": 66.06260681152344, + "ce_orig": 1.3108824491500854, + "epoch": 0.0025882522107987635, + "kl_loss": 3949.405029296875, + "loss_ib": 39.50065612792969, + "step": 9 + }, + { + "epoch": 0.0028758357897764038, + "grad_norm": 518.9179077148438, + "learning_rate": 6.369426751592357e-07, + "loss": 37.6918, + "step": 10 + }, + { + "ce_ib": 64.02448272705078, + "ce_orig": 0.762144923210144, + "epoch": 0.0028758357897764038, + "kl_loss": 3554.281005859375, + "loss_ib": 35.54920959472656, + "step": 10 + }, + { + "ce_ib": 68.01136016845703, + "ce_orig": 1.6496213674545288, + "epoch": 0.0028758357897764038, + "kl_loss": 3769.318359375, + "loss_ib": 37.69998550415039, + "step": 10 + }, + { + "ce_ib": 68.6688003540039, + "ce_orig": 1.7943211793899536, + "epoch": 0.0028758357897764038, + "kl_loss": 3359.124267578125, + "loss_ib": 33.59811019897461, + "step": 10 + }, + { + "ce_ib": 66.47740936279297, + "ce_orig": 0.9888946413993835, + "epoch": 0.0028758357897764038, + "kl_loss": 3657.010009765625, + "loss_ib": 36.57674789428711, + "step": 10 + }, + { + "ce_ib": 68.97245788574219, + "ce_orig": 1.853747844696045, + "epoch": 0.003163419368754044, + "kl_loss": 3813.30908203125, + "loss_ib": 38.13998794555664, + "step": 11 + }, + { + "ce_ib": 66.31513214111328, + "ce_orig": 1.50633704662323, + "epoch": 0.003163419368754044, + "kl_loss": 3285.7900390625, + "loss_ib": 32.86452865600586, + "step": 11 + }, + { + "ce_ib": 63.428436279296875, + "ce_orig": 0.9150334000587463, + "epoch": 0.003163419368754044, + "kl_loss": 3867.107421875, + "loss_ib": 38.67741775512695, + "step": 11 + }, + { + "ce_ib": 64.99458312988281, + "ce_orig": 0.8206988573074341, + "epoch": 0.003163419368754044, + "kl_loss": 1840.817626953125, + "loss_ib": 18.414674758911133, + "step": 11 + }, + { + "ce_ib": 64.90898132324219, + "ce_orig": 1.1344208717346191, + "epoch": 0.0034510029477316847, + "kl_loss": 3756.06201171875, + "loss_ib": 37.56711196899414, + "step": 12 + }, + { + "ce_ib": 65.14974975585938, + "ce_orig": 0.8449010848999023, + "epoch": 0.0034510029477316847, + "kl_loss": 3669.84326171875, + "loss_ib": 36.70494842529297, + "step": 12 + }, + { + "ce_ib": 61.738800048828125, + "ce_orig": 0.8912803530693054, + "epoch": 0.0034510029477316847, + "kl_loss": 3856.634765625, + "loss_ib": 38.5725212097168, + "step": 12 + }, + { + "ce_ib": 62.223426818847656, + "ce_orig": 0.6894405484199524, + "epoch": 0.0034510029477316847, + "kl_loss": 3847.307373046875, + "loss_ib": 38.47929763793945, + "step": 12 + }, + { + "ce_ib": 62.734622955322266, + "ce_orig": 0.8210228085517883, + "epoch": 0.003738586526709325, + "kl_loss": 4047.759765625, + "loss_ib": 40.48387145996094, + "step": 13 + }, + { + "ce_ib": 63.48801040649414, + "ce_orig": 0.6192799806594849, + "epoch": 0.003738586526709325, + "kl_loss": 3207.78173828125, + "loss_ib": 32.084163665771484, + "step": 13 + }, + { + "ce_ib": 63.36425018310547, + "ce_orig": 0.8307191729545593, + "epoch": 0.003738586526709325, + "kl_loss": 4154.55859375, + "loss_ib": 41.55192184448242, + "step": 13 + }, + { + "ce_ib": 63.72712326049805, + "ce_orig": 0.6353437304496765, + "epoch": 0.003738586526709325, + "kl_loss": 3861.010009765625, + "loss_ib": 38.61647033691406, + "step": 13 + }, + { + "ce_ib": 63.29721450805664, + "ce_orig": 1.0746912956237793, + "epoch": 0.004026170105686965, + "kl_loss": 3788.18603515625, + "loss_ib": 37.88819122314453, + "step": 14 + }, + { + "ce_ib": 64.876708984375, + "ce_orig": 0.2551986575126648, + "epoch": 0.004026170105686965, + "kl_loss": 3221.554931640625, + "loss_ib": 32.2220344543457, + "step": 14 + }, + { + "ce_ib": 63.66843032836914, + "ce_orig": 0.9092416763305664, + "epoch": 0.004026170105686965, + "kl_loss": 4153.7578125, + "loss_ib": 41.5439453125, + "step": 14 + }, + { + "ce_ib": 62.37735366821289, + "ce_orig": 0.4772454798221588, + "epoch": 0.004026170105686965, + "kl_loss": 3842.333984375, + "loss_ib": 38.4295768737793, + "step": 14 + }, + { + "epoch": 0.004313753684664605, + "grad_norm": 522.8844604492188, + "learning_rate": 1.4331210191082802e-06, + "loss": 37.6292, + "step": 15 + }, + { + "ce_ib": 65.9225845336914, + "ce_orig": 1.2483989000320435, + "epoch": 0.004313753684664605, + "kl_loss": 3718.763427734375, + "loss_ib": 37.1942253112793, + "step": 15 + }, + { + "ce_ib": 62.323360443115234, + "ce_orig": 0.6228176951408386, + "epoch": 0.004313753684664605, + "kl_loss": 3593.427490234375, + "loss_ib": 35.94050598144531, + "step": 15 + }, + { + "ce_ib": 63.397438049316406, + "ce_orig": 1.2859151363372803, + "epoch": 0.004313753684664605, + "kl_loss": 4189.4609375, + "loss_ib": 41.90094757080078, + "step": 15 + }, + { + "ce_ib": 63.35916519165039, + "ce_orig": 0.7082123160362244, + "epoch": 0.004313753684664605, + "kl_loss": 3782.929443359375, + "loss_ib": 37.835628509521484, + "step": 15 + }, + { + "ce_ib": 63.69921112060547, + "ce_orig": 0.7915922999382019, + "epoch": 0.004601337263642246, + "kl_loss": 3295.76953125, + "loss_ib": 32.96406555175781, + "step": 16 + }, + { + "ce_ib": 64.55254364013672, + "ce_orig": 1.4573107957839966, + "epoch": 0.004601337263642246, + "kl_loss": 3830.550537109375, + "loss_ib": 38.31195831298828, + "step": 16 + }, + { + "ce_ib": 63.20068359375, + "ce_orig": 0.9544379115104675, + "epoch": 0.004601337263642246, + "kl_loss": 3457.2080078125, + "loss_ib": 34.578399658203125, + "step": 16 + }, + { + "ce_ib": 67.24832153320312, + "ce_orig": 0.8406115174293518, + "epoch": 0.004601337263642246, + "kl_loss": 4083.5341796875, + "loss_ib": 40.842063903808594, + "step": 16 + }, + { + "ce_ib": 66.60543060302734, + "ce_orig": 1.3419269323349, + "epoch": 0.004888920842619887, + "kl_loss": 3605.4677734375, + "loss_ib": 36.061336517333984, + "step": 17 + }, + { + "ce_ib": 62.604434967041016, + "ce_orig": 0.6389816999435425, + "epoch": 0.004888920842619887, + "kl_loss": 4083.78759765625, + "loss_ib": 40.84413528442383, + "step": 17 + }, + { + "ce_ib": 64.72972106933594, + "ce_orig": 1.176672101020813, + "epoch": 0.004888920842619887, + "kl_loss": 1900.025634765625, + "loss_ib": 19.006729125976562, + "step": 17 + }, + { + "ce_ib": 65.18509674072266, + "ce_orig": 1.2524960041046143, + "epoch": 0.004888920842619887, + "kl_loss": 3951.653076171875, + "loss_ib": 39.523048400878906, + "step": 17 + }, + { + "ce_ib": 64.49193572998047, + "ce_orig": 1.1009666919708252, + "epoch": 0.005176504421597527, + "kl_loss": 3485.996826171875, + "loss_ib": 34.866416931152344, + "step": 18 + }, + { + "ce_ib": 61.90851974487305, + "ce_orig": 0.4805839955806732, + "epoch": 0.005176504421597527, + "kl_loss": 3180.7490234375, + "loss_ib": 31.81368064880371, + "step": 18 + }, + { + "ce_ib": 65.22083282470703, + "ce_orig": 0.80530846118927, + "epoch": 0.005176504421597527, + "kl_loss": 3836.34423828125, + "loss_ib": 38.36996078491211, + "step": 18 + }, + { + "ce_ib": 65.64443969726562, + "ce_orig": 1.2098023891448975, + "epoch": 0.005176504421597527, + "kl_loss": 3608.783203125, + "loss_ib": 36.09439468383789, + "step": 18 + }, + { + "ce_ib": 64.61073303222656, + "ce_orig": 1.073931097984314, + "epoch": 0.005464088000575167, + "kl_loss": 3759.63671875, + "loss_ib": 37.60282516479492, + "step": 19 + }, + { + "ce_ib": 66.11485290527344, + "ce_orig": 1.3443665504455566, + "epoch": 0.005464088000575167, + "kl_loss": 3318.16650390625, + "loss_ib": 33.18827438354492, + "step": 19 + }, + { + "ce_ib": 66.71015167236328, + "ce_orig": 0.8358739018440247, + "epoch": 0.005464088000575167, + "kl_loss": 4202.9921875, + "loss_ib": 42.036590576171875, + "step": 19 + }, + { + "ce_ib": 67.69292449951172, + "ce_orig": 1.7301944494247437, + "epoch": 0.005464088000575167, + "kl_loss": 3555.893310546875, + "loss_ib": 35.56570053100586, + "step": 19 + }, + { + "epoch": 0.0057516715795528075, + "grad_norm": 523.6597900390625, + "learning_rate": 2.229299363057325e-06, + "loss": 38.0114, + "step": 20 + }, + { + "ce_ib": 63.84000015258789, + "ce_orig": 0.7589442729949951, + "epoch": 0.0057516715795528075, + "kl_loss": 4097.67724609375, + "loss_ib": 40.983154296875, + "step": 20 + }, + { + "ce_ib": 62.5760612487793, + "ce_orig": 0.6316663026809692, + "epoch": 0.0057516715795528075, + "kl_loss": 3378.1162109375, + "loss_ib": 33.787418365478516, + "step": 20 + }, + { + "ce_ib": 64.90914154052734, + "ce_orig": 0.8841529488563538, + "epoch": 0.0057516715795528075, + "kl_loss": 4190.35009765625, + "loss_ib": 41.90998840332031, + "step": 20 + }, + { + "ce_ib": 63.49992370605469, + "ce_orig": 1.1218868494033813, + "epoch": 0.0057516715795528075, + "kl_loss": 3893.951171875, + "loss_ib": 38.94586181640625, + "step": 20 + }, + { + "ce_ib": 62.81148147583008, + "ce_orig": 0.7255597710609436, + "epoch": 0.006039255158530448, + "kl_loss": 4084.71142578125, + "loss_ib": 40.8533935546875, + "step": 21 + }, + { + "ce_ib": 62.18263244628906, + "ce_orig": 0.6901943683624268, + "epoch": 0.006039255158530448, + "kl_loss": 3732.931396484375, + "loss_ib": 37.335533142089844, + "step": 21 + }, + { + "ce_ib": 64.20355987548828, + "ce_orig": 1.0124316215515137, + "epoch": 0.006039255158530448, + "kl_loss": 3761.3251953125, + "loss_ib": 37.61967086791992, + "step": 21 + }, + { + "ce_ib": 61.63228988647461, + "ce_orig": 0.5279907584190369, + "epoch": 0.006039255158530448, + "kl_loss": 3691.64111328125, + "loss_ib": 36.92257308959961, + "step": 21 + }, + { + "ce_ib": 62.56758499145508, + "ce_orig": 0.7798469066619873, + "epoch": 0.006326838737508088, + "kl_loss": 3670.70166015625, + "loss_ib": 36.71327209472656, + "step": 22 + }, + { + "ce_ib": 63.57075881958008, + "ce_orig": 0.8365420699119568, + "epoch": 0.006326838737508088, + "kl_loss": 3658.580322265625, + "loss_ib": 36.592159271240234, + "step": 22 + }, + { + "ce_ib": 61.62807083129883, + "ce_orig": 0.5540810823440552, + "epoch": 0.006326838737508088, + "kl_loss": 3681.03662109375, + "loss_ib": 36.8165283203125, + "step": 22 + }, + { + "ce_ib": 64.64292907714844, + "ce_orig": 1.0211745500564575, + "epoch": 0.006326838737508088, + "kl_loss": 3909.70458984375, + "loss_ib": 39.10350799560547, + "step": 22 + }, + { + "ce_ib": 63.90407180786133, + "ce_orig": 1.0038657188415527, + "epoch": 0.006614422316485728, + "kl_loss": 3516.5341796875, + "loss_ib": 35.17173385620117, + "step": 23 + }, + { + "ce_ib": 64.3149642944336, + "ce_orig": 1.43198823928833, + "epoch": 0.006614422316485728, + "kl_loss": 3473.03955078125, + "loss_ib": 34.73682403564453, + "step": 23 + }, + { + "ce_ib": 65.7113037109375, + "ce_orig": 1.3454030752182007, + "epoch": 0.006614422316485728, + "kl_loss": 3446.73095703125, + "loss_ib": 34.473880767822266, + "step": 23 + }, + { + "ce_ib": 64.66767120361328, + "ce_orig": 1.1042531728744507, + "epoch": 0.006614422316485728, + "kl_loss": 3285.74560546875, + "loss_ib": 32.863922119140625, + "step": 23 + }, + { + "ce_ib": 62.60567092895508, + "ce_orig": 0.8803403973579407, + "epoch": 0.006902005895463369, + "kl_loss": 4045.211669921875, + "loss_ib": 40.4583740234375, + "step": 24 + }, + { + "ce_ib": 62.218082427978516, + "ce_orig": 0.5355222225189209, + "epoch": 0.006902005895463369, + "kl_loss": 2061.3427734375, + "loss_ib": 20.619647979736328, + "step": 24 + }, + { + "ce_ib": 62.739349365234375, + "ce_orig": 0.7724053263664246, + "epoch": 0.006902005895463369, + "kl_loss": 3955.72021484375, + "loss_ib": 39.5634765625, + "step": 24 + }, + { + "ce_ib": 64.84529876708984, + "ce_orig": 0.9221442341804504, + "epoch": 0.006902005895463369, + "kl_loss": 4072.906982421875, + "loss_ib": 40.73555374145508, + "step": 24 + }, + { + "epoch": 0.00718958947444101, + "grad_norm": 500.6357116699219, + "learning_rate": 3.0254777070063695e-06, + "loss": 37.5291, + "step": 25 + }, + { + "ce_ib": 64.90079498291016, + "ce_orig": 1.3044438362121582, + "epoch": 0.00718958947444101, + "kl_loss": 3491.833984375, + "loss_ib": 34.924827575683594, + "step": 25 + }, + { + "ce_ib": 60.71520233154297, + "ce_orig": 0.4326849579811096, + "epoch": 0.00718958947444101, + "kl_loss": 3640.559326171875, + "loss_ib": 36.41166305541992, + "step": 25 + }, + { + "ce_ib": 64.01187896728516, + "ce_orig": 1.0306893587112427, + "epoch": 0.00718958947444101, + "kl_loss": 3960.2236328125, + "loss_ib": 39.60863494873047, + "step": 25 + }, + { + "ce_ib": 64.18307495117188, + "ce_orig": 0.9839837551116943, + "epoch": 0.00718958947444101, + "kl_loss": 3976.920654296875, + "loss_ib": 39.7756233215332, + "step": 25 + }, + { + "ce_ib": 64.72349548339844, + "ce_orig": 1.4616881608963013, + "epoch": 0.00747717305341865, + "kl_loss": 3761.33935546875, + "loss_ib": 37.61986541748047, + "step": 26 + }, + { + "ce_ib": 64.97052764892578, + "ce_orig": 0.7502491474151611, + "epoch": 0.00747717305341865, + "kl_loss": 3893.336669921875, + "loss_ib": 38.93986129760742, + "step": 26 + }, + { + "ce_ib": 65.82380676269531, + "ce_orig": 1.34544038772583, + "epoch": 0.00747717305341865, + "kl_loss": 3513.64404296875, + "loss_ib": 35.14302062988281, + "step": 26 + }, + { + "ce_ib": 65.6162109375, + "ce_orig": 1.0590736865997314, + "epoch": 0.00747717305341865, + "kl_loss": 3592.0341796875, + "loss_ib": 35.926902770996094, + "step": 26 + }, + { + "ce_ib": 60.9577522277832, + "ce_orig": 0.7530563473701477, + "epoch": 0.00776475663239629, + "kl_loss": 3584.533203125, + "loss_ib": 35.8514289855957, + "step": 27 + }, + { + "ce_ib": 62.96725845336914, + "ce_orig": 0.7575862407684326, + "epoch": 0.00776475663239629, + "kl_loss": 4020.78271484375, + "loss_ib": 40.2141227722168, + "step": 27 + }, + { + "ce_ib": 63.01191329956055, + "ce_orig": 0.8695152997970581, + "epoch": 0.00776475663239629, + "kl_loss": 3717.216064453125, + "loss_ib": 37.178462982177734, + "step": 27 + }, + { + "ce_ib": 61.84136199951172, + "ce_orig": 0.5044524669647217, + "epoch": 0.00776475663239629, + "kl_loss": 3559.84326171875, + "loss_ib": 35.6046142578125, + "step": 27 + }, + { + "ce_ib": 62.60879898071289, + "ce_orig": 0.7411525249481201, + "epoch": 0.00805234021137393, + "kl_loss": 3949.431640625, + "loss_ib": 39.50057601928711, + "step": 28 + }, + { + "ce_ib": 63.889503479003906, + "ce_orig": 0.7678407430648804, + "epoch": 0.00805234021137393, + "kl_loss": 3446.158447265625, + "loss_ib": 34.46797180175781, + "step": 28 + }, + { + "ce_ib": 64.12403869628906, + "ce_orig": 1.3409479856491089, + "epoch": 0.00805234021137393, + "kl_loss": 3590.536376953125, + "loss_ib": 35.91177749633789, + "step": 28 + }, + { + "ce_ib": 67.9863510131836, + "ce_orig": 1.4907015562057495, + "epoch": 0.00805234021137393, + "kl_loss": 4102.9951171875, + "loss_ib": 41.036746978759766, + "step": 28 + }, + { + "ce_ib": 63.33028793334961, + "ce_orig": 0.7299618721008301, + "epoch": 0.008339923790351571, + "kl_loss": 3933.06591796875, + "loss_ib": 39.33699035644531, + "step": 29 + }, + { + "ce_ib": 65.11859893798828, + "ce_orig": 1.07808256149292, + "epoch": 0.008339923790351571, + "kl_loss": 3267.3017578125, + "loss_ib": 32.679527282714844, + "step": 29 + }, + { + "ce_ib": 61.750606536865234, + "ce_orig": 1.5331333875656128, + "epoch": 0.008339923790351571, + "kl_loss": 3922.66748046875, + "loss_ib": 39.23284912109375, + "step": 29 + }, + { + "ce_ib": 63.025821685791016, + "ce_orig": 0.9248456954956055, + "epoch": 0.008339923790351571, + "kl_loss": 3479.8994140625, + "loss_ib": 34.8052978515625, + "step": 29 + }, + { + "epoch": 0.00862750736932921, + "grad_norm": 531.7625732421875, + "learning_rate": 3.821656050955414e-06, + "loss": 37.9283, + "step": 30 + }, + { + "ce_ib": 63.19704055786133, + "ce_orig": 0.813347339630127, + "epoch": 0.00862750736932921, + "kl_loss": 3683.62451171875, + "loss_ib": 36.84256362915039, + "step": 30 + }, + { + "ce_ib": 64.78852844238281, + "ce_orig": 1.25947105884552, + "epoch": 0.00862750736932921, + "kl_loss": 3672.6279296875, + "loss_ib": 36.732757568359375, + "step": 30 + }, + { + "ce_ib": 65.48835754394531, + "ce_orig": 1.5832844972610474, + "epoch": 0.00862750736932921, + "kl_loss": 3535.34765625, + "loss_ib": 35.360023498535156, + "step": 30 + }, + { + "ce_ib": 67.59868621826172, + "ce_orig": 1.2523659467697144, + "epoch": 0.00862750736932921, + "kl_loss": 3592.551513671875, + "loss_ib": 35.932273864746094, + "step": 30 + }, + { + "ce_ib": 65.58070373535156, + "ce_orig": 1.3204323053359985, + "epoch": 0.008915090948306852, + "kl_loss": 3685.355224609375, + "loss_ib": 36.860107421875, + "step": 31 + }, + { + "ce_ib": 65.22953033447266, + "ce_orig": 1.7766001224517822, + "epoch": 0.008915090948306852, + "kl_loss": 3778.63427734375, + "loss_ib": 37.79286575317383, + "step": 31 + }, + { + "ce_ib": 63.3836555480957, + "ce_orig": 1.204979419708252, + "epoch": 0.008915090948306852, + "kl_loss": 3684.710693359375, + "loss_ib": 36.85344314575195, + "step": 31 + }, + { + "ce_ib": 62.92445755004883, + "ce_orig": 0.7449155449867249, + "epoch": 0.008915090948306852, + "kl_loss": 3957.828857421875, + "loss_ib": 39.58457946777344, + "step": 31 + }, + { + "ce_ib": 63.693058013916016, + "ce_orig": 0.963614821434021, + "epoch": 0.009202674527284491, + "kl_loss": 3871.184814453125, + "loss_ib": 38.71821594238281, + "step": 32 + }, + { + "ce_ib": 62.92957305908203, + "ce_orig": 0.699960470199585, + "epoch": 0.009202674527284491, + "kl_loss": 3611.31103515625, + "loss_ib": 36.11940383911133, + "step": 32 + }, + { + "ce_ib": 61.641639709472656, + "ce_orig": 0.5809459686279297, + "epoch": 0.009202674527284491, + "kl_loss": 3945.29052734375, + "loss_ib": 39.459068298339844, + "step": 32 + }, + { + "ce_ib": 61.629180908203125, + "ce_orig": 0.4764775037765503, + "epoch": 0.009202674527284491, + "kl_loss": 3752.23681640625, + "loss_ib": 37.52853012084961, + "step": 32 + }, + { + "ce_ib": 65.62612915039062, + "ce_orig": 1.0748307704925537, + "epoch": 0.009490258106262132, + "kl_loss": 3731.794921875, + "loss_ib": 37.32451248168945, + "step": 33 + }, + { + "ce_ib": 61.40937423706055, + "ce_orig": 1.1108014583587646, + "epoch": 0.009490258106262132, + "kl_loss": 3651.0771484375, + "loss_ib": 36.516910552978516, + "step": 33 + }, + { + "ce_ib": 59.90447235107422, + "ce_orig": 0.32240188121795654, + "epoch": 0.009490258106262132, + "kl_loss": 3386.24365234375, + "loss_ib": 33.86842727661133, + "step": 33 + }, + { + "ce_ib": 62.98430633544922, + "ce_orig": 1.2453433275222778, + "epoch": 0.009490258106262132, + "kl_loss": 3598.07275390625, + "loss_ib": 35.98702621459961, + "step": 33 + }, + { + "ce_ib": 63.15117263793945, + "ce_orig": 0.7339913249015808, + "epoch": 0.009777841685239774, + "kl_loss": 2625.630859375, + "loss_ib": 26.262624740600586, + "step": 34 + }, + { + "ce_ib": 61.62659454345703, + "ce_orig": 1.0284781455993652, + "epoch": 0.009777841685239774, + "kl_loss": 3689.47705078125, + "loss_ib": 36.90093231201172, + "step": 34 + }, + { + "ce_ib": 62.00101852416992, + "ce_orig": 0.7457196116447449, + "epoch": 0.009777841685239774, + "kl_loss": 3870.28466796875, + "loss_ib": 38.70904541015625, + "step": 34 + }, + { + "ce_ib": 60.758033752441406, + "ce_orig": 0.4431888163089752, + "epoch": 0.009777841685239774, + "kl_loss": 2776.04248046875, + "loss_ib": 27.76650047302246, + "step": 34 + }, + { + "epoch": 0.010065425264217413, + "grad_norm": 514.6280517578125, + "learning_rate": 4.6178343949044585e-06, + "loss": 36.6953, + "step": 35 + }, + { + "ce_ib": 61.88233947753906, + "ce_orig": 0.742504894733429, + "epoch": 0.010065425264217413, + "kl_loss": 3676.15625, + "loss_ib": 36.76774978637695, + "step": 35 + }, + { + "ce_ib": 65.21971130371094, + "ce_orig": 1.4801323413848877, + "epoch": 0.010065425264217413, + "kl_loss": 3239.951171875, + "loss_ib": 32.40603256225586, + "step": 35 + }, + { + "ce_ib": 61.43843460083008, + "ce_orig": 0.8911157846450806, + "epoch": 0.010065425264217413, + "kl_loss": 3907.4306640625, + "loss_ib": 39.08045196533203, + "step": 35 + }, + { + "ce_ib": 60.8431510925293, + "ce_orig": 0.6813702583312988, + "epoch": 0.010065425264217413, + "kl_loss": 3710.92919921875, + "loss_ib": 37.11537551879883, + "step": 35 + }, + { + "ce_ib": 62.07615280151367, + "ce_orig": 0.9490892887115479, + "epoch": 0.010353008843195054, + "kl_loss": 3473.35009765625, + "loss_ib": 34.739707946777344, + "step": 36 + }, + { + "ce_ib": 63.5875358581543, + "ce_orig": 1.1264761686325073, + "epoch": 0.010353008843195054, + "kl_loss": 3691.65673828125, + "loss_ib": 36.92292785644531, + "step": 36 + }, + { + "ce_ib": 61.989559173583984, + "ce_orig": 0.7521851062774658, + "epoch": 0.010353008843195054, + "kl_loss": 3681.02392578125, + "loss_ib": 36.816436767578125, + "step": 36 + }, + { + "ce_ib": 66.18658447265625, + "ce_orig": 1.4330860376358032, + "epoch": 0.010353008843195054, + "kl_loss": 3735.92529296875, + "loss_ib": 37.36587142944336, + "step": 36 + }, + { + "ce_ib": 61.510005950927734, + "ce_orig": 0.6856619119644165, + "epoch": 0.010640592422172693, + "kl_loss": 3193.4365234375, + "loss_ib": 31.940513610839844, + "step": 37 + }, + { + "ce_ib": 65.34510040283203, + "ce_orig": 1.6867130994796753, + "epoch": 0.010640592422172693, + "kl_loss": 3602.250244140625, + "loss_ib": 36.02903747558594, + "step": 37 + }, + { + "ce_ib": 63.198848724365234, + "ce_orig": 1.003406286239624, + "epoch": 0.010640592422172693, + "kl_loss": 3923.912109375, + "loss_ib": 39.24544143676758, + "step": 37 + }, + { + "ce_ib": 63.12528610229492, + "ce_orig": 1.0664888620376587, + "epoch": 0.010640592422172693, + "kl_loss": 3344.81298828125, + "loss_ib": 33.45444107055664, + "step": 37 + }, + { + "ce_ib": 61.980186462402344, + "ce_orig": 0.6754278540611267, + "epoch": 0.010928176001150335, + "kl_loss": 3765.134033203125, + "loss_ib": 37.65753936767578, + "step": 38 + }, + { + "ce_ib": 64.89917755126953, + "ce_orig": 1.3892360925674438, + "epoch": 0.010928176001150335, + "kl_loss": 3431.3056640625, + "loss_ib": 34.31954574584961, + "step": 38 + }, + { + "ce_ib": 63.881900787353516, + "ce_orig": 0.9926798343658447, + "epoch": 0.010928176001150335, + "kl_loss": 3779.434814453125, + "loss_ib": 37.80073547363281, + "step": 38 + }, + { + "ce_ib": 61.26738357543945, + "ce_orig": 0.5064423084259033, + "epoch": 0.010928176001150335, + "kl_loss": 3226.73974609375, + "loss_ib": 32.27352523803711, + "step": 38 + }, + { + "ce_ib": 61.48027420043945, + "ce_orig": 0.6444438099861145, + "epoch": 0.011215759580127974, + "kl_loss": 3812.853271484375, + "loss_ib": 38.13467788696289, + "step": 39 + }, + { + "ce_ib": 65.14291381835938, + "ce_orig": 1.157513976097107, + "epoch": 0.011215759580127974, + "kl_loss": 3730.44921875, + "loss_ib": 37.311004638671875, + "step": 39 + }, + { + "ce_ib": 65.04698944091797, + "ce_orig": 1.4464482069015503, + "epoch": 0.011215759580127974, + "kl_loss": 3499.406005859375, + "loss_ib": 35.00056457519531, + "step": 39 + }, + { + "ce_ib": 61.870948791503906, + "ce_orig": 0.7682390213012695, + "epoch": 0.011215759580127974, + "kl_loss": 3903.7294921875, + "loss_ib": 39.043479919433594, + "step": 39 + }, + { + "epoch": 0.011503343159105615, + "grad_norm": 530.5418701171875, + "learning_rate": 5.414012738853504e-06, + "loss": 36.8859, + "step": 40 + }, + { + "ce_ib": 66.74645233154297, + "ce_orig": 1.7853225469589233, + "epoch": 0.011503343159105615, + "kl_loss": 3331.3544921875, + "loss_ib": 33.32021713256836, + "step": 40 + }, + { + "ce_ib": 62.318843841552734, + "ce_orig": 0.7977343797683716, + "epoch": 0.011503343159105615, + "kl_loss": 3502.8125, + "loss_ib": 35.034358978271484, + "step": 40 + }, + { + "ce_ib": 63.681610107421875, + "ce_orig": 1.3478271961212158, + "epoch": 0.011503343159105615, + "kl_loss": 3501.63134765625, + "loss_ib": 35.02267837524414, + "step": 40 + }, + { + "ce_ib": 62.9583740234375, + "ce_orig": 1.0469328165054321, + "epoch": 0.011503343159105615, + "kl_loss": 3721.776611328125, + "loss_ib": 37.22406005859375, + "step": 40 + }, + { + "ce_ib": 61.38816833496094, + "ce_orig": 0.7066026926040649, + "epoch": 0.011790926738083256, + "kl_loss": 3805.5771484375, + "loss_ib": 38.06190872192383, + "step": 41 + }, + { + "ce_ib": 61.03729248046875, + "ce_orig": 0.7563859820365906, + "epoch": 0.011790926738083256, + "kl_loss": 3660.0556640625, + "loss_ib": 36.606658935546875, + "step": 41 + }, + { + "ce_ib": 63.25761795043945, + "ce_orig": 0.8998059034347534, + "epoch": 0.011790926738083256, + "kl_loss": 3492.48876953125, + "loss_ib": 34.93121337890625, + "step": 41 + }, + { + "ce_ib": 62.87449264526367, + "ce_orig": 0.9313430190086365, + "epoch": 0.011790926738083256, + "kl_loss": 3241.703125, + "loss_ib": 32.423316955566406, + "step": 41 + }, + { + "ce_ib": 64.49095916748047, + "ce_orig": 1.71138596534729, + "epoch": 0.012078510317060896, + "kl_loss": 3664.8984375, + "loss_ib": 36.655433654785156, + "step": 42 + }, + { + "ce_ib": 61.09724807739258, + "ce_orig": 0.7595378756523132, + "epoch": 0.012078510317060896, + "kl_loss": 3622.4169921875, + "loss_ib": 36.230281829833984, + "step": 42 + }, + { + "ce_ib": 60.89824676513672, + "ce_orig": 0.9210549592971802, + "epoch": 0.012078510317060896, + "kl_loss": 3331.73388671875, + "loss_ib": 33.32342529296875, + "step": 42 + }, + { + "ce_ib": 62.15813446044922, + "ce_orig": 0.6488374471664429, + "epoch": 0.012078510317060896, + "kl_loss": 3685.21826171875, + "loss_ib": 36.8583984375, + "step": 42 + }, + { + "ce_ib": 62.060028076171875, + "ce_orig": 0.8468491435050964, + "epoch": 0.012366093896038537, + "kl_loss": 3631.91064453125, + "loss_ib": 36.325313568115234, + "step": 43 + }, + { + "ce_ib": 64.66139221191406, + "ce_orig": 1.069366693496704, + "epoch": 0.012366093896038537, + "kl_loss": 3382.873046875, + "loss_ib": 33.83519744873047, + "step": 43 + }, + { + "ce_ib": 59.3759651184082, + "ce_orig": 0.6913302540779114, + "epoch": 0.012366093896038537, + "kl_loss": 3886.6796875, + "loss_ib": 38.87273406982422, + "step": 43 + }, + { + "ce_ib": 63.80333709716797, + "ce_orig": 1.2420175075531006, + "epoch": 0.012366093896038537, + "kl_loss": 3315.640625, + "loss_ib": 33.162784576416016, + "step": 43 + }, + { + "ce_ib": 61.69696044921875, + "ce_orig": 0.9505507349967957, + "epoch": 0.012653677475016176, + "kl_loss": 3327.21044921875, + "loss_ib": 33.27827453613281, + "step": 44 + }, + { + "ce_ib": 60.231571197509766, + "ce_orig": 1.0403425693511963, + "epoch": 0.012653677475016176, + "kl_loss": 3099.7314453125, + "loss_ib": 31.00333595275879, + "step": 44 + }, + { + "ce_ib": 60.59477996826172, + "ce_orig": 0.9732199907302856, + "epoch": 0.012653677475016176, + "kl_loss": 3785.45654296875, + "loss_ib": 37.86062240600586, + "step": 44 + }, + { + "ce_ib": 60.5795783996582, + "ce_orig": 0.9181808233261108, + "epoch": 0.012653677475016176, + "kl_loss": 3470.03466796875, + "loss_ib": 34.70640182495117, + "step": 44 + }, + { + "epoch": 0.012941261053993817, + "grad_norm": 476.64434814453125, + "learning_rate": 6.210191082802548e-06, + "loss": 35.3798, + "step": 45 + }, + { + "ce_ib": 61.55487060546875, + "ce_orig": 1.3028727769851685, + "epoch": 0.012941261053993817, + "kl_loss": 3647.279541015625, + "loss_ib": 36.47895050048828, + "step": 45 + }, + { + "ce_ib": 60.31386947631836, + "ce_orig": 0.6792593598365784, + "epoch": 0.012941261053993817, + "kl_loss": 3385.7255859375, + "loss_ib": 33.863285064697266, + "step": 45 + }, + { + "ce_ib": 61.40266036987305, + "ce_orig": 0.9505258798599243, + "epoch": 0.012941261053993817, + "kl_loss": 3687.73779296875, + "loss_ib": 36.88351821899414, + "step": 45 + }, + { + "ce_ib": 62.62788009643555, + "ce_orig": 0.7607139945030212, + "epoch": 0.012941261053993817, + "kl_loss": 3367.47412109375, + "loss_ib": 33.68100357055664, + "step": 45 + }, + { + "ce_ib": 60.83255386352539, + "ce_orig": 1.0399366617202759, + "epoch": 0.013228844632971457, + "kl_loss": 3394.5810546875, + "loss_ib": 33.9518928527832, + "step": 46 + }, + { + "ce_ib": 61.447139739990234, + "ce_orig": 0.8051762580871582, + "epoch": 0.013228844632971457, + "kl_loss": 3173.857421875, + "loss_ib": 31.744718551635742, + "step": 46 + }, + { + "ce_ib": 61.36168670654297, + "ce_orig": 1.1020334959030151, + "epoch": 0.013228844632971457, + "kl_loss": 3414.359130859375, + "loss_ib": 34.14972686767578, + "step": 46 + }, + { + "ce_ib": 60.41455841064453, + "ce_orig": 0.706063449382782, + "epoch": 0.013228844632971457, + "kl_loss": 3687.080078125, + "loss_ib": 36.8768424987793, + "step": 46 + }, + { + "ce_ib": 60.49672317504883, + "ce_orig": 0.8405731320381165, + "epoch": 0.013516428211949098, + "kl_loss": 3867.85693359375, + "loss_ib": 38.68461990356445, + "step": 47 + }, + { + "ce_ib": 63.74329376220703, + "ce_orig": 0.8466535806655884, + "epoch": 0.013516428211949098, + "kl_loss": 3611.955078125, + "loss_ib": 36.12592315673828, + "step": 47 + }, + { + "ce_ib": 61.18013381958008, + "ce_orig": 0.9858855605125427, + "epoch": 0.013516428211949098, + "kl_loss": 3885.5107421875, + "loss_ib": 38.86122512817383, + "step": 47 + }, + { + "ce_ib": 65.59097290039062, + "ce_orig": 1.7553099393844604, + "epoch": 0.013516428211949098, + "kl_loss": 3490.478515625, + "loss_ib": 34.91134262084961, + "step": 47 + }, + { + "ce_ib": 61.62842559814453, + "ce_orig": 0.838309109210968, + "epoch": 0.013804011790926739, + "kl_loss": 3487.0517578125, + "loss_ib": 34.876678466796875, + "step": 48 + }, + { + "ce_ib": 62.534908294677734, + "ce_orig": 1.5411649942398071, + "epoch": 0.013804011790926739, + "kl_loss": 3555.408203125, + "loss_ib": 35.560333251953125, + "step": 48 + }, + { + "ce_ib": 61.39102554321289, + "ce_orig": 0.5941852927207947, + "epoch": 0.013804011790926739, + "kl_loss": 3320.55078125, + "loss_ib": 33.211647033691406, + "step": 48 + }, + { + "ce_ib": 62.17927551269531, + "ce_orig": 1.398880958557129, + "epoch": 0.013804011790926739, + "kl_loss": 2767.198974609375, + "loss_ib": 27.678207397460938, + "step": 48 + }, + { + "ce_ib": 61.87635803222656, + "ce_orig": 1.1401021480560303, + "epoch": 0.014091595369904378, + "kl_loss": 3533.70703125, + "loss_ib": 35.343257904052734, + "step": 49 + }, + { + "ce_ib": 59.036781311035156, + "ce_orig": 0.7694017291069031, + "epoch": 0.014091595369904378, + "kl_loss": 3748.14453125, + "loss_ib": 37.48734664916992, + "step": 49 + }, + { + "ce_ib": 60.804603576660156, + "ce_orig": 1.3716927766799927, + "epoch": 0.014091595369904378, + "kl_loss": 3407.55224609375, + "loss_ib": 34.08160400390625, + "step": 49 + }, + { + "ce_ib": 60.6622200012207, + "ce_orig": 0.6214744448661804, + "epoch": 0.014091595369904378, + "kl_loss": 3320.9990234375, + "loss_ib": 33.21605682373047, + "step": 49 + }, + { + "epoch": 0.01437917894888202, + "grad_norm": 510.91943359375, + "learning_rate": 7.006369426751593e-06, + "loss": 35.2805, + "step": 50 + }, + { + "ce_ib": 58.74787139892578, + "ce_orig": 0.8769705891609192, + "epoch": 0.01437917894888202, + "kl_loss": 3581.33544921875, + "loss_ib": 35.81922912597656, + "step": 50 + }, + { + "ce_ib": 57.18757247924805, + "ce_orig": 0.699286699295044, + "epoch": 0.01437917894888202, + "kl_loss": 3327.9033203125, + "loss_ib": 33.284751892089844, + "step": 50 + }, + { + "ce_ib": 60.32874298095703, + "ce_orig": 0.722357451915741, + "epoch": 0.01437917894888202, + "kl_loss": 3368.121337890625, + "loss_ib": 33.6872444152832, + "step": 50 + }, + { + "ce_ib": 63.51592254638672, + "ce_orig": 1.0477139949798584, + "epoch": 0.01437917894888202, + "kl_loss": 3434.626708984375, + "loss_ib": 34.35261917114258, + "step": 50 + }, + { + "ce_ib": 58.714073181152344, + "ce_orig": 0.6123059391975403, + "epoch": 0.014666762527859659, + "kl_loss": 3747.56982421875, + "loss_ib": 37.4815673828125, + "step": 51 + }, + { + "ce_ib": 61.6097526550293, + "ce_orig": 1.2942873239517212, + "epoch": 0.014666762527859659, + "kl_loss": 3235.971923828125, + "loss_ib": 32.36587905883789, + "step": 51 + }, + { + "ce_ib": 58.607505798339844, + "ce_orig": 0.6153243184089661, + "epoch": 0.014666762527859659, + "kl_loss": 3752.208984375, + "loss_ib": 37.527950286865234, + "step": 51 + }, + { + "ce_ib": 60.741729736328125, + "ce_orig": 1.3440642356872559, + "epoch": 0.014666762527859659, + "kl_loss": 3612.9765625, + "loss_ib": 36.13583755493164, + "step": 51 + }, + { + "ce_ib": 60.87339782714844, + "ce_orig": 0.8129587769508362, + "epoch": 0.0149543461068373, + "kl_loss": 3125.83642578125, + "loss_ib": 31.264450073242188, + "step": 52 + }, + { + "ce_ib": 58.96531295776367, + "ce_orig": 0.9034717082977295, + "epoch": 0.0149543461068373, + "kl_loss": 3257.168212890625, + "loss_ib": 32.57757568359375, + "step": 52 + }, + { + "ce_ib": 60.43812942504883, + "ce_orig": 0.8096925020217896, + "epoch": 0.0149543461068373, + "kl_loss": 3082.474853515625, + "loss_ib": 30.830793380737305, + "step": 52 + }, + { + "ce_ib": 61.23511505126953, + "ce_orig": 1.078736424446106, + "epoch": 0.0149543461068373, + "kl_loss": 3490.707275390625, + "loss_ib": 34.9131965637207, + "step": 52 + }, + { + "ce_ib": 60.389286041259766, + "ce_orig": 1.2757381200790405, + "epoch": 0.015241929685814939, + "kl_loss": 3353.767578125, + "loss_ib": 33.54371643066406, + "step": 53 + }, + { + "ce_ib": 57.41773223876953, + "ce_orig": 0.552437424659729, + "epoch": 0.015241929685814939, + "kl_loss": 2717.25927734375, + "loss_ib": 27.178335189819336, + "step": 53 + }, + { + "ce_ib": 60.19536590576172, + "ce_orig": 0.7778604626655579, + "epoch": 0.015241929685814939, + "kl_loss": 3204.783203125, + "loss_ib": 32.05385208129883, + "step": 53 + }, + { + "ce_ib": 61.59130859375, + "ce_orig": 1.6185189485549927, + "epoch": 0.015241929685814939, + "kl_loss": 3477.7470703125, + "loss_ib": 34.78363037109375, + "step": 53 + }, + { + "ce_ib": 63.59449005126953, + "ce_orig": 1.4870353937149048, + "epoch": 0.01552951326479258, + "kl_loss": 3315.589599609375, + "loss_ib": 33.162254333496094, + "step": 54 + }, + { + "ce_ib": 59.9765625, + "ce_orig": 1.258398175239563, + "epoch": 0.01552951326479258, + "kl_loss": 3485.625, + "loss_ib": 34.862247467041016, + "step": 54 + }, + { + "ce_ib": 58.29402160644531, + "ce_orig": 0.9382989406585693, + "epoch": 0.01552951326479258, + "kl_loss": 3394.168701171875, + "loss_ib": 33.94751739501953, + "step": 54 + }, + { + "ce_ib": 58.97043228149414, + "ce_orig": 0.6211685538291931, + "epoch": 0.01552951326479258, + "kl_loss": 3460.42333984375, + "loss_ib": 34.610130310058594, + "step": 54 + }, + { + "epoch": 0.01581709684377022, + "grad_norm": 469.67340087890625, + "learning_rate": 7.802547770700637e-06, + "loss": 34.1513, + "step": 55 + }, + { + "ce_ib": 61.76213073730469, + "ce_orig": 0.8650195002555847, + "epoch": 0.01581709684377022, + "kl_loss": 3174.44775390625, + "loss_ib": 31.750654220581055, + "step": 55 + }, + { + "ce_ib": 55.92485809326172, + "ce_orig": 0.08637077361345291, + "epoch": 0.01581709684377022, + "kl_loss": 468.1816101074219, + "loss_ib": 4.687408447265625, + "step": 55 + }, + { + "ce_ib": 59.152156829833984, + "ce_orig": 0.9888356328010559, + "epoch": 0.01581709684377022, + "kl_loss": 3260.65771484375, + "loss_ib": 32.612491607666016, + "step": 55 + }, + { + "ce_ib": 60.753597259521484, + "ce_orig": 0.837508499622345, + "epoch": 0.01581709684377022, + "kl_loss": 3544.2236328125, + "loss_ib": 35.44831085205078, + "step": 55 + }, + { + "ce_ib": 56.82107925415039, + "ce_orig": 0.8040409684181213, + "epoch": 0.01610468042274786, + "kl_loss": 3288.75830078125, + "loss_ib": 32.89326477050781, + "step": 56 + }, + { + "ce_ib": 58.140541076660156, + "ce_orig": 0.666872501373291, + "epoch": 0.01610468042274786, + "kl_loss": 3718.909912109375, + "loss_ib": 37.19491195678711, + "step": 56 + }, + { + "ce_ib": 59.2910041809082, + "ce_orig": 0.9221104979515076, + "epoch": 0.01610468042274786, + "kl_loss": 3336.389404296875, + "loss_ib": 33.36982345581055, + "step": 56 + }, + { + "ce_ib": 61.521507263183594, + "ce_orig": 1.3518534898757935, + "epoch": 0.01610468042274786, + "kl_loss": 3092.3486328125, + "loss_ib": 30.929637908935547, + "step": 56 + }, + { + "ce_ib": 57.95586013793945, + "ce_orig": 0.8081279397010803, + "epoch": 0.016392264001725502, + "kl_loss": 3491.455810546875, + "loss_ib": 34.920352935791016, + "step": 57 + }, + { + "ce_ib": 58.949745178222656, + "ce_orig": 0.5393152236938477, + "epoch": 0.016392264001725502, + "kl_loss": 3235.858154296875, + "loss_ib": 32.36447525024414, + "step": 57 + }, + { + "ce_ib": 60.55893325805664, + "ce_orig": 0.7738803029060364, + "epoch": 0.016392264001725502, + "kl_loss": 2924.27783203125, + "loss_ib": 29.24883270263672, + "step": 57 + }, + { + "ce_ib": 61.02143478393555, + "ce_orig": 1.209029197692871, + "epoch": 0.016392264001725502, + "kl_loss": 3489.797607421875, + "loss_ib": 34.904075622558594, + "step": 57 + }, + { + "ce_ib": 60.51230239868164, + "ce_orig": 1.1623197793960571, + "epoch": 0.016679847580703143, + "kl_loss": 3124.18798828125, + "loss_ib": 31.2479305267334, + "step": 58 + }, + { + "ce_ib": 60.99052429199219, + "ce_orig": 1.069433569908142, + "epoch": 0.016679847580703143, + "kl_loss": 3169.855712890625, + "loss_ib": 31.704654693603516, + "step": 58 + }, + { + "ce_ib": 58.70066452026367, + "ce_orig": 1.0279523134231567, + "epoch": 0.016679847580703143, + "kl_loss": 3483.8056640625, + "loss_ib": 34.84392547607422, + "step": 58 + }, + { + "ce_ib": 59.15578842163086, + "ce_orig": 1.0242782831192017, + "epoch": 0.016679847580703143, + "kl_loss": 3071.107421875, + "loss_ib": 30.716989517211914, + "step": 58 + }, + { + "ce_ib": 61.244327545166016, + "ce_orig": 1.7360433340072632, + "epoch": 0.01696743115968078, + "kl_loss": 2984.33984375, + "loss_ib": 29.849523544311523, + "step": 59 + }, + { + "ce_ib": 60.01206588745117, + "ce_orig": 1.2617676258087158, + "epoch": 0.01696743115968078, + "kl_loss": 3349.006591796875, + "loss_ib": 33.49606704711914, + "step": 59 + }, + { + "ce_ib": 56.53895568847656, + "ce_orig": 0.5960240960121155, + "epoch": 0.01696743115968078, + "kl_loss": 3249.41357421875, + "loss_ib": 32.49979019165039, + "step": 59 + }, + { + "ce_ib": 59.71278762817383, + "ce_orig": 0.8869993090629578, + "epoch": 0.01696743115968078, + "kl_loss": 3232.68505859375, + "loss_ib": 32.332820892333984, + "step": 59 + }, + { + "epoch": 0.01725501473865842, + "grad_norm": 486.4937438964844, + "learning_rate": 8.598726114649681e-06, + "loss": 33.3977, + "step": 60 + }, + { + "ce_ib": 58.857635498046875, + "ce_orig": 0.7359964847564697, + "epoch": 0.01725501473865842, + "kl_loss": 3203.08740234375, + "loss_ib": 32.03675842285156, + "step": 60 + }, + { + "ce_ib": 59.75052261352539, + "ce_orig": 1.0773297548294067, + "epoch": 0.01725501473865842, + "kl_loss": 3154.876953125, + "loss_ib": 31.55474281311035, + "step": 60 + }, + { + "ce_ib": 57.949344635009766, + "ce_orig": 0.8577583432197571, + "epoch": 0.01725501473865842, + "kl_loss": 3265.8623046875, + "loss_ib": 32.6644172668457, + "step": 60 + }, + { + "ce_ib": 59.86404037475586, + "ce_orig": 1.3723738193511963, + "epoch": 0.01725501473865842, + "kl_loss": 2843.7080078125, + "loss_ib": 28.443065643310547, + "step": 60 + }, + { + "ce_ib": 56.99949645996094, + "ce_orig": 0.5773953199386597, + "epoch": 0.017542598317636063, + "kl_loss": 3343.131103515625, + "loss_ib": 33.43701171875, + "step": 61 + }, + { + "ce_ib": 60.948787689208984, + "ce_orig": 1.6039363145828247, + "epoch": 0.017542598317636063, + "kl_loss": 3304.048583984375, + "loss_ib": 33.04657745361328, + "step": 61 + }, + { + "ce_ib": 58.39208221435547, + "ce_orig": 0.984937310218811, + "epoch": 0.017542598317636063, + "kl_loss": 3363.2353515625, + "loss_ib": 33.63819122314453, + "step": 61 + }, + { + "ce_ib": 59.0418586730957, + "ce_orig": 0.783109188079834, + "epoch": 0.017542598317636063, + "kl_loss": 3394.3154296875, + "loss_ib": 33.949058532714844, + "step": 61 + }, + { + "ce_ib": 62.10023880004883, + "ce_orig": 1.8530871868133545, + "epoch": 0.017830181896613704, + "kl_loss": 3175.18505859375, + "loss_ib": 31.758060455322266, + "step": 62 + }, + { + "ce_ib": 56.835514068603516, + "ce_orig": 0.7488876581192017, + "epoch": 0.017830181896613704, + "kl_loss": 2187.16943359375, + "loss_ib": 21.877376556396484, + "step": 62 + }, + { + "ce_ib": 60.125152587890625, + "ce_orig": 1.4274426698684692, + "epoch": 0.017830181896613704, + "kl_loss": 3222.76611328125, + "loss_ib": 32.233673095703125, + "step": 62 + }, + { + "ce_ib": 55.86289978027344, + "ce_orig": 0.7154338955879211, + "epoch": 0.017830181896613704, + "kl_loss": 3421.8095703125, + "loss_ib": 34.22368240356445, + "step": 62 + }, + { + "ce_ib": 59.97455596923828, + "ce_orig": 1.2463781833648682, + "epoch": 0.018117765475591345, + "kl_loss": 2927.573486328125, + "loss_ib": 29.2817325592041, + "step": 63 + }, + { + "ce_ib": 58.33196258544922, + "ce_orig": 0.5972486734390259, + "epoch": 0.018117765475591345, + "kl_loss": 3288.707763671875, + "loss_ib": 32.89291000366211, + "step": 63 + }, + { + "ce_ib": 60.974822998046875, + "ce_orig": 1.4676904678344727, + "epoch": 0.018117765475591345, + "kl_loss": 2948.23388671875, + "loss_ib": 29.48843765258789, + "step": 63 + }, + { + "ce_ib": 57.45879364013672, + "ce_orig": 0.7307599782943726, + "epoch": 0.018117765475591345, + "kl_loss": 2998.276123046875, + "loss_ib": 29.988508224487305, + "step": 63 + }, + { + "ce_ib": 57.19486618041992, + "ce_orig": 0.7821041345596313, + "epoch": 0.018405349054568983, + "kl_loss": 3225.396728515625, + "loss_ib": 32.25968551635742, + "step": 64 + }, + { + "ce_ib": 57.73394775390625, + "ce_orig": 0.6754387617111206, + "epoch": 0.018405349054568983, + "kl_loss": 3085.6650390625, + "loss_ib": 30.862422943115234, + "step": 64 + }, + { + "ce_ib": 55.39207077026367, + "ce_orig": 0.5158528685569763, + "epoch": 0.018405349054568983, + "kl_loss": 3063.913818359375, + "loss_ib": 30.644676208496094, + "step": 64 + }, + { + "ce_ib": 57.978118896484375, + "ce_orig": 0.9754984974861145, + "epoch": 0.018405349054568983, + "kl_loss": 3214.08251953125, + "loss_ib": 32.14662170410156, + "step": 64 + }, + { + "epoch": 0.018692932633546624, + "grad_norm": 475.1830139160156, + "learning_rate": 9.394904458598726e-06, + "loss": 32.2213, + "step": 65 + }, + { + "ce_ib": 57.8340950012207, + "ce_orig": 0.7973042726516724, + "epoch": 0.018692932633546624, + "kl_loss": 3016.814208984375, + "loss_ib": 30.173925399780273, + "step": 65 + }, + { + "ce_ib": 56.048274993896484, + "ce_orig": 0.7601141929626465, + "epoch": 0.018692932633546624, + "kl_loss": 3293.418212890625, + "loss_ib": 32.93978500366211, + "step": 65 + }, + { + "ce_ib": 55.681297302246094, + "ce_orig": 0.9179377555847168, + "epoch": 0.018692932633546624, + "kl_loss": 3359.49755859375, + "loss_ib": 33.60054397583008, + "step": 65 + }, + { + "ce_ib": 58.35697937011719, + "ce_orig": 1.3632832765579224, + "epoch": 0.018692932633546624, + "kl_loss": 3379.584228515625, + "loss_ib": 33.80167770385742, + "step": 65 + }, + { + "ce_ib": 60.778778076171875, + "ce_orig": 1.0116430521011353, + "epoch": 0.018980516212524265, + "kl_loss": 3229.637451171875, + "loss_ib": 32.302452087402344, + "step": 66 + }, + { + "ce_ib": 56.4047737121582, + "ce_orig": 1.053054928779602, + "epoch": 0.018980516212524265, + "kl_loss": 3030.66552734375, + "loss_ib": 30.312294006347656, + "step": 66 + }, + { + "ce_ib": 57.94568634033203, + "ce_orig": 0.946856677532196, + "epoch": 0.018980516212524265, + "kl_loss": 3040.84765625, + "loss_ib": 30.414268493652344, + "step": 66 + }, + { + "ce_ib": 56.41437911987305, + "ce_orig": 0.7833185195922852, + "epoch": 0.018980516212524265, + "kl_loss": 3050.6826171875, + "loss_ib": 30.512468338012695, + "step": 66 + }, + { + "ce_ib": 59.401546478271484, + "ce_orig": 1.328580617904663, + "epoch": 0.019268099791501906, + "kl_loss": 2916.177978515625, + "loss_ib": 29.1677188873291, + "step": 67 + }, + { + "ce_ib": 57.880680084228516, + "ce_orig": 0.7882740497589111, + "epoch": 0.019268099791501906, + "kl_loss": 3422.02783203125, + "loss_ib": 34.22606658935547, + "step": 67 + }, + { + "ce_ib": 59.44600296020508, + "ce_orig": 0.7883732914924622, + "epoch": 0.019268099791501906, + "kl_loss": 3327.45703125, + "loss_ib": 33.280513763427734, + "step": 67 + }, + { + "ce_ib": 59.17395782470703, + "ce_orig": 1.088062047958374, + "epoch": 0.019268099791501906, + "kl_loss": 1573.11181640625, + "loss_ib": 15.737035751342773, + "step": 67 + }, + { + "ce_ib": 56.406436920166016, + "ce_orig": 0.592961847782135, + "epoch": 0.019555683370479547, + "kl_loss": 3268.48779296875, + "loss_ib": 32.69051742553711, + "step": 68 + }, + { + "ce_ib": 56.2723274230957, + "ce_orig": 0.5646532773971558, + "epoch": 0.019555683370479547, + "kl_loss": 3152.529296875, + "loss_ib": 31.530920028686523, + "step": 68 + }, + { + "ce_ib": 55.98052215576172, + "ce_orig": 0.5173469185829163, + "epoch": 0.019555683370479547, + "kl_loss": 2985.48681640625, + "loss_ib": 29.86046600341797, + "step": 68 + }, + { + "ce_ib": 55.83637237548828, + "ce_orig": 0.4308261573314667, + "epoch": 0.019555683370479547, + "kl_loss": 2723.078369140625, + "loss_ib": 27.236366271972656, + "step": 68 + }, + { + "ce_ib": 59.32357406616211, + "ce_orig": 1.509739637374878, + "epoch": 0.019843266949457185, + "kl_loss": 2924.435546875, + "loss_ib": 29.250288009643555, + "step": 69 + }, + { + "ce_ib": 59.09616470336914, + "ce_orig": 2.3940815925598145, + "epoch": 0.019843266949457185, + "kl_loss": 2945.3994140625, + "loss_ib": 29.459903717041016, + "step": 69 + }, + { + "ce_ib": 55.772178649902344, + "ce_orig": 1.207844614982605, + "epoch": 0.019843266949457185, + "kl_loss": 3110.099365234375, + "loss_ib": 31.106569290161133, + "step": 69 + }, + { + "ce_ib": 59.807865142822266, + "ce_orig": 1.4729925394058228, + "epoch": 0.019843266949457185, + "kl_loss": 2798.36474609375, + "loss_ib": 27.989627838134766, + "step": 69 + }, + { + "epoch": 0.020130850528434826, + "grad_norm": 451.3029479980469, + "learning_rate": 1.0191082802547772e-05, + "loss": 31.1615, + "step": 70 + }, + { + "ce_ib": 57.67570495605469, + "ce_orig": 0.8390839695930481, + "epoch": 0.020130850528434826, + "kl_loss": 3100.12451171875, + "loss_ib": 31.00701141357422, + "step": 70 + }, + { + "ce_ib": 56.19687271118164, + "ce_orig": 0.4426974654197693, + "epoch": 0.020130850528434826, + "kl_loss": 2944.1025390625, + "loss_ib": 29.446645736694336, + "step": 70 + }, + { + "ce_ib": 57.55405044555664, + "ce_orig": 1.0506970882415771, + "epoch": 0.020130850528434826, + "kl_loss": 2788.48095703125, + "loss_ib": 27.89056396484375, + "step": 70 + }, + { + "ce_ib": 57.250022888183594, + "ce_orig": 0.8879465460777283, + "epoch": 0.020130850528434826, + "kl_loss": 3008.960693359375, + "loss_ib": 30.0953311920166, + "step": 70 + }, + { + "ce_ib": 56.017364501953125, + "ce_orig": 0.8262448310852051, + "epoch": 0.020418434107412467, + "kl_loss": 3022.1181640625, + "loss_ib": 30.226781845092773, + "step": 71 + }, + { + "ce_ib": 57.61652755737305, + "ce_orig": 0.628873348236084, + "epoch": 0.020418434107412467, + "kl_loss": 2915.463134765625, + "loss_ib": 29.16039276123047, + "step": 71 + }, + { + "ce_ib": 56.01335525512695, + "ce_orig": 0.9199650883674622, + "epoch": 0.020418434107412467, + "kl_loss": 2920.94091796875, + "loss_ib": 29.215009689331055, + "step": 71 + }, + { + "ce_ib": 55.02300262451172, + "ce_orig": 0.636806845664978, + "epoch": 0.020418434107412467, + "kl_loss": 2990.93994140625, + "loss_ib": 29.914901733398438, + "step": 71 + }, + { + "ce_ib": 58.92692947387695, + "ce_orig": 1.6168797016143799, + "epoch": 0.020706017686390108, + "kl_loss": 2569.22216796875, + "loss_ib": 25.6981143951416, + "step": 72 + }, + { + "ce_ib": 54.50556182861328, + "ce_orig": 0.7561197280883789, + "epoch": 0.020706017686390108, + "kl_loss": 3147.120361328125, + "loss_ib": 31.476654052734375, + "step": 72 + }, + { + "ce_ib": 54.70288848876953, + "ce_orig": 0.7983661890029907, + "epoch": 0.020706017686390108, + "kl_loss": 2991.32568359375, + "loss_ib": 29.91872787475586, + "step": 72 + }, + { + "ce_ib": 55.79477310180664, + "ce_orig": 1.1335042715072632, + "epoch": 0.020706017686390108, + "kl_loss": 2651.6884765625, + "loss_ib": 26.522462844848633, + "step": 72 + }, + { + "ce_ib": 55.5018424987793, + "ce_orig": 0.6628856062889099, + "epoch": 0.02099360126536775, + "kl_loss": 2991.87646484375, + "loss_ib": 29.924312591552734, + "step": 73 + }, + { + "ce_ib": 54.66994094848633, + "ce_orig": 0.9854854941368103, + "epoch": 0.02099360126536775, + "kl_loss": 2946.215087890625, + "loss_ib": 29.46761703491211, + "step": 73 + }, + { + "ce_ib": 58.076210021972656, + "ce_orig": 1.3572182655334473, + "epoch": 0.02099360126536775, + "kl_loss": 3050.9150390625, + "loss_ib": 30.514957427978516, + "step": 73 + }, + { + "ce_ib": 55.1069221496582, + "ce_orig": 0.8574339747428894, + "epoch": 0.02099360126536775, + "kl_loss": 3119.42724609375, + "loss_ib": 31.19978141784668, + "step": 73 + }, + { + "ce_ib": 54.855194091796875, + "ce_orig": 0.8055992126464844, + "epoch": 0.021281184844345387, + "kl_loss": 3110.638671875, + "loss_ib": 31.11187171936035, + "step": 74 + }, + { + "ce_ib": 52.890716552734375, + "ce_orig": 0.522036075592041, + "epoch": 0.021281184844345387, + "kl_loss": 2826.3251953125, + "loss_ib": 28.268539428710938, + "step": 74 + }, + { + "ce_ib": 54.693538665771484, + "ce_orig": 0.729824960231781, + "epoch": 0.021281184844345387, + "kl_loss": 3347.115966796875, + "loss_ib": 33.476627349853516, + "step": 74 + }, + { + "ce_ib": 55.074676513671875, + "ce_orig": 0.9839091897010803, + "epoch": 0.021281184844345387, + "kl_loss": 2667.3046875, + "loss_ib": 26.67855453491211, + "step": 74 + }, + { + "epoch": 0.021568768423323028, + "grad_norm": 460.4914245605469, + "learning_rate": 1.0987261146496815e-05, + "loss": 30.2003, + "step": 75 + }, + { + "ce_ib": 55.64740753173828, + "ce_orig": 0.6976457238197327, + "epoch": 0.021568768423323028, + "kl_loss": 2705.426025390625, + "loss_ib": 27.059823989868164, + "step": 75 + }, + { + "ce_ib": 56.64052963256836, + "ce_orig": 1.448681116104126, + "epoch": 0.021568768423323028, + "kl_loss": 2853.453369140625, + "loss_ib": 28.540197372436523, + "step": 75 + }, + { + "ce_ib": 55.26200485229492, + "ce_orig": 0.7230492234230042, + "epoch": 0.021568768423323028, + "kl_loss": 3141.7001953125, + "loss_ib": 31.422529220581055, + "step": 75 + }, + { + "ce_ib": 54.74195098876953, + "ce_orig": 0.611904501914978, + "epoch": 0.021568768423323028, + "kl_loss": 3029.38916015625, + "loss_ib": 30.29936408996582, + "step": 75 + }, + { + "ce_ib": 54.31443786621094, + "ce_orig": 1.1379636526107788, + "epoch": 0.02185635200230067, + "kl_loss": 3160.546875, + "loss_ib": 31.61090087890625, + "step": 76 + }, + { + "ce_ib": 53.96803665161133, + "ce_orig": 0.7516291737556458, + "epoch": 0.02185635200230067, + "kl_loss": 3008.7666015625, + "loss_ib": 30.093063354492188, + "step": 76 + }, + { + "ce_ib": 54.48766326904297, + "ce_orig": 1.210386872291565, + "epoch": 0.02185635200230067, + "kl_loss": 2852.64794921875, + "loss_ib": 28.53192901611328, + "step": 76 + }, + { + "ce_ib": 54.85871124267578, + "ce_orig": 1.4512004852294922, + "epoch": 0.02185635200230067, + "kl_loss": 3227.62255859375, + "loss_ib": 32.28171157836914, + "step": 76 + }, + { + "ce_ib": 54.70118713378906, + "ce_orig": 1.2653746604919434, + "epoch": 0.02214393558127831, + "kl_loss": 2925.536865234375, + "loss_ib": 29.260839462280273, + "step": 77 + }, + { + "ce_ib": 54.4128532409668, + "ce_orig": 0.7473430633544922, + "epoch": 0.02214393558127831, + "kl_loss": 2963.836669921875, + "loss_ib": 29.643808364868164, + "step": 77 + }, + { + "ce_ib": 56.661434173583984, + "ce_orig": 1.4954595565795898, + "epoch": 0.02214393558127831, + "kl_loss": 2720.67724609375, + "loss_ib": 27.212438583374023, + "step": 77 + }, + { + "ce_ib": 54.728179931640625, + "ce_orig": 0.827836275100708, + "epoch": 0.02214393558127831, + "kl_loss": 2690.066650390625, + "loss_ib": 26.906139373779297, + "step": 77 + }, + { + "ce_ib": 53.935279846191406, + "ce_orig": 0.7729896903038025, + "epoch": 0.022431519160255948, + "kl_loss": 3118.779296875, + "loss_ib": 31.193185806274414, + "step": 78 + }, + { + "ce_ib": 53.90089797973633, + "ce_orig": 1.05341637134552, + "epoch": 0.022431519160255948, + "kl_loss": 2867.553466796875, + "loss_ib": 28.680925369262695, + "step": 78 + }, + { + "ce_ib": 57.06119155883789, + "ce_orig": 1.1991482973098755, + "epoch": 0.022431519160255948, + "kl_loss": 2615.50390625, + "loss_ib": 26.160743713378906, + "step": 78 + }, + { + "ce_ib": 55.66145324707031, + "ce_orig": 1.2269896268844604, + "epoch": 0.022431519160255948, + "kl_loss": 2716.58154296875, + "loss_ib": 27.1713809967041, + "step": 78 + }, + { + "ce_ib": 56.3227424621582, + "ce_orig": 1.6050187349319458, + "epoch": 0.02271910273923359, + "kl_loss": 2608.66943359375, + "loss_ib": 26.09232521057129, + "step": 79 + }, + { + "ce_ib": 55.02303695678711, + "ce_orig": 1.3928401470184326, + "epoch": 0.02271910273923359, + "kl_loss": 2699.94677734375, + "loss_ib": 27.00497055053711, + "step": 79 + }, + { + "ce_ib": 53.92034912109375, + "ce_orig": 0.8707427978515625, + "epoch": 0.02271910273923359, + "kl_loss": 2800.906494140625, + "loss_ib": 28.01445770263672, + "step": 79 + }, + { + "ce_ib": 55.906776428222656, + "ce_orig": 1.184428095817566, + "epoch": 0.02271910273923359, + "kl_loss": 2736.019775390625, + "loss_ib": 27.365787506103516, + "step": 79 + }, + { + "epoch": 0.02300668631821123, + "grad_norm": 423.0335998535156, + "learning_rate": 1.178343949044586e-05, + "loss": 28.5021, + "step": 80 + }, + { + "ce_ib": 54.60192108154297, + "ce_orig": 1.0805023908615112, + "epoch": 0.02300668631821123, + "kl_loss": 2762.3125, + "loss_ib": 27.628583908081055, + "step": 80 + }, + { + "ce_ib": 54.54481506347656, + "ce_orig": 0.9205932021141052, + "epoch": 0.02300668631821123, + "kl_loss": 2967.17919921875, + "loss_ib": 29.67724609375, + "step": 80 + }, + { + "ce_ib": 59.29964065551758, + "ce_orig": 1.6950358152389526, + "epoch": 0.02300668631821123, + "kl_loss": 2693.053955078125, + "loss_ib": 26.93647003173828, + "step": 80 + }, + { + "ce_ib": 52.242156982421875, + "ce_orig": 0.9504690766334534, + "epoch": 0.02300668631821123, + "kl_loss": 2905.36669921875, + "loss_ib": 29.05889129638672, + "step": 80 + }, + { + "ce_ib": 55.506492614746094, + "ce_orig": 1.2976820468902588, + "epoch": 0.02329426989718887, + "kl_loss": 2749.579345703125, + "loss_ib": 27.501344680786133, + "step": 81 + }, + { + "ce_ib": 55.9608039855957, + "ce_orig": 1.092388391494751, + "epoch": 0.02329426989718887, + "kl_loss": 2680.87744140625, + "loss_ib": 26.814369201660156, + "step": 81 + }, + { + "ce_ib": 55.16973114013672, + "ce_orig": 1.686485767364502, + "epoch": 0.02329426989718887, + "kl_loss": 2655.03369140625, + "loss_ib": 26.55585479736328, + "step": 81 + }, + { + "ce_ib": 54.82407760620117, + "ce_orig": 0.9071881175041199, + "epoch": 0.02329426989718887, + "kl_loss": 2584.10107421875, + "loss_ib": 25.846492767333984, + "step": 81 + }, + { + "ce_ib": 52.7196044921875, + "ce_orig": 0.6356145739555359, + "epoch": 0.023581853476166512, + "kl_loss": 2775.622802734375, + "loss_ib": 27.761497497558594, + "step": 82 + }, + { + "ce_ib": 52.03306198120117, + "ce_orig": 0.935957133769989, + "epoch": 0.023581853476166512, + "kl_loss": 2809.512451171875, + "loss_ib": 28.100326538085938, + "step": 82 + }, + { + "ce_ib": 53.90233612060547, + "ce_orig": 1.145911455154419, + "epoch": 0.023581853476166512, + "kl_loss": 2701.700927734375, + "loss_ib": 27.02239990234375, + "step": 82 + }, + { + "ce_ib": 53.358924865722656, + "ce_orig": 0.881079375743866, + "epoch": 0.023581853476166512, + "kl_loss": 2774.904296875, + "loss_ib": 27.754379272460938, + "step": 82 + }, + { + "ce_ib": 51.51953125, + "ce_orig": 1.0080299377441406, + "epoch": 0.02386943705514415, + "kl_loss": 2673.2802734375, + "loss_ib": 26.737953186035156, + "step": 83 + }, + { + "ce_ib": 54.799827575683594, + "ce_orig": 1.211903691291809, + "epoch": 0.02386943705514415, + "kl_loss": 2386.9873046875, + "loss_ib": 23.87535285949707, + "step": 83 + }, + { + "ce_ib": 53.07735824584961, + "ce_orig": 0.9792759418487549, + "epoch": 0.02386943705514415, + "kl_loss": 2844.404296875, + "loss_ib": 28.449350357055664, + "step": 83 + }, + { + "ce_ib": 57.56839370727539, + "ce_orig": 1.708152413368225, + "epoch": 0.02386943705514415, + "kl_loss": 2686.004150390625, + "loss_ib": 26.86579704284668, + "step": 83 + }, + { + "ce_ib": 56.169593811035156, + "ce_orig": 1.0889211893081665, + "epoch": 0.02415702063412179, + "kl_loss": 2531.922607421875, + "loss_ib": 25.32484245300293, + "step": 84 + }, + { + "ce_ib": 52.437904357910156, + "ce_orig": 0.9348316788673401, + "epoch": 0.02415702063412179, + "kl_loss": 2593.02294921875, + "loss_ib": 25.93547248840332, + "step": 84 + }, + { + "ce_ib": 50.439369201660156, + "ce_orig": 0.710110068321228, + "epoch": 0.02415702063412179, + "kl_loss": 2809.076171875, + "loss_ib": 28.09580421447754, + "step": 84 + }, + { + "ce_ib": 51.97590637207031, + "ce_orig": 0.7616681456565857, + "epoch": 0.02415702063412179, + "kl_loss": 2728.056640625, + "loss_ib": 27.285762786865234, + "step": 84 + }, + { + "epoch": 0.024444604213099432, + "grad_norm": 432.98297119140625, + "learning_rate": 1.2579617834394904e-05, + "loss": 27.55, + "step": 85 + }, + { + "ce_ib": 53.3626594543457, + "ce_orig": 1.2040644884109497, + "epoch": 0.024444604213099432, + "kl_loss": 2776.72802734375, + "loss_ib": 27.77261734008789, + "step": 85 + }, + { + "ce_ib": 50.8441276550293, + "ce_orig": 0.3849184215068817, + "epoch": 0.024444604213099432, + "kl_loss": 2446.23681640625, + "loss_ib": 24.467453002929688, + "step": 85 + }, + { + "ce_ib": 56.2274055480957, + "ce_orig": 1.5932576656341553, + "epoch": 0.024444604213099432, + "kl_loss": 2712.46728515625, + "loss_ib": 27.130294799804688, + "step": 85 + }, + { + "ce_ib": 51.763954162597656, + "ce_orig": 0.8911384344100952, + "epoch": 0.024444604213099432, + "kl_loss": 2615.62939453125, + "loss_ib": 26.161468505859375, + "step": 85 + }, + { + "ce_ib": 58.18117904663086, + "ce_orig": 2.2738187313079834, + "epoch": 0.024732187792077073, + "kl_loss": 2173.28564453125, + "loss_ib": 21.73867416381836, + "step": 86 + }, + { + "ce_ib": 52.2686882019043, + "ce_orig": 0.9880481958389282, + "epoch": 0.024732187792077073, + "kl_loss": 2540.4462890625, + "loss_ib": 25.40968894958496, + "step": 86 + }, + { + "ce_ib": 52.55824661254883, + "ce_orig": 0.7514367699623108, + "epoch": 0.024732187792077073, + "kl_loss": 2694.514404296875, + "loss_ib": 26.95039939880371, + "step": 86 + }, + { + "ce_ib": 54.02186584472656, + "ce_orig": 1.0578229427337646, + "epoch": 0.024732187792077073, + "kl_loss": 2508.5751953125, + "loss_ib": 25.091154098510742, + "step": 86 + }, + { + "ce_ib": 51.11041259765625, + "ce_orig": 1.0314545631408691, + "epoch": 0.025019771371054714, + "kl_loss": 2482.1767578125, + "loss_ib": 24.82687759399414, + "step": 87 + }, + { + "ce_ib": 52.022422790527344, + "ce_orig": 0.9868292212486267, + "epoch": 0.025019771371054714, + "kl_loss": 2379.2041015625, + "loss_ib": 23.797243118286133, + "step": 87 + }, + { + "ce_ib": 51.21082305908203, + "ce_orig": 0.7481355667114258, + "epoch": 0.025019771371054714, + "kl_loss": 2631.20556640625, + "loss_ib": 26.317176818847656, + "step": 87 + }, + { + "ce_ib": 55.54100799560547, + "ce_orig": 1.7815814018249512, + "epoch": 0.025019771371054714, + "kl_loss": 2545.77587890625, + "loss_ib": 25.46331214904785, + "step": 87 + }, + { + "ce_ib": 52.07685470581055, + "ce_orig": 0.6155886054039001, + "epoch": 0.025307354950032352, + "kl_loss": 2352.36767578125, + "loss_ib": 23.52888298034668, + "step": 88 + }, + { + "ce_ib": 53.431034088134766, + "ce_orig": 0.2792898416519165, + "epoch": 0.025307354950032352, + "kl_loss": 2138.5185546875, + "loss_ib": 21.39052963256836, + "step": 88 + }, + { + "ce_ib": 51.92123794555664, + "ce_orig": 0.8495407104492188, + "epoch": 0.025307354950032352, + "kl_loss": 2580.02490234375, + "loss_ib": 25.80544090270996, + "step": 88 + }, + { + "ce_ib": 51.645992279052734, + "ce_orig": 0.8546672463417053, + "epoch": 0.025307354950032352, + "kl_loss": 2461.83203125, + "loss_ib": 24.623483657836914, + "step": 88 + }, + { + "ce_ib": 54.053565979003906, + "ce_orig": 1.8279736042022705, + "epoch": 0.025594938529009993, + "kl_loss": 2367.5888671875, + "loss_ib": 23.681293487548828, + "step": 89 + }, + { + "ce_ib": 51.267242431640625, + "ce_orig": 1.1525427103042603, + "epoch": 0.025594938529009993, + "kl_loss": 2426.50537109375, + "loss_ib": 24.270179748535156, + "step": 89 + }, + { + "ce_ib": 49.84978485107422, + "ce_orig": 0.9793948531150818, + "epoch": 0.025594938529009993, + "kl_loss": 2521.16650390625, + "loss_ib": 25.216650009155273, + "step": 89 + }, + { + "ce_ib": 53.35862350463867, + "ce_orig": 0.9854567646980286, + "epoch": 0.025594938529009993, + "kl_loss": 2478.33984375, + "loss_ib": 24.788734436035156, + "step": 89 + }, + { + "epoch": 0.025882522107987634, + "grad_norm": 397.4322814941406, + "learning_rate": 1.337579617834395e-05, + "loss": 25.8688, + "step": 90 + }, + { + "ce_ib": 50.910614013671875, + "ce_orig": 0.6994275450706482, + "epoch": 0.025882522107987634, + "kl_loss": 2584.65478515625, + "loss_ib": 25.85163688659668, + "step": 90 + }, + { + "ce_ib": 53.02171325683594, + "ce_orig": 1.0508131980895996, + "epoch": 0.025882522107987634, + "kl_loss": 2165.5048828125, + "loss_ib": 21.660350799560547, + "step": 90 + }, + { + "ce_ib": 55.81970977783203, + "ce_orig": 2.0338478088378906, + "epoch": 0.025882522107987634, + "kl_loss": 2277.224609375, + "loss_ib": 22.7778263092041, + "step": 90 + }, + { + "ce_ib": 52.05428695678711, + "ce_orig": 1.1036865711212158, + "epoch": 0.025882522107987634, + "kl_loss": 2340.133056640625, + "loss_ib": 23.40653419494629, + "step": 90 + }, + { + "ce_ib": 50.32669448852539, + "ce_orig": 0.6009736657142639, + "epoch": 0.026170105686965275, + "kl_loss": 2234.68017578125, + "loss_ib": 22.35183334350586, + "step": 91 + }, + { + "ce_ib": 53.1220817565918, + "ce_orig": 1.201808214187622, + "epoch": 0.026170105686965275, + "kl_loss": 2135.09423828125, + "loss_ib": 21.35625457763672, + "step": 91 + }, + { + "ce_ib": 49.957176208496094, + "ce_orig": 1.040064811706543, + "epoch": 0.026170105686965275, + "kl_loss": 2486.378662109375, + "loss_ib": 24.86878204345703, + "step": 91 + }, + { + "ce_ib": 50.12799835205078, + "ce_orig": 0.863908588886261, + "epoch": 0.026170105686965275, + "kl_loss": 2400.52392578125, + "loss_ib": 24.010250091552734, + "step": 91 + }, + { + "ce_ib": 52.6755256652832, + "ce_orig": 1.1070929765701294, + "epoch": 0.026457689265942913, + "kl_loss": 1971.570068359375, + "loss_ib": 19.72096824645996, + "step": 92 + }, + { + "ce_ib": 49.004791259765625, + "ce_orig": 1.1243679523468018, + "epoch": 0.026457689265942913, + "kl_loss": 2431.189453125, + "loss_ib": 24.31679344177246, + "step": 92 + }, + { + "ce_ib": 56.39695739746094, + "ce_orig": 2.0895683765411377, + "epoch": 0.026457689265942913, + "kl_loss": 2032.292724609375, + "loss_ib": 20.32856559753418, + "step": 92 + }, + { + "ce_ib": 48.332069396972656, + "ce_orig": 0.7159590721130371, + "epoch": 0.026457689265942913, + "kl_loss": 2333.5986328125, + "loss_ib": 23.3408203125, + "step": 92 + }, + { + "ce_ib": 51.314674377441406, + "ce_orig": 0.5955004692077637, + "epoch": 0.026745272844920554, + "kl_loss": 2113.81005859375, + "loss_ib": 21.143232345581055, + "step": 93 + }, + { + "ce_ib": 50.85389709472656, + "ce_orig": 1.063989281654358, + "epoch": 0.026745272844920554, + "kl_loss": 2374.11474609375, + "loss_ib": 23.746232986450195, + "step": 93 + }, + { + "ce_ib": 52.245399475097656, + "ce_orig": 0.854840099811554, + "epoch": 0.026745272844920554, + "kl_loss": 1991.779052734375, + "loss_ib": 19.923015594482422, + "step": 93 + }, + { + "ce_ib": 49.68756103515625, + "ce_orig": 0.7530232667922974, + "epoch": 0.026745272844920554, + "kl_loss": 2126.8818359375, + "loss_ib": 21.273786544799805, + "step": 93 + }, + { + "ce_ib": 54.24198913574219, + "ce_orig": 1.3871289491653442, + "epoch": 0.027032856423898195, + "kl_loss": 2152.40869140625, + "loss_ib": 21.529510498046875, + "step": 94 + }, + { + "ce_ib": 51.039283752441406, + "ce_orig": 1.1029423475265503, + "epoch": 0.027032856423898195, + "kl_loss": 2147.513916015625, + "loss_ib": 21.480243682861328, + "step": 94 + }, + { + "ce_ib": 48.558746337890625, + "ce_orig": 0.8018097877502441, + "epoch": 0.027032856423898195, + "kl_loss": 2464.363525390625, + "loss_ib": 24.64849090576172, + "step": 94 + }, + { + "ce_ib": 51.13576889038086, + "ce_orig": 1.1009808778762817, + "epoch": 0.027032856423898195, + "kl_loss": 2014.6015625, + "loss_ib": 20.1511287689209, + "step": 94 + }, + { + "epoch": 0.027320440002875836, + "grad_norm": 380.0746154785156, + "learning_rate": 1.4171974522292993e-05, + "loss": 23.87, + "step": 95 + }, + { + "ce_ib": 55.49427795410156, + "ce_orig": 1.7911261320114136, + "epoch": 0.027320440002875836, + "kl_loss": 2262.322265625, + "loss_ib": 22.62877082824707, + "step": 95 + }, + { + "ce_ib": 49.06692886352539, + "ce_orig": 1.022802710533142, + "epoch": 0.027320440002875836, + "kl_loss": 2229.0361328125, + "loss_ib": 22.29526710510254, + "step": 95 + }, + { + "ce_ib": 50.922794342041016, + "ce_orig": 0.8836882710456848, + "epoch": 0.027320440002875836, + "kl_loss": 2195.3564453125, + "loss_ib": 21.958656311035156, + "step": 95 + }, + { + "ce_ib": 49.9268913269043, + "ce_orig": 1.0127633810043335, + "epoch": 0.027320440002875836, + "kl_loss": 2146.888671875, + "loss_ib": 21.473878860473633, + "step": 95 + }, + { + "ce_ib": 49.77082061767578, + "ce_orig": 0.9023265838623047, + "epoch": 0.027608023581853477, + "kl_loss": 2299.9072265625, + "loss_ib": 23.00404930114746, + "step": 96 + }, + { + "ce_ib": 47.34916305541992, + "ce_orig": 0.5646235346794128, + "epoch": 0.027608023581853477, + "kl_loss": 2068.25634765625, + "loss_ib": 20.687297821044922, + "step": 96 + }, + { + "ce_ib": 49.27576446533203, + "ce_orig": 0.8970661163330078, + "epoch": 0.027608023581853477, + "kl_loss": 2058.2939453125, + "loss_ib": 20.587865829467773, + "step": 96 + }, + { + "ce_ib": 51.644412994384766, + "ce_orig": 1.6223132610321045, + "epoch": 0.027608023581853477, + "kl_loss": 1921.7979736328125, + "loss_ib": 19.22314453125, + "step": 96 + }, + { + "ce_ib": 49.01395034790039, + "ce_orig": 0.74750155210495, + "epoch": 0.027895607160831115, + "kl_loss": 1786.2314453125, + "loss_ib": 17.867216110229492, + "step": 97 + }, + { + "ce_ib": 49.16642761230469, + "ce_orig": 1.0198180675506592, + "epoch": 0.027895607160831115, + "kl_loss": 2137.5, + "loss_ib": 21.37991714477539, + "step": 97 + }, + { + "ce_ib": 51.9681396484375, + "ce_orig": 1.1882346868515015, + "epoch": 0.027895607160831115, + "kl_loss": 2058.308837890625, + "loss_ib": 20.588285446166992, + "step": 97 + }, + { + "ce_ib": 49.95888900756836, + "ce_orig": 0.9148277640342712, + "epoch": 0.027895607160831115, + "kl_loss": 2065.841552734375, + "loss_ib": 20.663410186767578, + "step": 97 + }, + { + "ce_ib": 49.571800231933594, + "ce_orig": 1.0728422403335571, + "epoch": 0.028183190739808756, + "kl_loss": 2083.6865234375, + "loss_ib": 20.841821670532227, + "step": 98 + }, + { + "ce_ib": 49.93168640136719, + "ce_orig": 0.8260626792907715, + "epoch": 0.028183190739808756, + "kl_loss": 1862.836181640625, + "loss_ib": 18.63335418701172, + "step": 98 + }, + { + "ce_ib": 48.66516876220703, + "ce_orig": 0.562609851360321, + "epoch": 0.028183190739808756, + "kl_loss": 2181.225830078125, + "loss_ib": 21.817123413085938, + "step": 98 + }, + { + "ce_ib": 47.39895248413086, + "ce_orig": 1.3863259553909302, + "epoch": 0.028183190739808756, + "kl_loss": 2107.5283203125, + "loss_ib": 21.080020904541016, + "step": 98 + }, + { + "ce_ib": 48.56808090209961, + "ce_orig": 0.7511693835258484, + "epoch": 0.028470774318786397, + "kl_loss": 2047.9061279296875, + "loss_ib": 20.483917236328125, + "step": 99 + }, + { + "ce_ib": 48.201148986816406, + "ce_orig": 1.3514686822891235, + "epoch": 0.028470774318786397, + "kl_loss": 2207.186279296875, + "loss_ib": 22.07668113708496, + "step": 99 + }, + { + "ce_ib": 47.37411880493164, + "ce_orig": 0.8630917072296143, + "epoch": 0.028470774318786397, + "kl_loss": 2191.63232421875, + "loss_ib": 21.921058654785156, + "step": 99 + }, + { + "ce_ib": 50.7148323059082, + "ce_orig": 1.3532038927078247, + "epoch": 0.028470774318786397, + "kl_loss": 2022.049560546875, + "loss_ib": 20.225566864013672, + "step": 99 + }, + { + "epoch": 0.02875835789776404, + "grad_norm": 356.2680358886719, + "learning_rate": 1.4968152866242039e-05, + "loss": 21.7419, + "step": 100 + }, + { + "ce_ib": 47.18466567993164, + "ce_orig": 1.1329602003097534, + "epoch": 0.02875835789776404, + "kl_loss": 1989.36962890625, + "loss_ib": 19.898414611816406, + "step": 100 + }, + { + "ce_ib": 53.88701248168945, + "ce_orig": 2.2805113792419434, + "epoch": 0.02875835789776404, + "kl_loss": 1755.5928955078125, + "loss_ib": 17.561317443847656, + "step": 100 + }, + { + "ce_ib": 48.18925094604492, + "ce_orig": 0.8274984359741211, + "epoch": 0.02875835789776404, + "kl_loss": 1906.422119140625, + "loss_ib": 19.069040298461914, + "step": 100 + }, + { + "ce_ib": 46.921417236328125, + "ce_orig": 0.7587900757789612, + "epoch": 0.02875835789776404, + "kl_loss": 2072.403076171875, + "loss_ib": 20.728723526000977, + "step": 100 + }, + { + "ce_ib": 53.836181640625, + "ce_orig": 2.063023805618286, + "epoch": 0.02904594147674168, + "kl_loss": 1652.205322265625, + "loss_ib": 16.527435302734375, + "step": 101 + }, + { + "ce_ib": 47.936073303222656, + "ce_orig": 0.6192855834960938, + "epoch": 0.02904594147674168, + "kl_loss": 1928.050048828125, + "loss_ib": 19.285293579101562, + "step": 101 + }, + { + "ce_ib": 52.24326705932617, + "ce_orig": 1.2729721069335938, + "epoch": 0.02904594147674168, + "kl_loss": 1928.1400146484375, + "loss_ib": 19.286624908447266, + "step": 101 + }, + { + "ce_ib": 46.775421142578125, + "ce_orig": 0.7013092041015625, + "epoch": 0.02904594147674168, + "kl_loss": 2082.877685546875, + "loss_ib": 20.833454132080078, + "step": 101 + }, + { + "ce_ib": 46.20343780517578, + "ce_orig": 0.7948331832885742, + "epoch": 0.029333525055719317, + "kl_loss": 1859.212890625, + "loss_ib": 18.59674835205078, + "step": 102 + }, + { + "ce_ib": 48.856143951416016, + "ce_orig": 1.1838785409927368, + "epoch": 0.029333525055719317, + "kl_loss": 1660.9873046875, + "loss_ib": 16.614757537841797, + "step": 102 + }, + { + "ce_ib": 47.809078216552734, + "ce_orig": 1.779065728187561, + "epoch": 0.029333525055719317, + "kl_loss": 1773.1181640625, + "loss_ib": 17.7359619140625, + "step": 102 + }, + { + "ce_ib": 48.513916015625, + "ce_orig": 1.4937797784805298, + "epoch": 0.029333525055719317, + "kl_loss": 1870.419189453125, + "loss_ib": 18.709043502807617, + "step": 102 + }, + { + "ce_ib": 50.835166931152344, + "ce_orig": 1.46725594997406, + "epoch": 0.02962110863469696, + "kl_loss": 1848.5126953125, + "loss_ib": 18.490209579467773, + "step": 103 + }, + { + "ce_ib": 49.164024353027344, + "ce_orig": 1.4439281225204468, + "epoch": 0.02962110863469696, + "kl_loss": 1786.349365234375, + "loss_ib": 17.868410110473633, + "step": 103 + }, + { + "ce_ib": 47.89384841918945, + "ce_orig": 1.4249969720840454, + "epoch": 0.02962110863469696, + "kl_loss": 1961.390625, + "loss_ib": 19.618694305419922, + "step": 103 + }, + { + "ce_ib": 50.38489532470703, + "ce_orig": 1.2643296718597412, + "epoch": 0.02962110863469696, + "kl_loss": 1844.09521484375, + "loss_ib": 18.44599151611328, + "step": 103 + }, + { + "ce_ib": 47.76396942138672, + "ce_orig": 0.7680091857910156, + "epoch": 0.0299086922136746, + "kl_loss": 1581.564697265625, + "loss_ib": 15.820423126220703, + "step": 104 + }, + { + "ce_ib": 47.076053619384766, + "ce_orig": 1.2261803150177002, + "epoch": 0.0299086922136746, + "kl_loss": 1776.835693359375, + "loss_ib": 17.77306365966797, + "step": 104 + }, + { + "ce_ib": 47.24263000488281, + "ce_orig": 0.8971230983734131, + "epoch": 0.0299086922136746, + "kl_loss": 1715.18798828125, + "loss_ib": 17.15660285949707, + "step": 104 + }, + { + "ce_ib": 45.86969757080078, + "ce_orig": 0.2927989065647125, + "epoch": 0.0299086922136746, + "kl_loss": 1346.9228515625, + "loss_ib": 13.47381591796875, + "step": 104 + }, + { + "epoch": 0.03019627579265224, + "grad_norm": 313.57281494140625, + "learning_rate": 1.5764331210191083e-05, + "loss": 19.1552, + "step": 105 + }, + { + "ce_ib": 46.8266487121582, + "ce_orig": 1.2301392555236816, + "epoch": 0.03019627579265224, + "kl_loss": 1715.3831787109375, + "loss_ib": 17.15851402282715, + "step": 105 + }, + { + "ce_ib": 48.934349060058594, + "ce_orig": 1.5663868188858032, + "epoch": 0.03019627579265224, + "kl_loss": 1713.754638671875, + "loss_ib": 17.142438888549805, + "step": 105 + }, + { + "ce_ib": 47.73679733276367, + "ce_orig": 0.7973002791404724, + "epoch": 0.03019627579265224, + "kl_loss": 1519.5120849609375, + "loss_ib": 15.199894905090332, + "step": 105 + }, + { + "ce_ib": 48.780006408691406, + "ce_orig": 1.2290194034576416, + "epoch": 0.03019627579265224, + "kl_loss": 1714.5169677734375, + "loss_ib": 17.150047302246094, + "step": 105 + }, + { + "ce_ib": 46.12453079223633, + "ce_orig": 0.7794104218482971, + "epoch": 0.030483859371629878, + "kl_loss": 1571.0771484375, + "loss_ib": 15.715384483337402, + "step": 106 + }, + { + "ce_ib": 46.5201530456543, + "ce_orig": 0.8720536231994629, + "epoch": 0.030483859371629878, + "kl_loss": 1566.1361083984375, + "loss_ib": 15.66601276397705, + "step": 106 + }, + { + "ce_ib": 44.020755767822266, + "ce_orig": 0.22585166990756989, + "epoch": 0.030483859371629878, + "kl_loss": 1053.362548828125, + "loss_ib": 10.538026809692383, + "step": 106 + }, + { + "ce_ib": 44.2620735168457, + "ce_orig": 0.26073363423347473, + "epoch": 0.030483859371629878, + "kl_loss": 1120.464599609375, + "loss_ib": 11.20907211303711, + "step": 106 + }, + { + "ce_ib": 47.4915885925293, + "ce_orig": 1.2189853191375732, + "epoch": 0.03077144295060752, + "kl_loss": 947.7132568359375, + "loss_ib": 9.481881141662598, + "step": 107 + }, + { + "ce_ib": 47.91807556152344, + "ce_orig": 1.3612654209136963, + "epoch": 0.03077144295060752, + "kl_loss": 1467.544677734375, + "loss_ib": 14.680237770080566, + "step": 107 + }, + { + "ce_ib": 42.33379364013672, + "ce_orig": 0.8033524751663208, + "epoch": 0.03077144295060752, + "kl_loss": 1744.4759521484375, + "loss_ib": 17.448991775512695, + "step": 107 + }, + { + "ce_ib": 47.13089370727539, + "ce_orig": 0.9136131405830383, + "epoch": 0.03077144295060752, + "kl_loss": 1629.6527099609375, + "loss_ib": 16.301239013671875, + "step": 107 + }, + { + "ce_ib": 46.97206115722656, + "ce_orig": 1.0767881870269775, + "epoch": 0.03105902652958516, + "kl_loss": 1675.902099609375, + "loss_ib": 16.763717651367188, + "step": 108 + }, + { + "ce_ib": 43.823402404785156, + "ce_orig": 0.7930386662483215, + "epoch": 0.03105902652958516, + "kl_loss": 1553.387939453125, + "loss_ib": 15.538261413574219, + "step": 108 + }, + { + "ce_ib": 45.571510314941406, + "ce_orig": 0.781028151512146, + "epoch": 0.03105902652958516, + "kl_loss": 1472.5889892578125, + "loss_ib": 14.730446815490723, + "step": 108 + }, + { + "ce_ib": 48.90847396850586, + "ce_orig": 1.6240291595458984, + "epoch": 0.03105902652958516, + "kl_loss": 1453.4114990234375, + "loss_ib": 14.539006233215332, + "step": 108 + }, + { + "ce_ib": 47.367916107177734, + "ce_orig": 0.615014374256134, + "epoch": 0.0313466101085628, + "kl_loss": 1492.5986328125, + "loss_ib": 14.9307222366333, + "step": 109 + }, + { + "ce_ib": 46.28598403930664, + "ce_orig": 1.3170636892318726, + "epoch": 0.0313466101085628, + "kl_loss": 1374.111572265625, + "loss_ib": 13.745744705200195, + "step": 109 + }, + { + "ce_ib": 47.25022506713867, + "ce_orig": 1.893700361251831, + "epoch": 0.0313466101085628, + "kl_loss": 1464.014404296875, + "loss_ib": 14.644868850708008, + "step": 109 + }, + { + "ce_ib": 43.77083969116211, + "ce_orig": 0.5421922206878662, + "epoch": 0.0313466101085628, + "kl_loss": 1321.385986328125, + "loss_ib": 13.218236923217773, + "step": 109 + }, + { + "epoch": 0.03163419368754044, + "grad_norm": 294.16033935546875, + "learning_rate": 1.6560509554140128e-05, + "loss": 16.7628, + "step": 110 + }, + { + "ce_ib": 47.9586067199707, + "ce_orig": 0.962894082069397, + "epoch": 0.03163419368754044, + "kl_loss": 1451.75830078125, + "loss_ib": 14.522378921508789, + "step": 110 + }, + { + "ce_ib": 43.98506546020508, + "ce_orig": 0.734940767288208, + "epoch": 0.03163419368754044, + "kl_loss": 1582.23291015625, + "loss_ib": 15.826726913452148, + "step": 110 + }, + { + "ce_ib": 43.784725189208984, + "ce_orig": 1.0270369052886963, + "epoch": 0.03163419368754044, + "kl_loss": 1439.91748046875, + "loss_ib": 14.403553009033203, + "step": 110 + }, + { + "ce_ib": 47.88432312011719, + "ce_orig": 1.8923044204711914, + "epoch": 0.03163419368754044, + "kl_loss": 1339.40087890625, + "loss_ib": 13.398797035217285, + "step": 110 + }, + { + "ce_ib": 45.02385711669922, + "ce_orig": 0.563847005367279, + "epoch": 0.031921777266518084, + "kl_loss": 1429.29736328125, + "loss_ib": 14.29747486114502, + "step": 111 + }, + { + "ce_ib": 44.20392990112305, + "ce_orig": 1.070389986038208, + "epoch": 0.031921777266518084, + "kl_loss": 1417.1707763671875, + "loss_ib": 14.176127433776855, + "step": 111 + }, + { + "ce_ib": 47.985328674316406, + "ce_orig": 1.82245671749115, + "epoch": 0.031921777266518084, + "kl_loss": 1312.49853515625, + "loss_ib": 13.129783630371094, + "step": 111 + }, + { + "ce_ib": 47.73635482788086, + "ce_orig": 1.0577436685562134, + "epoch": 0.031921777266518084, + "kl_loss": 1424.302734375, + "loss_ib": 14.247800827026367, + "step": 111 + }, + { + "ce_ib": 46.29003143310547, + "ce_orig": 1.197394847869873, + "epoch": 0.03220936084549572, + "kl_loss": 1341.0858154296875, + "loss_ib": 13.415486335754395, + "step": 112 + }, + { + "ce_ib": 45.729007720947266, + "ce_orig": 0.9139751195907593, + "epoch": 0.03220936084549572, + "kl_loss": 1310.7313232421875, + "loss_ib": 13.111886024475098, + "step": 112 + }, + { + "ce_ib": 46.864864349365234, + "ce_orig": 1.5965396165847778, + "epoch": 0.03220936084549572, + "kl_loss": 1156.0203857421875, + "loss_ib": 11.564889907836914, + "step": 112 + }, + { + "ce_ib": 43.946414947509766, + "ce_orig": 1.2731986045837402, + "epoch": 0.03220936084549572, + "kl_loss": 1306.838134765625, + "loss_ib": 13.072775840759277, + "step": 112 + }, + { + "ce_ib": 44.451026916503906, + "ce_orig": 1.2415810823440552, + "epoch": 0.032496944424473366, + "kl_loss": 1380.1820068359375, + "loss_ib": 13.80626392364502, + "step": 113 + }, + { + "ce_ib": 45.05312728881836, + "ce_orig": 0.7081640362739563, + "epoch": 0.032496944424473366, + "kl_loss": 1326.8614501953125, + "loss_ib": 13.273119926452637, + "step": 113 + }, + { + "ce_ib": 45.38166809082031, + "ce_orig": 0.49601882696151733, + "epoch": 0.032496944424473366, + "kl_loss": 1228.1025390625, + "loss_ib": 12.285563468933105, + "step": 113 + }, + { + "ce_ib": 42.399070739746094, + "ce_orig": 1.1342860460281372, + "epoch": 0.032496944424473366, + "kl_loss": 1330.390380859375, + "loss_ib": 13.30814266204834, + "step": 113 + }, + { + "ce_ib": 42.90398406982422, + "ce_orig": 0.8459790945053101, + "epoch": 0.032784528003451004, + "kl_loss": 1192.5108642578125, + "loss_ib": 11.929399490356445, + "step": 114 + }, + { + "ce_ib": 42.922142028808594, + "ce_orig": 0.657600462436676, + "epoch": 0.032784528003451004, + "kl_loss": 1351.73291015625, + "loss_ib": 13.521620750427246, + "step": 114 + }, + { + "ce_ib": 43.3038215637207, + "ce_orig": 0.5711429119110107, + "epoch": 0.032784528003451004, + "kl_loss": 1127.6776123046875, + "loss_ib": 11.281105995178223, + "step": 114 + }, + { + "ce_ib": 45.69675064086914, + "ce_orig": 1.5162954330444336, + "epoch": 0.032784528003451004, + "kl_loss": 1221.5166015625, + "loss_ib": 12.219735145568848, + "step": 114 + }, + { + "epoch": 0.03307211158242864, + "grad_norm": 255.8572540283203, + "learning_rate": 1.7356687898089173e-05, + "loss": 14.1527, + "step": 115 + }, + { + "ce_ib": 45.3430061340332, + "ce_orig": 1.2573144435882568, + "epoch": 0.03307211158242864, + "kl_loss": 1201.44775390625, + "loss_ib": 12.019010543823242, + "step": 115 + }, + { + "ce_ib": 41.08205795288086, + "ce_orig": 0.44548746943473816, + "epoch": 0.03307211158242864, + "kl_loss": 553.1367797851562, + "loss_ib": 5.535475730895996, + "step": 115 + }, + { + "ce_ib": 42.01618957519531, + "ce_orig": 0.9748629927635193, + "epoch": 0.03307211158242864, + "kl_loss": 1253.167724609375, + "loss_ib": 12.53587818145752, + "step": 115 + }, + { + "ce_ib": 46.25815200805664, + "ce_orig": 1.704155445098877, + "epoch": 0.03307211158242864, + "kl_loss": 1103.8121337890625, + "loss_ib": 11.042746543884277, + "step": 115 + }, + { + "ce_ib": 41.26174545288086, + "ce_orig": 0.6826565861701965, + "epoch": 0.033359695161406286, + "kl_loss": 1163.051025390625, + "loss_ib": 11.634635925292969, + "step": 116 + }, + { + "ce_ib": 40.63665008544922, + "ce_orig": 0.900534987449646, + "epoch": 0.033359695161406286, + "kl_loss": 1172.418212890625, + "loss_ib": 11.728245735168457, + "step": 116 + }, + { + "ce_ib": 44.31190872192383, + "ce_orig": 1.1153950691223145, + "epoch": 0.033359695161406286, + "kl_loss": 1033.94287109375, + "loss_ib": 10.343859672546387, + "step": 116 + }, + { + "ce_ib": 43.1805305480957, + "ce_orig": 0.9051750302314758, + "epoch": 0.033359695161406286, + "kl_loss": 1077.079833984375, + "loss_ib": 10.775115966796875, + "step": 116 + }, + { + "ce_ib": 41.2710075378418, + "ce_orig": 0.38958603143692017, + "epoch": 0.033647278740383924, + "kl_loss": 966.39892578125, + "loss_ib": 9.668116569519043, + "step": 117 + }, + { + "ce_ib": 42.25935745239258, + "ce_orig": 0.8274361491203308, + "epoch": 0.033647278740383924, + "kl_loss": 1053.1240234375, + "loss_ib": 10.535466194152832, + "step": 117 + }, + { + "ce_ib": 40.06983947753906, + "ce_orig": 0.7041372060775757, + "epoch": 0.033647278740383924, + "kl_loss": 1147.9034423828125, + "loss_ib": 11.483041763305664, + "step": 117 + }, + { + "ce_ib": 46.431880950927734, + "ce_orig": 1.3825441598892212, + "epoch": 0.033647278740383924, + "kl_loss": 1083.45751953125, + "loss_ib": 10.839218139648438, + "step": 117 + }, + { + "ce_ib": 43.25281524658203, + "ce_orig": 0.9396786689758301, + "epoch": 0.03393486231936156, + "kl_loss": 1038.500244140625, + "loss_ib": 10.389327049255371, + "step": 118 + }, + { + "ce_ib": 40.72803497314453, + "ce_orig": 0.5316663980484009, + "epoch": 0.03393486231936156, + "kl_loss": 935.3412475585938, + "loss_ib": 9.357484817504883, + "step": 118 + }, + { + "ce_ib": 40.19837951660156, + "ce_orig": 0.7874443531036377, + "epoch": 0.03393486231936156, + "kl_loss": 987.9114379882812, + "loss_ib": 9.883133888244629, + "step": 118 + }, + { + "ce_ib": 42.92312240600586, + "ce_orig": 0.8642221093177795, + "epoch": 0.03393486231936156, + "kl_loss": 1002.868896484375, + "loss_ib": 10.032980918884277, + "step": 118 + }, + { + "ce_ib": 43.140602111816406, + "ce_orig": 0.9903743863105774, + "epoch": 0.034222445898339206, + "kl_loss": 945.7418212890625, + "loss_ib": 9.461731910705566, + "step": 119 + }, + { + "ce_ib": 44.95377731323242, + "ce_orig": 0.7084935903549194, + "epoch": 0.034222445898339206, + "kl_loss": 1001.4967041015625, + "loss_ib": 10.019461631774902, + "step": 119 + }, + { + "ce_ib": 43.81313705444336, + "ce_orig": 1.0507792234420776, + "epoch": 0.034222445898339206, + "kl_loss": 966.8169555664062, + "loss_ib": 9.672550201416016, + "step": 119 + }, + { + "ce_ib": 38.509605407714844, + "ce_orig": 0.24314048886299133, + "epoch": 0.034222445898339206, + "kl_loss": 900.1303100585938, + "loss_ib": 9.00515365600586, + "step": 119 + }, + { + "epoch": 0.03451002947731684, + "grad_norm": 213.6476287841797, + "learning_rate": 1.8152866242038215e-05, + "loss": 11.2209, + "step": 120 + }, + { + "ce_ib": 43.3798942565918, + "ce_orig": 1.417758822441101, + "epoch": 0.03451002947731684, + "kl_loss": 874.8845825195312, + "loss_ib": 8.753183364868164, + "step": 120 + }, + { + "ce_ib": 41.32734298706055, + "ce_orig": 0.8705493807792664, + "epoch": 0.03451002947731684, + "kl_loss": 937.0117797851562, + "loss_ib": 9.374250411987305, + "step": 120 + }, + { + "ce_ib": 38.6541633605957, + "ce_orig": 0.23277077078819275, + "epoch": 0.03451002947731684, + "kl_loss": 378.5799560546875, + "loss_ib": 3.7896647453308105, + "step": 120 + }, + { + "ce_ib": 43.36669921875, + "ce_orig": 1.0161871910095215, + "epoch": 0.03451002947731684, + "kl_loss": 853.2371215820312, + "loss_ib": 8.536707878112793, + "step": 120 + }, + { + "ce_ib": 41.16870880126953, + "ce_orig": 0.9253172874450684, + "epoch": 0.03479761305629449, + "kl_loss": 846.5045776367188, + "loss_ib": 8.469161987304688, + "step": 121 + }, + { + "ce_ib": 44.02971267700195, + "ce_orig": 1.2445998191833496, + "epoch": 0.03479761305629449, + "kl_loss": 718.2892456054688, + "loss_ib": 7.187295436859131, + "step": 121 + }, + { + "ce_ib": 43.64518737792969, + "ce_orig": 1.7942239046096802, + "epoch": 0.03479761305629449, + "kl_loss": 756.7442626953125, + "loss_ib": 7.571806907653809, + "step": 121 + }, + { + "ce_ib": 42.69846725463867, + "ce_orig": 0.8896026015281677, + "epoch": 0.03479761305629449, + "kl_loss": 883.087890625, + "loss_ib": 8.835148811340332, + "step": 121 + }, + { + "ce_ib": 40.64472579956055, + "ce_orig": 0.8089054822921753, + "epoch": 0.035085196635272126, + "kl_loss": 727.42724609375, + "loss_ib": 7.278336524963379, + "step": 122 + }, + { + "ce_ib": 41.40364456176758, + "ce_orig": 0.8790702819824219, + "epoch": 0.035085196635272126, + "kl_loss": 701.3001708984375, + "loss_ib": 7.017142295837402, + "step": 122 + }, + { + "ce_ib": 45.067813873291016, + "ce_orig": 0.3258854150772095, + "epoch": 0.035085196635272126, + "kl_loss": 577.9033203125, + "loss_ib": 5.783539772033691, + "step": 122 + }, + { + "ce_ib": 44.684349060058594, + "ce_orig": 1.5608466863632202, + "epoch": 0.035085196635272126, + "kl_loss": 660.216796875, + "loss_ib": 6.606636047363281, + "step": 122 + }, + { + "ce_ib": 42.09290313720703, + "ce_orig": 0.7853221297264099, + "epoch": 0.03537278021424976, + "kl_loss": 688.5507202148438, + "loss_ib": 6.889716625213623, + "step": 123 + }, + { + "ce_ib": 41.19681930541992, + "ce_orig": 1.0058292150497437, + "epoch": 0.03537278021424976, + "kl_loss": 660.992431640625, + "loss_ib": 6.614044189453125, + "step": 123 + }, + { + "ce_ib": 41.700340270996094, + "ce_orig": 0.7194111347198486, + "epoch": 0.03537278021424976, + "kl_loss": 460.14190673828125, + "loss_ib": 4.605588912963867, + "step": 123 + }, + { + "ce_ib": 44.97040939331055, + "ce_orig": 1.1617707014083862, + "epoch": 0.03537278021424976, + "kl_loss": 618.4761962890625, + "loss_ib": 6.189259052276611, + "step": 123 + }, + { + "ce_ib": 42.433712005615234, + "ce_orig": 0.7316823601722717, + "epoch": 0.03566036379322741, + "kl_loss": 761.43359375, + "loss_ib": 7.618578910827637, + "step": 124 + }, + { + "ce_ib": 42.72594451904297, + "ce_orig": 0.9406179189682007, + "epoch": 0.03566036379322741, + "kl_loss": 608.378173828125, + "loss_ib": 6.088054180145264, + "step": 124 + }, + { + "ce_ib": 44.054351806640625, + "ce_orig": 1.101775050163269, + "epoch": 0.03566036379322741, + "kl_loss": 533.7700805664062, + "loss_ib": 5.342106342315674, + "step": 124 + }, + { + "ce_ib": 41.86262130737305, + "ce_orig": 1.2485934495925903, + "epoch": 0.03566036379322741, + "kl_loss": 604.7998657226562, + "loss_ib": 6.05218505859375, + "step": 124 + }, + { + "epoch": 0.035947947372205045, + "grad_norm": 165.4596405029297, + "learning_rate": 1.8949044585987264e-05, + "loss": 8.3603, + "step": 125 + }, + { + "ce_ib": 41.704227447509766, + "ce_orig": 0.7984323501586914, + "epoch": 0.035947947372205045, + "kl_loss": 570.8909912109375, + "loss_ib": 5.713080406188965, + "step": 125 + }, + { + "ce_ib": 44.47398376464844, + "ce_orig": 1.532689094543457, + "epoch": 0.035947947372205045, + "kl_loss": 572.6158447265625, + "loss_ib": 5.7306060791015625, + "step": 125 + }, + { + "ce_ib": 44.11103820800781, + "ce_orig": 0.8605639338493347, + "epoch": 0.035947947372205045, + "kl_loss": 560.5389404296875, + "loss_ib": 5.609800338745117, + "step": 125 + }, + { + "ce_ib": 42.20602798461914, + "ce_orig": 1.0192476511001587, + "epoch": 0.035947947372205045, + "kl_loss": 583.3988647460938, + "loss_ib": 5.83820915222168, + "step": 125 + }, + { + "ce_ib": 43.26830291748047, + "ce_orig": 0.7587823271751404, + "epoch": 0.03623553095118269, + "kl_loss": 595.424072265625, + "loss_ib": 5.958567142486572, + "step": 126 + }, + { + "ce_ib": 42.44752502441406, + "ce_orig": 1.2043931484222412, + "epoch": 0.03623553095118269, + "kl_loss": 496.97802734375, + "loss_ib": 4.974024772644043, + "step": 126 + }, + { + "ce_ib": 43.29387283325195, + "ce_orig": 1.735470175743103, + "epoch": 0.03623553095118269, + "kl_loss": 459.98992919921875, + "loss_ib": 4.604228496551514, + "step": 126 + }, + { + "ce_ib": 44.7207145690918, + "ce_orig": 1.0822900533676147, + "epoch": 0.03623553095118269, + "kl_loss": 513.75048828125, + "loss_ib": 5.141976833343506, + "step": 126 + }, + { + "ce_ib": 43.62824249267578, + "ce_orig": 1.2997839450836182, + "epoch": 0.03652311453016033, + "kl_loss": 499.59527587890625, + "loss_ib": 5.0003156661987305, + "step": 127 + }, + { + "ce_ib": 45.198951721191406, + "ce_orig": 1.4282422065734863, + "epoch": 0.03652311453016033, + "kl_loss": 442.3271484375, + "loss_ib": 4.427791118621826, + "step": 127 + }, + { + "ce_ib": 41.611297607421875, + "ce_orig": 0.6139658093452454, + "epoch": 0.03652311453016033, + "kl_loss": 492.67706298828125, + "loss_ib": 4.930931568145752, + "step": 127 + }, + { + "ce_ib": 43.24065017700195, + "ce_orig": 1.5706660747528076, + "epoch": 0.03652311453016033, + "kl_loss": 477.58251953125, + "loss_ib": 4.780148983001709, + "step": 127 + }, + { + "ce_ib": 45.77122497558594, + "ce_orig": 1.3252004384994507, + "epoch": 0.036810698109137965, + "kl_loss": 367.55364990234375, + "loss_ib": 3.6801135540008545, + "step": 128 + }, + { + "ce_ib": 43.11273956298828, + "ce_orig": 1.3362116813659668, + "epoch": 0.036810698109137965, + "kl_loss": 402.22039794921875, + "loss_ib": 4.026515007019043, + "step": 128 + }, + { + "ce_ib": 43.218231201171875, + "ce_orig": 1.3605029582977295, + "epoch": 0.036810698109137965, + "kl_loss": 467.728759765625, + "loss_ib": 4.681609630584717, + "step": 128 + }, + { + "ce_ib": 42.986263275146484, + "ce_orig": 1.012925386428833, + "epoch": 0.036810698109137965, + "kl_loss": 461.2456359863281, + "loss_ib": 4.61675500869751, + "step": 128 + }, + { + "ce_ib": 45.84535598754883, + "ce_orig": 0.9996353387832642, + "epoch": 0.03709828168811561, + "kl_loss": 307.2117919921875, + "loss_ib": 3.07670259475708, + "step": 129 + }, + { + "ce_ib": 47.88697052001953, + "ce_orig": 1.4389865398406982, + "epoch": 0.03709828168811561, + "kl_loss": 270.8951416015625, + "loss_ib": 2.713740110397339, + "step": 129 + }, + { + "ce_ib": 44.36796569824219, + "ce_orig": 0.28512611985206604, + "epoch": 0.03709828168811561, + "kl_loss": 424.74493408203125, + "loss_ib": 4.251885890960693, + "step": 129 + }, + { + "ce_ib": 49.39015197753906, + "ce_orig": 1.1395325660705566, + "epoch": 0.03709828168811561, + "kl_loss": 358.5738220214844, + "loss_ib": 3.59067702293396, + "step": 129 + }, + { + "epoch": 0.03738586526709325, + "grad_norm": 114.32688903808594, + "learning_rate": 1.974522292993631e-05, + "loss": 5.7247, + "step": 130 + }, + { + "ce_ib": 46.33614730834961, + "ce_orig": 1.3037816286087036, + "epoch": 0.03738586526709325, + "kl_loss": 291.12872314453125, + "loss_ib": 2.9159207344055176, + "step": 130 + }, + { + "ce_ib": 47.692874908447266, + "ce_orig": 0.864368736743927, + "epoch": 0.03738586526709325, + "kl_loss": 327.73089599609375, + "loss_ib": 3.282078266143799, + "step": 130 + }, + { + "ce_ib": 49.18111038208008, + "ce_orig": 1.450368046760559, + "epoch": 0.03738586526709325, + "kl_loss": 326.9405212402344, + "loss_ib": 3.2743234634399414, + "step": 130 + }, + { + "ce_ib": 47.151817321777344, + "ce_orig": 0.6973881125450134, + "epoch": 0.03738586526709325, + "kl_loss": 341.2000732421875, + "loss_ib": 3.4167158603668213, + "step": 130 + }, + { + "ce_ib": 47.11798858642578, + "ce_orig": 0.9874345660209656, + "epoch": 0.03767344884607089, + "kl_loss": 245.39163208007812, + "loss_ib": 2.458627939224243, + "step": 131 + }, + { + "ce_ib": 48.82184600830078, + "ce_orig": 1.7434450387954712, + "epoch": 0.03767344884607089, + "kl_loss": 233.43666076660156, + "loss_ib": 2.3392486572265625, + "step": 131 + }, + { + "ce_ib": 52.432861328125, + "ce_orig": 1.6617094278335571, + "epoch": 0.03767344884607089, + "kl_loss": 249.0355682373047, + "loss_ib": 2.495598793029785, + "step": 131 + }, + { + "ce_ib": 48.34469223022461, + "ce_orig": 0.8209026455879211, + "epoch": 0.03767344884607089, + "kl_loss": 258.18017578125, + "loss_ib": 2.5866363048553467, + "step": 131 + }, + { + "ce_ib": 49.49347686767578, + "ce_orig": 1.1256669759750366, + "epoch": 0.03796103242504853, + "kl_loss": 249.56777954101562, + "loss_ib": 2.500627040863037, + "step": 132 + }, + { + "ce_ib": 53.37258529663086, + "ce_orig": 1.1655960083007812, + "epoch": 0.03796103242504853, + "kl_loss": 296.350830078125, + "loss_ib": 2.9688453674316406, + "step": 132 + }, + { + "ce_ib": 48.557525634765625, + "ce_orig": 1.608039140701294, + "epoch": 0.03796103242504853, + "kl_loss": 236.46792602539062, + "loss_ib": 2.369534969329834, + "step": 132 + }, + { + "ce_ib": 49.64237594604492, + "ce_orig": 1.220885157585144, + "epoch": 0.03796103242504853, + "kl_loss": 232.15313720703125, + "loss_ib": 2.32649564743042, + "step": 132 + }, + { + "ce_ib": 53.12397384643555, + "ce_orig": 1.4053157567977905, + "epoch": 0.03824861600402617, + "kl_loss": 148.06582641601562, + "loss_ib": 1.4859706163406372, + "step": 133 + }, + { + "ce_ib": 46.214569091796875, + "ce_orig": 0.5235381722450256, + "epoch": 0.03824861600402617, + "kl_loss": 216.90960693359375, + "loss_ib": 2.173717498779297, + "step": 133 + }, + { + "ce_ib": 63.17025375366211, + "ce_orig": 1.6942404508590698, + "epoch": 0.03824861600402617, + "kl_loss": 157.73348999023438, + "loss_ib": 1.5836519002914429, + "step": 133 + }, + { + "ce_ib": 56.94756317138672, + "ce_orig": 1.1503974199295044, + "epoch": 0.03824861600402617, + "kl_loss": 172.39581298828125, + "loss_ib": 1.7296528816223145, + "step": 133 + }, + { + "ce_ib": 56.051700592041016, + "ce_orig": 1.2757078409194946, + "epoch": 0.03853619958300381, + "kl_loss": 132.28765869140625, + "loss_ib": 1.328481674194336, + "step": 134 + }, + { + "ce_ib": 57.61357879638672, + "ce_orig": 0.6087625026702881, + "epoch": 0.03853619958300381, + "kl_loss": 177.25326538085938, + "loss_ib": 1.7782940864562988, + "step": 134 + }, + { + "ce_ib": 62.553672790527344, + "ce_orig": 0.979148805141449, + "epoch": 0.03853619958300381, + "kl_loss": 148.2458038330078, + "loss_ib": 1.4887133836746216, + "step": 134 + }, + { + "ce_ib": 59.60444259643555, + "ce_orig": 0.786743700504303, + "epoch": 0.03853619958300381, + "kl_loss": 177.24185180664062, + "loss_ib": 1.7783788442611694, + "step": 134 + }, + { + "epoch": 0.03882378316198145, + "grad_norm": 61.325218200683594, + "learning_rate": 2.054140127388535e-05, + "loss": 3.423, + "step": 135 + }, + { + "ce_ib": 47.83711624145508, + "ce_orig": 1.0823129415512085, + "epoch": 0.03882378316198145, + "kl_loss": 117.60155487060547, + "loss_ib": 1.1807992458343506, + "step": 135 + }, + { + "ce_ib": 53.23299789428711, + "ce_orig": 2.0539369583129883, + "epoch": 0.03882378316198145, + "kl_loss": 105.29367065429688, + "loss_ib": 1.0582599639892578, + "step": 135 + }, + { + "ce_ib": 62.1235466003418, + "ce_orig": 1.288967251777649, + "epoch": 0.03882378316198145, + "kl_loss": 117.70156860351562, + "loss_ib": 1.1832280158996582, + "step": 135 + }, + { + "ce_ib": 60.246185302734375, + "ce_orig": 1.0303462743759155, + "epoch": 0.03882378316198145, + "kl_loss": 119.00950622558594, + "loss_ib": 1.1961196660995483, + "step": 135 + }, + { + "ce_ib": 64.75760650634766, + "ce_orig": 1.0299265384674072, + "epoch": 0.039111366740959094, + "kl_loss": 97.26797485351562, + "loss_ib": 0.9791554808616638, + "step": 136 + }, + { + "ce_ib": 59.91645812988281, + "ce_orig": 0.9491832256317139, + "epoch": 0.039111366740959094, + "kl_loss": 101.67373657226562, + "loss_ib": 1.0227290391921997, + "step": 136 + }, + { + "ce_ib": 64.09386444091797, + "ce_orig": 1.0575294494628906, + "epoch": 0.039111366740959094, + "kl_loss": 86.674072265625, + "loss_ib": 0.873150110244751, + "step": 136 + }, + { + "ce_ib": 55.84811782836914, + "ce_orig": 0.5318177342414856, + "epoch": 0.039111366740959094, + "kl_loss": 69.2609634399414, + "loss_ib": 0.6981943845748901, + "step": 136 + }, + { + "ce_ib": 61.201900482177734, + "ce_orig": 1.5152733325958252, + "epoch": 0.03939895031993673, + "kl_loss": 86.82037353515625, + "loss_ib": 0.8743239641189575, + "step": 137 + }, + { + "ce_ib": 48.1711540222168, + "ce_orig": 0.9321234822273254, + "epoch": 0.03939895031993673, + "kl_loss": 88.01079559326172, + "loss_ib": 0.8849250674247742, + "step": 137 + }, + { + "ce_ib": 56.62932205200195, + "ce_orig": 0.9663710594177246, + "epoch": 0.03939895031993673, + "kl_loss": 92.86305236816406, + "loss_ib": 0.9342934489250183, + "step": 137 + }, + { + "ce_ib": 50.61298751831055, + "ce_orig": 1.012882113456726, + "epoch": 0.03939895031993673, + "kl_loss": 100.32122802734375, + "loss_ib": 1.0082734823226929, + "step": 137 + }, + { + "ce_ib": 52.78557586669922, + "ce_orig": 1.3567779064178467, + "epoch": 0.03968653389891437, + "kl_loss": 75.2830581665039, + "loss_ib": 0.7581090927124023, + "step": 138 + }, + { + "ce_ib": 52.53969192504883, + "ce_orig": 1.0360667705535889, + "epoch": 0.03968653389891437, + "kl_loss": 80.51203918457031, + "loss_ib": 0.81037437915802, + "step": 138 + }, + { + "ce_ib": 42.97282028198242, + "ce_orig": 0.6911470890045166, + "epoch": 0.03968653389891437, + "kl_loss": 107.92098236083984, + "loss_ib": 1.0835070610046387, + "step": 138 + }, + { + "ce_ib": 55.01441192626953, + "ce_orig": 1.0413540601730347, + "epoch": 0.03968653389891437, + "kl_loss": 84.3485107421875, + "loss_ib": 0.8489865064620972, + "step": 138 + }, + { + "ce_ib": 49.41832733154297, + "ce_orig": 0.5004691481590271, + "epoch": 0.039974117477892014, + "kl_loss": 64.56187438964844, + "loss_ib": 0.6505606174468994, + "step": 139 + }, + { + "ce_ib": 54.80555725097656, + "ce_orig": 0.8709143996238708, + "epoch": 0.039974117477892014, + "kl_loss": 75.56375122070312, + "loss_ib": 0.7611180543899536, + "step": 139 + }, + { + "ce_ib": 44.817134857177734, + "ce_orig": 1.1028708219528198, + "epoch": 0.039974117477892014, + "kl_loss": 56.54993438720703, + "loss_ib": 0.569981038570404, + "step": 139 + }, + { + "ce_ib": 50.21042251586914, + "ce_orig": 1.0824670791625977, + "epoch": 0.039974117477892014, + "kl_loss": 69.47157287597656, + "loss_ib": 0.6997367739677429, + "step": 139 + }, + { + "epoch": 0.04026170105686965, + "grad_norm": 27.144216537475586, + "learning_rate": 2.1337579617834397e-05, + "loss": 2.0774, + "step": 140 + }, + { + "ce_ib": 47.516990661621094, + "ce_orig": 1.2334574460983276, + "epoch": 0.04026170105686965, + "kl_loss": 63.68349838256836, + "loss_ib": 0.6415866613388062, + "step": 140 + }, + { + "ce_ib": 42.7164306640625, + "ce_orig": 0.7245992422103882, + "epoch": 0.04026170105686965, + "kl_loss": 62.23073196411133, + "loss_ib": 0.6265789866447449, + "step": 140 + }, + { + "ce_ib": 47.90711212158203, + "ce_orig": 0.797220766544342, + "epoch": 0.04026170105686965, + "kl_loss": 70.8463134765625, + "loss_ib": 0.7132538557052612, + "step": 140 + }, + { + "ce_ib": 44.96827697753906, + "ce_orig": 0.9708709716796875, + "epoch": 0.04026170105686965, + "kl_loss": 62.58702087402344, + "loss_ib": 0.6303670406341553, + "step": 140 + }, + { + "ce_ib": 43.66420364379883, + "ce_orig": 1.5154752731323242, + "epoch": 0.040549284635847296, + "kl_loss": 63.67985916137695, + "loss_ib": 0.641165018081665, + "step": 141 + }, + { + "ce_ib": 39.74589920043945, + "ce_orig": 1.1597050428390503, + "epoch": 0.040549284635847296, + "kl_loss": 68.15312194824219, + "loss_ib": 0.6855058073997498, + "step": 141 + }, + { + "ce_ib": 38.30898666381836, + "ce_orig": 1.208406686782837, + "epoch": 0.040549284635847296, + "kl_loss": 54.45480728149414, + "loss_ib": 0.5483789443969727, + "step": 141 + }, + { + "ce_ib": 42.6765022277832, + "ce_orig": 1.5113545656204224, + "epoch": 0.040549284635847296, + "kl_loss": 45.85490417480469, + "loss_ib": 0.46281668543815613, + "step": 141 + }, + { + "ce_ib": 40.479461669921875, + "ce_orig": 1.2261719703674316, + "epoch": 0.040836868214824934, + "kl_loss": 44.18513488769531, + "loss_ib": 0.44589927792549133, + "step": 142 + }, + { + "ce_ib": 38.052547454833984, + "ce_orig": 0.6562057137489319, + "epoch": 0.040836868214824934, + "kl_loss": 54.23804473876953, + "loss_ib": 0.5461856722831726, + "step": 142 + }, + { + "ce_ib": 42.021270751953125, + "ce_orig": 0.31955811381340027, + "epoch": 0.040836868214824934, + "kl_loss": 29.33365249633789, + "loss_ib": 0.2975386381149292, + "step": 142 + }, + { + "ce_ib": 41.40554428100586, + "ce_orig": 0.9634075164794922, + "epoch": 0.040836868214824934, + "kl_loss": 54.530174255371094, + "loss_ib": 0.5494422912597656, + "step": 142 + }, + { + "ce_ib": 38.900028228759766, + "ce_orig": 1.0080485343933105, + "epoch": 0.04112445179380257, + "kl_loss": 52.973106384277344, + "loss_ib": 0.533621072769165, + "step": 143 + }, + { + "ce_ib": 38.12443923950195, + "ce_orig": 0.9282627105712891, + "epoch": 0.04112445179380257, + "kl_loss": 42.263893127441406, + "loss_ib": 0.42645135521888733, + "step": 143 + }, + { + "ce_ib": 43.536231994628906, + "ce_orig": 1.5404144525527954, + "epoch": 0.04112445179380257, + "kl_loss": 39.663185119628906, + "loss_ib": 0.4009854793548584, + "step": 143 + }, + { + "ce_ib": 39.83261489868164, + "ce_orig": 1.2672309875488281, + "epoch": 0.04112445179380257, + "kl_loss": 42.35781478881836, + "loss_ib": 0.4275614023208618, + "step": 143 + }, + { + "ce_ib": 41.01529312133789, + "ce_orig": 1.457834005355835, + "epoch": 0.041412035372780216, + "kl_loss": 34.49407958984375, + "loss_ib": 0.3490423262119293, + "step": 144 + }, + { + "ce_ib": 29.345317840576172, + "ce_orig": 0.3651731610298157, + "epoch": 0.041412035372780216, + "kl_loss": 68.94469451904297, + "loss_ib": 0.6923814415931702, + "step": 144 + }, + { + "ce_ib": 34.595951080322266, + "ce_orig": 0.5874239802360535, + "epoch": 0.041412035372780216, + "kl_loss": 42.69541931152344, + "loss_ib": 0.43041378259658813, + "step": 144 + }, + { + "ce_ib": 33.79957962036133, + "ce_orig": 0.6981248259544373, + "epoch": 0.041412035372780216, + "kl_loss": 43.602195739746094, + "loss_ib": 0.43940192461013794, + "step": 144 + }, + { + "epoch": 0.041699618951757854, + "grad_norm": 11.803001403808594, + "learning_rate": 2.2133757961783442e-05, + "loss": 1.5408, + "step": 145 + }, + { + "ce_ib": 35.549190521240234, + "ce_orig": 1.1565806865692139, + "epoch": 0.041699618951757854, + "kl_loss": 33.884830474853516, + "loss_ib": 0.34240320324897766, + "step": 145 + }, + { + "ce_ib": 36.85725021362305, + "ce_orig": 0.614605188369751, + "epoch": 0.041699618951757854, + "kl_loss": 38.68310546875, + "loss_ib": 0.3905167579650879, + "step": 145 + }, + { + "ce_ib": 33.613216400146484, + "ce_orig": 0.774656355381012, + "epoch": 0.041699618951757854, + "kl_loss": 35.071624755859375, + "loss_ib": 0.35407754778862, + "step": 145 + }, + { + "ce_ib": 38.60401916503906, + "ce_orig": 1.3087610006332397, + "epoch": 0.041699618951757854, + "kl_loss": 27.925447463989258, + "loss_ib": 0.28311488032341003, + "step": 145 + }, + { + "ce_ib": 31.10846519470215, + "ce_orig": 0.8307720422744751, + "epoch": 0.0419872025307355, + "kl_loss": 35.52260208129883, + "loss_ib": 0.358336865901947, + "step": 146 + }, + { + "ce_ib": 35.52298355102539, + "ce_orig": 0.6402543187141418, + "epoch": 0.0419872025307355, + "kl_loss": 20.296550750732422, + "loss_ib": 0.2065178006887436, + "step": 146 + }, + { + "ce_ib": 32.333797454833984, + "ce_orig": 0.5773739814758301, + "epoch": 0.0419872025307355, + "kl_loss": 33.569828033447266, + "loss_ib": 0.3389316499233246, + "step": 146 + }, + { + "ce_ib": 36.07624816894531, + "ce_orig": 1.3285282850265503, + "epoch": 0.0419872025307355, + "kl_loss": 28.952056884765625, + "loss_ib": 0.29312819242477417, + "step": 146 + }, + { + "ce_ib": 34.523563385009766, + "ce_orig": 1.455711841583252, + "epoch": 0.042274786109713136, + "kl_loss": 33.6051025390625, + "loss_ib": 0.33950334787368774, + "step": 147 + }, + { + "ce_ib": 32.496185302734375, + "ce_orig": 0.8119601011276245, + "epoch": 0.042274786109713136, + "kl_loss": 30.560955047607422, + "loss_ib": 0.3088591694831848, + "step": 147 + }, + { + "ce_ib": 31.24298858642578, + "ce_orig": 0.6599155068397522, + "epoch": 0.042274786109713136, + "kl_loss": 25.330509185791016, + "loss_ib": 0.25642937421798706, + "step": 147 + }, + { + "ce_ib": 34.15837097167969, + "ce_orig": 0.7831727862358093, + "epoch": 0.042274786109713136, + "kl_loss": 32.8238410949707, + "loss_ib": 0.33165425062179565, + "step": 147 + }, + { + "ce_ib": 36.50813674926758, + "ce_orig": 1.3959016799926758, + "epoch": 0.042562369688690774, + "kl_loss": 24.21875762939453, + "loss_ib": 0.24583838880062103, + "step": 148 + }, + { + "ce_ib": 33.1202392578125, + "ce_orig": 0.511696457862854, + "epoch": 0.042562369688690774, + "kl_loss": 32.21922302246094, + "loss_ib": 0.32550424337387085, + "step": 148 + }, + { + "ce_ib": 31.0117130279541, + "ce_orig": 0.6812951564788818, + "epoch": 0.042562369688690774, + "kl_loss": 24.291759490966797, + "loss_ib": 0.24601876735687256, + "step": 148 + }, + { + "ce_ib": 31.82808494567871, + "ce_orig": 0.6159489750862122, + "epoch": 0.042562369688690774, + "kl_loss": 21.070880889892578, + "loss_ib": 0.21389161050319672, + "step": 148 + }, + { + "ce_ib": 30.777088165283203, + "ce_orig": 0.6892868280410767, + "epoch": 0.04284995326766842, + "kl_loss": 22.594371795654297, + "loss_ib": 0.22902143001556396, + "step": 149 + }, + { + "ce_ib": 37.14453887939453, + "ce_orig": 1.9816077947616577, + "epoch": 0.04284995326766842, + "kl_loss": 44.33348083496094, + "loss_ib": 0.44704926013946533, + "step": 149 + }, + { + "ce_ib": 28.821805953979492, + "ce_orig": 0.8520447611808777, + "epoch": 0.04284995326766842, + "kl_loss": 25.312294006347656, + "loss_ib": 0.25600510835647583, + "step": 149 + }, + { + "ce_ib": 34.31684494018555, + "ce_orig": 1.2896530628204346, + "epoch": 0.04284995326766842, + "kl_loss": 29.53026580810547, + "loss_ib": 0.2987343370914459, + "step": 149 + }, + { + "epoch": 0.043137536846646056, + "grad_norm": 6.311826705932617, + "learning_rate": 2.2929936305732484e-05, + "loss": 1.2869, + "step": 150 + }, + { + "ce_ib": 29.53162384033203, + "ce_orig": 0.9975224733352661, + "epoch": 0.043137536846646056, + "kl_loss": 21.389690399169922, + "loss_ib": 0.21685007214546204, + "step": 150 + }, + { + "ce_ib": 33.31801986694336, + "ce_orig": 1.2958406209945679, + "epoch": 0.043137536846646056, + "kl_loss": 24.239055633544922, + "loss_ib": 0.24572233855724335, + "step": 150 + }, + { + "ce_ib": 35.665565490722656, + "ce_orig": 0.5235558152198792, + "epoch": 0.043137536846646056, + "kl_loss": 22.637462615966797, + "loss_ib": 0.22994117438793182, + "step": 150 + }, + { + "ce_ib": 35.29521179199219, + "ce_orig": 1.2123780250549316, + "epoch": 0.043137536846646056, + "kl_loss": 21.82353401184082, + "loss_ib": 0.22176486253738403, + "step": 150 + }, + { + "ce_ib": 33.05928421020508, + "ce_orig": 0.8596100807189941, + "epoch": 0.043425120425623694, + "kl_loss": 24.44532012939453, + "loss_ib": 0.2477591335773468, + "step": 151 + }, + { + "ce_ib": 38.77718734741211, + "ce_orig": 2.1412835121154785, + "epoch": 0.043425120425623694, + "kl_loss": 28.010578155517578, + "loss_ib": 0.2839834988117218, + "step": 151 + }, + { + "ce_ib": 34.24200439453125, + "ce_orig": 0.6295925974845886, + "epoch": 0.043425120425623694, + "kl_loss": 21.05971908569336, + "loss_ib": 0.21402138471603394, + "step": 151 + }, + { + "ce_ib": 33.19257736206055, + "ce_orig": 1.2560220956802368, + "epoch": 0.043425120425623694, + "kl_loss": 21.76863670349121, + "loss_ib": 0.2210056185722351, + "step": 151 + }, + { + "ce_ib": 32.58409881591797, + "ce_orig": 0.7950013279914856, + "epoch": 0.04371270400460134, + "kl_loss": 28.509788513183594, + "loss_ib": 0.28835630416870117, + "step": 152 + }, + { + "ce_ib": 32.49606704711914, + "ce_orig": 1.8779208660125732, + "epoch": 0.04371270400460134, + "kl_loss": 30.838430404663086, + "loss_ib": 0.3116339147090912, + "step": 152 + }, + { + "ce_ib": 33.429622650146484, + "ce_orig": 0.7865967154502869, + "epoch": 0.04371270400460134, + "kl_loss": 29.228368759155273, + "loss_ib": 0.2956266403198242, + "step": 152 + }, + { + "ce_ib": 29.401348114013672, + "ce_orig": 0.7986537218093872, + "epoch": 0.04371270400460134, + "kl_loss": 24.602405548095703, + "loss_ib": 0.24896419048309326, + "step": 152 + }, + { + "ce_ib": 35.428165435791016, + "ce_orig": 1.3112716674804688, + "epoch": 0.044000287583578976, + "kl_loss": 18.217554092407227, + "loss_ib": 0.1857183575630188, + "step": 153 + }, + { + "ce_ib": 33.20622634887695, + "ce_orig": 1.091870903968811, + "epoch": 0.044000287583578976, + "kl_loss": 18.279142379760742, + "loss_ib": 0.18611203134059906, + "step": 153 + }, + { + "ce_ib": 32.40380859375, + "ce_orig": 1.0627433061599731, + "epoch": 0.044000287583578976, + "kl_loss": 20.22241973876953, + "loss_ib": 0.20546457171440125, + "step": 153 + }, + { + "ce_ib": 29.084455490112305, + "ce_orig": 0.823095440864563, + "epoch": 0.044000287583578976, + "kl_loss": 23.27497673034668, + "loss_ib": 0.235658198595047, + "step": 153 + }, + { + "ce_ib": 30.08700180053711, + "ce_orig": 0.6791905164718628, + "epoch": 0.04428787116255662, + "kl_loss": 17.341888427734375, + "loss_ib": 0.17642758786678314, + "step": 154 + }, + { + "ce_ib": 28.275983810424805, + "ce_orig": 0.40569692850112915, + "epoch": 0.04428787116255662, + "kl_loss": 19.95773696899414, + "loss_ib": 0.20240497589111328, + "step": 154 + }, + { + "ce_ib": 33.74617004394531, + "ce_orig": 1.7346209287643433, + "epoch": 0.04428787116255662, + "kl_loss": 18.556991577148438, + "loss_ib": 0.18894453346729279, + "step": 154 + }, + { + "ce_ib": 30.752422332763672, + "ce_orig": 0.71451336145401, + "epoch": 0.04428787116255662, + "kl_loss": 16.57543182373047, + "loss_ib": 0.168829545378685, + "step": 154 + }, + { + "epoch": 0.04457545474153426, + "grad_norm": 3.4597690105438232, + "learning_rate": 2.372611464968153e-05, + "loss": 1.2585, + "step": 155 + }, + { + "ce_ib": 28.92538833618164, + "ce_orig": 1.050588607788086, + "epoch": 0.04457545474153426, + "kl_loss": 19.674413681030273, + "loss_ib": 0.19963666796684265, + "step": 155 + }, + { + "ce_ib": 31.494068145751953, + "ce_orig": 1.3162670135498047, + "epoch": 0.04457545474153426, + "kl_loss": 15.291072845458984, + "loss_ib": 0.156060129404068, + "step": 155 + }, + { + "ce_ib": 31.52849769592285, + "ce_orig": 0.6012848615646362, + "epoch": 0.04457545474153426, + "kl_loss": 15.920844078063965, + "loss_ib": 0.1623612940311432, + "step": 155 + }, + { + "ce_ib": 33.46098709106445, + "ce_orig": 1.0411237478256226, + "epoch": 0.04457545474153426, + "kl_loss": 17.932607650756836, + "loss_ib": 0.1826721727848053, + "step": 155 + }, + { + "ce_ib": 34.06367874145508, + "ce_orig": 0.7581042647361755, + "epoch": 0.044863038320511896, + "kl_loss": 22.63808822631836, + "loss_ib": 0.22978724539279938, + "step": 156 + }, + { + "ce_ib": 30.403427124023438, + "ce_orig": 0.5148236751556396, + "epoch": 0.044863038320511896, + "kl_loss": 14.46303939819336, + "loss_ib": 0.14767073094844818, + "step": 156 + }, + { + "ce_ib": 29.40231704711914, + "ce_orig": 0.7519353032112122, + "epoch": 0.044863038320511896, + "kl_loss": 21.479459762573242, + "loss_ib": 0.21773482859134674, + "step": 156 + }, + { + "ce_ib": 27.831212997436523, + "ce_orig": 0.80788654088974, + "epoch": 0.044863038320511896, + "kl_loss": 16.518142700195312, + "loss_ib": 0.16796454787254333, + "step": 156 + }, + { + "ce_ib": 27.716188430786133, + "ce_orig": 0.7496557831764221, + "epoch": 0.04515062189948954, + "kl_loss": 13.905786514282227, + "loss_ib": 0.1418294757604599, + "step": 157 + }, + { + "ce_ib": 28.782617568969727, + "ce_orig": 0.7090852856636047, + "epoch": 0.04515062189948954, + "kl_loss": 15.777366638183594, + "loss_ib": 0.16065192222595215, + "step": 157 + }, + { + "ce_ib": 26.00276756286621, + "ce_orig": 0.494842529296875, + "epoch": 0.04515062189948954, + "kl_loss": 16.35750389099121, + "loss_ib": 0.16617530584335327, + "step": 157 + }, + { + "ce_ib": 28.558490753173828, + "ce_orig": 0.948776364326477, + "epoch": 0.04515062189948954, + "kl_loss": 15.582597732543945, + "loss_ib": 0.15868182480335236, + "step": 157 + }, + { + "ce_ib": 34.348106384277344, + "ce_orig": 1.4037892818450928, + "epoch": 0.04543820547846718, + "kl_loss": 15.35753059387207, + "loss_ib": 0.15701010823249817, + "step": 158 + }, + { + "ce_ib": 30.06648826599121, + "ce_orig": 1.0562583208084106, + "epoch": 0.04543820547846718, + "kl_loss": 13.857028007507324, + "loss_ib": 0.14157693088054657, + "step": 158 + }, + { + "ce_ib": 33.296878814697266, + "ce_orig": 1.184076189994812, + "epoch": 0.04543820547846718, + "kl_loss": 14.362920761108398, + "loss_ib": 0.14695888757705688, + "step": 158 + }, + { + "ce_ib": 30.477880477905273, + "ce_orig": 0.6938384771347046, + "epoch": 0.04543820547846718, + "kl_loss": 13.156094551086426, + "loss_ib": 0.13460873067378998, + "step": 158 + }, + { + "ce_ib": 30.92000961303711, + "ce_orig": 0.8126051425933838, + "epoch": 0.04572578905744482, + "kl_loss": 13.102733612060547, + "loss_ib": 0.13411933183670044, + "step": 159 + }, + { + "ce_ib": 32.433162689208984, + "ce_orig": 1.181881070137024, + "epoch": 0.04572578905744482, + "kl_loss": 14.020172119140625, + "loss_ib": 0.14344502985477448, + "step": 159 + }, + { + "ce_ib": 33.700931549072266, + "ce_orig": 1.5680264234542847, + "epoch": 0.04572578905744482, + "kl_loss": 13.899885177612305, + "loss_ib": 0.14236894249916077, + "step": 159 + }, + { + "ce_ib": 28.371702194213867, + "ce_orig": 0.9268200397491455, + "epoch": 0.04572578905744482, + "kl_loss": 18.150760650634766, + "loss_ib": 0.18434476852416992, + "step": 159 + }, + { + "epoch": 0.04601337263642246, + "grad_norm": 2.066725254058838, + "learning_rate": 2.4522292993630575e-05, + "loss": 1.0186, + "step": 160 + }, + { + "ce_ib": 29.896162033081055, + "ce_orig": 0.8601086735725403, + "epoch": 0.04601337263642246, + "kl_loss": 12.835603713989258, + "loss_ib": 0.13134564459323883, + "step": 160 + }, + { + "ce_ib": 29.533695220947266, + "ce_orig": 1.1664679050445557, + "epoch": 0.04601337263642246, + "kl_loss": 15.90629768371582, + "loss_ib": 0.1620163470506668, + "step": 160 + }, + { + "ce_ib": 28.180938720703125, + "ce_orig": 0.8322929739952087, + "epoch": 0.04601337263642246, + "kl_loss": 12.797597885131836, + "loss_ib": 0.13079407811164856, + "step": 160 + }, + { + "ce_ib": 28.38677215576172, + "ce_orig": 0.8806703090667725, + "epoch": 0.04601337263642246, + "kl_loss": 16.691715240478516, + "loss_ib": 0.16975581645965576, + "step": 160 + }, + { + "ce_ib": 27.899879455566406, + "ce_orig": 0.6471708416938782, + "epoch": 0.0463009562154001, + "kl_loss": 14.290294647216797, + "loss_ib": 0.14569292962551117, + "step": 161 + }, + { + "ce_ib": 27.485563278198242, + "ce_orig": 0.9937444925308228, + "epoch": 0.0463009562154001, + "kl_loss": 15.568538665771484, + "loss_ib": 0.1584339439868927, + "step": 161 + }, + { + "ce_ib": 30.291170120239258, + "ce_orig": 0.7304977178573608, + "epoch": 0.0463009562154001, + "kl_loss": 9.206818580627441, + "loss_ib": 0.09509730339050293, + "step": 161 + }, + { + "ce_ib": 29.51616859436035, + "ce_orig": 1.386801838874817, + "epoch": 0.0463009562154001, + "kl_loss": 20.9112548828125, + "loss_ib": 0.21206416189670563, + "step": 161 + }, + { + "ce_ib": 27.004371643066406, + "ce_orig": 0.6013516783714294, + "epoch": 0.04658853979437774, + "kl_loss": 15.80407428741455, + "loss_ib": 0.1607411801815033, + "step": 162 + }, + { + "ce_ib": 25.159454345703125, + "ce_orig": 0.9960594773292542, + "epoch": 0.04658853979437774, + "kl_loss": 13.249858856201172, + "loss_ib": 0.13501453399658203, + "step": 162 + }, + { + "ce_ib": 26.201725006103516, + "ce_orig": 0.5098617076873779, + "epoch": 0.04658853979437774, + "kl_loss": 11.842464447021484, + "loss_ib": 0.1210448145866394, + "step": 162 + }, + { + "ce_ib": 29.0825138092041, + "ce_orig": 0.8241496086120605, + "epoch": 0.04658853979437774, + "kl_loss": 16.010656356811523, + "loss_ib": 0.16301481425762177, + "step": 162 + }, + { + "ce_ib": 26.992971420288086, + "ce_orig": 0.8256320357322693, + "epoch": 0.04687612337335538, + "kl_loss": 11.819284439086914, + "loss_ib": 0.12089213728904724, + "step": 163 + }, + { + "ce_ib": 27.297061920166016, + "ce_orig": 0.9797989726066589, + "epoch": 0.04687612337335538, + "kl_loss": 12.12143325805664, + "loss_ib": 0.12394402921199799, + "step": 163 + }, + { + "ce_ib": 26.038820266723633, + "ce_orig": 0.779868483543396, + "epoch": 0.04687612337335538, + "kl_loss": 12.200529098510742, + "loss_ib": 0.12460917234420776, + "step": 163 + }, + { + "ce_ib": 30.567201614379883, + "ce_orig": 1.4183546304702759, + "epoch": 0.04687612337335538, + "kl_loss": 11.150833129882812, + "loss_ib": 0.11456504464149475, + "step": 163 + }, + { + "ce_ib": 27.423969268798828, + "ce_orig": 0.6227314472198486, + "epoch": 0.047163706952333025, + "kl_loss": 11.591859817504883, + "loss_ib": 0.11866099387407303, + "step": 164 + }, + { + "ce_ib": 31.1706485748291, + "ce_orig": 0.592538595199585, + "epoch": 0.047163706952333025, + "kl_loss": 11.143152236938477, + "loss_ib": 0.11454858630895615, + "step": 164 + }, + { + "ce_ib": 32.50811004638672, + "ce_orig": 0.8140405416488647, + "epoch": 0.047163706952333025, + "kl_loss": 13.64712905883789, + "loss_ib": 0.1397220939397812, + "step": 164 + }, + { + "ce_ib": 27.141164779663086, + "ce_orig": 0.41711243987083435, + "epoch": 0.047163706952333025, + "kl_loss": 12.186077117919922, + "loss_ib": 0.12457488477230072, + "step": 164 + }, + { + "epoch": 0.04745129053131066, + "grad_norm": 1.150227665901184, + "learning_rate": 2.531847133757962e-05, + "loss": 0.983, + "step": 165 + }, + { + "ce_ib": 25.222728729248047, + "ce_orig": 0.6572214365005493, + "epoch": 0.04745129053131066, + "kl_loss": 11.52933120727539, + "loss_ib": 0.11781557649374008, + "step": 165 + }, + { + "ce_ib": 33.31783676147461, + "ce_orig": 0.8566097617149353, + "epoch": 0.04745129053131066, + "kl_loss": 12.30784797668457, + "loss_ib": 0.12641026079654694, + "step": 165 + }, + { + "ce_ib": 24.890148162841797, + "ce_orig": 0.5485845804214478, + "epoch": 0.04745129053131066, + "kl_loss": 13.047416687011719, + "loss_ib": 0.1329631805419922, + "step": 165 + }, + { + "ce_ib": 28.270605087280273, + "ce_orig": 0.3421739935874939, + "epoch": 0.04745129053131066, + "kl_loss": 15.006128311157227, + "loss_ib": 0.15288834273815155, + "step": 165 + }, + { + "ce_ib": 26.621320724487305, + "ce_orig": 1.039825439453125, + "epoch": 0.0477388741102883, + "kl_loss": 11.003231048583984, + "loss_ib": 0.11269444227218628, + "step": 166 + }, + { + "ce_ib": 17.792619705200195, + "ce_orig": 0.09731145948171616, + "epoch": 0.0477388741102883, + "kl_loss": 6.299266338348389, + "loss_ib": 0.06477192044258118, + "step": 166 + }, + { + "ce_ib": 29.130701065063477, + "ce_orig": 1.3552623987197876, + "epoch": 0.0477388741102883, + "kl_loss": 12.245400428771973, + "loss_ib": 0.12536707520484924, + "step": 166 + }, + { + "ce_ib": 29.495161056518555, + "ce_orig": 0.5962749123573303, + "epoch": 0.0477388741102883, + "kl_loss": 11.876587867736816, + "loss_ib": 0.12171538919210434, + "step": 166 + }, + { + "ce_ib": 28.59954833984375, + "ce_orig": 1.011759638786316, + "epoch": 0.048026457689265944, + "kl_loss": 10.475525856018066, + "loss_ib": 0.10761521011590958, + "step": 167 + }, + { + "ce_ib": 26.95580291748047, + "ce_orig": 0.7863696813583374, + "epoch": 0.048026457689265944, + "kl_loss": 11.420799255371094, + "loss_ib": 0.11690356582403183, + "step": 167 + }, + { + "ce_ib": 25.404388427734375, + "ce_orig": 0.49368423223495483, + "epoch": 0.048026457689265944, + "kl_loss": 10.898456573486328, + "loss_ib": 0.11152499914169312, + "step": 167 + }, + { + "ce_ib": 27.980323791503906, + "ce_orig": 0.8852983713150024, + "epoch": 0.048026457689265944, + "kl_loss": 11.366129875183105, + "loss_ib": 0.11645933240652084, + "step": 167 + }, + { + "ce_ib": 25.69623374938965, + "ce_orig": 0.875866174697876, + "epoch": 0.04831404126824358, + "kl_loss": 11.159571647644043, + "loss_ib": 0.11416534334421158, + "step": 168 + }, + { + "ce_ib": 26.52794647216797, + "ce_orig": 0.7182326912879944, + "epoch": 0.04831404126824358, + "kl_loss": 10.362823486328125, + "loss_ib": 0.10628102719783783, + "step": 168 + }, + { + "ce_ib": 26.30867576599121, + "ce_orig": 0.8692768216133118, + "epoch": 0.04831404126824358, + "kl_loss": 10.388944625854492, + "loss_ib": 0.10652031004428864, + "step": 168 + }, + { + "ce_ib": 24.659727096557617, + "ce_orig": 0.6755059361457825, + "epoch": 0.04831404126824358, + "kl_loss": 10.561405181884766, + "loss_ib": 0.10808002203702927, + "step": 168 + }, + { + "ce_ib": 29.72395896911621, + "ce_orig": 1.1792970895767212, + "epoch": 0.04860162484722123, + "kl_loss": 10.214838981628418, + "loss_ib": 0.10512077808380127, + "step": 169 + }, + { + "ce_ib": 27.70913314819336, + "ce_orig": 0.9696344137191772, + "epoch": 0.04860162484722123, + "kl_loss": 10.429807662963867, + "loss_ib": 0.1070689857006073, + "step": 169 + }, + { + "ce_ib": 27.571584701538086, + "ce_orig": 0.9312324523925781, + "epoch": 0.04860162484722123, + "kl_loss": 10.552055358886719, + "loss_ib": 0.10827770829200745, + "step": 169 + }, + { + "ce_ib": 25.53492546081543, + "ce_orig": 0.8822551369667053, + "epoch": 0.04860162484722123, + "kl_loss": 11.001655578613281, + "loss_ib": 0.11257004737854004, + "step": 169 + }, + { + "epoch": 0.048889208426198864, + "grad_norm": 0.8837189674377441, + "learning_rate": 2.6114649681528662e-05, + "loss": 0.9786, + "step": 170 + }, + { + "ce_ib": 25.924278259277344, + "ce_orig": 1.155822992324829, + "epoch": 0.048889208426198864, + "kl_loss": 10.456619262695312, + "loss_ib": 0.10715862363576889, + "step": 170 + }, + { + "ce_ib": 27.14344024658203, + "ce_orig": 1.013275146484375, + "epoch": 0.048889208426198864, + "kl_loss": 9.088029861450195, + "loss_ib": 0.09359464794397354, + "step": 170 + }, + { + "ce_ib": 23.778573989868164, + "ce_orig": 0.5937850475311279, + "epoch": 0.048889208426198864, + "kl_loss": 11.208532333374023, + "loss_ib": 0.11446317285299301, + "step": 170 + }, + { + "ce_ib": 28.437326431274414, + "ce_orig": 1.3872705698013306, + "epoch": 0.048889208426198864, + "kl_loss": 11.0403413772583, + "loss_ib": 0.11324714124202728, + "step": 170 + }, + { + "ce_ib": 29.6293888092041, + "ce_orig": 1.264078974723816, + "epoch": 0.0491767920051765, + "kl_loss": 10.278146743774414, + "loss_ib": 0.10574440658092499, + "step": 171 + }, + { + "ce_ib": 23.555601119995117, + "ce_orig": 0.5615886449813843, + "epoch": 0.0491767920051765, + "kl_loss": 10.424758911132812, + "loss_ib": 0.10660314559936523, + "step": 171 + }, + { + "ce_ib": 29.939388275146484, + "ce_orig": 0.7696157097816467, + "epoch": 0.0491767920051765, + "kl_loss": 10.607294082641602, + "loss_ib": 0.10906687378883362, + "step": 171 + }, + { + "ce_ib": 21.64013671875, + "ce_orig": 0.8276143074035645, + "epoch": 0.0491767920051765, + "kl_loss": 11.212567329406738, + "loss_ib": 0.11428967863321304, + "step": 171 + }, + { + "ce_ib": 27.828157424926758, + "ce_orig": 0.9696255922317505, + "epoch": 0.04946437558415415, + "kl_loss": 10.001435279846191, + "loss_ib": 0.10279716551303864, + "step": 172 + }, + { + "ce_ib": 23.958757400512695, + "ce_orig": 0.6945645213127136, + "epoch": 0.04946437558415415, + "kl_loss": 11.062480926513672, + "loss_ib": 0.11302068829536438, + "step": 172 + }, + { + "ce_ib": 27.76424217224121, + "ce_orig": 0.9959214329719543, + "epoch": 0.04946437558415415, + "kl_loss": 10.427704811096191, + "loss_ib": 0.10705346614122391, + "step": 172 + }, + { + "ce_ib": 25.078935623168945, + "ce_orig": 0.5796197056770325, + "epoch": 0.04946437558415415, + "kl_loss": 10.434064865112305, + "loss_ib": 0.10684854537248611, + "step": 172 + }, + { + "ce_ib": 28.147438049316406, + "ce_orig": 0.8044544458389282, + "epoch": 0.049751959163131784, + "kl_loss": 10.538864135742188, + "loss_ib": 0.10820338129997253, + "step": 173 + }, + { + "ce_ib": 29.065446853637695, + "ce_orig": 0.8273786902427673, + "epoch": 0.049751959163131784, + "kl_loss": 9.853753089904785, + "loss_ib": 0.1014440730214119, + "step": 173 + }, + { + "ce_ib": 27.74785614013672, + "ce_orig": 1.2104791402816772, + "epoch": 0.049751959163131784, + "kl_loss": 10.07681655883789, + "loss_ib": 0.10354294627904892, + "step": 173 + }, + { + "ce_ib": 26.78622817993164, + "ce_orig": 1.5120453834533691, + "epoch": 0.049751959163131784, + "kl_loss": 9.62009334564209, + "loss_ib": 0.09887955337762833, + "step": 173 + }, + { + "ce_ib": 19.017391204833984, + "ce_orig": 0.5162482857704163, + "epoch": 0.05003954274210943, + "kl_loss": 7.353050231933594, + "loss_ib": 0.07543224096298218, + "step": 174 + }, + { + "ce_ib": 23.64644432067871, + "ce_orig": 0.9337442517280579, + "epoch": 0.05003954274210943, + "kl_loss": 10.005657196044922, + "loss_ib": 0.10242121666669846, + "step": 174 + }, + { + "ce_ib": 26.815704345703125, + "ce_orig": 0.6984226107597351, + "epoch": 0.05003954274210943, + "kl_loss": 10.047138214111328, + "loss_ib": 0.10315295308828354, + "step": 174 + }, + { + "ce_ib": 23.5247859954834, + "ce_orig": 0.6298738718032837, + "epoch": 0.05003954274210943, + "kl_loss": 9.880701065063477, + "loss_ib": 0.1011594831943512, + "step": 174 + }, + { + "epoch": 0.050327126321087066, + "grad_norm": 0.5031439661979675, + "learning_rate": 2.6910828025477707e-05, + "loss": 0.9779, + "step": 175 + }, + { + "ce_ib": 27.915422439575195, + "ce_orig": 1.2774735689163208, + "epoch": 0.050327126321087066, + "kl_loss": 10.713859558105469, + "loss_ib": 0.1099301278591156, + "step": 175 + }, + { + "ce_ib": 25.17244529724121, + "ce_orig": 0.5252784490585327, + "epoch": 0.050327126321087066, + "kl_loss": 10.287009239196777, + "loss_ib": 0.10538733005523682, + "step": 175 + }, + { + "ce_ib": 25.812246322631836, + "ce_orig": 0.7732113599777222, + "epoch": 0.050327126321087066, + "kl_loss": 8.927350997924805, + "loss_ib": 0.09185472875833511, + "step": 175 + }, + { + "ce_ib": 23.02685546875, + "ce_orig": 0.7139325141906738, + "epoch": 0.050327126321087066, + "kl_loss": 10.248601913452148, + "loss_ib": 0.10478869825601578, + "step": 175 + }, + { + "ce_ib": 22.77245330810547, + "ce_orig": 0.8318886756896973, + "epoch": 0.050614709900064704, + "kl_loss": 10.135682106018066, + "loss_ib": 0.1036340594291687, + "step": 176 + }, + { + "ce_ib": 27.42522430419922, + "ce_orig": 0.9915688037872314, + "epoch": 0.050614709900064704, + "kl_loss": 10.458263397216797, + "loss_ib": 0.10732515156269073, + "step": 176 + }, + { + "ce_ib": 24.290016174316406, + "ce_orig": 0.7032797932624817, + "epoch": 0.050614709900064704, + "kl_loss": 10.18847370147705, + "loss_ib": 0.10431373119354248, + "step": 176 + }, + { + "ce_ib": 27.81938362121582, + "ce_orig": 1.1106735467910767, + "epoch": 0.050614709900064704, + "kl_loss": 9.77632999420166, + "loss_ib": 0.100545234978199, + "step": 176 + }, + { + "ce_ib": 28.1321964263916, + "ce_orig": 1.3234449625015259, + "epoch": 0.05090229347904235, + "kl_loss": 9.504875183105469, + "loss_ib": 0.09786196798086166, + "step": 177 + }, + { + "ce_ib": 25.004257202148438, + "ce_orig": 0.7698526382446289, + "epoch": 0.05090229347904235, + "kl_loss": 10.274667739868164, + "loss_ib": 0.10524710267782211, + "step": 177 + }, + { + "ce_ib": 25.05718421936035, + "ce_orig": 0.8450519442558289, + "epoch": 0.05090229347904235, + "kl_loss": 7.935550689697266, + "loss_ib": 0.0818612277507782, + "step": 177 + }, + { + "ce_ib": 24.45059585571289, + "ce_orig": 0.6560284495353699, + "epoch": 0.05090229347904235, + "kl_loss": 10.264579772949219, + "loss_ib": 0.10509085655212402, + "step": 177 + }, + { + "ce_ib": 22.231950759887695, + "ce_orig": 0.8478792309761047, + "epoch": 0.051189877058019986, + "kl_loss": 9.088963508605957, + "loss_ib": 0.09311282634735107, + "step": 178 + }, + { + "ce_ib": 25.44860076904297, + "ce_orig": 0.7396875619888306, + "epoch": 0.051189877058019986, + "kl_loss": 9.547811508178711, + "loss_ib": 0.09802297502756119, + "step": 178 + }, + { + "ce_ib": 26.52227783203125, + "ce_orig": 1.3045439720153809, + "epoch": 0.051189877058019986, + "kl_loss": 9.957924842834473, + "loss_ib": 0.1022314727306366, + "step": 178 + }, + { + "ce_ib": 25.924222946166992, + "ce_orig": 1.1649706363677979, + "epoch": 0.051189877058019986, + "kl_loss": 10.241584777832031, + "loss_ib": 0.10500826686620712, + "step": 178 + }, + { + "ce_ib": 27.674495697021484, + "ce_orig": 0.5262369513511658, + "epoch": 0.051477460636997624, + "kl_loss": 8.583837509155273, + "loss_ib": 0.08860582113265991, + "step": 179 + }, + { + "ce_ib": 29.368635177612305, + "ce_orig": 1.8323945999145508, + "epoch": 0.051477460636997624, + "kl_loss": 10.265556335449219, + "loss_ib": 0.10559242218732834, + "step": 179 + }, + { + "ce_ib": 24.87542152404785, + "ce_orig": 0.8622165322303772, + "epoch": 0.051477460636997624, + "kl_loss": 9.931290626525879, + "loss_ib": 0.10180044919252396, + "step": 179 + }, + { + "ce_ib": 24.743249893188477, + "ce_orig": 0.8221871852874756, + "epoch": 0.051477460636997624, + "kl_loss": 9.529619216918945, + "loss_ib": 0.09777051955461502, + "step": 179 + }, + { + "epoch": 0.05176504421597527, + "grad_norm": 0.6572920680046082, + "learning_rate": 2.7707006369426753e-05, + "loss": 0.9762, + "step": 180 + }, + { + "ce_ib": 26.228139877319336, + "ce_orig": 0.9346477389335632, + "epoch": 0.05176504421597527, + "kl_loss": 9.228906631469727, + "loss_ib": 0.09491188079118729, + "step": 180 + }, + { + "ce_ib": 27.96097183227539, + "ce_orig": 1.4497267007827759, + "epoch": 0.05176504421597527, + "kl_loss": 9.41794204711914, + "loss_ib": 0.09697551280260086, + "step": 180 + }, + { + "ce_ib": 25.501893997192383, + "ce_orig": 1.3674439191818237, + "epoch": 0.05176504421597527, + "kl_loss": 9.394105911254883, + "loss_ib": 0.0964912474155426, + "step": 180 + }, + { + "ce_ib": 27.754831314086914, + "ce_orig": 0.9353328943252563, + "epoch": 0.05176504421597527, + "kl_loss": 9.990425109863281, + "loss_ib": 0.10267972946166992, + "step": 180 + }, + { + "ce_ib": 24.477588653564453, + "ce_orig": 1.223670482635498, + "epoch": 0.052052627794952906, + "kl_loss": 9.197659492492676, + "loss_ib": 0.09442435204982758, + "step": 181 + }, + { + "ce_ib": 23.42432403564453, + "ce_orig": 0.34692513942718506, + "epoch": 0.052052627794952906, + "kl_loss": 9.035377502441406, + "loss_ib": 0.09269620478153229, + "step": 181 + }, + { + "ce_ib": 27.408384323120117, + "ce_orig": 1.0176830291748047, + "epoch": 0.052052627794952906, + "kl_loss": 8.829448699951172, + "loss_ib": 0.09103532880544662, + "step": 181 + }, + { + "ce_ib": 20.15254020690918, + "ce_orig": 0.4916859269142151, + "epoch": 0.052052627794952906, + "kl_loss": 8.9959716796875, + "loss_ib": 0.09197497367858887, + "step": 181 + }, + { + "ce_ib": 28.84882164001465, + "ce_orig": 1.4133418798446655, + "epoch": 0.05234021137393055, + "kl_loss": 9.433625221252441, + "loss_ib": 0.09722113609313965, + "step": 182 + }, + { + "ce_ib": 24.220762252807617, + "ce_orig": 0.7626959085464478, + "epoch": 0.05234021137393055, + "kl_loss": 9.004999160766602, + "loss_ib": 0.09247206151485443, + "step": 182 + }, + { + "ce_ib": 26.707427978515625, + "ce_orig": 0.950811505317688, + "epoch": 0.05234021137393055, + "kl_loss": 9.357291221618652, + "loss_ib": 0.09624365717172623, + "step": 182 + }, + { + "ce_ib": 23.827503204345703, + "ce_orig": 0.5993396639823914, + "epoch": 0.05234021137393055, + "kl_loss": 9.675762176513672, + "loss_ib": 0.09914036840200424, + "step": 182 + }, + { + "ce_ib": 25.040048599243164, + "ce_orig": 1.0414315462112427, + "epoch": 0.05262779495290819, + "kl_loss": 9.17612075805664, + "loss_ib": 0.09426520764827728, + "step": 183 + }, + { + "ce_ib": 26.632596969604492, + "ce_orig": 1.2410509586334229, + "epoch": 0.05262779495290819, + "kl_loss": 9.67950439453125, + "loss_ib": 0.09945829957723618, + "step": 183 + }, + { + "ce_ib": 22.586328506469727, + "ce_orig": 0.5787039399147034, + "epoch": 0.05262779495290819, + "kl_loss": 9.118326187133789, + "loss_ib": 0.09344189614057541, + "step": 183 + }, + { + "ce_ib": 29.002498626708984, + "ce_orig": 1.1482164859771729, + "epoch": 0.05262779495290819, + "kl_loss": 10.303224563598633, + "loss_ib": 0.10593248903751373, + "step": 183 + }, + { + "ce_ib": 23.188966751098633, + "ce_orig": 0.7345482110977173, + "epoch": 0.052915378531885826, + "kl_loss": 9.377893447875977, + "loss_ib": 0.09609782695770264, + "step": 184 + }, + { + "ce_ib": 25.167457580566406, + "ce_orig": 1.279574990272522, + "epoch": 0.052915378531885826, + "kl_loss": 8.157093048095703, + "loss_ib": 0.08408767729997635, + "step": 184 + }, + { + "ce_ib": 25.17441749572754, + "ce_orig": 1.2902156114578247, + "epoch": 0.052915378531885826, + "kl_loss": 8.778035163879395, + "loss_ib": 0.09029779583215714, + "step": 184 + }, + { + "ce_ib": 27.5651798248291, + "ce_orig": 0.6481632590293884, + "epoch": 0.052915378531885826, + "kl_loss": 8.615208625793457, + "loss_ib": 0.0889086052775383, + "step": 184 + }, + { + "epoch": 0.05320296211086347, + "grad_norm": 0.774932861328125, + "learning_rate": 2.8503184713375798e-05, + "loss": 1.0273, + "step": 185 + }, + { + "ce_ib": 28.26412582397461, + "ce_orig": 0.9693439602851868, + "epoch": 0.05320296211086347, + "kl_loss": 9.72558307647705, + "loss_ib": 0.10008224099874496, + "step": 185 + }, + { + "ce_ib": 20.355464935302734, + "ce_orig": 0.6961947679519653, + "epoch": 0.05320296211086347, + "kl_loss": 8.683027267456055, + "loss_ib": 0.08886582404375076, + "step": 185 + }, + { + "ce_ib": 19.516334533691406, + "ce_orig": 0.6023780703544617, + "epoch": 0.05320296211086347, + "kl_loss": 9.514884948730469, + "loss_ib": 0.09710048139095306, + "step": 185 + }, + { + "ce_ib": 21.90512466430664, + "ce_orig": 0.8949795961380005, + "epoch": 0.05320296211086347, + "kl_loss": 9.39652156829834, + "loss_ib": 0.09615572541952133, + "step": 185 + }, + { + "ce_ib": 24.16393280029297, + "ce_orig": 0.730219304561615, + "epoch": 0.05349054568984111, + "kl_loss": 8.413753509521484, + "loss_ib": 0.08655392378568649, + "step": 186 + }, + { + "ce_ib": 24.4334774017334, + "ce_orig": 1.2984904050827026, + "epoch": 0.05349054568984111, + "kl_loss": 8.700630187988281, + "loss_ib": 0.08944965153932571, + "step": 186 + }, + { + "ce_ib": 22.994918823242188, + "ce_orig": 0.9692792296409607, + "epoch": 0.05349054568984111, + "kl_loss": 8.794185638427734, + "loss_ib": 0.09024134278297424, + "step": 186 + }, + { + "ce_ib": 27.755258560180664, + "ce_orig": 1.4234228134155273, + "epoch": 0.05349054568984111, + "kl_loss": 5.130355358123779, + "loss_ib": 0.0540790781378746, + "step": 186 + }, + { + "ce_ib": 19.920040130615234, + "ce_orig": 0.459452748298645, + "epoch": 0.05377812926881875, + "kl_loss": 7.401340484619141, + "loss_ib": 0.07600540667772293, + "step": 187 + }, + { + "ce_ib": 22.339643478393555, + "ce_orig": 0.7629045844078064, + "epoch": 0.05377812926881875, + "kl_loss": 8.446830749511719, + "loss_ib": 0.08670226484537125, + "step": 187 + }, + { + "ce_ib": 23.796178817749023, + "ce_orig": 1.3895570039749146, + "epoch": 0.05377812926881875, + "kl_loss": 9.165254592895508, + "loss_ib": 0.0940321609377861, + "step": 187 + }, + { + "ce_ib": 21.33721351623535, + "ce_orig": 0.2807011902332306, + "epoch": 0.05377812926881875, + "kl_loss": 8.006156921386719, + "loss_ib": 0.08219528943300247, + "step": 187 + }, + { + "ce_ib": 20.01226234436035, + "ce_orig": 0.7121122479438782, + "epoch": 0.05406571284779639, + "kl_loss": 8.959085464477539, + "loss_ib": 0.09159208089113235, + "step": 188 + }, + { + "ce_ib": 19.18909454345703, + "ce_orig": 0.7582953572273254, + "epoch": 0.05406571284779639, + "kl_loss": 8.402653694152832, + "loss_ib": 0.08594544231891632, + "step": 188 + }, + { + "ce_ib": 23.931289672851562, + "ce_orig": 0.8940808773040771, + "epoch": 0.05406571284779639, + "kl_loss": 8.514259338378906, + "loss_ib": 0.08753572404384613, + "step": 188 + }, + { + "ce_ib": 20.879886627197266, + "ce_orig": 0.5851081609725952, + "epoch": 0.05406571284779639, + "kl_loss": 9.260396957397461, + "loss_ib": 0.09469195455312729, + "step": 188 + }, + { + "ce_ib": 20.511985778808594, + "ce_orig": 0.8533673882484436, + "epoch": 0.05435329642677403, + "kl_loss": 8.759720802307129, + "loss_ib": 0.08964840322732925, + "step": 189 + }, + { + "ce_ib": 23.13450050354004, + "ce_orig": 1.0011026859283447, + "epoch": 0.05435329642677403, + "kl_loss": 8.676036834716797, + "loss_ib": 0.08907381445169449, + "step": 189 + }, + { + "ce_ib": 22.484384536743164, + "ce_orig": 0.5926994681358337, + "epoch": 0.05435329642677403, + "kl_loss": 9.435342788696289, + "loss_ib": 0.09660186618566513, + "step": 189 + }, + { + "ce_ib": 21.21821403503418, + "ce_orig": 0.8962640166282654, + "epoch": 0.05435329642677403, + "kl_loss": 8.669075012207031, + "loss_ib": 0.08881256729364395, + "step": 189 + }, + { + "epoch": 0.05464088000575167, + "grad_norm": 0.43721508979797363, + "learning_rate": 2.929936305732484e-05, + "loss": 1.0218, + "step": 190 + }, + { + "ce_ib": 21.65335464477539, + "ce_orig": 0.7994527816772461, + "epoch": 0.05464088000575167, + "kl_loss": 8.212764739990234, + "loss_ib": 0.08429298549890518, + "step": 190 + }, + { + "ce_ib": 25.716175079345703, + "ce_orig": 1.026253342628479, + "epoch": 0.05464088000575167, + "kl_loss": 8.664275169372559, + "loss_ib": 0.08921436965465546, + "step": 190 + }, + { + "ce_ib": 19.4307861328125, + "ce_orig": 0.8895479440689087, + "epoch": 0.05464088000575167, + "kl_loss": 8.509403228759766, + "loss_ib": 0.0870371162891388, + "step": 190 + }, + { + "ce_ib": 21.85231590270996, + "ce_orig": 0.7853972911834717, + "epoch": 0.05464088000575167, + "kl_loss": 8.377355575561523, + "loss_ib": 0.08595878630876541, + "step": 190 + }, + { + "ce_ib": 21.575359344482422, + "ce_orig": 1.0062997341156006, + "epoch": 0.05492846358472931, + "kl_loss": 9.271797180175781, + "loss_ib": 0.0948755070567131, + "step": 191 + }, + { + "ce_ib": 18.518245697021484, + "ce_orig": 0.6092102527618408, + "epoch": 0.05492846358472931, + "kl_loss": 8.641265869140625, + "loss_ib": 0.08826448023319244, + "step": 191 + }, + { + "ce_ib": 25.908557891845703, + "ce_orig": 0.7555634379386902, + "epoch": 0.05492846358472931, + "kl_loss": 9.02600383758545, + "loss_ib": 0.09285089373588562, + "step": 191 + }, + { + "ce_ib": 22.47454071044922, + "ce_orig": 0.5190201997756958, + "epoch": 0.05492846358472931, + "kl_loss": 9.089900970458984, + "loss_ib": 0.09314646571874619, + "step": 191 + }, + { + "ce_ib": 19.4965763092041, + "ce_orig": 0.8628413081169128, + "epoch": 0.055216047163706955, + "kl_loss": 9.05790901184082, + "loss_ib": 0.09252873808145523, + "step": 192 + }, + { + "ce_ib": 24.495662689208984, + "ce_orig": 1.0552870035171509, + "epoch": 0.055216047163706955, + "kl_loss": 8.362863540649414, + "loss_ib": 0.0860782042145729, + "step": 192 + }, + { + "ce_ib": 24.617902755737305, + "ce_orig": 1.3669184446334839, + "epoch": 0.055216047163706955, + "kl_loss": 8.252336502075195, + "loss_ib": 0.08498515188694, + "step": 192 + }, + { + "ce_ib": 21.620195388793945, + "ce_orig": 0.9216135144233704, + "epoch": 0.055216047163706955, + "kl_loss": 8.622823715209961, + "loss_ib": 0.08839025348424911, + "step": 192 + }, + { + "ce_ib": 23.482017517089844, + "ce_orig": 1.2965989112854004, + "epoch": 0.05550363074268459, + "kl_loss": 8.686678886413574, + "loss_ib": 0.089214988052845, + "step": 193 + }, + { + "ce_ib": 21.503093719482422, + "ce_orig": 1.1102378368377686, + "epoch": 0.05550363074268459, + "kl_loss": 8.758203506469727, + "loss_ib": 0.0897323414683342, + "step": 193 + }, + { + "ce_ib": 21.88249969482422, + "ce_orig": 0.3644579350948334, + "epoch": 0.05550363074268459, + "kl_loss": 7.924787998199463, + "loss_ib": 0.08143612742424011, + "step": 193 + }, + { + "ce_ib": 21.49346160888672, + "ce_orig": 0.8568457961082458, + "epoch": 0.05550363074268459, + "kl_loss": 8.320171356201172, + "loss_ib": 0.08535105735063553, + "step": 193 + }, + { + "ce_ib": 18.73956298828125, + "ce_orig": 0.8066674470901489, + "epoch": 0.05579121432166223, + "kl_loss": 8.048727035522461, + "loss_ib": 0.08236122876405716, + "step": 194 + }, + { + "ce_ib": 24.636383056640625, + "ce_orig": 0.97906494140625, + "epoch": 0.05579121432166223, + "kl_loss": 7.121569633483887, + "loss_ib": 0.07367932796478271, + "step": 194 + }, + { + "ce_ib": 20.886672973632812, + "ce_orig": 1.25295090675354, + "epoch": 0.05579121432166223, + "kl_loss": 8.471221923828125, + "loss_ib": 0.08680088818073273, + "step": 194 + }, + { + "ce_ib": 19.9046630859375, + "ce_orig": 0.5161154866218567, + "epoch": 0.05579121432166223, + "kl_loss": 7.873350143432617, + "loss_ib": 0.08072397112846375, + "step": 194 + }, + { + "epoch": 0.056078797900639875, + "grad_norm": 0.8270230293273926, + "learning_rate": 3.0095541401273885e-05, + "loss": 0.9806, + "step": 195 + }, + { + "ce_ib": 20.66087532043457, + "ce_orig": 0.6626381874084473, + "epoch": 0.056078797900639875, + "kl_loss": 8.434722900390625, + "loss_ib": 0.08641331642866135, + "step": 195 + }, + { + "ce_ib": 22.705623626708984, + "ce_orig": 0.9331481456756592, + "epoch": 0.056078797900639875, + "kl_loss": 7.2313232421875, + "loss_ib": 0.07458379119634628, + "step": 195 + }, + { + "ce_ib": 23.993696212768555, + "ce_orig": 0.9489652514457703, + "epoch": 0.056078797900639875, + "kl_loss": 7.374420166015625, + "loss_ib": 0.07614357024431229, + "step": 195 + }, + { + "ce_ib": 24.617033004760742, + "ce_orig": 0.7637354135513306, + "epoch": 0.056078797900639875, + "kl_loss": 7.686088562011719, + "loss_ib": 0.07932259142398834, + "step": 195 + }, + { + "ce_ib": 21.54843521118164, + "ce_orig": 0.8521741032600403, + "epoch": 0.05636638147961751, + "kl_loss": 8.026320457458496, + "loss_ib": 0.0824180468916893, + "step": 196 + }, + { + "ce_ib": 20.1884822845459, + "ce_orig": 0.8504369258880615, + "epoch": 0.05636638147961751, + "kl_loss": 7.845184326171875, + "loss_ib": 0.08047069609165192, + "step": 196 + }, + { + "ce_ib": 22.211240768432617, + "ce_orig": 0.4319168031215668, + "epoch": 0.05636638147961751, + "kl_loss": 6.318869590759277, + "loss_ib": 0.065409816801548, + "step": 196 + }, + { + "ce_ib": 20.042993545532227, + "ce_orig": 0.6225204467773438, + "epoch": 0.05636638147961751, + "kl_loss": 8.074682235717773, + "loss_ib": 0.08275111764669418, + "step": 196 + }, + { + "ce_ib": 13.801077842712402, + "ce_orig": 0.4428274929523468, + "epoch": 0.05665396505859516, + "kl_loss": 7.092032432556152, + "loss_ib": 0.07230043411254883, + "step": 197 + }, + { + "ce_ib": 19.726043701171875, + "ce_orig": 0.5650824904441833, + "epoch": 0.05665396505859516, + "kl_loss": 7.761396408081055, + "loss_ib": 0.07958656549453735, + "step": 197 + }, + { + "ce_ib": 23.52407455444336, + "ce_orig": 1.374847650527954, + "epoch": 0.05665396505859516, + "kl_loss": 6.945611000061035, + "loss_ib": 0.07180851697921753, + "step": 197 + }, + { + "ce_ib": 21.02933120727539, + "ce_orig": 0.9913616180419922, + "epoch": 0.05665396505859516, + "kl_loss": 7.67958402633667, + "loss_ib": 0.07889877259731293, + "step": 197 + }, + { + "ce_ib": 26.16086196899414, + "ce_orig": 1.5300548076629639, + "epoch": 0.056941548637572795, + "kl_loss": 7.4000701904296875, + "loss_ib": 0.0766167864203453, + "step": 198 + }, + { + "ce_ib": 20.750835418701172, + "ce_orig": 0.9555485844612122, + "epoch": 0.056941548637572795, + "kl_loss": 7.1511125564575195, + "loss_ib": 0.07358621060848236, + "step": 198 + }, + { + "ce_ib": 23.05903434753418, + "ce_orig": 1.1008634567260742, + "epoch": 0.056941548637572795, + "kl_loss": 7.473138332366943, + "loss_ib": 0.0770372822880745, + "step": 198 + }, + { + "ce_ib": 21.61954689025879, + "ce_orig": 1.4359227418899536, + "epoch": 0.056941548637572795, + "kl_loss": 7.772992134094238, + "loss_ib": 0.0798918753862381, + "step": 198 + }, + { + "ce_ib": 22.668001174926758, + "ce_orig": 1.0338892936706543, + "epoch": 0.05722913221655043, + "kl_loss": 7.578032493591309, + "loss_ib": 0.07804711908102036, + "step": 199 + }, + { + "ce_ib": 20.854860305786133, + "ce_orig": 0.9726830124855042, + "epoch": 0.05722913221655043, + "kl_loss": 7.427217483520508, + "loss_ib": 0.0763576552271843, + "step": 199 + }, + { + "ce_ib": 19.557754516601562, + "ce_orig": 0.8703896403312683, + "epoch": 0.05722913221655043, + "kl_loss": 7.243409633636475, + "loss_ib": 0.07438986748456955, + "step": 199 + }, + { + "ce_ib": 18.793437957763672, + "ce_orig": 0.8299582004547119, + "epoch": 0.05722913221655043, + "kl_loss": 6.471531867980957, + "loss_ib": 0.06659466028213501, + "step": 199 + }, + { + "epoch": 0.05751671579552808, + "grad_norm": 0.6937683820724487, + "learning_rate": 3.089171974522293e-05, + "loss": 0.982, + "step": 200 + }, + { + "ce_ib": 18.451953887939453, + "ce_orig": 1.0577921867370605, + "epoch": 0.05751671579552808, + "kl_loss": 7.2733869552612305, + "loss_ib": 0.07457906752824783, + "step": 200 + }, + { + "ce_ib": 23.337678909301758, + "ce_orig": 1.3253329992294312, + "epoch": 0.05751671579552808, + "kl_loss": 7.374900817871094, + "loss_ib": 0.07608277350664139, + "step": 200 + }, + { + "ce_ib": 18.122037887573242, + "ce_orig": 0.9964814782142639, + "epoch": 0.05751671579552808, + "kl_loss": 7.532997131347656, + "loss_ib": 0.07714217156171799, + "step": 200 + }, + { + "ce_ib": 19.866018295288086, + "ce_orig": 0.7532010078430176, + "epoch": 0.05751671579552808, + "kl_loss": 6.963897705078125, + "loss_ib": 0.07162558287382126, + "step": 200 + }, + { + "ce_ib": 18.71930503845215, + "ce_orig": 0.8960237503051758, + "epoch": 0.057804299374505715, + "kl_loss": 6.8690900802612305, + "loss_ib": 0.07056283205747604, + "step": 201 + }, + { + "ce_ib": 22.056734085083008, + "ce_orig": 1.0263980627059937, + "epoch": 0.057804299374505715, + "kl_loss": 6.602439880371094, + "loss_ib": 0.06823007017374039, + "step": 201 + }, + { + "ce_ib": 24.96833038330078, + "ce_orig": 1.6670337915420532, + "epoch": 0.057804299374505715, + "kl_loss": 6.63405704498291, + "loss_ib": 0.06883740425109863, + "step": 201 + }, + { + "ce_ib": 17.40508460998535, + "ce_orig": 0.3124699592590332, + "epoch": 0.057804299374505715, + "kl_loss": 6.398665428161621, + "loss_ib": 0.06572715938091278, + "step": 201 + }, + { + "ce_ib": 13.636467933654785, + "ce_orig": 0.5163049697875977, + "epoch": 0.05809188295348336, + "kl_loss": 6.814591407775879, + "loss_ib": 0.06950955837965012, + "step": 202 + }, + { + "ce_ib": 18.503398895263672, + "ce_orig": 0.8511436581611633, + "epoch": 0.05809188295348336, + "kl_loss": 4.558845043182373, + "loss_ib": 0.047438789159059525, + "step": 202 + }, + { + "ce_ib": 22.386396408081055, + "ce_orig": 0.974443793296814, + "epoch": 0.05809188295348336, + "kl_loss": 6.523048400878906, + "loss_ib": 0.06746912002563477, + "step": 202 + }, + { + "ce_ib": 20.26010513305664, + "ce_orig": 0.7885109186172485, + "epoch": 0.05809188295348336, + "kl_loss": 6.658895015716553, + "loss_ib": 0.06861495971679688, + "step": 202 + }, + { + "ce_ib": 24.684518814086914, + "ce_orig": 1.362154245376587, + "epoch": 0.058379466532461, + "kl_loss": 5.672746658325195, + "loss_ib": 0.05919591709971428, + "step": 203 + }, + { + "ce_ib": 18.035612106323242, + "ce_orig": 0.6344237923622131, + "epoch": 0.058379466532461, + "kl_loss": 6.552053451538086, + "loss_ib": 0.06732409447431564, + "step": 203 + }, + { + "ce_ib": 19.674922943115234, + "ce_orig": 1.129352331161499, + "epoch": 0.058379466532461, + "kl_loss": 6.27418327331543, + "loss_ib": 0.06470932066440582, + "step": 203 + }, + { + "ce_ib": 12.673866271972656, + "ce_orig": 0.2503475248813629, + "epoch": 0.058379466532461, + "kl_loss": 4.017500877380371, + "loss_ib": 0.0414423942565918, + "step": 203 + }, + { + "ce_ib": 16.628164291381836, + "ce_orig": 0.6799634099006653, + "epoch": 0.058667050111438634, + "kl_loss": 5.470815181732178, + "loss_ib": 0.056370966136455536, + "step": 204 + }, + { + "ce_ib": 23.80594825744629, + "ce_orig": 1.2403467893600464, + "epoch": 0.058667050111438634, + "kl_loss": 5.936471462249756, + "loss_ib": 0.061745308339595795, + "step": 204 + }, + { + "ce_ib": 22.816349029541016, + "ce_orig": 1.0042665004730225, + "epoch": 0.058667050111438634, + "kl_loss": 6.33897066116333, + "loss_ib": 0.06567133963108063, + "step": 204 + }, + { + "ce_ib": 15.579545021057129, + "ce_orig": 0.5806044936180115, + "epoch": 0.058667050111438634, + "kl_loss": 6.2884297370910645, + "loss_ib": 0.06444225460290909, + "step": 204 + }, + { + "epoch": 0.05895463369041628, + "grad_norm": 0.43801939487457275, + "learning_rate": 3.1687898089171976e-05, + "loss": 0.9615, + "step": 205 + }, + { + "ce_ib": 20.434833526611328, + "ce_orig": 0.9689016342163086, + "epoch": 0.05895463369041628, + "kl_loss": 6.424372673034668, + "loss_ib": 0.06628721207380295, + "step": 205 + }, + { + "ce_ib": 15.206514358520508, + "ce_orig": 0.4593224823474884, + "epoch": 0.05895463369041628, + "kl_loss": 6.005724906921387, + "loss_ib": 0.06157790124416351, + "step": 205 + }, + { + "ce_ib": 21.497190475463867, + "ce_orig": 1.140707015991211, + "epoch": 0.05895463369041628, + "kl_loss": 6.157401084899902, + "loss_ib": 0.06372372806072235, + "step": 205 + }, + { + "ce_ib": 21.64202308654785, + "ce_orig": 0.8690503239631653, + "epoch": 0.05895463369041628, + "kl_loss": 6.049506187438965, + "loss_ib": 0.06265926361083984, + "step": 205 + }, + { + "ce_ib": 17.65264129638672, + "ce_orig": 0.8661278486251831, + "epoch": 0.05924221726939392, + "kl_loss": 6.142066955566406, + "loss_ib": 0.0631859302520752, + "step": 206 + }, + { + "ce_ib": 18.6512451171875, + "ce_orig": 1.1120545864105225, + "epoch": 0.05924221726939392, + "kl_loss": 6.5508646965026855, + "loss_ib": 0.06737376749515533, + "step": 206 + }, + { + "ce_ib": 19.080215454101562, + "ce_orig": 1.205805778503418, + "epoch": 0.05924221726939392, + "kl_loss": 5.709317207336426, + "loss_ib": 0.059001192450523376, + "step": 206 + }, + { + "ce_ib": 18.595766067504883, + "ce_orig": 0.9016050696372986, + "epoch": 0.05924221726939392, + "kl_loss": 5.711783409118652, + "loss_ib": 0.0589774064719677, + "step": 206 + }, + { + "ce_ib": 20.492830276489258, + "ce_orig": 0.8852484822273254, + "epoch": 0.05952980084837156, + "kl_loss": 5.075399398803711, + "loss_ib": 0.05280327796936035, + "step": 207 + }, + { + "ce_ib": 20.912809371948242, + "ce_orig": 0.784504771232605, + "epoch": 0.05952980084837156, + "kl_loss": 3.873704433441162, + "loss_ib": 0.04082832112908363, + "step": 207 + }, + { + "ce_ib": 16.91316795349121, + "ce_orig": 0.7453713417053223, + "epoch": 0.05952980084837156, + "kl_loss": 4.164481163024902, + "loss_ib": 0.04333612695336342, + "step": 207 + }, + { + "ce_ib": 23.150854110717773, + "ce_orig": 1.8038743734359741, + "epoch": 0.05952980084837156, + "kl_loss": 6.1978583335876465, + "loss_ib": 0.06429366767406464, + "step": 207 + }, + { + "ce_ib": 20.63064193725586, + "ce_orig": 1.5274888277053833, + "epoch": 0.0598173844273492, + "kl_loss": 4.736423492431641, + "loss_ib": 0.049427296966314316, + "step": 208 + }, + { + "ce_ib": 20.130807876586914, + "ce_orig": 0.9102981686592102, + "epoch": 0.0598173844273492, + "kl_loss": 4.98231315612793, + "loss_ib": 0.05183621123433113, + "step": 208 + }, + { + "ce_ib": 21.33695411682129, + "ce_orig": 0.3839934766292572, + "epoch": 0.0598173844273492, + "kl_loss": 3.1306653022766113, + "loss_ib": 0.033440347760915756, + "step": 208 + }, + { + "ce_ib": 19.694299697875977, + "ce_orig": 0.9525083899497986, + "epoch": 0.0598173844273492, + "kl_loss": 4.653975486755371, + "loss_ib": 0.048509180545806885, + "step": 208 + }, + { + "ce_ib": 21.432010650634766, + "ce_orig": 1.1178271770477295, + "epoch": 0.06010496800632684, + "kl_loss": 3.3814926147460938, + "loss_ib": 0.035958126187324524, + "step": 209 + }, + { + "ce_ib": 19.939306259155273, + "ce_orig": 1.0498489141464233, + "epoch": 0.06010496800632684, + "kl_loss": 4.5651397705078125, + "loss_ib": 0.04764533042907715, + "step": 209 + }, + { + "ce_ib": 19.62514877319336, + "ce_orig": 1.1263172626495361, + "epoch": 0.06010496800632684, + "kl_loss": 4.4086761474609375, + "loss_ib": 0.04604927450418472, + "step": 209 + }, + { + "ce_ib": 17.709300994873047, + "ce_orig": 1.0799542665481567, + "epoch": 0.06010496800632684, + "kl_loss": 4.823720932006836, + "loss_ib": 0.0500081367790699, + "step": 209 + }, + { + "epoch": 0.06039255158530448, + "grad_norm": 0.667425274848938, + "learning_rate": 3.248407643312102e-05, + "loss": 0.9474, + "step": 210 + }, + { + "ce_ib": 22.428897857666016, + "ce_orig": 1.325988531112671, + "epoch": 0.06039255158530448, + "kl_loss": 3.7257208824157715, + "loss_ib": 0.039500098675489426, + "step": 210 + }, + { + "ce_ib": 18.08632469177246, + "ce_orig": 0.3834853172302246, + "epoch": 0.06039255158530448, + "kl_loss": 3.6763598918914795, + "loss_ib": 0.03857222944498062, + "step": 210 + }, + { + "ce_ib": 18.410423278808594, + "ce_orig": 0.5081047415733337, + "epoch": 0.06039255158530448, + "kl_loss": 3.751244068145752, + "loss_ib": 0.03935348242521286, + "step": 210 + }, + { + "ce_ib": 17.127031326293945, + "ce_orig": 0.6193530559539795, + "epoch": 0.06039255158530448, + "kl_loss": 4.159435749053955, + "loss_ib": 0.04330705851316452, + "step": 210 + }, + { + "ce_ib": 21.037677764892578, + "ce_orig": 0.9653246998786926, + "epoch": 0.06068013516428212, + "kl_loss": 3.6135926246643066, + "loss_ib": 0.03823969140648842, + "step": 211 + }, + { + "ce_ib": 15.933859825134277, + "ce_orig": 0.5138083696365356, + "epoch": 0.06068013516428212, + "kl_loss": 2.297349452972412, + "loss_ib": 0.024566879495978355, + "step": 211 + }, + { + "ce_ib": 16.323041915893555, + "ce_orig": 1.1270829439163208, + "epoch": 0.06068013516428212, + "kl_loss": 2.4675984382629395, + "loss_ib": 0.026308288797736168, + "step": 211 + }, + { + "ce_ib": 18.137113571166992, + "ce_orig": 0.5509803891181946, + "epoch": 0.06068013516428212, + "kl_loss": 2.595515251159668, + "loss_ib": 0.02776886336505413, + "step": 211 + }, + { + "ce_ib": 14.729268074035645, + "ce_orig": 0.4266526401042938, + "epoch": 0.060967718743259756, + "kl_loss": 1.8660860061645508, + "loss_ib": 0.020133785903453827, + "step": 212 + }, + { + "ce_ib": 19.452571868896484, + "ce_orig": 0.8632349967956543, + "epoch": 0.060967718743259756, + "kl_loss": 2.0922412872314453, + "loss_ib": 0.02286767028272152, + "step": 212 + }, + { + "ce_ib": 21.427040100097656, + "ce_orig": 1.345961332321167, + "epoch": 0.060967718743259756, + "kl_loss": 1.7945568561553955, + "loss_ib": 0.02008827216923237, + "step": 212 + }, + { + "ce_ib": 21.80542755126953, + "ce_orig": 1.136615514755249, + "epoch": 0.060967718743259756, + "kl_loss": 1.982521891593933, + "loss_ib": 0.022005761042237282, + "step": 212 + }, + { + "ce_ib": 23.523645401000977, + "ce_orig": 1.9374449253082275, + "epoch": 0.0612553023222374, + "kl_loss": 2.183800220489502, + "loss_ib": 0.02419036626815796, + "step": 213 + }, + { + "ce_ib": 23.139501571655273, + "ce_orig": 1.261841058731079, + "epoch": 0.0612553023222374, + "kl_loss": 1.225545048713684, + "loss_ib": 0.014569399878382683, + "step": 213 + }, + { + "ce_ib": 14.595914840698242, + "ce_orig": 0.4094107151031494, + "epoch": 0.0612553023222374, + "kl_loss": 1.4455546140670776, + "loss_ib": 0.01591513678431511, + "step": 213 + }, + { + "ce_ib": 18.374540328979492, + "ce_orig": 0.9666364789009094, + "epoch": 0.0612553023222374, + "kl_loss": 1.5520057678222656, + "loss_ib": 0.017357511445879936, + "step": 213 + }, + { + "ce_ib": 17.489238739013672, + "ce_orig": 0.4721967875957489, + "epoch": 0.06154288590121504, + "kl_loss": 1.1278434991836548, + "loss_ib": 0.013027358800172806, + "step": 214 + }, + { + "ce_ib": 21.929288864135742, + "ce_orig": 0.6112910509109497, + "epoch": 0.06154288590121504, + "kl_loss": 1.07718026638031, + "loss_ib": 0.012964731082320213, + "step": 214 + }, + { + "ce_ib": 18.024003982543945, + "ce_orig": 0.4354954957962036, + "epoch": 0.06154288590121504, + "kl_loss": 1.1616055965423584, + "loss_ib": 0.013418455608189106, + "step": 214 + }, + { + "ce_ib": 19.154476165771484, + "ce_orig": 0.9074000120162964, + "epoch": 0.06154288590121504, + "kl_loss": 1.048647165298462, + "loss_ib": 0.01240191888064146, + "step": 214 + }, + { + "epoch": 0.06183046948019268, + "grad_norm": 0.19470971822738647, + "learning_rate": 3.328025477707007e-05, + "loss": 0.8823, + "step": 215 + }, + { + "ce_ib": 16.662282943725586, + "ce_orig": 0.5908299088478088, + "epoch": 0.06183046948019268, + "kl_loss": 1.0955469608306885, + "loss_ib": 0.012621697969734669, + "step": 215 + }, + { + "ce_ib": 15.762285232543945, + "ce_orig": 0.5801149606704712, + "epoch": 0.06183046948019268, + "kl_loss": 0.9997921586036682, + "loss_ib": 0.011574150063097477, + "step": 215 + }, + { + "ce_ib": 20.8988094329834, + "ce_orig": 0.9614391922950745, + "epoch": 0.06183046948019268, + "kl_loss": 0.9723953008651733, + "loss_ib": 0.011813833378255367, + "step": 215 + }, + { + "ce_ib": 20.39583969116211, + "ce_orig": 1.0183390378952026, + "epoch": 0.06183046948019268, + "kl_loss": 0.9245635867118835, + "loss_ib": 0.011285219341516495, + "step": 215 + }, + { + "ce_ib": 14.578946113586426, + "ce_orig": 0.734819769859314, + "epoch": 0.06211805305917032, + "kl_loss": 0.8773603439331055, + "loss_ib": 0.010231498628854752, + "step": 216 + }, + { + "ce_ib": 13.65113639831543, + "ce_orig": 0.4453405439853668, + "epoch": 0.06211805305917032, + "kl_loss": 0.902603030204773, + "loss_ib": 0.010391143150627613, + "step": 216 + }, + { + "ce_ib": 24.090421676635742, + "ce_orig": 1.5431694984436035, + "epoch": 0.06211805305917032, + "kl_loss": 0.8628696203231812, + "loss_ib": 0.011037738062441349, + "step": 216 + }, + { + "ce_ib": 16.371984481811523, + "ce_orig": 0.6436638832092285, + "epoch": 0.06211805305917032, + "kl_loss": 0.9481015205383301, + "loss_ib": 0.011118213646113873, + "step": 216 + }, + { + "ce_ib": 16.45316505432129, + "ce_orig": 0.6905233263969421, + "epoch": 0.06240563663814796, + "kl_loss": 0.8371706008911133, + "loss_ib": 0.010017022490501404, + "step": 217 + }, + { + "ce_ib": 19.796371459960938, + "ce_orig": 0.9113252758979797, + "epoch": 0.06240563663814796, + "kl_loss": 0.7912114262580872, + "loss_ib": 0.00989175122231245, + "step": 217 + }, + { + "ce_ib": 20.719688415527344, + "ce_orig": 0.6585960388183594, + "epoch": 0.06240563663814796, + "kl_loss": 0.7871678471565247, + "loss_ib": 0.009943647310137749, + "step": 217 + }, + { + "ce_ib": 18.301244735717773, + "ce_orig": 0.9191728234291077, + "epoch": 0.06240563663814796, + "kl_loss": 0.7824192643165588, + "loss_ib": 0.009654317051172256, + "step": 217 + }, + { + "ce_ib": 16.72066879272461, + "ce_orig": 0.46977195143699646, + "epoch": 0.0626932202171256, + "kl_loss": 0.7237412929534912, + "loss_ib": 0.008909479714930058, + "step": 218 + }, + { + "ce_ib": 16.60516357421875, + "ce_orig": 0.8432900309562683, + "epoch": 0.0626932202171256, + "kl_loss": 0.7562671899795532, + "loss_ib": 0.009223188273608685, + "step": 218 + }, + { + "ce_ib": 19.466259002685547, + "ce_orig": 0.9356642365455627, + "epoch": 0.0626932202171256, + "kl_loss": 0.7408407926559448, + "loss_ib": 0.009355033747851849, + "step": 218 + }, + { + "ce_ib": 19.53274917602539, + "ce_orig": 0.6844194531440735, + "epoch": 0.0626932202171256, + "kl_loss": 0.7530844807624817, + "loss_ib": 0.009484118781983852, + "step": 218 + }, + { + "ce_ib": 22.165111541748047, + "ce_orig": 1.2755643129348755, + "epoch": 0.06298080379610324, + "kl_loss": 0.6112433671951294, + "loss_ib": 0.008328944444656372, + "step": 219 + }, + { + "ce_ib": 15.414247512817383, + "ce_orig": 0.8299206495285034, + "epoch": 0.06298080379610324, + "kl_loss": 0.7032531499862671, + "loss_ib": 0.008573955856263638, + "step": 219 + }, + { + "ce_ib": 16.366025924682617, + "ce_orig": 0.660663902759552, + "epoch": 0.06298080379610324, + "kl_loss": 0.6545971632003784, + "loss_ib": 0.008182574063539505, + "step": 219 + }, + { + "ce_ib": 19.620121002197266, + "ce_orig": 1.2592724561691284, + "epoch": 0.06298080379610324, + "kl_loss": 0.7279493808746338, + "loss_ib": 0.009241505526006222, + "step": 219 + }, + { + "epoch": 0.06326838737508088, + "grad_norm": 0.07929490506649017, + "learning_rate": 3.407643312101911e-05, + "loss": 0.8749, + "step": 220 + }, + { + "ce_ib": 20.658096313476562, + "ce_orig": 1.0993177890777588, + "epoch": 0.06326838737508088, + "kl_loss": 0.6090418100357056, + "loss_ib": 0.008156226947903633, + "step": 220 + }, + { + "ce_ib": 17.3227481842041, + "ce_orig": 0.6923868656158447, + "epoch": 0.06326838737508088, + "kl_loss": 0.6292073130607605, + "loss_ib": 0.008024347946047783, + "step": 220 + }, + { + "ce_ib": 15.15152359008789, + "ce_orig": 0.697593092918396, + "epoch": 0.06326838737508088, + "kl_loss": 0.604870617389679, + "loss_ib": 0.007563858292996883, + "step": 220 + }, + { + "ce_ib": 20.777067184448242, + "ce_orig": 1.269119381904602, + "epoch": 0.06326838737508088, + "kl_loss": 0.6507552862167358, + "loss_ib": 0.008585259318351746, + "step": 220 + }, + { + "ce_ib": 18.540620803833008, + "ce_orig": 1.0002384185791016, + "epoch": 0.06355597095405853, + "kl_loss": 0.578797459602356, + "loss_ib": 0.0076420363038778305, + "step": 221 + }, + { + "ce_ib": 13.15246868133545, + "ce_orig": 0.25608256459236145, + "epoch": 0.06355597095405853, + "kl_loss": 0.7502469420433044, + "loss_ib": 0.008817716501653194, + "step": 221 + }, + { + "ce_ib": 13.858514785766602, + "ce_orig": 0.682886004447937, + "epoch": 0.06355597095405853, + "kl_loss": 0.538476824760437, + "loss_ib": 0.006770619656890631, + "step": 221 + }, + { + "ce_ib": 17.7680606842041, + "ce_orig": 0.6039354801177979, + "epoch": 0.06355597095405853, + "kl_loss": 0.5650777816772461, + "loss_ib": 0.007427583914250135, + "step": 221 + }, + { + "ce_ib": 19.38729476928711, + "ce_orig": 0.8789693117141724, + "epoch": 0.06384355453303617, + "kl_loss": 0.6542633771896362, + "loss_ib": 0.008481362834572792, + "step": 222 + }, + { + "ce_ib": 18.89866065979004, + "ce_orig": 0.7741104960441589, + "epoch": 0.06384355453303617, + "kl_loss": 0.5847321152687073, + "loss_ib": 0.007737187203019857, + "step": 222 + }, + { + "ce_ib": 21.54572296142578, + "ce_orig": 0.7229393124580383, + "epoch": 0.06384355453303617, + "kl_loss": 0.5832604169845581, + "loss_ib": 0.007987176068127155, + "step": 222 + }, + { + "ce_ib": 13.869481086730957, + "ce_orig": 0.5696704387664795, + "epoch": 0.06384355453303617, + "kl_loss": 0.5188637375831604, + "loss_ib": 0.006575585342943668, + "step": 222 + }, + { + "ce_ib": 14.482152938842773, + "ce_orig": 0.5589219331741333, + "epoch": 0.0641311381120138, + "kl_loss": 0.47938820719718933, + "loss_ib": 0.006242097355425358, + "step": 223 + }, + { + "ce_ib": 16.80389976501465, + "ce_orig": 0.5980596542358398, + "epoch": 0.0641311381120138, + "kl_loss": 0.6172512769699097, + "loss_ib": 0.007852902635931969, + "step": 223 + }, + { + "ce_ib": 22.49806785583496, + "ce_orig": 0.7062133550643921, + "epoch": 0.0641311381120138, + "kl_loss": 0.5784010887145996, + "loss_ib": 0.008033817633986473, + "step": 223 + }, + { + "ce_ib": 17.86919593811035, + "ce_orig": 0.6028913259506226, + "epoch": 0.0641311381120138, + "kl_loss": 0.5538998246192932, + "loss_ib": 0.007325917482376099, + "step": 223 + }, + { + "ce_ib": 21.530719757080078, + "ce_orig": 1.1314647197723389, + "epoch": 0.06441872169099144, + "kl_loss": 0.5061776638031006, + "loss_ib": 0.007214848417788744, + "step": 224 + }, + { + "ce_ib": 17.580371856689453, + "ce_orig": 0.8268778920173645, + "epoch": 0.06441872169099144, + "kl_loss": 0.5295155644416809, + "loss_ib": 0.007053192704916, + "step": 224 + }, + { + "ce_ib": 16.985912322998047, + "ce_orig": 0.92490553855896, + "epoch": 0.06441872169099144, + "kl_loss": 0.47081345319747925, + "loss_ib": 0.006406725384294987, + "step": 224 + }, + { + "ce_ib": 20.34337043762207, + "ce_orig": 1.7162299156188965, + "epoch": 0.06441872169099144, + "kl_loss": 0.5065193772315979, + "loss_ib": 0.0070995306596159935, + "step": 224 + }, + { + "epoch": 0.06470630526996908, + "grad_norm": 0.07722701877355576, + "learning_rate": 3.487261146496815e-05, + "loss": 0.8907, + "step": 225 + }, + { + "ce_ib": 15.637423515319824, + "ce_orig": 0.8265129923820496, + "epoch": 0.06470630526996908, + "kl_loss": 0.6796841025352478, + "loss_ib": 0.008360583335161209, + "step": 225 + }, + { + "ce_ib": 21.416494369506836, + "ce_orig": 1.278948426246643, + "epoch": 0.06470630526996908, + "kl_loss": 0.48355668783187866, + "loss_ib": 0.006977215874940157, + "step": 225 + }, + { + "ce_ib": 19.162933349609375, + "ce_orig": 0.33566343784332275, + "epoch": 0.06470630526996908, + "kl_loss": 0.4927806854248047, + "loss_ib": 0.006844100076705217, + "step": 225 + }, + { + "ce_ib": 17.001327514648438, + "ce_orig": 0.8918877840042114, + "epoch": 0.06470630526996908, + "kl_loss": 0.4844684898853302, + "loss_ib": 0.006544817704707384, + "step": 225 + }, + { + "ce_ib": 19.425342559814453, + "ce_orig": 1.1151187419891357, + "epoch": 0.06499388884894673, + "kl_loss": 0.44420889019966125, + "loss_ib": 0.006384622771292925, + "step": 226 + }, + { + "ce_ib": 20.979902267456055, + "ce_orig": 1.1682270765304565, + "epoch": 0.06499388884894673, + "kl_loss": 0.5223791003227234, + "loss_ib": 0.007321780547499657, + "step": 226 + }, + { + "ce_ib": 21.13734245300293, + "ce_orig": 0.6381849050521851, + "epoch": 0.06499388884894673, + "kl_loss": 0.5622669458389282, + "loss_ib": 0.007736403960734606, + "step": 226 + }, + { + "ce_ib": 19.859725952148438, + "ce_orig": 1.4218535423278809, + "epoch": 0.06499388884894673, + "kl_loss": 0.5652158260345459, + "loss_ib": 0.0076381308026611805, + "step": 226 + }, + { + "ce_ib": 18.34955596923828, + "ce_orig": 0.8892937302589417, + "epoch": 0.06528147242792437, + "kl_loss": 0.477683424949646, + "loss_ib": 0.006611789111047983, + "step": 227 + }, + { + "ce_ib": 16.387413024902344, + "ce_orig": 0.5451152324676514, + "epoch": 0.06528147242792437, + "kl_loss": 0.4405210614204407, + "loss_ib": 0.006043951492756605, + "step": 227 + }, + { + "ce_ib": 16.179128646850586, + "ce_orig": 0.5826038718223572, + "epoch": 0.06528147242792437, + "kl_loss": 0.4468899965286255, + "loss_ib": 0.006086812354624271, + "step": 227 + }, + { + "ce_ib": 15.503174781799316, + "ce_orig": 0.4542520344257355, + "epoch": 0.06528147242792437, + "kl_loss": 0.4687623083591461, + "loss_ib": 0.006237940862774849, + "step": 227 + }, + { + "ce_ib": 17.556617736816406, + "ce_orig": 0.6604429483413696, + "epoch": 0.06556905600690201, + "kl_loss": 0.4429520070552826, + "loss_ib": 0.006185181438922882, + "step": 228 + }, + { + "ce_ib": 14.565351486206055, + "ce_orig": 0.663815438747406, + "epoch": 0.06556905600690201, + "kl_loss": 0.41016486287117004, + "loss_ib": 0.005558183882385492, + "step": 228 + }, + { + "ce_ib": 9.793392181396484, + "ce_orig": 0.1998930275440216, + "epoch": 0.06556905600690201, + "kl_loss": 0.5553406476974487, + "loss_ib": 0.006532745435833931, + "step": 228 + }, + { + "ce_ib": 15.705013275146484, + "ce_orig": 0.949131965637207, + "epoch": 0.06556905600690201, + "kl_loss": 0.5142616033554077, + "loss_ib": 0.006713117007166147, + "step": 228 + }, + { + "ce_ib": 22.55472755432129, + "ce_orig": 1.56355881690979, + "epoch": 0.06585663958587964, + "kl_loss": 0.508346676826477, + "loss_ib": 0.007338939234614372, + "step": 229 + }, + { + "ce_ib": 20.364665985107422, + "ce_orig": 1.1723278760910034, + "epoch": 0.06585663958587964, + "kl_loss": 0.4082661271095276, + "loss_ib": 0.006119127850979567, + "step": 229 + }, + { + "ce_ib": 16.15178680419922, + "ce_orig": 0.8450407981872559, + "epoch": 0.06585663958587964, + "kl_loss": 0.5447847843170166, + "loss_ib": 0.007063026074320078, + "step": 229 + }, + { + "ce_ib": 18.998653411865234, + "ce_orig": 1.2076853513717651, + "epoch": 0.06585663958587964, + "kl_loss": 0.42832180857658386, + "loss_ib": 0.006183082703500986, + "step": 229 + }, + { + "epoch": 0.06614422316485728, + "grad_norm": 0.07635564357042313, + "learning_rate": 3.56687898089172e-05, + "loss": 0.8962, + "step": 230 + }, + { + "ce_ib": 22.920480728149414, + "ce_orig": 1.7977490425109863, + "epoch": 0.06614422316485728, + "kl_loss": 0.4276275932788849, + "loss_ib": 0.0065683238208293915, + "step": 230 + }, + { + "ce_ib": 18.89206886291504, + "ce_orig": 1.2325646877288818, + "epoch": 0.06614422316485728, + "kl_loss": 0.42932283878326416, + "loss_ib": 0.006182434968650341, + "step": 230 + }, + { + "ce_ib": 19.41433334350586, + "ce_orig": 1.1008139848709106, + "epoch": 0.06614422316485728, + "kl_loss": 0.4227140247821808, + "loss_ib": 0.006168573163449764, + "step": 230 + }, + { + "ce_ib": 16.897308349609375, + "ce_orig": 0.8783938884735107, + "epoch": 0.06614422316485728, + "kl_loss": 0.4387606680393219, + "loss_ib": 0.0060773370787501335, + "step": 230 + }, + { + "ce_ib": 20.402490615844727, + "ce_orig": 0.4892929196357727, + "epoch": 0.06643180674383492, + "kl_loss": 0.5622318387031555, + "loss_ib": 0.007662567310035229, + "step": 231 + }, + { + "ce_ib": 18.776615142822266, + "ce_orig": 0.8454554080963135, + "epoch": 0.06643180674383492, + "kl_loss": 0.3648257553577423, + "loss_ib": 0.005525919143110514, + "step": 231 + }, + { + "ce_ib": 19.490280151367188, + "ce_orig": 0.5873563289642334, + "epoch": 0.06643180674383492, + "kl_loss": 0.49844107031822205, + "loss_ib": 0.006933438591659069, + "step": 231 + }, + { + "ce_ib": 18.226577758789062, + "ce_orig": 0.9095281362533569, + "epoch": 0.06643180674383492, + "kl_loss": 0.42676785588264465, + "loss_ib": 0.006090336479246616, + "step": 231 + }, + { + "ce_ib": 14.32204818725586, + "ce_orig": 0.4136866331100464, + "epoch": 0.06671939032281257, + "kl_loss": 0.5454199910163879, + "loss_ib": 0.006886404473334551, + "step": 232 + }, + { + "ce_ib": 15.030553817749023, + "ce_orig": 1.1399730443954468, + "epoch": 0.06671939032281257, + "kl_loss": 0.3817654848098755, + "loss_ib": 0.00532070966437459, + "step": 232 + }, + { + "ce_ib": 17.033527374267578, + "ce_orig": 0.9704214334487915, + "epoch": 0.06671939032281257, + "kl_loss": 0.45833879709243774, + "loss_ib": 0.006286740303039551, + "step": 232 + }, + { + "ce_ib": 16.526573181152344, + "ce_orig": 0.6879238486289978, + "epoch": 0.06671939032281257, + "kl_loss": 0.3568248152732849, + "loss_ib": 0.005220905411988497, + "step": 232 + }, + { + "ce_ib": 22.991727828979492, + "ce_orig": 1.4614430665969849, + "epoch": 0.06700697390179021, + "kl_loss": 0.42173314094543457, + "loss_ib": 0.006516504101455212, + "step": 233 + }, + { + "ce_ib": 18.632497787475586, + "ce_orig": 0.5046707391738892, + "epoch": 0.06700697390179021, + "kl_loss": 0.41837000846862793, + "loss_ib": 0.006046949420124292, + "step": 233 + }, + { + "ce_ib": 18.9344425201416, + "ce_orig": 1.0734100341796875, + "epoch": 0.06700697390179021, + "kl_loss": 0.40839213132858276, + "loss_ib": 0.00597736518830061, + "step": 233 + }, + { + "ce_ib": 11.921408653259277, + "ce_orig": 0.525043785572052, + "epoch": 0.06700697390179021, + "kl_loss": 0.35900092124938965, + "loss_ib": 0.004782150033861399, + "step": 233 + }, + { + "ce_ib": 16.360872268676758, + "ce_orig": 0.9665996432304382, + "epoch": 0.06729455748076785, + "kl_loss": 0.352176696062088, + "loss_ib": 0.005157853942364454, + "step": 234 + }, + { + "ce_ib": 17.86981964111328, + "ce_orig": 0.9465886354446411, + "epoch": 0.06729455748076785, + "kl_loss": 0.39093858003616333, + "loss_ib": 0.005696367472410202, + "step": 234 + }, + { + "ce_ib": 18.097686767578125, + "ce_orig": 0.8353486657142639, + "epoch": 0.06729455748076785, + "kl_loss": 0.3744758367538452, + "loss_ib": 0.005554527044296265, + "step": 234 + }, + { + "ce_ib": 22.27129554748535, + "ce_orig": 1.4471676349639893, + "epoch": 0.06729455748076785, + "kl_loss": 0.41227924823760986, + "loss_ib": 0.006349921692162752, + "step": 234 + }, + { + "epoch": 0.06758214105974548, + "grad_norm": 0.08016426116228104, + "learning_rate": 3.646496815286624e-05, + "loss": 0.8961, + "step": 235 + }, + { + "ce_ib": 17.102169036865234, + "ce_orig": 0.6802967190742493, + "epoch": 0.06758214105974548, + "kl_loss": 0.3811526298522949, + "loss_ib": 0.005521743092685938, + "step": 235 + }, + { + "ce_ib": 23.21323585510254, + "ce_orig": 1.7242603302001953, + "epoch": 0.06758214105974548, + "kl_loss": 0.43173903226852417, + "loss_ib": 0.006638714112341404, + "step": 235 + }, + { + "ce_ib": 13.367447853088379, + "ce_orig": 0.7693591713905334, + "epoch": 0.06758214105974548, + "kl_loss": 0.33568012714385986, + "loss_ib": 0.004693545866757631, + "step": 235 + }, + { + "ce_ib": 16.364444732666016, + "ce_orig": 0.9041774868965149, + "epoch": 0.06758214105974548, + "kl_loss": 0.3745976388454437, + "loss_ib": 0.005382420960813761, + "step": 235 + }, + { + "ce_ib": 17.089984893798828, + "ce_orig": 0.8724990487098694, + "epoch": 0.06786972463872312, + "kl_loss": 0.3381209969520569, + "loss_ib": 0.005090207792818546, + "step": 236 + }, + { + "ce_ib": 17.797290802001953, + "ce_orig": 1.2699850797653198, + "epoch": 0.06786972463872312, + "kl_loss": 0.4074021279811859, + "loss_ib": 0.0058537498116493225, + "step": 236 + }, + { + "ce_ib": 10.40896224975586, + "ce_orig": 0.2636343240737915, + "epoch": 0.06786972463872312, + "kl_loss": 0.6993351578712463, + "loss_ib": 0.008034247905015945, + "step": 236 + }, + { + "ce_ib": 23.780147552490234, + "ce_orig": 1.2295787334442139, + "epoch": 0.06786972463872312, + "kl_loss": 0.47904911637306213, + "loss_ib": 0.007168505806475878, + "step": 236 + }, + { + "ce_ib": 17.171159744262695, + "ce_orig": 1.21332848072052, + "epoch": 0.06815730821770077, + "kl_loss": 0.34175872802734375, + "loss_ib": 0.005134702660143375, + "step": 237 + }, + { + "ce_ib": 18.72576904296875, + "ce_orig": 0.8414787650108337, + "epoch": 0.06815730821770077, + "kl_loss": 0.41571539640426636, + "loss_ib": 0.006029731128364801, + "step": 237 + }, + { + "ce_ib": 18.983478546142578, + "ce_orig": 0.5497841238975525, + "epoch": 0.06815730821770077, + "kl_loss": 0.400782972574234, + "loss_ib": 0.005906177684664726, + "step": 237 + }, + { + "ce_ib": 15.720340728759766, + "ce_orig": 0.6456199884414673, + "epoch": 0.06815730821770077, + "kl_loss": 0.3572655916213989, + "loss_ib": 0.005144690163433552, + "step": 237 + }, + { + "ce_ib": 14.64540958404541, + "ce_orig": 0.5299506783485413, + "epoch": 0.06844489179667841, + "kl_loss": 0.3614061176776886, + "loss_ib": 0.005078601650893688, + "step": 238 + }, + { + "ce_ib": 14.622838973999023, + "ce_orig": 0.6512343883514404, + "epoch": 0.06844489179667841, + "kl_loss": 0.3499015271663666, + "loss_ib": 0.004961299244314432, + "step": 238 + }, + { + "ce_ib": 18.794681549072266, + "ce_orig": 0.5560781359672546, + "epoch": 0.06844489179667841, + "kl_loss": 0.3744252324104309, + "loss_ib": 0.005623720120638609, + "step": 238 + }, + { + "ce_ib": 16.820043563842773, + "ce_orig": 0.7689210772514343, + "epoch": 0.06844489179667841, + "kl_loss": 0.3898257613182068, + "loss_ib": 0.005580261815339327, + "step": 238 + }, + { + "ce_ib": 20.637855529785156, + "ce_orig": 1.1541659832000732, + "epoch": 0.06873247537565605, + "kl_loss": 0.4067099392414093, + "loss_ib": 0.006130884867161512, + "step": 239 + }, + { + "ce_ib": 14.409849166870117, + "ce_orig": 0.6937656402587891, + "epoch": 0.06873247537565605, + "kl_loss": 0.30936238169670105, + "loss_ib": 0.00453460868448019, + "step": 239 + }, + { + "ce_ib": 19.96538543701172, + "ce_orig": 1.6420783996582031, + "epoch": 0.06873247537565605, + "kl_loss": 0.3878486752510071, + "loss_ib": 0.005875025410205126, + "step": 239 + }, + { + "ce_ib": 16.174251556396484, + "ce_orig": 0.7331187725067139, + "epoch": 0.06873247537565605, + "kl_loss": 0.3374932110309601, + "loss_ib": 0.004992356989532709, + "step": 239 + }, + { + "epoch": 0.06902005895463369, + "grad_norm": 0.09181191027164459, + "learning_rate": 3.7261146496815283e-05, + "loss": 0.9216, + "step": 240 + }, + { + "ce_ib": 16.239824295043945, + "ce_orig": 0.45115554332733154, + "epoch": 0.06902005895463369, + "kl_loss": 0.3430927097797394, + "loss_ib": 0.005054909270256758, + "step": 240 + }, + { + "ce_ib": 20.204030990600586, + "ce_orig": 1.1928750276565552, + "epoch": 0.06902005895463369, + "kl_loss": 0.37950411438941956, + "loss_ib": 0.005815444048494101, + "step": 240 + }, + { + "ce_ib": 11.103907585144043, + "ce_orig": 0.41011255979537964, + "epoch": 0.06902005895463369, + "kl_loss": 0.3157821595668793, + "loss_ib": 0.004268212243914604, + "step": 240 + }, + { + "ce_ib": 16.61946678161621, + "ce_orig": 0.958586573600769, + "epoch": 0.06902005895463369, + "kl_loss": 0.472045361995697, + "loss_ib": 0.006382400169968605, + "step": 240 + }, + { + "ce_ib": 18.43888282775879, + "ce_orig": 0.8224636912345886, + "epoch": 0.06930764253361132, + "kl_loss": 0.31457871198654175, + "loss_ib": 0.004989675246179104, + "step": 241 + }, + { + "ce_ib": 19.861295700073242, + "ce_orig": 0.5294429659843445, + "epoch": 0.06930764253361132, + "kl_loss": 0.30837568640708923, + "loss_ib": 0.0050698863342404366, + "step": 241 + }, + { + "ce_ib": 17.056270599365234, + "ce_orig": 0.6633943319320679, + "epoch": 0.06930764253361132, + "kl_loss": 0.3993522524833679, + "loss_ib": 0.0056991493329405785, + "step": 241 + }, + { + "ce_ib": 13.237872123718262, + "ce_orig": 0.6085235476493835, + "epoch": 0.06930764253361132, + "kl_loss": 0.276342511177063, + "loss_ib": 0.0040872120298445225, + "step": 241 + }, + { + "ce_ib": 15.212370872497559, + "ce_orig": 0.5519753694534302, + "epoch": 0.06959522611258898, + "kl_loss": 0.3807227611541748, + "loss_ib": 0.005328464321792126, + "step": 242 + }, + { + "ce_ib": 14.194107055664062, + "ce_orig": 0.6263343691825867, + "epoch": 0.06959522611258898, + "kl_loss": 0.33415570855140686, + "loss_ib": 0.004760967567563057, + "step": 242 + }, + { + "ce_ib": 18.368473052978516, + "ce_orig": 0.8124864101409912, + "epoch": 0.06959522611258898, + "kl_loss": 0.3696582317352295, + "loss_ib": 0.005533429328352213, + "step": 242 + }, + { + "ce_ib": 18.977922439575195, + "ce_orig": 0.7746420502662659, + "epoch": 0.06959522611258898, + "kl_loss": 0.3186471462249756, + "loss_ib": 0.005084263626486063, + "step": 242 + }, + { + "ce_ib": 18.268203735351562, + "ce_orig": 0.9710350632667542, + "epoch": 0.06988280969156661, + "kl_loss": 0.32097768783569336, + "loss_ib": 0.005036597140133381, + "step": 243 + }, + { + "ce_ib": 20.087373733520508, + "ce_orig": 1.0895576477050781, + "epoch": 0.06988280969156661, + "kl_loss": 0.3057764768600464, + "loss_ib": 0.005066501908004284, + "step": 243 + }, + { + "ce_ib": 18.099958419799805, + "ce_orig": 0.907975971698761, + "epoch": 0.06988280969156661, + "kl_loss": 0.35890674591064453, + "loss_ib": 0.005399063229560852, + "step": 243 + }, + { + "ce_ib": 18.00782012939453, + "ce_orig": 0.520527184009552, + "epoch": 0.06988280969156661, + "kl_loss": 0.36176708340644836, + "loss_ib": 0.005418452434241772, + "step": 243 + }, + { + "ce_ib": 15.336793899536133, + "ce_orig": 0.755825936794281, + "epoch": 0.07017039327054425, + "kl_loss": 0.27832502126693726, + "loss_ib": 0.0043169292621314526, + "step": 244 + }, + { + "ce_ib": 14.561528205871582, + "ce_orig": 0.5862277150154114, + "epoch": 0.07017039327054425, + "kl_loss": 0.3262811303138733, + "loss_ib": 0.004718963988125324, + "step": 244 + }, + { + "ce_ib": 21.186973571777344, + "ce_orig": 1.1542885303497314, + "epoch": 0.07017039327054425, + "kl_loss": 0.3798186182975769, + "loss_ib": 0.005916883237659931, + "step": 244 + }, + { + "ce_ib": 17.660518646240234, + "ce_orig": 0.7000678181648254, + "epoch": 0.07017039327054425, + "kl_loss": 0.40329715609550476, + "loss_ib": 0.005799023434519768, + "step": 244 + }, + { + "epoch": 0.07045797684952189, + "grad_norm": 0.07606582343578339, + "learning_rate": 3.805732484076434e-05, + "loss": 0.8966, + "step": 245 + }, + { + "ce_ib": 17.2858829498291, + "ce_orig": 0.6848281025886536, + "epoch": 0.07045797684952189, + "kl_loss": 0.3115922510623932, + "loss_ib": 0.0048445104621350765, + "step": 245 + }, + { + "ce_ib": 12.877274513244629, + "ce_orig": 0.42091885209083557, + "epoch": 0.07045797684952189, + "kl_loss": 0.33227574825286865, + "loss_ib": 0.004610484931617975, + "step": 245 + }, + { + "ce_ib": 19.50572967529297, + "ce_orig": 1.2013055086135864, + "epoch": 0.07045797684952189, + "kl_loss": 0.29557040333747864, + "loss_ib": 0.004906277172267437, + "step": 245 + }, + { + "ce_ib": 17.724828720092773, + "ce_orig": 0.7979478240013123, + "epoch": 0.07045797684952189, + "kl_loss": 0.3825852870941162, + "loss_ib": 0.005598335526883602, + "step": 245 + }, + { + "ce_ib": 17.733549118041992, + "ce_orig": 1.1459635496139526, + "epoch": 0.07074556042849953, + "kl_loss": 0.29438281059265137, + "loss_ib": 0.004717182833701372, + "step": 246 + }, + { + "ce_ib": 16.1971378326416, + "ce_orig": 0.13715046644210815, + "epoch": 0.07074556042849953, + "kl_loss": 0.6951940059661865, + "loss_ib": 0.008571653626859188, + "step": 246 + }, + { + "ce_ib": 20.530433654785156, + "ce_orig": 1.4083482027053833, + "epoch": 0.07074556042849953, + "kl_loss": 0.35388949513435364, + "loss_ib": 0.005591938272118568, + "step": 246 + }, + { + "ce_ib": 16.526330947875977, + "ce_orig": 0.6567316055297852, + "epoch": 0.07074556042849953, + "kl_loss": 0.3231682777404785, + "loss_ib": 0.004884315654635429, + "step": 246 + }, + { + "ce_ib": 14.093873977661133, + "ce_orig": 0.871584951877594, + "epoch": 0.07103314400747718, + "kl_loss": 0.2740858793258667, + "loss_ib": 0.004150246270000935, + "step": 247 + }, + { + "ce_ib": 21.621118545532227, + "ce_orig": 1.2882436513900757, + "epoch": 0.07103314400747718, + "kl_loss": 0.4834282696247101, + "loss_ib": 0.006996394135057926, + "step": 247 + }, + { + "ce_ib": 19.13994026184082, + "ce_orig": 1.2011432647705078, + "epoch": 0.07103314400747718, + "kl_loss": 0.31072482466697693, + "loss_ib": 0.005021241959184408, + "step": 247 + }, + { + "ce_ib": 13.94855785369873, + "ce_orig": 0.6923986673355103, + "epoch": 0.07103314400747718, + "kl_loss": 0.34611976146698, + "loss_ib": 0.004856053274124861, + "step": 247 + }, + { + "ce_ib": 18.997570037841797, + "ce_orig": 1.2123730182647705, + "epoch": 0.07132072758645482, + "kl_loss": 0.3029173016548157, + "loss_ib": 0.004928929731249809, + "step": 248 + }, + { + "ce_ib": 18.082622528076172, + "ce_orig": 0.7000762224197388, + "epoch": 0.07132072758645482, + "kl_loss": 0.3445127308368683, + "loss_ib": 0.005253389477729797, + "step": 248 + }, + { + "ce_ib": 13.205419540405273, + "ce_orig": 0.6170308589935303, + "epoch": 0.07132072758645482, + "kl_loss": 0.2936919927597046, + "loss_ib": 0.004257461987435818, + "step": 248 + }, + { + "ce_ib": 16.406328201293945, + "ce_orig": 0.8249359130859375, + "epoch": 0.07132072758645482, + "kl_loss": 0.2896481454372406, + "loss_ib": 0.004537113942205906, + "step": 248 + }, + { + "ce_ib": 19.35923194885254, + "ce_orig": 0.9184130430221558, + "epoch": 0.07160831116543245, + "kl_loss": 0.2877388000488281, + "loss_ib": 0.004813311155885458, + "step": 249 + }, + { + "ce_ib": 14.84507942199707, + "ce_orig": 0.962078332901001, + "epoch": 0.07160831116543245, + "kl_loss": 0.33685553073883057, + "loss_ib": 0.004853063262999058, + "step": 249 + }, + { + "ce_ib": 13.66109848022461, + "ce_orig": 0.8815235495567322, + "epoch": 0.07160831116543245, + "kl_loss": 0.2970173954963684, + "loss_ib": 0.004336283542215824, + "step": 249 + }, + { + "ce_ib": 15.64456844329834, + "ce_orig": 0.9901912808418274, + "epoch": 0.07160831116543245, + "kl_loss": 0.30990880727767944, + "loss_ib": 0.004663544707000256, + "step": 249 + }, + { + "epoch": 0.07189589474441009, + "grad_norm": 0.07667157799005508, + "learning_rate": 3.885350318471338e-05, + "loss": 0.8786, + "step": 250 + }, + { + "ce_ib": 17.655288696289062, + "ce_orig": 0.9249431490898132, + "epoch": 0.07189589474441009, + "kl_loss": 0.24412468075752258, + "loss_ib": 0.004206775221973658, + "step": 250 + }, + { + "ce_ib": 16.22374153137207, + "ce_orig": 0.8454200625419617, + "epoch": 0.07189589474441009, + "kl_loss": 0.3324206471443176, + "loss_ib": 0.004946580622345209, + "step": 250 + }, + { + "ce_ib": 18.696596145629883, + "ce_orig": 1.0471431016921997, + "epoch": 0.07189589474441009, + "kl_loss": 0.3605830669403076, + "loss_ib": 0.005475489888340235, + "step": 250 + }, + { + "ce_ib": 19.617197036743164, + "ce_orig": 0.630739152431488, + "epoch": 0.07189589474441009, + "kl_loss": 0.42468297481536865, + "loss_ib": 0.006208549719303846, + "step": 250 + }, + { + "ce_ib": 14.190434455871582, + "ce_orig": 0.5681670904159546, + "epoch": 0.07218347832338773, + "kl_loss": 0.2779189646244049, + "loss_ib": 0.004198232665657997, + "step": 251 + }, + { + "ce_ib": 12.449499130249023, + "ce_orig": 0.7335292100906372, + "epoch": 0.07218347832338773, + "kl_loss": 0.30513036251068115, + "loss_ib": 0.004296253435313702, + "step": 251 + }, + { + "ce_ib": 17.19744300842285, + "ce_orig": 0.664770245552063, + "epoch": 0.07218347832338773, + "kl_loss": 0.3403986096382141, + "loss_ib": 0.005123730283230543, + "step": 251 + }, + { + "ce_ib": 16.57467269897461, + "ce_orig": 1.2152647972106934, + "epoch": 0.07218347832338773, + "kl_loss": 0.2757129669189453, + "loss_ib": 0.0044145965948700905, + "step": 251 + }, + { + "ce_ib": 20.365676879882812, + "ce_orig": 0.9634372591972351, + "epoch": 0.07247106190236538, + "kl_loss": 0.31626924872398376, + "loss_ib": 0.005199260078370571, + "step": 252 + }, + { + "ce_ib": 18.716487884521484, + "ce_orig": 1.0937143564224243, + "epoch": 0.07247106190236538, + "kl_loss": 0.3098085820674896, + "loss_ib": 0.0049697342328727245, + "step": 252 + }, + { + "ce_ib": 16.123247146606445, + "ce_orig": 1.0408896207809448, + "epoch": 0.07247106190236538, + "kl_loss": 0.31008970737457275, + "loss_ib": 0.004713221453130245, + "step": 252 + }, + { + "ce_ib": 17.019351959228516, + "ce_orig": 1.1104483604431152, + "epoch": 0.07247106190236538, + "kl_loss": 0.30466794967651367, + "loss_ib": 0.004748614504933357, + "step": 252 + }, + { + "ce_ib": 14.710082054138184, + "ce_orig": 0.9470803737640381, + "epoch": 0.07275864548134302, + "kl_loss": 0.37655502557754517, + "loss_ib": 0.005236558150500059, + "step": 253 + }, + { + "ce_ib": 20.534164428710938, + "ce_orig": 0.901342511177063, + "epoch": 0.07275864548134302, + "kl_loss": 0.3438325524330139, + "loss_ib": 0.005491741932928562, + "step": 253 + }, + { + "ce_ib": 16.916545867919922, + "ce_orig": 0.9539148211479187, + "epoch": 0.07275864548134302, + "kl_loss": 0.35449427366256714, + "loss_ib": 0.005236596800386906, + "step": 253 + }, + { + "ce_ib": 15.45893383026123, + "ce_orig": 0.6202948689460754, + "epoch": 0.07275864548134302, + "kl_loss": 0.3998444080352783, + "loss_ib": 0.005544337444007397, + "step": 253 + }, + { + "ce_ib": 17.64470100402832, + "ce_orig": 1.3979955911636353, + "epoch": 0.07304622906032066, + "kl_loss": 0.31596821546554565, + "loss_ib": 0.004924152046442032, + "step": 254 + }, + { + "ce_ib": 17.690441131591797, + "ce_orig": 0.8207519054412842, + "epoch": 0.07304622906032066, + "kl_loss": 0.2767926752567291, + "loss_ib": 0.0045369709841907024, + "step": 254 + }, + { + "ce_ib": 10.424705505371094, + "ce_orig": 0.5839744210243225, + "epoch": 0.07304622906032066, + "kl_loss": 0.22676922380924225, + "loss_ib": 0.0033101625740528107, + "step": 254 + }, + { + "ce_ib": 20.686954498291016, + "ce_orig": 0.6900187730789185, + "epoch": 0.07304622906032066, + "kl_loss": 0.4284232258796692, + "loss_ib": 0.006352927535772324, + "step": 254 + }, + { + "epoch": 0.0733338126392983, + "grad_norm": 0.06607817858457565, + "learning_rate": 3.964968152866242e-05, + "loss": 0.846, + "step": 255 + }, + { + "ce_ib": 14.843269348144531, + "ce_orig": 0.8040740489959717, + "epoch": 0.0733338126392983, + "kl_loss": 0.37673041224479675, + "loss_ib": 0.005251631140708923, + "step": 255 + }, + { + "ce_ib": 14.139528274536133, + "ce_orig": 0.7245256304740906, + "epoch": 0.0733338126392983, + "kl_loss": 0.3231876790523529, + "loss_ib": 0.004645829554647207, + "step": 255 + }, + { + "ce_ib": 24.16304588317871, + "ce_orig": 1.9036223888397217, + "epoch": 0.0733338126392983, + "kl_loss": 0.2994362413883209, + "loss_ib": 0.005410667043179274, + "step": 255 + }, + { + "ce_ib": 17.201786041259766, + "ce_orig": 0.680133044719696, + "epoch": 0.0733338126392983, + "kl_loss": 0.3443969786167145, + "loss_ib": 0.005164148285984993, + "step": 255 + }, + { + "ce_ib": 11.843783378601074, + "ce_orig": 0.5880969166755676, + "epoch": 0.07362139621827593, + "kl_loss": 0.2765531539916992, + "loss_ib": 0.003949909936636686, + "step": 256 + }, + { + "ce_ib": 15.530258178710938, + "ce_orig": 0.7509983777999878, + "epoch": 0.07362139621827593, + "kl_loss": 0.3052097260951996, + "loss_ib": 0.004605122841894627, + "step": 256 + }, + { + "ce_ib": 18.292640686035156, + "ce_orig": 0.9828827977180481, + "epoch": 0.07362139621827593, + "kl_loss": 0.29785263538360596, + "loss_ib": 0.0048077902756631374, + "step": 256 + }, + { + "ce_ib": 12.437490463256836, + "ce_orig": 0.5975197553634644, + "epoch": 0.07362139621827593, + "kl_loss": 0.32125842571258545, + "loss_ib": 0.004456333350390196, + "step": 256 + }, + { + "ce_ib": 17.51129722595215, + "ce_orig": 0.6895196437835693, + "epoch": 0.07390897979725358, + "kl_loss": 0.29928696155548096, + "loss_ib": 0.00474399933591485, + "step": 257 + }, + { + "ce_ib": 19.982250213623047, + "ce_orig": 1.2780667543411255, + "epoch": 0.07390897979725358, + "kl_loss": 0.4109703004360199, + "loss_ib": 0.006107928231358528, + "step": 257 + }, + { + "ce_ib": 18.59293556213379, + "ce_orig": 0.7878507375717163, + "epoch": 0.07390897979725358, + "kl_loss": 0.2821478247642517, + "loss_ib": 0.004680771846324205, + "step": 257 + }, + { + "ce_ib": 16.528976440429688, + "ce_orig": 0.9417824149131775, + "epoch": 0.07390897979725358, + "kl_loss": 0.2769574522972107, + "loss_ib": 0.004422471858561039, + "step": 257 + }, + { + "ce_ib": 12.966187477111816, + "ce_orig": 0.5238648653030396, + "epoch": 0.07419656337623122, + "kl_loss": 0.31731730699539185, + "loss_ib": 0.004469791427254677, + "step": 258 + }, + { + "ce_ib": 13.36031723022461, + "ce_orig": 0.5534040927886963, + "epoch": 0.07419656337623122, + "kl_loss": 0.2720886170864105, + "loss_ib": 0.00405691796913743, + "step": 258 + }, + { + "ce_ib": 19.149700164794922, + "ce_orig": 1.4191524982452393, + "epoch": 0.07419656337623122, + "kl_loss": 0.34151729941368103, + "loss_ib": 0.00533014303073287, + "step": 258 + }, + { + "ce_ib": 19.49608612060547, + "ce_orig": 1.3304085731506348, + "epoch": 0.07419656337623122, + "kl_loss": 0.303983211517334, + "loss_ib": 0.004989440552890301, + "step": 258 + }, + { + "ce_ib": 16.158462524414062, + "ce_orig": 0.7256084680557251, + "epoch": 0.07448414695520886, + "kl_loss": 0.3417550027370453, + "loss_ib": 0.0050333961844444275, + "step": 259 + }, + { + "ce_ib": 15.630940437316895, + "ce_orig": 0.45199069380760193, + "epoch": 0.07448414695520886, + "kl_loss": 0.3961242437362671, + "loss_ib": 0.005524335894733667, + "step": 259 + }, + { + "ce_ib": 15.765962600708008, + "ce_orig": 1.1196238994598389, + "epoch": 0.07448414695520886, + "kl_loss": 0.25270766019821167, + "loss_ib": 0.004103672690689564, + "step": 259 + }, + { + "ce_ib": 13.915310859680176, + "ce_orig": 0.8033282160758972, + "epoch": 0.07448414695520886, + "kl_loss": 0.292714923620224, + "loss_ib": 0.00431868014857173, + "step": 259 + }, + { + "epoch": 0.0747717305341865, + "grad_norm": 0.08863961696624756, + "learning_rate": 4.044585987261147e-05, + "loss": 0.8927, + "step": 260 + }, + { + "ce_ib": 15.066596031188965, + "ce_orig": 0.8063942193984985, + "epoch": 0.0747717305341865, + "kl_loss": 0.28827041387557983, + "loss_ib": 0.004389363341033459, + "step": 260 + }, + { + "ce_ib": 12.796646118164062, + "ce_orig": 0.41930192708969116, + "epoch": 0.0747717305341865, + "kl_loss": 0.2615140378475189, + "loss_ib": 0.0038948049768805504, + "step": 260 + }, + { + "ce_ib": 14.110474586486816, + "ce_orig": 0.68003249168396, + "epoch": 0.0747717305341865, + "kl_loss": 0.261357843875885, + "loss_ib": 0.0040246257558465, + "step": 260 + }, + { + "ce_ib": 17.80391502380371, + "ce_orig": 0.7285661697387695, + "epoch": 0.0747717305341865, + "kl_loss": 0.29259398579597473, + "loss_ib": 0.00470633152872324, + "step": 260 + }, + { + "ce_ib": 10.909024238586426, + "ce_orig": 0.47324129939079285, + "epoch": 0.07505931411316413, + "kl_loss": 0.2249765694141388, + "loss_ib": 0.0033406680449843407, + "step": 261 + }, + { + "ce_ib": 15.371655464172363, + "ce_orig": 0.9706589579582214, + "epoch": 0.07505931411316413, + "kl_loss": 0.2712176442146301, + "loss_ib": 0.004249341785907745, + "step": 261 + }, + { + "ce_ib": 13.265528678894043, + "ce_orig": 0.8591080904006958, + "epoch": 0.07505931411316413, + "kl_loss": 0.2495938241481781, + "loss_ib": 0.0038224910385906696, + "step": 261 + }, + { + "ce_ib": 10.983420372009277, + "ce_orig": 0.6682037711143494, + "epoch": 0.07505931411316413, + "kl_loss": 0.18751020729541779, + "loss_ib": 0.0029734440613538027, + "step": 261 + }, + { + "ce_ib": 11.951087951660156, + "ce_orig": 0.6800048351287842, + "epoch": 0.07534689769214178, + "kl_loss": 0.3001071512699127, + "loss_ib": 0.004196180030703545, + "step": 262 + }, + { + "ce_ib": 17.0267276763916, + "ce_orig": 0.4345322549343109, + "epoch": 0.07534689769214178, + "kl_loss": 0.3833320736885071, + "loss_ib": 0.005535993259400129, + "step": 262 + }, + { + "ce_ib": 18.44808006286621, + "ce_orig": 0.9550086259841919, + "epoch": 0.07534689769214178, + "kl_loss": 0.2870803773403168, + "loss_ib": 0.004715611692517996, + "step": 262 + }, + { + "ce_ib": 16.392032623291016, + "ce_orig": 1.0675876140594482, + "epoch": 0.07534689769214178, + "kl_loss": 0.30814555287361145, + "loss_ib": 0.004720658529549837, + "step": 262 + }, + { + "ce_ib": 16.68846321105957, + "ce_orig": 0.6256803274154663, + "epoch": 0.07563448127111942, + "kl_loss": 0.2585268020629883, + "loss_ib": 0.004254114348441362, + "step": 263 + }, + { + "ce_ib": 12.127674102783203, + "ce_orig": 0.5661578178405762, + "epoch": 0.07563448127111942, + "kl_loss": 0.23702625930309296, + "loss_ib": 0.0035830300766974688, + "step": 263 + }, + { + "ce_ib": 16.941381454467773, + "ce_orig": 0.6445264220237732, + "epoch": 0.07563448127111942, + "kl_loss": 0.29147881269454956, + "loss_ib": 0.004608925897628069, + "step": 263 + }, + { + "ce_ib": 11.159895896911621, + "ce_orig": 0.6294872164726257, + "epoch": 0.07563448127111942, + "kl_loss": 0.25180041790008545, + "loss_ib": 0.0036339936777949333, + "step": 263 + }, + { + "ce_ib": 16.635135650634766, + "ce_orig": 1.0586961507797241, + "epoch": 0.07592206485009706, + "kl_loss": 0.2687499523162842, + "loss_ib": 0.004351012874394655, + "step": 264 + }, + { + "ce_ib": 12.455622673034668, + "ce_orig": 0.8253864049911499, + "epoch": 0.07592206485009706, + "kl_loss": 0.24592146277427673, + "loss_ib": 0.00370477675460279, + "step": 264 + }, + { + "ce_ib": 11.99234676361084, + "ce_orig": 0.571262776851654, + "epoch": 0.07592206485009706, + "kl_loss": 0.19673100113868713, + "loss_ib": 0.0031665447168052197, + "step": 264 + }, + { + "ce_ib": 16.71959114074707, + "ce_orig": 0.8490833044052124, + "epoch": 0.07592206485009706, + "kl_loss": 0.3431280553340912, + "loss_ib": 0.005103239323943853, + "step": 264 + }, + { + "epoch": 0.0762096484290747, + "grad_norm": 0.09324845671653748, + "learning_rate": 4.1242038216560514e-05, + "loss": 0.8594, + "step": 265 + }, + { + "ce_ib": 13.668450355529785, + "ce_orig": 0.7410028576850891, + "epoch": 0.0762096484290747, + "kl_loss": 0.3923606872558594, + "loss_ib": 0.005290451925247908, + "step": 265 + }, + { + "ce_ib": 16.50493621826172, + "ce_orig": 0.904811441898346, + "epoch": 0.0762096484290747, + "kl_loss": 0.32142162322998047, + "loss_ib": 0.004864709917455912, + "step": 265 + }, + { + "ce_ib": 14.940958976745605, + "ce_orig": 1.0615205764770508, + "epoch": 0.0762096484290747, + "kl_loss": 0.2736600637435913, + "loss_ib": 0.004230696242302656, + "step": 265 + }, + { + "ce_ib": 20.21286964416504, + "ce_orig": 0.936401903629303, + "epoch": 0.0762096484290747, + "kl_loss": 0.2895187437534332, + "loss_ib": 0.004916474223136902, + "step": 265 + }, + { + "ce_ib": 15.859089851379395, + "ce_orig": 0.9013702869415283, + "epoch": 0.07649723200805233, + "kl_loss": 0.2485354244709015, + "loss_ib": 0.004071263130754232, + "step": 266 + }, + { + "ce_ib": 14.475261688232422, + "ce_orig": 0.9954730272293091, + "epoch": 0.07649723200805233, + "kl_loss": 0.2513744533061981, + "loss_ib": 0.003961270209401846, + "step": 266 + }, + { + "ce_ib": 20.038143157958984, + "ce_orig": 1.3421083688735962, + "epoch": 0.07649723200805233, + "kl_loss": 0.281283974647522, + "loss_ib": 0.00481665413826704, + "step": 266 + }, + { + "ce_ib": 22.541851043701172, + "ce_orig": 1.7132771015167236, + "epoch": 0.07649723200805233, + "kl_loss": 0.31567251682281494, + "loss_ib": 0.005410910118371248, + "step": 266 + }, + { + "ce_ib": 14.337822914123535, + "ce_orig": 0.8144393563270569, + "epoch": 0.07678481558702999, + "kl_loss": 0.31490829586982727, + "loss_ib": 0.004582865163683891, + "step": 267 + }, + { + "ce_ib": 16.432825088500977, + "ce_orig": 0.5571436285972595, + "epoch": 0.07678481558702999, + "kl_loss": 0.3173448443412781, + "loss_ib": 0.004816730972379446, + "step": 267 + }, + { + "ce_ib": 18.915435791015625, + "ce_orig": 1.2111248970031738, + "epoch": 0.07678481558702999, + "kl_loss": 0.27650901675224304, + "loss_ib": 0.004656633827835321, + "step": 267 + }, + { + "ce_ib": 20.70977783203125, + "ce_orig": 1.6732383966445923, + "epoch": 0.07678481558702999, + "kl_loss": 0.32756784558296204, + "loss_ib": 0.00534665584564209, + "step": 267 + }, + { + "ce_ib": 19.59430503845215, + "ce_orig": 1.263615369796753, + "epoch": 0.07707239916600762, + "kl_loss": 0.2561582028865814, + "loss_ib": 0.004521012306213379, + "step": 268 + }, + { + "ce_ib": 24.518985748291016, + "ce_orig": 2.1271183490753174, + "epoch": 0.07707239916600762, + "kl_loss": 0.36851945519447327, + "loss_ib": 0.006137093063443899, + "step": 268 + }, + { + "ce_ib": 16.20216941833496, + "ce_orig": 0.524202823638916, + "epoch": 0.07707239916600762, + "kl_loss": 0.30454859137535095, + "loss_ib": 0.00466570258140564, + "step": 268 + }, + { + "ce_ib": 16.534713745117188, + "ce_orig": 0.59481281042099, + "epoch": 0.07707239916600762, + "kl_loss": 0.24704763293266296, + "loss_ib": 0.004123947583138943, + "step": 268 + }, + { + "ce_ib": 15.644123077392578, + "ce_orig": 0.9609376788139343, + "epoch": 0.07735998274498526, + "kl_loss": 0.24495989084243774, + "loss_ib": 0.0040140110068023205, + "step": 269 + }, + { + "ce_ib": 13.680428504943848, + "ce_orig": 0.7830809950828552, + "epoch": 0.07735998274498526, + "kl_loss": 0.23677141964435577, + "loss_ib": 0.0037357567343860865, + "step": 269 + }, + { + "ce_ib": 19.454843521118164, + "ce_orig": 0.8037047386169434, + "epoch": 0.07735998274498526, + "kl_loss": 0.33364683389663696, + "loss_ib": 0.005281952675431967, + "step": 269 + }, + { + "ce_ib": 13.62769889831543, + "ce_orig": 0.7561288475990295, + "epoch": 0.07735998274498526, + "kl_loss": 0.3251601457595825, + "loss_ib": 0.004614371340721846, + "step": 269 + }, + { + "epoch": 0.0776475663239629, + "grad_norm": 0.06999674439430237, + "learning_rate": 4.2038216560509556e-05, + "loss": 0.8477, + "step": 270 + }, + { + "ce_ib": 15.570327758789062, + "ce_orig": 0.5456323027610779, + "epoch": 0.0776475663239629, + "kl_loss": 0.24074603617191315, + "loss_ib": 0.003964493051171303, + "step": 270 + }, + { + "ce_ib": 12.79995346069336, + "ce_orig": 0.5012090802192688, + "epoch": 0.0776475663239629, + "kl_loss": 0.3022935688495636, + "loss_ib": 0.004302930552512407, + "step": 270 + }, + { + "ce_ib": 16.223758697509766, + "ce_orig": 0.3916482925415039, + "epoch": 0.0776475663239629, + "kl_loss": 0.27299919724464417, + "loss_ib": 0.004352367948740721, + "step": 270 + }, + { + "ce_ib": 14.882962226867676, + "ce_orig": 0.9101399183273315, + "epoch": 0.0776475663239629, + "kl_loss": 0.2818550765514374, + "loss_ib": 0.004306846763938665, + "step": 270 + }, + { + "ce_ib": 15.584421157836914, + "ce_orig": 0.616856575012207, + "epoch": 0.07793514990294054, + "kl_loss": 0.2159929871559143, + "loss_ib": 0.003718371968716383, + "step": 271 + }, + { + "ce_ib": 14.746891975402832, + "ce_orig": 0.5636629462242126, + "epoch": 0.07793514990294054, + "kl_loss": 0.3384188413619995, + "loss_ib": 0.004858877509832382, + "step": 271 + }, + { + "ce_ib": 19.72770881652832, + "ce_orig": 1.6866846084594727, + "epoch": 0.07793514990294054, + "kl_loss": 0.2658270597457886, + "loss_ib": 0.004631041083484888, + "step": 271 + }, + { + "ce_ib": 13.834111213684082, + "ce_orig": 0.4504989981651306, + "epoch": 0.07793514990294054, + "kl_loss": 0.309722900390625, + "loss_ib": 0.0044806404039263725, + "step": 271 + }, + { + "ce_ib": 16.82283592224121, + "ce_orig": 1.4967201948165894, + "epoch": 0.07822273348191819, + "kl_loss": 0.2537575364112854, + "loss_ib": 0.004219858907163143, + "step": 272 + }, + { + "ce_ib": 16.692188262939453, + "ce_orig": 0.7724244594573975, + "epoch": 0.07822273348191819, + "kl_loss": 0.3247263431549072, + "loss_ib": 0.004916482139378786, + "step": 272 + }, + { + "ce_ib": 17.720109939575195, + "ce_orig": 1.0197162628173828, + "epoch": 0.07822273348191819, + "kl_loss": 0.3129570484161377, + "loss_ib": 0.004901581443846226, + "step": 272 + }, + { + "ce_ib": 11.472055435180664, + "ce_orig": 0.7266469597816467, + "epoch": 0.07822273348191819, + "kl_loss": 0.2540128827095032, + "loss_ib": 0.0036873342469334602, + "step": 272 + }, + { + "ce_ib": 14.627097129821777, + "ce_orig": 0.5554067492485046, + "epoch": 0.07851031706089583, + "kl_loss": 0.27136164903640747, + "loss_ib": 0.004176326096057892, + "step": 273 + }, + { + "ce_ib": 12.32707405090332, + "ce_orig": 0.832676887512207, + "epoch": 0.07851031706089583, + "kl_loss": 0.23146983981132507, + "loss_ib": 0.0035474055912345648, + "step": 273 + }, + { + "ce_ib": 14.380395889282227, + "ce_orig": 0.5141070485115051, + "epoch": 0.07851031706089583, + "kl_loss": 0.2842778265476227, + "loss_ib": 0.004280817694962025, + "step": 273 + }, + { + "ce_ib": 16.54999351501465, + "ce_orig": 1.1931746006011963, + "epoch": 0.07851031706089583, + "kl_loss": 0.3062623143196106, + "loss_ib": 0.004717622417956591, + "step": 273 + }, + { + "ce_ib": 16.448566436767578, + "ce_orig": 0.9063194394111633, + "epoch": 0.07879790063987346, + "kl_loss": 0.254788339138031, + "loss_ib": 0.004192739725112915, + "step": 274 + }, + { + "ce_ib": 20.326473236083984, + "ce_orig": 1.289271593093872, + "epoch": 0.07879790063987346, + "kl_loss": 0.21474193036556244, + "loss_ib": 0.004180066287517548, + "step": 274 + }, + { + "ce_ib": 14.406719207763672, + "ce_orig": 1.0166672468185425, + "epoch": 0.07879790063987346, + "kl_loss": 0.2415088266134262, + "loss_ib": 0.0038557599764317274, + "step": 274 + }, + { + "ce_ib": 17.027986526489258, + "ce_orig": 1.0573188066482544, + "epoch": 0.07879790063987346, + "kl_loss": 0.33195120096206665, + "loss_ib": 0.0050223106518387794, + "step": 274 + }, + { + "epoch": 0.0790854842188511, + "grad_norm": 0.08076049387454987, + "learning_rate": 4.2834394904458604e-05, + "loss": 0.908, + "step": 275 + }, + { + "ce_ib": 11.249130249023438, + "ce_orig": 0.5311962962150574, + "epoch": 0.0790854842188511, + "kl_loss": 0.2099292278289795, + "loss_ib": 0.0032242052257061005, + "step": 275 + }, + { + "ce_ib": 17.736249923706055, + "ce_orig": 0.9784615635871887, + "epoch": 0.0790854842188511, + "kl_loss": 0.27468031644821167, + "loss_ib": 0.004520427901297808, + "step": 275 + }, + { + "ce_ib": 14.927811622619629, + "ce_orig": 0.7807605862617493, + "epoch": 0.0790854842188511, + "kl_loss": 0.35637491941452026, + "loss_ib": 0.005056530237197876, + "step": 275 + }, + { + "ce_ib": 12.488973617553711, + "ce_orig": 0.8185478448867798, + "epoch": 0.0790854842188511, + "kl_loss": 0.31116726994514465, + "loss_ib": 0.004360570106655359, + "step": 275 + }, + { + "ce_ib": 15.501134872436523, + "ce_orig": 1.238783597946167, + "epoch": 0.07937306779782874, + "kl_loss": 0.20382773876190186, + "loss_ib": 0.0035883907694369555, + "step": 276 + }, + { + "ce_ib": 17.22933578491211, + "ce_orig": 1.2730385065078735, + "epoch": 0.07937306779782874, + "kl_loss": 0.47506648302078247, + "loss_ib": 0.006473598536103964, + "step": 276 + }, + { + "ce_ib": 13.309030532836914, + "ce_orig": 0.5584474802017212, + "epoch": 0.07937306779782874, + "kl_loss": 0.23811832070350647, + "loss_ib": 0.003712086006999016, + "step": 276 + }, + { + "ce_ib": 12.37324333190918, + "ce_orig": 0.8598084449768066, + "epoch": 0.07937306779782874, + "kl_loss": 0.20274879038333893, + "loss_ib": 0.0032648120541125536, + "step": 276 + }, + { + "ce_ib": 18.88533592224121, + "ce_orig": 0.7922468781471252, + "epoch": 0.07966065137680639, + "kl_loss": 0.2773568034172058, + "loss_ib": 0.004662101622670889, + "step": 277 + }, + { + "ce_ib": 16.412996292114258, + "ce_orig": 0.7500933408737183, + "epoch": 0.07966065137680639, + "kl_loss": 0.27461355924606323, + "loss_ib": 0.004387435037642717, + "step": 277 + }, + { + "ce_ib": 15.230081558227539, + "ce_orig": 1.1697120666503906, + "epoch": 0.07966065137680639, + "kl_loss": 0.2722022533416748, + "loss_ib": 0.00424503069370985, + "step": 277 + }, + { + "ce_ib": 15.583248138427734, + "ce_orig": 0.8734590411186218, + "epoch": 0.07966065137680639, + "kl_loss": 0.2992432117462158, + "loss_ib": 0.004550756886601448, + "step": 277 + }, + { + "ce_ib": 15.06824779510498, + "ce_orig": 1.1103448867797852, + "epoch": 0.07994823495578403, + "kl_loss": 0.2773784101009369, + "loss_ib": 0.0042806086130440235, + "step": 278 + }, + { + "ce_ib": 19.022869110107422, + "ce_orig": 1.0327725410461426, + "epoch": 0.07994823495578403, + "kl_loss": 0.33331602811813354, + "loss_ib": 0.005235447082668543, + "step": 278 + }, + { + "ce_ib": 15.828374862670898, + "ce_orig": 0.5875866413116455, + "epoch": 0.07994823495578403, + "kl_loss": 0.1845521628856659, + "loss_ib": 0.0034283590503036976, + "step": 278 + }, + { + "ce_ib": 10.436365127563477, + "ce_orig": 0.6552335023880005, + "epoch": 0.07994823495578403, + "kl_loss": 0.18279395997524261, + "loss_ib": 0.0028715759981423616, + "step": 278 + }, + { + "ce_ib": 12.258537292480469, + "ce_orig": 0.6182965636253357, + "epoch": 0.08023581853476167, + "kl_loss": 0.2173025906085968, + "loss_ib": 0.00339887966401875, + "step": 279 + }, + { + "ce_ib": 16.939525604248047, + "ce_orig": 1.2229260206222534, + "epoch": 0.08023581853476167, + "kl_loss": 0.249847412109375, + "loss_ib": 0.004192426800727844, + "step": 279 + }, + { + "ce_ib": 16.706846237182617, + "ce_orig": 1.1211984157562256, + "epoch": 0.08023581853476167, + "kl_loss": 0.2522197961807251, + "loss_ib": 0.004192882217466831, + "step": 279 + }, + { + "ce_ib": 14.674199104309082, + "ce_orig": 0.790857195854187, + "epoch": 0.08023581853476167, + "kl_loss": 0.20820698142051697, + "loss_ib": 0.0035494896583259106, + "step": 279 + }, + { + "epoch": 0.0805234021137393, + "grad_norm": 0.07456893473863602, + "learning_rate": 4.3630573248407646e-05, + "loss": 0.8909, + "step": 280 + }, + { + "ce_ib": 17.366165161132812, + "ce_orig": 0.7224763035774231, + "epoch": 0.0805234021137393, + "kl_loss": 0.2818063497543335, + "loss_ib": 0.004554680082947016, + "step": 280 + }, + { + "ce_ib": 15.292021751403809, + "ce_orig": 0.5337414145469666, + "epoch": 0.0805234021137393, + "kl_loss": 0.24038422107696533, + "loss_ib": 0.003933044150471687, + "step": 280 + }, + { + "ce_ib": 13.491898536682129, + "ce_orig": 0.5026684999465942, + "epoch": 0.0805234021137393, + "kl_loss": 0.19754835963249207, + "loss_ib": 0.0033246735110878944, + "step": 280 + }, + { + "ce_ib": 18.61618995666504, + "ce_orig": 1.3851393461227417, + "epoch": 0.0805234021137393, + "kl_loss": 0.3309285044670105, + "loss_ib": 0.005170903634279966, + "step": 280 + }, + { + "ce_ib": 12.345269203186035, + "ce_orig": 0.6989408135414124, + "epoch": 0.08081098569271694, + "kl_loss": 0.22234660387039185, + "loss_ib": 0.0034579928033053875, + "step": 281 + }, + { + "ce_ib": 11.875962257385254, + "ce_orig": 0.6694311499595642, + "epoch": 0.08081098569271694, + "kl_loss": 0.19901394844055176, + "loss_ib": 0.003177735721692443, + "step": 281 + }, + { + "ce_ib": 17.15048599243164, + "ce_orig": 0.5024470090866089, + "epoch": 0.08081098569271694, + "kl_loss": 0.3105819821357727, + "loss_ib": 0.004820868372917175, + "step": 281 + }, + { + "ce_ib": 18.85089111328125, + "ce_orig": 1.5827473402023315, + "epoch": 0.08081098569271694, + "kl_loss": 0.2236773669719696, + "loss_ib": 0.004121862351894379, + "step": 281 + }, + { + "ce_ib": 14.070013999938965, + "ce_orig": 0.9050841331481934, + "epoch": 0.08109856927169459, + "kl_loss": 0.26318368315696716, + "loss_ib": 0.00403883820399642, + "step": 282 + }, + { + "ce_ib": 15.759684562683105, + "ce_orig": 0.9596781134605408, + "epoch": 0.08109856927169459, + "kl_loss": 0.3149911165237427, + "loss_ib": 0.0047258795239031315, + "step": 282 + }, + { + "ce_ib": 18.05628776550293, + "ce_orig": 0.9637153148651123, + "epoch": 0.08109856927169459, + "kl_loss": 0.27664974331855774, + "loss_ib": 0.004572126083076, + "step": 282 + }, + { + "ce_ib": 18.621143341064453, + "ce_orig": 1.693290114402771, + "epoch": 0.08109856927169459, + "kl_loss": 0.2584025263786316, + "loss_ib": 0.004446139093488455, + "step": 282 + }, + { + "ce_ib": 11.559967041015625, + "ce_orig": 0.8692753911018372, + "epoch": 0.08138615285067223, + "kl_loss": 0.26356419920921326, + "loss_ib": 0.0037916384171694517, + "step": 283 + }, + { + "ce_ib": 16.148008346557617, + "ce_orig": 1.3362113237380981, + "epoch": 0.08138615285067223, + "kl_loss": 0.316663920879364, + "loss_ib": 0.004781439900398254, + "step": 283 + }, + { + "ce_ib": 21.178966522216797, + "ce_orig": 1.0704444646835327, + "epoch": 0.08138615285067223, + "kl_loss": 0.310921311378479, + "loss_ib": 0.005227109882980585, + "step": 283 + }, + { + "ce_ib": 14.54597282409668, + "ce_orig": 0.6114582419395447, + "epoch": 0.08138615285067223, + "kl_loss": 0.2650757431983948, + "loss_ib": 0.004105354659259319, + "step": 283 + }, + { + "ce_ib": 12.013655662536621, + "ce_orig": 0.8571666479110718, + "epoch": 0.08167373642964987, + "kl_loss": 0.2690030634403229, + "loss_ib": 0.003891396103426814, + "step": 284 + }, + { + "ce_ib": 16.22176170349121, + "ce_orig": 0.6218030452728271, + "epoch": 0.08167373642964987, + "kl_loss": 0.291636198759079, + "loss_ib": 0.004538537934422493, + "step": 284 + }, + { + "ce_ib": 16.29920768737793, + "ce_orig": 1.2184849977493286, + "epoch": 0.08167373642964987, + "kl_loss": 0.2997811734676361, + "loss_ib": 0.004627732560038567, + "step": 284 + }, + { + "ce_ib": 12.09832763671875, + "ce_orig": 0.6244350075721741, + "epoch": 0.08167373642964987, + "kl_loss": 0.2652917504310608, + "loss_ib": 0.0038627502508461475, + "step": 284 + }, + { + "epoch": 0.0819613200086275, + "grad_norm": 0.07042936980724335, + "learning_rate": 4.442675159235669e-05, + "loss": 0.9056, + "step": 285 + }, + { + "ce_ib": 13.477680206298828, + "ce_orig": 0.6597225666046143, + "epoch": 0.0819613200086275, + "kl_loss": 0.26786327362060547, + "loss_ib": 0.0040264008566737175, + "step": 285 + }, + { + "ce_ib": 16.648954391479492, + "ce_orig": 0.7288675904273987, + "epoch": 0.0819613200086275, + "kl_loss": 0.234561488032341, + "loss_ib": 0.004010510165244341, + "step": 285 + }, + { + "ce_ib": 16.058998107910156, + "ce_orig": 0.9044990539550781, + "epoch": 0.0819613200086275, + "kl_loss": 0.438146710395813, + "loss_ib": 0.0059873671270906925, + "step": 285 + }, + { + "ce_ib": 14.675004005432129, + "ce_orig": 0.6858831644058228, + "epoch": 0.0819613200086275, + "kl_loss": 0.41518154740333557, + "loss_ib": 0.005619315896183252, + "step": 285 + }, + { + "ce_ib": 19.281421661376953, + "ce_orig": 1.3555938005447388, + "epoch": 0.08224890358760514, + "kl_loss": 0.27201730012893677, + "loss_ib": 0.004648315254598856, + "step": 286 + }, + { + "ce_ib": 14.502872467041016, + "ce_orig": 0.7407470345497131, + "epoch": 0.08224890358760514, + "kl_loss": 0.16434180736541748, + "loss_ib": 0.0030937050469219685, + "step": 286 + }, + { + "ce_ib": 16.41741180419922, + "ce_orig": 1.1262027025222778, + "epoch": 0.08224890358760514, + "kl_loss": 0.2105521857738495, + "loss_ib": 0.0037472627591341734, + "step": 286 + }, + { + "ce_ib": 19.66245460510254, + "ce_orig": 1.3670978546142578, + "epoch": 0.08224890358760514, + "kl_loss": 0.2873149514198303, + "loss_ib": 0.004839394707232714, + "step": 286 + }, + { + "ce_ib": 13.970117568969727, + "ce_orig": 0.588868260383606, + "epoch": 0.0825364871665828, + "kl_loss": 0.27275267243385315, + "loss_ib": 0.004124538041651249, + "step": 287 + }, + { + "ce_ib": 17.439908981323242, + "ce_orig": 1.1837517023086548, + "epoch": 0.0825364871665828, + "kl_loss": 0.1923927515745163, + "loss_ib": 0.0036679182667285204, + "step": 287 + }, + { + "ce_ib": 10.239778518676758, + "ce_orig": 0.5817263722419739, + "epoch": 0.0825364871665828, + "kl_loss": 0.2323988527059555, + "loss_ib": 0.003347966354340315, + "step": 287 + }, + { + "ce_ib": 13.104039192199707, + "ce_orig": 0.8184726238250732, + "epoch": 0.0825364871665828, + "kl_loss": 0.272987425327301, + "loss_ib": 0.0040402780286967754, + "step": 287 + }, + { + "ce_ib": 11.393816947937012, + "ce_orig": 0.882415235042572, + "epoch": 0.08282407074556043, + "kl_loss": 0.17284469306468964, + "loss_ib": 0.0028678285889327526, + "step": 288 + }, + { + "ce_ib": 14.922150611877441, + "ce_orig": 0.6627479791641235, + "epoch": 0.08282407074556043, + "kl_loss": 0.20301824808120728, + "loss_ib": 0.0035223974846303463, + "step": 288 + }, + { + "ce_ib": 17.70345687866211, + "ce_orig": 0.603179931640625, + "epoch": 0.08282407074556043, + "kl_loss": 0.32347768545150757, + "loss_ib": 0.005005122162401676, + "step": 288 + }, + { + "ce_ib": 11.391678810119629, + "ce_orig": 0.6061888337135315, + "epoch": 0.08282407074556043, + "kl_loss": 0.26528626680374146, + "loss_ib": 0.003792030503973365, + "step": 288 + }, + { + "ce_ib": 13.464797019958496, + "ce_orig": 0.5479658246040344, + "epoch": 0.08311165432453807, + "kl_loss": 0.21538397669792175, + "loss_ib": 0.003500319318845868, + "step": 289 + }, + { + "ce_ib": 19.783390045166016, + "ce_orig": 1.4441841840744019, + "epoch": 0.08311165432453807, + "kl_loss": 0.2706390619277954, + "loss_ib": 0.004684729501605034, + "step": 289 + }, + { + "ce_ib": 16.319841384887695, + "ce_orig": 1.0974111557006836, + "epoch": 0.08311165432453807, + "kl_loss": 0.3325914144515991, + "loss_ib": 0.004957898054271936, + "step": 289 + }, + { + "ce_ib": 15.902824401855469, + "ce_orig": 0.6798999905586243, + "epoch": 0.08311165432453807, + "kl_loss": 0.2273552566766739, + "loss_ib": 0.003863835008814931, + "step": 289 + }, + { + "epoch": 0.08339923790351571, + "grad_norm": 0.07085248827934265, + "learning_rate": 4.522292993630574e-05, + "loss": 0.8988, + "step": 290 + }, + { + "ce_ib": 12.853606224060059, + "ce_orig": 0.5977413654327393, + "epoch": 0.08339923790351571, + "kl_loss": 0.258215069770813, + "loss_ib": 0.003867511171847582, + "step": 290 + }, + { + "ce_ib": 13.45907211303711, + "ce_orig": 0.7957695722579956, + "epoch": 0.08339923790351571, + "kl_loss": 0.2829551100730896, + "loss_ib": 0.00417545810341835, + "step": 290 + }, + { + "ce_ib": 16.264999389648438, + "ce_orig": 0.921114444732666, + "epoch": 0.08339923790351571, + "kl_loss": 0.2568701505661011, + "loss_ib": 0.004195201210677624, + "step": 290 + }, + { + "ce_ib": 11.070430755615234, + "ce_orig": 0.828477680683136, + "epoch": 0.08339923790351571, + "kl_loss": 0.18585243821144104, + "loss_ib": 0.0029655674006789923, + "step": 290 + }, + { + "ce_ib": 15.938783645629883, + "ce_orig": 0.8985275626182556, + "epoch": 0.08368682148249335, + "kl_loss": 0.322698712348938, + "loss_ib": 0.004820865113288164, + "step": 291 + }, + { + "ce_ib": 12.973055839538574, + "ce_orig": 0.9591237902641296, + "epoch": 0.08368682148249335, + "kl_loss": 0.1751486361026764, + "loss_ib": 0.0030487917829304934, + "step": 291 + }, + { + "ce_ib": 15.191100120544434, + "ce_orig": 0.9854549765586853, + "epoch": 0.08368682148249335, + "kl_loss": 0.2032536417245865, + "loss_ib": 0.0035516463685780764, + "step": 291 + }, + { + "ce_ib": 15.310328483581543, + "ce_orig": 0.8211847543716431, + "epoch": 0.08368682148249335, + "kl_loss": 0.2667645514011383, + "loss_ib": 0.004198677837848663, + "step": 291 + }, + { + "ce_ib": 16.732818603515625, + "ce_orig": 0.9525948166847229, + "epoch": 0.083974405061471, + "kl_loss": 0.24904996156692505, + "loss_ib": 0.004163781180977821, + "step": 292 + }, + { + "ce_ib": 14.454859733581543, + "ce_orig": 0.8364003896713257, + "epoch": 0.083974405061471, + "kl_loss": 0.300500750541687, + "loss_ib": 0.0044504934921860695, + "step": 292 + }, + { + "ce_ib": 16.7276611328125, + "ce_orig": 1.103460431098938, + "epoch": 0.083974405061471, + "kl_loss": 0.25054287910461426, + "loss_ib": 0.004178194794803858, + "step": 292 + }, + { + "ce_ib": 18.47587776184082, + "ce_orig": 1.1350774765014648, + "epoch": 0.083974405061471, + "kl_loss": 0.2811344861984253, + "loss_ib": 0.00465893279761076, + "step": 292 + }, + { + "ce_ib": 15.593929290771484, + "ce_orig": 0.7370646595954895, + "epoch": 0.08426198864044863, + "kl_loss": 0.21932320296764374, + "loss_ib": 0.003752624848857522, + "step": 293 + }, + { + "ce_ib": 15.527499198913574, + "ce_orig": 1.2750834226608276, + "epoch": 0.08426198864044863, + "kl_loss": 0.23349913954734802, + "loss_ib": 0.0038877411279827356, + "step": 293 + }, + { + "ce_ib": 12.307111740112305, + "ce_orig": 0.68174147605896, + "epoch": 0.08426198864044863, + "kl_loss": 0.29035478830337524, + "loss_ib": 0.004134258721023798, + "step": 293 + }, + { + "ce_ib": 11.335613250732422, + "ce_orig": 0.65586918592453, + "epoch": 0.08426198864044863, + "kl_loss": 0.21021895110607147, + "loss_ib": 0.0032357508316636086, + "step": 293 + }, + { + "ce_ib": 12.788888931274414, + "ce_orig": 1.1009180545806885, + "epoch": 0.08454957221942627, + "kl_loss": 0.2401624619960785, + "loss_ib": 0.00368051347322762, + "step": 294 + }, + { + "ce_ib": 16.075815200805664, + "ce_orig": 0.7001639604568481, + "epoch": 0.08454957221942627, + "kl_loss": 0.32635319232940674, + "loss_ib": 0.00487111322581768, + "step": 294 + }, + { + "ce_ib": 12.539616584777832, + "ce_orig": 0.7154040932655334, + "epoch": 0.08454957221942627, + "kl_loss": 0.1870480477809906, + "loss_ib": 0.0031244419515132904, + "step": 294 + }, + { + "ce_ib": 12.290575981140137, + "ce_orig": 0.8135526180267334, + "epoch": 0.08454957221942627, + "kl_loss": 0.2699443995952606, + "loss_ib": 0.003928501624614, + "step": 294 + }, + { + "epoch": 0.08483715579840391, + "grad_norm": 0.07897292077541351, + "learning_rate": 4.601910828025478e-05, + "loss": 0.8706, + "step": 295 + }, + { + "ce_ib": 18.31114959716797, + "ce_orig": 0.6431681513786316, + "epoch": 0.08483715579840391, + "kl_loss": 0.27935224771499634, + "loss_ib": 0.004624637309461832, + "step": 295 + }, + { + "ce_ib": 13.731470108032227, + "ce_orig": 0.8691681623458862, + "epoch": 0.08483715579840391, + "kl_loss": 0.19459585845470428, + "loss_ib": 0.003319105366244912, + "step": 295 + }, + { + "ce_ib": 11.220470428466797, + "ce_orig": 0.7137093544006348, + "epoch": 0.08483715579840391, + "kl_loss": 0.1932218372821808, + "loss_ib": 0.003054265398532152, + "step": 295 + }, + { + "ce_ib": 13.056318283081055, + "ce_orig": 0.5071139931678772, + "epoch": 0.08483715579840391, + "kl_loss": 0.21704959869384766, + "loss_ib": 0.0034761279821395874, + "step": 295 + }, + { + "ce_ib": 11.606334686279297, + "ce_orig": 0.7217742800712585, + "epoch": 0.08512473937738155, + "kl_loss": 0.1716514676809311, + "loss_ib": 0.002877148101106286, + "step": 296 + }, + { + "ce_ib": 17.7640438079834, + "ce_orig": 1.607155203819275, + "epoch": 0.08512473937738155, + "kl_loss": 0.29561007022857666, + "loss_ib": 0.004732504952698946, + "step": 296 + }, + { + "ce_ib": 17.47099494934082, + "ce_orig": 0.9363014698028564, + "epoch": 0.08512473937738155, + "kl_loss": 0.27039089798927307, + "loss_ib": 0.004451008513569832, + "step": 296 + }, + { + "ce_ib": 17.832012176513672, + "ce_orig": 1.6501764059066772, + "epoch": 0.08512473937738155, + "kl_loss": 0.5178996324539185, + "loss_ib": 0.006962197367101908, + "step": 296 + }, + { + "ce_ib": 13.307634353637695, + "ce_orig": 0.8553745746612549, + "epoch": 0.08541232295635919, + "kl_loss": 0.28302001953125, + "loss_ib": 0.004160963464528322, + "step": 297 + }, + { + "ce_ib": 13.746207237243652, + "ce_orig": 0.6645457744598389, + "epoch": 0.08541232295635919, + "kl_loss": 0.2449042946100235, + "loss_ib": 0.0038236635737121105, + "step": 297 + }, + { + "ce_ib": 11.31932544708252, + "ce_orig": 0.4558902978897095, + "epoch": 0.08541232295635919, + "kl_loss": 0.1459667682647705, + "loss_ib": 0.002591600175946951, + "step": 297 + }, + { + "ce_ib": 10.261171340942383, + "ce_orig": 0.7316778302192688, + "epoch": 0.08541232295635919, + "kl_loss": 0.18448230624198914, + "loss_ib": 0.002870940137654543, + "step": 297 + }, + { + "ce_ib": 15.214677810668945, + "ce_orig": 0.7406959533691406, + "epoch": 0.08569990653533684, + "kl_loss": 0.3475422263145447, + "loss_ib": 0.004996889736503363, + "step": 298 + }, + { + "ce_ib": 12.732254981994629, + "ce_orig": 0.9583520889282227, + "epoch": 0.08569990653533684, + "kl_loss": 0.2888698875904083, + "loss_ib": 0.004161924123764038, + "step": 298 + }, + { + "ce_ib": 13.56235408782959, + "ce_orig": 0.7003698348999023, + "epoch": 0.08569990653533684, + "kl_loss": 0.22925713658332825, + "loss_ib": 0.00364880682900548, + "step": 298 + }, + { + "ce_ib": 17.023927688598633, + "ce_orig": 0.7627611756324768, + "epoch": 0.08569990653533684, + "kl_loss": 0.3114122152328491, + "loss_ib": 0.004816514905542135, + "step": 298 + }, + { + "ce_ib": 12.935884475708008, + "ce_orig": 0.5755088329315186, + "epoch": 0.08598749011431447, + "kl_loss": 0.23949839174747467, + "loss_ib": 0.003688572207465768, + "step": 299 + }, + { + "ce_ib": 15.182515144348145, + "ce_orig": 0.7093390226364136, + "epoch": 0.08598749011431447, + "kl_loss": 0.254234254360199, + "loss_ib": 0.0040605938993394375, + "step": 299 + }, + { + "ce_ib": 10.937736511230469, + "ce_orig": 0.7415173649787903, + "epoch": 0.08598749011431447, + "kl_loss": 0.21827897429466248, + "loss_ib": 0.003276563249528408, + "step": 299 + }, + { + "ce_ib": 14.389042854309082, + "ce_orig": 0.6369369029998779, + "epoch": 0.08598749011431447, + "kl_loss": 0.25055113434791565, + "loss_ib": 0.003944415133446455, + "step": 299 + }, + { + "epoch": 0.08627507369329211, + "grad_norm": 0.08575107157230377, + "learning_rate": 4.681528662420383e-05, + "loss": 0.8178, + "step": 300 + }, + { + "ce_ib": 15.538275718688965, + "ce_orig": 0.7116795778274536, + "epoch": 0.08627507369329211, + "kl_loss": 0.1944524198770523, + "loss_ib": 0.0034983514342457056, + "step": 300 + }, + { + "ce_ib": 14.46932315826416, + "ce_orig": 1.0576528310775757, + "epoch": 0.08627507369329211, + "kl_loss": 0.1829913854598999, + "loss_ib": 0.0032768461387604475, + "step": 300 + }, + { + "ce_ib": 12.83568000793457, + "ce_orig": 1.0147863626480103, + "epoch": 0.08627507369329211, + "kl_loss": 0.25184863805770874, + "loss_ib": 0.0038020543288439512, + "step": 300 + }, + { + "ce_ib": 11.396459579467773, + "ce_orig": 0.6744865775108337, + "epoch": 0.08627507369329211, + "kl_loss": 0.21117661893367767, + "loss_ib": 0.003251412184908986, + "step": 300 + }, + { + "ce_ib": 17.51881980895996, + "ce_orig": 1.0844104290008545, + "epoch": 0.08656265727226975, + "kl_loss": 0.24072128534317017, + "loss_ib": 0.004159094765782356, + "step": 301 + }, + { + "ce_ib": 16.883913040161133, + "ce_orig": 0.7893635034561157, + "epoch": 0.08656265727226975, + "kl_loss": 0.21162733435630798, + "loss_ib": 0.0038046645931899548, + "step": 301 + }, + { + "ce_ib": 20.599260330200195, + "ce_orig": 1.6651158332824707, + "epoch": 0.08656265727226975, + "kl_loss": 0.26889339089393616, + "loss_ib": 0.004748859908431768, + "step": 301 + }, + { + "ce_ib": 13.75562858581543, + "ce_orig": 0.9824258685112, + "epoch": 0.08656265727226975, + "kl_loss": 0.2234395146369934, + "loss_ib": 0.0036099578719586134, + "step": 301 + }, + { + "ce_ib": 13.83020305633545, + "ce_orig": 0.5410613417625427, + "epoch": 0.08685024085124739, + "kl_loss": 0.2865542471408844, + "loss_ib": 0.0042485627345740795, + "step": 302 + }, + { + "ce_ib": 17.551300048828125, + "ce_orig": 0.7735820412635803, + "epoch": 0.08685024085124739, + "kl_loss": 0.25298500061035156, + "loss_ib": 0.004284979775547981, + "step": 302 + }, + { + "ce_ib": 14.208580017089844, + "ce_orig": 0.8505828380584717, + "epoch": 0.08685024085124739, + "kl_loss": 0.5346580147743225, + "loss_ib": 0.006767437793314457, + "step": 302 + }, + { + "ce_ib": 19.750244140625, + "ce_orig": 1.176154613494873, + "epoch": 0.08685024085124739, + "kl_loss": 0.2657851576805115, + "loss_ib": 0.004632875788956881, + "step": 302 + }, + { + "ce_ib": 14.662118911743164, + "ce_orig": 1.080672264099121, + "epoch": 0.08713782443022504, + "kl_loss": 0.18392251431941986, + "loss_ib": 0.003305436810478568, + "step": 303 + }, + { + "ce_ib": 11.911545753479004, + "ce_orig": 0.9907505512237549, + "epoch": 0.08713782443022504, + "kl_loss": 0.18622635304927826, + "loss_ib": 0.0030534181278198957, + "step": 303 + }, + { + "ce_ib": 15.689916610717773, + "ce_orig": 1.1870077848434448, + "epoch": 0.08713782443022504, + "kl_loss": 0.39201515913009644, + "loss_ib": 0.005489143077284098, + "step": 303 + }, + { + "ce_ib": 12.059552192687988, + "ce_orig": 0.5444112420082092, + "epoch": 0.08713782443022504, + "kl_loss": 0.23845581710338593, + "loss_ib": 0.0035905134864151478, + "step": 303 + }, + { + "ce_ib": 14.907177925109863, + "ce_orig": 1.255871295928955, + "epoch": 0.08742540800920268, + "kl_loss": 0.22089144587516785, + "loss_ib": 0.003699632128700614, + "step": 304 + }, + { + "ce_ib": 15.632399559020996, + "ce_orig": 1.2641204595565796, + "epoch": 0.08742540800920268, + "kl_loss": 0.18549595773220062, + "loss_ib": 0.0034181992523372173, + "step": 304 + }, + { + "ce_ib": 11.078656196594238, + "ce_orig": 0.4686051607131958, + "epoch": 0.08742540800920268, + "kl_loss": 0.2145492285490036, + "loss_ib": 0.0032533579505980015, + "step": 304 + }, + { + "ce_ib": 16.48569107055664, + "ce_orig": 1.2514501810073853, + "epoch": 0.08742540800920268, + "kl_loss": 0.15880295634269714, + "loss_ib": 0.0032365985680371523, + "step": 304 + }, + { + "epoch": 0.08771299158818031, + "grad_norm": 0.08359609544277191, + "learning_rate": 4.761146496815287e-05, + "loss": 0.8481, + "step": 305 + }, + { + "ce_ib": 20.280155181884766, + "ce_orig": 1.9041812419891357, + "epoch": 0.08771299158818031, + "kl_loss": 0.2823646068572998, + "loss_ib": 0.004851661156862974, + "step": 305 + }, + { + "ce_ib": 8.3218994140625, + "ce_orig": 0.7794256806373596, + "epoch": 0.08771299158818031, + "kl_loss": 0.15604904294013977, + "loss_ib": 0.0023926803842186928, + "step": 305 + }, + { + "ce_ib": 11.23653793334961, + "ce_orig": 0.9274653792381287, + "epoch": 0.08771299158818031, + "kl_loss": 0.22056221961975098, + "loss_ib": 0.0033292758744210005, + "step": 305 + }, + { + "ce_ib": 15.99108600616455, + "ce_orig": 0.8273392915725708, + "epoch": 0.08771299158818031, + "kl_loss": 0.287705659866333, + "loss_ib": 0.004476164933294058, + "step": 305 + }, + { + "ce_ib": 12.447872161865234, + "ce_orig": 1.0994980335235596, + "epoch": 0.08800057516715795, + "kl_loss": 0.1934283971786499, + "loss_ib": 0.0031790712382644415, + "step": 306 + }, + { + "ce_ib": 13.02301025390625, + "ce_orig": 0.9902395009994507, + "epoch": 0.08800057516715795, + "kl_loss": 0.18311861157417297, + "loss_ib": 0.003133486956357956, + "step": 306 + }, + { + "ce_ib": 15.912498474121094, + "ce_orig": 0.8098648190498352, + "epoch": 0.08800057516715795, + "kl_loss": 0.302369087934494, + "loss_ib": 0.004614940844476223, + "step": 306 + }, + { + "ce_ib": 11.363813400268555, + "ce_orig": 0.7102072238922119, + "epoch": 0.08800057516715795, + "kl_loss": 0.21124617755413055, + "loss_ib": 0.0032488428987562656, + "step": 306 + }, + { + "ce_ib": 19.32331085205078, + "ce_orig": 1.889481544494629, + "epoch": 0.08828815874613559, + "kl_loss": 0.2169814109802246, + "loss_ib": 0.004102144856005907, + "step": 307 + }, + { + "ce_ib": 12.5094633102417, + "ce_orig": 0.49748650193214417, + "epoch": 0.08828815874613559, + "kl_loss": 0.19575588405132294, + "loss_ib": 0.0032085052225738764, + "step": 307 + }, + { + "ce_ib": 11.513327598571777, + "ce_orig": 0.46595466136932373, + "epoch": 0.08828815874613559, + "kl_loss": 0.22934874892234802, + "loss_ib": 0.0034448199439793825, + "step": 307 + }, + { + "ce_ib": 15.249246597290039, + "ce_orig": 0.3422534167766571, + "epoch": 0.08828815874613559, + "kl_loss": 0.20166319608688354, + "loss_ib": 0.0035415564198046923, + "step": 307 + }, + { + "ce_ib": 17.809545516967773, + "ce_orig": 1.352950096130371, + "epoch": 0.08857574232511324, + "kl_loss": 0.26321953535079956, + "loss_ib": 0.004413149785250425, + "step": 308 + }, + { + "ce_ib": 17.206872940063477, + "ce_orig": 1.341861605644226, + "epoch": 0.08857574232511324, + "kl_loss": 0.19497671723365784, + "loss_ib": 0.003670454490929842, + "step": 308 + }, + { + "ce_ib": 15.39387321472168, + "ce_orig": 0.6654926538467407, + "epoch": 0.08857574232511324, + "kl_loss": 0.25926852226257324, + "loss_ib": 0.004132072441279888, + "step": 308 + }, + { + "ce_ib": 14.418597221374512, + "ce_orig": 0.6665434837341309, + "epoch": 0.08857574232511324, + "kl_loss": 0.23031118512153625, + "loss_ib": 0.0037449717056006193, + "step": 308 + }, + { + "ce_ib": 14.938549995422363, + "ce_orig": 0.7124409079551697, + "epoch": 0.08886332590409088, + "kl_loss": 0.28744351863861084, + "loss_ib": 0.004368290305137634, + "step": 309 + }, + { + "ce_ib": 15.685905456542969, + "ce_orig": 1.2443615198135376, + "epoch": 0.08886332590409088, + "kl_loss": 0.29595404863357544, + "loss_ib": 0.004528130870312452, + "step": 309 + }, + { + "ce_ib": 15.15534496307373, + "ce_orig": 1.0202579498291016, + "epoch": 0.08886332590409088, + "kl_loss": 0.36547189950942993, + "loss_ib": 0.005170253571122885, + "step": 309 + }, + { + "ce_ib": 16.149023056030273, + "ce_orig": 1.1084511280059814, + "epoch": 0.08886332590409088, + "kl_loss": 0.20774857699871063, + "loss_ib": 0.0036923878360539675, + "step": 309 + }, + { + "epoch": 0.08915090948306852, + "grad_norm": 0.08066050708293915, + "learning_rate": 4.840764331210191e-05, + "loss": 0.9081, + "step": 310 + }, + { + "ce_ib": 11.725279808044434, + "ce_orig": 0.6793712973594666, + "epoch": 0.08915090948306852, + "kl_loss": 0.21743886172771454, + "loss_ib": 0.003346916288137436, + "step": 310 + }, + { + "ce_ib": 9.597118377685547, + "ce_orig": 0.5881115794181824, + "epoch": 0.08915090948306852, + "kl_loss": 0.20812270045280457, + "loss_ib": 0.0030409388709813356, + "step": 310 + }, + { + "ce_ib": 17.555280685424805, + "ce_orig": 1.6664210557937622, + "epoch": 0.08915090948306852, + "kl_loss": 0.3189018964767456, + "loss_ib": 0.004944546613842249, + "step": 310 + }, + { + "ce_ib": 10.35605239868164, + "ce_orig": 0.6129300594329834, + "epoch": 0.08915090948306852, + "kl_loss": 0.19374847412109375, + "loss_ib": 0.0029730896931141615, + "step": 310 + }, + { + "ce_ib": 10.367539405822754, + "ce_orig": 0.27700501680374146, + "epoch": 0.08943849306204615, + "kl_loss": 0.37522298097610474, + "loss_ib": 0.00478898361325264, + "step": 311 + }, + { + "ce_ib": 12.798125267028809, + "ce_orig": 0.35544443130493164, + "epoch": 0.08943849306204615, + "kl_loss": 0.21808908879756927, + "loss_ib": 0.003460703417658806, + "step": 311 + }, + { + "ce_ib": 18.546096801757812, + "ce_orig": 1.2737302780151367, + "epoch": 0.08943849306204615, + "kl_loss": 0.1624414622783661, + "loss_ib": 0.003479024162515998, + "step": 311 + }, + { + "ce_ib": 19.247314453125, + "ce_orig": 1.827391266822815, + "epoch": 0.08943849306204615, + "kl_loss": 0.24330879747867584, + "loss_ib": 0.0043578194454312325, + "step": 311 + }, + { + "ce_ib": 12.14682674407959, + "ce_orig": 0.4805859923362732, + "epoch": 0.08972607664102379, + "kl_loss": 0.2552676796913147, + "loss_ib": 0.003767359536141157, + "step": 312 + }, + { + "ce_ib": 13.024700164794922, + "ce_orig": 0.9543373584747314, + "epoch": 0.08972607664102379, + "kl_loss": 0.2680973708629608, + "loss_ib": 0.0039834436029195786, + "step": 312 + }, + { + "ce_ib": 15.072936058044434, + "ce_orig": 1.2514426708221436, + "epoch": 0.08972607664102379, + "kl_loss": 0.19017720222473145, + "loss_ib": 0.0034090655390173197, + "step": 312 + }, + { + "ce_ib": 11.06240177154541, + "ce_orig": 0.6679608225822449, + "epoch": 0.08972607664102379, + "kl_loss": 0.17655614018440247, + "loss_ib": 0.0028718013782054186, + "step": 312 + }, + { + "ce_ib": 11.684070587158203, + "ce_orig": 0.5835570096969604, + "epoch": 0.09001366022000144, + "kl_loss": 0.18113084137439728, + "loss_ib": 0.0029797153547406197, + "step": 313 + }, + { + "ce_ib": 15.398133277893066, + "ce_orig": 0.5319973230361938, + "epoch": 0.09001366022000144, + "kl_loss": 0.2180468738079071, + "loss_ib": 0.0037202818784862757, + "step": 313 + }, + { + "ce_ib": 18.00635528564453, + "ce_orig": 1.5159127712249756, + "epoch": 0.09001366022000144, + "kl_loss": 0.25306442379951477, + "loss_ib": 0.004331279546022415, + "step": 313 + }, + { + "ce_ib": 16.58292007446289, + "ce_orig": 1.6001865863800049, + "epoch": 0.09001366022000144, + "kl_loss": 0.2032628208398819, + "loss_ib": 0.0036909200716763735, + "step": 313 + }, + { + "ce_ib": 9.98106575012207, + "ce_orig": 0.5567405223846436, + "epoch": 0.09030124379897908, + "kl_loss": 0.18281474709510803, + "loss_ib": 0.002826254116371274, + "step": 314 + }, + { + "ce_ib": 14.181166648864746, + "ce_orig": 0.7332763075828552, + "epoch": 0.09030124379897908, + "kl_loss": 0.2009732574224472, + "loss_ib": 0.0034278493840247393, + "step": 314 + }, + { + "ce_ib": 14.160673141479492, + "ce_orig": 1.0669901371002197, + "epoch": 0.09030124379897908, + "kl_loss": 0.1853453665971756, + "loss_ib": 0.0032695208210498095, + "step": 314 + }, + { + "ce_ib": 13.65168285369873, + "ce_orig": 0.6072067618370056, + "epoch": 0.09030124379897908, + "kl_loss": 0.2821107506752014, + "loss_ib": 0.004186275415122509, + "step": 314 + }, + { + "epoch": 0.09058882737795672, + "grad_norm": 0.0828433707356453, + "learning_rate": 4.920382165605096e-05, + "loss": 0.9224, + "step": 315 + }, + { + "ce_ib": 17.264680862426758, + "ce_orig": 1.208104133605957, + "epoch": 0.09058882737795672, + "kl_loss": 0.2404087483882904, + "loss_ib": 0.0041305553168058395, + "step": 315 + }, + { + "ce_ib": 9.675837516784668, + "ce_orig": 0.6803741455078125, + "epoch": 0.09058882737795672, + "kl_loss": 0.17154952883720398, + "loss_ib": 0.002683078870177269, + "step": 315 + }, + { + "ce_ib": 7.8654890060424805, + "ce_orig": 0.27281931042671204, + "epoch": 0.09058882737795672, + "kl_loss": 0.3646223545074463, + "loss_ib": 0.004432772286236286, + "step": 315 + }, + { + "ce_ib": 10.074167251586914, + "ce_orig": 0.7812896370887756, + "epoch": 0.09058882737795672, + "kl_loss": 0.1766500473022461, + "loss_ib": 0.002773917280137539, + "step": 315 + }, + { + "ce_ib": 9.807964324951172, + "ce_orig": 0.7548955082893372, + "epoch": 0.09087641095693436, + "kl_loss": 0.15925069153308868, + "loss_ib": 0.0025733031798154116, + "step": 316 + }, + { + "ce_ib": 12.453634262084961, + "ce_orig": 0.3912903964519501, + "epoch": 0.09087641095693436, + "kl_loss": 0.28723639249801636, + "loss_ib": 0.004117727279663086, + "step": 316 + }, + { + "ce_ib": 15.324727058410645, + "ce_orig": 0.6814879179000854, + "epoch": 0.09087641095693436, + "kl_loss": 0.2591487467288971, + "loss_ib": 0.0041239601559937, + "step": 316 + }, + { + "ce_ib": 14.351997375488281, + "ce_orig": 1.1507986783981323, + "epoch": 0.09087641095693436, + "kl_loss": 0.1707887053489685, + "loss_ib": 0.003143086563795805, + "step": 316 + }, + { + "ce_ib": 11.927781105041504, + "ce_orig": 0.5891753435134888, + "epoch": 0.091163994535912, + "kl_loss": 0.2667624056339264, + "loss_ib": 0.0038604019209742546, + "step": 317 + }, + { + "ce_ib": 13.470232009887695, + "ce_orig": 0.6153560280799866, + "epoch": 0.091163994535912, + "kl_loss": 0.14252835512161255, + "loss_ib": 0.002772306790575385, + "step": 317 + }, + { + "ce_ib": 14.50222396850586, + "ce_orig": 0.888332724571228, + "epoch": 0.091163994535912, + "kl_loss": 0.16025802493095398, + "loss_ib": 0.003052802523598075, + "step": 317 + }, + { + "ce_ib": 16.769819259643555, + "ce_orig": 1.285980463027954, + "epoch": 0.091163994535912, + "kl_loss": 0.21203196048736572, + "loss_ib": 0.003797301556915045, + "step": 317 + }, + { + "ce_ib": 18.3016414642334, + "ce_orig": 0.6251075863838196, + "epoch": 0.09145157811488965, + "kl_loss": 0.22406277060508728, + "loss_ib": 0.00407079141587019, + "step": 318 + }, + { + "ce_ib": 13.008817672729492, + "ce_orig": 0.5530975461006165, + "epoch": 0.09145157811488965, + "kl_loss": 0.20128408074378967, + "loss_ib": 0.0033137225545942783, + "step": 318 + }, + { + "ce_ib": 16.728418350219727, + "ce_orig": 1.2118330001831055, + "epoch": 0.09145157811488965, + "kl_loss": 0.23463211953639984, + "loss_ib": 0.004019163083285093, + "step": 318 + }, + { + "ce_ib": 11.389937400817871, + "ce_orig": 0.665806770324707, + "epoch": 0.09145157811488965, + "kl_loss": 0.44625556468963623, + "loss_ib": 0.005601549055427313, + "step": 318 + }, + { + "ce_ib": 17.052478790283203, + "ce_orig": 1.3070660829544067, + "epoch": 0.09173916169386728, + "kl_loss": 0.22904498875141144, + "loss_ib": 0.003995697479695082, + "step": 319 + }, + { + "ce_ib": 16.69203758239746, + "ce_orig": 0.9425002336502075, + "epoch": 0.09173916169386728, + "kl_loss": 0.4092777371406555, + "loss_ib": 0.005761981010437012, + "step": 319 + }, + { + "ce_ib": 9.762811660766602, + "ce_orig": 0.8007186651229858, + "epoch": 0.09173916169386728, + "kl_loss": 0.19042058289051056, + "loss_ib": 0.0028804868925362825, + "step": 319 + }, + { + "ce_ib": 15.089471817016602, + "ce_orig": 0.9387741088867188, + "epoch": 0.09173916169386728, + "kl_loss": 0.1873193383216858, + "loss_ib": 0.0033821403048932552, + "step": 319 + }, + { + "epoch": 0.09202674527284492, + "grad_norm": 0.09044753015041351, + "learning_rate": 5e-05, + "loss": 0.8929, + "step": 320 + }, + { + "ce_ib": 8.850724220275879, + "ce_orig": 0.6556456089019775, + "epoch": 0.09202674527284492, + "kl_loss": 0.12834425270557404, + "loss_ib": 0.002168514998629689, + "step": 320 + }, + { + "ce_ib": 14.284525871276855, + "ce_orig": 1.2114744186401367, + "epoch": 0.09202674527284492, + "kl_loss": 0.2224428355693817, + "loss_ib": 0.0036528806667774916, + "step": 320 + }, + { + "ce_ib": 16.129127502441406, + "ce_orig": 0.861198902130127, + "epoch": 0.09202674527284492, + "kl_loss": 0.1843017041683197, + "loss_ib": 0.0034559296909719706, + "step": 320 + }, + { + "ce_ib": 13.054864883422852, + "ce_orig": 0.7228923439979553, + "epoch": 0.09202674527284492, + "kl_loss": 0.20991846919059753, + "loss_ib": 0.003404670860618353, + "step": 320 + }, + { + "ce_ib": 14.091462135314941, + "ce_orig": 0.6277745962142944, + "epoch": 0.09231432885182256, + "kl_loss": 0.29013434052467346, + "loss_ib": 0.004310489632189274, + "step": 321 + }, + { + "ce_ib": 11.505946159362793, + "ce_orig": 0.8680534958839417, + "epoch": 0.09231432885182256, + "kl_loss": 0.19910269975662231, + "loss_ib": 0.003141621593385935, + "step": 321 + }, + { + "ce_ib": 15.06042194366455, + "ce_orig": 1.2571470737457275, + "epoch": 0.09231432885182256, + "kl_loss": 0.18096956610679626, + "loss_ib": 0.0033157377038151026, + "step": 321 + }, + { + "ce_ib": 14.149365425109863, + "ce_orig": 0.4702112078666687, + "epoch": 0.09231432885182256, + "kl_loss": 0.24852751195430756, + "loss_ib": 0.0039002113044261932, + "step": 321 + }, + { + "ce_ib": 21.1373348236084, + "ce_orig": 1.5176701545715332, + "epoch": 0.0926019124308002, + "kl_loss": 0.2831588089466095, + "loss_ib": 0.004945321474224329, + "step": 322 + }, + { + "ce_ib": 14.581371307373047, + "ce_orig": 0.9066507816314697, + "epoch": 0.0926019124308002, + "kl_loss": 0.4094810485839844, + "loss_ib": 0.005552947521209717, + "step": 322 + }, + { + "ce_ib": 9.24908447265625, + "ce_orig": 0.6094862818717957, + "epoch": 0.0926019124308002, + "kl_loss": 0.16086669266223907, + "loss_ib": 0.002533575287088752, + "step": 322 + }, + { + "ce_ib": 12.565040588378906, + "ce_orig": 0.8015382289886475, + "epoch": 0.0926019124308002, + "kl_loss": 0.27496886253356934, + "loss_ib": 0.0040061925537884235, + "step": 322 + }, + { + "ce_ib": 11.96411418914795, + "ce_orig": 0.6789979934692383, + "epoch": 0.09288949600977785, + "kl_loss": 0.14112810790538788, + "loss_ib": 0.0026076924987137318, + "step": 323 + }, + { + "ce_ib": 14.88967227935791, + "ce_orig": 0.8319886326789856, + "epoch": 0.09288949600977785, + "kl_loss": 0.2537601888179779, + "loss_ib": 0.004026568960398436, + "step": 323 + }, + { + "ce_ib": 10.79393482208252, + "ce_orig": 0.5163490176200867, + "epoch": 0.09288949600977785, + "kl_loss": 0.1722070872783661, + "loss_ib": 0.002801464172080159, + "step": 323 + }, + { + "ce_ib": 10.035134315490723, + "ce_orig": 0.5581719875335693, + "epoch": 0.09288949600977785, + "kl_loss": 0.2455272376537323, + "loss_ib": 0.003458785591647029, + "step": 323 + }, + { + "ce_ib": 15.038890838623047, + "ce_orig": 1.1411057710647583, + "epoch": 0.09317707958875548, + "kl_loss": 0.2383771389722824, + "loss_ib": 0.0038876603357493877, + "step": 324 + }, + { + "ce_ib": 11.690932273864746, + "ce_orig": 0.6252244114875793, + "epoch": 0.09317707958875548, + "kl_loss": 0.1746075451374054, + "loss_ib": 0.002915168646723032, + "step": 324 + }, + { + "ce_ib": 14.972967147827148, + "ce_orig": 1.1634771823883057, + "epoch": 0.09317707958875548, + "kl_loss": 0.24864430725574493, + "loss_ib": 0.003983739297837019, + "step": 324 + }, + { + "ce_ib": 13.491129875183105, + "ce_orig": 0.9431242346763611, + "epoch": 0.09317707958875548, + "kl_loss": 0.19168300926685333, + "loss_ib": 0.0032659429125487804, + "step": 324 + }, + { + "epoch": 0.09346466316773312, + "grad_norm": 0.07285797595977783, + "learning_rate": 4.999996988459869e-05, + "loss": 0.9029, + "step": 325 + }, + { + "ce_ib": 10.874665260314941, + "ce_orig": 0.8838172554969788, + "epoch": 0.09346466316773312, + "kl_loss": 0.21639001369476318, + "loss_ib": 0.00325136655010283, + "step": 325 + }, + { + "ce_ib": 12.071526527404785, + "ce_orig": 0.715691089630127, + "epoch": 0.09346466316773312, + "kl_loss": 0.17473536729812622, + "loss_ib": 0.002954506315290928, + "step": 325 + }, + { + "ce_ib": 9.097990036010742, + "ce_orig": 0.6916231513023376, + "epoch": 0.09346466316773312, + "kl_loss": 0.14074796438217163, + "loss_ib": 0.0023172786459326744, + "step": 325 + }, + { + "ce_ib": 7.898350238800049, + "ce_orig": 0.7045942544937134, + "epoch": 0.09346466316773312, + "kl_loss": 0.19722947478294373, + "loss_ib": 0.0027621297631412745, + "step": 325 + }, + { + "ce_ib": 19.925533294677734, + "ce_orig": 1.8780890703201294, + "epoch": 0.09375224674671076, + "kl_loss": 0.29034847021102905, + "loss_ib": 0.004896038211882114, + "step": 326 + }, + { + "ce_ib": 11.936103820800781, + "ce_orig": 0.8948700428009033, + "epoch": 0.09375224674671076, + "kl_loss": 0.22047904133796692, + "loss_ib": 0.003398400731384754, + "step": 326 + }, + { + "ce_ib": 14.651416778564453, + "ce_orig": 1.5577762126922607, + "epoch": 0.09375224674671076, + "kl_loss": 0.2572871744632721, + "loss_ib": 0.004038013052195311, + "step": 326 + }, + { + "ce_ib": 14.2105073928833, + "ce_orig": 0.6887364983558655, + "epoch": 0.09375224674671076, + "kl_loss": 0.1484694480895996, + "loss_ib": 0.0029057450592517853, + "step": 326 + }, + { + "ce_ib": 9.847541809082031, + "ce_orig": 0.7175891995429993, + "epoch": 0.0940398303256884, + "kl_loss": 0.251788467168808, + "loss_ib": 0.003502638777717948, + "step": 327 + }, + { + "ce_ib": 8.781983375549316, + "ce_orig": 0.6928913593292236, + "epoch": 0.0940398303256884, + "kl_loss": 0.1348564177751541, + "loss_ib": 0.0022267624735832214, + "step": 327 + }, + { + "ce_ib": 11.540362358093262, + "ce_orig": 0.7704603672027588, + "epoch": 0.0940398303256884, + "kl_loss": 0.16489502787590027, + "loss_ib": 0.002802986418828368, + "step": 327 + }, + { + "ce_ib": 13.85096263885498, + "ce_orig": 0.8789340853691101, + "epoch": 0.0940398303256884, + "kl_loss": 0.17469263076782227, + "loss_ib": 0.003132022451609373, + "step": 327 + }, + { + "ce_ib": 9.01992130279541, + "ce_orig": 0.39120611548423767, + "epoch": 0.09432741390466605, + "kl_loss": 0.17137807607650757, + "loss_ib": 0.0026157726533710957, + "step": 328 + }, + { + "ce_ib": 12.827068328857422, + "ce_orig": 0.6231464147567749, + "epoch": 0.09432741390466605, + "kl_loss": 0.24973925948143005, + "loss_ib": 0.003780099330469966, + "step": 328 + }, + { + "ce_ib": 13.935663223266602, + "ce_orig": 0.9439969062805176, + "epoch": 0.09432741390466605, + "kl_loss": 0.2583736777305603, + "loss_ib": 0.003977302927523851, + "step": 328 + }, + { + "ce_ib": 15.863046646118164, + "ce_orig": 1.0275061130523682, + "epoch": 0.09432741390466605, + "kl_loss": 0.22072093188762665, + "loss_ib": 0.00379351363517344, + "step": 328 + }, + { + "ce_ib": 16.495813369750977, + "ce_orig": 1.1426280736923218, + "epoch": 0.09461499748364369, + "kl_loss": 0.1818699985742569, + "loss_ib": 0.0034682813566178083, + "step": 329 + }, + { + "ce_ib": 12.4020357131958, + "ce_orig": 0.8381017446517944, + "epoch": 0.09461499748364369, + "kl_loss": 0.16994866728782654, + "loss_ib": 0.0029396903701126575, + "step": 329 + }, + { + "ce_ib": 10.978039741516113, + "ce_orig": 0.6018507480621338, + "epoch": 0.09461499748364369, + "kl_loss": 0.1774011105298996, + "loss_ib": 0.0028718148823827505, + "step": 329 + }, + { + "ce_ib": 13.26439380645752, + "ce_orig": 0.8519594669342041, + "epoch": 0.09461499748364369, + "kl_loss": 0.1966477632522583, + "loss_ib": 0.003292917041108012, + "step": 329 + }, + { + "epoch": 0.09490258106262132, + "grad_norm": 0.08392878621816635, + "learning_rate": 4.9999879538467306e-05, + "loss": 0.9175, + "step": 330 + }, + { + "ce_ib": 14.5608549118042, + "ce_orig": 1.4586288928985596, + "epoch": 0.09490258106262132, + "kl_loss": 0.220241978764534, + "loss_ib": 0.0036585049238055944, + "step": 330 + }, + { + "ce_ib": 16.367464065551758, + "ce_orig": 0.7915551662445068, + "epoch": 0.09490258106262132, + "kl_loss": 0.22915303707122803, + "loss_ib": 0.003928276710212231, + "step": 330 + }, + { + "ce_ib": 13.064582824707031, + "ce_orig": 0.6998893618583679, + "epoch": 0.09490258106262132, + "kl_loss": 0.38041651248931885, + "loss_ib": 0.005110623314976692, + "step": 330 + }, + { + "ce_ib": 11.392269134521484, + "ce_orig": 0.36753031611442566, + "epoch": 0.09490258106262132, + "kl_loss": 0.4851597547531128, + "loss_ib": 0.0059908246621489525, + "step": 330 + }, + { + "ce_ib": 12.86525821685791, + "ce_orig": 0.7186346054077148, + "epoch": 0.09519016464159896, + "kl_loss": 0.15524542331695557, + "loss_ib": 0.0028389799408614635, + "step": 331 + }, + { + "ce_ib": 14.1820707321167, + "ce_orig": 0.8073091506958008, + "epoch": 0.09519016464159896, + "kl_loss": 0.22929759323596954, + "loss_ib": 0.003711183089762926, + "step": 331 + }, + { + "ce_ib": 12.15315055847168, + "ce_orig": 0.5545368194580078, + "epoch": 0.09519016464159896, + "kl_loss": 0.22285009920597076, + "loss_ib": 0.003443815978243947, + "step": 331 + }, + { + "ce_ib": 12.464353561401367, + "ce_orig": 0.864552915096283, + "epoch": 0.09519016464159896, + "kl_loss": 0.2674625515937805, + "loss_ib": 0.00392106082290411, + "step": 331 + }, + { + "ce_ib": 8.458319664001465, + "ce_orig": 0.49817538261413574, + "epoch": 0.0954777482205766, + "kl_loss": 0.20442625880241394, + "loss_ib": 0.002890094416216016, + "step": 332 + }, + { + "ce_ib": 15.385281562805176, + "ce_orig": 1.422017216682434, + "epoch": 0.0954777482205766, + "kl_loss": 0.25458666682243347, + "loss_ib": 0.004084394313395023, + "step": 332 + }, + { + "ce_ib": 13.032305717468262, + "ce_orig": 0.5466614365577698, + "epoch": 0.0954777482205766, + "kl_loss": 0.2672412097454071, + "loss_ib": 0.003975642379373312, + "step": 332 + }, + { + "ce_ib": 12.244396209716797, + "ce_orig": 0.4979858100414276, + "epoch": 0.0954777482205766, + "kl_loss": 0.1899423450231552, + "loss_ib": 0.0031238629017025232, + "step": 332 + }, + { + "ce_ib": 9.587015151977539, + "ce_orig": 0.3958915174007416, + "epoch": 0.09576533179955425, + "kl_loss": 0.31766587495803833, + "loss_ib": 0.004135360009968281, + "step": 333 + }, + { + "ce_ib": 16.561017990112305, + "ce_orig": 1.007829189300537, + "epoch": 0.09576533179955425, + "kl_loss": 0.20323669910430908, + "loss_ib": 0.0036884688306599855, + "step": 333 + }, + { + "ce_ib": 13.782530784606934, + "ce_orig": 0.8161399960517883, + "epoch": 0.09576533179955425, + "kl_loss": 0.2164289653301239, + "loss_ib": 0.00354254269041121, + "step": 333 + }, + { + "ce_ib": 9.973050117492676, + "ce_orig": 0.6706444025039673, + "epoch": 0.09576533179955425, + "kl_loss": 0.20162354409694672, + "loss_ib": 0.0030135405249893665, + "step": 333 + }, + { + "ce_ib": 12.195393562316895, + "ce_orig": 0.613254964351654, + "epoch": 0.09605291537853189, + "kl_loss": 0.2231156826019287, + "loss_ib": 0.003450696123763919, + "step": 334 + }, + { + "ce_ib": 8.084924697875977, + "ce_orig": 0.27541494369506836, + "epoch": 0.09605291537853189, + "kl_loss": 0.5247204303741455, + "loss_ib": 0.006055696401745081, + "step": 334 + }, + { + "ce_ib": 11.561151504516602, + "ce_orig": 0.5130017995834351, + "epoch": 0.09605291537853189, + "kl_loss": 0.15488451719284058, + "loss_ib": 0.0027049602940678596, + "step": 334 + }, + { + "ce_ib": 12.512232780456543, + "ce_orig": 0.6367411613464355, + "epoch": 0.09605291537853189, + "kl_loss": 0.2246486246585846, + "loss_ib": 0.003497709520161152, + "step": 334 + }, + { + "epoch": 0.09634049895750953, + "grad_norm": 0.07655756175518036, + "learning_rate": 4.999972896182352e-05, + "loss": 0.8394, + "step": 335 + }, + { + "ce_ib": 12.717463493347168, + "ce_orig": 0.9400395750999451, + "epoch": 0.09634049895750953, + "kl_loss": 0.17612457275390625, + "loss_ib": 0.0030329918954521418, + "step": 335 + }, + { + "ce_ib": 12.413676261901855, + "ce_orig": 0.973748505115509, + "epoch": 0.09634049895750953, + "kl_loss": 0.24444803595542908, + "loss_ib": 0.0036858480889350176, + "step": 335 + }, + { + "ce_ib": 14.029793739318848, + "ce_orig": 0.7324392795562744, + "epoch": 0.09634049895750953, + "kl_loss": 0.18520355224609375, + "loss_ib": 0.003255015006288886, + "step": 335 + }, + { + "ce_ib": 15.990370750427246, + "ce_orig": 0.6844960451126099, + "epoch": 0.09634049895750953, + "kl_loss": 0.27585840225219727, + "loss_ib": 0.004357621073722839, + "step": 335 + }, + { + "ce_ib": 14.750761985778809, + "ce_orig": 1.194319725036621, + "epoch": 0.09662808253648716, + "kl_loss": 0.67383873462677, + "loss_ib": 0.008213463239371777, + "step": 336 + }, + { + "ce_ib": 17.5143985748291, + "ce_orig": 1.5835182666778564, + "epoch": 0.09662808253648716, + "kl_loss": 0.2557618021965027, + "loss_ib": 0.0043090577237308025, + "step": 336 + }, + { + "ce_ib": 17.389286041259766, + "ce_orig": 1.3591183423995972, + "epoch": 0.09662808253648716, + "kl_loss": 0.27419230341911316, + "loss_ib": 0.004480851348489523, + "step": 336 + }, + { + "ce_ib": 9.241145133972168, + "ce_orig": 0.607307493686676, + "epoch": 0.09662808253648716, + "kl_loss": 0.11753670126199722, + "loss_ib": 0.002099481411278248, + "step": 336 + }, + { + "ce_ib": 15.914252281188965, + "ce_orig": 1.711224913597107, + "epoch": 0.0969156661154648, + "kl_loss": 0.2505919933319092, + "loss_ib": 0.004097345285117626, + "step": 337 + }, + { + "ce_ib": 14.283632278442383, + "ce_orig": 0.9926325082778931, + "epoch": 0.0969156661154648, + "kl_loss": 0.22402063012123108, + "loss_ib": 0.0036685692612081766, + "step": 337 + }, + { + "ce_ib": 16.994945526123047, + "ce_orig": 0.8979167938232422, + "epoch": 0.0969156661154648, + "kl_loss": 0.22958716750144958, + "loss_ib": 0.003995365928858519, + "step": 337 + }, + { + "ce_ib": 15.639780044555664, + "ce_orig": 0.5237170457839966, + "epoch": 0.0969156661154648, + "kl_loss": 0.2666119337081909, + "loss_ib": 0.004230096936225891, + "step": 337 + }, + { + "ce_ib": 18.789344787597656, + "ce_orig": 1.6600208282470703, + "epoch": 0.09720324969444245, + "kl_loss": 0.2622734010219574, + "loss_ib": 0.004501668270677328, + "step": 338 + }, + { + "ce_ib": 14.16718578338623, + "ce_orig": 0.8235701322555542, + "epoch": 0.09720324969444245, + "kl_loss": 0.1999812126159668, + "loss_ib": 0.0034165303222835064, + "step": 338 + }, + { + "ce_ib": 15.722373008728027, + "ce_orig": 0.8121756315231323, + "epoch": 0.09720324969444245, + "kl_loss": 0.17169177532196045, + "loss_ib": 0.003289154963567853, + "step": 338 + }, + { + "ce_ib": 12.625021934509277, + "ce_orig": 0.4633500277996063, + "epoch": 0.09720324969444245, + "kl_loss": 0.17351466417312622, + "loss_ib": 0.0029976486694067717, + "step": 338 + }, + { + "ce_ib": 14.416272163391113, + "ce_orig": 0.5611670613288879, + "epoch": 0.09749083327342009, + "kl_loss": 0.29108044505119324, + "loss_ib": 0.004352431278675795, + "step": 339 + }, + { + "ce_ib": 10.808735847473145, + "ce_orig": 0.768107533454895, + "epoch": 0.09749083327342009, + "kl_loss": 0.22904689610004425, + "loss_ib": 0.0033713423181325197, + "step": 339 + }, + { + "ce_ib": 17.312829971313477, + "ce_orig": 1.2763899564743042, + "epoch": 0.09749083327342009, + "kl_loss": 0.19993865489959717, + "loss_ib": 0.003730669617652893, + "step": 339 + }, + { + "ce_ib": 17.75509262084961, + "ce_orig": 1.1115076541900635, + "epoch": 0.09749083327342009, + "kl_loss": 0.26826444268226624, + "loss_ib": 0.004458153620362282, + "step": 339 + }, + { + "epoch": 0.09777841685239773, + "grad_norm": 0.07021026313304901, + "learning_rate": 4.999951815503011e-05, + "loss": 0.8976, + "step": 340 + }, + { + "ce_ib": 7.319950103759766, + "ce_orig": 0.25490763783454895, + "epoch": 0.09777841685239773, + "kl_loss": 0.4792310297489166, + "loss_ib": 0.005524305161088705, + "step": 340 + }, + { + "ce_ib": 14.642142295837402, + "ce_orig": 0.5069236755371094, + "epoch": 0.09777841685239773, + "kl_loss": 0.23783719539642334, + "loss_ib": 0.003842586185783148, + "step": 340 + }, + { + "ce_ib": 9.56615924835205, + "ce_orig": 0.686457633972168, + "epoch": 0.09777841685239773, + "kl_loss": 0.14670798182487488, + "loss_ib": 0.002423695521429181, + "step": 340 + }, + { + "ce_ib": 15.014992713928223, + "ce_orig": 0.9454907178878784, + "epoch": 0.09777841685239773, + "kl_loss": 0.18971547484397888, + "loss_ib": 0.003398653818294406, + "step": 340 + }, + { + "ce_ib": 13.204034805297852, + "ce_orig": 0.6687142252922058, + "epoch": 0.09806600043137537, + "kl_loss": 0.286098837852478, + "loss_ib": 0.004181392025202513, + "step": 341 + }, + { + "ce_ib": 15.514037132263184, + "ce_orig": 0.5012982487678528, + "epoch": 0.09806600043137537, + "kl_loss": 0.2910040020942688, + "loss_ib": 0.004461443517357111, + "step": 341 + }, + { + "ce_ib": 12.121696472167969, + "ce_orig": 0.6653417348861694, + "epoch": 0.09806600043137537, + "kl_loss": 0.24229881167411804, + "loss_ib": 0.003635157598182559, + "step": 341 + }, + { + "ce_ib": 20.12578773498535, + "ce_orig": 0.7655023336410522, + "epoch": 0.09806600043137537, + "kl_loss": 0.22568227350711823, + "loss_ib": 0.004269401542842388, + "step": 341 + }, + { + "ce_ib": 9.653379440307617, + "ce_orig": 0.7402390837669373, + "epoch": 0.098353584010353, + "kl_loss": 0.1966065615415573, + "loss_ib": 0.002931403461843729, + "step": 342 + }, + { + "ce_ib": 12.452383995056152, + "ce_orig": 0.8609440326690674, + "epoch": 0.098353584010353, + "kl_loss": 0.14747576415538788, + "loss_ib": 0.0027199957985430956, + "step": 342 + }, + { + "ce_ib": 11.025045394897461, + "ce_orig": 0.6533346772193909, + "epoch": 0.098353584010353, + "kl_loss": 0.16730177402496338, + "loss_ib": 0.002775522181764245, + "step": 342 + }, + { + "ce_ib": 16.325525283813477, + "ce_orig": 1.0561354160308838, + "epoch": 0.098353584010353, + "kl_loss": 0.18068361282348633, + "loss_ib": 0.003439388470724225, + "step": 342 + }, + { + "ce_ib": 17.46106719970703, + "ce_orig": 1.6512928009033203, + "epoch": 0.09864116758933066, + "kl_loss": 0.4481199383735657, + "loss_ib": 0.006227306090295315, + "step": 343 + }, + { + "ce_ib": 16.864215850830078, + "ce_orig": 1.4105547666549683, + "epoch": 0.09864116758933066, + "kl_loss": 0.22357934713363647, + "loss_ib": 0.003922215197235346, + "step": 343 + }, + { + "ce_ib": 9.996209144592285, + "ce_orig": 0.7162432670593262, + "epoch": 0.09864116758933066, + "kl_loss": 0.1958695501089096, + "loss_ib": 0.00295831635594368, + "step": 343 + }, + { + "ce_ib": 8.547676086425781, + "ce_orig": 0.41816598176956177, + "epoch": 0.09864116758933066, + "kl_loss": 0.1747799813747406, + "loss_ib": 0.002602567430585623, + "step": 343 + }, + { + "ce_ib": 12.569132804870605, + "ce_orig": 0.5451200008392334, + "epoch": 0.0989287511683083, + "kl_loss": 0.22563554346561432, + "loss_ib": 0.0035132686607539654, + "step": 344 + }, + { + "ce_ib": 12.415586471557617, + "ce_orig": 0.7706530690193176, + "epoch": 0.0989287511683083, + "kl_loss": 0.1709377020597458, + "loss_ib": 0.002950935624539852, + "step": 344 + }, + { + "ce_ib": 19.655101776123047, + "ce_orig": 1.3017544746398926, + "epoch": 0.0989287511683083, + "kl_loss": 0.29058775305747986, + "loss_ib": 0.004871387500315905, + "step": 344 + }, + { + "ce_ib": 17.928071975708008, + "ce_orig": 1.330518126487732, + "epoch": 0.0989287511683083, + "kl_loss": 0.23996703326702118, + "loss_ib": 0.004192477557808161, + "step": 344 + }, + { + "epoch": 0.09921633474728593, + "grad_norm": 0.0755721926689148, + "learning_rate": 4.999924711859495e-05, + "loss": 0.8515, + "step": 345 + }, + { + "ce_ib": 14.76935863494873, + "ce_orig": 1.2034357786178589, + "epoch": 0.09921633474728593, + "kl_loss": 0.22408181428909302, + "loss_ib": 0.003717753803357482, + "step": 345 + }, + { + "ce_ib": 11.515120506286621, + "ce_orig": 0.7314639687538147, + "epoch": 0.09921633474728593, + "kl_loss": 0.19642382860183716, + "loss_ib": 0.003115750150755048, + "step": 345 + }, + { + "ce_ib": 11.988049507141113, + "ce_orig": 0.5762550830841064, + "epoch": 0.09921633474728593, + "kl_loss": 0.25246232748031616, + "loss_ib": 0.0037234281189739704, + "step": 345 + }, + { + "ce_ib": 13.44121265411377, + "ce_orig": 0.9119555950164795, + "epoch": 0.09921633474728593, + "kl_loss": 0.19339382648468018, + "loss_ib": 0.003278059186413884, + "step": 345 + }, + { + "ce_ib": 16.88981819152832, + "ce_orig": 1.815802812576294, + "epoch": 0.09950391832626357, + "kl_loss": 0.22369977831840515, + "loss_ib": 0.003925979603081942, + "step": 346 + }, + { + "ce_ib": 11.10827350616455, + "ce_orig": 0.8919088244438171, + "epoch": 0.09950391832626357, + "kl_loss": 0.20008933544158936, + "loss_ib": 0.0031117205508053303, + "step": 346 + }, + { + "ce_ib": 16.906099319458008, + "ce_orig": 1.4990653991699219, + "epoch": 0.09950391832626357, + "kl_loss": 0.19835729897022247, + "loss_ib": 0.0036741828080266714, + "step": 346 + }, + { + "ce_ib": 8.436453819274902, + "ce_orig": 0.6757850050926208, + "epoch": 0.09950391832626357, + "kl_loss": 0.16680480539798737, + "loss_ib": 0.00251169316470623, + "step": 346 + }, + { + "ce_ib": 9.981969833374023, + "ce_orig": 0.7898837327957153, + "epoch": 0.0997915019052412, + "kl_loss": 0.13752737641334534, + "loss_ib": 0.0023734706919640303, + "step": 347 + }, + { + "ce_ib": 12.792706489562988, + "ce_orig": 0.8162432909011841, + "epoch": 0.0997915019052412, + "kl_loss": 0.14307913184165955, + "loss_ib": 0.0027100618463009596, + "step": 347 + }, + { + "ce_ib": 12.92000675201416, + "ce_orig": 1.0188729763031006, + "epoch": 0.0997915019052412, + "kl_loss": 0.18868780136108398, + "loss_ib": 0.0031788786873221397, + "step": 347 + }, + { + "ce_ib": 17.870426177978516, + "ce_orig": 1.1175206899642944, + "epoch": 0.0997915019052412, + "kl_loss": 0.2027907520532608, + "loss_ib": 0.0038149498868733644, + "step": 347 + }, + { + "ce_ib": 15.885435104370117, + "ce_orig": 0.41733163595199585, + "epoch": 0.10007908548421886, + "kl_loss": 0.20918244123458862, + "loss_ib": 0.0036803679540753365, + "step": 348 + }, + { + "ce_ib": 12.231342315673828, + "ce_orig": 0.48961135745048523, + "epoch": 0.10007908548421886, + "kl_loss": 0.20116449892520905, + "loss_ib": 0.003234779229387641, + "step": 348 + }, + { + "ce_ib": 9.306249618530273, + "ce_orig": 0.6630443334579468, + "epoch": 0.10007908548421886, + "kl_loss": 0.1751163899898529, + "loss_ib": 0.002681788755580783, + "step": 348 + }, + { + "ce_ib": 11.609100341796875, + "ce_orig": 0.879733145236969, + "epoch": 0.10007908548421886, + "kl_loss": 0.41010767221450806, + "loss_ib": 0.005261986516416073, + "step": 348 + }, + { + "ce_ib": 10.676206588745117, + "ce_orig": 0.6579650640487671, + "epoch": 0.1003666690631965, + "kl_loss": 0.2183065414428711, + "loss_ib": 0.003250685753300786, + "step": 349 + }, + { + "ce_ib": 11.89196491241455, + "ce_orig": 0.8634635806083679, + "epoch": 0.1003666690631965, + "kl_loss": 0.21364440023899078, + "loss_ib": 0.003325640456750989, + "step": 349 + }, + { + "ce_ib": 17.328413009643555, + "ce_orig": 0.9897369146347046, + "epoch": 0.1003666690631965, + "kl_loss": 0.27497774362564087, + "loss_ib": 0.004482618533074856, + "step": 349 + }, + { + "ce_ib": 15.745963096618652, + "ce_orig": 1.2580201625823975, + "epoch": 0.1003666690631965, + "kl_loss": 0.24680611491203308, + "loss_ib": 0.004042657557874918, + "step": 349 + }, + { + "epoch": 0.10065425264217413, + "grad_norm": 0.0724608451128006, + "learning_rate": 4.999891585317103e-05, + "loss": 0.857, + "step": 350 + }, + { + "ce_ib": 15.896184921264648, + "ce_orig": 0.8773839473724365, + "epoch": 0.10065425264217413, + "kl_loss": 0.25826671719551086, + "loss_ib": 0.004172285553067923, + "step": 350 + }, + { + "ce_ib": 10.407938003540039, + "ce_orig": 0.7755264639854431, + "epoch": 0.10065425264217413, + "kl_loss": 0.13498742878437042, + "loss_ib": 0.0023906680289655924, + "step": 350 + }, + { + "ce_ib": 15.827054023742676, + "ce_orig": 1.3559359312057495, + "epoch": 0.10065425264217413, + "kl_loss": 0.21587374806404114, + "loss_ib": 0.0037414426915347576, + "step": 350 + }, + { + "ce_ib": 12.431255340576172, + "ce_orig": 1.0548380613327026, + "epoch": 0.10065425264217413, + "kl_loss": 0.17993846535682678, + "loss_ib": 0.003042510012164712, + "step": 350 + }, + { + "ce_ib": 11.040481567382812, + "ce_orig": 0.8403714895248413, + "epoch": 0.10094183622115177, + "kl_loss": 0.1840890645980835, + "loss_ib": 0.002944938838481903, + "step": 351 + }, + { + "ce_ib": 15.373204231262207, + "ce_orig": 1.075166940689087, + "epoch": 0.10094183622115177, + "kl_loss": 0.18732014298439026, + "loss_ib": 0.003410521661862731, + "step": 351 + }, + { + "ce_ib": 12.945959091186523, + "ce_orig": 0.897395133972168, + "epoch": 0.10094183622115177, + "kl_loss": 0.1956326961517334, + "loss_ib": 0.0032509227748960257, + "step": 351 + }, + { + "ce_ib": 6.2110090255737305, + "ce_orig": 0.1484360694885254, + "epoch": 0.10094183622115177, + "kl_loss": 0.3617279529571533, + "loss_ib": 0.0042383805848658085, + "step": 351 + }, + { + "ce_ib": 12.221625328063965, + "ce_orig": 0.7823323607444763, + "epoch": 0.10122941980012941, + "kl_loss": 0.18742753565311432, + "loss_ib": 0.003096437780186534, + "step": 352 + }, + { + "ce_ib": 13.974717140197754, + "ce_orig": 0.5945900082588196, + "epoch": 0.10122941980012941, + "kl_loss": 0.149112731218338, + "loss_ib": 0.002888598944991827, + "step": 352 + }, + { + "ce_ib": 14.455492973327637, + "ce_orig": 0.8753033876419067, + "epoch": 0.10122941980012941, + "kl_loss": 0.28339695930480957, + "loss_ib": 0.004279518499970436, + "step": 352 + }, + { + "ce_ib": 12.000777244567871, + "ce_orig": 0.676581859588623, + "epoch": 0.10122941980012941, + "kl_loss": 0.24054169654846191, + "loss_ib": 0.0036054945085197687, + "step": 352 + }, + { + "ce_ib": 10.058462142944336, + "ce_orig": 0.9673516154289246, + "epoch": 0.10151700337910706, + "kl_loss": 0.18438181281089783, + "loss_ib": 0.002849664306268096, + "step": 353 + }, + { + "ce_ib": 14.86198616027832, + "ce_orig": 0.6923442482948303, + "epoch": 0.10151700337910706, + "kl_loss": 0.3370734453201294, + "loss_ib": 0.004856932908296585, + "step": 353 + }, + { + "ce_ib": 16.324743270874023, + "ce_orig": 1.7405447959899902, + "epoch": 0.10151700337910706, + "kl_loss": 0.1804196834564209, + "loss_ib": 0.0034366711042821407, + "step": 353 + }, + { + "ce_ib": 10.453534126281738, + "ce_orig": 0.6495627164840698, + "epoch": 0.10151700337910706, + "kl_loss": 0.18358883261680603, + "loss_ib": 0.0028812417294830084, + "step": 353 + }, + { + "ce_ib": 10.957155227661133, + "ce_orig": 0.9104872941970825, + "epoch": 0.1018045869580847, + "kl_loss": 0.18098904192447662, + "loss_ib": 0.002905606059357524, + "step": 354 + }, + { + "ce_ib": 12.083660125732422, + "ce_orig": 0.5857529640197754, + "epoch": 0.1018045869580847, + "kl_loss": 0.2257942408323288, + "loss_ib": 0.0034663083497434855, + "step": 354 + }, + { + "ce_ib": 14.075161933898926, + "ce_orig": 0.827916145324707, + "epoch": 0.1018045869580847, + "kl_loss": 0.22436052560806274, + "loss_ib": 0.0036511211656033993, + "step": 354 + }, + { + "ce_ib": 9.636879920959473, + "ce_orig": 0.3787972331047058, + "epoch": 0.1018045869580847, + "kl_loss": 0.4520171582698822, + "loss_ib": 0.005483859684318304, + "step": 354 + }, + { + "epoch": 0.10209217053706234, + "grad_norm": 0.07327523827552795, + "learning_rate": 4.9998524359556445e-05, + "loss": 0.8569, + "step": 355 + }, + { + "ce_ib": 14.932779312133789, + "ce_orig": 0.9075332880020142, + "epoch": 0.10209217053706234, + "kl_loss": 0.2982865273952484, + "loss_ib": 0.004476143047213554, + "step": 355 + }, + { + "ce_ib": 14.173552513122559, + "ce_orig": 0.8084387183189392, + "epoch": 0.10209217053706234, + "kl_loss": 0.33803728222846985, + "loss_ib": 0.0047977278009057045, + "step": 355 + }, + { + "ce_ib": 8.768068313598633, + "ce_orig": 0.1545993983745575, + "epoch": 0.10209217053706234, + "kl_loss": 0.22588716447353363, + "loss_ib": 0.003135678358376026, + "step": 355 + }, + { + "ce_ib": 11.144165992736816, + "ce_orig": 0.8069305419921875, + "epoch": 0.10209217053706234, + "kl_loss": 0.1619873195886612, + "loss_ib": 0.002734289737418294, + "step": 355 + }, + { + "ce_ib": 12.587420463562012, + "ce_orig": 1.174453616142273, + "epoch": 0.10237975411603997, + "kl_loss": 0.25647827982902527, + "loss_ib": 0.003823524573817849, + "step": 356 + }, + { + "ce_ib": 9.973530769348145, + "ce_orig": 0.5818334221839905, + "epoch": 0.10237975411603997, + "kl_loss": 0.14407533407211304, + "loss_ib": 0.0024381063412874937, + "step": 356 + }, + { + "ce_ib": 12.697566032409668, + "ce_orig": 1.0796477794647217, + "epoch": 0.10237975411603997, + "kl_loss": 0.24205471575260162, + "loss_ib": 0.003690303536131978, + "step": 356 + }, + { + "ce_ib": 12.71401596069336, + "ce_orig": 0.7969520688056946, + "epoch": 0.10237975411603997, + "kl_loss": 0.2004072666168213, + "loss_ib": 0.003275474300608039, + "step": 356 + }, + { + "ce_ib": 11.712475776672363, + "ce_orig": 0.8881208896636963, + "epoch": 0.10266733769501761, + "kl_loss": 0.17075558006763458, + "loss_ib": 0.0028788032941520214, + "step": 357 + }, + { + "ce_ib": 13.805883407592773, + "ce_orig": 0.6269095540046692, + "epoch": 0.10266733769501761, + "kl_loss": 0.2602432370185852, + "loss_ib": 0.003983020782470703, + "step": 357 + }, + { + "ce_ib": 9.764449119567871, + "ce_orig": 0.7314236760139465, + "epoch": 0.10266733769501761, + "kl_loss": 0.2458636462688446, + "loss_ib": 0.0034350811038166285, + "step": 357 + }, + { + "ce_ib": 12.507657051086426, + "ce_orig": 0.6812151670455933, + "epoch": 0.10266733769501761, + "kl_loss": 0.19368158280849457, + "loss_ib": 0.0031875811982899904, + "step": 357 + }, + { + "ce_ib": 11.365423202514648, + "ce_orig": 0.6420213580131531, + "epoch": 0.10295492127399525, + "kl_loss": 0.4926440715789795, + "loss_ib": 0.0060629830695688725, + "step": 358 + }, + { + "ce_ib": 7.592077255249023, + "ce_orig": 0.265299916267395, + "epoch": 0.10295492127399525, + "kl_loss": 0.4861292541027069, + "loss_ib": 0.005620500538498163, + "step": 358 + }, + { + "ce_ib": 15.327858924865723, + "ce_orig": 0.9397932887077332, + "epoch": 0.10295492127399525, + "kl_loss": 0.17111369967460632, + "loss_ib": 0.003243922721594572, + "step": 358 + }, + { + "ce_ib": 13.333529472351074, + "ce_orig": 0.787560224533081, + "epoch": 0.10295492127399525, + "kl_loss": 0.20652362704277039, + "loss_ib": 0.00339858909137547, + "step": 358 + }, + { + "ce_ib": 13.753911972045898, + "ce_orig": 0.3446463644504547, + "epoch": 0.1032425048529729, + "kl_loss": 0.4839940667152405, + "loss_ib": 0.006215331610292196, + "step": 359 + }, + { + "ce_ib": 16.900297164916992, + "ce_orig": 1.5462009906768799, + "epoch": 0.1032425048529729, + "kl_loss": 0.1991090476512909, + "loss_ib": 0.003681120229884982, + "step": 359 + }, + { + "ce_ib": 14.061060905456543, + "ce_orig": 0.8726510405540466, + "epoch": 0.1032425048529729, + "kl_loss": 0.19025549292564392, + "loss_ib": 0.003308660816401243, + "step": 359 + }, + { + "ce_ib": 13.532440185546875, + "ce_orig": 1.3304260969161987, + "epoch": 0.1032425048529729, + "kl_loss": 0.16146531701087952, + "loss_ib": 0.002967897104099393, + "step": 359 + }, + { + "epoch": 0.10353008843195054, + "grad_norm": 0.08045380562543869, + "learning_rate": 4.99980726386944e-05, + "loss": 0.9013, + "step": 360 + }, + { + "ce_ib": 16.031070709228516, + "ce_orig": 0.641457200050354, + "epoch": 0.10353008843195054, + "kl_loss": 0.3759670853614807, + "loss_ib": 0.005362777505069971, + "step": 360 + }, + { + "ce_ib": 10.804533004760742, + "ce_orig": 0.6665270924568176, + "epoch": 0.10353008843195054, + "kl_loss": 0.16297683119773865, + "loss_ib": 0.0027102213352918625, + "step": 360 + }, + { + "ce_ib": 11.559240341186523, + "ce_orig": 0.8196082711219788, + "epoch": 0.10353008843195054, + "kl_loss": 0.1367412805557251, + "loss_ib": 0.002523336559534073, + "step": 360 + }, + { + "ce_ib": 11.892834663391113, + "ce_orig": 0.439602255821228, + "epoch": 0.10353008843195054, + "kl_loss": 0.3415584862232208, + "loss_ib": 0.004604868125170469, + "step": 360 + }, + { + "ce_ib": 8.562467575073242, + "ce_orig": 0.45625510811805725, + "epoch": 0.10381767201092817, + "kl_loss": 0.1342378854751587, + "loss_ib": 0.0021986253559589386, + "step": 361 + }, + { + "ce_ib": 9.903914451599121, + "ce_orig": 0.8635908365249634, + "epoch": 0.10381767201092817, + "kl_loss": 0.22620341181755066, + "loss_ib": 0.0032524254638701677, + "step": 361 + }, + { + "ce_ib": 17.545305252075195, + "ce_orig": 1.781671166419983, + "epoch": 0.10381767201092817, + "kl_loss": 0.217964768409729, + "loss_ib": 0.003934178035706282, + "step": 361 + }, + { + "ce_ib": 12.740987777709961, + "ce_orig": 0.7617380619049072, + "epoch": 0.10381767201092817, + "kl_loss": 0.26160410046577454, + "loss_ib": 0.003890139749273658, + "step": 361 + }, + { + "ce_ib": 10.342143058776855, + "ce_orig": 1.0058131217956543, + "epoch": 0.10410525558990581, + "kl_loss": 0.3163241147994995, + "loss_ib": 0.0041974554769694805, + "step": 362 + }, + { + "ce_ib": 19.41802406311035, + "ce_orig": 1.730543851852417, + "epoch": 0.10410525558990581, + "kl_loss": 0.23577484488487244, + "loss_ib": 0.004299550782889128, + "step": 362 + }, + { + "ce_ib": 15.791916847229004, + "ce_orig": 1.4829944372177124, + "epoch": 0.10410525558990581, + "kl_loss": 0.14233699440956116, + "loss_ib": 0.003002561628818512, + "step": 362 + }, + { + "ce_ib": 9.117117881774902, + "ce_orig": 0.4645211696624756, + "epoch": 0.10410525558990581, + "kl_loss": 0.1363692581653595, + "loss_ib": 0.0022754042875021696, + "step": 362 + }, + { + "ce_ib": 13.213629722595215, + "ce_orig": 1.1761356592178345, + "epoch": 0.10439283916888345, + "kl_loss": 0.2095613181591034, + "loss_ib": 0.0034169761929661036, + "step": 363 + }, + { + "ce_ib": 12.37755012512207, + "ce_orig": 0.8143442273139954, + "epoch": 0.10439283916888345, + "kl_loss": 0.14410914480686188, + "loss_ib": 0.002678846474736929, + "step": 363 + }, + { + "ce_ib": 17.626813888549805, + "ce_orig": 1.4411890506744385, + "epoch": 0.10439283916888345, + "kl_loss": 0.1780979335308075, + "loss_ib": 0.0035436605103313923, + "step": 363 + }, + { + "ce_ib": 9.846484184265137, + "ce_orig": 0.5962998270988464, + "epoch": 0.10439283916888345, + "kl_loss": 0.1512468159198761, + "loss_ib": 0.002497116569429636, + "step": 363 + }, + { + "ce_ib": 12.717291831970215, + "ce_orig": 0.9116876721382141, + "epoch": 0.1046804227478611, + "kl_loss": 0.16959382593631744, + "loss_ib": 0.00296766753308475, + "step": 364 + }, + { + "ce_ib": 12.457673072814941, + "ce_orig": 0.09528730809688568, + "epoch": 0.1046804227478611, + "kl_loss": 0.38302385807037354, + "loss_ib": 0.005076006054878235, + "step": 364 + }, + { + "ce_ib": 12.967721939086914, + "ce_orig": 0.7480602860450745, + "epoch": 0.1046804227478611, + "kl_loss": 0.41338658332824707, + "loss_ib": 0.005430637858808041, + "step": 364 + }, + { + "ce_ib": 15.809802055358887, + "ce_orig": 1.5115008354187012, + "epoch": 0.1046804227478611, + "kl_loss": 0.2618887722492218, + "loss_ib": 0.004199867602437735, + "step": 364 + }, + { + "epoch": 0.10496800632683874, + "grad_norm": 0.1035882830619812, + "learning_rate": 4.9997560691673194e-05, + "loss": 0.9193, + "step": 365 + }, + { + "ce_ib": 13.008557319641113, + "ce_orig": 0.8673336505889893, + "epoch": 0.10496800632683874, + "kl_loss": 0.2006014883518219, + "loss_ib": 0.003306870348751545, + "step": 365 + }, + { + "ce_ib": 9.279165267944336, + "ce_orig": 0.656031608581543, + "epoch": 0.10496800632683874, + "kl_loss": 0.24030038714408875, + "loss_ib": 0.0033309203572571278, + "step": 365 + }, + { + "ce_ib": 14.56029987335205, + "ce_orig": 1.059706211090088, + "epoch": 0.10496800632683874, + "kl_loss": 0.22321242094039917, + "loss_ib": 0.0036881540436297655, + "step": 365 + }, + { + "ce_ib": 10.043885231018066, + "ce_orig": 0.7288112044334412, + "epoch": 0.10496800632683874, + "kl_loss": 0.1112736165523529, + "loss_ib": 0.002117124618962407, + "step": 365 + }, + { + "ce_ib": 14.350020408630371, + "ce_orig": 1.2987266778945923, + "epoch": 0.10525558990581638, + "kl_loss": 0.21527716517448425, + "loss_ib": 0.0035877733025699854, + "step": 366 + }, + { + "ce_ib": 9.209977149963379, + "ce_orig": 0.7698776721954346, + "epoch": 0.10525558990581638, + "kl_loss": 0.20659813284873962, + "loss_ib": 0.0029869787395000458, + "step": 366 + }, + { + "ce_ib": 12.112702369689941, + "ce_orig": 0.7612364888191223, + "epoch": 0.10525558990581638, + "kl_loss": 0.2407711148262024, + "loss_ib": 0.003618981223553419, + "step": 366 + }, + { + "ce_ib": 11.292844772338867, + "ce_orig": 0.769523024559021, + "epoch": 0.10525558990581638, + "kl_loss": 0.23566709458827972, + "loss_ib": 0.00348595529794693, + "step": 366 + }, + { + "ce_ib": 12.479425430297852, + "ce_orig": 0.8336954116821289, + "epoch": 0.10554317348479401, + "kl_loss": 0.16503103077411652, + "loss_ib": 0.0028982528019696474, + "step": 367 + }, + { + "ce_ib": 11.297480583190918, + "ce_orig": 0.7182275056838989, + "epoch": 0.10554317348479401, + "kl_loss": 0.2943016290664673, + "loss_ib": 0.004072763957083225, + "step": 367 + }, + { + "ce_ib": 13.729005813598633, + "ce_orig": 1.3682771921157837, + "epoch": 0.10554317348479401, + "kl_loss": 0.1870647370815277, + "loss_ib": 0.0032435478642582893, + "step": 367 + }, + { + "ce_ib": 14.036231994628906, + "ce_orig": 1.112056851387024, + "epoch": 0.10554317348479401, + "kl_loss": 0.15229341387748718, + "loss_ib": 0.0029265573248267174, + "step": 367 + }, + { + "ce_ib": 15.093040466308594, + "ce_orig": 1.170424222946167, + "epoch": 0.10583075706377165, + "kl_loss": 0.2388845682144165, + "loss_ib": 0.0038981495890766382, + "step": 368 + }, + { + "ce_ib": 15.43298053741455, + "ce_orig": 1.196273922920227, + "epoch": 0.10583075706377165, + "kl_loss": 0.19452136754989624, + "loss_ib": 0.0034885117784142494, + "step": 368 + }, + { + "ce_ib": 13.480894088745117, + "ce_orig": 1.0033950805664062, + "epoch": 0.10583075706377165, + "kl_loss": 0.24435830116271973, + "loss_ib": 0.003791672410443425, + "step": 368 + }, + { + "ce_ib": 15.991963386535645, + "ce_orig": 1.5334750413894653, + "epoch": 0.10583075706377165, + "kl_loss": 0.1978762149810791, + "loss_ib": 0.0035779583267867565, + "step": 368 + }, + { + "ce_ib": 9.99071979522705, + "ce_orig": 0.521371066570282, + "epoch": 0.1061183406427493, + "kl_loss": 0.14332063496112823, + "loss_ib": 0.0024322783574461937, + "step": 369 + }, + { + "ce_ib": 14.23776912689209, + "ce_orig": 1.3547425270080566, + "epoch": 0.1061183406427493, + "kl_loss": 0.18942734599113464, + "loss_ib": 0.0033180504105985165, + "step": 369 + }, + { + "ce_ib": 10.9193754196167, + "ce_orig": 0.7963501214981079, + "epoch": 0.1061183406427493, + "kl_loss": 0.19985352456569672, + "loss_ib": 0.003090472659096122, + "step": 369 + }, + { + "ce_ib": 11.383045196533203, + "ce_orig": 0.7126600742340088, + "epoch": 0.1061183406427493, + "kl_loss": 0.17306244373321533, + "loss_ib": 0.002868928946554661, + "step": 369 + }, + { + "epoch": 0.10640592422172694, + "grad_norm": 0.08818119019269943, + "learning_rate": 4.999698851972622e-05, + "loss": 0.9172, + "step": 370 + }, + { + "ce_ib": 13.027803421020508, + "ce_orig": 0.8471740484237671, + "epoch": 0.10640592422172694, + "kl_loss": 0.1602221429347992, + "loss_ib": 0.0029050016310065985, + "step": 370 + }, + { + "ce_ib": 12.130885124206543, + "ce_orig": 0.5990825891494751, + "epoch": 0.10640592422172694, + "kl_loss": 0.19608041644096375, + "loss_ib": 0.0031738923862576485, + "step": 370 + }, + { + "ce_ib": 11.950088500976562, + "ce_orig": 0.6112602353096008, + "epoch": 0.10640592422172694, + "kl_loss": 0.1965973824262619, + "loss_ib": 0.0031609826255589724, + "step": 370 + }, + { + "ce_ib": 9.864639282226562, + "ce_orig": 0.7072968482971191, + "epoch": 0.10640592422172694, + "kl_loss": 0.1892612874507904, + "loss_ib": 0.002879076637327671, + "step": 370 + }, + { + "ce_ib": 7.837416648864746, + "ce_orig": 0.22909517586231232, + "epoch": 0.10669350780070458, + "kl_loss": 0.4259476363658905, + "loss_ib": 0.005043217912316322, + "step": 371 + }, + { + "ce_ib": 16.050464630126953, + "ce_orig": 1.3338783979415894, + "epoch": 0.10669350780070458, + "kl_loss": 0.18167968094348907, + "loss_ib": 0.0034218430519104004, + "step": 371 + }, + { + "ce_ib": 7.732363224029541, + "ce_orig": 0.6679915189743042, + "epoch": 0.10669350780070458, + "kl_loss": 0.19385015964508057, + "loss_ib": 0.00271173776127398, + "step": 371 + }, + { + "ce_ib": 17.768779754638672, + "ce_orig": 1.290654182434082, + "epoch": 0.10669350780070458, + "kl_loss": 0.18727800250053406, + "loss_ib": 0.0036496578250080347, + "step": 371 + }, + { + "ce_ib": 16.024396896362305, + "ce_orig": 1.4784846305847168, + "epoch": 0.10698109137968222, + "kl_loss": 0.17223666608333588, + "loss_ib": 0.0033248059917241335, + "step": 372 + }, + { + "ce_ib": 9.74294376373291, + "ce_orig": 0.5707986950874329, + "epoch": 0.10698109137968222, + "kl_loss": 0.16291573643684387, + "loss_ib": 0.0026034514885395765, + "step": 372 + }, + { + "ce_ib": 16.458507537841797, + "ce_orig": 0.9432319402694702, + "epoch": 0.10698109137968222, + "kl_loss": 0.1663103997707367, + "loss_ib": 0.0033089546486735344, + "step": 372 + }, + { + "ce_ib": 15.392163276672363, + "ce_orig": 1.5210273265838623, + "epoch": 0.10698109137968222, + "kl_loss": 0.22455021739006042, + "loss_ib": 0.003784718457609415, + "step": 372 + }, + { + "ce_ib": 14.316435813903809, + "ce_orig": 0.8189164400100708, + "epoch": 0.10726867495865985, + "kl_loss": 0.16313040256500244, + "loss_ib": 0.0030629474204033613, + "step": 373 + }, + { + "ce_ib": 11.636860847473145, + "ce_orig": 0.7378359436988831, + "epoch": 0.10726867495865985, + "kl_loss": 0.1494181752204895, + "loss_ib": 0.0026578675024211407, + "step": 373 + }, + { + "ce_ib": 17.269123077392578, + "ce_orig": 1.541763424873352, + "epoch": 0.10726867495865985, + "kl_loss": 0.26547926664352417, + "loss_ib": 0.004381704609841108, + "step": 373 + }, + { + "ce_ib": 10.263904571533203, + "ce_orig": 0.8089870810508728, + "epoch": 0.10726867495865985, + "kl_loss": 0.11840936541557312, + "loss_ib": 0.002210484119132161, + "step": 373 + }, + { + "ce_ib": 15.512616157531738, + "ce_orig": 0.9539732933044434, + "epoch": 0.1075562585376375, + "kl_loss": 0.28004124760627747, + "loss_ib": 0.004351674113422632, + "step": 374 + }, + { + "ce_ib": 13.218826293945312, + "ce_orig": 0.9203503131866455, + "epoch": 0.1075562585376375, + "kl_loss": 0.17954644560813904, + "loss_ib": 0.003117346903309226, + "step": 374 + }, + { + "ce_ib": 12.426115036010742, + "ce_orig": 1.001791000366211, + "epoch": 0.1075562585376375, + "kl_loss": 0.24035847187042236, + "loss_ib": 0.003646196098998189, + "step": 374 + }, + { + "ce_ib": 13.382880210876465, + "ce_orig": 1.0965611934661865, + "epoch": 0.1075562585376375, + "kl_loss": 0.16744542121887207, + "loss_ib": 0.003012742381542921, + "step": 374 + }, + { + "epoch": 0.10784384211661514, + "grad_norm": 0.0786062702536583, + "learning_rate": 4.999635612423198e-05, + "loss": 0.8711, + "step": 375 + }, + { + "ce_ib": 7.170506477355957, + "ce_orig": 0.2852292060852051, + "epoch": 0.10784384211661514, + "kl_loss": 0.37380170822143555, + "loss_ib": 0.004455067683011293, + "step": 375 + }, + { + "ce_ib": 10.11276626586914, + "ce_orig": 0.5102769136428833, + "epoch": 0.10784384211661514, + "kl_loss": 0.18036767840385437, + "loss_ib": 0.002814953215420246, + "step": 375 + }, + { + "ce_ib": 11.423179626464844, + "ce_orig": 1.2335662841796875, + "epoch": 0.10784384211661514, + "kl_loss": 0.19887100160121918, + "loss_ib": 0.0031310277990996838, + "step": 375 + }, + { + "ce_ib": 14.86428451538086, + "ce_orig": 0.6654768586158752, + "epoch": 0.10784384211661514, + "kl_loss": 0.19978465139865875, + "loss_ib": 0.0034842747263610363, + "step": 375 + }, + { + "ce_ib": 12.019170761108398, + "ce_orig": 0.830958902835846, + "epoch": 0.10813142569559278, + "kl_loss": 0.19432778656482697, + "loss_ib": 0.0031451948452740908, + "step": 376 + }, + { + "ce_ib": 15.544536590576172, + "ce_orig": 0.608134388923645, + "epoch": 0.10813142569559278, + "kl_loss": 0.3602546155452728, + "loss_ib": 0.005156999919563532, + "step": 376 + }, + { + "ce_ib": 10.856096267700195, + "ce_orig": 0.7232799530029297, + "epoch": 0.10813142569559278, + "kl_loss": 0.15802645683288574, + "loss_ib": 0.00266587408259511, + "step": 376 + }, + { + "ce_ib": 13.540989875793457, + "ce_orig": 1.0887871980667114, + "epoch": 0.10813142569559278, + "kl_loss": 0.19060353934764862, + "loss_ib": 0.0032601344864815474, + "step": 376 + }, + { + "ce_ib": 13.389488220214844, + "ce_orig": 0.8150464296340942, + "epoch": 0.10841900927457042, + "kl_loss": 0.19611474871635437, + "loss_ib": 0.0033000963740050793, + "step": 377 + }, + { + "ce_ib": 7.958892822265625, + "ce_orig": 0.558684229850769, + "epoch": 0.10841900927457042, + "kl_loss": 0.12297540158033371, + "loss_ib": 0.002025643130764365, + "step": 377 + }, + { + "ce_ib": 13.451696395874023, + "ce_orig": 0.9451778531074524, + "epoch": 0.10841900927457042, + "kl_loss": 0.22830717265605927, + "loss_ib": 0.003628241363912821, + "step": 377 + }, + { + "ce_ib": 11.917108535766602, + "ce_orig": 1.090754747390747, + "epoch": 0.10841900927457042, + "kl_loss": 0.18710459768772125, + "loss_ib": 0.003062756499275565, + "step": 377 + }, + { + "ce_ib": 11.53650951385498, + "ce_orig": 0.4949207007884979, + "epoch": 0.10870659285354806, + "kl_loss": 0.2471131980419159, + "loss_ib": 0.0036247826647013426, + "step": 378 + }, + { + "ce_ib": 9.730238914489746, + "ce_orig": 0.40288040041923523, + "epoch": 0.10870659285354806, + "kl_loss": 0.18191629648208618, + "loss_ib": 0.0027921865694224834, + "step": 378 + }, + { + "ce_ib": 9.56220817565918, + "ce_orig": 0.5649886727333069, + "epoch": 0.10870659285354806, + "kl_loss": 0.17736974358558655, + "loss_ib": 0.0027299183420836926, + "step": 378 + }, + { + "ce_ib": 16.986759185791016, + "ce_orig": 1.3326983451843262, + "epoch": 0.10870659285354806, + "kl_loss": 0.27190378308296204, + "loss_ib": 0.0044177137315273285, + "step": 378 + }, + { + "ce_ib": 10.621078491210938, + "ce_orig": 0.9211146831512451, + "epoch": 0.10899417643252571, + "kl_loss": 0.1395106166601181, + "loss_ib": 0.002457214053720236, + "step": 379 + }, + { + "ce_ib": 8.39609432220459, + "ce_orig": 0.679756224155426, + "epoch": 0.10899417643252571, + "kl_loss": 0.1734372079372406, + "loss_ib": 0.0025739814154803753, + "step": 379 + }, + { + "ce_ib": 13.126282691955566, + "ce_orig": 0.5308454036712646, + "epoch": 0.10899417643252571, + "kl_loss": 0.23796755075454712, + "loss_ib": 0.0036923037841916084, + "step": 379 + }, + { + "ce_ib": 15.385143280029297, + "ce_orig": 0.9731943607330322, + "epoch": 0.10899417643252571, + "kl_loss": 0.2400083839893341, + "loss_ib": 0.003938598092645407, + "step": 379 + }, + { + "epoch": 0.10928176001150335, + "grad_norm": 0.07642538100481033, + "learning_rate": 4.9995663506714054e-05, + "loss": 0.8705, + "step": 380 + }, + { + "ce_ib": 10.140104293823242, + "ce_orig": 0.55213463306427, + "epoch": 0.10928176001150335, + "kl_loss": 0.2998710870742798, + "loss_ib": 0.004012721125036478, + "step": 380 + }, + { + "ce_ib": 12.016203880310059, + "ce_orig": 0.2663145363330841, + "epoch": 0.10928176001150335, + "kl_loss": 0.20258453488349915, + "loss_ib": 0.0032274657860398293, + "step": 380 + }, + { + "ce_ib": 10.269865989685059, + "ce_orig": 0.5478006601333618, + "epoch": 0.10928176001150335, + "kl_loss": 0.17457614839076996, + "loss_ib": 0.0027727477718144655, + "step": 380 + }, + { + "ce_ib": 10.28240966796875, + "ce_orig": 0.46482110023498535, + "epoch": 0.10928176001150335, + "kl_loss": 0.14175333082675934, + "loss_ib": 0.0024457741528749466, + "step": 380 + }, + { + "ce_ib": 18.375350952148438, + "ce_orig": 1.6469268798828125, + "epoch": 0.10956934359048098, + "kl_loss": 0.2459729164838791, + "loss_ib": 0.004297263920307159, + "step": 381 + }, + { + "ce_ib": 13.591058731079102, + "ce_orig": 0.654187798500061, + "epoch": 0.10956934359048098, + "kl_loss": 0.33038705587387085, + "loss_ib": 0.004662976134568453, + "step": 381 + }, + { + "ce_ib": 18.45250129699707, + "ce_orig": 1.5424803495407104, + "epoch": 0.10956934359048098, + "kl_loss": 0.25389549136161804, + "loss_ib": 0.004384204745292664, + "step": 381 + }, + { + "ce_ib": 15.972155570983887, + "ce_orig": 1.1866068840026855, + "epoch": 0.10956934359048098, + "kl_loss": 0.26268285512924194, + "loss_ib": 0.004224043805152178, + "step": 381 + }, + { + "ce_ib": 10.061860084533691, + "ce_orig": 0.5798574090003967, + "epoch": 0.10985692716945862, + "kl_loss": 0.18744969367980957, + "loss_ib": 0.002880682935938239, + "step": 382 + }, + { + "ce_ib": 8.527949333190918, + "ce_orig": 0.7655165195465088, + "epoch": 0.10985692716945862, + "kl_loss": 0.19700750708580017, + "loss_ib": 0.002822869922965765, + "step": 382 + }, + { + "ce_ib": 7.723175525665283, + "ce_orig": 0.46251556277275085, + "epoch": 0.10985692716945862, + "kl_loss": 0.1479792296886444, + "loss_ib": 0.0022521098144352436, + "step": 382 + }, + { + "ce_ib": 16.92078399658203, + "ce_orig": 1.5507680177688599, + "epoch": 0.10985692716945862, + "kl_loss": 0.17009945213794708, + "loss_ib": 0.0033930731005966663, + "step": 382 + }, + { + "ce_ib": 11.547307014465332, + "ce_orig": 0.9038912057876587, + "epoch": 0.11014451074843626, + "kl_loss": 0.18578889966011047, + "loss_ib": 0.003012619446963072, + "step": 383 + }, + { + "ce_ib": 7.641073226928711, + "ce_orig": 0.6663032174110413, + "epoch": 0.11014451074843626, + "kl_loss": 0.12631307542324066, + "loss_ib": 0.002027238020673394, + "step": 383 + }, + { + "ce_ib": 7.092626094818115, + "ce_orig": 0.4686228632926941, + "epoch": 0.11014451074843626, + "kl_loss": 0.1317451447248459, + "loss_ib": 0.002026714151725173, + "step": 383 + }, + { + "ce_ib": 9.421049118041992, + "ce_orig": 0.6183243989944458, + "epoch": 0.11014451074843626, + "kl_loss": 0.15724687278270721, + "loss_ib": 0.002514573512598872, + "step": 383 + }, + { + "ce_ib": 11.571142196655273, + "ce_orig": 0.4451025128364563, + "epoch": 0.11043209432741391, + "kl_loss": 0.2353937327861786, + "loss_ib": 0.0035110514145344496, + "step": 384 + }, + { + "ce_ib": 16.550886154174805, + "ce_orig": 1.2699673175811768, + "epoch": 0.11043209432741391, + "kl_loss": 0.2898581027984619, + "loss_ib": 0.004553669597953558, + "step": 384 + }, + { + "ce_ib": 12.445303916931152, + "ce_orig": 0.6089316606521606, + "epoch": 0.11043209432741391, + "kl_loss": 0.5906498432159424, + "loss_ib": 0.007151029072701931, + "step": 384 + }, + { + "ce_ib": 11.304732322692871, + "ce_orig": 0.6872734427452087, + "epoch": 0.11043209432741391, + "kl_loss": 0.132551908493042, + "loss_ib": 0.002455992391332984, + "step": 384 + }, + { + "epoch": 0.11071967790639155, + "grad_norm": 0.08736824244260788, + "learning_rate": 4.999491066884113e-05, + "loss": 0.8343, + "step": 385 + }, + { + "ce_ib": 12.851602554321289, + "ce_orig": 1.4722890853881836, + "epoch": 0.11071967790639155, + "kl_loss": 0.16855554282665253, + "loss_ib": 0.0029707157518714666, + "step": 385 + }, + { + "ce_ib": 12.488396644592285, + "ce_orig": 1.012579321861267, + "epoch": 0.11071967790639155, + "kl_loss": 0.24618947505950928, + "loss_ib": 0.0037107341922819614, + "step": 385 + }, + { + "ce_ib": 9.774558067321777, + "ce_orig": 0.6036505103111267, + "epoch": 0.11071967790639155, + "kl_loss": 0.14310193061828613, + "loss_ib": 0.0024084749165922403, + "step": 385 + }, + { + "ce_ib": 10.53403091430664, + "ce_orig": 0.8375312089920044, + "epoch": 0.11071967790639155, + "kl_loss": 0.2290131151676178, + "loss_ib": 0.00334353419020772, + "step": 385 + }, + { + "ce_ib": 13.784265518188477, + "ce_orig": 0.7486900091171265, + "epoch": 0.11100726148536919, + "kl_loss": 0.275905966758728, + "loss_ib": 0.004137486219406128, + "step": 386 + }, + { + "ce_ib": 13.386645317077637, + "ce_orig": 0.8458417654037476, + "epoch": 0.11100726148536919, + "kl_loss": 0.2864159941673279, + "loss_ib": 0.0042028240859508514, + "step": 386 + }, + { + "ce_ib": 4.843447685241699, + "ce_orig": 0.1851879358291626, + "epoch": 0.11100726148536919, + "kl_loss": 0.44297945499420166, + "loss_ib": 0.0049141389317810535, + "step": 386 + }, + { + "ce_ib": 11.102249145507812, + "ce_orig": 0.7399924397468567, + "epoch": 0.11100726148536919, + "kl_loss": 0.2577285170555115, + "loss_ib": 0.0036875098012387753, + "step": 386 + }, + { + "ce_ib": 13.290738105773926, + "ce_orig": 0.8414790034294128, + "epoch": 0.11129484506434682, + "kl_loss": 0.1596413552761078, + "loss_ib": 0.0029254870023578405, + "step": 387 + }, + { + "ce_ib": 12.116263389587402, + "ce_orig": 1.0983738899230957, + "epoch": 0.11129484506434682, + "kl_loss": 0.22985008358955383, + "loss_ib": 0.0035101270768791437, + "step": 387 + }, + { + "ce_ib": 11.536699295043945, + "ce_orig": 0.7217467427253723, + "epoch": 0.11129484506434682, + "kl_loss": 0.23231491446495056, + "loss_ib": 0.003476819023489952, + "step": 387 + }, + { + "ce_ib": 10.348511695861816, + "ce_orig": 0.9353364706039429, + "epoch": 0.11129484506434682, + "kl_loss": 0.16923439502716064, + "loss_ib": 0.0027271949220448732, + "step": 387 + }, + { + "ce_ib": 8.719440460205078, + "ce_orig": 0.6035894155502319, + "epoch": 0.11158242864332446, + "kl_loss": 0.22707051038742065, + "loss_ib": 0.0031426490750163794, + "step": 388 + }, + { + "ce_ib": 12.744837760925293, + "ce_orig": 0.7636030316352844, + "epoch": 0.11158242864332446, + "kl_loss": 0.19510197639465332, + "loss_ib": 0.0032255034893751144, + "step": 388 + }, + { + "ce_ib": 10.507633209228516, + "ce_orig": 0.5344758033752441, + "epoch": 0.11158242864332446, + "kl_loss": 0.23824182152748108, + "loss_ib": 0.0034331816714257, + "step": 388 + }, + { + "ce_ib": 13.216085433959961, + "ce_orig": 0.9561126232147217, + "epoch": 0.11158242864332446, + "kl_loss": 0.19082219898700714, + "loss_ib": 0.003229830414056778, + "step": 388 + }, + { + "ce_ib": 12.790705680847168, + "ce_orig": 0.7942649722099304, + "epoch": 0.11187001222230211, + "kl_loss": 0.19223928451538086, + "loss_ib": 0.003201463259756565, + "step": 389 + }, + { + "ce_ib": 12.605323791503906, + "ce_orig": 0.9660126566886902, + "epoch": 0.11187001222230211, + "kl_loss": 0.18265745043754578, + "loss_ib": 0.0030871068593114614, + "step": 389 + }, + { + "ce_ib": 16.678693771362305, + "ce_orig": 1.4949008226394653, + "epoch": 0.11187001222230211, + "kl_loss": 0.2443901002407074, + "loss_ib": 0.004111770074814558, + "step": 389 + }, + { + "ce_ib": 15.18307876586914, + "ce_orig": 1.5530723333358765, + "epoch": 0.11187001222230211, + "kl_loss": 0.18980354070663452, + "loss_ib": 0.0034163433592766523, + "step": 389 + }, + { + "epoch": 0.11215759580127975, + "grad_norm": 0.0806172788143158, + "learning_rate": 4.999409761242696e-05, + "loss": 0.889, + "step": 390 + }, + { + "ce_ib": 12.329959869384766, + "ce_orig": 0.6557547450065613, + "epoch": 0.11215759580127975, + "kl_loss": 0.11406560242176056, + "loss_ib": 0.0023736520670354366, + "step": 390 + }, + { + "ce_ib": 10.382856369018555, + "ce_orig": 0.36463189125061035, + "epoch": 0.11215759580127975, + "kl_loss": 0.2578916549682617, + "loss_ib": 0.0036172019317746162, + "step": 390 + }, + { + "ce_ib": 13.944049835205078, + "ce_orig": 0.8022533655166626, + "epoch": 0.11215759580127975, + "kl_loss": 0.23697403073310852, + "loss_ib": 0.0037641453091055155, + "step": 390 + }, + { + "ce_ib": 10.437244415283203, + "ce_orig": 0.6617816090583801, + "epoch": 0.11215759580127975, + "kl_loss": 0.15966740250587463, + "loss_ib": 0.0026403984520584345, + "step": 390 + }, + { + "ce_ib": 11.55241584777832, + "ce_orig": 0.7899225950241089, + "epoch": 0.11244517938025739, + "kl_loss": 0.12170088291168213, + "loss_ib": 0.0023722504265606403, + "step": 391 + }, + { + "ce_ib": 14.663440704345703, + "ce_orig": 0.9394941329956055, + "epoch": 0.11244517938025739, + "kl_loss": 0.23881687223911285, + "loss_ib": 0.00385451246984303, + "step": 391 + }, + { + "ce_ib": 9.422616004943848, + "ce_orig": 0.5803003907203674, + "epoch": 0.11244517938025739, + "kl_loss": 0.16054609417915344, + "loss_ib": 0.0025477223098278046, + "step": 391 + }, + { + "ce_ib": 11.833211898803711, + "ce_orig": 0.6175609827041626, + "epoch": 0.11244517938025739, + "kl_loss": 0.2186504304409027, + "loss_ib": 0.0033698254264891148, + "step": 391 + }, + { + "ce_ib": 11.895299911499023, + "ce_orig": 0.6896355152130127, + "epoch": 0.11273276295923502, + "kl_loss": 0.17966461181640625, + "loss_ib": 0.0029861759394407272, + "step": 392 + }, + { + "ce_ib": 11.261984825134277, + "ce_orig": 0.7158202528953552, + "epoch": 0.11273276295923502, + "kl_loss": 0.19520384073257446, + "loss_ib": 0.00307823671028018, + "step": 392 + }, + { + "ce_ib": 12.316457748413086, + "ce_orig": 0.8367967009544373, + "epoch": 0.11273276295923502, + "kl_loss": 0.2615872323513031, + "loss_ib": 0.0038475177716463804, + "step": 392 + }, + { + "ce_ib": 10.227145195007324, + "ce_orig": 0.7019678950309753, + "epoch": 0.11273276295923502, + "kl_loss": 0.21493881940841675, + "loss_ib": 0.0031721023842692375, + "step": 392 + }, + { + "ce_ib": 16.03828239440918, + "ce_orig": 1.3268945217132568, + "epoch": 0.11302034653821266, + "kl_loss": 0.23496520519256592, + "loss_ib": 0.003953480161726475, + "step": 393 + }, + { + "ce_ib": 7.363077163696289, + "ce_orig": 0.6715714335441589, + "epoch": 0.11302034653821266, + "kl_loss": 0.12172873318195343, + "loss_ib": 0.0019535948522388935, + "step": 393 + }, + { + "ce_ib": 10.327352523803711, + "ce_orig": 0.9410114884376526, + "epoch": 0.11302034653821266, + "kl_loss": 0.11792122572660446, + "loss_ib": 0.0022119474597275257, + "step": 393 + }, + { + "ce_ib": 5.172538757324219, + "ce_orig": 0.20720714330673218, + "epoch": 0.11302034653821266, + "kl_loss": 0.41349154710769653, + "loss_ib": 0.004652169067412615, + "step": 393 + }, + { + "ce_ib": 10.094765663146973, + "ce_orig": 0.6660728454589844, + "epoch": 0.11330793011719031, + "kl_loss": 0.1545972228050232, + "loss_ib": 0.0025554485619068146, + "step": 394 + }, + { + "ce_ib": 12.061307907104492, + "ce_orig": 0.5917040109634399, + "epoch": 0.11330793011719031, + "kl_loss": 0.19155195355415344, + "loss_ib": 0.003121650079265237, + "step": 394 + }, + { + "ce_ib": 13.97226333618164, + "ce_orig": 1.101852536201477, + "epoch": 0.11330793011719031, + "kl_loss": 0.19805079698562622, + "loss_ib": 0.003377734450623393, + "step": 394 + }, + { + "ce_ib": 14.349608421325684, + "ce_orig": 0.5646123886108398, + "epoch": 0.11330793011719031, + "kl_loss": 0.17135578393936157, + "loss_ib": 0.0031485187355428934, + "step": 394 + }, + { + "epoch": 0.11359551369616795, + "grad_norm": 0.08417540043592453, + "learning_rate": 4.999322433943038e-05, + "loss": 0.8409, + "step": 395 + }, + { + "ce_ib": 12.859439849853516, + "ce_orig": 0.9198188185691833, + "epoch": 0.11359551369616795, + "kl_loss": 0.1865035444498062, + "loss_ib": 0.0031509792897850275, + "step": 395 + }, + { + "ce_ib": 10.23005485534668, + "ce_orig": 0.6933926939964294, + "epoch": 0.11359551369616795, + "kl_loss": 0.14806464314460754, + "loss_ib": 0.0025036518927663565, + "step": 395 + }, + { + "ce_ib": 14.05105972290039, + "ce_orig": 0.6480773687362671, + "epoch": 0.11359551369616795, + "kl_loss": 0.2830086648464203, + "loss_ib": 0.0042351926676929, + "step": 395 + }, + { + "ce_ib": 12.827180862426758, + "ce_orig": 1.2222548723220825, + "epoch": 0.11359551369616795, + "kl_loss": 0.19166235625743866, + "loss_ib": 0.0031993414741009474, + "step": 395 + }, + { + "ce_ib": 14.012380599975586, + "ce_orig": 1.3009854555130005, + "epoch": 0.11388309727514559, + "kl_loss": 0.2228410542011261, + "loss_ib": 0.003629648592323065, + "step": 396 + }, + { + "ce_ib": 7.52554178237915, + "ce_orig": 0.6004323363304138, + "epoch": 0.11388309727514559, + "kl_loss": 0.15014877915382385, + "loss_ib": 0.002254042075946927, + "step": 396 + }, + { + "ce_ib": 11.584349632263184, + "ce_orig": 0.8162614703178406, + "epoch": 0.11388309727514559, + "kl_loss": 0.1859622299671173, + "loss_ib": 0.003018057206645608, + "step": 396 + }, + { + "ce_ib": 11.174099922180176, + "ce_orig": 0.7962226271629333, + "epoch": 0.11388309727514559, + "kl_loss": 0.16366738080978394, + "loss_ib": 0.0027540838345885277, + "step": 396 + }, + { + "ce_ib": 13.57970905303955, + "ce_orig": 1.1856755018234253, + "epoch": 0.11417068085412323, + "kl_loss": 0.272286593914032, + "loss_ib": 0.004080836661159992, + "step": 397 + }, + { + "ce_ib": 6.8682122230529785, + "ce_orig": 0.43402042984962463, + "epoch": 0.11417068085412323, + "kl_loss": 0.1285514086484909, + "loss_ib": 0.00197233515791595, + "step": 397 + }, + { + "ce_ib": 11.961787223815918, + "ce_orig": 0.782745897769928, + "epoch": 0.11417068085412323, + "kl_loss": 0.15355338156223297, + "loss_ib": 0.0027317123021930456, + "step": 397 + }, + { + "ce_ib": 10.129254341125488, + "ce_orig": 0.8172139525413513, + "epoch": 0.11417068085412323, + "kl_loss": 0.17509959638118744, + "loss_ib": 0.0027639211621135473, + "step": 397 + }, + { + "ce_ib": 8.069706916809082, + "ce_orig": 0.6624881029129028, + "epoch": 0.11445826443310086, + "kl_loss": 0.12610213458538055, + "loss_ib": 0.0020679919980466366, + "step": 398 + }, + { + "ce_ib": 7.447951793670654, + "ce_orig": 0.44954437017440796, + "epoch": 0.11445826443310086, + "kl_loss": 0.16658943891525269, + "loss_ib": 0.002410689601674676, + "step": 398 + }, + { + "ce_ib": 11.109574317932129, + "ce_orig": 0.47779056429862976, + "epoch": 0.11445826443310086, + "kl_loss": 0.23340463638305664, + "loss_ib": 0.0034450036473572254, + "step": 398 + }, + { + "ce_ib": 15.319025039672852, + "ce_orig": 1.082602620124817, + "epoch": 0.11445826443310086, + "kl_loss": 0.1651686728000641, + "loss_ib": 0.0031835888512432575, + "step": 398 + }, + { + "ce_ib": 16.39435386657715, + "ce_orig": 1.458138108253479, + "epoch": 0.11474584801207852, + "kl_loss": 0.23157645761966705, + "loss_ib": 0.003955199848860502, + "step": 399 + }, + { + "ce_ib": 12.060273170471191, + "ce_orig": 1.2219758033752441, + "epoch": 0.11474584801207852, + "kl_loss": 0.18119218945503235, + "loss_ib": 0.0030179491732269526, + "step": 399 + }, + { + "ce_ib": 9.549758911132812, + "ce_orig": 0.5686326026916504, + "epoch": 0.11474584801207852, + "kl_loss": 0.1396368145942688, + "loss_ib": 0.0023513438645750284, + "step": 399 + }, + { + "ce_ib": 12.140963554382324, + "ce_orig": 0.8329185247421265, + "epoch": 0.11474584801207852, + "kl_loss": 0.1271989345550537, + "loss_ib": 0.002486085519194603, + "step": 399 + }, + { + "epoch": 0.11503343159105615, + "grad_norm": 0.08466464281082153, + "learning_rate": 4.9992290851955325e-05, + "loss": 0.8643, + "step": 400 + }, + { + "ce_ib": 16.369003295898438, + "ce_orig": 0.8453714847564697, + "epoch": 0.11503343159105615, + "kl_loss": 0.20362722873687744, + "loss_ib": 0.0036731725558638573, + "step": 400 + }, + { + "ce_ib": 10.80587387084961, + "ce_orig": 0.9257553219795227, + "epoch": 0.11503343159105615, + "kl_loss": 0.18605493009090424, + "loss_ib": 0.0029411364812403917, + "step": 400 + }, + { + "ce_ib": 6.573936939239502, + "ce_orig": 0.6002892851829529, + "epoch": 0.11503343159105615, + "kl_loss": 0.15251712501049042, + "loss_ib": 0.0021825649309903383, + "step": 400 + }, + { + "ce_ib": 13.615230560302734, + "ce_orig": 1.3942912817001343, + "epoch": 0.11503343159105615, + "kl_loss": 0.171320840716362, + "loss_ib": 0.0030747312121093273, + "step": 400 + }, + { + "ce_ib": 12.808405876159668, + "ce_orig": 1.0415929555892944, + "epoch": 0.11532101517003379, + "kl_loss": 0.17026068270206451, + "loss_ib": 0.002983447164297104, + "step": 401 + }, + { + "ce_ib": 7.786767959594727, + "ce_orig": 0.5461778044700623, + "epoch": 0.11532101517003379, + "kl_loss": 0.1809147298336029, + "loss_ib": 0.0025878241285681725, + "step": 401 + }, + { + "ce_ib": 9.318531036376953, + "ce_orig": 1.0691779851913452, + "epoch": 0.11532101517003379, + "kl_loss": 0.12710833549499512, + "loss_ib": 0.0022029364481568336, + "step": 401 + }, + { + "ce_ib": 15.711153030395508, + "ce_orig": 1.2399132251739502, + "epoch": 0.11532101517003379, + "kl_loss": 0.20981115102767944, + "loss_ib": 0.003669226774945855, + "step": 401 + }, + { + "ce_ib": 13.523634910583496, + "ce_orig": 1.0126256942749023, + "epoch": 0.11560859874901143, + "kl_loss": 0.17583820223808289, + "loss_ib": 0.003110745456069708, + "step": 402 + }, + { + "ce_ib": 10.453843116760254, + "ce_orig": 0.7707417607307434, + "epoch": 0.11560859874901143, + "kl_loss": 0.13824975490570068, + "loss_ib": 0.0024278818164020777, + "step": 402 + }, + { + "ce_ib": 12.620675086975098, + "ce_orig": 0.9837019443511963, + "epoch": 0.11560859874901143, + "kl_loss": 0.15091584622859955, + "loss_ib": 0.0027712257578969, + "step": 402 + }, + { + "ce_ib": 14.844929695129395, + "ce_orig": 1.3407633304595947, + "epoch": 0.11560859874901143, + "kl_loss": 0.30624860525131226, + "loss_ib": 0.00454697897657752, + "step": 402 + }, + { + "ce_ib": 11.737789154052734, + "ce_orig": 1.1215554475784302, + "epoch": 0.11589618232798907, + "kl_loss": 0.16605661809444427, + "loss_ib": 0.002834344981238246, + "step": 403 + }, + { + "ce_ib": 11.818507194519043, + "ce_orig": 0.4051503837108612, + "epoch": 0.11589618232798907, + "kl_loss": 0.2137288749217987, + "loss_ib": 0.0033191393595188856, + "step": 403 + }, + { + "ce_ib": 10.569378852844238, + "ce_orig": 0.7266899347305298, + "epoch": 0.11589618232798907, + "kl_loss": 0.19015324115753174, + "loss_ib": 0.002958470256999135, + "step": 403 + }, + { + "ce_ib": 10.655888557434082, + "ce_orig": 0.640403151512146, + "epoch": 0.11589618232798907, + "kl_loss": 0.18397970497608185, + "loss_ib": 0.0029053858015686274, + "step": 403 + }, + { + "ce_ib": 10.284205436706543, + "ce_orig": 1.0149288177490234, + "epoch": 0.11618376590696672, + "kl_loss": 0.16832667589187622, + "loss_ib": 0.0027116872370243073, + "step": 404 + }, + { + "ce_ib": 13.454412460327148, + "ce_orig": 1.0919182300567627, + "epoch": 0.11618376590696672, + "kl_loss": 0.15390118956565857, + "loss_ib": 0.0028844529297202826, + "step": 404 + }, + { + "ce_ib": 8.412931442260742, + "ce_orig": 0.6086220145225525, + "epoch": 0.11618376590696672, + "kl_loss": 0.13261398673057556, + "loss_ib": 0.002167432801797986, + "step": 404 + }, + { + "ce_ib": 16.36043930053711, + "ce_orig": 1.389167308807373, + "epoch": 0.11618376590696672, + "kl_loss": 0.13541021943092346, + "loss_ib": 0.002990146167576313, + "step": 404 + }, + { + "epoch": 0.11647134948594436, + "grad_norm": 0.08868135511875153, + "learning_rate": 4.999129715225077e-05, + "loss": 0.8893, + "step": 405 + }, + { + "ce_ib": 11.492820739746094, + "ce_orig": 0.746446967124939, + "epoch": 0.11647134948594436, + "kl_loss": 0.1755758821964264, + "loss_ib": 0.002905040979385376, + "step": 405 + }, + { + "ce_ib": 14.550654411315918, + "ce_orig": 1.1409695148468018, + "epoch": 0.11647134948594436, + "kl_loss": 0.16501018404960632, + "loss_ib": 0.003105167066678405, + "step": 405 + }, + { + "ce_ib": 12.881452560424805, + "ce_orig": 1.0264896154403687, + "epoch": 0.11647134948594436, + "kl_loss": 0.18386085331439972, + "loss_ib": 0.003126753494143486, + "step": 405 + }, + { + "ce_ib": 15.17376708984375, + "ce_orig": 1.3239004611968994, + "epoch": 0.11647134948594436, + "kl_loss": 0.18682563304901123, + "loss_ib": 0.0033856327645480633, + "step": 405 + }, + { + "ce_ib": 14.622434616088867, + "ce_orig": 1.3146438598632812, + "epoch": 0.116758933064922, + "kl_loss": 0.18558424711227417, + "loss_ib": 0.003318085800856352, + "step": 406 + }, + { + "ce_ib": 8.00043773651123, + "ce_orig": 0.7650782465934753, + "epoch": 0.116758933064922, + "kl_loss": 0.14243070781230927, + "loss_ib": 0.0022243508137762547, + "step": 406 + }, + { + "ce_ib": 11.638957977294922, + "ce_orig": 0.927288830280304, + "epoch": 0.116758933064922, + "kl_loss": 0.14190661907196045, + "loss_ib": 0.0025829619262367487, + "step": 406 + }, + { + "ce_ib": 8.698803901672363, + "ce_orig": 0.4740663766860962, + "epoch": 0.116758933064922, + "kl_loss": 0.18497580289840698, + "loss_ib": 0.0027196381706744432, + "step": 406 + }, + { + "ce_ib": 11.999215126037598, + "ce_orig": 0.5628153681755066, + "epoch": 0.11704651664389963, + "kl_loss": 0.261201411485672, + "loss_ib": 0.0038119356613606215, + "step": 407 + }, + { + "ce_ib": 14.865316390991211, + "ce_orig": 1.6626498699188232, + "epoch": 0.11704651664389963, + "kl_loss": 0.17202956974506378, + "loss_ib": 0.003206827212125063, + "step": 407 + }, + { + "ce_ib": 11.803093910217285, + "ce_orig": 0.8193854093551636, + "epoch": 0.11704651664389963, + "kl_loss": 0.2457035779953003, + "loss_ib": 0.003637345042079687, + "step": 407 + }, + { + "ce_ib": 14.206315994262695, + "ce_orig": 1.1790286302566528, + "epoch": 0.11704651664389963, + "kl_loss": 0.21022199094295502, + "loss_ib": 0.0035228515043854713, + "step": 407 + }, + { + "ce_ib": 9.615798950195312, + "ce_orig": 0.5693954229354858, + "epoch": 0.11733410022287727, + "kl_loss": 0.20928806066513062, + "loss_ib": 0.003054460510611534, + "step": 408 + }, + { + "ce_ib": 10.448309898376465, + "ce_orig": 0.7434052228927612, + "epoch": 0.11733410022287727, + "kl_loss": 0.18974988162517548, + "loss_ib": 0.002942329505458474, + "step": 408 + }, + { + "ce_ib": 12.322173118591309, + "ce_orig": 0.7398363351821899, + "epoch": 0.11733410022287727, + "kl_loss": 0.32504746317863464, + "loss_ib": 0.004482691641896963, + "step": 408 + }, + { + "ce_ib": 11.393105506896973, + "ce_orig": 0.2980586290359497, + "epoch": 0.11733410022287727, + "kl_loss": 0.2762417793273926, + "loss_ib": 0.003901728196069598, + "step": 408 + }, + { + "ce_ib": 9.13760757446289, + "ce_orig": 0.48705342411994934, + "epoch": 0.11762168380185492, + "kl_loss": 0.2090642750263214, + "loss_ib": 0.003004403319209814, + "step": 409 + }, + { + "ce_ib": 13.404657363891602, + "ce_orig": 1.3054029941558838, + "epoch": 0.11762168380185492, + "kl_loss": 0.15996284782886505, + "loss_ib": 0.0029400940984487534, + "step": 409 + }, + { + "ce_ib": 14.58011245727539, + "ce_orig": 0.6604776978492737, + "epoch": 0.11762168380185492, + "kl_loss": 0.17515218257904053, + "loss_ib": 0.0032095329370349646, + "step": 409 + }, + { + "ce_ib": 12.46037769317627, + "ce_orig": 0.969291090965271, + "epoch": 0.11762168380185492, + "kl_loss": 0.12114151567220688, + "loss_ib": 0.002457452705129981, + "step": 409 + }, + { + "epoch": 0.11790926738083256, + "grad_norm": 0.08972907811403275, + "learning_rate": 4.9990243242710764e-05, + "loss": 0.8016, + "step": 410 + }, + { + "ce_ib": 14.017788887023926, + "ce_orig": 1.2842930555343628, + "epoch": 0.11790926738083256, + "kl_loss": 0.20839394629001617, + "loss_ib": 0.0034857182763516903, + "step": 410 + }, + { + "ce_ib": 12.625293731689453, + "ce_orig": 0.331617534160614, + "epoch": 0.11790926738083256, + "kl_loss": 0.23530715703964233, + "loss_ib": 0.0036156009882688522, + "step": 410 + }, + { + "ce_ib": 13.452978134155273, + "ce_orig": 0.9680318832397461, + "epoch": 0.11790926738083256, + "kl_loss": 0.17226648330688477, + "loss_ib": 0.003067962359637022, + "step": 410 + }, + { + "ce_ib": 13.609016418457031, + "ce_orig": 1.1668546199798584, + "epoch": 0.11790926738083256, + "kl_loss": 0.15326163172721863, + "loss_ib": 0.0028935179580003023, + "step": 410 + }, + { + "ce_ib": 11.401368141174316, + "ce_orig": 0.7574223279953003, + "epoch": 0.1181968509598102, + "kl_loss": 0.17012295126914978, + "loss_ib": 0.002841366222128272, + "step": 411 + }, + { + "ce_ib": 13.586700439453125, + "ce_orig": 1.1896238327026367, + "epoch": 0.1181968509598102, + "kl_loss": 0.2817670702934265, + "loss_ib": 0.004176340531557798, + "step": 411 + }, + { + "ce_ib": 16.24236488342285, + "ce_orig": 1.1911630630493164, + "epoch": 0.1181968509598102, + "kl_loss": 0.19016054272651672, + "loss_ib": 0.003525841748341918, + "step": 411 + }, + { + "ce_ib": 13.229630470275879, + "ce_orig": 0.9971056580543518, + "epoch": 0.1181968509598102, + "kl_loss": 0.1399802565574646, + "loss_ib": 0.0027227657847106457, + "step": 411 + }, + { + "ce_ib": 10.67956829071045, + "ce_orig": 0.8728124499320984, + "epoch": 0.11848443453878783, + "kl_loss": 0.15304341912269592, + "loss_ib": 0.0025983911473304033, + "step": 412 + }, + { + "ce_ib": 7.926272869110107, + "ce_orig": 0.4618622958660126, + "epoch": 0.11848443453878783, + "kl_loss": 0.08525725454092026, + "loss_ib": 0.0016451997216790915, + "step": 412 + }, + { + "ce_ib": 12.342477798461914, + "ce_orig": 0.6581653952598572, + "epoch": 0.11848443453878783, + "kl_loss": 0.20672234892845154, + "loss_ib": 0.0033014710061252117, + "step": 412 + }, + { + "ce_ib": 12.10908317565918, + "ce_orig": 0.8213714957237244, + "epoch": 0.11848443453878783, + "kl_loss": 0.23801177740097046, + "loss_ib": 0.00359102594666183, + "step": 412 + }, + { + "ce_ib": 11.018802642822266, + "ce_orig": 0.5446355938911438, + "epoch": 0.11877201811776547, + "kl_loss": 0.26025596261024475, + "loss_ib": 0.0037044398486614227, + "step": 413 + }, + { + "ce_ib": 12.422916412353516, + "ce_orig": 0.9991008043289185, + "epoch": 0.11877201811776547, + "kl_loss": 0.37930434942245483, + "loss_ib": 0.00503533473238349, + "step": 413 + }, + { + "ce_ib": 11.024622917175293, + "ce_orig": 0.8313679099082947, + "epoch": 0.11877201811776547, + "kl_loss": 0.18216568231582642, + "loss_ib": 0.002924119122326374, + "step": 413 + }, + { + "ce_ib": 12.971683502197266, + "ce_orig": 0.9980677366256714, + "epoch": 0.11877201811776547, + "kl_loss": 0.28354763984680176, + "loss_ib": 0.004132644273340702, + "step": 413 + }, + { + "ce_ib": 10.75366497039795, + "ce_orig": 1.0008810758590698, + "epoch": 0.11905960169674312, + "kl_loss": 0.21501143276691437, + "loss_ib": 0.00322548090480268, + "step": 414 + }, + { + "ce_ib": 15.019447326660156, + "ce_orig": 1.0870994329452515, + "epoch": 0.11905960169674312, + "kl_loss": 0.18550065159797668, + "loss_ib": 0.003356951056048274, + "step": 414 + }, + { + "ce_ib": 13.59741497039795, + "ce_orig": 1.1491084098815918, + "epoch": 0.11905960169674312, + "kl_loss": 0.31042391061782837, + "loss_ib": 0.004463980905711651, + "step": 414 + }, + { + "ce_ib": 11.30091667175293, + "ce_orig": 0.7725252509117126, + "epoch": 0.11905960169674312, + "kl_loss": 0.19686946272850037, + "loss_ib": 0.003098786110058427, + "step": 414 + }, + { + "epoch": 0.11934718527572076, + "grad_norm": 0.0853128507733345, + "learning_rate": 4.998912912587444e-05, + "loss": 0.8496, + "step": 415 + }, + { + "ce_ib": 10.824992179870605, + "ce_orig": 1.003801703453064, + "epoch": 0.11934718527572076, + "kl_loss": 0.11075378954410553, + "loss_ib": 0.0021900369320064783, + "step": 415 + }, + { + "ce_ib": 14.990334510803223, + "ce_orig": 1.1157439947128296, + "epoch": 0.11934718527572076, + "kl_loss": 0.18331551551818848, + "loss_ib": 0.0033321885857731104, + "step": 415 + }, + { + "ce_ib": 10.030242919921875, + "ce_orig": 0.7444831728935242, + "epoch": 0.11934718527572076, + "kl_loss": 0.2142019271850586, + "loss_ib": 0.0031450435053557158, + "step": 415 + }, + { + "ce_ib": 7.975912570953369, + "ce_orig": 0.713485062122345, + "epoch": 0.11934718527572076, + "kl_loss": 0.20920607447624207, + "loss_ib": 0.00288965180516243, + "step": 415 + }, + { + "ce_ib": 12.238396644592285, + "ce_orig": 1.1776320934295654, + "epoch": 0.1196347688546984, + "kl_loss": 0.21955502033233643, + "loss_ib": 0.0034193897154182196, + "step": 416 + }, + { + "ce_ib": 13.989873886108398, + "ce_orig": 1.128517746925354, + "epoch": 0.1196347688546984, + "kl_loss": 0.2628554701805115, + "loss_ib": 0.0040275417268276215, + "step": 416 + }, + { + "ce_ib": 15.87778091430664, + "ce_orig": 1.3535970449447632, + "epoch": 0.1196347688546984, + "kl_loss": 0.197160542011261, + "loss_ib": 0.0035593833308666945, + "step": 416 + }, + { + "ce_ib": 8.849984169006348, + "ce_orig": 0.872184157371521, + "epoch": 0.1196347688546984, + "kl_loss": 0.1935926228761673, + "loss_ib": 0.0028209243901073933, + "step": 416 + }, + { + "ce_ib": 9.946513175964355, + "ce_orig": 0.5356448292732239, + "epoch": 0.11992235243367604, + "kl_loss": 0.2691783308982849, + "loss_ib": 0.0036864345893263817, + "step": 417 + }, + { + "ce_ib": 13.770381927490234, + "ce_orig": 1.427547812461853, + "epoch": 0.11992235243367604, + "kl_loss": 0.1527450531721115, + "loss_ib": 0.002904488705098629, + "step": 417 + }, + { + "ce_ib": 12.310346603393555, + "ce_orig": 1.485551118850708, + "epoch": 0.11992235243367604, + "kl_loss": 0.1050770953297615, + "loss_ib": 0.0022818055003881454, + "step": 417 + }, + { + "ce_ib": 14.205962181091309, + "ce_orig": 1.5815967321395874, + "epoch": 0.11992235243367604, + "kl_loss": 0.26177799701690674, + "loss_ib": 0.004038376267999411, + "step": 417 + }, + { + "ce_ib": 9.17827320098877, + "ce_orig": 0.5437095165252686, + "epoch": 0.12020993601265367, + "kl_loss": 0.17721109092235565, + "loss_ib": 0.0026899382937699556, + "step": 418 + }, + { + "ce_ib": 15.857308387756348, + "ce_orig": 0.8890218138694763, + "epoch": 0.12020993601265367, + "kl_loss": 0.35342293977737427, + "loss_ib": 0.0051199602894485, + "step": 418 + }, + { + "ce_ib": 12.519335746765137, + "ce_orig": 0.40189069509506226, + "epoch": 0.12020993601265367, + "kl_loss": 0.19997818768024445, + "loss_ib": 0.0032517153304070234, + "step": 418 + }, + { + "ce_ib": 10.383864402770996, + "ce_orig": 0.9198765754699707, + "epoch": 0.12020993601265367, + "kl_loss": 0.1422748863697052, + "loss_ib": 0.0024611353874206543, + "step": 418 + }, + { + "ce_ib": 4.41956090927124, + "ce_orig": 0.15882746875286102, + "epoch": 0.12049751959163132, + "kl_loss": 0.42377138137817383, + "loss_ib": 0.004679669626057148, + "step": 419 + }, + { + "ce_ib": 8.898272514343262, + "ce_orig": 0.6479420065879822, + "epoch": 0.12049751959163132, + "kl_loss": 0.14590340852737427, + "loss_ib": 0.002348861424252391, + "step": 419 + }, + { + "ce_ib": 11.02161693572998, + "ce_orig": 0.6498445272445679, + "epoch": 0.12049751959163132, + "kl_loss": 0.18951579928398132, + "loss_ib": 0.0029973196797072887, + "step": 419 + }, + { + "ce_ib": 13.597489356994629, + "ce_orig": 1.474985122680664, + "epoch": 0.12049751959163132, + "kl_loss": 0.2165094017982483, + "loss_ib": 0.0035248426720499992, + "step": 419 + }, + { + "epoch": 0.12078510317060896, + "grad_norm": 0.09395145624876022, + "learning_rate": 4.998795480442595e-05, + "loss": 0.919, + "step": 420 + }, + { + "ce_ib": 13.930419921875, + "ce_orig": 1.4253751039505005, + "epoch": 0.12078510317060896, + "kl_loss": 0.25621849298477173, + "loss_ib": 0.003955226857215166, + "step": 420 + }, + { + "ce_ib": 13.867815017700195, + "ce_orig": 1.0413322448730469, + "epoch": 0.12078510317060896, + "kl_loss": 0.1656143069267273, + "loss_ib": 0.003042924450710416, + "step": 420 + }, + { + "ce_ib": 9.55808162689209, + "ce_orig": 0.8998080492019653, + "epoch": 0.12078510317060896, + "kl_loss": 0.2003626972436905, + "loss_ib": 0.002959434874355793, + "step": 420 + }, + { + "ce_ib": 6.539864540100098, + "ce_orig": 0.4415733814239502, + "epoch": 0.12078510317060896, + "kl_loss": 0.16704899072647095, + "loss_ib": 0.0023244761396199465, + "step": 420 + }, + { + "ce_ib": 13.084672927856445, + "ce_orig": 0.9477989077568054, + "epoch": 0.1210726867495866, + "kl_loss": 0.1585165113210678, + "loss_ib": 0.0028936322778463364, + "step": 421 + }, + { + "ce_ib": 10.002495765686035, + "ce_orig": 0.7920336127281189, + "epoch": 0.1210726867495866, + "kl_loss": 0.23921585083007812, + "loss_ib": 0.0033924079034477472, + "step": 421 + }, + { + "ce_ib": 9.44317626953125, + "ce_orig": 0.7052544355392456, + "epoch": 0.1210726867495866, + "kl_loss": 0.1775376945734024, + "loss_ib": 0.002719694282859564, + "step": 421 + }, + { + "ce_ib": 6.67018985748291, + "ce_orig": 0.35514646768569946, + "epoch": 0.1210726867495866, + "kl_loss": 0.27639925479888916, + "loss_ib": 0.0034310114569962025, + "step": 421 + }, + { + "ce_ib": 9.008115768432617, + "ce_orig": 1.0835165977478027, + "epoch": 0.12136027032856424, + "kl_loss": 0.1306457817554474, + "loss_ib": 0.002207269426435232, + "step": 422 + }, + { + "ce_ib": 9.596372604370117, + "ce_orig": 0.6315584182739258, + "epoch": 0.12136027032856424, + "kl_loss": 0.16708403825759888, + "loss_ib": 0.002630477538332343, + "step": 422 + }, + { + "ce_ib": 10.36826229095459, + "ce_orig": 0.8352252840995789, + "epoch": 0.12136027032856424, + "kl_loss": 0.4174191355705261, + "loss_ib": 0.005211017560213804, + "step": 422 + }, + { + "ce_ib": 6.275569438934326, + "ce_orig": 0.6971433758735657, + "epoch": 0.12136027032856424, + "kl_loss": 0.10864199697971344, + "loss_ib": 0.001713976846076548, + "step": 422 + }, + { + "ce_ib": 11.055356979370117, + "ce_orig": 0.711353600025177, + "epoch": 0.12164785390754188, + "kl_loss": 0.223836749792099, + "loss_ib": 0.0033439029939472675, + "step": 423 + }, + { + "ce_ib": 12.989297866821289, + "ce_orig": 0.8257631063461304, + "epoch": 0.12164785390754188, + "kl_loss": 0.21258409321308136, + "loss_ib": 0.003424770664423704, + "step": 423 + }, + { + "ce_ib": 7.487967014312744, + "ce_orig": 0.5072060227394104, + "epoch": 0.12164785390754188, + "kl_loss": 0.15735791623592377, + "loss_ib": 0.002322375774383545, + "step": 423 + }, + { + "ce_ib": 13.437426567077637, + "ce_orig": 0.9563009142875671, + "epoch": 0.12164785390754188, + "kl_loss": 0.1478041410446167, + "loss_ib": 0.0028217840008437634, + "step": 423 + }, + { + "ce_ib": 12.20171070098877, + "ce_orig": 0.7322474718093872, + "epoch": 0.12193543748651951, + "kl_loss": 0.2593429684638977, + "loss_ib": 0.003813600866124034, + "step": 424 + }, + { + "ce_ib": 10.579559326171875, + "ce_orig": 0.5674452781677246, + "epoch": 0.12193543748651951, + "kl_loss": 0.24632222950458527, + "loss_ib": 0.0035211783833801746, + "step": 424 + }, + { + "ce_ib": 12.876055717468262, + "ce_orig": 1.14524245262146, + "epoch": 0.12193543748651951, + "kl_loss": 0.21795441210269928, + "loss_ib": 0.003467149566859007, + "step": 424 + }, + { + "ce_ib": 8.761207580566406, + "ce_orig": 0.9090970754623413, + "epoch": 0.12193543748651951, + "kl_loss": 0.15418817102909088, + "loss_ib": 0.0024180023465305567, + "step": 424 + }, + { + "epoch": 0.12222302106549716, + "grad_norm": 0.07512833178043365, + "learning_rate": 4.9986720281194496e-05, + "loss": 0.8761, + "step": 425 + }, + { + "ce_ib": 7.6993608474731445, + "ce_orig": 0.40163472294807434, + "epoch": 0.12222302106549716, + "kl_loss": 0.20606671273708344, + "loss_ib": 0.0028306031599640846, + "step": 425 + }, + { + "ce_ib": 10.682936668395996, + "ce_orig": 0.752029538154602, + "epoch": 0.12222302106549716, + "kl_loss": 0.17551946640014648, + "loss_ib": 0.0028234883211553097, + "step": 425 + }, + { + "ce_ib": 10.21149730682373, + "ce_orig": 0.8434242010116577, + "epoch": 0.12222302106549716, + "kl_loss": 0.19770103693008423, + "loss_ib": 0.0029981599655002356, + "step": 425 + }, + { + "ce_ib": 9.649618148803711, + "ce_orig": 0.6008751392364502, + "epoch": 0.12222302106549716, + "kl_loss": 0.2020394206047058, + "loss_ib": 0.0029853556770831347, + "step": 425 + }, + { + "ce_ib": 11.479440689086914, + "ce_orig": 0.9273905158042908, + "epoch": 0.1225106046444748, + "kl_loss": 0.15146127343177795, + "loss_ib": 0.0026625567115843296, + "step": 426 + }, + { + "ce_ib": 7.9811859130859375, + "ce_orig": 0.25573351979255676, + "epoch": 0.1225106046444748, + "kl_loss": 0.23791244626045227, + "loss_ib": 0.0031772428192198277, + "step": 426 + }, + { + "ce_ib": 10.76065444946289, + "ce_orig": 0.8275227546691895, + "epoch": 0.1225106046444748, + "kl_loss": 0.2022540271282196, + "loss_ib": 0.0030986058991402388, + "step": 426 + }, + { + "ce_ib": 10.59325885772705, + "ce_orig": 0.6065911650657654, + "epoch": 0.1225106046444748, + "kl_loss": 0.2510051429271698, + "loss_ib": 0.003569377353414893, + "step": 426 + }, + { + "ce_ib": 9.989922523498535, + "ce_orig": 0.6457223296165466, + "epoch": 0.12279818822345244, + "kl_loss": 0.41983091831207275, + "loss_ib": 0.005197301506996155, + "step": 427 + }, + { + "ce_ib": 14.872665405273438, + "ce_orig": 1.3183645009994507, + "epoch": 0.12279818822345244, + "kl_loss": 0.25291332602500916, + "loss_ib": 0.004016399849206209, + "step": 427 + }, + { + "ce_ib": 14.173356056213379, + "ce_orig": 0.860676646232605, + "epoch": 0.12279818822345244, + "kl_loss": 0.22176355123519897, + "loss_ib": 0.0036349711008369923, + "step": 427 + }, + { + "ce_ib": 12.447295188903809, + "ce_orig": 1.0465346574783325, + "epoch": 0.12279818822345244, + "kl_loss": 0.15125788748264313, + "loss_ib": 0.00275730830617249, + "step": 427 + }, + { + "ce_ib": 11.742615699768066, + "ce_orig": 0.8324576616287231, + "epoch": 0.12308577180243008, + "kl_loss": 0.26873379945755005, + "loss_ib": 0.0038615993689745665, + "step": 428 + }, + { + "ce_ib": 15.946845054626465, + "ce_orig": 1.4635050296783447, + "epoch": 0.12308577180243008, + "kl_loss": 0.3334965407848358, + "loss_ib": 0.004929649643599987, + "step": 428 + }, + { + "ce_ib": 11.777470588684082, + "ce_orig": 0.8487641215324402, + "epoch": 0.12308577180243008, + "kl_loss": 0.18009008467197418, + "loss_ib": 0.0029786478262394667, + "step": 428 + }, + { + "ce_ib": 15.157903671264648, + "ce_orig": 1.4350470304489136, + "epoch": 0.12308577180243008, + "kl_loss": 0.16165342926979065, + "loss_ib": 0.003132324665784836, + "step": 428 + }, + { + "ce_ib": 6.574849605560303, + "ce_orig": 0.35640019178390503, + "epoch": 0.12337335538140771, + "kl_loss": 0.16976439952850342, + "loss_ib": 0.0023551289923489094, + "step": 429 + }, + { + "ce_ib": 11.606804847717285, + "ce_orig": 0.7963778972625732, + "epoch": 0.12337335538140771, + "kl_loss": 0.17389288544654846, + "loss_ib": 0.002899609040468931, + "step": 429 + }, + { + "ce_ib": 11.487293243408203, + "ce_orig": 1.072171926498413, + "epoch": 0.12337335538140771, + "kl_loss": 0.19142760336399078, + "loss_ib": 0.0030630051624029875, + "step": 429 + }, + { + "ce_ib": 7.667901992797852, + "ce_orig": 0.5443602204322815, + "epoch": 0.12337335538140771, + "kl_loss": 0.1836308240890503, + "loss_ib": 0.0026030982844531536, + "step": 429 + }, + { + "epoch": 0.12366093896038537, + "grad_norm": 0.08391211926937103, + "learning_rate": 4.998542555915435e-05, + "loss": 0.9167, + "step": 430 + }, + { + "ce_ib": 11.157709121704102, + "ce_orig": 0.8726559281349182, + "epoch": 0.12366093896038537, + "kl_loss": 0.19528821110725403, + "loss_ib": 0.0030686529353260994, + "step": 430 + }, + { + "ce_ib": 12.455047607421875, + "ce_orig": 1.3305251598358154, + "epoch": 0.12366093896038537, + "kl_loss": 0.11633329838514328, + "loss_ib": 0.002408837666735053, + "step": 430 + }, + { + "ce_ib": 17.702678680419922, + "ce_orig": 1.9683383703231812, + "epoch": 0.12366093896038537, + "kl_loss": 0.21032744646072388, + "loss_ib": 0.003873542184010148, + "step": 430 + }, + { + "ce_ib": 11.364348411560059, + "ce_orig": 1.2615947723388672, + "epoch": 0.12366093896038537, + "kl_loss": 0.15659835934638977, + "loss_ib": 0.0027024184819310904, + "step": 430 + }, + { + "ce_ib": 10.107429504394531, + "ce_orig": 0.8148799538612366, + "epoch": 0.123948522539363, + "kl_loss": 0.13077875971794128, + "loss_ib": 0.0023185305763036013, + "step": 431 + }, + { + "ce_ib": 11.587873458862305, + "ce_orig": 1.074462890625, + "epoch": 0.123948522539363, + "kl_loss": 0.24695566296577454, + "loss_ib": 0.0036283438093960285, + "step": 431 + }, + { + "ce_ib": 10.614514350891113, + "ce_orig": 0.9438521862030029, + "epoch": 0.123948522539363, + "kl_loss": 0.2546341121196747, + "loss_ib": 0.0036077925469726324, + "step": 431 + }, + { + "ce_ib": 10.189565658569336, + "ce_orig": 0.468057245016098, + "epoch": 0.123948522539363, + "kl_loss": 0.21918027102947235, + "loss_ib": 0.003210759023204446, + "step": 431 + }, + { + "ce_ib": 16.234272003173828, + "ce_orig": 1.4394607543945312, + "epoch": 0.12423610611834064, + "kl_loss": 0.16391563415527344, + "loss_ib": 0.0032625836320221424, + "step": 432 + }, + { + "ce_ib": 13.389922142028809, + "ce_orig": 1.0571950674057007, + "epoch": 0.12423610611834064, + "kl_loss": 0.25300243496894836, + "loss_ib": 0.003869016421958804, + "step": 432 + }, + { + "ce_ib": 11.05131721496582, + "ce_orig": 0.8905650973320007, + "epoch": 0.12423610611834064, + "kl_loss": 0.1906907856464386, + "loss_ib": 0.0030120396986603737, + "step": 432 + }, + { + "ce_ib": 8.993093490600586, + "ce_orig": 0.7904607653617859, + "epoch": 0.12423610611834064, + "kl_loss": 0.18500834703445435, + "loss_ib": 0.002749392529949546, + "step": 432 + }, + { + "ce_ib": 13.643828392028809, + "ce_orig": 0.8764438629150391, + "epoch": 0.12452368969731828, + "kl_loss": 0.25997138023376465, + "loss_ib": 0.003964096307754517, + "step": 433 + }, + { + "ce_ib": 6.452097415924072, + "ce_orig": 0.5795682072639465, + "epoch": 0.12452368969731828, + "kl_loss": 0.1325344741344452, + "loss_ib": 0.001970554469153285, + "step": 433 + }, + { + "ce_ib": 10.294032096862793, + "ce_orig": 0.34366491436958313, + "epoch": 0.12452368969731828, + "kl_loss": 0.12770652770996094, + "loss_ib": 0.0023064683191478252, + "step": 433 + }, + { + "ce_ib": 8.313075065612793, + "ce_orig": 0.6022379398345947, + "epoch": 0.12452368969731828, + "kl_loss": 0.11339238286018372, + "loss_ib": 0.0019652312621474266, + "step": 433 + }, + { + "ce_ib": 14.330761909484863, + "ce_orig": 0.8713304996490479, + "epoch": 0.12481127327629592, + "kl_loss": 0.22767247259616852, + "loss_ib": 0.003709800774231553, + "step": 434 + }, + { + "ce_ib": 9.519655227661133, + "ce_orig": 0.5508330464363098, + "epoch": 0.12481127327629592, + "kl_loss": 0.1265774965286255, + "loss_ib": 0.002217740286141634, + "step": 434 + }, + { + "ce_ib": 12.286112785339355, + "ce_orig": 0.8811700344085693, + "epoch": 0.12481127327629592, + "kl_loss": 0.17079584300518036, + "loss_ib": 0.0029365697409957647, + "step": 434 + }, + { + "ce_ib": 8.997398376464844, + "ce_orig": 0.48554953932762146, + "epoch": 0.12481127327629592, + "kl_loss": 0.13161113858222961, + "loss_ib": 0.0022158510982990265, + "step": 434 + }, + { + "epoch": 0.12509885685527355, + "grad_norm": 0.08008844405412674, + "learning_rate": 4.99840706414248e-05, + "loss": 0.8499, + "step": 435 + }, + { + "ce_ib": 12.028959274291992, + "ce_orig": 0.7656774520874023, + "epoch": 0.12509885685527355, + "kl_loss": 0.18438570201396942, + "loss_ib": 0.0030467526521533728, + "step": 435 + }, + { + "ce_ib": 9.531744003295898, + "ce_orig": 0.7000839710235596, + "epoch": 0.12509885685527355, + "kl_loss": 0.1339460015296936, + "loss_ib": 0.002292634453624487, + "step": 435 + }, + { + "ce_ib": 7.86661958694458, + "ce_orig": 0.47849422693252563, + "epoch": 0.12509885685527355, + "kl_loss": 0.20920854806900024, + "loss_ib": 0.0028787474147975445, + "step": 435 + }, + { + "ce_ib": 10.296324729919434, + "ce_orig": 1.1329785585403442, + "epoch": 0.12509885685527355, + "kl_loss": 0.17465677857398987, + "loss_ib": 0.002776200184598565, + "step": 435 + }, + { + "ce_ib": 8.947026252746582, + "ce_orig": 0.9858295321464539, + "epoch": 0.1253864404342512, + "kl_loss": 0.22594591975212097, + "loss_ib": 0.0031541618518531322, + "step": 436 + }, + { + "ce_ib": 10.008584022521973, + "ce_orig": 0.7601190805435181, + "epoch": 0.1253864404342512, + "kl_loss": 0.18392398953437805, + "loss_ib": 0.0028400979936122894, + "step": 436 + }, + { + "ce_ib": 18.1694393157959, + "ce_orig": 1.7608329057693481, + "epoch": 0.1253864404342512, + "kl_loss": 0.3656727969646454, + "loss_ib": 0.005473671946674585, + "step": 436 + }, + { + "ce_ib": 10.617599487304688, + "ce_orig": 0.7405425906181335, + "epoch": 0.1253864404342512, + "kl_loss": 0.23880283534526825, + "loss_ib": 0.003449788084253669, + "step": 436 + }, + { + "ce_ib": 7.292166233062744, + "ce_orig": 0.321105033159256, + "epoch": 0.12567402401322886, + "kl_loss": 0.573823094367981, + "loss_ib": 0.006467447150498629, + "step": 437 + }, + { + "ce_ib": 12.345170021057129, + "ce_orig": 1.1237767934799194, + "epoch": 0.12567402401322886, + "kl_loss": 0.12964360415935516, + "loss_ib": 0.0025309529155492783, + "step": 437 + }, + { + "ce_ib": 9.116506576538086, + "ce_orig": 0.351901113986969, + "epoch": 0.12567402401322886, + "kl_loss": 0.1449497938156128, + "loss_ib": 0.0023611485958099365, + "step": 437 + }, + { + "ce_ib": 9.928841590881348, + "ce_orig": 0.5612432956695557, + "epoch": 0.12567402401322886, + "kl_loss": 0.17729271948337555, + "loss_ib": 0.0027658112812787294, + "step": 437 + }, + { + "ce_ib": 8.56289005279541, + "ce_orig": 0.5716758966445923, + "epoch": 0.12596160759220648, + "kl_loss": 0.1342535763978958, + "loss_ib": 0.0021988246589899063, + "step": 438 + }, + { + "ce_ib": 13.346972465515137, + "ce_orig": 0.7096478939056396, + "epoch": 0.12596160759220648, + "kl_loss": 0.1945207715034485, + "loss_ib": 0.0032799046020954847, + "step": 438 + }, + { + "ce_ib": 5.58247709274292, + "ce_orig": 0.38993921875953674, + "epoch": 0.12596160759220648, + "kl_loss": 0.3592712879180908, + "loss_ib": 0.004150960128754377, + "step": 438 + }, + { + "ce_ib": 10.52940845489502, + "ce_orig": 0.3933676481246948, + "epoch": 0.12596160759220648, + "kl_loss": 0.13906532526016235, + "loss_ib": 0.0024435939267277718, + "step": 438 + }, + { + "ce_ib": 10.115212440490723, + "ce_orig": 0.6660525798797607, + "epoch": 0.12624919117118413, + "kl_loss": 0.28316277265548706, + "loss_ib": 0.003843148937448859, + "step": 439 + }, + { + "ce_ib": 11.30185317993164, + "ce_orig": 0.8098467588424683, + "epoch": 0.12624919117118413, + "kl_loss": 0.16112932562828064, + "loss_ib": 0.002741478616371751, + "step": 439 + }, + { + "ce_ib": 7.795021057128906, + "ce_orig": 0.6182560324668884, + "epoch": 0.12624919117118413, + "kl_loss": 0.3065950274467468, + "loss_ib": 0.003845452331006527, + "step": 439 + }, + { + "ce_ib": 13.468255996704102, + "ce_orig": 0.9566720128059387, + "epoch": 0.12624919117118413, + "kl_loss": 0.2001672387123108, + "loss_ib": 0.003348497673869133, + "step": 439 + }, + { + "epoch": 0.12653677475016176, + "grad_norm": 0.08590603619813919, + "learning_rate": 4.998265553127013e-05, + "loss": 0.8382, + "step": 440 + }, + { + "ce_ib": 11.135417938232422, + "ce_orig": 0.9260854125022888, + "epoch": 0.12653677475016176, + "kl_loss": 0.15384265780448914, + "loss_ib": 0.0026519682724028826, + "step": 440 + }, + { + "ce_ib": 11.6417818069458, + "ce_orig": 0.7246519923210144, + "epoch": 0.12653677475016176, + "kl_loss": 0.18312156200408936, + "loss_ib": 0.002995393704622984, + "step": 440 + }, + { + "ce_ib": 11.070989608764648, + "ce_orig": 1.1319186687469482, + "epoch": 0.12653677475016176, + "kl_loss": 0.18624573945999146, + "loss_ib": 0.002969556488096714, + "step": 440 + }, + { + "ce_ib": 15.00080394744873, + "ce_orig": 1.5474122762680054, + "epoch": 0.12653677475016176, + "kl_loss": 0.17769742012023926, + "loss_ib": 0.003277054289355874, + "step": 440 + }, + { + "ce_ib": 12.565924644470215, + "ce_orig": 0.9930309057235718, + "epoch": 0.1268243583291394, + "kl_loss": 0.22812071442604065, + "loss_ib": 0.003537799697369337, + "step": 441 + }, + { + "ce_ib": 8.496469497680664, + "ce_orig": 0.7013565301895142, + "epoch": 0.1268243583291394, + "kl_loss": 0.1900731325149536, + "loss_ib": 0.0027503781020641327, + "step": 441 + }, + { + "ce_ib": 9.66351318359375, + "ce_orig": 0.984688401222229, + "epoch": 0.1268243583291394, + "kl_loss": 0.11712448298931122, + "loss_ib": 0.00213759602047503, + "step": 441 + }, + { + "ce_ib": 11.531122207641602, + "ce_orig": 0.8578464388847351, + "epoch": 0.1268243583291394, + "kl_loss": 0.22207866609096527, + "loss_ib": 0.003373898798599839, + "step": 441 + }, + { + "ce_ib": 11.552083015441895, + "ce_orig": 1.012474775314331, + "epoch": 0.12711194190811706, + "kl_loss": 0.19562029838562012, + "loss_ib": 0.003111411351710558, + "step": 442 + }, + { + "ce_ib": 8.36318588256836, + "ce_orig": 0.7492285966873169, + "epoch": 0.12711194190811706, + "kl_loss": 0.16260814666748047, + "loss_ib": 0.0024623998906463385, + "step": 442 + }, + { + "ce_ib": 9.527094841003418, + "ce_orig": 0.7202103137969971, + "epoch": 0.12711194190811706, + "kl_loss": 0.128434419631958, + "loss_ib": 0.0022370535880327225, + "step": 442 + }, + { + "ce_ib": 11.373018264770508, + "ce_orig": 0.8286006450653076, + "epoch": 0.12711194190811706, + "kl_loss": 0.22360824048519135, + "loss_ib": 0.0033733840100467205, + "step": 442 + }, + { + "ce_ib": 12.500737190246582, + "ce_orig": 1.1852082014083862, + "epoch": 0.12739952548709468, + "kl_loss": 0.17222145199775696, + "loss_ib": 0.0029722880572080612, + "step": 443 + }, + { + "ce_ib": 14.716822624206543, + "ce_orig": 1.8037943840026855, + "epoch": 0.12739952548709468, + "kl_loss": 0.22250494360923767, + "loss_ib": 0.003696731524541974, + "step": 443 + }, + { + "ce_ib": 13.993595123291016, + "ce_orig": 1.002560019493103, + "epoch": 0.12739952548709468, + "kl_loss": 0.1844272017478943, + "loss_ib": 0.0032436316832900047, + "step": 443 + }, + { + "ce_ib": 9.316429138183594, + "ce_orig": 0.4433915317058563, + "epoch": 0.12739952548709468, + "kl_loss": 0.3312546908855438, + "loss_ib": 0.0042441897094249725, + "step": 443 + }, + { + "ce_ib": 10.669767379760742, + "ce_orig": 0.8024405837059021, + "epoch": 0.12768710906607234, + "kl_loss": 0.18377812206745148, + "loss_ib": 0.002904757857322693, + "step": 444 + }, + { + "ce_ib": 8.749341011047363, + "ce_orig": 0.44320985674858093, + "epoch": 0.12768710906607234, + "kl_loss": 0.2103624790906906, + "loss_ib": 0.002978558884933591, + "step": 444 + }, + { + "ce_ib": 6.491762161254883, + "ce_orig": 0.7110791802406311, + "epoch": 0.12768710906607234, + "kl_loss": 0.12732303142547607, + "loss_ib": 0.0019224064890295267, + "step": 444 + }, + { + "ce_ib": 14.327024459838867, + "ce_orig": 1.2938189506530762, + "epoch": 0.12768710906607234, + "kl_loss": 0.1754641830921173, + "loss_ib": 0.003187343943864107, + "step": 444 + }, + { + "epoch": 0.12797469264504996, + "grad_norm": 0.07503530383110046, + "learning_rate": 4.99811802320997e-05, + "loss": 0.8966, + "step": 445 + }, + { + "ce_ib": 12.345433235168457, + "ce_orig": 0.8386138677597046, + "epoch": 0.12797469264504996, + "kl_loss": 0.31317782402038574, + "loss_ib": 0.004366321489214897, + "step": 445 + }, + { + "ce_ib": 11.949541091918945, + "ce_orig": 0.5236607193946838, + "epoch": 0.12797469264504996, + "kl_loss": 0.19711901247501373, + "loss_ib": 0.0031661440152674913, + "step": 445 + }, + { + "ce_ib": 10.233663558959961, + "ce_orig": 0.6846522688865662, + "epoch": 0.12797469264504996, + "kl_loss": 0.16328191757202148, + "loss_ib": 0.002656185533851385, + "step": 445 + }, + { + "ce_ib": 8.310503959655762, + "ce_orig": 0.7847996950149536, + "epoch": 0.12797469264504996, + "kl_loss": 0.137738898396492, + "loss_ib": 0.002208439400419593, + "step": 445 + }, + { + "ce_ib": 6.889098167419434, + "ce_orig": 0.5118075609207153, + "epoch": 0.1282622762240276, + "kl_loss": 0.16013775765895844, + "loss_ib": 0.0022902872879058123, + "step": 446 + }, + { + "ce_ib": 14.948297500610352, + "ce_orig": 1.007593035697937, + "epoch": 0.1282622762240276, + "kl_loss": 0.17850220203399658, + "loss_ib": 0.0032798515167087317, + "step": 446 + }, + { + "ce_ib": 10.017216682434082, + "ce_orig": 1.0469541549682617, + "epoch": 0.1282622762240276, + "kl_loss": 0.15087240934371948, + "loss_ib": 0.0025104456581175327, + "step": 446 + }, + { + "ce_ib": 9.104086875915527, + "ce_orig": 0.5955268144607544, + "epoch": 0.1282622762240276, + "kl_loss": 0.2088235318660736, + "loss_ib": 0.002998644020408392, + "step": 446 + }, + { + "ce_ib": 7.966549396514893, + "ce_orig": 0.7193945646286011, + "epoch": 0.12854985980300526, + "kl_loss": 0.17041301727294922, + "loss_ib": 0.0025007850490510464, + "step": 447 + }, + { + "ce_ib": 12.620382308959961, + "ce_orig": 0.6646131873130798, + "epoch": 0.12854985980300526, + "kl_loss": 0.21500855684280396, + "loss_ib": 0.0034121237695217133, + "step": 447 + }, + { + "ce_ib": 5.8810319900512695, + "ce_orig": 0.6042389869689941, + "epoch": 0.12854985980300526, + "kl_loss": 0.16328245401382446, + "loss_ib": 0.0022209277376532555, + "step": 447 + }, + { + "ce_ib": 13.061161994934082, + "ce_orig": 0.9521631598472595, + "epoch": 0.12854985980300526, + "kl_loss": 0.17168429493904114, + "loss_ib": 0.0030229592230170965, + "step": 447 + }, + { + "ce_ib": 8.089184761047363, + "ce_orig": 0.7481786608695984, + "epoch": 0.12883744338198289, + "kl_loss": 0.15286147594451904, + "loss_ib": 0.0023375332821160555, + "step": 448 + }, + { + "ce_ib": 7.247897624969482, + "ce_orig": 0.7153653502464294, + "epoch": 0.12883744338198289, + "kl_loss": 0.16917859017848969, + "loss_ib": 0.0024165755603462458, + "step": 448 + }, + { + "ce_ib": 9.982969284057617, + "ce_orig": 0.8300044536590576, + "epoch": 0.12883744338198289, + "kl_loss": 0.1288834512233734, + "loss_ib": 0.002287131268531084, + "step": 448 + }, + { + "ce_ib": 12.154969215393066, + "ce_orig": 0.8374365568161011, + "epoch": 0.12883744338198289, + "kl_loss": 0.2313854843378067, + "loss_ib": 0.003529351670295, + "step": 448 + }, + { + "ce_ib": 7.137256622314453, + "ce_orig": 0.24174675345420837, + "epoch": 0.12912502696096054, + "kl_loss": 0.4337605834007263, + "loss_ib": 0.005051331594586372, + "step": 449 + }, + { + "ce_ib": 13.23520565032959, + "ce_orig": 0.9527087807655334, + "epoch": 0.12912502696096054, + "kl_loss": 0.14814800024032593, + "loss_ib": 0.002805000403895974, + "step": 449 + }, + { + "ce_ib": 9.36728572845459, + "ce_orig": 0.4500534236431122, + "epoch": 0.12912502696096054, + "kl_loss": 0.12568199634552002, + "loss_ib": 0.002193548483774066, + "step": 449 + }, + { + "ce_ib": 8.504453659057617, + "ce_orig": 0.9163048267364502, + "epoch": 0.12912502696096054, + "kl_loss": 0.15279194712638855, + "loss_ib": 0.0023783647920936346, + "step": 449 + }, + { + "epoch": 0.12941261053993816, + "grad_norm": 0.09120236337184906, + "learning_rate": 4.9979644747467835e-05, + "loss": 0.8569, + "step": 450 + }, + { + "ce_ib": 12.404295921325684, + "ce_orig": 0.7500115036964417, + "epoch": 0.12941261053993816, + "kl_loss": 0.18148070573806763, + "loss_ib": 0.0030552365351468325, + "step": 450 + }, + { + "ce_ib": 9.151787757873535, + "ce_orig": 0.4986724555492401, + "epoch": 0.12941261053993816, + "kl_loss": 0.1752292811870575, + "loss_ib": 0.0026674713008105755, + "step": 450 + }, + { + "ce_ib": 8.871505737304688, + "ce_orig": 0.6981923580169678, + "epoch": 0.12941261053993816, + "kl_loss": 0.14069266617298126, + "loss_ib": 0.00229407730512321, + "step": 450 + }, + { + "ce_ib": 8.211197853088379, + "ce_orig": 0.7622634172439575, + "epoch": 0.12941261053993816, + "kl_loss": 0.13836929202079773, + "loss_ib": 0.00220481283031404, + "step": 450 + }, + { + "ce_ib": 7.645545482635498, + "ce_orig": 0.5680274963378906, + "epoch": 0.1297001941189158, + "kl_loss": 0.18493801355361938, + "loss_ib": 0.0026139344554394484, + "step": 451 + }, + { + "ce_ib": 9.379469871520996, + "ce_orig": 0.5945746302604675, + "epoch": 0.1297001941189158, + "kl_loss": 0.15481063723564148, + "loss_ib": 0.0024860533885657787, + "step": 451 + }, + { + "ce_ib": 13.473487854003906, + "ce_orig": 0.9739643931388855, + "epoch": 0.1297001941189158, + "kl_loss": 0.2487793266773224, + "loss_ib": 0.0038351418916136026, + "step": 451 + }, + { + "ce_ib": 9.095867156982422, + "ce_orig": 0.7306302785873413, + "epoch": 0.1297001941189158, + "kl_loss": 0.1404043734073639, + "loss_ib": 0.002313630422577262, + "step": 451 + }, + { + "ce_ib": 8.908297538757324, + "ce_orig": 0.548682451248169, + "epoch": 0.12998777769789346, + "kl_loss": 0.18304413557052612, + "loss_ib": 0.002721270779147744, + "step": 452 + }, + { + "ce_ib": 12.341330528259277, + "ce_orig": 1.0068693161010742, + "epoch": 0.12998777769789346, + "kl_loss": 0.21775811910629272, + "loss_ib": 0.0034117139875888824, + "step": 452 + }, + { + "ce_ib": 7.933249473571777, + "ce_orig": 0.6957258582115173, + "epoch": 0.12998777769789346, + "kl_loss": 0.12882256507873535, + "loss_ib": 0.002081550657749176, + "step": 452 + }, + { + "ce_ib": 11.709693908691406, + "ce_orig": 0.7259184718132019, + "epoch": 0.12998777769789346, + "kl_loss": 0.20683181285858154, + "loss_ib": 0.003239287296310067, + "step": 452 + }, + { + "ce_ib": 9.15219783782959, + "ce_orig": 0.521969199180603, + "epoch": 0.1302753612768711, + "kl_loss": 0.16149017214775085, + "loss_ib": 0.0025301214773207903, + "step": 453 + }, + { + "ce_ib": 10.897204399108887, + "ce_orig": 0.9679385423660278, + "epoch": 0.1302753612768711, + "kl_loss": 0.1327183097600937, + "loss_ib": 0.0024169033858925104, + "step": 453 + }, + { + "ce_ib": 11.597270965576172, + "ce_orig": 0.9538221955299377, + "epoch": 0.1302753612768711, + "kl_loss": 0.13329055905342102, + "loss_ib": 0.002492632484063506, + "step": 453 + }, + { + "ce_ib": 14.650552749633789, + "ce_orig": 0.9307949542999268, + "epoch": 0.1302753612768711, + "kl_loss": 0.3576693534851074, + "loss_ib": 0.005041748750954866, + "step": 453 + }, + { + "ce_ib": 9.383811950683594, + "ce_orig": 0.7557501196861267, + "epoch": 0.13056294485584874, + "kl_loss": 0.1699744164943695, + "loss_ib": 0.0026381253264844418, + "step": 454 + }, + { + "ce_ib": 16.616901397705078, + "ce_orig": 1.1251425743103027, + "epoch": 0.13056294485584874, + "kl_loss": 0.260105699300766, + "loss_ib": 0.0042627472430467606, + "step": 454 + }, + { + "ce_ib": 9.158967018127441, + "ce_orig": 0.602447509765625, + "epoch": 0.13056294485584874, + "kl_loss": 0.12641242146492004, + "loss_ib": 0.0021800207905471325, + "step": 454 + }, + { + "ce_ib": 12.393420219421387, + "ce_orig": 1.04123055934906, + "epoch": 0.13056294485584874, + "kl_loss": 0.21238833665847778, + "loss_ib": 0.0033632253762334585, + "step": 454 + }, + { + "epoch": 0.13085052843482636, + "grad_norm": 0.09227544069290161, + "learning_rate": 4.997804908107387e-05, + "loss": 0.8765, + "step": 455 + }, + { + "ce_ib": 9.478934288024902, + "ce_orig": 0.8604238033294678, + "epoch": 0.13085052843482636, + "kl_loss": 0.09945785254240036, + "loss_ib": 0.0019424718338996172, + "step": 455 + }, + { + "ce_ib": 7.167737007141113, + "ce_orig": 0.7395135164260864, + "epoch": 0.13085052843482636, + "kl_loss": 0.18323373794555664, + "loss_ib": 0.0025491111446172, + "step": 455 + }, + { + "ce_ib": 9.015175819396973, + "ce_orig": 0.5233743786811829, + "epoch": 0.13085052843482636, + "kl_loss": 0.22085565328598022, + "loss_ib": 0.00311007397249341, + "step": 455 + }, + { + "ce_ib": 16.64781951904297, + "ce_orig": 1.744786024093628, + "epoch": 0.13085052843482636, + "kl_loss": 0.17802797257900238, + "loss_ib": 0.0034450613893568516, + "step": 455 + }, + { + "ce_ib": 10.087674140930176, + "ce_orig": 0.6486227512359619, + "epoch": 0.13113811201380401, + "kl_loss": 0.23665431141853333, + "loss_ib": 0.0033753104507923126, + "step": 456 + }, + { + "ce_ib": 12.518637657165527, + "ce_orig": 1.2168270349502563, + "epoch": 0.13113811201380401, + "kl_loss": 0.15110589563846588, + "loss_ib": 0.002762922551482916, + "step": 456 + }, + { + "ce_ib": 13.409689903259277, + "ce_orig": 1.1832727193832397, + "epoch": 0.13113811201380401, + "kl_loss": 0.14149300754070282, + "loss_ib": 0.0027558987494558096, + "step": 456 + }, + { + "ce_ib": 7.571767807006836, + "ce_orig": 0.4468522071838379, + "epoch": 0.13113811201380401, + "kl_loss": 0.2032560110092163, + "loss_ib": 0.002789736958220601, + "step": 456 + }, + { + "ce_ib": 16.250171661376953, + "ce_orig": 1.4707938432693481, + "epoch": 0.13142569559278164, + "kl_loss": 0.15541328489780426, + "loss_ib": 0.0031791499350219965, + "step": 457 + }, + { + "ce_ib": 10.158995628356934, + "ce_orig": 0.7189385890960693, + "epoch": 0.13142569559278164, + "kl_loss": 0.13689836859703064, + "loss_ib": 0.0023848831187933683, + "step": 457 + }, + { + "ce_ib": 13.75023078918457, + "ce_orig": 0.9671751856803894, + "epoch": 0.13142569559278164, + "kl_loss": 0.20036864280700684, + "loss_ib": 0.0033787095453590155, + "step": 457 + }, + { + "ce_ib": 12.164261817932129, + "ce_orig": 0.6848416328430176, + "epoch": 0.13142569559278164, + "kl_loss": 0.18254505097866058, + "loss_ib": 0.0030418764799833298, + "step": 457 + }, + { + "ce_ib": 9.728108406066895, + "ce_orig": 0.6200440526008606, + "epoch": 0.1317132791717593, + "kl_loss": 0.15019859373569489, + "loss_ib": 0.002474796725437045, + "step": 458 + }, + { + "ce_ib": 13.638520240783691, + "ce_orig": 1.041512370109558, + "epoch": 0.1317132791717593, + "kl_loss": 0.19027158617973328, + "loss_ib": 0.0032665678299963474, + "step": 458 + }, + { + "ce_ib": 12.803059577941895, + "ce_orig": 0.5324247479438782, + "epoch": 0.1317132791717593, + "kl_loss": 0.18024376034736633, + "loss_ib": 0.003082743613049388, + "step": 458 + }, + { + "ce_ib": 10.689647674560547, + "ce_orig": 0.8073751926422119, + "epoch": 0.1317132791717593, + "kl_loss": 0.12346737831830978, + "loss_ib": 0.002303638495504856, + "step": 458 + }, + { + "ce_ib": 5.675622463226318, + "ce_orig": 0.3405647575855255, + "epoch": 0.13200086275073694, + "kl_loss": 0.31278595328330994, + "loss_ib": 0.0036954216193407774, + "step": 459 + }, + { + "ce_ib": 15.82054615020752, + "ce_orig": 1.3749570846557617, + "epoch": 0.13200086275073694, + "kl_loss": 0.21644842624664307, + "loss_ib": 0.00374653865583241, + "step": 459 + }, + { + "ce_ib": 10.457240104675293, + "ce_orig": 0.5686371326446533, + "epoch": 0.13200086275073694, + "kl_loss": 0.19664248824119568, + "loss_ib": 0.0030121486634016037, + "step": 459 + }, + { + "ce_ib": 9.230670928955078, + "ce_orig": 0.669769823551178, + "epoch": 0.13200086275073694, + "kl_loss": 0.15697245299816132, + "loss_ib": 0.0024927917402237654, + "step": 459 + }, + { + "epoch": 0.13228844632971457, + "grad_norm": 0.08651373535394669, + "learning_rate": 4.997639323676214e-05, + "loss": 0.7999, + "step": 460 + }, + { + "ce_ib": 10.430649757385254, + "ce_orig": 0.5384361147880554, + "epoch": 0.13228844632971457, + "kl_loss": 0.240619957447052, + "loss_ib": 0.0034492644481360912, + "step": 460 + }, + { + "ce_ib": 11.414108276367188, + "ce_orig": 0.2244451642036438, + "epoch": 0.13228844632971457, + "kl_loss": 0.333604633808136, + "loss_ib": 0.004477457143366337, + "step": 460 + }, + { + "ce_ib": 8.57000732421875, + "ce_orig": 0.6421215534210205, + "epoch": 0.13228844632971457, + "kl_loss": 0.11957961320877075, + "loss_ib": 0.002052796771749854, + "step": 460 + }, + { + "ce_ib": 9.772636413574219, + "ce_orig": 0.7227917909622192, + "epoch": 0.13228844632971457, + "kl_loss": 0.1671256273984909, + "loss_ib": 0.002648519817739725, + "step": 460 + }, + { + "ce_ib": 6.091513156890869, + "ce_orig": 0.5446451902389526, + "epoch": 0.13257602990869222, + "kl_loss": 0.15295615792274475, + "loss_ib": 0.0021387129090726376, + "step": 461 + }, + { + "ce_ib": 10.357866287231445, + "ce_orig": 0.7494820356369019, + "epoch": 0.13257602990869222, + "kl_loss": 0.16398490965366364, + "loss_ib": 0.002675635740160942, + "step": 461 + }, + { + "ce_ib": 9.155532836914062, + "ce_orig": 0.6777340173721313, + "epoch": 0.13257602990869222, + "kl_loss": 0.16127201914787292, + "loss_ib": 0.0025282735005021095, + "step": 461 + }, + { + "ce_ib": 10.313591003417969, + "ce_orig": 0.7135111689567566, + "epoch": 0.13257602990869222, + "kl_loss": 0.22754691541194916, + "loss_ib": 0.0033068279735744, + "step": 461 + }, + { + "ce_ib": 8.072470664978027, + "ce_orig": 0.6382450461387634, + "epoch": 0.13286361348766984, + "kl_loss": 0.12815770506858826, + "loss_ib": 0.0020888240542262793, + "step": 462 + }, + { + "ce_ib": 11.419739723205566, + "ce_orig": 1.4712454080581665, + "epoch": 0.13286361348766984, + "kl_loss": 0.15876120328903198, + "loss_ib": 0.0027295860927551985, + "step": 462 + }, + { + "ce_ib": 11.649603843688965, + "ce_orig": 0.5335864424705505, + "epoch": 0.13286361348766984, + "kl_loss": 0.23220552504062653, + "loss_ib": 0.0034870156086981297, + "step": 462 + }, + { + "ce_ib": 13.856771469116211, + "ce_orig": 1.503601312637329, + "epoch": 0.13286361348766984, + "kl_loss": 0.22454500198364258, + "loss_ib": 0.003631127066910267, + "step": 462 + }, + { + "ce_ib": 11.232563018798828, + "ce_orig": 0.8185619115829468, + "epoch": 0.1331511970666475, + "kl_loss": 0.14658993482589722, + "loss_ib": 0.002589155687019229, + "step": 463 + }, + { + "ce_ib": 8.680917739868164, + "ce_orig": 0.897162914276123, + "epoch": 0.1331511970666475, + "kl_loss": 0.16236066818237305, + "loss_ib": 0.002491698367521167, + "step": 463 + }, + { + "ce_ib": 13.455824851989746, + "ce_orig": 0.7123657464981079, + "epoch": 0.1331511970666475, + "kl_loss": 0.1669284999370575, + "loss_ib": 0.0030148671939969063, + "step": 463 + }, + { + "ce_ib": 11.438149452209473, + "ce_orig": 0.6367284059524536, + "epoch": 0.1331511970666475, + "kl_loss": 0.17310284078121185, + "loss_ib": 0.0028748433105647564, + "step": 463 + }, + { + "ce_ib": 9.650132179260254, + "ce_orig": 0.877562403678894, + "epoch": 0.13343878064562514, + "kl_loss": 0.131558358669281, + "loss_ib": 0.0022805966436862946, + "step": 464 + }, + { + "ce_ib": 10.920985221862793, + "ce_orig": 0.9254859089851379, + "epoch": 0.13343878064562514, + "kl_loss": 0.15900883078575134, + "loss_ib": 0.002682186895981431, + "step": 464 + }, + { + "ce_ib": 7.144834518432617, + "ce_orig": 0.9159427285194397, + "epoch": 0.13343878064562514, + "kl_loss": 0.12562786042690277, + "loss_ib": 0.0019707619212567806, + "step": 464 + }, + { + "ce_ib": 7.252218723297119, + "ce_orig": 0.5822017192840576, + "epoch": 0.13343878064562514, + "kl_loss": 0.23755145072937012, + "loss_ib": 0.0031007362995296717, + "step": 464 + }, + { + "epoch": 0.13372636422460277, + "grad_norm": 0.09323103725910187, + "learning_rate": 4.997467721852196e-05, + "loss": 0.8438, + "step": 465 + }, + { + "ce_ib": 13.914019584655762, + "ce_orig": 1.2358434200286865, + "epoch": 0.13372636422460277, + "kl_loss": 0.20358413457870483, + "loss_ib": 0.0034272430930286646, + "step": 465 + }, + { + "ce_ib": 6.4653425216674805, + "ce_orig": 0.5584582090377808, + "epoch": 0.13372636422460277, + "kl_loss": 0.0890372097492218, + "loss_ib": 0.0015369063476100564, + "step": 465 + }, + { + "ce_ib": 10.534329414367676, + "ce_orig": 0.4327813684940338, + "epoch": 0.13372636422460277, + "kl_loss": 0.17528124153614044, + "loss_ib": 0.0028062453493475914, + "step": 465 + }, + { + "ce_ib": 13.77268123626709, + "ce_orig": 0.8556481599807739, + "epoch": 0.13372636422460277, + "kl_loss": 0.20487025380134583, + "loss_ib": 0.003425970673561096, + "step": 465 + }, + { + "ce_ib": 9.667628288269043, + "ce_orig": 0.7900619506835938, + "epoch": 0.13401394780358042, + "kl_loss": 0.16862963140010834, + "loss_ib": 0.002653059083968401, + "step": 466 + }, + { + "ce_ib": 11.5745210647583, + "ce_orig": 0.5225064158439636, + "epoch": 0.13401394780358042, + "kl_loss": 0.219425767660141, + "loss_ib": 0.0033517098054289818, + "step": 466 + }, + { + "ce_ib": 10.800277709960938, + "ce_orig": 0.9123367667198181, + "epoch": 0.13401394780358042, + "kl_loss": 0.2272198349237442, + "loss_ib": 0.0033522259909659624, + "step": 466 + }, + { + "ce_ib": 8.781864166259766, + "ce_orig": 0.7642026543617249, + "epoch": 0.13401394780358042, + "kl_loss": 0.17208728194236755, + "loss_ib": 0.00259905937127769, + "step": 466 + }, + { + "ce_ib": 12.304344177246094, + "ce_orig": 0.7450142502784729, + "epoch": 0.13430153138255804, + "kl_loss": 0.1326742172241211, + "loss_ib": 0.0025571766309440136, + "step": 467 + }, + { + "ce_ib": 9.575723648071289, + "ce_orig": 0.648908793926239, + "epoch": 0.13430153138255804, + "kl_loss": 0.19128626585006714, + "loss_ib": 0.0028704351279884577, + "step": 467 + }, + { + "ce_ib": 13.457468032836914, + "ce_orig": 1.4314584732055664, + "epoch": 0.13430153138255804, + "kl_loss": 0.20292793214321136, + "loss_ib": 0.0033750259317457676, + "step": 467 + }, + { + "ce_ib": 8.486967086791992, + "ce_orig": 0.6459015011787415, + "epoch": 0.13430153138255804, + "kl_loss": 0.16141179203987122, + "loss_ib": 0.002462814562022686, + "step": 467 + }, + { + "ce_ib": 9.87436580657959, + "ce_orig": 1.0700461864471436, + "epoch": 0.1345891149615357, + "kl_loss": 0.24746514856815338, + "loss_ib": 0.0034620880614966154, + "step": 468 + }, + { + "ce_ib": 10.845966339111328, + "ce_orig": 0.6945490837097168, + "epoch": 0.1345891149615357, + "kl_loss": 0.1763351410627365, + "loss_ib": 0.0028479481115937233, + "step": 468 + }, + { + "ce_ib": 11.760831832885742, + "ce_orig": 0.9512259364128113, + "epoch": 0.1345891149615357, + "kl_loss": 0.15363314747810364, + "loss_ib": 0.002712414599955082, + "step": 468 + }, + { + "ce_ib": 9.594245910644531, + "ce_orig": 0.6800144910812378, + "epoch": 0.1345891149615357, + "kl_loss": 0.2686789929866791, + "loss_ib": 0.0036462144926190376, + "step": 468 + }, + { + "ce_ib": 9.218502044677734, + "ce_orig": 0.45141276717185974, + "epoch": 0.13487669854051335, + "kl_loss": 0.1673453450202942, + "loss_ib": 0.0025953035801649094, + "step": 469 + }, + { + "ce_ib": 8.04023265838623, + "ce_orig": 0.626139223575592, + "epoch": 0.13487669854051335, + "kl_loss": 0.18813760578632355, + "loss_ib": 0.0026853992603719234, + "step": 469 + }, + { + "ce_ib": 9.180455207824707, + "ce_orig": 0.4675959348678589, + "epoch": 0.13487669854051335, + "kl_loss": 0.19254645705223083, + "loss_ib": 0.0028435098938643932, + "step": 469 + }, + { + "ce_ib": 10.131449699401855, + "ce_orig": 0.6631660461425781, + "epoch": 0.13487669854051335, + "kl_loss": 0.14070668816566467, + "loss_ib": 0.0024202116765081882, + "step": 469 + }, + { + "epoch": 0.13516428211949097, + "grad_norm": 0.08323580771684647, + "learning_rate": 4.9972901030487616e-05, + "loss": 0.8432, + "step": 470 + }, + { + "ce_ib": 9.376252174377441, + "ce_orig": 0.6429654955863953, + "epoch": 0.13516428211949097, + "kl_loss": 0.15271279215812683, + "loss_ib": 0.002464753109961748, + "step": 470 + }, + { + "ce_ib": 7.387055397033691, + "ce_orig": 0.4410795569419861, + "epoch": 0.13516428211949097, + "kl_loss": 0.13434870541095734, + "loss_ib": 0.0020821925718337297, + "step": 470 + }, + { + "ce_ib": 12.794949531555176, + "ce_orig": 0.8125112056732178, + "epoch": 0.13516428211949097, + "kl_loss": 0.5913262963294983, + "loss_ib": 0.007192757446318865, + "step": 470 + }, + { + "ce_ib": 7.509584903717041, + "ce_orig": 0.8456118106842041, + "epoch": 0.13516428211949097, + "kl_loss": 0.13973768055438995, + "loss_ib": 0.0021483353339135647, + "step": 470 + }, + { + "ce_ib": 7.9404191970825195, + "ce_orig": 0.7081736326217651, + "epoch": 0.13545186569846862, + "kl_loss": 0.12523691356182098, + "loss_ib": 0.0020464109256863594, + "step": 471 + }, + { + "ce_ib": 10.213021278381348, + "ce_orig": 0.7080956101417542, + "epoch": 0.13545186569846862, + "kl_loss": 0.2242983877658844, + "loss_ib": 0.003264285856857896, + "step": 471 + }, + { + "ce_ib": 9.126559257507324, + "ce_orig": 0.7322322726249695, + "epoch": 0.13545186569846862, + "kl_loss": 0.1383284032344818, + "loss_ib": 0.002295939950272441, + "step": 471 + }, + { + "ce_ib": 6.414395809173584, + "ce_orig": 0.6884047985076904, + "epoch": 0.13545186569846862, + "kl_loss": 0.11249984800815582, + "loss_ib": 0.0017664380138739944, + "step": 471 + }, + { + "ce_ib": 5.14065408706665, + "ce_orig": 0.2586210370063782, + "epoch": 0.13573944927744624, + "kl_loss": 0.3770146667957306, + "loss_ib": 0.004284211900085211, + "step": 472 + }, + { + "ce_ib": 14.23343276977539, + "ce_orig": 1.3532360792160034, + "epoch": 0.13573944927744624, + "kl_loss": 0.18985393643379211, + "loss_ib": 0.003321882337331772, + "step": 472 + }, + { + "ce_ib": 14.564502716064453, + "ce_orig": 1.749810814857483, + "epoch": 0.13573944927744624, + "kl_loss": 0.15691399574279785, + "loss_ib": 0.0030255902092903852, + "step": 472 + }, + { + "ce_ib": 8.07909870147705, + "ce_orig": 0.5683416724205017, + "epoch": 0.13573944927744624, + "kl_loss": 0.20622625946998596, + "loss_ib": 0.0028701722621917725, + "step": 472 + }, + { + "ce_ib": 13.582554817199707, + "ce_orig": 1.6216903924942017, + "epoch": 0.1360270328564239, + "kl_loss": 0.1717289686203003, + "loss_ib": 0.0030755449552088976, + "step": 473 + }, + { + "ce_ib": 12.75936222076416, + "ce_orig": 1.1129322052001953, + "epoch": 0.1360270328564239, + "kl_loss": 0.18489691615104675, + "loss_ib": 0.003124905051663518, + "step": 473 + }, + { + "ce_ib": 8.29798412322998, + "ce_orig": 0.4912956953048706, + "epoch": 0.1360270328564239, + "kl_loss": 0.13187135756015778, + "loss_ib": 0.002148512052372098, + "step": 473 + }, + { + "ce_ib": 6.32996129989624, + "ce_orig": 0.5624181628227234, + "epoch": 0.1360270328564239, + "kl_loss": 0.15103286504745483, + "loss_ib": 0.0021433248184621334, + "step": 473 + }, + { + "ce_ib": 9.895087242126465, + "ce_orig": 0.9939194321632385, + "epoch": 0.13631461643540155, + "kl_loss": 0.14027726650238037, + "loss_ib": 0.00239228131249547, + "step": 474 + }, + { + "ce_ib": 10.561649322509766, + "ce_orig": 0.8449274897575378, + "epoch": 0.13631461643540155, + "kl_loss": 0.16884073615074158, + "loss_ib": 0.002744572004303336, + "step": 474 + }, + { + "ce_ib": 10.697015762329102, + "ce_orig": 0.6244873404502869, + "epoch": 0.13631461643540155, + "kl_loss": 0.16410231590270996, + "loss_ib": 0.002710724715143442, + "step": 474 + }, + { + "ce_ib": 10.975496292114258, + "ce_orig": 0.6514372825622559, + "epoch": 0.13631461643540155, + "kl_loss": 0.20907053351402283, + "loss_ib": 0.003188254777342081, + "step": 474 + }, + { + "epoch": 0.13660220001437917, + "grad_norm": 0.09574563801288605, + "learning_rate": 4.997106467693835e-05, + "loss": 0.8712, + "step": 475 + }, + { + "ce_ib": 8.283028602600098, + "ce_orig": 0.6631106734275818, + "epoch": 0.13660220001437917, + "kl_loss": 0.19269996881484985, + "loss_ib": 0.002755302470177412, + "step": 475 + }, + { + "ce_ib": 9.683073997497559, + "ce_orig": 0.7057353258132935, + "epoch": 0.13660220001437917, + "kl_loss": 0.22878339886665344, + "loss_ib": 0.0032561414409428835, + "step": 475 + }, + { + "ce_ib": 8.93950366973877, + "ce_orig": 0.7505787014961243, + "epoch": 0.13660220001437917, + "kl_loss": 0.1079094409942627, + "loss_ib": 0.001973044592887163, + "step": 475 + }, + { + "ce_ib": 6.951815128326416, + "ce_orig": 0.5303117632865906, + "epoch": 0.13660220001437917, + "kl_loss": 0.11965961754322052, + "loss_ib": 0.0018917776178568602, + "step": 475 + }, + { + "ce_ib": 14.84595775604248, + "ce_orig": 1.473997950553894, + "epoch": 0.13688978359335682, + "kl_loss": 0.16360431909561157, + "loss_ib": 0.0031206386629492044, + "step": 476 + }, + { + "ce_ib": 13.593896865844727, + "ce_orig": 1.2449053525924683, + "epoch": 0.13688978359335682, + "kl_loss": 0.16457515954971313, + "loss_ib": 0.003005141159519553, + "step": 476 + }, + { + "ce_ib": 9.549323081970215, + "ce_orig": 0.7047984004020691, + "epoch": 0.13688978359335682, + "kl_loss": 0.2452981024980545, + "loss_ib": 0.0034079132601618767, + "step": 476 + }, + { + "ce_ib": 10.544528007507324, + "ce_orig": 0.7549718022346497, + "epoch": 0.13688978359335682, + "kl_loss": 0.1296602040529251, + "loss_ib": 0.0023510546889156103, + "step": 476 + }, + { + "ce_ib": 7.901725769042969, + "ce_orig": 0.8333771824836731, + "epoch": 0.13717736717233445, + "kl_loss": 0.11247368156909943, + "loss_ib": 0.001914909458719194, + "step": 477 + }, + { + "ce_ib": 6.190486431121826, + "ce_orig": 0.6102291345596313, + "epoch": 0.13717736717233445, + "kl_loss": 0.10293813049793243, + "loss_ib": 0.0016484298976138234, + "step": 477 + }, + { + "ce_ib": 8.575281143188477, + "ce_orig": 0.701132595539093, + "epoch": 0.13717736717233445, + "kl_loss": 0.12163711339235306, + "loss_ib": 0.002073899144306779, + "step": 477 + }, + { + "ce_ib": 9.477543830871582, + "ce_orig": 0.6797293424606323, + "epoch": 0.13717736717233445, + "kl_loss": 0.13846732676029205, + "loss_ib": 0.0023324275389313698, + "step": 477 + }, + { + "ce_ib": 8.02422046661377, + "ce_orig": 0.7941074371337891, + "epoch": 0.1374649507513121, + "kl_loss": 0.11544310301542282, + "loss_ib": 0.001956852851435542, + "step": 478 + }, + { + "ce_ib": 10.08478832244873, + "ce_orig": 0.5088381171226501, + "epoch": 0.1374649507513121, + "kl_loss": 0.26347583532333374, + "loss_ib": 0.003643237054347992, + "step": 478 + }, + { + "ce_ib": 10.609148025512695, + "ce_orig": 1.1123254299163818, + "epoch": 0.1374649507513121, + "kl_loss": 0.18334609270095825, + "loss_ib": 0.0028943754732608795, + "step": 478 + }, + { + "ce_ib": 8.124927520751953, + "ce_orig": 0.911295473575592, + "epoch": 0.1374649507513121, + "kl_loss": 0.1195707842707634, + "loss_ib": 0.0020082006230950356, + "step": 478 + }, + { + "ce_ib": 9.455392837524414, + "ce_orig": 0.46617555618286133, + "epoch": 0.13775253433028975, + "kl_loss": 0.2140653133392334, + "loss_ib": 0.003086192300543189, + "step": 479 + }, + { + "ce_ib": 12.214388847351074, + "ce_orig": 0.7688418030738831, + "epoch": 0.13775253433028975, + "kl_loss": 0.20302480459213257, + "loss_ib": 0.0032516869250684977, + "step": 479 + }, + { + "ce_ib": 10.966753005981445, + "ce_orig": 0.7571495175361633, + "epoch": 0.13775253433028975, + "kl_loss": 0.17398342490196228, + "loss_ib": 0.0028365093749016523, + "step": 479 + }, + { + "ce_ib": 7.949142932891846, + "ce_orig": 0.6712113618850708, + "epoch": 0.13775253433028975, + "kl_loss": 0.16949275135993958, + "loss_ib": 0.002489841775968671, + "step": 479 + }, + { + "epoch": 0.13804011790926737, + "grad_norm": 0.11252865940332413, + "learning_rate": 4.996916816229837e-05, + "loss": 0.8761, + "step": 480 + }, + { + "ce_ib": 11.602306365966797, + "ce_orig": 0.8134323358535767, + "epoch": 0.13804011790926737, + "kl_loss": 0.2448122501373291, + "loss_ib": 0.0036083529703319073, + "step": 480 + }, + { + "ce_ib": 11.314531326293945, + "ce_orig": 0.7367826700210571, + "epoch": 0.13804011790926737, + "kl_loss": 0.15320701897144318, + "loss_ib": 0.002663523191586137, + "step": 480 + }, + { + "ce_ib": 12.606677055358887, + "ce_orig": 1.1588752269744873, + "epoch": 0.13804011790926737, + "kl_loss": 0.15930581092834473, + "loss_ib": 0.002853725804015994, + "step": 480 + }, + { + "ce_ib": 11.054275512695312, + "ce_orig": 1.0181690454483032, + "epoch": 0.13804011790926737, + "kl_loss": 0.11396267265081406, + "loss_ib": 0.0022450541146099567, + "step": 480 + }, + { + "ce_ib": 14.066468238830566, + "ce_orig": 1.4801671504974365, + "epoch": 0.13832770148824503, + "kl_loss": 0.20112600922584534, + "loss_ib": 0.0034179065842181444, + "step": 481 + }, + { + "ce_ib": 8.212839126586914, + "ce_orig": 0.7344709038734436, + "epoch": 0.13832770148824503, + "kl_loss": 0.16998212039470673, + "loss_ib": 0.002521105110645294, + "step": 481 + }, + { + "ce_ib": 12.749667167663574, + "ce_orig": 1.22507905960083, + "epoch": 0.13832770148824503, + "kl_loss": 0.16252657771110535, + "loss_ib": 0.0029002325609326363, + "step": 481 + }, + { + "ce_ib": 9.067646026611328, + "ce_orig": 0.8712934851646423, + "epoch": 0.13832770148824503, + "kl_loss": 0.14971376955509186, + "loss_ib": 0.0024039021227508783, + "step": 481 + }, + { + "ce_ib": 7.4774956703186035, + "ce_orig": 0.6922398805618286, + "epoch": 0.13861528506722265, + "kl_loss": 0.17292888462543488, + "loss_ib": 0.0024770384188741446, + "step": 482 + }, + { + "ce_ib": 12.176875114440918, + "ce_orig": 0.8391960859298706, + "epoch": 0.13861528506722265, + "kl_loss": 0.28599101305007935, + "loss_ib": 0.00407759752124548, + "step": 482 + }, + { + "ce_ib": 14.910351753234863, + "ce_orig": 1.4995876550674438, + "epoch": 0.13861528506722265, + "kl_loss": 0.20326115190982819, + "loss_ib": 0.0035236466210335493, + "step": 482 + }, + { + "ce_ib": 14.353828430175781, + "ce_orig": 1.3694660663604736, + "epoch": 0.13861528506722265, + "kl_loss": 0.2608376145362854, + "loss_ib": 0.004043758846819401, + "step": 482 + }, + { + "ce_ib": 10.655997276306152, + "ce_orig": 0.795907735824585, + "epoch": 0.1389028686462003, + "kl_loss": 0.17903940379619598, + "loss_ib": 0.0028559938073158264, + "step": 483 + }, + { + "ce_ib": 8.001945495605469, + "ce_orig": 0.6496250629425049, + "epoch": 0.1389028686462003, + "kl_loss": 0.22284536063671112, + "loss_ib": 0.003028648206964135, + "step": 483 + }, + { + "ce_ib": 15.7087984085083, + "ce_orig": 1.539727807044983, + "epoch": 0.1389028686462003, + "kl_loss": 0.1869555413722992, + "loss_ib": 0.003440435044467449, + "step": 483 + }, + { + "ce_ib": 11.261712074279785, + "ce_orig": 0.7162747383117676, + "epoch": 0.1389028686462003, + "kl_loss": 0.15583762526512146, + "loss_ib": 0.002684547333046794, + "step": 483 + }, + { + "ce_ib": 12.349994659423828, + "ce_orig": 0.5102917551994324, + "epoch": 0.13919045222517795, + "kl_loss": 0.21702654659748077, + "loss_ib": 0.003405264811590314, + "step": 484 + }, + { + "ce_ib": 14.703964233398438, + "ce_orig": 1.2549042701721191, + "epoch": 0.13919045222517795, + "kl_loss": 0.28059592843055725, + "loss_ib": 0.004276355262845755, + "step": 484 + }, + { + "ce_ib": 14.395584106445312, + "ce_orig": 1.6029713153839111, + "epoch": 0.13919045222517795, + "kl_loss": 0.1588822305202484, + "loss_ib": 0.0030283809173852205, + "step": 484 + }, + { + "ce_ib": 12.572036743164062, + "ce_orig": 1.4179061651229858, + "epoch": 0.13919045222517795, + "kl_loss": 0.23325228691101074, + "loss_ib": 0.0035897265188395977, + "step": 484 + }, + { + "epoch": 0.13947803580415558, + "grad_norm": 0.09978976100683212, + "learning_rate": 4.996721149113682e-05, + "loss": 0.9298, + "step": 485 + }, + { + "ce_ib": 15.488334655761719, + "ce_orig": 1.308066964149475, + "epoch": 0.13947803580415558, + "kl_loss": 0.17892438173294067, + "loss_ib": 0.0033380771055817604, + "step": 485 + }, + { + "ce_ib": 10.043342590332031, + "ce_orig": 0.9190800786018372, + "epoch": 0.13947803580415558, + "kl_loss": 0.14057381451129913, + "loss_ib": 0.0024100723676383495, + "step": 485 + }, + { + "ce_ib": 10.405566215515137, + "ce_orig": 0.41310566663742065, + "epoch": 0.13947803580415558, + "kl_loss": 0.08804390579462051, + "loss_ib": 0.001920995651744306, + "step": 485 + }, + { + "ce_ib": 12.68533706665039, + "ce_orig": 0.9749146103858948, + "epoch": 0.13947803580415558, + "kl_loss": 0.17658352851867676, + "loss_ib": 0.003034368623048067, + "step": 485 + }, + { + "ce_ib": 14.109472274780273, + "ce_orig": 0.93257075548172, + "epoch": 0.13976561938313323, + "kl_loss": 0.15330049395561218, + "loss_ib": 0.002943952102214098, + "step": 486 + }, + { + "ce_ib": 7.845019340515137, + "ce_orig": 0.7454962730407715, + "epoch": 0.13976561938313323, + "kl_loss": 0.18838530778884888, + "loss_ib": 0.0026683551259338856, + "step": 486 + }, + { + "ce_ib": 9.430821418762207, + "ce_orig": 1.0630453824996948, + "epoch": 0.13976561938313323, + "kl_loss": 0.15098202228546143, + "loss_ib": 0.00245290226303041, + "step": 486 + }, + { + "ce_ib": 6.5644049644470215, + "ce_orig": 0.5489628314971924, + "epoch": 0.13976561938313323, + "kl_loss": 0.08801446855068207, + "loss_ib": 0.001536585041321814, + "step": 486 + }, + { + "ce_ib": 11.013578414916992, + "ce_orig": 0.7610940933227539, + "epoch": 0.14005320296211085, + "kl_loss": 0.22202414274215698, + "loss_ib": 0.0033215992152690887, + "step": 487 + }, + { + "ce_ib": 13.75318717956543, + "ce_orig": 0.7965566515922546, + "epoch": 0.14005320296211085, + "kl_loss": 0.22268138825893402, + "loss_ib": 0.003602132434025407, + "step": 487 + }, + { + "ce_ib": 11.45129680633545, + "ce_orig": 1.317887306213379, + "epoch": 0.14005320296211085, + "kl_loss": 0.1648291051387787, + "loss_ib": 0.002793420571833849, + "step": 487 + }, + { + "ce_ib": 9.824847221374512, + "ce_orig": 0.6467515230178833, + "epoch": 0.14005320296211085, + "kl_loss": 0.21943452954292297, + "loss_ib": 0.0031768297776579857, + "step": 487 + }, + { + "ce_ib": 9.987610816955566, + "ce_orig": 0.7534988522529602, + "epoch": 0.1403407865410885, + "kl_loss": 0.1998450607061386, + "loss_ib": 0.0029972116462886333, + "step": 488 + }, + { + "ce_ib": 14.38665771484375, + "ce_orig": 1.332573413848877, + "epoch": 0.1403407865410885, + "kl_loss": 0.23315776884555817, + "loss_ib": 0.003770243376493454, + "step": 488 + }, + { + "ce_ib": 6.623453140258789, + "ce_orig": 0.42733436822891235, + "epoch": 0.1403407865410885, + "kl_loss": 0.14417804777622223, + "loss_ib": 0.0021041256841272116, + "step": 488 + }, + { + "ce_ib": 12.093878746032715, + "ce_orig": 1.1107982397079468, + "epoch": 0.1403407865410885, + "kl_loss": 0.1123935654759407, + "loss_ib": 0.00233332347124815, + "step": 488 + }, + { + "ce_ib": 11.833623886108398, + "ce_orig": 0.9317137598991394, + "epoch": 0.14062837012006615, + "kl_loss": 0.4279143214225769, + "loss_ib": 0.005462505854666233, + "step": 489 + }, + { + "ce_ib": 6.175386428833008, + "ce_orig": 0.2635171413421631, + "epoch": 0.14062837012006615, + "kl_loss": 0.17456625401973724, + "loss_ib": 0.0023632009979337454, + "step": 489 + }, + { + "ce_ib": 9.941847801208496, + "ce_orig": 0.6531252861022949, + "epoch": 0.14062837012006615, + "kl_loss": 0.17595279216766357, + "loss_ib": 0.0027537124697118998, + "step": 489 + }, + { + "ce_ib": 7.677126407623291, + "ce_orig": 0.688906192779541, + "epoch": 0.14062837012006615, + "kl_loss": 0.18364465236663818, + "loss_ib": 0.0026041590608656406, + "step": 489 + }, + { + "epoch": 0.14091595369904378, + "grad_norm": 0.0886775329709053, + "learning_rate": 4.996519466816778e-05, + "loss": 0.9075, + "step": 490 + }, + { + "ce_ib": 8.481292724609375, + "ce_orig": 0.5066956281661987, + "epoch": 0.14091595369904378, + "kl_loss": 0.14637017250061035, + "loss_ib": 0.002311830874532461, + "step": 490 + }, + { + "ce_ib": 14.51606559753418, + "ce_orig": 1.596727967262268, + "epoch": 0.14091595369904378, + "kl_loss": 0.19026514887809753, + "loss_ib": 0.0033542579039931297, + "step": 490 + }, + { + "ce_ib": 10.839821815490723, + "ce_orig": 1.242689609527588, + "epoch": 0.14091595369904378, + "kl_loss": 0.21338334679603577, + "loss_ib": 0.0032178156543523073, + "step": 490 + }, + { + "ce_ib": 7.8446831703186035, + "ce_orig": 0.968952476978302, + "epoch": 0.14091595369904378, + "kl_loss": 0.2997243404388428, + "loss_ib": 0.003781711682677269, + "step": 490 + }, + { + "ce_ib": 11.051322937011719, + "ce_orig": 1.4200416803359985, + "epoch": 0.14120353727802143, + "kl_loss": 0.17061398923397064, + "loss_ib": 0.0028112721629440784, + "step": 491 + }, + { + "ce_ib": 11.496525764465332, + "ce_orig": 1.1261836290359497, + "epoch": 0.14120353727802143, + "kl_loss": 0.1715063452720642, + "loss_ib": 0.002864715876057744, + "step": 491 + }, + { + "ce_ib": 15.705190658569336, + "ce_orig": 1.4249088764190674, + "epoch": 0.14120353727802143, + "kl_loss": 0.19172680377960205, + "loss_ib": 0.0034877872094511986, + "step": 491 + }, + { + "ce_ib": 10.472744941711426, + "ce_orig": 0.9399302005767822, + "epoch": 0.14120353727802143, + "kl_loss": 0.15676361322402954, + "loss_ib": 0.0026149104814976454, + "step": 491 + }, + { + "ce_ib": 7.808863639831543, + "ce_orig": 0.8644165396690369, + "epoch": 0.14149112085699905, + "kl_loss": 0.09857909381389618, + "loss_ib": 0.0017666771309450269, + "step": 492 + }, + { + "ce_ib": 9.123184204101562, + "ce_orig": 0.736179530620575, + "epoch": 0.14149112085699905, + "kl_loss": 0.1718408167362213, + "loss_ib": 0.0026307266671210527, + "step": 492 + }, + { + "ce_ib": 16.71449851989746, + "ce_orig": 1.8901475667953491, + "epoch": 0.14149112085699905, + "kl_loss": 0.21999290585517883, + "loss_ib": 0.0038713787216693163, + "step": 492 + }, + { + "ce_ib": 14.769342422485352, + "ce_orig": 1.0042953491210938, + "epoch": 0.14149112085699905, + "kl_loss": 0.19468256831169128, + "loss_ib": 0.0034237599465996027, + "step": 492 + }, + { + "ce_ib": 7.843172073364258, + "ce_orig": 0.2853364646434784, + "epoch": 0.1417787044359767, + "kl_loss": 0.13801902532577515, + "loss_ib": 0.002164507517591119, + "step": 493 + }, + { + "ce_ib": 10.78830337524414, + "ce_orig": 1.1020339727401733, + "epoch": 0.1417787044359767, + "kl_loss": 0.21507477760314941, + "loss_ib": 0.003229578025639057, + "step": 493 + }, + { + "ce_ib": 7.485945224761963, + "ce_orig": 0.6249547004699707, + "epoch": 0.1417787044359767, + "kl_loss": 0.0884731262922287, + "loss_ib": 0.0016333258245140314, + "step": 493 + }, + { + "ce_ib": 10.592840194702148, + "ce_orig": 0.6959387063980103, + "epoch": 0.1417787044359767, + "kl_loss": 0.3251839876174927, + "loss_ib": 0.004311123862862587, + "step": 493 + }, + { + "ce_ib": 6.683375358581543, + "ce_orig": 0.47197264432907104, + "epoch": 0.14206628801495436, + "kl_loss": 0.1459130495786667, + "loss_ib": 0.0021274678874760866, + "step": 494 + }, + { + "ce_ib": 8.309039115905762, + "ce_orig": 0.34105879068374634, + "epoch": 0.14206628801495436, + "kl_loss": 0.15694405138492584, + "loss_ib": 0.0024003442376852036, + "step": 494 + }, + { + "ce_ib": 13.072383880615234, + "ce_orig": 1.3826266527175903, + "epoch": 0.14206628801495436, + "kl_loss": 0.19439606368541718, + "loss_ib": 0.003251199144870043, + "step": 494 + }, + { + "ce_ib": 8.67587947845459, + "ce_orig": 0.5467540621757507, + "epoch": 0.14206628801495436, + "kl_loss": 0.22117763757705688, + "loss_ib": 0.0030793643090873957, + "step": 494 + }, + { + "epoch": 0.14235387159393198, + "grad_norm": 0.11509755253791809, + "learning_rate": 4.996311769825024e-05, + "loss": 0.8795, + "step": 495 + }, + { + "ce_ib": 7.835722923278809, + "ce_orig": 0.5985096096992493, + "epoch": 0.14235387159393198, + "kl_loss": 0.11672006547451019, + "loss_ib": 0.0019507729448378086, + "step": 495 + }, + { + "ce_ib": 10.796977996826172, + "ce_orig": 1.2538580894470215, + "epoch": 0.14235387159393198, + "kl_loss": 0.17189006507396698, + "loss_ib": 0.0027985982596874237, + "step": 495 + }, + { + "ce_ib": 11.224424362182617, + "ce_orig": 0.8003069758415222, + "epoch": 0.14235387159393198, + "kl_loss": 0.11536049842834473, + "loss_ib": 0.0022760473657399416, + "step": 495 + }, + { + "ce_ib": 9.976873397827148, + "ce_orig": 0.9437066316604614, + "epoch": 0.14235387159393198, + "kl_loss": 0.14582431316375732, + "loss_ib": 0.002455930458381772, + "step": 495 + }, + { + "ce_ib": 12.75202751159668, + "ce_orig": 1.2885056734085083, + "epoch": 0.14264145517290963, + "kl_loss": 0.17118601500988007, + "loss_ib": 0.002987062791362405, + "step": 496 + }, + { + "ce_ib": 11.649064064025879, + "ce_orig": 0.8830009698867798, + "epoch": 0.14264145517290963, + "kl_loss": 0.1887287050485611, + "loss_ib": 0.0030521934386342764, + "step": 496 + }, + { + "ce_ib": 14.029234886169434, + "ce_orig": 0.9412078261375427, + "epoch": 0.14264145517290963, + "kl_loss": 0.14774635434150696, + "loss_ib": 0.002880387008190155, + "step": 496 + }, + { + "ce_ib": 16.335601806640625, + "ce_orig": 2.0776398181915283, + "epoch": 0.14264145517290963, + "kl_loss": 0.1809537559747696, + "loss_ib": 0.003443097695708275, + "step": 496 + }, + { + "ce_ib": 9.790528297424316, + "ce_orig": 0.9552518129348755, + "epoch": 0.14292903875188726, + "kl_loss": 0.1889423429965973, + "loss_ib": 0.002868476090952754, + "step": 497 + }, + { + "ce_ib": 9.869925498962402, + "ce_orig": 0.8539248704910278, + "epoch": 0.14292903875188726, + "kl_loss": 0.19081233441829681, + "loss_ib": 0.002895115874707699, + "step": 497 + }, + { + "ce_ib": 9.595001220703125, + "ce_orig": 0.5288217663764954, + "epoch": 0.14292903875188726, + "kl_loss": 0.19318905472755432, + "loss_ib": 0.002891390584409237, + "step": 497 + }, + { + "ce_ib": 9.586201667785645, + "ce_orig": 1.1783746480941772, + "epoch": 0.14292903875188726, + "kl_loss": 0.18821683526039124, + "loss_ib": 0.002840788336470723, + "step": 497 + }, + { + "ce_ib": 12.379876136779785, + "ce_orig": 1.2077107429504395, + "epoch": 0.1432166223308649, + "kl_loss": 0.14908835291862488, + "loss_ib": 0.0027288710698485374, + "step": 498 + }, + { + "ce_ib": 11.757010459899902, + "ce_orig": 0.6566261649131775, + "epoch": 0.1432166223308649, + "kl_loss": 0.1946725696325302, + "loss_ib": 0.003122426802292466, + "step": 498 + }, + { + "ce_ib": 8.702943801879883, + "ce_orig": 0.4927518963813782, + "epoch": 0.1432166223308649, + "kl_loss": 0.10925129801034927, + "loss_ib": 0.001962807262316346, + "step": 498 + }, + { + "ce_ib": 10.411445617675781, + "ce_orig": 0.706779956817627, + "epoch": 0.1432166223308649, + "kl_loss": 0.17433921992778778, + "loss_ib": 0.002784536685794592, + "step": 498 + }, + { + "ce_ib": 14.122117042541504, + "ce_orig": 1.4039651155471802, + "epoch": 0.14350420590984256, + "kl_loss": 0.16670997440814972, + "loss_ib": 0.0030793112237006426, + "step": 499 + }, + { + "ce_ib": 10.383757591247559, + "ce_orig": 1.0631924867630005, + "epoch": 0.14350420590984256, + "kl_loss": 0.16225400567054749, + "loss_ib": 0.002660915721207857, + "step": 499 + }, + { + "ce_ib": 6.402273654937744, + "ce_orig": 0.6313638091087341, + "epoch": 0.14350420590984256, + "kl_loss": 0.18505465984344482, + "loss_ib": 0.0024907737970352173, + "step": 499 + }, + { + "ce_ib": 8.23951244354248, + "ce_orig": 0.8967234492301941, + "epoch": 0.14350420590984256, + "kl_loss": 0.13677600026130676, + "loss_ib": 0.0021917112171649933, + "step": 499 + }, + { + "epoch": 0.14379178948882018, + "grad_norm": 0.09538047015666962, + "learning_rate": 4.996098058638809e-05, + "loss": 0.8901, + "step": 500 + }, + { + "ce_ib": 11.12409496307373, + "ce_orig": 1.0636703968048096, + "epoch": 0.14379178948882018, + "kl_loss": 0.19292375445365906, + "loss_ib": 0.003041646908968687, + "step": 500 + }, + { + "ce_ib": 9.371049880981445, + "ce_orig": 1.0397700071334839, + "epoch": 0.14379178948882018, + "kl_loss": 0.2242284119129181, + "loss_ib": 0.003179389052093029, + "step": 500 + }, + { + "ce_ib": 9.47309684753418, + "ce_orig": 0.7427978515625, + "epoch": 0.14379178948882018, + "kl_loss": 0.13901641964912415, + "loss_ib": 0.0023374739103019238, + "step": 500 + }, + { + "ce_ib": 9.967049598693848, + "ce_orig": 0.44743821024894714, + "epoch": 0.14379178948882018, + "kl_loss": 0.1941380798816681, + "loss_ib": 0.002938085701316595, + "step": 500 + }, + { + "ce_ib": 6.8732428550720215, + "ce_orig": 0.6810420751571655, + "epoch": 0.14407937306779783, + "kl_loss": 0.14418122172355652, + "loss_ib": 0.0021291363518685102, + "step": 501 + }, + { + "ce_ib": 11.470809936523438, + "ce_orig": 1.31697416305542, + "epoch": 0.14407937306779783, + "kl_loss": 0.17208820581436157, + "loss_ib": 0.002867962932214141, + "step": 501 + }, + { + "ce_ib": 10.819042205810547, + "ce_orig": 1.1678673028945923, + "epoch": 0.14407937306779783, + "kl_loss": 0.19393761456012726, + "loss_ib": 0.003021280048415065, + "step": 501 + }, + { + "ce_ib": 6.877528190612793, + "ce_orig": 0.7564640641212463, + "epoch": 0.14407937306779783, + "kl_loss": 0.12551426887512207, + "loss_ib": 0.0019428954692557454, + "step": 501 + }, + { + "ce_ib": 7.815924644470215, + "ce_orig": 0.32971468567848206, + "epoch": 0.14436695664677546, + "kl_loss": 0.42823344469070435, + "loss_ib": 0.005063927266746759, + "step": 502 + }, + { + "ce_ib": 12.163249969482422, + "ce_orig": 0.8130381107330322, + "epoch": 0.14436695664677546, + "kl_loss": 0.15447859466075897, + "loss_ib": 0.002761110896244645, + "step": 502 + }, + { + "ce_ib": 8.399581909179688, + "ce_orig": 0.6190077066421509, + "epoch": 0.14436695664677546, + "kl_loss": 0.14217016100883484, + "loss_ib": 0.0022616598289459944, + "step": 502 + }, + { + "ce_ib": 6.57456636428833, + "ce_orig": 0.7635507583618164, + "epoch": 0.14436695664677546, + "kl_loss": 0.1617724746465683, + "loss_ib": 0.002275181468576193, + "step": 502 + }, + { + "ce_ib": 10.540924072265625, + "ce_orig": 0.5798451900482178, + "epoch": 0.1446545402257531, + "kl_loss": 0.21290099620819092, + "loss_ib": 0.003183102235198021, + "step": 503 + }, + { + "ce_ib": 8.164572715759277, + "ce_orig": 0.5676815509796143, + "epoch": 0.1446545402257531, + "kl_loss": 0.38908153772354126, + "loss_ib": 0.004707272630184889, + "step": 503 + }, + { + "ce_ib": 10.497174263000488, + "ce_orig": 0.8416476249694824, + "epoch": 0.1446545402257531, + "kl_loss": 0.09391873329877853, + "loss_ib": 0.001988904783502221, + "step": 503 + }, + { + "ce_ib": 11.39775562286377, + "ce_orig": 0.9236753582954407, + "epoch": 0.1446545402257531, + "kl_loss": 0.19317620992660522, + "loss_ib": 0.003071537474170327, + "step": 503 + }, + { + "ce_ib": 13.18823528289795, + "ce_orig": 1.4070528745651245, + "epoch": 0.14494212380473076, + "kl_loss": 0.16827702522277832, + "loss_ib": 0.0030015939846634865, + "step": 504 + }, + { + "ce_ib": 12.251890182495117, + "ce_orig": 1.0019031763076782, + "epoch": 0.14494212380473076, + "kl_loss": 0.15187132358551025, + "loss_ib": 0.002743902150541544, + "step": 504 + }, + { + "ce_ib": 11.316810607910156, + "ce_orig": 0.949166476726532, + "epoch": 0.14494212380473076, + "kl_loss": 0.18364247679710388, + "loss_ib": 0.0029681057203561068, + "step": 504 + }, + { + "ce_ib": 10.85471248626709, + "ce_orig": 1.003252387046814, + "epoch": 0.14494212380473076, + "kl_loss": 0.1248200461268425, + "loss_ib": 0.002333671785891056, + "step": 504 + }, + { + "epoch": 0.14522970738370838, + "grad_norm": 0.09246399253606796, + "learning_rate": 4.9958783337730156e-05, + "loss": 0.8941, + "step": 505 + }, + { + "ce_ib": 7.710818767547607, + "ce_orig": 0.6707054972648621, + "epoch": 0.14522970738370838, + "kl_loss": 0.12212786078453064, + "loss_ib": 0.001992360455915332, + "step": 505 + }, + { + "ce_ib": 11.13107967376709, + "ce_orig": 0.5834399461746216, + "epoch": 0.14522970738370838, + "kl_loss": 0.26416948437690735, + "loss_ib": 0.0037548027466982603, + "step": 505 + }, + { + "ce_ib": 10.07874870300293, + "ce_orig": 0.4818477928638458, + "epoch": 0.14522970738370838, + "kl_loss": 0.20180988311767578, + "loss_ib": 0.003025973681360483, + "step": 505 + }, + { + "ce_ib": 7.720142364501953, + "ce_orig": 0.48857802152633667, + "epoch": 0.14522970738370838, + "kl_loss": 0.23724249005317688, + "loss_ib": 0.0031444390770047903, + "step": 505 + }, + { + "ce_ib": 11.317874908447266, + "ce_orig": 0.878321647644043, + "epoch": 0.14551729096268604, + "kl_loss": 0.22631272673606873, + "loss_ib": 0.0033949147909879684, + "step": 506 + }, + { + "ce_ib": 6.088842868804932, + "ce_orig": 0.44260281324386597, + "epoch": 0.14551729096268604, + "kl_loss": 0.1061149537563324, + "loss_ib": 0.0016700337873771787, + "step": 506 + }, + { + "ce_ib": 10.132776260375977, + "ce_orig": 0.45138809084892273, + "epoch": 0.14551729096268604, + "kl_loss": 0.12615957856178284, + "loss_ib": 0.002274873433634639, + "step": 506 + }, + { + "ce_ib": 9.908495903015137, + "ce_orig": 0.6264781951904297, + "epoch": 0.14551729096268604, + "kl_loss": 0.15342764556407928, + "loss_ib": 0.0025251260958611965, + "step": 506 + }, + { + "ce_ib": 15.370584487915039, + "ce_orig": 1.6317614316940308, + "epoch": 0.14580487454166366, + "kl_loss": 0.368154913187027, + "loss_ib": 0.005218606907874346, + "step": 507 + }, + { + "ce_ib": 9.250166893005371, + "ce_orig": 0.9209775924682617, + "epoch": 0.14580487454166366, + "kl_loss": 0.16472969949245453, + "loss_ib": 0.002572313416749239, + "step": 507 + }, + { + "ce_ib": 14.066780090332031, + "ce_orig": 0.879586935043335, + "epoch": 0.14580487454166366, + "kl_loss": 0.5569726228713989, + "loss_ib": 0.0069764042273163795, + "step": 507 + }, + { + "ce_ib": 9.828339576721191, + "ce_orig": 0.7276045083999634, + "epoch": 0.14580487454166366, + "kl_loss": 0.20243659615516663, + "loss_ib": 0.003007199615240097, + "step": 507 + }, + { + "ce_ib": 8.405064582824707, + "ce_orig": 0.5828406810760498, + "epoch": 0.1460924581206413, + "kl_loss": 0.16031748056411743, + "loss_ib": 0.002443681238219142, + "step": 508 + }, + { + "ce_ib": 12.821935653686523, + "ce_orig": 0.7830954194068909, + "epoch": 0.1460924581206413, + "kl_loss": 0.1861943006515503, + "loss_ib": 0.003144136629998684, + "step": 508 + }, + { + "ce_ib": 13.270180702209473, + "ce_orig": 1.4049288034439087, + "epoch": 0.1460924581206413, + "kl_loss": 0.26943105459213257, + "loss_ib": 0.004021328408271074, + "step": 508 + }, + { + "ce_ib": 8.444483757019043, + "ce_orig": 0.9969488382339478, + "epoch": 0.1460924581206413, + "kl_loss": 0.17717388272285461, + "loss_ib": 0.002616187324747443, + "step": 508 + }, + { + "ce_ib": 14.19354248046875, + "ce_orig": 1.728639006614685, + "epoch": 0.14638004169961896, + "kl_loss": 0.12366104125976562, + "loss_ib": 0.002655964344739914, + "step": 509 + }, + { + "ce_ib": 12.799410820007324, + "ce_orig": 0.962722897529602, + "epoch": 0.14638004169961896, + "kl_loss": 0.2079516500234604, + "loss_ib": 0.003359457477927208, + "step": 509 + }, + { + "ce_ib": 7.642533779144287, + "ce_orig": 0.570946216583252, + "epoch": 0.14638004169961896, + "kl_loss": 0.15292105078697205, + "loss_ib": 0.002293463796377182, + "step": 509 + }, + { + "ce_ib": 10.93984603881836, + "ce_orig": 1.3312052488327026, + "epoch": 0.14638004169961896, + "kl_loss": 0.1654568910598755, + "loss_ib": 0.0027485534083098173, + "step": 509 + }, + { + "epoch": 0.1466676252785966, + "grad_norm": 0.09280460327863693, + "learning_rate": 4.9956525957570086e-05, + "loss": 0.8336, + "step": 510 + }, + { + "ce_ib": 7.373950004577637, + "ce_orig": 0.7074568271636963, + "epoch": 0.1466676252785966, + "kl_loss": 0.12326858937740326, + "loss_ib": 0.001970080891624093, + "step": 510 + }, + { + "ce_ib": 13.963235855102539, + "ce_orig": 1.2645924091339111, + "epoch": 0.1466676252785966, + "kl_loss": 0.1571410596370697, + "loss_ib": 0.002967734355479479, + "step": 510 + }, + { + "ce_ib": 16.279865264892578, + "ce_orig": 1.9305559396743774, + "epoch": 0.1466676252785966, + "kl_loss": 0.2673254609107971, + "loss_ib": 0.004301241133362055, + "step": 510 + }, + { + "ce_ib": 11.21214485168457, + "ce_orig": 0.6908549666404724, + "epoch": 0.1466676252785966, + "kl_loss": 0.18891718983650208, + "loss_ib": 0.003010386135429144, + "step": 510 + }, + { + "ce_ib": 13.738448143005371, + "ce_orig": 1.3909226655960083, + "epoch": 0.14695520885757424, + "kl_loss": 0.2021740972995758, + "loss_ib": 0.0033955855760723352, + "step": 511 + }, + { + "ce_ib": 8.647909164428711, + "ce_orig": 0.6839855313301086, + "epoch": 0.14695520885757424, + "kl_loss": 0.13517390191555023, + "loss_ib": 0.0022165297996252775, + "step": 511 + }, + { + "ce_ib": 12.71828556060791, + "ce_orig": 1.1863548755645752, + "epoch": 0.14695520885757424, + "kl_loss": 0.3870590329170227, + "loss_ib": 0.005142418667674065, + "step": 511 + }, + { + "ce_ib": 7.004892826080322, + "ce_orig": 0.6125016808509827, + "epoch": 0.14695520885757424, + "kl_loss": 0.11418376863002777, + "loss_ib": 0.0018423269502818584, + "step": 511 + }, + { + "ce_ib": 12.221985816955566, + "ce_orig": 1.3097161054611206, + "epoch": 0.14724279243655186, + "kl_loss": 0.1954774260520935, + "loss_ib": 0.003176972735673189, + "step": 512 + }, + { + "ce_ib": 7.531097412109375, + "ce_orig": 0.5426865816116333, + "epoch": 0.14724279243655186, + "kl_loss": 0.1673842817544937, + "loss_ib": 0.002426952589303255, + "step": 512 + }, + { + "ce_ib": 9.53439712524414, + "ce_orig": 0.8523163795471191, + "epoch": 0.14724279243655186, + "kl_loss": 0.16582007706165314, + "loss_ib": 0.0026116403751075268, + "step": 512 + }, + { + "ce_ib": 9.802694320678711, + "ce_orig": 0.5798234939575195, + "epoch": 0.14724279243655186, + "kl_loss": 0.31143057346343994, + "loss_ib": 0.0040945750661194324, + "step": 512 + }, + { + "ce_ib": 16.203046798706055, + "ce_orig": 1.5306854248046875, + "epoch": 0.1475303760155295, + "kl_loss": 0.17325828969478607, + "loss_ib": 0.003352887462824583, + "step": 513 + }, + { + "ce_ib": 8.417235374450684, + "ce_orig": 0.896914005279541, + "epoch": 0.1475303760155295, + "kl_loss": 0.19780506193637848, + "loss_ib": 0.0028197739738970995, + "step": 513 + }, + { + "ce_ib": 6.960491180419922, + "ce_orig": 0.528160572052002, + "epoch": 0.1475303760155295, + "kl_loss": 0.23184965550899506, + "loss_ib": 0.0030145456548780203, + "step": 513 + }, + { + "ce_ib": 6.913288116455078, + "ce_orig": 0.4217478930950165, + "epoch": 0.1475303760155295, + "kl_loss": 0.12475548684597015, + "loss_ib": 0.0019388835644349456, + "step": 513 + }, + { + "ce_ib": 13.182520866394043, + "ce_orig": 1.2185357809066772, + "epoch": 0.14781795959450716, + "kl_loss": 0.2229577898979187, + "loss_ib": 0.003547829808667302, + "step": 514 + }, + { + "ce_ib": 13.908792495727539, + "ce_orig": 1.1175830364227295, + "epoch": 0.14781795959450716, + "kl_loss": 0.28653332591056824, + "loss_ib": 0.004256212152540684, + "step": 514 + }, + { + "ce_ib": 11.028960227966309, + "ce_orig": 1.2024520635604858, + "epoch": 0.14781795959450716, + "kl_loss": 0.21963676810264587, + "loss_ib": 0.0032992635387927294, + "step": 514 + }, + { + "ce_ib": 9.403878211975098, + "ce_orig": 0.7816042304039001, + "epoch": 0.14781795959450716, + "kl_loss": 0.11819127202033997, + "loss_ib": 0.0021223004441708326, + "step": 514 + }, + { + "epoch": 0.1481055431734848, + "grad_norm": 0.09141584485769272, + "learning_rate": 4.9954208451346465e-05, + "loss": 0.8628, + "step": 515 + }, + { + "ce_ib": 8.093498229980469, + "ce_orig": 0.5340498089790344, + "epoch": 0.1481055431734848, + "kl_loss": 0.1474526822566986, + "loss_ib": 0.0022838765289634466, + "step": 515 + }, + { + "ce_ib": 11.310858726501465, + "ce_orig": 1.0567339658737183, + "epoch": 0.1481055431734848, + "kl_loss": 0.1496482789516449, + "loss_ib": 0.0026275685522705317, + "step": 515 + }, + { + "ce_ib": 8.765999794006348, + "ce_orig": 0.8822218775749207, + "epoch": 0.1481055431734848, + "kl_loss": 0.14761213958263397, + "loss_ib": 0.0023527212906628847, + "step": 515 + }, + { + "ce_ib": 9.370061874389648, + "ce_orig": 0.7221859097480774, + "epoch": 0.1481055431734848, + "kl_loss": 0.21021240949630737, + "loss_ib": 0.0030391302425414324, + "step": 515 + }, + { + "ce_ib": 10.952759742736816, + "ce_orig": 1.2016615867614746, + "epoch": 0.14839312675246244, + "kl_loss": 0.12589505314826965, + "loss_ib": 0.0023542263079434633, + "step": 516 + }, + { + "ce_ib": 11.208597183227539, + "ce_orig": 1.2649309635162354, + "epoch": 0.14839312675246244, + "kl_loss": 0.1935414969921112, + "loss_ib": 0.0030562744941562414, + "step": 516 + }, + { + "ce_ib": 14.064299583435059, + "ce_orig": 1.3251948356628418, + "epoch": 0.14839312675246244, + "kl_loss": 0.2095833718776703, + "loss_ib": 0.003502263454720378, + "step": 516 + }, + { + "ce_ib": 6.933436870574951, + "ce_orig": 0.6950163841247559, + "epoch": 0.14839312675246244, + "kl_loss": 0.1774298995733261, + "loss_ib": 0.0024676427710801363, + "step": 516 + }, + { + "ce_ib": 6.997844219207764, + "ce_orig": 0.7801376581192017, + "epoch": 0.14868071033144006, + "kl_loss": 0.14632141590118408, + "loss_ib": 0.002162998542189598, + "step": 517 + }, + { + "ce_ib": 11.151383399963379, + "ce_orig": 1.1578229665756226, + "epoch": 0.14868071033144006, + "kl_loss": 0.31972575187683105, + "loss_ib": 0.004312395583838224, + "step": 517 + }, + { + "ce_ib": 12.593860626220703, + "ce_orig": 1.288727045059204, + "epoch": 0.14868071033144006, + "kl_loss": 0.172956645488739, + "loss_ib": 0.0029889524448662996, + "step": 517 + }, + { + "ce_ib": 11.15422248840332, + "ce_orig": 0.7159664630889893, + "epoch": 0.14868071033144006, + "kl_loss": 0.20476844906806946, + "loss_ib": 0.0031631067395210266, + "step": 517 + }, + { + "ce_ib": 10.187602996826172, + "ce_orig": 0.8133834600448608, + "epoch": 0.14896829391041772, + "kl_loss": 0.15150345861911774, + "loss_ib": 0.0025337948463857174, + "step": 518 + }, + { + "ce_ib": 8.426726341247559, + "ce_orig": 0.8027427196502686, + "epoch": 0.14896829391041772, + "kl_loss": 0.33559098839759827, + "loss_ib": 0.004198582377284765, + "step": 518 + }, + { + "ce_ib": 10.157910346984863, + "ce_orig": 0.7894662618637085, + "epoch": 0.14896829391041772, + "kl_loss": 0.1754056215286255, + "loss_ib": 0.0027698471676558256, + "step": 518 + }, + { + "ce_ib": 12.347294807434082, + "ce_orig": 0.8127198219299316, + "epoch": 0.14896829391041772, + "kl_loss": 0.16092827916145325, + "loss_ib": 0.0028440123423933983, + "step": 518 + }, + { + "ce_ib": 12.081820487976074, + "ce_orig": 1.1218383312225342, + "epoch": 0.14925587748939537, + "kl_loss": 0.1572212278842926, + "loss_ib": 0.0027803941629827023, + "step": 519 + }, + { + "ce_ib": 8.355681419372559, + "ce_orig": 0.4457217752933502, + "epoch": 0.14925587748939537, + "kl_loss": 0.15767526626586914, + "loss_ib": 0.002412320813164115, + "step": 519 + }, + { + "ce_ib": 15.299323081970215, + "ce_orig": 1.5671056509017944, + "epoch": 0.14925587748939537, + "kl_loss": 0.22422994673252106, + "loss_ib": 0.003772231750190258, + "step": 519 + }, + { + "ce_ib": 6.267134189605713, + "ce_orig": 0.5929120779037476, + "epoch": 0.14925587748939537, + "kl_loss": 0.1314193606376648, + "loss_ib": 0.0019409068627282977, + "step": 519 + }, + { + "epoch": 0.149543461068373, + "grad_norm": 0.12853504717350006, + "learning_rate": 4.995183082464269e-05, + "loss": 0.8526, + "step": 520 + }, + { + "ce_ib": 14.093266487121582, + "ce_orig": 1.7895437479019165, + "epoch": 0.149543461068373, + "kl_loss": 0.18939438462257385, + "loss_ib": 0.003303270321339369, + "step": 520 + }, + { + "ce_ib": 9.622370719909668, + "ce_orig": 0.6922176480293274, + "epoch": 0.149543461068373, + "kl_loss": 0.21940672397613525, + "loss_ib": 0.003156304359436035, + "step": 520 + }, + { + "ce_ib": 10.755894660949707, + "ce_orig": 0.4960995316505432, + "epoch": 0.149543461068373, + "kl_loss": 0.19118964672088623, + "loss_ib": 0.002987485844641924, + "step": 520 + }, + { + "ce_ib": 12.176746368408203, + "ce_orig": 0.7688639163970947, + "epoch": 0.149543461068373, + "kl_loss": 0.1497233808040619, + "loss_ib": 0.0027149084489792585, + "step": 520 + }, + { + "ce_ib": 8.148961067199707, + "ce_orig": 0.9524043798446655, + "epoch": 0.14983104464735064, + "kl_loss": 0.1963375210762024, + "loss_ib": 0.00277827144600451, + "step": 521 + }, + { + "ce_ib": 7.612722396850586, + "ce_orig": 0.5829215049743652, + "epoch": 0.14983104464735064, + "kl_loss": 0.13818615674972534, + "loss_ib": 0.0021431338973343372, + "step": 521 + }, + { + "ce_ib": 6.743089199066162, + "ce_orig": 0.6650915741920471, + "epoch": 0.14983104464735064, + "kl_loss": 0.15762126445770264, + "loss_ib": 0.002250521443784237, + "step": 521 + }, + { + "ce_ib": 7.84657621383667, + "ce_orig": 0.5749149918556213, + "epoch": 0.14983104464735064, + "kl_loss": 0.1294727921485901, + "loss_ib": 0.0020793855655938387, + "step": 521 + }, + { + "ce_ib": 10.302209854125977, + "ce_orig": 0.7592967748641968, + "epoch": 0.15011862822632827, + "kl_loss": 0.16181603074073792, + "loss_ib": 0.002648381283506751, + "step": 522 + }, + { + "ce_ib": 11.727582931518555, + "ce_orig": 1.4059052467346191, + "epoch": 0.15011862822632827, + "kl_loss": 0.17755615711212158, + "loss_ib": 0.002948319772258401, + "step": 522 + }, + { + "ce_ib": 5.496983528137207, + "ce_orig": 0.5452262163162231, + "epoch": 0.15011862822632827, + "kl_loss": 0.09833450615406036, + "loss_ib": 0.0015330433379858732, + "step": 522 + }, + { + "ce_ib": 12.549999237060547, + "ce_orig": 0.8813621997833252, + "epoch": 0.15011862822632827, + "kl_loss": 0.19512861967086792, + "loss_ib": 0.0032062861137092113, + "step": 522 + }, + { + "ce_ib": 13.78541374206543, + "ce_orig": 1.6297649145126343, + "epoch": 0.15040621180530592, + "kl_loss": 0.17818473279476166, + "loss_ib": 0.0031603884417563677, + "step": 523 + }, + { + "ce_ib": 11.36571979522705, + "ce_orig": 0.8598288893699646, + "epoch": 0.15040621180530592, + "kl_loss": 0.13574492931365967, + "loss_ib": 0.0024940213188529015, + "step": 523 + }, + { + "ce_ib": 7.622193813323975, + "ce_orig": 0.7786977291107178, + "epoch": 0.15040621180530592, + "kl_loss": 0.14550796151161194, + "loss_ib": 0.00221729907207191, + "step": 523 + }, + { + "ce_ib": 11.014781951904297, + "ce_orig": 1.0243315696716309, + "epoch": 0.15040621180530592, + "kl_loss": 0.2482243776321411, + "loss_ib": 0.003583722049370408, + "step": 523 + }, + { + "ce_ib": 10.487326622009277, + "ce_orig": 0.8381717205047607, + "epoch": 0.15069379538428357, + "kl_loss": 0.1882508099079132, + "loss_ib": 0.002931240713223815, + "step": 524 + }, + { + "ce_ib": 5.229416847229004, + "ce_orig": 0.4120478630065918, + "epoch": 0.15069379538428357, + "kl_loss": 0.09162455052137375, + "loss_ib": 0.0014391872100532055, + "step": 524 + }, + { + "ce_ib": 10.072685241699219, + "ce_orig": 0.5915430188179016, + "epoch": 0.15069379538428357, + "kl_loss": 0.25207147002220154, + "loss_ib": 0.0035279830917716026, + "step": 524 + }, + { + "ce_ib": 9.47887134552002, + "ce_orig": 0.8751776218414307, + "epoch": 0.15069379538428357, + "kl_loss": 0.18856096267700195, + "loss_ib": 0.0028334965463727713, + "step": 524 + }, + { + "epoch": 0.1509813789632612, + "grad_norm": 0.09292690455913544, + "learning_rate": 4.9949393083187005e-05, + "loss": 0.9051, + "step": 525 + }, + { + "ce_ib": 16.163028717041016, + "ce_orig": 1.7853000164031982, + "epoch": 0.1509813789632612, + "kl_loss": 0.2081843614578247, + "loss_ib": 0.003698146203532815, + "step": 525 + }, + { + "ce_ib": 9.814764022827148, + "ce_orig": 0.8537163734436035, + "epoch": 0.1509813789632612, + "kl_loss": 0.2146293818950653, + "loss_ib": 0.003127770032733679, + "step": 525 + }, + { + "ce_ib": 6.847357749938965, + "ce_orig": 0.7610385417938232, + "epoch": 0.1509813789632612, + "kl_loss": 0.10232152044773102, + "loss_ib": 0.0017079509561881423, + "step": 525 + }, + { + "ce_ib": 9.158605575561523, + "ce_orig": 0.7744526863098145, + "epoch": 0.1509813789632612, + "kl_loss": 0.16445057094097137, + "loss_ib": 0.0025603664107620716, + "step": 525 + }, + { + "ce_ib": 10.982367515563965, + "ce_orig": 0.6184618473052979, + "epoch": 0.15126896254223884, + "kl_loss": 0.12473335862159729, + "loss_ib": 0.0023455703631043434, + "step": 526 + }, + { + "ce_ib": 9.526459693908691, + "ce_orig": 1.034873366355896, + "epoch": 0.15126896254223884, + "kl_loss": 0.18319007754325867, + "loss_ib": 0.0027845464646816254, + "step": 526 + }, + { + "ce_ib": 8.941856384277344, + "ce_orig": 0.8572098612785339, + "epoch": 0.15126896254223884, + "kl_loss": 0.14592421054840088, + "loss_ib": 0.0023534276988357306, + "step": 526 + }, + { + "ce_ib": 8.852437973022461, + "ce_orig": 0.6226751208305359, + "epoch": 0.15126896254223884, + "kl_loss": 0.10193414986133575, + "loss_ib": 0.001904585282318294, + "step": 526 + }, + { + "ce_ib": 9.570630073547363, + "ce_orig": 0.6968982219696045, + "epoch": 0.15155654612121647, + "kl_loss": 0.14039430022239685, + "loss_ib": 0.0023610058706253767, + "step": 527 + }, + { + "ce_ib": 7.009479522705078, + "ce_orig": 0.7059912085533142, + "epoch": 0.15155654612121647, + "kl_loss": 0.20908552408218384, + "loss_ib": 0.0027918030973523855, + "step": 527 + }, + { + "ce_ib": 7.056645393371582, + "ce_orig": 0.8603862524032593, + "epoch": 0.15155654612121647, + "kl_loss": 0.23444432020187378, + "loss_ib": 0.0030501075088977814, + "step": 527 + }, + { + "ce_ib": 8.10662841796875, + "ce_orig": 0.5832763910293579, + "epoch": 0.15155654612121647, + "kl_loss": 0.19913099706172943, + "loss_ib": 0.0028019726742058992, + "step": 527 + }, + { + "ce_ib": 12.294358253479004, + "ce_orig": 1.3011188507080078, + "epoch": 0.15184412970019412, + "kl_loss": 0.22973008453845978, + "loss_ib": 0.00352673651650548, + "step": 528 + }, + { + "ce_ib": 11.455277442932129, + "ce_orig": 0.8678830862045288, + "epoch": 0.15184412970019412, + "kl_loss": 0.20876343548297882, + "loss_ib": 0.003233161987736821, + "step": 528 + }, + { + "ce_ib": 13.364859580993652, + "ce_orig": 0.4887239933013916, + "epoch": 0.15184412970019412, + "kl_loss": 0.29823386669158936, + "loss_ib": 0.004318824503570795, + "step": 528 + }, + { + "ce_ib": 7.953736305236816, + "ce_orig": 0.8171699643135071, + "epoch": 0.15184412970019412, + "kl_loss": 0.095934197306633, + "loss_ib": 0.0017547155730426311, + "step": 528 + }, + { + "ce_ib": 6.75135612487793, + "ce_orig": 0.6152947545051575, + "epoch": 0.15213171327917177, + "kl_loss": 0.13159394264221191, + "loss_ib": 0.001991074997931719, + "step": 529 + }, + { + "ce_ib": 7.845587730407715, + "ce_orig": 0.5412665009498596, + "epoch": 0.15213171327917177, + "kl_loss": 0.19593513011932373, + "loss_ib": 0.0027439098339527845, + "step": 529 + }, + { + "ce_ib": 11.808723449707031, + "ce_orig": 1.4632874727249146, + "epoch": 0.15213171327917177, + "kl_loss": 0.1801619529724121, + "loss_ib": 0.002982491860166192, + "step": 529 + }, + { + "ce_ib": 9.44063949584961, + "ce_orig": 0.7242243885993958, + "epoch": 0.15213171327917177, + "kl_loss": 0.11666402220726013, + "loss_ib": 0.002110704081133008, + "step": 529 + }, + { + "epoch": 0.1524192968581494, + "grad_norm": 0.09365444630384445, + "learning_rate": 4.994689523285251e-05, + "loss": 0.8573, + "step": 530 + }, + { + "ce_ib": 13.775089263916016, + "ce_orig": 1.116524577140808, + "epoch": 0.1524192968581494, + "kl_loss": 0.19737480580806732, + "loss_ib": 0.0033512567169964314, + "step": 530 + }, + { + "ce_ib": 8.636777877807617, + "ce_orig": 1.3055970668792725, + "epoch": 0.1524192968581494, + "kl_loss": 0.10996636748313904, + "loss_ib": 0.001963341375812888, + "step": 530 + }, + { + "ce_ib": 9.965653419494629, + "ce_orig": 0.690579891204834, + "epoch": 0.1524192968581494, + "kl_loss": 0.19870160520076752, + "loss_ib": 0.0029835812747478485, + "step": 530 + }, + { + "ce_ib": 10.97655963897705, + "ce_orig": 0.7595181465148926, + "epoch": 0.1524192968581494, + "kl_loss": 0.18271636962890625, + "loss_ib": 0.002924819476902485, + "step": 530 + }, + { + "ce_ib": 8.113723754882812, + "ce_orig": 0.8315019011497498, + "epoch": 0.15270688043712705, + "kl_loss": 0.1101025640964508, + "loss_ib": 0.0019123980309814215, + "step": 531 + }, + { + "ce_ib": 13.18765640258789, + "ce_orig": 0.8172896504402161, + "epoch": 0.15270688043712705, + "kl_loss": 0.18894284963607788, + "loss_ib": 0.003208193928003311, + "step": 531 + }, + { + "ce_ib": 4.153471946716309, + "ce_orig": 0.1576905995607376, + "epoch": 0.15270688043712705, + "kl_loss": 0.40544354915618896, + "loss_ib": 0.004469782579690218, + "step": 531 + }, + { + "ce_ib": 7.5165839195251465, + "ce_orig": 0.8319332003593445, + "epoch": 0.15270688043712705, + "kl_loss": 0.1594739556312561, + "loss_ib": 0.0023463978432118893, + "step": 531 + }, + { + "ce_ib": 12.242563247680664, + "ce_orig": 1.217457890510559, + "epoch": 0.15299446401610467, + "kl_loss": 0.40689927339553833, + "loss_ib": 0.005293248686939478, + "step": 532 + }, + { + "ce_ib": 12.046343803405762, + "ce_orig": 1.403741717338562, + "epoch": 0.15299446401610467, + "kl_loss": 0.1668795347213745, + "loss_ib": 0.00287342956289649, + "step": 532 + }, + { + "ce_ib": 10.049248695373535, + "ce_orig": 1.009892225265503, + "epoch": 0.15299446401610467, + "kl_loss": 0.1594667136669159, + "loss_ib": 0.0025995918549597263, + "step": 532 + }, + { + "ce_ib": 8.571545600891113, + "ce_orig": 0.45953336358070374, + "epoch": 0.15299446401610467, + "kl_loss": 0.17538738250732422, + "loss_ib": 0.0026110284961760044, + "step": 532 + }, + { + "ce_ib": 7.9551005363464355, + "ce_orig": 0.6499494910240173, + "epoch": 0.15328204759508232, + "kl_loss": 0.17974281311035156, + "loss_ib": 0.0025929382536560297, + "step": 533 + }, + { + "ce_ib": 8.36906909942627, + "ce_orig": 0.6839005351066589, + "epoch": 0.15328204759508232, + "kl_loss": 0.13957883417606354, + "loss_ib": 0.0022326952312141657, + "step": 533 + }, + { + "ce_ib": 11.703327178955078, + "ce_orig": 1.262555718421936, + "epoch": 0.15328204759508232, + "kl_loss": 0.20418298244476318, + "loss_ib": 0.0032121625263243914, + "step": 533 + }, + { + "ce_ib": 11.333956718444824, + "ce_orig": 0.4284483790397644, + "epoch": 0.15328204759508232, + "kl_loss": 0.2264348566532135, + "loss_ib": 0.0033977441489696503, + "step": 533 + }, + { + "ce_ib": 9.996318817138672, + "ce_orig": 0.7071578502655029, + "epoch": 0.15356963117405997, + "kl_loss": 0.15454252064228058, + "loss_ib": 0.002545056864619255, + "step": 534 + }, + { + "ce_ib": 12.299304008483887, + "ce_orig": 1.0114151239395142, + "epoch": 0.15356963117405997, + "kl_loss": 0.19390329718589783, + "loss_ib": 0.0031689631287008524, + "step": 534 + }, + { + "ce_ib": 11.23180103302002, + "ce_orig": 1.1782814264297485, + "epoch": 0.15356963117405997, + "kl_loss": 0.20649601519107819, + "loss_ib": 0.0031881402246654034, + "step": 534 + }, + { + "ce_ib": 10.936742782592773, + "ce_orig": 1.050248384475708, + "epoch": 0.15356963117405997, + "kl_loss": 0.10441295057535172, + "loss_ib": 0.002137803705409169, + "step": 534 + }, + { + "epoch": 0.1538572147530376, + "grad_norm": 0.09265723824501038, + "learning_rate": 4.9944337279657106e-05, + "loss": 0.9042, + "step": 535 + }, + { + "ce_ib": 14.606983184814453, + "ce_orig": 1.221248745918274, + "epoch": 0.1538572147530376, + "kl_loss": 0.2094915807247162, + "loss_ib": 0.003555614035576582, + "step": 535 + }, + { + "ce_ib": 9.798521995544434, + "ce_orig": 0.5161023139953613, + "epoch": 0.1538572147530376, + "kl_loss": 0.1310487538576126, + "loss_ib": 0.0022903396748006344, + "step": 535 + }, + { + "ce_ib": 8.770511627197266, + "ce_orig": 0.7841663956642151, + "epoch": 0.1538572147530376, + "kl_loss": 0.18124917149543762, + "loss_ib": 0.0026895427145063877, + "step": 535 + }, + { + "ce_ib": 7.555400371551514, + "ce_orig": 0.7624974250793457, + "epoch": 0.1538572147530376, + "kl_loss": 0.17534580826759338, + "loss_ib": 0.002508997917175293, + "step": 535 + }, + { + "ce_ib": 10.383445739746094, + "ce_orig": 1.285834550857544, + "epoch": 0.15414479833201525, + "kl_loss": 0.14239218831062317, + "loss_ib": 0.002462266245856881, + "step": 536 + }, + { + "ce_ib": 8.89057445526123, + "ce_orig": 0.5952100157737732, + "epoch": 0.15414479833201525, + "kl_loss": 0.16057077050209045, + "loss_ib": 0.0024947652127593756, + "step": 536 + }, + { + "ce_ib": 7.21333122253418, + "ce_orig": 0.49197930097579956, + "epoch": 0.15414479833201525, + "kl_loss": 0.12823858857154846, + "loss_ib": 0.002003718866035342, + "step": 536 + }, + { + "ce_ib": 13.193877220153809, + "ce_orig": 0.9760612845420837, + "epoch": 0.15414479833201525, + "kl_loss": 0.294974148273468, + "loss_ib": 0.004269129130989313, + "step": 536 + }, + { + "ce_ib": 12.644906997680664, + "ce_orig": 1.145738124847412, + "epoch": 0.15443238191099287, + "kl_loss": 0.22196374833583832, + "loss_ib": 0.0034841280430555344, + "step": 537 + }, + { + "ce_ib": 8.502153396606445, + "ce_orig": 1.0564385652542114, + "epoch": 0.15443238191099287, + "kl_loss": 0.10582996904850006, + "loss_ib": 0.001908514997921884, + "step": 537 + }, + { + "ce_ib": 13.864537239074707, + "ce_orig": 1.2629547119140625, + "epoch": 0.15443238191099287, + "kl_loss": 0.21262601017951965, + "loss_ib": 0.003512713825330138, + "step": 537 + }, + { + "ce_ib": 12.024038314819336, + "ce_orig": 1.3225494623184204, + "epoch": 0.15443238191099287, + "kl_loss": 0.18519270420074463, + "loss_ib": 0.003054330823943019, + "step": 537 + }, + { + "ce_ib": 12.25508975982666, + "ce_orig": 1.3482215404510498, + "epoch": 0.15471996548997052, + "kl_loss": 0.2573168873786926, + "loss_ib": 0.003798677818849683, + "step": 538 + }, + { + "ce_ib": 18.197988510131836, + "ce_orig": 1.977699875831604, + "epoch": 0.15471996548997052, + "kl_loss": 0.1906663030385971, + "loss_ib": 0.0037264616694301367, + "step": 538 + }, + { + "ce_ib": 7.995527744293213, + "ce_orig": 1.0472140312194824, + "epoch": 0.15471996548997052, + "kl_loss": 0.08826296776533127, + "loss_ib": 0.0016821823082864285, + "step": 538 + }, + { + "ce_ib": 8.727664947509766, + "ce_orig": 0.9827343821525574, + "epoch": 0.15471996548997052, + "kl_loss": 0.16956618428230286, + "loss_ib": 0.002568428171798587, + "step": 538 + }, + { + "ce_ib": 9.891521453857422, + "ce_orig": 0.603715717792511, + "epoch": 0.15500754906894817, + "kl_loss": 0.1278199702501297, + "loss_ib": 0.002267351606860757, + "step": 539 + }, + { + "ce_ib": 10.724482536315918, + "ce_orig": 0.7409098148345947, + "epoch": 0.15500754906894817, + "kl_loss": 0.2033136785030365, + "loss_ib": 0.0031055849976837635, + "step": 539 + }, + { + "ce_ib": 9.865452766418457, + "ce_orig": 0.5195972323417664, + "epoch": 0.15500754906894817, + "kl_loss": 0.18522371351718903, + "loss_ib": 0.0028387822676450014, + "step": 539 + }, + { + "ce_ib": 11.253691673278809, + "ce_orig": 0.8413724303245544, + "epoch": 0.15500754906894817, + "kl_loss": 0.1541779339313507, + "loss_ib": 0.0026671483647078276, + "step": 539 + }, + { + "epoch": 0.1552951326479258, + "grad_norm": 0.08690934628248215, + "learning_rate": 4.994171922976348e-05, + "loss": 0.8932, + "step": 540 + }, + { + "ce_ib": 12.854255676269531, + "ce_orig": 0.9279084801673889, + "epoch": 0.1552951326479258, + "kl_loss": 0.14839236438274384, + "loss_ib": 0.00276934914290905, + "step": 540 + }, + { + "ce_ib": 13.400528907775879, + "ce_orig": 1.5644118785858154, + "epoch": 0.1552951326479258, + "kl_loss": 0.17688438296318054, + "loss_ib": 0.0031088965479284525, + "step": 540 + }, + { + "ce_ib": 7.6631035804748535, + "ce_orig": 0.9725773930549622, + "epoch": 0.1552951326479258, + "kl_loss": 0.14389821887016296, + "loss_ib": 0.002205292461439967, + "step": 540 + }, + { + "ce_ib": 10.149072647094727, + "ce_orig": 0.8826848268508911, + "epoch": 0.1552951326479258, + "kl_loss": 0.18272389471530914, + "loss_ib": 0.0028421462047845125, + "step": 540 + }, + { + "ce_ib": 10.487343788146973, + "ce_orig": 0.9640787243843079, + "epoch": 0.15558271622690345, + "kl_loss": 0.16164128482341766, + "loss_ib": 0.0026651471853256226, + "step": 541 + }, + { + "ce_ib": 10.549164772033691, + "ce_orig": 0.8183526992797852, + "epoch": 0.15558271622690345, + "kl_loss": 0.11300608515739441, + "loss_ib": 0.002184977289289236, + "step": 541 + }, + { + "ce_ib": 11.219377517700195, + "ce_orig": 0.6499941349029541, + "epoch": 0.15558271622690345, + "kl_loss": 0.22048121690750122, + "loss_ib": 0.003326749661937356, + "step": 541 + }, + { + "ce_ib": 8.821683883666992, + "ce_orig": 0.6309160590171814, + "epoch": 0.15558271622690345, + "kl_loss": 0.14700010418891907, + "loss_ib": 0.002352169482037425, + "step": 541 + }, + { + "ce_ib": 8.223409652709961, + "ce_orig": 0.6411111950874329, + "epoch": 0.15587029980588107, + "kl_loss": 0.15190641582012177, + "loss_ib": 0.0023414050228893757, + "step": 542 + }, + { + "ce_ib": 11.807840347290039, + "ce_orig": 1.247244119644165, + "epoch": 0.15587029980588107, + "kl_loss": 0.1326785683631897, + "loss_ib": 0.00250756973400712, + "step": 542 + }, + { + "ce_ib": 9.101454734802246, + "ce_orig": 0.7491029500961304, + "epoch": 0.15587029980588107, + "kl_loss": 0.1894010752439499, + "loss_ib": 0.0028041561599820852, + "step": 542 + }, + { + "ce_ib": 7.774291038513184, + "ce_orig": 0.7008493542671204, + "epoch": 0.15587029980588107, + "kl_loss": 0.10578468441963196, + "loss_ib": 0.0018352757906541228, + "step": 542 + }, + { + "ce_ib": 8.337699890136719, + "ce_orig": 0.8569279909133911, + "epoch": 0.15615788338485873, + "kl_loss": 0.16004885733127594, + "loss_ib": 0.002434258349239826, + "step": 543 + }, + { + "ce_ib": 11.937819480895996, + "ce_orig": 1.1342954635620117, + "epoch": 0.15615788338485873, + "kl_loss": 0.17315535247325897, + "loss_ib": 0.002925335429608822, + "step": 543 + }, + { + "ce_ib": 9.676532745361328, + "ce_orig": 0.4862201511859894, + "epoch": 0.15615788338485873, + "kl_loss": 0.1526506096124649, + "loss_ib": 0.002494159387424588, + "step": 543 + }, + { + "ce_ib": 10.160820007324219, + "ce_orig": 1.0899734497070312, + "epoch": 0.15615788338485873, + "kl_loss": 0.14247693121433258, + "loss_ib": 0.002440851181745529, + "step": 543 + }, + { + "ce_ib": 9.082582473754883, + "ce_orig": 0.4083137810230255, + "epoch": 0.15644546696383638, + "kl_loss": 0.2051030397415161, + "loss_ib": 0.0029592886567115784, + "step": 544 + }, + { + "ce_ib": 8.451061248779297, + "ce_orig": 0.3469814360141754, + "epoch": 0.15644546696383638, + "kl_loss": 0.15002912282943726, + "loss_ib": 0.0023453973699361086, + "step": 544 + }, + { + "ce_ib": 11.451112747192383, + "ce_orig": 1.2380187511444092, + "epoch": 0.15644546696383638, + "kl_loss": 0.15902701020240784, + "loss_ib": 0.0027353812474757433, + "step": 544 + }, + { + "ce_ib": 15.22938060760498, + "ce_orig": 1.9276436567306519, + "epoch": 0.15644546696383638, + "kl_loss": 0.2923920750617981, + "loss_ib": 0.004446858540177345, + "step": 544 + }, + { + "epoch": 0.156733050542814, + "grad_norm": 0.09769053012132645, + "learning_rate": 4.993904108947914e-05, + "loss": 0.8847, + "step": 545 + }, + { + "ce_ib": 7.582347393035889, + "ce_orig": 0.5396762490272522, + "epoch": 0.156733050542814, + "kl_loss": 0.1266171634197235, + "loss_ib": 0.002024406334385276, + "step": 545 + }, + { + "ce_ib": 13.711177825927734, + "ce_orig": 1.1446130275726318, + "epoch": 0.156733050542814, + "kl_loss": 0.28241363167762756, + "loss_ib": 0.0041952538304030895, + "step": 545 + }, + { + "ce_ib": 13.14343547821045, + "ce_orig": 1.172808051109314, + "epoch": 0.156733050542814, + "kl_loss": 0.21370941400527954, + "loss_ib": 0.0034514376893639565, + "step": 545 + }, + { + "ce_ib": 11.734895706176758, + "ce_orig": 1.2079064846038818, + "epoch": 0.156733050542814, + "kl_loss": 0.18663063645362854, + "loss_ib": 0.003039795672520995, + "step": 545 + }, + { + "ce_ib": 9.871030807495117, + "ce_orig": 1.0754120349884033, + "epoch": 0.15702063412179165, + "kl_loss": 0.13395808637142181, + "loss_ib": 0.0023266838397830725, + "step": 546 + }, + { + "ce_ib": 10.360943794250488, + "ce_orig": 0.7201725244522095, + "epoch": 0.15702063412179165, + "kl_loss": 0.24039188027381897, + "loss_ib": 0.0034400131553411484, + "step": 546 + }, + { + "ce_ib": 8.925966262817383, + "ce_orig": 0.5887497663497925, + "epoch": 0.15702063412179165, + "kl_loss": 0.2111162692308426, + "loss_ib": 0.0030037593096494675, + "step": 546 + }, + { + "ce_ib": 10.003677368164062, + "ce_orig": 0.8970090746879578, + "epoch": 0.15702063412179165, + "kl_loss": 0.15961842238903046, + "loss_ib": 0.0025965517852455378, + "step": 546 + }, + { + "ce_ib": 8.268735885620117, + "ce_orig": 0.6968417167663574, + "epoch": 0.15730821770076928, + "kl_loss": 0.14865007996559143, + "loss_ib": 0.0023133743088692427, + "step": 547 + }, + { + "ce_ib": 10.061677932739258, + "ce_orig": 1.0893551111221313, + "epoch": 0.15730821770076928, + "kl_loss": 0.14944544434547424, + "loss_ib": 0.0025006223004311323, + "step": 547 + }, + { + "ce_ib": 9.257678985595703, + "ce_orig": 0.8900756239891052, + "epoch": 0.15730821770076928, + "kl_loss": 0.13304127752780914, + "loss_ib": 0.002256180625408888, + "step": 547 + }, + { + "ce_ib": 8.690975189208984, + "ce_orig": 0.846500813961029, + "epoch": 0.15730821770076928, + "kl_loss": 0.21669834852218628, + "loss_ib": 0.0030360808596014977, + "step": 547 + }, + { + "ce_ib": 12.489033699035645, + "ce_orig": 0.8086367845535278, + "epoch": 0.15759580127974693, + "kl_loss": 0.15377330780029297, + "loss_ib": 0.0027866363525390625, + "step": 548 + }, + { + "ce_ib": 5.675604343414307, + "ce_orig": 0.6320753693580627, + "epoch": 0.15759580127974693, + "kl_loss": 0.13638901710510254, + "loss_ib": 0.0019314506789669394, + "step": 548 + }, + { + "ce_ib": 10.726299285888672, + "ce_orig": 0.5647706389427185, + "epoch": 0.15759580127974693, + "kl_loss": 0.18386539816856384, + "loss_ib": 0.002911283867433667, + "step": 548 + }, + { + "ce_ib": 7.901782512664795, + "ce_orig": 0.583336591720581, + "epoch": 0.15759580127974693, + "kl_loss": 0.13014139235019684, + "loss_ib": 0.00209159217774868, + "step": 548 + }, + { + "ce_ib": 8.100556373596191, + "ce_orig": 0.802794337272644, + "epoch": 0.15788338485872458, + "kl_loss": 0.14330238103866577, + "loss_ib": 0.0022430792450904846, + "step": 549 + }, + { + "ce_ib": 11.29372787475586, + "ce_orig": 0.885117769241333, + "epoch": 0.15788338485872458, + "kl_loss": 0.10478469729423523, + "loss_ib": 0.002177219605073333, + "step": 549 + }, + { + "ce_ib": 10.886632919311523, + "ce_orig": 1.232460856437683, + "epoch": 0.15788338485872458, + "kl_loss": 0.18609371781349182, + "loss_ib": 0.002949600340798497, + "step": 549 + }, + { + "ce_ib": 4.600982666015625, + "ce_orig": 0.1705351322889328, + "epoch": 0.15788338485872458, + "kl_loss": 0.3744945526123047, + "loss_ib": 0.004205043893307447, + "step": 549 + }, + { + "epoch": 0.1581709684377022, + "grad_norm": 0.09734965115785599, + "learning_rate": 4.993630286525634e-05, + "loss": 0.8445, + "step": 550 + }, + { + "ce_ib": 10.561800003051758, + "ce_orig": 1.2681158781051636, + "epoch": 0.1581709684377022, + "kl_loss": 0.16903841495513916, + "loss_ib": 0.0027465641032904387, + "step": 550 + }, + { + "ce_ib": 8.956363677978516, + "ce_orig": 1.0898432731628418, + "epoch": 0.1581709684377022, + "kl_loss": 0.1274310052394867, + "loss_ib": 0.002169946441426873, + "step": 550 + }, + { + "ce_ib": 4.494284152984619, + "ce_orig": 0.46590328216552734, + "epoch": 0.1581709684377022, + "kl_loss": 0.1029190719127655, + "loss_ib": 0.0014786190586164594, + "step": 550 + }, + { + "ce_ib": 13.822484016418457, + "ce_orig": 0.9805088639259338, + "epoch": 0.1581709684377022, + "kl_loss": 0.14357160031795502, + "loss_ib": 0.0028179644141346216, + "step": 550 + }, + { + "ce_ib": 8.873933792114258, + "ce_orig": 0.5876795053482056, + "epoch": 0.15845855201667985, + "kl_loss": 0.16282954812049866, + "loss_ib": 0.0025156887713819742, + "step": 551 + }, + { + "ce_ib": 11.708837509155273, + "ce_orig": 1.4527268409729004, + "epoch": 0.15845855201667985, + "kl_loss": 0.1705905795097351, + "loss_ib": 0.002876789541915059, + "step": 551 + }, + { + "ce_ib": 9.84598159790039, + "ce_orig": 0.6224023699760437, + "epoch": 0.15845855201667985, + "kl_loss": 0.17640681564807892, + "loss_ib": 0.0027486663311719894, + "step": 551 + }, + { + "ce_ib": 11.687630653381348, + "ce_orig": 1.245397686958313, + "epoch": 0.15845855201667985, + "kl_loss": 0.1673552244901657, + "loss_ib": 0.002842315472662449, + "step": 551 + }, + { + "ce_ib": 12.031242370605469, + "ce_orig": 1.0912039279937744, + "epoch": 0.15874613559565748, + "kl_loss": 0.1191263347864151, + "loss_ib": 0.002394387498497963, + "step": 552 + }, + { + "ce_ib": 7.358980655670166, + "ce_orig": 0.4738483726978302, + "epoch": 0.15874613559565748, + "kl_loss": 0.16353383660316467, + "loss_ib": 0.002371236216276884, + "step": 552 + }, + { + "ce_ib": 11.342854499816895, + "ce_orig": 0.9162889122962952, + "epoch": 0.15874613559565748, + "kl_loss": 0.24299943447113037, + "loss_ib": 0.0035642795264720917, + "step": 552 + }, + { + "ce_ib": 9.668421745300293, + "ce_orig": 0.8884750008583069, + "epoch": 0.15874613559565748, + "kl_loss": 0.15223366022109985, + "loss_ib": 0.0024891786742955446, + "step": 552 + }, + { + "ce_ib": 9.163921356201172, + "ce_orig": 0.5614813566207886, + "epoch": 0.15903371917463513, + "kl_loss": 0.20447899401187897, + "loss_ib": 0.0029611820355057716, + "step": 553 + }, + { + "ce_ib": 14.240976333618164, + "ce_orig": 1.3581258058547974, + "epoch": 0.15903371917463513, + "kl_loss": 0.15795043110847473, + "loss_ib": 0.0030036019161343575, + "step": 553 + }, + { + "ce_ib": 11.746164321899414, + "ce_orig": 0.9120945930480957, + "epoch": 0.15903371917463513, + "kl_loss": 0.21569553017616272, + "loss_ib": 0.0033315718173980713, + "step": 553 + }, + { + "ce_ib": 9.069586753845215, + "ce_orig": 0.7589283585548401, + "epoch": 0.15903371917463513, + "kl_loss": 0.13429264724254608, + "loss_ib": 0.002249885117635131, + "step": 553 + }, + { + "ce_ib": 4.418727874755859, + "ce_orig": 0.4682615101337433, + "epoch": 0.15932130275361278, + "kl_loss": 0.08103372156620026, + "loss_ib": 0.0012522100005298853, + "step": 554 + }, + { + "ce_ib": 9.614441871643066, + "ce_orig": 0.7101583480834961, + "epoch": 0.15932130275361278, + "kl_loss": 0.18348637223243713, + "loss_ib": 0.002796307671815157, + "step": 554 + }, + { + "ce_ib": 12.66514778137207, + "ce_orig": 1.1605197191238403, + "epoch": 0.15932130275361278, + "kl_loss": 0.17392480373382568, + "loss_ib": 0.0030057625845074654, + "step": 554 + }, + { + "ce_ib": 11.166543960571289, + "ce_orig": 0.8878234028816223, + "epoch": 0.15932130275361278, + "kl_loss": 0.18384350836277008, + "loss_ib": 0.002955089556053281, + "step": 554 + }, + { + "epoch": 0.1596088863325904, + "grad_norm": 0.11484308540821075, + "learning_rate": 4.99335045636921e-05, + "loss": 0.943, + "step": 555 + }, + { + "ce_ib": 11.496370315551758, + "ce_orig": 0.8270056247711182, + "epoch": 0.1596088863325904, + "kl_loss": 0.1401461362838745, + "loss_ib": 0.0025510983541607857, + "step": 555 + }, + { + "ce_ib": 8.691624641418457, + "ce_orig": 0.7428332567214966, + "epoch": 0.1596088863325904, + "kl_loss": 0.1531578004360199, + "loss_ib": 0.002400740282610059, + "step": 555 + }, + { + "ce_ib": 10.219917297363281, + "ce_orig": 0.6447824239730835, + "epoch": 0.1596088863325904, + "kl_loss": 0.1406973898410797, + "loss_ib": 0.002428965410217643, + "step": 555 + }, + { + "ce_ib": 12.348773002624512, + "ce_orig": 0.9984573125839233, + "epoch": 0.1596088863325904, + "kl_loss": 0.14746885001659393, + "loss_ib": 0.002709565684199333, + "step": 555 + }, + { + "ce_ib": 16.282346725463867, + "ce_orig": 1.6398210525512695, + "epoch": 0.15989646991156806, + "kl_loss": 0.21631492674350739, + "loss_ib": 0.003791383933275938, + "step": 556 + }, + { + "ce_ib": 6.016524791717529, + "ce_orig": 0.5652951002120972, + "epoch": 0.15989646991156806, + "kl_loss": 0.11779481917619705, + "loss_ib": 0.0017796006286516786, + "step": 556 + }, + { + "ce_ib": 10.406888961791992, + "ce_orig": 1.0634095668792725, + "epoch": 0.15989646991156806, + "kl_loss": 0.2169257253408432, + "loss_ib": 0.0032099459785968065, + "step": 556 + }, + { + "ce_ib": 6.763169288635254, + "ce_orig": 0.5677881240844727, + "epoch": 0.15989646991156806, + "kl_loss": 0.142292320728302, + "loss_ib": 0.0020992399659007788, + "step": 556 + }, + { + "ce_ib": 4.530671119689941, + "ce_orig": 0.26608189940452576, + "epoch": 0.16018405349054568, + "kl_loss": 0.33508527278900146, + "loss_ib": 0.003803919767960906, + "step": 557 + }, + { + "ce_ib": 6.98599910736084, + "ce_orig": 0.8139922618865967, + "epoch": 0.16018405349054568, + "kl_loss": 0.16635608673095703, + "loss_ib": 0.0023621607106179, + "step": 557 + }, + { + "ce_ib": 11.501959800720215, + "ce_orig": 1.0473116636276245, + "epoch": 0.16018405349054568, + "kl_loss": 0.16634799540042877, + "loss_ib": 0.002813675906509161, + "step": 557 + }, + { + "ce_ib": 11.507184982299805, + "ce_orig": 1.2152619361877441, + "epoch": 0.16018405349054568, + "kl_loss": 0.20226159691810608, + "loss_ib": 0.003173334524035454, + "step": 557 + }, + { + "ce_ib": 7.310940742492676, + "ce_orig": 0.6393885016441345, + "epoch": 0.16047163706952333, + "kl_loss": 0.18821844458580017, + "loss_ib": 0.0026132785715162754, + "step": 558 + }, + { + "ce_ib": 10.590526580810547, + "ce_orig": 1.0444893836975098, + "epoch": 0.16047163706952333, + "kl_loss": 0.10557543486356735, + "loss_ib": 0.0021148070227354765, + "step": 558 + }, + { + "ce_ib": 9.154651641845703, + "ce_orig": 0.9614023566246033, + "epoch": 0.16047163706952333, + "kl_loss": 0.10559763014316559, + "loss_ib": 0.0019714415539056063, + "step": 558 + }, + { + "ce_ib": 8.871302604675293, + "ce_orig": 0.7604251503944397, + "epoch": 0.16047163706952333, + "kl_loss": 0.10212661325931549, + "loss_ib": 0.0019083964871242642, + "step": 558 + }, + { + "ce_ib": 8.032217025756836, + "ce_orig": 0.5321059823036194, + "epoch": 0.16075922064850098, + "kl_loss": 0.14546947181224823, + "loss_ib": 0.0022579163778573275, + "step": 559 + }, + { + "ce_ib": 6.739596366882324, + "ce_orig": 0.5902585983276367, + "epoch": 0.16075922064850098, + "kl_loss": 0.10357312858104706, + "loss_ib": 0.0017096908995881677, + "step": 559 + }, + { + "ce_ib": 11.034686088562012, + "ce_orig": 1.1962263584136963, + "epoch": 0.16075922064850098, + "kl_loss": 0.11951439082622528, + "loss_ib": 0.0022986126132309437, + "step": 559 + }, + { + "ce_ib": 8.0606107711792, + "ce_orig": 0.7243085503578186, + "epoch": 0.16075922064850098, + "kl_loss": 0.544711709022522, + "loss_ib": 0.0062531777657568455, + "step": 559 + }, + { + "epoch": 0.1610468042274786, + "grad_norm": 0.1068345308303833, + "learning_rate": 4.9930646191528175e-05, + "loss": 0.8331, + "step": 560 + }, + { + "ce_ib": 6.733529090881348, + "ce_orig": 0.5895476341247559, + "epoch": 0.1610468042274786, + "kl_loss": 0.16663867235183716, + "loss_ib": 0.0023397395852953196, + "step": 560 + }, + { + "ce_ib": 6.515050888061523, + "ce_orig": 0.739066481590271, + "epoch": 0.1610468042274786, + "kl_loss": 0.5483835935592651, + "loss_ib": 0.00613534078001976, + "step": 560 + }, + { + "ce_ib": 10.440169334411621, + "ce_orig": 0.4615348279476166, + "epoch": 0.1610468042274786, + "kl_loss": 0.2002372294664383, + "loss_ib": 0.003046389203518629, + "step": 560 + }, + { + "ce_ib": 8.762824058532715, + "ce_orig": 0.8410115838050842, + "epoch": 0.1610468042274786, + "kl_loss": 0.21117277443408966, + "loss_ib": 0.0029880099464207888, + "step": 560 + }, + { + "ce_ib": 8.918501853942871, + "ce_orig": 0.9338309168815613, + "epoch": 0.16133438780645626, + "kl_loss": 0.1465751826763153, + "loss_ib": 0.002357601886615157, + "step": 561 + }, + { + "ce_ib": 11.092143058776855, + "ce_orig": 1.3103547096252441, + "epoch": 0.16133438780645626, + "kl_loss": 0.15548178553581238, + "loss_ib": 0.0026640319265425205, + "step": 561 + }, + { + "ce_ib": 9.238221168518066, + "ce_orig": 0.5433566570281982, + "epoch": 0.16133438780645626, + "kl_loss": 0.18088671565055847, + "loss_ib": 0.0027326892595738173, + "step": 561 + }, + { + "ce_ib": 8.788311004638672, + "ce_orig": 0.6767505407333374, + "epoch": 0.16133438780645626, + "kl_loss": 0.16330577433109283, + "loss_ib": 0.0025118887424468994, + "step": 561 + }, + { + "ce_ib": 12.871294975280762, + "ce_orig": 0.97724848985672, + "epoch": 0.16162197138543388, + "kl_loss": 0.22364787757396698, + "loss_ib": 0.00352360843680799, + "step": 562 + }, + { + "ce_ib": 8.219422340393066, + "ce_orig": 1.127738118171692, + "epoch": 0.16162197138543388, + "kl_loss": 0.1269611120223999, + "loss_ib": 0.00209155329503119, + "step": 562 + }, + { + "ce_ib": 8.829179763793945, + "ce_orig": 0.815184473991394, + "epoch": 0.16162197138543388, + "kl_loss": 0.15598227083683014, + "loss_ib": 0.0024427406024187803, + "step": 562 + }, + { + "ce_ib": 11.565589904785156, + "ce_orig": 1.25294029712677, + "epoch": 0.16162197138543388, + "kl_loss": 0.1817988157272339, + "loss_ib": 0.002974546980112791, + "step": 562 + }, + { + "ce_ib": 7.771119117736816, + "ce_orig": 0.641653299331665, + "epoch": 0.16190955496441153, + "kl_loss": 0.15232746303081512, + "loss_ib": 0.002300386317074299, + "step": 563 + }, + { + "ce_ib": 11.970166206359863, + "ce_orig": 1.0346808433532715, + "epoch": 0.16190955496441153, + "kl_loss": 0.18096312880516052, + "loss_ib": 0.0030066478066146374, + "step": 563 + }, + { + "ce_ib": 9.624247550964355, + "ce_orig": 0.8522278666496277, + "epoch": 0.16190955496441153, + "kl_loss": 0.14167995750904083, + "loss_ib": 0.002379224169999361, + "step": 563 + }, + { + "ce_ib": 10.173232078552246, + "ce_orig": 0.7698571681976318, + "epoch": 0.16190955496441153, + "kl_loss": 0.14464448392391205, + "loss_ib": 0.0024637680035084486, + "step": 563 + }, + { + "ce_ib": 9.11231803894043, + "ce_orig": 0.5458751916885376, + "epoch": 0.16219713854338919, + "kl_loss": 0.2736416459083557, + "loss_ib": 0.0036476480308920145, + "step": 564 + }, + { + "ce_ib": 7.692645072937012, + "ce_orig": 0.6927090287208557, + "epoch": 0.16219713854338919, + "kl_loss": 0.10034587234258652, + "loss_ib": 0.0017727231606841087, + "step": 564 + }, + { + "ce_ib": 9.02833080291748, + "ce_orig": 0.8556505441665649, + "epoch": 0.16219713854338919, + "kl_loss": 0.18506044149398804, + "loss_ib": 0.0027534374967217445, + "step": 564 + }, + { + "ce_ib": 7.497312068939209, + "ce_orig": 0.626701831817627, + "epoch": 0.16219713854338919, + "kl_loss": 0.21034158766269684, + "loss_ib": 0.0028531469870358706, + "step": 564 + }, + { + "epoch": 0.1624847221223668, + "grad_norm": 0.09401144832372665, + "learning_rate": 4.992772775565104e-05, + "loss": 0.8074, + "step": 565 + }, + { + "ce_ib": 11.46445369720459, + "ce_orig": 0.9820400476455688, + "epoch": 0.1624847221223668, + "kl_loss": 0.21540969610214233, + "loss_ib": 0.0033005422446876764, + "step": 565 + }, + { + "ce_ib": 11.646088600158691, + "ce_orig": 1.372199535369873, + "epoch": 0.1624847221223668, + "kl_loss": 0.15993696451187134, + "loss_ib": 0.002763978438451886, + "step": 565 + }, + { + "ce_ib": 7.149011135101318, + "ce_orig": 0.6144888997077942, + "epoch": 0.1624847221223668, + "kl_loss": 0.13073797523975372, + "loss_ib": 0.0020222808234393597, + "step": 565 + }, + { + "ce_ib": 8.267902374267578, + "ce_orig": 0.7729185819625854, + "epoch": 0.1624847221223668, + "kl_loss": 0.20054320991039276, + "loss_ib": 0.0028322222642600536, + "step": 565 + }, + { + "ce_ib": 12.370036125183105, + "ce_orig": 1.2028396129608154, + "epoch": 0.16277230570134446, + "kl_loss": 0.13463753461837769, + "loss_ib": 0.002583378693088889, + "step": 566 + }, + { + "ce_ib": 10.583022117614746, + "ce_orig": 1.0920214653015137, + "epoch": 0.16277230570134446, + "kl_loss": 0.1449476182460785, + "loss_ib": 0.002507778350263834, + "step": 566 + }, + { + "ce_ib": 8.324437141418457, + "ce_orig": 0.4909367859363556, + "epoch": 0.16277230570134446, + "kl_loss": 0.12846431136131287, + "loss_ib": 0.0021170866675674915, + "step": 566 + }, + { + "ce_ib": 8.053367614746094, + "ce_orig": 0.7633367776870728, + "epoch": 0.16277230570134446, + "kl_loss": 0.14153558015823364, + "loss_ib": 0.002220692578703165, + "step": 566 + }, + { + "ce_ib": 9.400287628173828, + "ce_orig": 0.7323331832885742, + "epoch": 0.16305988928032208, + "kl_loss": 0.11990717798471451, + "loss_ib": 0.0021391003392636776, + "step": 567 + }, + { + "ce_ib": 8.591780662536621, + "ce_orig": 0.3860359787940979, + "epoch": 0.16305988928032208, + "kl_loss": 0.2477714717388153, + "loss_ib": 0.003336892696097493, + "step": 567 + }, + { + "ce_ib": 8.714170455932617, + "ce_orig": 0.5352444052696228, + "epoch": 0.16305988928032208, + "kl_loss": 0.12474939227104187, + "loss_ib": 0.0021189108956605196, + "step": 567 + }, + { + "ce_ib": 12.906991958618164, + "ce_orig": 0.9715366363525391, + "epoch": 0.16305988928032208, + "kl_loss": 0.21474242210388184, + "loss_ib": 0.0034381235018372536, + "step": 567 + }, + { + "ce_ib": 8.508112907409668, + "ce_orig": 0.91560298204422, + "epoch": 0.16334747285929974, + "kl_loss": 0.14688250422477722, + "loss_ib": 0.0023196362890303135, + "step": 568 + }, + { + "ce_ib": 14.228510856628418, + "ce_orig": 1.5433235168457031, + "epoch": 0.16334747285929974, + "kl_loss": 0.12242799997329712, + "loss_ib": 0.0026471309829503298, + "step": 568 + }, + { + "ce_ib": 9.558116912841797, + "ce_orig": 0.8016780018806458, + "epoch": 0.16334747285929974, + "kl_loss": 0.2184457778930664, + "loss_ib": 0.003140269545838237, + "step": 568 + }, + { + "ce_ib": 9.04499340057373, + "ce_orig": 0.6228697299957275, + "epoch": 0.16334747285929974, + "kl_loss": 0.16972365975379944, + "loss_ib": 0.002601735759526491, + "step": 568 + }, + { + "ce_ib": 8.427020072937012, + "ce_orig": 0.8732650279998779, + "epoch": 0.1636350564382774, + "kl_loss": 0.14395001530647278, + "loss_ib": 0.0022822022438049316, + "step": 569 + }, + { + "ce_ib": 7.884949684143066, + "ce_orig": 0.8752006888389587, + "epoch": 0.1636350564382774, + "kl_loss": 0.10892467200756073, + "loss_ib": 0.0018777416553348303, + "step": 569 + }, + { + "ce_ib": 7.760122299194336, + "ce_orig": 0.6656043529510498, + "epoch": 0.1636350564382774, + "kl_loss": 0.1181686520576477, + "loss_ib": 0.0019576987251639366, + "step": 569 + }, + { + "ce_ib": 10.271071434020996, + "ce_orig": 0.9393675923347473, + "epoch": 0.1636350564382774, + "kl_loss": 0.15882378816604614, + "loss_ib": 0.002615345176309347, + "step": 569 + }, + { + "epoch": 0.163922640017255, + "grad_norm": 0.09945710003376007, + "learning_rate": 4.992474926309191e-05, + "loss": 0.8445, + "step": 570 + }, + { + "ce_ib": 7.904441833496094, + "ce_orig": 0.7863525748252869, + "epoch": 0.163922640017255, + "kl_loss": 0.10601916909217834, + "loss_ib": 0.0018506358610466123, + "step": 570 + }, + { + "ce_ib": 10.904396057128906, + "ce_orig": 0.9058529734611511, + "epoch": 0.163922640017255, + "kl_loss": 0.178889200091362, + "loss_ib": 0.002879331586882472, + "step": 570 + }, + { + "ce_ib": 8.751618385314941, + "ce_orig": 0.9822079539299011, + "epoch": 0.163922640017255, + "kl_loss": 0.4031105637550354, + "loss_ib": 0.0049062673933804035, + "step": 570 + }, + { + "ce_ib": 11.65134334564209, + "ce_orig": 1.064084529876709, + "epoch": 0.163922640017255, + "kl_loss": 0.13623788952827454, + "loss_ib": 0.002527513075619936, + "step": 570 + }, + { + "ce_ib": 14.585232734680176, + "ce_orig": 1.6336134672164917, + "epoch": 0.16421022359623266, + "kl_loss": 0.18193870782852173, + "loss_ib": 0.0032779101748019457, + "step": 571 + }, + { + "ce_ib": 9.184663772583008, + "ce_orig": 0.8301358819007874, + "epoch": 0.16421022359623266, + "kl_loss": 0.19080528616905212, + "loss_ib": 0.0028265193104743958, + "step": 571 + }, + { + "ce_ib": 8.895779609680176, + "ce_orig": 0.5664294362068176, + "epoch": 0.16421022359623266, + "kl_loss": 0.11969694495201111, + "loss_ib": 0.002086547203361988, + "step": 571 + }, + { + "ce_ib": 10.437873840332031, + "ce_orig": 0.7510645985603333, + "epoch": 0.16421022359623266, + "kl_loss": 0.3105461597442627, + "loss_ib": 0.004149248823523521, + "step": 571 + }, + { + "ce_ib": 12.008666038513184, + "ce_orig": 0.41471701860427856, + "epoch": 0.1644978071752103, + "kl_loss": 0.4446406066417694, + "loss_ib": 0.00564727233722806, + "step": 572 + }, + { + "ce_ib": 8.411678314208984, + "ce_orig": 0.5035253167152405, + "epoch": 0.1644978071752103, + "kl_loss": 0.1964578628540039, + "loss_ib": 0.002805746393278241, + "step": 572 + }, + { + "ce_ib": 13.450571060180664, + "ce_orig": 1.2887715101242065, + "epoch": 0.1644978071752103, + "kl_loss": 0.13696449995040894, + "loss_ib": 0.0027147019281983376, + "step": 572 + }, + { + "ce_ib": 10.915575981140137, + "ce_orig": 1.13957941532135, + "epoch": 0.1644978071752103, + "kl_loss": 0.18452590703964233, + "loss_ib": 0.0029368167743086815, + "step": 572 + }, + { + "ce_ib": 9.278141975402832, + "ce_orig": 1.0592403411865234, + "epoch": 0.16478539075418794, + "kl_loss": 0.12091828882694244, + "loss_ib": 0.0021369969472289085, + "step": 573 + }, + { + "ce_ib": 9.828802108764648, + "ce_orig": 0.9190781116485596, + "epoch": 0.16478539075418794, + "kl_loss": 0.17288747429847717, + "loss_ib": 0.0027117549907416105, + "step": 573 + }, + { + "ce_ib": 8.335225105285645, + "ce_orig": 0.6787893772125244, + "epoch": 0.16478539075418794, + "kl_loss": 0.1160748228430748, + "loss_ib": 0.001994270598515868, + "step": 573 + }, + { + "ce_ib": 8.189950942993164, + "ce_orig": 0.8531936407089233, + "epoch": 0.16478539075418794, + "kl_loss": 0.09948378056287766, + "loss_ib": 0.0018138327868655324, + "step": 573 + }, + { + "ce_ib": 6.455183506011963, + "ce_orig": 0.596420168876648, + "epoch": 0.1650729743331656, + "kl_loss": 0.11498283594846725, + "loss_ib": 0.0017953467322513461, + "step": 574 + }, + { + "ce_ib": 9.777907371520996, + "ce_orig": 0.8681027293205261, + "epoch": 0.1650729743331656, + "kl_loss": 0.1523522138595581, + "loss_ib": 0.002501312643289566, + "step": 574 + }, + { + "ce_ib": 9.882269859313965, + "ce_orig": 0.9420934319496155, + "epoch": 0.1650729743331656, + "kl_loss": 0.16510173678398132, + "loss_ib": 0.0026392440777271986, + "step": 574 + }, + { + "ce_ib": 12.674947738647461, + "ce_orig": 1.0848475694656372, + "epoch": 0.1650729743331656, + "kl_loss": 0.16495954990386963, + "loss_ib": 0.0029170899651944637, + "step": 574 + }, + { + "epoch": 0.1653605579121432, + "grad_norm": 0.08536024391651154, + "learning_rate": 4.992171072102663e-05, + "loss": 0.8644, + "step": 575 + }, + { + "ce_ib": 8.744396209716797, + "ce_orig": 0.7920838594436646, + "epoch": 0.1653605579121432, + "kl_loss": 0.15692180395126343, + "loss_ib": 0.002443657722324133, + "step": 575 + }, + { + "ce_ib": 10.282676696777344, + "ce_orig": 0.8695278763771057, + "epoch": 0.1653605579121432, + "kl_loss": 0.16592364013195038, + "loss_ib": 0.0026875040493905544, + "step": 575 + }, + { + "ce_ib": 11.398494720458984, + "ce_orig": 1.4248392581939697, + "epoch": 0.1653605579121432, + "kl_loss": 0.21238285303115845, + "loss_ib": 0.0032636779360473156, + "step": 575 + }, + { + "ce_ib": 6.761519908905029, + "ce_orig": 0.5061066746711731, + "epoch": 0.1653605579121432, + "kl_loss": 0.09173807501792908, + "loss_ib": 0.0015935326227918267, + "step": 575 + }, + { + "ce_ib": 10.953373908996582, + "ce_orig": 0.8243492245674133, + "epoch": 0.16564814149112086, + "kl_loss": 0.21726994216442108, + "loss_ib": 0.0032680367585271597, + "step": 576 + }, + { + "ce_ib": 8.85136890411377, + "ce_orig": 0.8645575642585754, + "epoch": 0.16564814149112086, + "kl_loss": 0.1265760213136673, + "loss_ib": 0.0021508969366550446, + "step": 576 + }, + { + "ce_ib": 4.987844944000244, + "ce_orig": 0.2835332453250885, + "epoch": 0.16564814149112086, + "kl_loss": 0.32002195715904236, + "loss_ib": 0.003699003951624036, + "step": 576 + }, + { + "ce_ib": 10.408366203308105, + "ce_orig": 0.5282058715820312, + "epoch": 0.16564814149112086, + "kl_loss": 0.1558026671409607, + "loss_ib": 0.0025988630950450897, + "step": 576 + }, + { + "ce_ib": 6.840135097503662, + "ce_orig": 0.7790481448173523, + "epoch": 0.1659357250700985, + "kl_loss": 0.17018580436706543, + "loss_ib": 0.002385871484875679, + "step": 577 + }, + { + "ce_ib": 5.964718341827393, + "ce_orig": 0.4317297339439392, + "epoch": 0.1659357250700985, + "kl_loss": 0.12732765078544617, + "loss_ib": 0.0018697483465075493, + "step": 577 + }, + { + "ce_ib": 8.891576766967773, + "ce_orig": 0.5287486910820007, + "epoch": 0.1659357250700985, + "kl_loss": 0.12389977276325226, + "loss_ib": 0.0021281554363667965, + "step": 577 + }, + { + "ce_ib": 4.9557671546936035, + "ce_orig": 0.6066608428955078, + "epoch": 0.1659357250700985, + "kl_loss": 0.12630987167358398, + "loss_ib": 0.0017586754402145743, + "step": 577 + }, + { + "ce_ib": 11.605189323425293, + "ce_orig": 1.0188590288162231, + "epoch": 0.16622330864907614, + "kl_loss": 0.18720872700214386, + "loss_ib": 0.0030326060950756073, + "step": 578 + }, + { + "ce_ib": 5.411064147949219, + "ce_orig": 0.283112108707428, + "epoch": 0.16622330864907614, + "kl_loss": 0.42558926343917847, + "loss_ib": 0.004796999040991068, + "step": 578 + }, + { + "ce_ib": 10.837127685546875, + "ce_orig": 0.8412641286849976, + "epoch": 0.16622330864907614, + "kl_loss": 0.1550775170326233, + "loss_ib": 0.0026344875805079937, + "step": 578 + }, + { + "ce_ib": 11.859156608581543, + "ce_orig": 1.3071043491363525, + "epoch": 0.16622330864907614, + "kl_loss": 0.08775961399078369, + "loss_ib": 0.002063511637970805, + "step": 578 + }, + { + "ce_ib": 12.27880573272705, + "ce_orig": 1.50951087474823, + "epoch": 0.1665108922280538, + "kl_loss": 0.17748884856700897, + "loss_ib": 0.0030027690809220076, + "step": 579 + }, + { + "ce_ib": 10.481730461120605, + "ce_orig": 1.193427562713623, + "epoch": 0.1665108922280538, + "kl_loss": 0.19660136103630066, + "loss_ib": 0.003014186630025506, + "step": 579 + }, + { + "ce_ib": 13.807245254516602, + "ce_orig": 1.1526756286621094, + "epoch": 0.1665108922280538, + "kl_loss": 0.23192673921585083, + "loss_ib": 0.0036999916192144156, + "step": 579 + }, + { + "ce_ib": 12.139798164367676, + "ce_orig": 1.2501667737960815, + "epoch": 0.1665108922280538, + "kl_loss": 0.14804279804229736, + "loss_ib": 0.0026944077108055353, + "step": 579 + }, + { + "epoch": 0.16679847580703142, + "grad_norm": 0.07876303791999817, + "learning_rate": 4.9918612136775776e-05, + "loss": 0.8655, + "step": 580 + }, + { + "ce_ib": 11.737598419189453, + "ce_orig": 1.0697919130325317, + "epoch": 0.16679847580703142, + "kl_loss": 0.17866802215576172, + "loss_ib": 0.002960439771413803, + "step": 580 + }, + { + "ce_ib": 5.683050632476807, + "ce_orig": 0.48447874188423157, + "epoch": 0.16679847580703142, + "kl_loss": 0.09303843230009079, + "loss_ib": 0.0014986892929300666, + "step": 580 + }, + { + "ce_ib": 12.669607162475586, + "ce_orig": 0.8514668345451355, + "epoch": 0.16679847580703142, + "kl_loss": 0.208790585398674, + "loss_ib": 0.003354866523295641, + "step": 580 + }, + { + "ce_ib": 11.108747482299805, + "ce_orig": 0.9369009137153625, + "epoch": 0.16679847580703142, + "kl_loss": 0.1635299026966095, + "loss_ib": 0.002746173646301031, + "step": 580 + }, + { + "ce_ib": 8.074356079101562, + "ce_orig": 0.7309190630912781, + "epoch": 0.16708605938600907, + "kl_loss": 0.2102489322423935, + "loss_ib": 0.0029099248349666595, + "step": 581 + }, + { + "ce_ib": 7.948696136474609, + "ce_orig": 0.6363977193832397, + "epoch": 0.16708605938600907, + "kl_loss": 0.45359325408935547, + "loss_ib": 0.0053308019414544106, + "step": 581 + }, + { + "ce_ib": 11.585594177246094, + "ce_orig": 1.2507925033569336, + "epoch": 0.16708605938600907, + "kl_loss": 0.14326362311840057, + "loss_ib": 0.002591195749118924, + "step": 581 + }, + { + "ce_ib": 12.229244232177734, + "ce_orig": 1.052150845527649, + "epoch": 0.16708605938600907, + "kl_loss": 0.14200204610824585, + "loss_ib": 0.002642944687977433, + "step": 581 + }, + { + "ce_ib": 7.478880405426025, + "ce_orig": 0.7154788970947266, + "epoch": 0.1673736429649867, + "kl_loss": 0.16189737617969513, + "loss_ib": 0.0023668615613132715, + "step": 582 + }, + { + "ce_ib": 12.277364730834961, + "ce_orig": 1.1993218660354614, + "epoch": 0.1673736429649867, + "kl_loss": 0.16127103567123413, + "loss_ib": 0.002840446773916483, + "step": 582 + }, + { + "ce_ib": 10.120058059692383, + "ce_orig": 0.7597600817680359, + "epoch": 0.1673736429649867, + "kl_loss": 0.13978613913059235, + "loss_ib": 0.0024098672438412905, + "step": 582 + }, + { + "ce_ib": 7.654234409332275, + "ce_orig": 0.6932991147041321, + "epoch": 0.1673736429649867, + "kl_loss": 0.09757931530475616, + "loss_ib": 0.0017412164015695453, + "step": 582 + }, + { + "ce_ib": 7.6219096183776855, + "ce_orig": 0.6601145267486572, + "epoch": 0.16766122654396434, + "kl_loss": 0.1617378145456314, + "loss_ib": 0.0023795689921826124, + "step": 583 + }, + { + "ce_ib": 7.920563220977783, + "ce_orig": 0.8288545608520508, + "epoch": 0.16766122654396434, + "kl_loss": 0.2300298511981964, + "loss_ib": 0.0030923548620194197, + "step": 583 + }, + { + "ce_ib": 9.533792495727539, + "ce_orig": 0.7653409838676453, + "epoch": 0.16766122654396434, + "kl_loss": 0.2119666188955307, + "loss_ib": 0.0030730452854186296, + "step": 583 + }, + { + "ce_ib": 7.689798831939697, + "ce_orig": 0.7431104779243469, + "epoch": 0.16766122654396434, + "kl_loss": 0.1498323678970337, + "loss_ib": 0.0022673034109175205, + "step": 583 + }, + { + "ce_ib": 10.659350395202637, + "ce_orig": 1.0661674737930298, + "epoch": 0.167948810122942, + "kl_loss": 0.15681301057338715, + "loss_ib": 0.002634064992889762, + "step": 584 + }, + { + "ce_ib": 8.092489242553711, + "ce_orig": 0.7272496819496155, + "epoch": 0.167948810122942, + "kl_loss": 0.17958983778953552, + "loss_ib": 0.0026051471941173077, + "step": 584 + }, + { + "ce_ib": 10.62472915649414, + "ce_orig": 0.8471878170967102, + "epoch": 0.167948810122942, + "kl_loss": 0.1516703963279724, + "loss_ib": 0.0025791770312935114, + "step": 584 + }, + { + "ce_ib": 13.692408561706543, + "ce_orig": 1.3621617555618286, + "epoch": 0.167948810122942, + "kl_loss": 0.2163066267967224, + "loss_ib": 0.0035323069896548986, + "step": 584 + }, + { + "epoch": 0.16823639370191962, + "grad_norm": 0.09431323409080505, + "learning_rate": 4.9915453517804554e-05, + "loss": 0.8551, + "step": 585 + }, + { + "ce_ib": 9.730319023132324, + "ce_orig": 0.9177817106246948, + "epoch": 0.16823639370191962, + "kl_loss": 0.21307173371315002, + "loss_ib": 0.0031037491280585527, + "step": 585 + }, + { + "ce_ib": 8.93181037902832, + "ce_orig": 1.0497219562530518, + "epoch": 0.16823639370191962, + "kl_loss": 0.11000088602304459, + "loss_ib": 0.001993189798668027, + "step": 585 + }, + { + "ce_ib": 9.731582641601562, + "ce_orig": 1.101962924003601, + "epoch": 0.16823639370191962, + "kl_loss": 0.2055375874042511, + "loss_ib": 0.003028533887118101, + "step": 585 + }, + { + "ce_ib": 6.749084949493408, + "ce_orig": 0.8812076449394226, + "epoch": 0.16823639370191962, + "kl_loss": 0.09668911248445511, + "loss_ib": 0.0016417994629591703, + "step": 585 + }, + { + "ce_ib": 6.907016277313232, + "ce_orig": 0.538230299949646, + "epoch": 0.16852397728089727, + "kl_loss": 0.18287301063537598, + "loss_ib": 0.002519431756809354, + "step": 586 + }, + { + "ce_ib": 13.047144889831543, + "ce_orig": 0.990262508392334, + "epoch": 0.16852397728089727, + "kl_loss": 0.19800947606563568, + "loss_ib": 0.0032848091796040535, + "step": 586 + }, + { + "ce_ib": 9.177399635314941, + "ce_orig": 0.4944288730621338, + "epoch": 0.16852397728089727, + "kl_loss": 0.09137950837612152, + "loss_ib": 0.001831535017117858, + "step": 586 + }, + { + "ce_ib": 9.995772361755371, + "ce_orig": 0.7994273900985718, + "epoch": 0.16852397728089727, + "kl_loss": 0.14279597997665405, + "loss_ib": 0.0024275369942188263, + "step": 586 + }, + { + "ce_ib": 10.55152416229248, + "ce_orig": 0.9644479155540466, + "epoch": 0.1688115608598749, + "kl_loss": 0.1346493363380432, + "loss_ib": 0.0024016457609832287, + "step": 587 + }, + { + "ce_ib": 7.489251136779785, + "ce_orig": 0.4593333601951599, + "epoch": 0.1688115608598749, + "kl_loss": 0.22705069184303284, + "loss_ib": 0.0030194318387657404, + "step": 587 + }, + { + "ce_ib": 13.462209701538086, + "ce_orig": 1.549858808517456, + "epoch": 0.1688115608598749, + "kl_loss": 0.17547796666622162, + "loss_ib": 0.0031010007951408625, + "step": 587 + }, + { + "ce_ib": 8.43393611907959, + "ce_orig": 0.9473516941070557, + "epoch": 0.1688115608598749, + "kl_loss": 0.16294658184051514, + "loss_ib": 0.0024728593416512012, + "step": 587 + }, + { + "ce_ib": 11.13269329071045, + "ce_orig": 1.07948637008667, + "epoch": 0.16909914443885254, + "kl_loss": 0.14176622033119202, + "loss_ib": 0.002530931495130062, + "step": 588 + }, + { + "ce_ib": 6.587364196777344, + "ce_orig": 0.4034996032714844, + "epoch": 0.16909914443885254, + "kl_loss": 0.10913390666246414, + "loss_ib": 0.0017500754911452532, + "step": 588 + }, + { + "ce_ib": 6.319201469421387, + "ce_orig": 0.7516131401062012, + "epoch": 0.16909914443885254, + "kl_loss": 0.11739123612642288, + "loss_ib": 0.00180583237670362, + "step": 588 + }, + { + "ce_ib": 5.119848728179932, + "ce_orig": 0.48948171734809875, + "epoch": 0.16909914443885254, + "kl_loss": 0.0971326231956482, + "loss_ib": 0.0014833110617473722, + "step": 588 + }, + { + "ce_ib": 10.022903442382812, + "ce_orig": 1.0736076831817627, + "epoch": 0.16938672801783017, + "kl_loss": 0.12581218779087067, + "loss_ib": 0.002260412322357297, + "step": 589 + }, + { + "ce_ib": 11.274831771850586, + "ce_orig": 0.7393347024917603, + "epoch": 0.16938672801783017, + "kl_loss": 0.13805577158927917, + "loss_ib": 0.0025080409832298756, + "step": 589 + }, + { + "ce_ib": 10.370933532714844, + "ce_orig": 1.0629693269729614, + "epoch": 0.16938672801783017, + "kl_loss": 0.16156615316867828, + "loss_ib": 0.0026527547743171453, + "step": 589 + }, + { + "ce_ib": 9.891374588012695, + "ce_orig": 0.7961823344230652, + "epoch": 0.16938672801783017, + "kl_loss": 0.14303208887577057, + "loss_ib": 0.0024194582365453243, + "step": 589 + }, + { + "epoch": 0.16967431159680782, + "grad_norm": 0.09825263917446136, + "learning_rate": 4.9912234871722805e-05, + "loss": 0.8531, + "step": 590 + }, + { + "ce_ib": 7.040185451507568, + "ce_orig": 0.45819783210754395, + "epoch": 0.16967431159680782, + "kl_loss": 0.19058099389076233, + "loss_ib": 0.0026098282542079687, + "step": 590 + }, + { + "ce_ib": 8.853228569030762, + "ce_orig": 0.45481324195861816, + "epoch": 0.16967431159680782, + "kl_loss": 0.16712503135204315, + "loss_ib": 0.0025565731339156628, + "step": 590 + }, + { + "ce_ib": 4.791247844696045, + "ce_orig": 0.5837194323539734, + "epoch": 0.16967431159680782, + "kl_loss": 0.09282873570919037, + "loss_ib": 0.001407412113621831, + "step": 590 + }, + { + "ce_ib": 10.547879219055176, + "ce_orig": 0.5191910862922668, + "epoch": 0.16967431159680782, + "kl_loss": 0.21911896765232086, + "loss_ib": 0.003245977684855461, + "step": 590 + }, + { + "ce_ib": 13.081586837768555, + "ce_orig": 1.6712095737457275, + "epoch": 0.16996189517578547, + "kl_loss": 0.1372794508934021, + "loss_ib": 0.002680953126400709, + "step": 591 + }, + { + "ce_ib": 10.580743789672852, + "ce_orig": 1.0760875940322876, + "epoch": 0.16996189517578547, + "kl_loss": 0.14443005621433258, + "loss_ib": 0.002502374816685915, + "step": 591 + }, + { + "ce_ib": 11.404194831848145, + "ce_orig": 1.1433049440383911, + "epoch": 0.16996189517578547, + "kl_loss": 0.1749769151210785, + "loss_ib": 0.0028901887126266956, + "step": 591 + }, + { + "ce_ib": 6.110208511352539, + "ce_orig": 0.5183145403862, + "epoch": 0.16996189517578547, + "kl_loss": 0.2318723201751709, + "loss_ib": 0.002929744077846408, + "step": 591 + }, + { + "ce_ib": 9.481659889221191, + "ce_orig": 1.2535269260406494, + "epoch": 0.1702494787547631, + "kl_loss": 0.10323582589626312, + "loss_ib": 0.0019805242773145437, + "step": 592 + }, + { + "ce_ib": 7.891531467437744, + "ce_orig": 1.11729896068573, + "epoch": 0.1702494787547631, + "kl_loss": 0.22114473581314087, + "loss_ib": 0.0030006002634763718, + "step": 592 + }, + { + "ce_ib": 11.817340850830078, + "ce_orig": 1.0938154458999634, + "epoch": 0.1702494787547631, + "kl_loss": 0.14513513445854187, + "loss_ib": 0.0026330852415412664, + "step": 592 + }, + { + "ce_ib": 7.189747333526611, + "ce_orig": 0.3452516496181488, + "epoch": 0.1702494787547631, + "kl_loss": 0.14610087871551514, + "loss_ib": 0.002179983537644148, + "step": 592 + }, + { + "ce_ib": 11.470684051513672, + "ce_orig": 1.2777209281921387, + "epoch": 0.17053706233374075, + "kl_loss": 0.21245329082012177, + "loss_ib": 0.0032716011628508568, + "step": 593 + }, + { + "ce_ib": 8.021244049072266, + "ce_orig": 0.7062201499938965, + "epoch": 0.17053706233374075, + "kl_loss": 0.17156916856765747, + "loss_ib": 0.0025178161449730396, + "step": 593 + }, + { + "ce_ib": 7.824449062347412, + "ce_orig": 0.625266432762146, + "epoch": 0.17053706233374075, + "kl_loss": 0.144322469830513, + "loss_ib": 0.00222566956654191, + "step": 593 + }, + { + "ce_ib": 8.449918746948242, + "ce_orig": 0.7968825697898865, + "epoch": 0.17053706233374075, + "kl_loss": 0.13197359442710876, + "loss_ib": 0.0021647277753800154, + "step": 593 + }, + { + "ce_ib": 12.557650566101074, + "ce_orig": 1.2830119132995605, + "epoch": 0.17082464591271837, + "kl_loss": 0.13416330516338348, + "loss_ib": 0.0025973981246352196, + "step": 594 + }, + { + "ce_ib": 16.166818618774414, + "ce_orig": 1.5858261585235596, + "epoch": 0.17082464591271837, + "kl_loss": 0.13871753215789795, + "loss_ib": 0.003003857098519802, + "step": 594 + }, + { + "ce_ib": 9.042425155639648, + "ce_orig": 0.6032095551490784, + "epoch": 0.17082464591271837, + "kl_loss": 0.14524920284748077, + "loss_ib": 0.0023567345924675465, + "step": 594 + }, + { + "ce_ib": 6.781697750091553, + "ce_orig": 0.7230402231216431, + "epoch": 0.17082464591271837, + "kl_loss": 0.11699005216360092, + "loss_ib": 0.0018480703001841903, + "step": 594 + }, + { + "epoch": 0.17111222949169602, + "grad_norm": 0.11506392806768417, + "learning_rate": 4.9908956206285e-05, + "loss": 0.8102, + "step": 595 + }, + { + "ce_ib": 8.491539001464844, + "ce_orig": 0.799639105796814, + "epoch": 0.17111222949169602, + "kl_loss": 0.310641884803772, + "loss_ib": 0.003955572843551636, + "step": 595 + }, + { + "ce_ib": 9.903238296508789, + "ce_orig": 0.9372822046279907, + "epoch": 0.17111222949169602, + "kl_loss": 0.34602734446525574, + "loss_ib": 0.004450596868991852, + "step": 595 + }, + { + "ce_ib": 10.448668479919434, + "ce_orig": 0.6878861784934998, + "epoch": 0.17111222949169602, + "kl_loss": 0.18895582854747772, + "loss_ib": 0.002934425137937069, + "step": 595 + }, + { + "ce_ib": 7.248452663421631, + "ce_orig": 0.638181746006012, + "epoch": 0.17111222949169602, + "kl_loss": 0.20415529608726501, + "loss_ib": 0.002766398014500737, + "step": 595 + }, + { + "ce_ib": 12.064194679260254, + "ce_orig": 1.1723579168319702, + "epoch": 0.17139981307067367, + "kl_loss": 0.14018401503562927, + "loss_ib": 0.0026082596741616726, + "step": 596 + }, + { + "ce_ib": 8.019061088562012, + "ce_orig": 0.6501631140708923, + "epoch": 0.17139981307067367, + "kl_loss": 0.15329334139823914, + "loss_ib": 0.0023348394315689802, + "step": 596 + }, + { + "ce_ib": 8.00246810913086, + "ce_orig": 0.6622079014778137, + "epoch": 0.17139981307067367, + "kl_loss": 0.21607182919979095, + "loss_ib": 0.0029609650373458862, + "step": 596 + }, + { + "ce_ib": 11.230345726013184, + "ce_orig": 0.9562708139419556, + "epoch": 0.17139981307067367, + "kl_loss": 0.1696317195892334, + "loss_ib": 0.002819351851940155, + "step": 596 + }, + { + "ce_ib": 10.980791091918945, + "ce_orig": 0.7618129849433899, + "epoch": 0.1716873966496513, + "kl_loss": 0.14176014065742493, + "loss_ib": 0.0025156803894788027, + "step": 597 + }, + { + "ce_ib": 12.790441513061523, + "ce_orig": 1.0193257331848145, + "epoch": 0.1716873966496513, + "kl_loss": 0.16009655594825745, + "loss_ib": 0.002880009589716792, + "step": 597 + }, + { + "ce_ib": 9.670978546142578, + "ce_orig": 1.0576988458633423, + "epoch": 0.1716873966496513, + "kl_loss": 0.31038549542427063, + "loss_ib": 0.004070952534675598, + "step": 597 + }, + { + "ce_ib": 8.231335639953613, + "ce_orig": 0.7771991491317749, + "epoch": 0.1716873966496513, + "kl_loss": 0.2681956887245178, + "loss_ib": 0.003505090484395623, + "step": 597 + }, + { + "ce_ib": 14.043437957763672, + "ce_orig": 1.6835757493972778, + "epoch": 0.17197498022862895, + "kl_loss": 0.22146332263946533, + "loss_ib": 0.0036189770326018333, + "step": 598 + }, + { + "ce_ib": 9.480457305908203, + "ce_orig": 0.8885291218757629, + "epoch": 0.17197498022862895, + "kl_loss": 0.09636467695236206, + "loss_ib": 0.0019116924377158284, + "step": 598 + }, + { + "ce_ib": 7.293931484222412, + "ce_orig": 0.6654239296913147, + "epoch": 0.17197498022862895, + "kl_loss": 0.13003098964691162, + "loss_ib": 0.0020297029986977577, + "step": 598 + }, + { + "ce_ib": 6.692012310028076, + "ce_orig": 0.5753830671310425, + "epoch": 0.17197498022862895, + "kl_loss": 0.11845792084932327, + "loss_ib": 0.0018537803553044796, + "step": 598 + }, + { + "ce_ib": 8.007339477539062, + "ce_orig": 0.897793710231781, + "epoch": 0.17226256380760657, + "kl_loss": 0.11580362170934677, + "loss_ib": 0.0019587702117860317, + "step": 599 + }, + { + "ce_ib": 10.507881164550781, + "ce_orig": 1.1790790557861328, + "epoch": 0.17226256380760657, + "kl_loss": 0.15849515795707703, + "loss_ib": 0.002635739743709564, + "step": 599 + }, + { + "ce_ib": 12.296706199645996, + "ce_orig": 1.374943494796753, + "epoch": 0.17226256380760657, + "kl_loss": 0.16814401745796204, + "loss_ib": 0.0029111106414347887, + "step": 599 + }, + { + "ce_ib": 11.69453239440918, + "ce_orig": 1.4047762155532837, + "epoch": 0.17226256380760657, + "kl_loss": 0.1669248342514038, + "loss_ib": 0.0028387014754116535, + "step": 599 + }, + { + "epoch": 0.17255014738658422, + "grad_norm": 0.09025447815656662, + "learning_rate": 4.9905617529390203e-05, + "loss": 0.7996, + "step": 600 + }, + { + "ce_ib": 13.253778457641602, + "ce_orig": 1.4632776975631714, + "epoch": 0.17255014738658422, + "kl_loss": 0.10774030536413193, + "loss_ib": 0.0024027808103710413, + "step": 600 + }, + { + "ce_ib": 9.553170204162598, + "ce_orig": 0.6664531826972961, + "epoch": 0.17255014738658422, + "kl_loss": 0.167972594499588, + "loss_ib": 0.002635042881593108, + "step": 600 + }, + { + "ce_ib": 8.886030197143555, + "ce_orig": 0.6646020412445068, + "epoch": 0.17255014738658422, + "kl_loss": 0.16888710856437683, + "loss_ib": 0.002577474107965827, + "step": 600 + }, + { + "ce_ib": 7.923281669616699, + "ce_orig": 0.6328381299972534, + "epoch": 0.17255014738658422, + "kl_loss": 0.13492171466350555, + "loss_ib": 0.002141545293852687, + "step": 600 + }, + { + "ce_ib": 5.616130828857422, + "ce_orig": 0.5428784489631653, + "epoch": 0.17283773096556188, + "kl_loss": 0.1354779154062271, + "loss_ib": 0.0019163921242579818, + "step": 601 + }, + { + "ce_ib": 9.720524787902832, + "ce_orig": 0.8215920925140381, + "epoch": 0.17283773096556188, + "kl_loss": 0.14569802582263947, + "loss_ib": 0.002429032465443015, + "step": 601 + }, + { + "ce_ib": 8.02761173248291, + "ce_orig": 0.8096197843551636, + "epoch": 0.17283773096556188, + "kl_loss": 0.1350010186433792, + "loss_ib": 0.002152771223336458, + "step": 601 + }, + { + "ce_ib": 7.660145282745361, + "ce_orig": 0.6597212553024292, + "epoch": 0.17283773096556188, + "kl_loss": 0.1304370015859604, + "loss_ib": 0.0020703845657408237, + "step": 601 + }, + { + "ce_ib": 7.52744722366333, + "ce_orig": 0.507038414478302, + "epoch": 0.1731253145445395, + "kl_loss": 0.19013965129852295, + "loss_ib": 0.0026541410479694605, + "step": 602 + }, + { + "ce_ib": 10.766141891479492, + "ce_orig": 0.8578734397888184, + "epoch": 0.1731253145445395, + "kl_loss": 0.12946805357933044, + "loss_ib": 0.002371294656768441, + "step": 602 + }, + { + "ce_ib": 5.255903244018555, + "ce_orig": 0.6048762798309326, + "epoch": 0.1731253145445395, + "kl_loss": 0.14380118250846863, + "loss_ib": 0.0019636021461337805, + "step": 602 + }, + { + "ce_ib": 10.115198135375977, + "ce_orig": 0.9766972064971924, + "epoch": 0.1731253145445395, + "kl_loss": 0.16051560640335083, + "loss_ib": 0.002616675803437829, + "step": 602 + }, + { + "ce_ib": 6.0160980224609375, + "ce_orig": 0.5172838568687439, + "epoch": 0.17341289812351715, + "kl_loss": 0.1775059700012207, + "loss_ib": 0.0023766695521771908, + "step": 603 + }, + { + "ce_ib": 7.59116792678833, + "ce_orig": 0.806316077709198, + "epoch": 0.17341289812351715, + "kl_loss": 0.23788809776306152, + "loss_ib": 0.0031379975844174623, + "step": 603 + }, + { + "ce_ib": 6.338790416717529, + "ce_orig": 0.17640917003154755, + "epoch": 0.17341289812351715, + "kl_loss": 0.11507699638605118, + "loss_ib": 0.0017846488626673818, + "step": 603 + }, + { + "ce_ib": 10.649868965148926, + "ce_orig": 1.2555245161056519, + "epoch": 0.17341289812351715, + "kl_loss": 0.16883064806461334, + "loss_ib": 0.002753293374553323, + "step": 603 + }, + { + "ce_ib": 15.31544303894043, + "ce_orig": 1.5796380043029785, + "epoch": 0.17370048170249477, + "kl_loss": 0.3695501685142517, + "loss_ib": 0.005227046087384224, + "step": 604 + }, + { + "ce_ib": 11.3411865234375, + "ce_orig": 1.3669389486312866, + "epoch": 0.17370048170249477, + "kl_loss": 0.13643240928649902, + "loss_ib": 0.0024984427727758884, + "step": 604 + }, + { + "ce_ib": 9.672558784484863, + "ce_orig": 0.6192442178726196, + "epoch": 0.17370048170249477, + "kl_loss": 0.17595481872558594, + "loss_ib": 0.0027268039993941784, + "step": 604 + }, + { + "ce_ib": 7.121029376983643, + "ce_orig": 0.7190517783164978, + "epoch": 0.17370048170249477, + "kl_loss": 0.1759684681892395, + "loss_ib": 0.0024717876221984625, + "step": 604 + }, + { + "epoch": 0.17398806528147243, + "grad_norm": 0.10308549553155899, + "learning_rate": 4.990221884908206e-05, + "loss": 0.8495, + "step": 605 + }, + { + "ce_ib": 15.291545867919922, + "ce_orig": 1.6869012117385864, + "epoch": 0.17398806528147243, + "kl_loss": 0.1663864552974701, + "loss_ib": 0.0031930189579725266, + "step": 605 + }, + { + "ce_ib": 7.900699138641357, + "ce_orig": 1.0077180862426758, + "epoch": 0.17398806528147243, + "kl_loss": 0.1302233338356018, + "loss_ib": 0.002092303242534399, + "step": 605 + }, + { + "ce_ib": 8.34791088104248, + "ce_orig": 0.7704336047172546, + "epoch": 0.17398806528147243, + "kl_loss": 0.17671293020248413, + "loss_ib": 0.0026019203942269087, + "step": 605 + }, + { + "ce_ib": 8.685835838317871, + "ce_orig": 0.8194261193275452, + "epoch": 0.17398806528147243, + "kl_loss": 0.2805391252040863, + "loss_ib": 0.0036739748902618885, + "step": 605 + }, + { + "ce_ib": 14.333061218261719, + "ce_orig": 1.967085361480713, + "epoch": 0.17427564886045008, + "kl_loss": 0.18738757073879242, + "loss_ib": 0.0033071814104914665, + "step": 606 + }, + { + "ce_ib": 4.270995616912842, + "ce_orig": 0.5261480808258057, + "epoch": 0.17427564886045008, + "kl_loss": 0.1274331510066986, + "loss_ib": 0.0017014308832585812, + "step": 606 + }, + { + "ce_ib": 9.806127548217773, + "ce_orig": 1.3137513399124146, + "epoch": 0.17427564886045008, + "kl_loss": 0.18525034189224243, + "loss_ib": 0.002833116101101041, + "step": 606 + }, + { + "ce_ib": 12.188000679016113, + "ce_orig": 1.2606420516967773, + "epoch": 0.17427564886045008, + "kl_loss": 0.15507441759109497, + "loss_ib": 0.0027695440221577883, + "step": 606 + }, + { + "ce_ib": 7.470699787139893, + "ce_orig": 0.2429499328136444, + "epoch": 0.1745632324394277, + "kl_loss": 0.18981708586215973, + "loss_ib": 0.002645240630954504, + "step": 607 + }, + { + "ce_ib": 9.157522201538086, + "ce_orig": 0.8754900693893433, + "epoch": 0.1745632324394277, + "kl_loss": 0.21200796961784363, + "loss_ib": 0.0030358319636434317, + "step": 607 + }, + { + "ce_ib": 10.273916244506836, + "ce_orig": 0.7955263257026672, + "epoch": 0.1745632324394277, + "kl_loss": 0.14678597450256348, + "loss_ib": 0.002495251130312681, + "step": 607 + }, + { + "ce_ib": 8.007857322692871, + "ce_orig": 0.8106256127357483, + "epoch": 0.1745632324394277, + "kl_loss": 0.10579822957515717, + "loss_ib": 0.001858767936937511, + "step": 607 + }, + { + "ce_ib": 14.369596481323242, + "ce_orig": 1.0616765022277832, + "epoch": 0.17485081601840535, + "kl_loss": 0.44787994027137756, + "loss_ib": 0.00591575913131237, + "step": 608 + }, + { + "ce_ib": 12.925912857055664, + "ce_orig": 1.2854984998703003, + "epoch": 0.17485081601840535, + "kl_loss": 0.22356921434402466, + "loss_ib": 0.0035282832104712725, + "step": 608 + }, + { + "ce_ib": 5.677262306213379, + "ce_orig": 0.6853749752044678, + "epoch": 0.17485081601840535, + "kl_loss": 0.2728205621242523, + "loss_ib": 0.0032959317322820425, + "step": 608 + }, + { + "ce_ib": 9.354161262512207, + "ce_orig": 0.8821960091590881, + "epoch": 0.17485081601840535, + "kl_loss": 0.1578974723815918, + "loss_ib": 0.0025143909733742476, + "step": 608 + }, + { + "ce_ib": 15.807966232299805, + "ce_orig": 1.1312600374221802, + "epoch": 0.17513839959738298, + "kl_loss": 0.2390902042388916, + "loss_ib": 0.003971698693931103, + "step": 609 + }, + { + "ce_ib": 12.78138542175293, + "ce_orig": 0.9202042818069458, + "epoch": 0.17513839959738298, + "kl_loss": 0.20055478811264038, + "loss_ib": 0.003283686237409711, + "step": 609 + }, + { + "ce_ib": 10.475568771362305, + "ce_orig": 0.8941435217857361, + "epoch": 0.17513839959738298, + "kl_loss": 0.15872544050216675, + "loss_ib": 0.0026348114479333162, + "step": 609 + }, + { + "ce_ib": 10.035277366638184, + "ce_orig": 1.2712260484695435, + "epoch": 0.17513839959738298, + "kl_loss": 0.15426132082939148, + "loss_ib": 0.002546140691265464, + "step": 609 + }, + { + "epoch": 0.17542598317636063, + "grad_norm": 0.09014883637428284, + "learning_rate": 4.989876017354878e-05, + "loss": 0.9056, + "step": 610 + }, + { + "ce_ib": 9.086161613464355, + "ce_orig": 0.9555597305297852, + "epoch": 0.17542598317636063, + "kl_loss": 0.14901208877563477, + "loss_ib": 0.002398737007752061, + "step": 610 + }, + { + "ce_ib": 13.615242004394531, + "ce_orig": 1.6006946563720703, + "epoch": 0.17542598317636063, + "kl_loss": 0.14780299365520477, + "loss_ib": 0.0028395538683980703, + "step": 610 + }, + { + "ce_ib": 9.924784660339355, + "ce_orig": 0.732452929019928, + "epoch": 0.17542598317636063, + "kl_loss": 0.14724621176719666, + "loss_ib": 0.0024649405386298895, + "step": 610 + }, + { + "ce_ib": 6.761714458465576, + "ce_orig": 0.5315162539482117, + "epoch": 0.17542598317636063, + "kl_loss": 0.12623527646064758, + "loss_ib": 0.0019385241903364658, + "step": 610 + }, + { + "ce_ib": 8.44915771484375, + "ce_orig": 0.8193613290786743, + "epoch": 0.17571356675533828, + "kl_loss": 0.1713801920413971, + "loss_ib": 0.0025587177369743586, + "step": 611 + }, + { + "ce_ib": 8.449363708496094, + "ce_orig": 0.567879319190979, + "epoch": 0.17571356675533828, + "kl_loss": 0.2723478078842163, + "loss_ib": 0.0035684143658727407, + "step": 611 + }, + { + "ce_ib": 5.55802059173584, + "ce_orig": 0.4952395260334015, + "epoch": 0.17571356675533828, + "kl_loss": 0.1825639307498932, + "loss_ib": 0.002381441183388233, + "step": 611 + }, + { + "ce_ib": 14.0112886428833, + "ce_orig": 1.512209177017212, + "epoch": 0.17571356675533828, + "kl_loss": 0.17549076676368713, + "loss_ib": 0.0031560363713651896, + "step": 611 + }, + { + "ce_ib": 8.182499885559082, + "ce_orig": 0.914882481098175, + "epoch": 0.1760011503343159, + "kl_loss": 0.16630445420742035, + "loss_ib": 0.0024812945630401373, + "step": 612 + }, + { + "ce_ib": 12.355195999145508, + "ce_orig": 0.9773331880569458, + "epoch": 0.1760011503343159, + "kl_loss": 0.23119373619556427, + "loss_ib": 0.0035474568139761686, + "step": 612 + }, + { + "ce_ib": 7.4552083015441895, + "ce_orig": 0.7183859944343567, + "epoch": 0.1760011503343159, + "kl_loss": 0.15184658765792847, + "loss_ib": 0.002263986738398671, + "step": 612 + }, + { + "ce_ib": 12.164751052856445, + "ce_orig": 1.0548458099365234, + "epoch": 0.1760011503343159, + "kl_loss": 0.179172545671463, + "loss_ib": 0.003008200554177165, + "step": 612 + }, + { + "ce_ib": 11.297383308410645, + "ce_orig": 0.8856826424598694, + "epoch": 0.17628873391329355, + "kl_loss": 0.18683576583862305, + "loss_ib": 0.0029980959370732307, + "step": 613 + }, + { + "ce_ib": 10.6996488571167, + "ce_orig": 0.816612958908081, + "epoch": 0.17628873391329355, + "kl_loss": 0.19587890803813934, + "loss_ib": 0.003028753912076354, + "step": 613 + }, + { + "ce_ib": 7.265003204345703, + "ce_orig": 0.5818957686424255, + "epoch": 0.17628873391329355, + "kl_loss": 0.24078163504600525, + "loss_ib": 0.0031343167647719383, + "step": 613 + }, + { + "ce_ib": 13.445194244384766, + "ce_orig": 1.461010217666626, + "epoch": 0.17628873391329355, + "kl_loss": 0.1896296739578247, + "loss_ib": 0.003240815829485655, + "step": 613 + }, + { + "ce_ib": 8.329078674316406, + "ce_orig": 0.5632631778717041, + "epoch": 0.17657631749227118, + "kl_loss": 0.1660267412662506, + "loss_ib": 0.0024931752122938633, + "step": 614 + }, + { + "ce_ib": 8.5061616897583, + "ce_orig": 0.668455958366394, + "epoch": 0.17657631749227118, + "kl_loss": 0.09593428671360016, + "loss_ib": 0.0018099590670317411, + "step": 614 + }, + { + "ce_ib": 7.759790420532227, + "ce_orig": 0.7224739789962769, + "epoch": 0.17657631749227118, + "kl_loss": 0.13144069910049438, + "loss_ib": 0.0020903858821839094, + "step": 614 + }, + { + "ce_ib": 9.92943000793457, + "ce_orig": 0.5543064475059509, + "epoch": 0.17657631749227118, + "kl_loss": 0.20024323463439941, + "loss_ib": 0.0029953753110021353, + "step": 614 + }, + { + "epoch": 0.17686390107124883, + "grad_norm": 0.14787979423999786, + "learning_rate": 4.9895241511123114e-05, + "loss": 0.8549, + "step": 615 + }, + { + "ce_ib": 12.167133331298828, + "ce_orig": 1.0232415199279785, + "epoch": 0.17686390107124883, + "kl_loss": 0.15243038535118103, + "loss_ib": 0.0027410173788666725, + "step": 615 + }, + { + "ce_ib": 12.68350601196289, + "ce_orig": 0.8611997365951538, + "epoch": 0.17686390107124883, + "kl_loss": 0.1832544207572937, + "loss_ib": 0.0031008946243673563, + "step": 615 + }, + { + "ce_ib": 5.987475872039795, + "ce_orig": 0.6404716968536377, + "epoch": 0.17686390107124883, + "kl_loss": 0.15817669034004211, + "loss_ib": 0.0021805143915116787, + "step": 615 + }, + { + "ce_ib": 6.460445880889893, + "ce_orig": 0.3817240297794342, + "epoch": 0.17686390107124883, + "kl_loss": 0.16189801692962646, + "loss_ib": 0.00226502469740808, + "step": 615 + }, + { + "ce_ib": 10.936073303222656, + "ce_orig": 0.48395687341690063, + "epoch": 0.17715148465022648, + "kl_loss": 0.1969614326953888, + "loss_ib": 0.003063221462070942, + "step": 616 + }, + { + "ce_ib": 8.578774452209473, + "ce_orig": 0.6014773845672607, + "epoch": 0.17715148465022648, + "kl_loss": 0.1850200891494751, + "loss_ib": 0.0027080783620476723, + "step": 616 + }, + { + "ce_ib": 6.492056369781494, + "ce_orig": 0.6777470707893372, + "epoch": 0.17715148465022648, + "kl_loss": 0.16238093376159668, + "loss_ib": 0.0022730149794369936, + "step": 616 + }, + { + "ce_ib": 5.815036296844482, + "ce_orig": 0.7170777320861816, + "epoch": 0.17715148465022648, + "kl_loss": 0.12132446467876434, + "loss_ib": 0.0017947482410818338, + "step": 616 + }, + { + "ce_ib": 6.723383903503418, + "ce_orig": 0.5069630146026611, + "epoch": 0.1774390682292041, + "kl_loss": 0.14102785289287567, + "loss_ib": 0.0020826170220971107, + "step": 617 + }, + { + "ce_ib": 7.4365363121032715, + "ce_orig": 0.6629728674888611, + "epoch": 0.1774390682292041, + "kl_loss": 0.18020984530448914, + "loss_ib": 0.002545751864090562, + "step": 617 + }, + { + "ce_ib": 10.48121166229248, + "ce_orig": 0.9769195318222046, + "epoch": 0.1774390682292041, + "kl_loss": 0.14403533935546875, + "loss_ib": 0.0024884745944291353, + "step": 617 + }, + { + "ce_ib": 8.293168067932129, + "ce_orig": 1.2928009033203125, + "epoch": 0.1774390682292041, + "kl_loss": 0.10530447959899902, + "loss_ib": 0.0018823615973815322, + "step": 617 + }, + { + "ce_ib": 5.5370988845825195, + "ce_orig": 0.6263954043388367, + "epoch": 0.17772665180818176, + "kl_loss": 0.11039714515209198, + "loss_ib": 0.001657681306824088, + "step": 618 + }, + { + "ce_ib": 13.478799819946289, + "ce_orig": 1.3046756982803345, + "epoch": 0.17772665180818176, + "kl_loss": 0.16247433423995972, + "loss_ib": 0.002972623100504279, + "step": 618 + }, + { + "ce_ib": 6.596388816833496, + "ce_orig": 0.622149646282196, + "epoch": 0.17772665180818176, + "kl_loss": 0.1503526270389557, + "loss_ib": 0.0021631652489304543, + "step": 618 + }, + { + "ce_ib": 12.335912704467773, + "ce_orig": 1.3814923763275146, + "epoch": 0.17772665180818176, + "kl_loss": 0.14966334402561188, + "loss_ib": 0.002730224747210741, + "step": 618 + }, + { + "ce_ib": 10.754959106445312, + "ce_orig": 0.859503984451294, + "epoch": 0.17801423538715938, + "kl_loss": 0.12099392712116241, + "loss_ib": 0.0022854350972920656, + "step": 619 + }, + { + "ce_ib": 5.859959125518799, + "ce_orig": 0.5606276392936707, + "epoch": 0.17801423538715938, + "kl_loss": 0.1312570571899414, + "loss_ib": 0.0018985664937645197, + "step": 619 + }, + { + "ce_ib": 5.188699722290039, + "ce_orig": 0.7638875842094421, + "epoch": 0.17801423538715938, + "kl_loss": 0.1094013974070549, + "loss_ib": 0.0016128838760778308, + "step": 619 + }, + { + "ce_ib": 11.558479309082031, + "ce_orig": 0.7178393006324768, + "epoch": 0.17801423538715938, + "kl_loss": 0.1623765379190445, + "loss_ib": 0.0027796130161732435, + "step": 619 + }, + { + "epoch": 0.17830181896613703, + "grad_norm": 0.15338236093521118, + "learning_rate": 4.989166287028234e-05, + "loss": 0.8193, + "step": 620 + }, + { + "ce_ib": 6.948857307434082, + "ce_orig": 0.7019293308258057, + "epoch": 0.17830181896613703, + "kl_loss": 0.1282852739095688, + "loss_ib": 0.001977738458663225, + "step": 620 + }, + { + "ce_ib": 6.701162815093994, + "ce_orig": 0.45527055859565735, + "epoch": 0.17830181896613703, + "kl_loss": 0.30297911167144775, + "loss_ib": 0.003699907334521413, + "step": 620 + }, + { + "ce_ib": 14.506156921386719, + "ce_orig": 1.8138208389282227, + "epoch": 0.17830181896613703, + "kl_loss": 0.20029760897159576, + "loss_ib": 0.0034535916056483984, + "step": 620 + }, + { + "ce_ib": 8.688165664672852, + "ce_orig": 0.6975913047790527, + "epoch": 0.17830181896613703, + "kl_loss": 0.1677013337612152, + "loss_ib": 0.002545829862356186, + "step": 620 + }, + { + "ce_ib": 8.23354721069336, + "ce_orig": 0.8580948710441589, + "epoch": 0.17858940254511468, + "kl_loss": 0.17454084753990173, + "loss_ib": 0.0025687632150948048, + "step": 621 + }, + { + "ce_ib": 6.359945774078369, + "ce_orig": 0.6615402102470398, + "epoch": 0.17858940254511468, + "kl_loss": 0.16662156581878662, + "loss_ib": 0.0023022103123366833, + "step": 621 + }, + { + "ce_ib": 9.940476417541504, + "ce_orig": 0.8817598819732666, + "epoch": 0.17858940254511468, + "kl_loss": 0.14029280841350555, + "loss_ib": 0.0023969756439328194, + "step": 621 + }, + { + "ce_ib": 5.855399131774902, + "ce_orig": 0.2266225963830948, + "epoch": 0.17858940254511468, + "kl_loss": 0.11816604435443878, + "loss_ib": 0.0017672003014013171, + "step": 621 + }, + { + "ce_ib": 9.321126937866211, + "ce_orig": 0.6670013666152954, + "epoch": 0.1788769861240923, + "kl_loss": 0.1992078721523285, + "loss_ib": 0.0029241912998259068, + "step": 622 + }, + { + "ce_ib": 9.116394996643066, + "ce_orig": 0.8380889296531677, + "epoch": 0.1788769861240923, + "kl_loss": 0.13510224223136902, + "loss_ib": 0.0022626619320362806, + "step": 622 + }, + { + "ce_ib": 8.283426284790039, + "ce_orig": 0.7578662037849426, + "epoch": 0.1788769861240923, + "kl_loss": 0.0886252149939537, + "loss_ib": 0.0017145946621894836, + "step": 622 + }, + { + "ce_ib": 10.24531364440918, + "ce_orig": 1.1135846376419067, + "epoch": 0.1788769861240923, + "kl_loss": 0.1562294065952301, + "loss_ib": 0.002586825517937541, + "step": 622 + }, + { + "ce_ib": 8.487592697143555, + "ce_orig": 0.6697806715965271, + "epoch": 0.17916456970306996, + "kl_loss": 0.22156865894794464, + "loss_ib": 0.003064445685595274, + "step": 623 + }, + { + "ce_ib": 7.489628314971924, + "ce_orig": 0.9431294202804565, + "epoch": 0.17916456970306996, + "kl_loss": 0.131272092461586, + "loss_ib": 0.0020616836845874786, + "step": 623 + }, + { + "ce_ib": 10.724349021911621, + "ce_orig": 1.0055797100067139, + "epoch": 0.17916456970306996, + "kl_loss": 0.1610507071018219, + "loss_ib": 0.002682941732928157, + "step": 623 + }, + { + "ce_ib": 8.10354995727539, + "ce_orig": 0.8617219924926758, + "epoch": 0.17916456970306996, + "kl_loss": 0.2971377372741699, + "loss_ib": 0.0037817321717739105, + "step": 623 + }, + { + "ce_ib": 11.173288345336914, + "ce_orig": 1.3478788137435913, + "epoch": 0.17945215328204758, + "kl_loss": 0.14247748255729675, + "loss_ib": 0.0025421034079045057, + "step": 624 + }, + { + "ce_ib": 5.565175533294678, + "ce_orig": 0.36270689964294434, + "epoch": 0.17945215328204758, + "kl_loss": 0.16036799550056458, + "loss_ib": 0.0021601973567157984, + "step": 624 + }, + { + "ce_ib": 10.509783744812012, + "ce_orig": 1.0102475881576538, + "epoch": 0.17945215328204758, + "kl_loss": 0.17176342010498047, + "loss_ib": 0.002768612466752529, + "step": 624 + }, + { + "ce_ib": 12.00045108795166, + "ce_orig": 1.1874045133590698, + "epoch": 0.17945215328204758, + "kl_loss": 0.21322676539421082, + "loss_ib": 0.003332312684506178, + "step": 624 + }, + { + "epoch": 0.17973973686102523, + "grad_norm": 0.11537851393222809, + "learning_rate": 4.988802425964824e-05, + "loss": 0.8732, + "step": 625 + }, + { + "ce_ib": 7.465215682983398, + "ce_orig": 0.6246358752250671, + "epoch": 0.17973973686102523, + "kl_loss": 0.2165844887495041, + "loss_ib": 0.002912366297096014, + "step": 625 + }, + { + "ce_ib": 6.249822616577148, + "ce_orig": 0.5895569920539856, + "epoch": 0.17973973686102523, + "kl_loss": 0.1526128053665161, + "loss_ib": 0.0021511102095246315, + "step": 625 + }, + { + "ce_ib": 8.204090118408203, + "ce_orig": 0.8461520075798035, + "epoch": 0.17973973686102523, + "kl_loss": 0.1934261918067932, + "loss_ib": 0.002754670800641179, + "step": 625 + }, + { + "ce_ib": 6.963839054107666, + "ce_orig": 0.7485008835792542, + "epoch": 0.17973973686102523, + "kl_loss": 0.08876027911901474, + "loss_ib": 0.001583986566402018, + "step": 625 + }, + { + "ce_ib": 13.58116626739502, + "ce_orig": 1.2827461957931519, + "epoch": 0.18002732044000289, + "kl_loss": 0.19602948427200317, + "loss_ib": 0.00331841129809618, + "step": 626 + }, + { + "ce_ib": 13.048462867736816, + "ce_orig": 1.3438245058059692, + "epoch": 0.18002732044000289, + "kl_loss": 0.16691835224628448, + "loss_ib": 0.002974029630422592, + "step": 626 + }, + { + "ce_ib": 8.618809700012207, + "ce_orig": 0.4586450755596161, + "epoch": 0.18002732044000289, + "kl_loss": 0.17586086690425873, + "loss_ib": 0.0026204895693808794, + "step": 626 + }, + { + "ce_ib": 9.625340461730957, + "ce_orig": 1.0119637250900269, + "epoch": 0.18002732044000289, + "kl_loss": 0.2463323473930359, + "loss_ib": 0.0034258572850376368, + "step": 626 + }, + { + "ce_ib": 9.643866539001465, + "ce_orig": 0.5789271593093872, + "epoch": 0.1803149040189805, + "kl_loss": 0.16274522244930267, + "loss_ib": 0.002591838827356696, + "step": 627 + }, + { + "ce_ib": 7.565330505371094, + "ce_orig": 0.3938678801059723, + "epoch": 0.1803149040189805, + "kl_loss": 0.16621270775794983, + "loss_ib": 0.002418660093098879, + "step": 627 + }, + { + "ce_ib": 9.062137603759766, + "ce_orig": 0.7573468089103699, + "epoch": 0.1803149040189805, + "kl_loss": 0.15040776133537292, + "loss_ib": 0.002410291461274028, + "step": 627 + }, + { + "ce_ib": 7.801846981048584, + "ce_orig": 0.5504791736602783, + "epoch": 0.1803149040189805, + "kl_loss": 0.20418506860733032, + "loss_ib": 0.0028220354579389095, + "step": 627 + }, + { + "ce_ib": 12.088622093200684, + "ce_orig": 1.4907240867614746, + "epoch": 0.18060248759795816, + "kl_loss": 0.1632252186536789, + "loss_ib": 0.0028411142993718386, + "step": 628 + }, + { + "ce_ib": 10.344350814819336, + "ce_orig": 1.1456356048583984, + "epoch": 0.18060248759795816, + "kl_loss": 0.11406631767749786, + "loss_ib": 0.0021750980522483587, + "step": 628 + }, + { + "ce_ib": 11.513299942016602, + "ce_orig": 1.1756242513656616, + "epoch": 0.18060248759795816, + "kl_loss": 0.21336236596107483, + "loss_ib": 0.003284953534603119, + "step": 628 + }, + { + "ce_ib": 13.674637794494629, + "ce_orig": 1.8630796670913696, + "epoch": 0.18060248759795816, + "kl_loss": 0.15529105067253113, + "loss_ib": 0.002920374274253845, + "step": 628 + }, + { + "ce_ib": 7.227867603302002, + "ce_orig": 0.6919394135475159, + "epoch": 0.18089007117693578, + "kl_loss": 0.1787305772304535, + "loss_ib": 0.00251009245403111, + "step": 629 + }, + { + "ce_ib": 13.092966079711914, + "ce_orig": 1.2497636079788208, + "epoch": 0.18089007117693578, + "kl_loss": 0.11016056686639786, + "loss_ib": 0.0024109024088829756, + "step": 629 + }, + { + "ce_ib": 12.19546890258789, + "ce_orig": 1.26802396774292, + "epoch": 0.18089007117693578, + "kl_loss": 0.201836496591568, + "loss_ib": 0.00323791173286736, + "step": 629 + }, + { + "ce_ib": 8.523748397827148, + "ce_orig": 0.5664364695549011, + "epoch": 0.18089007117693578, + "kl_loss": 0.2126503586769104, + "loss_ib": 0.002978878328576684, + "step": 629 + }, + { + "epoch": 0.18117765475591344, + "grad_norm": 0.13004900515079498, + "learning_rate": 4.9884325687987056e-05, + "loss": 0.8905, + "step": 630 + }, + { + "ce_ib": 12.112406730651855, + "ce_orig": 1.2464693784713745, + "epoch": 0.18117765475591344, + "kl_loss": 0.16276949644088745, + "loss_ib": 0.0028389354702085257, + "step": 630 + }, + { + "ce_ib": 6.565924644470215, + "ce_orig": 0.48202767968177795, + "epoch": 0.18117765475591344, + "kl_loss": 0.16982322931289673, + "loss_ib": 0.0023548246826976538, + "step": 630 + }, + { + "ce_ib": 11.657485961914062, + "ce_orig": 1.4749407768249512, + "epoch": 0.18117765475591344, + "kl_loss": 0.16900035738945007, + "loss_ib": 0.0028557521291077137, + "step": 630 + }, + { + "ce_ib": 10.074145317077637, + "ce_orig": 1.1275204420089722, + "epoch": 0.18117765475591344, + "kl_loss": 0.1381954848766327, + "loss_ib": 0.002389369299635291, + "step": 630 + }, + { + "ce_ib": 10.129415512084961, + "ce_orig": 0.6778865456581116, + "epoch": 0.1814652383348911, + "kl_loss": 0.12124676257371902, + "loss_ib": 0.0022254090290516615, + "step": 631 + }, + { + "ce_ib": 9.381417274475098, + "ce_orig": 0.7912308573722839, + "epoch": 0.1814652383348911, + "kl_loss": 0.1806401014328003, + "loss_ib": 0.0027445426676422358, + "step": 631 + }, + { + "ce_ib": 6.85666561126709, + "ce_orig": 0.6336774230003357, + "epoch": 0.1814652383348911, + "kl_loss": 0.14535778760910034, + "loss_ib": 0.0021392442286014557, + "step": 631 + }, + { + "ce_ib": 7.080989837646484, + "ce_orig": 0.8866851329803467, + "epoch": 0.1814652383348911, + "kl_loss": 0.1062016636133194, + "loss_ib": 0.0017701154574751854, + "step": 631 + }, + { + "ce_ib": 6.399113655090332, + "ce_orig": 0.7909007668495178, + "epoch": 0.1817528219138687, + "kl_loss": 0.09706030040979385, + "loss_ib": 0.0016105143586173654, + "step": 632 + }, + { + "ce_ib": 6.583844184875488, + "ce_orig": 0.45864665508270264, + "epoch": 0.1817528219138687, + "kl_loss": 0.1002674251794815, + "loss_ib": 0.0016610586317256093, + "step": 632 + }, + { + "ce_ib": 14.482834815979004, + "ce_orig": 1.6091009378433228, + "epoch": 0.1817528219138687, + "kl_loss": 0.1842235028743744, + "loss_ib": 0.00329051841981709, + "step": 632 + }, + { + "ce_ib": 9.707879066467285, + "ce_orig": 0.8563067317008972, + "epoch": 0.1817528219138687, + "kl_loss": 0.15452846884727478, + "loss_ib": 0.002516072243452072, + "step": 632 + }, + { + "ce_ib": 7.918076515197754, + "ce_orig": 1.0354722738265991, + "epoch": 0.18204040549284636, + "kl_loss": 0.126227468252182, + "loss_ib": 0.002054082229733467, + "step": 633 + }, + { + "ce_ib": 9.942896842956543, + "ce_orig": 1.557401418685913, + "epoch": 0.18204040549284636, + "kl_loss": 0.14876790344715118, + "loss_ib": 0.0024819686077535152, + "step": 633 + }, + { + "ce_ib": 9.64987564086914, + "ce_orig": 1.0955207347869873, + "epoch": 0.18204040549284636, + "kl_loss": 0.12603306770324707, + "loss_ib": 0.0022253182251006365, + "step": 633 + }, + { + "ce_ib": 8.467226028442383, + "ce_orig": 0.3422728478908539, + "epoch": 0.18204040549284636, + "kl_loss": 0.30516770482063293, + "loss_ib": 0.003898399416357279, + "step": 633 + }, + { + "ce_ib": 8.792363166809082, + "ce_orig": 0.9992319345474243, + "epoch": 0.182327989071824, + "kl_loss": 0.14874675869941711, + "loss_ib": 0.002366703934967518, + "step": 634 + }, + { + "ce_ib": 8.27891731262207, + "ce_orig": 1.0733726024627686, + "epoch": 0.182327989071824, + "kl_loss": 0.13452918827533722, + "loss_ib": 0.002173183485865593, + "step": 634 + }, + { + "ce_ib": 9.549663543701172, + "ce_orig": 0.8865470290184021, + "epoch": 0.182327989071824, + "kl_loss": 0.21063677966594696, + "loss_ib": 0.0030613341368734837, + "step": 634 + }, + { + "ce_ib": 6.622999668121338, + "ce_orig": 0.46363383531570435, + "epoch": 0.182327989071824, + "kl_loss": 0.161158949136734, + "loss_ib": 0.0022738894913345575, + "step": 634 + }, + { + "epoch": 0.18261557265080164, + "grad_norm": 0.09408904612064362, + "learning_rate": 4.9880567164209515e-05, + "loss": 0.8971, + "step": 635 + }, + { + "ce_ib": 8.740588188171387, + "ce_orig": 0.7484884262084961, + "epoch": 0.18261557265080164, + "kl_loss": 0.1564946174621582, + "loss_ib": 0.0024390050675719976, + "step": 635 + }, + { + "ce_ib": 7.376012325286865, + "ce_orig": 0.48069027066230774, + "epoch": 0.18261557265080164, + "kl_loss": 0.20798180997371674, + "loss_ib": 0.0028174191247671843, + "step": 635 + }, + { + "ce_ib": 7.95230770111084, + "ce_orig": 0.4582425057888031, + "epoch": 0.18261557265080164, + "kl_loss": 0.11464007198810577, + "loss_ib": 0.0019416314316913486, + "step": 635 + }, + { + "ce_ib": 11.704083442687988, + "ce_orig": 0.9559797048568726, + "epoch": 0.18261557265080164, + "kl_loss": 0.14773112535476685, + "loss_ib": 0.0026477195788174868, + "step": 635 + }, + { + "ce_ib": 9.855140686035156, + "ce_orig": 1.0313355922698975, + "epoch": 0.1829031562297793, + "kl_loss": 0.15524601936340332, + "loss_ib": 0.0025379741564393044, + "step": 636 + }, + { + "ce_ib": 8.928131103515625, + "ce_orig": 0.6653744578361511, + "epoch": 0.1829031562297793, + "kl_loss": 0.235584557056427, + "loss_ib": 0.0032486587297171354, + "step": 636 + }, + { + "ce_ib": 7.960230827331543, + "ce_orig": 0.4329434633255005, + "epoch": 0.1829031562297793, + "kl_loss": 0.13450872898101807, + "loss_ib": 0.0021411103662103415, + "step": 636 + }, + { + "ce_ib": 8.45853042602539, + "ce_orig": 0.3819558918476105, + "epoch": 0.1829031562297793, + "kl_loss": 0.18511459231376648, + "loss_ib": 0.0026969988830387592, + "step": 636 + }, + { + "ce_ib": 13.116471290588379, + "ce_orig": 1.6339662075042725, + "epoch": 0.1831907398087569, + "kl_loss": 0.1558864712715149, + "loss_ib": 0.0028705119621008635, + "step": 637 + }, + { + "ce_ib": 15.931092262268066, + "ce_orig": 1.6720495223999023, + "epoch": 0.1831907398087569, + "kl_loss": 0.15220539271831512, + "loss_ib": 0.0031151631847023964, + "step": 637 + }, + { + "ce_ib": 11.66745662689209, + "ce_orig": 1.4903631210327148, + "epoch": 0.1831907398087569, + "kl_loss": 0.29781395196914673, + "loss_ib": 0.004144885111600161, + "step": 637 + }, + { + "ce_ib": 8.845624923706055, + "ce_orig": 0.9857800602912903, + "epoch": 0.1831907398087569, + "kl_loss": 0.19395799934864044, + "loss_ib": 0.002824142575263977, + "step": 637 + }, + { + "ce_ib": 5.086018085479736, + "ce_orig": 0.40799444913864136, + "epoch": 0.18347832338773457, + "kl_loss": 0.13657420873641968, + "loss_ib": 0.0018743438413366675, + "step": 638 + }, + { + "ce_ib": 6.492795944213867, + "ce_orig": 0.6550372838973999, + "epoch": 0.18347832338773457, + "kl_loss": 0.14029884338378906, + "loss_ib": 0.002052268013358116, + "step": 638 + }, + { + "ce_ib": 11.760429382324219, + "ce_orig": 1.4298399686813354, + "epoch": 0.18347832338773457, + "kl_loss": 0.14298929274082184, + "loss_ib": 0.0026059357915073633, + "step": 638 + }, + { + "ce_ib": 10.418862342834473, + "ce_orig": 1.169357419013977, + "epoch": 0.18347832338773457, + "kl_loss": 0.13526105880737305, + "loss_ib": 0.0023944966960698366, + "step": 638 + }, + { + "ce_ib": 4.184670448303223, + "ce_orig": 0.1795201301574707, + "epoch": 0.1837659069667122, + "kl_loss": 0.3021865487098694, + "loss_ib": 0.003440332366153598, + "step": 639 + }, + { + "ce_ib": 10.151659965515137, + "ce_orig": 0.8202506899833679, + "epoch": 0.1837659069667122, + "kl_loss": 0.1445043683052063, + "loss_ib": 0.0024602096527814865, + "step": 639 + }, + { + "ce_ib": 7.495885372161865, + "ce_orig": 0.7958588004112244, + "epoch": 0.1837659069667122, + "kl_loss": 0.11683303862810135, + "loss_ib": 0.001917918911203742, + "step": 639 + }, + { + "ce_ib": 12.701330184936523, + "ce_orig": 1.1551557779312134, + "epoch": 0.1837659069667122, + "kl_loss": 0.1668703556060791, + "loss_ib": 0.002938836347311735, + "step": 639 + }, + { + "epoch": 0.18405349054568984, + "grad_norm": 0.09154196828603745, + "learning_rate": 4.987674869737077e-05, + "loss": 0.8505, + "step": 640 + }, + { + "ce_ib": 6.805731296539307, + "ce_orig": 0.8624812364578247, + "epoch": 0.18405349054568984, + "kl_loss": 0.08888162672519684, + "loss_ib": 0.0015693893656134605, + "step": 640 + }, + { + "ce_ib": 7.028994083404541, + "ce_orig": 0.5640987157821655, + "epoch": 0.18405349054568984, + "kl_loss": 0.1612461507320404, + "loss_ib": 0.0023153608199208975, + "step": 640 + }, + { + "ce_ib": 9.196657180786133, + "ce_orig": 0.7501866221427917, + "epoch": 0.18405349054568984, + "kl_loss": 0.15376678109169006, + "loss_ib": 0.0024573334958404303, + "step": 640 + }, + { + "ce_ib": 10.042610168457031, + "ce_orig": 1.2460449934005737, + "epoch": 0.18405349054568984, + "kl_loss": 0.13962647318840027, + "loss_ib": 0.0024005258455872536, + "step": 640 + }, + { + "ce_ib": 11.13807201385498, + "ce_orig": 1.2203491926193237, + "epoch": 0.1843410741246675, + "kl_loss": 0.2092892825603485, + "loss_ib": 0.003206700086593628, + "step": 641 + }, + { + "ce_ib": 6.8926472663879395, + "ce_orig": 0.9371761083602905, + "epoch": 0.1843410741246675, + "kl_loss": 0.1569281369447708, + "loss_ib": 0.0022585459519177675, + "step": 641 + }, + { + "ce_ib": 8.300681114196777, + "ce_orig": 0.8907142877578735, + "epoch": 0.1843410741246675, + "kl_loss": 0.11810189485549927, + "loss_ib": 0.002011086791753769, + "step": 641 + }, + { + "ce_ib": 9.104215621948242, + "ce_orig": 1.2735546827316284, + "epoch": 0.1843410741246675, + "kl_loss": 0.20481525361537933, + "loss_ib": 0.002958573866635561, + "step": 641 + }, + { + "ce_ib": 5.902524948120117, + "ce_orig": 0.6442005634307861, + "epoch": 0.18462865770364512, + "kl_loss": 0.06047610938549042, + "loss_ib": 0.0011950135231018066, + "step": 642 + }, + { + "ce_ib": 8.438175201416016, + "ce_orig": 0.8223277926445007, + "epoch": 0.18462865770364512, + "kl_loss": 0.1526065617799759, + "loss_ib": 0.002369883004575968, + "step": 642 + }, + { + "ce_ib": 7.181520938873291, + "ce_orig": 0.6614299416542053, + "epoch": 0.18462865770364512, + "kl_loss": 0.13312123715877533, + "loss_ib": 0.0020493643824011087, + "step": 642 + }, + { + "ce_ib": 6.217692852020264, + "ce_orig": 0.9638420939445496, + "epoch": 0.18462865770364512, + "kl_loss": 0.12894636392593384, + "loss_ib": 0.0019112328300252557, + "step": 642 + }, + { + "ce_ib": 9.330130577087402, + "ce_orig": 0.8886315226554871, + "epoch": 0.18491624128262277, + "kl_loss": 0.18079149723052979, + "loss_ib": 0.0027409279718995094, + "step": 643 + }, + { + "ce_ib": 7.036296844482422, + "ce_orig": 0.7276414632797241, + "epoch": 0.18491624128262277, + "kl_loss": 0.1510542333126068, + "loss_ib": 0.0022141719236969948, + "step": 643 + }, + { + "ce_ib": 8.81930160522461, + "ce_orig": 0.961742103099823, + "epoch": 0.18491624128262277, + "kl_loss": 0.11654697358608246, + "loss_ib": 0.0020473997574299574, + "step": 643 + }, + { + "ce_ib": 13.541308403015137, + "ce_orig": 1.0693286657333374, + "epoch": 0.18491624128262277, + "kl_loss": 0.1634470522403717, + "loss_ib": 0.0029886013362556696, + "step": 643 + }, + { + "ce_ib": 8.250872611999512, + "ce_orig": 0.7872787117958069, + "epoch": 0.1852038248616004, + "kl_loss": 0.15854009985923767, + "loss_ib": 0.0024104882031679153, + "step": 644 + }, + { + "ce_ib": 9.81741714477539, + "ce_orig": 0.981521725654602, + "epoch": 0.1852038248616004, + "kl_loss": 0.1492426097393036, + "loss_ib": 0.002474167849868536, + "step": 644 + }, + { + "ce_ib": 8.002400398254395, + "ce_orig": 0.9716108441352844, + "epoch": 0.1852038248616004, + "kl_loss": 0.2515189051628113, + "loss_ib": 0.0033154289703816175, + "step": 644 + }, + { + "ce_ib": 11.485904693603516, + "ce_orig": 1.4057406187057495, + "epoch": 0.1852038248616004, + "kl_loss": 0.1574161797761917, + "loss_ib": 0.00272275204770267, + "step": 644 + }, + { + "epoch": 0.18549140844057804, + "grad_norm": 0.10694620758295059, + "learning_rate": 4.98728702966704e-05, + "loss": 0.8809, + "step": 645 + }, + { + "ce_ib": 14.281476974487305, + "ce_orig": 0.44360876083374023, + "epoch": 0.18549140844057804, + "kl_loss": 0.16904665529727936, + "loss_ib": 0.003118614200502634, + "step": 645 + }, + { + "ce_ib": 6.123544692993164, + "ce_orig": 0.8242998719215393, + "epoch": 0.18549140844057804, + "kl_loss": 0.11964607238769531, + "loss_ib": 0.0018088150536641479, + "step": 645 + }, + { + "ce_ib": 7.845162391662598, + "ce_orig": 1.008780598640442, + "epoch": 0.18549140844057804, + "kl_loss": 0.07876063883304596, + "loss_ib": 0.0015721225645393133, + "step": 645 + }, + { + "ce_ib": 7.595258712768555, + "ce_orig": 0.41073235869407654, + "epoch": 0.18549140844057804, + "kl_loss": 0.14960895478725433, + "loss_ib": 0.0022556153126060963, + "step": 645 + }, + { + "ce_ib": 8.565896034240723, + "ce_orig": 0.8796496391296387, + "epoch": 0.1857789920195557, + "kl_loss": 0.17831774055957794, + "loss_ib": 0.0026397667825222015, + "step": 646 + }, + { + "ce_ib": 7.068782329559326, + "ce_orig": 0.5573152899742126, + "epoch": 0.1857789920195557, + "kl_loss": 0.13099712133407593, + "loss_ib": 0.002016849583014846, + "step": 646 + }, + { + "ce_ib": 7.277317047119141, + "ce_orig": 0.6383960843086243, + "epoch": 0.1857789920195557, + "kl_loss": 0.1211213618516922, + "loss_ib": 0.0019389452645555139, + "step": 646 + }, + { + "ce_ib": 6.5935797691345215, + "ce_orig": 0.6593443155288696, + "epoch": 0.1857789920195557, + "kl_loss": 0.12146840989589691, + "loss_ib": 0.0018740420928224921, + "step": 646 + }, + { + "ce_ib": 9.572060585021973, + "ce_orig": 0.9575638175010681, + "epoch": 0.18606657559853332, + "kl_loss": 0.14028745889663696, + "loss_ib": 0.0023600806016474962, + "step": 647 + }, + { + "ce_ib": 9.048948287963867, + "ce_orig": 0.7253939509391785, + "epoch": 0.18606657559853332, + "kl_loss": 0.11062663793563843, + "loss_ib": 0.0020111610647290945, + "step": 647 + }, + { + "ce_ib": 7.138680458068848, + "ce_orig": 0.7415602803230286, + "epoch": 0.18606657559853332, + "kl_loss": 0.1857338845729828, + "loss_ib": 0.0025712070055305958, + "step": 647 + }, + { + "ce_ib": 10.374600410461426, + "ce_orig": 0.7181751132011414, + "epoch": 0.18606657559853332, + "kl_loss": 0.23679107427597046, + "loss_ib": 0.0034053707495331764, + "step": 647 + }, + { + "ce_ib": 9.705562591552734, + "ce_orig": 1.087332010269165, + "epoch": 0.18635415917751097, + "kl_loss": 0.16214729845523834, + "loss_ib": 0.002592029282823205, + "step": 648 + }, + { + "ce_ib": 4.748839855194092, + "ce_orig": 0.2743741571903229, + "epoch": 0.18635415917751097, + "kl_loss": 0.2661653161048889, + "loss_ib": 0.0031365370377898216, + "step": 648 + }, + { + "ce_ib": 9.06843090057373, + "ce_orig": 0.987123966217041, + "epoch": 0.18635415917751097, + "kl_loss": 0.12213167548179626, + "loss_ib": 0.002128159860149026, + "step": 648 + }, + { + "ce_ib": 11.009614944458008, + "ce_orig": 1.2969286441802979, + "epoch": 0.18635415917751097, + "kl_loss": 0.1587267965078354, + "loss_ib": 0.0026882293168455362, + "step": 648 + }, + { + "ce_ib": 7.768403053283691, + "ce_orig": 0.9510530233383179, + "epoch": 0.1866417427564886, + "kl_loss": 0.144602969288826, + "loss_ib": 0.002222870010882616, + "step": 649 + }, + { + "ce_ib": 9.275205612182617, + "ce_orig": 0.5832199454307556, + "epoch": 0.1866417427564886, + "kl_loss": 0.18980665504932404, + "loss_ib": 0.0028255870565772057, + "step": 649 + }, + { + "ce_ib": 6.574225902557373, + "ce_orig": 0.6462238430976868, + "epoch": 0.1866417427564886, + "kl_loss": 0.156391441822052, + "loss_ib": 0.0022213368210941553, + "step": 649 + }, + { + "ce_ib": 4.824717044830322, + "ce_orig": 0.37470337748527527, + "epoch": 0.1866417427564886, + "kl_loss": 0.17615503072738647, + "loss_ib": 0.002244021976366639, + "step": 649 + }, + { + "epoch": 0.18692932633546624, + "grad_norm": 0.11466971039772034, + "learning_rate": 4.986893197145237e-05, + "loss": 0.8451, + "step": 650 + }, + { + "ce_ib": 11.404997825622559, + "ce_orig": 1.0702717304229736, + "epoch": 0.18692932633546624, + "kl_loss": 0.16459330916404724, + "loss_ib": 0.0027864326257258654, + "step": 650 + }, + { + "ce_ib": 8.943628311157227, + "ce_orig": 1.3345712423324585, + "epoch": 0.18692932633546624, + "kl_loss": 0.24026933312416077, + "loss_ib": 0.0032970558386296034, + "step": 650 + }, + { + "ce_ib": 9.607685089111328, + "ce_orig": 0.7295346856117249, + "epoch": 0.18692932633546624, + "kl_loss": 0.23726195096969604, + "loss_ib": 0.0033333878964185715, + "step": 650 + }, + { + "ce_ib": 4.072868824005127, + "ce_orig": 0.21132893860340118, + "epoch": 0.18692932633546624, + "kl_loss": 0.13498154282569885, + "loss_ib": 0.001757102319970727, + "step": 650 + }, + { + "ce_ib": 9.069856643676758, + "ce_orig": 0.3936823606491089, + "epoch": 0.1872169099144439, + "kl_loss": 0.21264250576496124, + "loss_ib": 0.0030334105249494314, + "step": 651 + }, + { + "ce_ib": 8.176159858703613, + "ce_orig": 0.7149850726127625, + "epoch": 0.1872169099144439, + "kl_loss": 0.11887285113334656, + "loss_ib": 0.002006344497203827, + "step": 651 + }, + { + "ce_ib": 6.622856140136719, + "ce_orig": 0.4089096188545227, + "epoch": 0.1872169099144439, + "kl_loss": 0.11365403234958649, + "loss_ib": 0.001798825804144144, + "step": 651 + }, + { + "ce_ib": 6.9014081954956055, + "ce_orig": 0.43414247035980225, + "epoch": 0.1872169099144439, + "kl_loss": 0.1480683982372284, + "loss_ib": 0.0021708246786147356, + "step": 651 + }, + { + "ce_ib": 9.448698043823242, + "ce_orig": 0.7146212458610535, + "epoch": 0.18750449349342152, + "kl_loss": 0.1836353838443756, + "loss_ib": 0.0027812235057353973, + "step": 652 + }, + { + "ce_ib": 7.287289619445801, + "ce_orig": 0.7478001713752747, + "epoch": 0.18750449349342152, + "kl_loss": 0.1715984046459198, + "loss_ib": 0.0024447129108011723, + "step": 652 + }, + { + "ce_ib": 9.546160697937012, + "ce_orig": 1.0961014032363892, + "epoch": 0.18750449349342152, + "kl_loss": 0.16767415404319763, + "loss_ib": 0.0026313576381653547, + "step": 652 + }, + { + "ce_ib": 10.555652618408203, + "ce_orig": 1.3851077556610107, + "epoch": 0.18750449349342152, + "kl_loss": 0.35710737109184265, + "loss_ib": 0.004626638721674681, + "step": 652 + }, + { + "ce_ib": 5.79029655456543, + "ce_orig": 0.8410673141479492, + "epoch": 0.18779207707239917, + "kl_loss": 0.12491299211978912, + "loss_ib": 0.001828159554861486, + "step": 653 + }, + { + "ce_ib": 10.17906665802002, + "ce_orig": 0.971206545829773, + "epoch": 0.18779207707239917, + "kl_loss": 0.12395337969064713, + "loss_ib": 0.0022574402391910553, + "step": 653 + }, + { + "ce_ib": 11.249361991882324, + "ce_orig": 1.02909255027771, + "epoch": 0.18779207707239917, + "kl_loss": 0.13359886407852173, + "loss_ib": 0.0024609246756881475, + "step": 653 + }, + { + "ce_ib": 7.954268932342529, + "ce_orig": 0.742697536945343, + "epoch": 0.18779207707239917, + "kl_loss": 0.14216434955596924, + "loss_ib": 0.002217070432379842, + "step": 653 + }, + { + "ce_ib": 12.005528450012207, + "ce_orig": 1.5412335395812988, + "epoch": 0.1880796606513768, + "kl_loss": 0.14288672804832458, + "loss_ib": 0.0026294200215488672, + "step": 654 + }, + { + "ce_ib": 5.368361949920654, + "ce_orig": 0.598798394203186, + "epoch": 0.1880796606513768, + "kl_loss": 0.1084536612033844, + "loss_ib": 0.001621372764930129, + "step": 654 + }, + { + "ce_ib": 8.782524108886719, + "ce_orig": 0.9397916197776794, + "epoch": 0.1880796606513768, + "kl_loss": 0.09554749727249146, + "loss_ib": 0.0018337273504585028, + "step": 654 + }, + { + "ce_ib": 9.918803215026855, + "ce_orig": 0.6324499249458313, + "epoch": 0.1880796606513768, + "kl_loss": 0.16397729516029358, + "loss_ib": 0.0026316531002521515, + "step": 654 + }, + { + "epoch": 0.18836724423035445, + "grad_norm": 0.10257323831319809, + "learning_rate": 4.986493373120502e-05, + "loss": 0.8898, + "step": 655 + }, + { + "ce_ib": 10.346714973449707, + "ce_orig": 0.493673712015152, + "epoch": 0.18836724423035445, + "kl_loss": 0.255359947681427, + "loss_ib": 0.0035882708616554737, + "step": 655 + }, + { + "ce_ib": 7.25407075881958, + "ce_orig": 0.8773781061172485, + "epoch": 0.18836724423035445, + "kl_loss": 0.1956409215927124, + "loss_ib": 0.002681816229596734, + "step": 655 + }, + { + "ce_ib": 11.828226089477539, + "ce_orig": 1.498853325843811, + "epoch": 0.18836724423035445, + "kl_loss": 0.18627619743347168, + "loss_ib": 0.0030455845408141613, + "step": 655 + }, + { + "ce_ib": 6.772419452667236, + "ce_orig": 0.41002458333969116, + "epoch": 0.18836724423035445, + "kl_loss": 0.10855728387832642, + "loss_ib": 0.0017628148198127747, + "step": 655 + }, + { + "ce_ib": 10.052515983581543, + "ce_orig": 0.9797083735466003, + "epoch": 0.1886548278093321, + "kl_loss": 0.16761581599712372, + "loss_ib": 0.0026814097072929144, + "step": 656 + }, + { + "ce_ib": 9.526687622070312, + "ce_orig": 0.9061644077301025, + "epoch": 0.1886548278093321, + "kl_loss": 0.131792351603508, + "loss_ib": 0.0022705921437591314, + "step": 656 + }, + { + "ce_ib": 5.809006690979004, + "ce_orig": 0.8629180788993835, + "epoch": 0.1886548278093321, + "kl_loss": 0.13694535195827484, + "loss_ib": 0.0019503540825098753, + "step": 656 + }, + { + "ce_ib": 7.719155311584473, + "ce_orig": 0.8281040787696838, + "epoch": 0.1886548278093321, + "kl_loss": 0.1683327555656433, + "loss_ib": 0.002455243142321706, + "step": 656 + }, + { + "ce_ib": 11.467092514038086, + "ce_orig": 0.7774757146835327, + "epoch": 0.18894241138830972, + "kl_loss": 0.17787596583366394, + "loss_ib": 0.0029254688415676355, + "step": 657 + }, + { + "ce_ib": 9.500710487365723, + "ce_orig": 0.6401790976524353, + "epoch": 0.18894241138830972, + "kl_loss": 0.20965366065502167, + "loss_ib": 0.0030466075986623764, + "step": 657 + }, + { + "ce_ib": 9.324553489685059, + "ce_orig": 0.6780283451080322, + "epoch": 0.18894241138830972, + "kl_loss": 0.18367739021778107, + "loss_ib": 0.0027692292351275682, + "step": 657 + }, + { + "ce_ib": 9.310490608215332, + "ce_orig": 0.8386406302452087, + "epoch": 0.18894241138830972, + "kl_loss": 0.18684491515159607, + "loss_ib": 0.002799498150125146, + "step": 657 + }, + { + "ce_ib": 10.932252883911133, + "ce_orig": 1.1578938961029053, + "epoch": 0.18922999496728737, + "kl_loss": 0.17252197861671448, + "loss_ib": 0.002818444976583123, + "step": 658 + }, + { + "ce_ib": 9.024280548095703, + "ce_orig": 0.5365549325942993, + "epoch": 0.18922999496728737, + "kl_loss": 0.12610819935798645, + "loss_ib": 0.0021635100711137056, + "step": 658 + }, + { + "ce_ib": 8.524632453918457, + "ce_orig": 0.6664254665374756, + "epoch": 0.18922999496728737, + "kl_loss": 0.08303587883710861, + "loss_ib": 0.0016828221268951893, + "step": 658 + }, + { + "ce_ib": 9.70861530303955, + "ce_orig": 1.0630748271942139, + "epoch": 0.18922999496728737, + "kl_loss": 0.17968884110450745, + "loss_ib": 0.002767750062048435, + "step": 658 + }, + { + "ce_ib": 8.12465763092041, + "ce_orig": 1.105127215385437, + "epoch": 0.189517578546265, + "kl_loss": 0.13839149475097656, + "loss_ib": 0.0021963806357234716, + "step": 659 + }, + { + "ce_ib": 10.226935386657715, + "ce_orig": 0.6290950775146484, + "epoch": 0.189517578546265, + "kl_loss": 0.20230096578598022, + "loss_ib": 0.003045703051611781, + "step": 659 + }, + { + "ce_ib": 11.315914154052734, + "ce_orig": 1.3234468698501587, + "epoch": 0.189517578546265, + "kl_loss": 0.15705978870391846, + "loss_ib": 0.002702189376577735, + "step": 659 + }, + { + "ce_ib": 13.434019088745117, + "ce_orig": 1.325426459312439, + "epoch": 0.189517578546265, + "kl_loss": 0.2508019208908081, + "loss_ib": 0.0038514207117259502, + "step": 659 + }, + { + "epoch": 0.18980516212524265, + "grad_norm": 0.08787354081869125, + "learning_rate": 4.986087558556104e-05, + "loss": 0.8634, + "step": 660 + }, + { + "ce_ib": 8.652778625488281, + "ce_orig": 0.8134385347366333, + "epoch": 0.18980516212524265, + "kl_loss": 0.147006094455719, + "loss_ib": 0.0023353388532996178, + "step": 660 + }, + { + "ce_ib": 9.470593452453613, + "ce_orig": 0.833583652973175, + "epoch": 0.18980516212524265, + "kl_loss": 0.1043887585401535, + "loss_ib": 0.0019909467082470655, + "step": 660 + }, + { + "ce_ib": 9.23963737487793, + "ce_orig": 0.939871609210968, + "epoch": 0.18980516212524265, + "kl_loss": 0.12045232951641083, + "loss_ib": 0.0021284869872033596, + "step": 660 + }, + { + "ce_ib": 8.225679397583008, + "ce_orig": 0.6521763801574707, + "epoch": 0.18980516212524265, + "kl_loss": 0.1941739022731781, + "loss_ib": 0.002764306962490082, + "step": 660 + }, + { + "ce_ib": 8.555021286010742, + "ce_orig": 0.6295510530471802, + "epoch": 0.1900927457042203, + "kl_loss": 0.13717862963676453, + "loss_ib": 0.0022272884380072355, + "step": 661 + }, + { + "ce_ib": 11.997432708740234, + "ce_orig": 1.630189299583435, + "epoch": 0.1900927457042203, + "kl_loss": 0.1967429369688034, + "loss_ib": 0.003167172661051154, + "step": 661 + }, + { + "ce_ib": 8.604825019836426, + "ce_orig": 0.9493278861045837, + "epoch": 0.1900927457042203, + "kl_loss": 0.1191963478922844, + "loss_ib": 0.0020524458959698677, + "step": 661 + }, + { + "ce_ib": 8.901871681213379, + "ce_orig": 0.6020686626434326, + "epoch": 0.1900927457042203, + "kl_loss": 0.1835116744041443, + "loss_ib": 0.002725303638726473, + "step": 661 + }, + { + "ce_ib": 4.333002090454102, + "ce_orig": 0.5005955100059509, + "epoch": 0.19038032928319792, + "kl_loss": 0.1462855488061905, + "loss_ib": 0.0018961557652801275, + "step": 662 + }, + { + "ce_ib": 10.557404518127441, + "ce_orig": 0.5266630053520203, + "epoch": 0.19038032928319792, + "kl_loss": 0.2895011901855469, + "loss_ib": 0.003950752317905426, + "step": 662 + }, + { + "ce_ib": 9.212621688842773, + "ce_orig": 0.936205267906189, + "epoch": 0.19038032928319792, + "kl_loss": 0.1948644369840622, + "loss_ib": 0.0028699063695967197, + "step": 662 + }, + { + "ce_ib": 8.389484405517578, + "ce_orig": 1.0228606462478638, + "epoch": 0.19038032928319792, + "kl_loss": 0.1212107315659523, + "loss_ib": 0.0020510556641966105, + "step": 662 + }, + { + "ce_ib": 7.66838264465332, + "ce_orig": 0.8763972520828247, + "epoch": 0.19066791286217558, + "kl_loss": 0.13818049430847168, + "loss_ib": 0.002148643136024475, + "step": 663 + }, + { + "ce_ib": 7.3809638023376465, + "ce_orig": 0.8760581612586975, + "epoch": 0.19066791286217558, + "kl_loss": 0.16190586984157562, + "loss_ib": 0.0023571550846099854, + "step": 663 + }, + { + "ce_ib": 12.378966331481934, + "ce_orig": 1.522248387336731, + "epoch": 0.19066791286217558, + "kl_loss": 0.27928757667541504, + "loss_ib": 0.0040307724848389626, + "step": 663 + }, + { + "ce_ib": 6.554606914520264, + "ce_orig": 0.6486589312553406, + "epoch": 0.19066791286217558, + "kl_loss": 0.16327285766601562, + "loss_ib": 0.0022881892509758472, + "step": 663 + }, + { + "ce_ib": 6.294197082519531, + "ce_orig": 0.5333172082901001, + "epoch": 0.1909554964411532, + "kl_loss": 0.10702653229236603, + "loss_ib": 0.0016996850026771426, + "step": 664 + }, + { + "ce_ib": 10.409896850585938, + "ce_orig": 1.274090051651001, + "epoch": 0.1909554964411532, + "kl_loss": 0.09604034572839737, + "loss_ib": 0.0020013931207358837, + "step": 664 + }, + { + "ce_ib": 6.934921741485596, + "ce_orig": 0.5752331614494324, + "epoch": 0.1909554964411532, + "kl_loss": 0.10759134590625763, + "loss_ib": 0.0017694055568426847, + "step": 664 + }, + { + "ce_ib": 8.656540870666504, + "ce_orig": 0.9213974475860596, + "epoch": 0.1909554964411532, + "kl_loss": 0.0822703018784523, + "loss_ib": 0.001688356976956129, + "step": 664 + }, + { + "epoch": 0.19124308002013085, + "grad_norm": 0.10957484692335129, + "learning_rate": 4.985675754429744e-05, + "loss": 0.8824, + "step": 665 + }, + { + "ce_ib": 7.545868873596191, + "ce_orig": 0.7198473811149597, + "epoch": 0.19124308002013085, + "kl_loss": 0.07544055581092834, + "loss_ib": 0.0015089923981577158, + "step": 665 + }, + { + "ce_ib": 5.871424674987793, + "ce_orig": 0.5256696939468384, + "epoch": 0.19124308002013085, + "kl_loss": 0.11832289397716522, + "loss_ib": 0.001770371338352561, + "step": 665 + }, + { + "ce_ib": 8.345460891723633, + "ce_orig": 0.8390303254127502, + "epoch": 0.19124308002013085, + "kl_loss": 0.13851025700569153, + "loss_ib": 0.002219648566097021, + "step": 665 + }, + { + "ce_ib": 10.029878616333008, + "ce_orig": 0.7570701837539673, + "epoch": 0.19124308002013085, + "kl_loss": 0.1960536539554596, + "loss_ib": 0.0029635243117809296, + "step": 665 + }, + { + "ce_ib": 12.230276107788086, + "ce_orig": 1.078134298324585, + "epoch": 0.1915306635991085, + "kl_loss": 0.17366138100624084, + "loss_ib": 0.0029596411623060703, + "step": 666 + }, + { + "ce_ib": 5.4964823722839355, + "ce_orig": 0.30615153908729553, + "epoch": 0.1915306635991085, + "kl_loss": 0.11125549674034119, + "loss_ib": 0.0016622032271698117, + "step": 666 + }, + { + "ce_ib": 11.66408634185791, + "ce_orig": 1.4393538236618042, + "epoch": 0.1915306635991085, + "kl_loss": 0.16732355952262878, + "loss_ib": 0.002839644206687808, + "step": 666 + }, + { + "ce_ib": 8.240169525146484, + "ce_orig": 0.8983179926872253, + "epoch": 0.1915306635991085, + "kl_loss": 0.13795308768749237, + "loss_ib": 0.0022035478614270687, + "step": 666 + }, + { + "ce_ib": 9.5233793258667, + "ce_orig": 1.325028657913208, + "epoch": 0.19181824717808613, + "kl_loss": 0.2247726023197174, + "loss_ib": 0.0032000639475882053, + "step": 667 + }, + { + "ce_ib": 7.075375556945801, + "ce_orig": 0.6608232855796814, + "epoch": 0.19181824717808613, + "kl_loss": 0.13226626813411713, + "loss_ib": 0.0020302000921219587, + "step": 667 + }, + { + "ce_ib": 12.767308235168457, + "ce_orig": 1.7473585605621338, + "epoch": 0.19181824717808613, + "kl_loss": 0.19319532811641693, + "loss_ib": 0.0032086840365082026, + "step": 667 + }, + { + "ce_ib": 13.47103500366211, + "ce_orig": 1.497598648071289, + "epoch": 0.19181824717808613, + "kl_loss": 0.14107322692871094, + "loss_ib": 0.002757835667580366, + "step": 667 + }, + { + "ce_ib": 8.687394142150879, + "ce_orig": 0.9108020663261414, + "epoch": 0.19210583075706378, + "kl_loss": 0.11285087466239929, + "loss_ib": 0.0019972482696175575, + "step": 668 + }, + { + "ce_ib": 9.960009574890137, + "ce_orig": 1.163490653038025, + "epoch": 0.19210583075706378, + "kl_loss": 0.13729822635650635, + "loss_ib": 0.0023689831141382456, + "step": 668 + }, + { + "ce_ib": 9.403281211853027, + "ce_orig": 0.7250685691833496, + "epoch": 0.19210583075706378, + "kl_loss": 0.18609736859798431, + "loss_ib": 0.002801301656290889, + "step": 668 + }, + { + "ce_ib": 7.637380599975586, + "ce_orig": 0.7920053601264954, + "epoch": 0.19210583075706378, + "kl_loss": 0.30280235409736633, + "loss_ib": 0.003791761351749301, + "step": 668 + }, + { + "ce_ib": 7.223052978515625, + "ce_orig": 0.5900572538375854, + "epoch": 0.1923934143360414, + "kl_loss": 0.11546684801578522, + "loss_ib": 0.0018769737798720598, + "step": 669 + }, + { + "ce_ib": 8.318660736083984, + "ce_orig": 0.772509753704071, + "epoch": 0.1923934143360414, + "kl_loss": 0.1792832911014557, + "loss_ib": 0.0026246989145874977, + "step": 669 + }, + { + "ce_ib": 5.634905815124512, + "ce_orig": 0.7107937932014465, + "epoch": 0.1923934143360414, + "kl_loss": 0.10171617567539215, + "loss_ib": 0.0015806523151695728, + "step": 669 + }, + { + "ce_ib": 9.892749786376953, + "ce_orig": 0.6184588670730591, + "epoch": 0.1923934143360414, + "kl_loss": 0.20480774343013763, + "loss_ib": 0.003037352580577135, + "step": 669 + }, + { + "epoch": 0.19268099791501905, + "grad_norm": 0.0848066508769989, + "learning_rate": 4.985257961733553e-05, + "loss": 0.8125, + "step": 670 + }, + { + "ce_ib": 6.28206205368042, + "ce_orig": 0.525664210319519, + "epoch": 0.19268099791501905, + "kl_loss": 0.21931828558444977, + "loss_ib": 0.0028213891200721264, + "step": 670 + }, + { + "ce_ib": 8.594695091247559, + "ce_orig": 1.0051332712173462, + "epoch": 0.19268099791501905, + "kl_loss": 0.20822405815124512, + "loss_ib": 0.002941709943115711, + "step": 670 + }, + { + "ce_ib": 6.8549370765686035, + "ce_orig": 0.5342727899551392, + "epoch": 0.19268099791501905, + "kl_loss": 0.15061280131340027, + "loss_ib": 0.0021916215773671865, + "step": 670 + }, + { + "ce_ib": 12.691079139709473, + "ce_orig": 1.5980875492095947, + "epoch": 0.19268099791501905, + "kl_loss": 0.18997497856616974, + "loss_ib": 0.003168857656419277, + "step": 670 + }, + { + "ce_ib": 9.243292808532715, + "ce_orig": 0.7765840291976929, + "epoch": 0.1929685814939967, + "kl_loss": 0.134027898311615, + "loss_ib": 0.0022646081633865833, + "step": 671 + }, + { + "ce_ib": 7.516940593719482, + "ce_orig": 0.7874254584312439, + "epoch": 0.1929685814939967, + "kl_loss": 0.17721888422966003, + "loss_ib": 0.0025238830130547285, + "step": 671 + }, + { + "ce_ib": 10.570497512817383, + "ce_orig": 1.05923593044281, + "epoch": 0.1929685814939967, + "kl_loss": 0.11501814424991608, + "loss_ib": 0.002207231242209673, + "step": 671 + }, + { + "ce_ib": 12.406679153442383, + "ce_orig": 1.1707953214645386, + "epoch": 0.1929685814939967, + "kl_loss": 0.22512602806091309, + "loss_ib": 0.0034919281024485826, + "step": 671 + }, + { + "ce_ib": 7.20285701751709, + "ce_orig": 0.9570423364639282, + "epoch": 0.19325616507297433, + "kl_loss": 0.17423811554908752, + "loss_ib": 0.0024626669473946095, + "step": 672 + }, + { + "ce_ib": 9.27402400970459, + "ce_orig": 0.7955689430236816, + "epoch": 0.19325616507297433, + "kl_loss": 0.20847541093826294, + "loss_ib": 0.0030121563468128443, + "step": 672 + }, + { + "ce_ib": 7.827235698699951, + "ce_orig": 0.6374052166938782, + "epoch": 0.19325616507297433, + "kl_loss": 0.18218199908733368, + "loss_ib": 0.002604543464258313, + "step": 672 + }, + { + "ce_ib": 9.849845886230469, + "ce_orig": 0.8688628673553467, + "epoch": 0.19325616507297433, + "kl_loss": 0.12743963301181793, + "loss_ib": 0.0022593808826059103, + "step": 672 + }, + { + "ce_ib": 7.564920425415039, + "ce_orig": 0.7611903548240662, + "epoch": 0.19354374865195198, + "kl_loss": 0.1504932940006256, + "loss_ib": 0.0022614249028265476, + "step": 673 + }, + { + "ce_ib": 9.132747650146484, + "ce_orig": 0.8816638588905334, + "epoch": 0.19354374865195198, + "kl_loss": 0.10706878453493118, + "loss_ib": 0.001983962720260024, + "step": 673 + }, + { + "ce_ib": 9.165678977966309, + "ce_orig": 0.9871057868003845, + "epoch": 0.19354374865195198, + "kl_loss": 0.5221493244171143, + "loss_ib": 0.006138061173260212, + "step": 673 + }, + { + "ce_ib": 11.87661361694336, + "ce_orig": 1.1763601303100586, + "epoch": 0.19354374865195198, + "kl_loss": 0.17384997010231018, + "loss_ib": 0.002926160814240575, + "step": 673 + }, + { + "ce_ib": 8.544137954711914, + "ce_orig": 0.5830659866333008, + "epoch": 0.1938313322309296, + "kl_loss": 0.2833155393600464, + "loss_ib": 0.003687569173052907, + "step": 674 + }, + { + "ce_ib": 5.531364440917969, + "ce_orig": 0.6243769526481628, + "epoch": 0.1938313322309296, + "kl_loss": 0.15741299092769623, + "loss_ib": 0.0021272662561386824, + "step": 674 + }, + { + "ce_ib": 8.059561729431152, + "ce_orig": 0.5852807760238647, + "epoch": 0.1938313322309296, + "kl_loss": 0.11351338028907776, + "loss_ib": 0.0019410898676142097, + "step": 674 + }, + { + "ce_ib": 4.790435314178467, + "ce_orig": 0.24659956991672516, + "epoch": 0.1938313322309296, + "kl_loss": 0.10444021970033646, + "loss_ib": 0.0015234457096084952, + "step": 674 + }, + { + "epoch": 0.19411891580990726, + "grad_norm": 0.0875202864408493, + "learning_rate": 4.984834181474093e-05, + "loss": 0.8311, + "step": 675 + }, + { + "ce_ib": 7.58983039855957, + "ce_orig": 0.4601333737373352, + "epoch": 0.19411891580990726, + "kl_loss": 0.30331283807754517, + "loss_ib": 0.0037921112962067127, + "step": 675 + }, + { + "ce_ib": 12.82439136505127, + "ce_orig": 1.5256766080856323, + "epoch": 0.19411891580990726, + "kl_loss": 0.13840830326080322, + "loss_ib": 0.0026665222831070423, + "step": 675 + }, + { + "ce_ib": 9.367082595825195, + "ce_orig": 0.7142963409423828, + "epoch": 0.19411891580990726, + "kl_loss": 0.11720157414674759, + "loss_ib": 0.002108723856508732, + "step": 675 + }, + { + "ce_ib": 8.170831680297852, + "ce_orig": 0.8159179091453552, + "epoch": 0.19411891580990726, + "kl_loss": 0.1447296142578125, + "loss_ib": 0.0022643792908638716, + "step": 675 + }, + { + "ce_ib": 7.056220054626465, + "ce_orig": 0.4294247627258301, + "epoch": 0.1944064993888849, + "kl_loss": 0.11858707666397095, + "loss_ib": 0.0018914927495643497, + "step": 676 + }, + { + "ce_ib": 6.837160110473633, + "ce_orig": 0.4128968417644501, + "epoch": 0.1944064993888849, + "kl_loss": 0.12182068079710007, + "loss_ib": 0.0019019227474927902, + "step": 676 + }, + { + "ce_ib": 6.321423530578613, + "ce_orig": 0.5308006405830383, + "epoch": 0.1944064993888849, + "kl_loss": 0.13625647127628326, + "loss_ib": 0.0019947069231420755, + "step": 676 + }, + { + "ce_ib": 8.114527702331543, + "ce_orig": 0.7963853478431702, + "epoch": 0.1944064993888849, + "kl_loss": 0.14647287130355835, + "loss_ib": 0.0022761814761906862, + "step": 676 + }, + { + "ce_ib": 6.881494998931885, + "ce_orig": 0.49466657638549805, + "epoch": 0.19469408296786253, + "kl_loss": 0.11938363313674927, + "loss_ib": 0.0018819858087226748, + "step": 677 + }, + { + "ce_ib": 11.58544921875, + "ce_orig": 0.9181330800056458, + "epoch": 0.19469408296786253, + "kl_loss": 0.21157556772232056, + "loss_ib": 0.0032743006013333797, + "step": 677 + }, + { + "ce_ib": 8.724087715148926, + "ce_orig": 0.7483495473861694, + "epoch": 0.19469408296786253, + "kl_loss": 0.24710629880428314, + "loss_ib": 0.0033434717915952206, + "step": 677 + }, + { + "ce_ib": 10.569550514221191, + "ce_orig": 1.092028260231018, + "epoch": 0.19469408296786253, + "kl_loss": 0.1760406792163849, + "loss_ib": 0.0028173618484288454, + "step": 677 + }, + { + "ce_ib": 9.511533737182617, + "ce_orig": 1.0603342056274414, + "epoch": 0.19498166654684018, + "kl_loss": 0.15394163131713867, + "loss_ib": 0.002490569604560733, + "step": 678 + }, + { + "ce_ib": 11.275010108947754, + "ce_orig": 0.7746595144271851, + "epoch": 0.19498166654684018, + "kl_loss": 0.15000641345977783, + "loss_ib": 0.0026275652926415205, + "step": 678 + }, + { + "ce_ib": 8.671473503112793, + "ce_orig": 1.1143134832382202, + "epoch": 0.19498166654684018, + "kl_loss": 0.10246935486793518, + "loss_ib": 0.0018918408313766122, + "step": 678 + }, + { + "ce_ib": 6.801086902618408, + "ce_orig": 0.7952865362167358, + "epoch": 0.19498166654684018, + "kl_loss": 0.21663819253444672, + "loss_ib": 0.00284649059176445, + "step": 678 + }, + { + "ce_ib": 3.812221050262451, + "ce_orig": 0.2622551918029785, + "epoch": 0.1952692501258178, + "kl_loss": 0.39648619294166565, + "loss_ib": 0.004346083849668503, + "step": 679 + }, + { + "ce_ib": 9.213861465454102, + "ce_orig": 0.8610155582427979, + "epoch": 0.1952692501258178, + "kl_loss": 0.14125597476959229, + "loss_ib": 0.0023339458275586367, + "step": 679 + }, + { + "ce_ib": 9.094021797180176, + "ce_orig": 0.7715781927108765, + "epoch": 0.1952692501258178, + "kl_loss": 0.16748535633087158, + "loss_ib": 0.0025842555332928896, + "step": 679 + }, + { + "ce_ib": 7.93590784072876, + "ce_orig": 0.555151104927063, + "epoch": 0.1952692501258178, + "kl_loss": 0.14854934811592102, + "loss_ib": 0.002279084175825119, + "step": 679 + }, + { + "epoch": 0.19555683370479546, + "grad_norm": 0.08938612043857574, + "learning_rate": 4.984404414672346e-05, + "loss": 0.8338, + "step": 680 + }, + { + "ce_ib": 5.116811752319336, + "ce_orig": 0.43150871992111206, + "epoch": 0.19555683370479546, + "kl_loss": 0.3063853979110718, + "loss_ib": 0.003575535025447607, + "step": 680 + }, + { + "ce_ib": 13.268800735473633, + "ce_orig": 1.4352538585662842, + "epoch": 0.19555683370479546, + "kl_loss": 0.17058660089969635, + "loss_ib": 0.0030327460262924433, + "step": 680 + }, + { + "ce_ib": 7.152599334716797, + "ce_orig": 0.5593920946121216, + "epoch": 0.19555683370479546, + "kl_loss": 0.09605440497398376, + "loss_ib": 0.0016758039128035307, + "step": 680 + }, + { + "ce_ib": 7.339946269989014, + "ce_orig": 0.482102632522583, + "epoch": 0.19555683370479546, + "kl_loss": 0.16910995543003082, + "loss_ib": 0.00242509413510561, + "step": 680 + }, + { + "ce_ib": 12.716341972351074, + "ce_orig": 1.5933265686035156, + "epoch": 0.1958444172837731, + "kl_loss": 0.19038161635398865, + "loss_ib": 0.0031754502560943365, + "step": 681 + }, + { + "ce_ib": 6.732904434204102, + "ce_orig": 0.5515825748443604, + "epoch": 0.1958444172837731, + "kl_loss": 0.0904855728149414, + "loss_ib": 0.0015781461261212826, + "step": 681 + }, + { + "ce_ib": 6.44156551361084, + "ce_orig": 0.720815122127533, + "epoch": 0.1958444172837731, + "kl_loss": 0.13166779279708862, + "loss_ib": 0.001960834488272667, + "step": 681 + }, + { + "ce_ib": 5.354030132293701, + "ce_orig": 0.7398959398269653, + "epoch": 0.1958444172837731, + "kl_loss": 0.07886020839214325, + "loss_ib": 0.0013240050757303834, + "step": 681 + }, + { + "ce_ib": 9.835302352905273, + "ce_orig": 0.5789269804954529, + "epoch": 0.19613200086275073, + "kl_loss": 0.12779417634010315, + "loss_ib": 0.0022614719346165657, + "step": 682 + }, + { + "ce_ib": 7.039217948913574, + "ce_orig": 0.9762406945228577, + "epoch": 0.19613200086275073, + "kl_loss": 0.07770641148090363, + "loss_ib": 0.0014809858985245228, + "step": 682 + }, + { + "ce_ib": 9.24921703338623, + "ce_orig": 0.9393744468688965, + "epoch": 0.19613200086275073, + "kl_loss": 0.1423812359571457, + "loss_ib": 0.002348734065890312, + "step": 682 + }, + { + "ce_ib": 10.166736602783203, + "ce_orig": 1.2005715370178223, + "epoch": 0.19613200086275073, + "kl_loss": 0.1362869143486023, + "loss_ib": 0.002379542915150523, + "step": 682 + }, + { + "ce_ib": 10.488746643066406, + "ce_orig": 1.114490032196045, + "epoch": 0.19641958444172838, + "kl_loss": 0.1684313416481018, + "loss_ib": 0.002733187982812524, + "step": 683 + }, + { + "ce_ib": 8.479165077209473, + "ce_orig": 1.0899877548217773, + "epoch": 0.19641958444172838, + "kl_loss": 0.17926128208637238, + "loss_ib": 0.002640529302880168, + "step": 683 + }, + { + "ce_ib": 7.596517562866211, + "ce_orig": 0.8943977355957031, + "epoch": 0.19641958444172838, + "kl_loss": 0.15933018922805786, + "loss_ib": 0.0023529534228146076, + "step": 683 + }, + { + "ce_ib": 12.160412788391113, + "ce_orig": 1.4548228979110718, + "epoch": 0.19641958444172838, + "kl_loss": 0.16472113132476807, + "loss_ib": 0.002863252302631736, + "step": 683 + }, + { + "ce_ib": 8.374470710754395, + "ce_orig": 0.7274401783943176, + "epoch": 0.196707168020706, + "kl_loss": 0.27311208844184875, + "loss_ib": 0.0035685678012669086, + "step": 684 + }, + { + "ce_ib": 8.078079223632812, + "ce_orig": 0.6676459908485413, + "epoch": 0.196707168020706, + "kl_loss": 0.210984006524086, + "loss_ib": 0.0029176478274166584, + "step": 684 + }, + { + "ce_ib": 8.356644630432129, + "ce_orig": 0.8512493371963501, + "epoch": 0.196707168020706, + "kl_loss": 0.12694287300109863, + "loss_ib": 0.0021050930954515934, + "step": 684 + }, + { + "ce_ib": 7.5951457023620605, + "ce_orig": 0.7244792580604553, + "epoch": 0.196707168020706, + "kl_loss": 0.08705386519432068, + "loss_ib": 0.0016300531569868326, + "step": 684 + }, + { + "epoch": 0.19699475159968366, + "grad_norm": 0.08720671385526657, + "learning_rate": 4.983968662363723e-05, + "loss": 0.8391, + "step": 685 + }, + { + "ce_ib": 10.551007270812988, + "ce_orig": 1.4780113697052002, + "epoch": 0.19699475159968366, + "kl_loss": 0.1139870285987854, + "loss_ib": 0.0021949708461761475, + "step": 685 + }, + { + "ce_ib": 11.811184883117676, + "ce_orig": 1.1642088890075684, + "epoch": 0.19699475159968366, + "kl_loss": 0.1284218281507492, + "loss_ib": 0.0024653368163853884, + "step": 685 + }, + { + "ce_ib": 9.679302215576172, + "ce_orig": 1.2183902263641357, + "epoch": 0.19699475159968366, + "kl_loss": 0.09832706302404404, + "loss_ib": 0.0019512007711455226, + "step": 685 + }, + { + "ce_ib": 11.119817733764648, + "ce_orig": 1.329836368560791, + "epoch": 0.19699475159968366, + "kl_loss": 0.15665268898010254, + "loss_ib": 0.002678508637472987, + "step": 685 + }, + { + "ce_ib": 6.385128974914551, + "ce_orig": 0.7773191332817078, + "epoch": 0.1972823351786613, + "kl_loss": 0.10081027448177338, + "loss_ib": 0.0016466155648231506, + "step": 686 + }, + { + "ce_ib": 9.239928245544434, + "ce_orig": 1.1652233600616455, + "epoch": 0.1972823351786613, + "kl_loss": 0.12052545696496964, + "loss_ib": 0.0021292471792548895, + "step": 686 + }, + { + "ce_ib": 7.90826416015625, + "ce_orig": 0.6415975093841553, + "epoch": 0.1972823351786613, + "kl_loss": 0.12905505299568176, + "loss_ib": 0.0020813769660890102, + "step": 686 + }, + { + "ce_ib": 7.7638397216796875, + "ce_orig": 1.3246078491210938, + "epoch": 0.1972823351786613, + "kl_loss": 0.10447216033935547, + "loss_ib": 0.0018211054848507047, + "step": 686 + }, + { + "ce_ib": 11.250504493713379, + "ce_orig": 1.2069780826568604, + "epoch": 0.19756991875763893, + "kl_loss": 0.0673050582408905, + "loss_ib": 0.0017981010023504496, + "step": 687 + }, + { + "ce_ib": 7.798498153686523, + "ce_orig": 0.8994898200035095, + "epoch": 0.19756991875763893, + "kl_loss": 0.11936768144369125, + "loss_ib": 0.0019735265523195267, + "step": 687 + }, + { + "ce_ib": 8.09422779083252, + "ce_orig": 1.0519702434539795, + "epoch": 0.19756991875763893, + "kl_loss": 0.17095480859279633, + "loss_ib": 0.002518970984965563, + "step": 687 + }, + { + "ce_ib": 11.222230911254883, + "ce_orig": 0.9225847125053406, + "epoch": 0.19756991875763893, + "kl_loss": 0.16072265803813934, + "loss_ib": 0.0027294494211673737, + "step": 687 + }, + { + "ce_ib": 7.485930919647217, + "ce_orig": 0.771294891834259, + "epoch": 0.1978575023366166, + "kl_loss": 0.13400742411613464, + "loss_ib": 0.0020886673592031, + "step": 688 + }, + { + "ce_ib": 7.196287631988525, + "ce_orig": 0.5827687382698059, + "epoch": 0.1978575023366166, + "kl_loss": 0.16821786761283875, + "loss_ib": 0.0024018073454499245, + "step": 688 + }, + { + "ce_ib": 11.187541007995605, + "ce_orig": 0.7954636812210083, + "epoch": 0.1978575023366166, + "kl_loss": 0.1373216211795807, + "loss_ib": 0.0024919703137129545, + "step": 688 + }, + { + "ce_ib": 14.164189338684082, + "ce_orig": 1.0269262790679932, + "epoch": 0.1978575023366166, + "kl_loss": 0.17505072057247162, + "loss_ib": 0.003166925860568881, + "step": 688 + }, + { + "ce_ib": 8.692153930664062, + "ce_orig": 0.7542197108268738, + "epoch": 0.1981450859155942, + "kl_loss": 0.13267484307289124, + "loss_ib": 0.002195963868871331, + "step": 689 + }, + { + "ce_ib": 7.100341796875, + "ce_orig": 0.8673600554466248, + "epoch": 0.1981450859155942, + "kl_loss": 0.17913147807121277, + "loss_ib": 0.0025013487320393324, + "step": 689 + }, + { + "ce_ib": 7.591789245605469, + "ce_orig": 0.6358543634414673, + "epoch": 0.1981450859155942, + "kl_loss": 0.1959693729877472, + "loss_ib": 0.0027188726235181093, + "step": 689 + }, + { + "ce_ib": 10.430386543273926, + "ce_orig": 1.0774213075637817, + "epoch": 0.1981450859155942, + "kl_loss": 0.1533883512020111, + "loss_ib": 0.0025769220665097237, + "step": 689 + }, + { + "epoch": 0.19843266949457186, + "grad_norm": 0.10669101029634476, + "learning_rate": 4.98352692559805e-05, + "loss": 0.8342, + "step": 690 + }, + { + "ce_ib": 8.840359687805176, + "ce_orig": 0.95009446144104, + "epoch": 0.19843266949457186, + "kl_loss": 0.26050540804862976, + "loss_ib": 0.0034890901297330856, + "step": 690 + }, + { + "ce_ib": 8.28864860534668, + "ce_orig": 0.6670023202896118, + "epoch": 0.19843266949457186, + "kl_loss": 0.2274959236383438, + "loss_ib": 0.0031038240995258093, + "step": 690 + }, + { + "ce_ib": 6.7845330238342285, + "ce_orig": 1.0058348178863525, + "epoch": 0.19843266949457186, + "kl_loss": 0.11773978173732758, + "loss_ib": 0.0018558510346338153, + "step": 690 + }, + { + "ce_ib": 11.961586952209473, + "ce_orig": 1.3419430255889893, + "epoch": 0.19843266949457186, + "kl_loss": 0.1497437208890915, + "loss_ib": 0.0026935958303511143, + "step": 690 + }, + { + "ce_ib": 8.472604751586914, + "ce_orig": 0.4991995096206665, + "epoch": 0.1987202530735495, + "kl_loss": 0.20153102278709412, + "loss_ib": 0.0028625705745071173, + "step": 691 + }, + { + "ce_ib": 9.45351791381836, + "ce_orig": 1.0036801099777222, + "epoch": 0.1987202530735495, + "kl_loss": 0.1353033483028412, + "loss_ib": 0.002298385137692094, + "step": 691 + }, + { + "ce_ib": 7.785329341888428, + "ce_orig": 0.7653356790542603, + "epoch": 0.1987202530735495, + "kl_loss": 0.15528883039951324, + "loss_ib": 0.0023314212448894978, + "step": 691 + }, + { + "ce_ib": 5.879989147186279, + "ce_orig": 0.363296777009964, + "epoch": 0.1987202530735495, + "kl_loss": 0.13487836718559265, + "loss_ib": 0.001936782500706613, + "step": 691 + }, + { + "ce_ib": 11.383331298828125, + "ce_orig": 1.4687296152114868, + "epoch": 0.19900783665252714, + "kl_loss": 0.13896168768405914, + "loss_ib": 0.0025279498659074306, + "step": 692 + }, + { + "ce_ib": 9.530930519104004, + "ce_orig": 1.2369105815887451, + "epoch": 0.19900783665252714, + "kl_loss": 0.14428474009037018, + "loss_ib": 0.0023959404788911343, + "step": 692 + }, + { + "ce_ib": 8.91688346862793, + "ce_orig": 1.2907278537750244, + "epoch": 0.19900783665252714, + "kl_loss": 0.1218222826719284, + "loss_ib": 0.002109911059960723, + "step": 692 + }, + { + "ce_ib": 6.032763957977295, + "ce_orig": 0.49460887908935547, + "epoch": 0.19900783665252714, + "kl_loss": 0.16958531737327576, + "loss_ib": 0.0022991294972598553, + "step": 692 + }, + { + "ce_ib": 8.140901565551758, + "ce_orig": 1.1833499670028687, + "epoch": 0.1992954202315048, + "kl_loss": 0.08373439311981201, + "loss_ib": 0.0016514339949935675, + "step": 693 + }, + { + "ce_ib": 6.6644673347473145, + "ce_orig": 0.7210538387298584, + "epoch": 0.1992954202315048, + "kl_loss": 0.135453999042511, + "loss_ib": 0.002020986517891288, + "step": 693 + }, + { + "ce_ib": 9.980579376220703, + "ce_orig": 1.094584345817566, + "epoch": 0.1992954202315048, + "kl_loss": 0.1299094706773758, + "loss_ib": 0.0022971525322645903, + "step": 693 + }, + { + "ce_ib": 9.847793579101562, + "ce_orig": 0.7237510681152344, + "epoch": 0.1992954202315048, + "kl_loss": 0.26058220863342285, + "loss_ib": 0.0035906012635678053, + "step": 693 + }, + { + "ce_ib": 7.582713603973389, + "ce_orig": 0.5019081234931946, + "epoch": 0.1995830038104824, + "kl_loss": 0.16329146921634674, + "loss_ib": 0.0023911860771477222, + "step": 694 + }, + { + "ce_ib": 10.57071304321289, + "ce_orig": 1.4766757488250732, + "epoch": 0.1995830038104824, + "kl_loss": 0.15818722546100616, + "loss_ib": 0.0026389434933662415, + "step": 694 + }, + { + "ce_ib": 7.770899772644043, + "ce_orig": 0.9018514156341553, + "epoch": 0.1995830038104824, + "kl_loss": 0.08791860938072205, + "loss_ib": 0.0016562759410589933, + "step": 694 + }, + { + "ce_ib": 6.5115766525268555, + "ce_orig": 0.567577600479126, + "epoch": 0.1995830038104824, + "kl_loss": 0.19944868981838226, + "loss_ib": 0.0026456445921212435, + "step": 694 + }, + { + "epoch": 0.19987058738946006, + "grad_norm": 0.09094507992267609, + "learning_rate": 4.983079205439574e-05, + "loss": 0.8932, + "step": 695 + }, + { + "ce_ib": 8.481407165527344, + "ce_orig": 0.986234724521637, + "epoch": 0.19987058738946006, + "kl_loss": 0.2988152503967285, + "loss_ib": 0.003836293239146471, + "step": 695 + }, + { + "ce_ib": 10.597221374511719, + "ce_orig": 1.2706291675567627, + "epoch": 0.19987058738946006, + "kl_loss": 0.13932810723781586, + "loss_ib": 0.002453003078699112, + "step": 695 + }, + { + "ce_ib": 9.393781661987305, + "ce_orig": 0.7206296324729919, + "epoch": 0.19987058738946006, + "kl_loss": 0.12034575641155243, + "loss_ib": 0.0021428356412798166, + "step": 695 + }, + { + "ce_ib": 8.420135498046875, + "ce_orig": 0.6943502426147461, + "epoch": 0.19987058738946006, + "kl_loss": 0.16418200731277466, + "loss_ib": 0.0024838335812091827, + "step": 695 + }, + { + "ce_ib": 9.8673095703125, + "ce_orig": 0.5838234424591064, + "epoch": 0.20015817096843772, + "kl_loss": 0.23933324217796326, + "loss_ib": 0.0033800629898905754, + "step": 696 + }, + { + "ce_ib": 7.822383880615234, + "ce_orig": 1.0413511991500854, + "epoch": 0.20015817096843772, + "kl_loss": 0.08770239353179932, + "loss_ib": 0.0016592623433098197, + "step": 696 + }, + { + "ce_ib": 5.614956855773926, + "ce_orig": 0.6229404211044312, + "epoch": 0.20015817096843772, + "kl_loss": 0.15624278783798218, + "loss_ib": 0.0021239235065877438, + "step": 696 + }, + { + "ce_ib": 7.636229038238525, + "ce_orig": 0.7482424378395081, + "epoch": 0.20015817096843772, + "kl_loss": 0.2800358831882477, + "loss_ib": 0.0035639815032482147, + "step": 696 + }, + { + "ce_ib": 13.990978240966797, + "ce_orig": 1.7175135612487793, + "epoch": 0.20044575454741534, + "kl_loss": 0.20113369822502136, + "loss_ib": 0.0034104345832020044, + "step": 697 + }, + { + "ce_ib": 5.339763641357422, + "ce_orig": 0.4271097779273987, + "epoch": 0.20044575454741534, + "kl_loss": 0.20965386927127838, + "loss_ib": 0.0026305150240659714, + "step": 697 + }, + { + "ce_ib": 8.591266632080078, + "ce_orig": 0.9749876260757446, + "epoch": 0.20044575454741534, + "kl_loss": 0.14603781700134277, + "loss_ib": 0.0023195049725472927, + "step": 697 + }, + { + "ce_ib": 11.095532417297363, + "ce_orig": 0.9422957301139832, + "epoch": 0.20044575454741534, + "kl_loss": 0.18376457691192627, + "loss_ib": 0.002947198925539851, + "step": 697 + }, + { + "ce_ib": 10.03927230834961, + "ce_orig": 1.1190671920776367, + "epoch": 0.200733338126393, + "kl_loss": 0.1347736120223999, + "loss_ib": 0.0023516633082181215, + "step": 698 + }, + { + "ce_ib": 9.63382625579834, + "ce_orig": 0.9594607353210449, + "epoch": 0.200733338126393, + "kl_loss": 0.1937919557094574, + "loss_ib": 0.0029013019520789385, + "step": 698 + }, + { + "ce_ib": 10.83333683013916, + "ce_orig": 0.5495674014091492, + "epoch": 0.200733338126393, + "kl_loss": 0.21553227305412292, + "loss_ib": 0.003238656558096409, + "step": 698 + }, + { + "ce_ib": 5.4021687507629395, + "ce_orig": 0.49759823083877563, + "epoch": 0.200733338126393, + "kl_loss": 0.11563403159379959, + "loss_ib": 0.0016965570393949747, + "step": 698 + }, + { + "ce_ib": 12.474681854248047, + "ce_orig": 0.9798773527145386, + "epoch": 0.20102092170537061, + "kl_loss": 0.19496232271194458, + "loss_ib": 0.0031970911659300327, + "step": 699 + }, + { + "ce_ib": 4.9287333488464355, + "ce_orig": 0.5070614814758301, + "epoch": 0.20102092170537061, + "kl_loss": 0.11845901608467102, + "loss_ib": 0.0016774634132161736, + "step": 699 + }, + { + "ce_ib": 6.752041816711426, + "ce_orig": 0.7083439826965332, + "epoch": 0.20102092170537061, + "kl_loss": 0.13689608871936798, + "loss_ib": 0.002044165041297674, + "step": 699 + }, + { + "ce_ib": 8.316426277160645, + "ce_orig": 0.6727461218833923, + "epoch": 0.20102092170537061, + "kl_loss": 0.1803036332130432, + "loss_ib": 0.002634678967297077, + "step": 699 + }, + { + "epoch": 0.20130850528434827, + "grad_norm": 0.09928663074970245, + "learning_rate": 4.9826255029669577e-05, + "loss": 0.8094, + "step": 700 + }, + { + "ce_ib": 5.444150924682617, + "ce_orig": 0.5673038363456726, + "epoch": 0.20130850528434827, + "kl_loss": 0.09558887034654617, + "loss_ib": 0.0015003037406131625, + "step": 700 + }, + { + "ce_ib": 8.76500129699707, + "ce_orig": 0.9259908199310303, + "epoch": 0.20130850528434827, + "kl_loss": 0.1608695387840271, + "loss_ib": 0.0024851954076439142, + "step": 700 + }, + { + "ce_ib": 10.177801132202148, + "ce_orig": 1.1493618488311768, + "epoch": 0.20130850528434827, + "kl_loss": 0.21496880054473877, + "loss_ib": 0.0031674678903073072, + "step": 700 + }, + { + "ce_ib": 8.045758247375488, + "ce_orig": 1.1165003776550293, + "epoch": 0.20130850528434827, + "kl_loss": 0.14270451664924622, + "loss_ib": 0.0022316209506243467, + "step": 700 + }, + { + "ce_ib": 7.650059223175049, + "ce_orig": 1.008382797241211, + "epoch": 0.20159608886332592, + "kl_loss": 0.12849614024162292, + "loss_ib": 0.0020499674137681723, + "step": 701 + }, + { + "ce_ib": 7.221034526824951, + "ce_orig": 0.6420146226882935, + "epoch": 0.20159608886332592, + "kl_loss": 0.09210291504859924, + "loss_ib": 0.0016431325348094106, + "step": 701 + }, + { + "ce_ib": 11.927567481994629, + "ce_orig": 1.6603496074676514, + "epoch": 0.20159608886332592, + "kl_loss": 0.15888632833957672, + "loss_ib": 0.00278162001632154, + "step": 701 + }, + { + "ce_ib": 8.415051460266113, + "ce_orig": 1.0107392072677612, + "epoch": 0.20159608886332592, + "kl_loss": 0.15115907788276672, + "loss_ib": 0.002353095915168524, + "step": 701 + }, + { + "ce_ib": 11.7966947555542, + "ce_orig": 0.8676571846008301, + "epoch": 0.20188367244230354, + "kl_loss": 0.13981035351753235, + "loss_ib": 0.0025777730625122786, + "step": 702 + }, + { + "ce_ib": 9.650755882263184, + "ce_orig": 1.0907669067382812, + "epoch": 0.20188367244230354, + "kl_loss": 0.21402806043624878, + "loss_ib": 0.0031053561251610518, + "step": 702 + }, + { + "ce_ib": 10.13062572479248, + "ce_orig": 0.6553956270217896, + "epoch": 0.20188367244230354, + "kl_loss": 0.16557368636131287, + "loss_ib": 0.002668799366801977, + "step": 702 + }, + { + "ce_ib": 7.759829521179199, + "ce_orig": 0.5733931660652161, + "epoch": 0.20188367244230354, + "kl_loss": 0.14907249808311462, + "loss_ib": 0.0022667080629616976, + "step": 702 + }, + { + "ce_ib": 11.757640838623047, + "ce_orig": 1.1416308879852295, + "epoch": 0.2021712560212812, + "kl_loss": 0.23711419105529785, + "loss_ib": 0.0035469059366732836, + "step": 703 + }, + { + "ce_ib": 6.699312210083008, + "ce_orig": 0.5058441758155823, + "epoch": 0.2021712560212812, + "kl_loss": 0.11143745481967926, + "loss_ib": 0.001784305670298636, + "step": 703 + }, + { + "ce_ib": 8.576489448547363, + "ce_orig": 0.918267011642456, + "epoch": 0.2021712560212812, + "kl_loss": 0.18565413355827332, + "loss_ib": 0.0027141901664435863, + "step": 703 + }, + { + "ce_ib": 9.381869316101074, + "ce_orig": 1.0155972242355347, + "epoch": 0.2021712560212812, + "kl_loss": 0.21349982917308807, + "loss_ib": 0.003073184983804822, + "step": 703 + }, + { + "ce_ib": 13.443440437316895, + "ce_orig": 1.5300159454345703, + "epoch": 0.20245883960025882, + "kl_loss": 0.1552499383687973, + "loss_ib": 0.0028968434780836105, + "step": 704 + }, + { + "ce_ib": 9.67573356628418, + "ce_orig": 0.6580417156219482, + "epoch": 0.20245883960025882, + "kl_loss": 0.23093733191490173, + "loss_ib": 0.0032769464887678623, + "step": 704 + }, + { + "ce_ib": 4.894504070281982, + "ce_orig": 0.5909029841423035, + "epoch": 0.20245883960025882, + "kl_loss": 0.0857822597026825, + "loss_ib": 0.0013472730061039329, + "step": 704 + }, + { + "ce_ib": 9.48438549041748, + "ce_orig": 1.0949956178665161, + "epoch": 0.20245883960025882, + "kl_loss": 0.1445428431034088, + "loss_ib": 0.002393866889178753, + "step": 704 + }, + { + "epoch": 0.20274642317923647, + "grad_norm": 0.08958058804273605, + "learning_rate": 4.982165819273275e-05, + "loss": 0.8698, + "step": 705 + }, + { + "ce_ib": 8.077401161193848, + "ce_orig": 0.7715499401092529, + "epoch": 0.20274642317923647, + "kl_loss": 0.34654679894447327, + "loss_ib": 0.004273207858204842, + "step": 705 + }, + { + "ce_ib": 6.711842060089111, + "ce_orig": 0.5991529226303101, + "epoch": 0.20274642317923647, + "kl_loss": 0.12920328974723816, + "loss_ib": 0.001963217044249177, + "step": 705 + }, + { + "ce_ib": 8.242565155029297, + "ce_orig": 0.6560894846916199, + "epoch": 0.20274642317923647, + "kl_loss": 0.1585426926612854, + "loss_ib": 0.002409683307632804, + "step": 705 + }, + { + "ce_ib": 7.23456335067749, + "ce_orig": 0.7808642387390137, + "epoch": 0.20274642317923647, + "kl_loss": 0.10972153395414352, + "loss_ib": 0.0018206714885309339, + "step": 705 + }, + { + "ce_ib": 7.769526958465576, + "ce_orig": 1.1062254905700684, + "epoch": 0.20303400675821412, + "kl_loss": 0.07121115922927856, + "loss_ib": 0.0014890641905367374, + "step": 706 + }, + { + "ce_ib": 9.15722370147705, + "ce_orig": 0.7969109416007996, + "epoch": 0.20303400675821412, + "kl_loss": 0.154433012008667, + "loss_ib": 0.0024600522592663765, + "step": 706 + }, + { + "ce_ib": 6.588254451751709, + "ce_orig": 0.43084779381752014, + "epoch": 0.20303400675821412, + "kl_loss": 0.07921120524406433, + "loss_ib": 0.0014509373577311635, + "step": 706 + }, + { + "ce_ib": 8.812955856323242, + "ce_orig": 0.728909969329834, + "epoch": 0.20303400675821412, + "kl_loss": 0.1336958110332489, + "loss_ib": 0.0022182536777108908, + "step": 706 + }, + { + "ce_ib": 13.082417488098145, + "ce_orig": 1.2153578996658325, + "epoch": 0.20332159033719174, + "kl_loss": 0.1605956256389618, + "loss_ib": 0.002914197975769639, + "step": 707 + }, + { + "ce_ib": 10.410371780395508, + "ce_orig": 1.198205590248108, + "epoch": 0.20332159033719174, + "kl_loss": 0.08825291693210602, + "loss_ib": 0.0019235662184655666, + "step": 707 + }, + { + "ce_ib": 11.29565715789795, + "ce_orig": 1.242211103439331, + "epoch": 0.20332159033719174, + "kl_loss": 0.19409918785095215, + "loss_ib": 0.0030705577228218317, + "step": 707 + }, + { + "ce_ib": 8.208379745483398, + "ce_orig": 0.872199296951294, + "epoch": 0.20332159033719174, + "kl_loss": 0.1233471930027008, + "loss_ib": 0.002054309705272317, + "step": 707 + }, + { + "ce_ib": 7.685192584991455, + "ce_orig": 0.9802998304367065, + "epoch": 0.2036091739161694, + "kl_loss": 0.10023842006921768, + "loss_ib": 0.001770903472788632, + "step": 708 + }, + { + "ce_ib": 7.994464874267578, + "ce_orig": 0.7009384632110596, + "epoch": 0.2036091739161694, + "kl_loss": 0.1141389012336731, + "loss_ib": 0.001940835383720696, + "step": 708 + }, + { + "ce_ib": 8.558679580688477, + "ce_orig": 0.6219755411148071, + "epoch": 0.2036091739161694, + "kl_loss": 0.20445235073566437, + "loss_ib": 0.002900391351431608, + "step": 708 + }, + { + "ce_ib": 4.708154678344727, + "ce_orig": 0.8312263488769531, + "epoch": 0.2036091739161694, + "kl_loss": 0.07935391366481781, + "loss_ib": 0.001264354563318193, + "step": 708 + }, + { + "ce_ib": 17.41877555847168, + "ce_orig": 2.1723456382751465, + "epoch": 0.20389675749514702, + "kl_loss": 0.20338605344295502, + "loss_ib": 0.003775737714022398, + "step": 709 + }, + { + "ce_ib": 7.36018705368042, + "ce_orig": 0.9659443497657776, + "epoch": 0.20389675749514702, + "kl_loss": 0.18725061416625977, + "loss_ib": 0.0026085248682647943, + "step": 709 + }, + { + "ce_ib": 9.507402420043945, + "ce_orig": 1.092591404914856, + "epoch": 0.20389675749514702, + "kl_loss": 0.15672272443771362, + "loss_ib": 0.0025179674848914146, + "step": 709 + }, + { + "ce_ib": 9.41507625579834, + "ce_orig": 1.0811327695846558, + "epoch": 0.20389675749514702, + "kl_loss": 0.1266251802444458, + "loss_ib": 0.00220775930210948, + "step": 709 + }, + { + "epoch": 0.20418434107412467, + "grad_norm": 0.11347930133342743, + "learning_rate": 4.98170015546601e-05, + "loss": 0.8764, + "step": 710 + }, + { + "ce_ib": 11.422625541687012, + "ce_orig": 0.9313300251960754, + "epoch": 0.20418434107412467, + "kl_loss": 0.13721315562725067, + "loss_ib": 0.0025143937673419714, + "step": 710 + }, + { + "ce_ib": 6.7661237716674805, + "ce_orig": 0.7199991941452026, + "epoch": 0.20418434107412467, + "kl_loss": 0.17893491685390472, + "loss_ib": 0.0024659615010023117, + "step": 710 + }, + { + "ce_ib": 5.129522323608398, + "ce_orig": 0.4815012812614441, + "epoch": 0.20418434107412467, + "kl_loss": 0.17363528907299042, + "loss_ib": 0.002249305136501789, + "step": 710 + }, + { + "ce_ib": 6.5166168212890625, + "ce_orig": 0.7667418718338013, + "epoch": 0.20418434107412467, + "kl_loss": 0.0964181199669838, + "loss_ib": 0.001615842804312706, + "step": 710 + }, + { + "ce_ib": 5.599911212921143, + "ce_orig": 0.5180696249008179, + "epoch": 0.20447192465310232, + "kl_loss": 0.13735494017601013, + "loss_ib": 0.0019335405668243766, + "step": 711 + }, + { + "ce_ib": 13.007081031799316, + "ce_orig": 1.5647224187850952, + "epoch": 0.20447192465310232, + "kl_loss": 0.12819761037826538, + "loss_ib": 0.0025826841592788696, + "step": 711 + }, + { + "ce_ib": 11.807796478271484, + "ce_orig": 1.4334594011306763, + "epoch": 0.20447192465310232, + "kl_loss": 0.17374610900878906, + "loss_ib": 0.002918240614235401, + "step": 711 + }, + { + "ce_ib": 7.3837714195251465, + "ce_orig": 0.7142208814620972, + "epoch": 0.20447192465310232, + "kl_loss": 0.1720019280910492, + "loss_ib": 0.00245839636772871, + "step": 711 + }, + { + "ce_ib": 7.580549716949463, + "ce_orig": 0.7085816264152527, + "epoch": 0.20475950823207995, + "kl_loss": 0.14903539419174194, + "loss_ib": 0.0022484087385237217, + "step": 712 + }, + { + "ce_ib": 5.8234477043151855, + "ce_orig": 0.5772075057029724, + "epoch": 0.20475950823207995, + "kl_loss": 0.13270236551761627, + "loss_ib": 0.0019093683222308755, + "step": 712 + }, + { + "ce_ib": 6.38820743560791, + "ce_orig": 0.6470109224319458, + "epoch": 0.20475950823207995, + "kl_loss": 0.10315969586372375, + "loss_ib": 0.001670417725108564, + "step": 712 + }, + { + "ce_ib": 8.948882102966309, + "ce_orig": 0.9020084738731384, + "epoch": 0.20475950823207995, + "kl_loss": 0.18468233942985535, + "loss_ib": 0.0027417116798460484, + "step": 712 + }, + { + "ce_ib": 8.562841415405273, + "ce_orig": 0.8737965226173401, + "epoch": 0.2050470918110576, + "kl_loss": 0.1370054930448532, + "loss_ib": 0.002226338954642415, + "step": 713 + }, + { + "ce_ib": 8.57533073425293, + "ce_orig": 1.3104912042617798, + "epoch": 0.2050470918110576, + "kl_loss": 0.18775838613510132, + "loss_ib": 0.002735116984695196, + "step": 713 + }, + { + "ce_ib": 6.767639636993408, + "ce_orig": 0.7026990056037903, + "epoch": 0.2050470918110576, + "kl_loss": 0.1764390766620636, + "loss_ib": 0.00244115456007421, + "step": 713 + }, + { + "ce_ib": 10.467775344848633, + "ce_orig": 0.9951248168945312, + "epoch": 0.2050470918110576, + "kl_loss": 0.14137201011180878, + "loss_ib": 0.0024604976642876863, + "step": 713 + }, + { + "ce_ib": 12.358627319335938, + "ce_orig": 1.6978172063827515, + "epoch": 0.20533467539003522, + "kl_loss": 0.13740137219429016, + "loss_ib": 0.002609876450151205, + "step": 714 + }, + { + "ce_ib": 9.153459548950195, + "ce_orig": 0.7250450849533081, + "epoch": 0.20533467539003522, + "kl_loss": 0.14596766233444214, + "loss_ib": 0.0023750225082039833, + "step": 714 + }, + { + "ce_ib": 6.154027462005615, + "ce_orig": 0.6910421848297119, + "epoch": 0.20533467539003522, + "kl_loss": 0.1430780291557312, + "loss_ib": 0.0020461829844862223, + "step": 714 + }, + { + "ce_ib": 8.212672233581543, + "ce_orig": 0.8080992698669434, + "epoch": 0.20533467539003522, + "kl_loss": 0.15016669034957886, + "loss_ib": 0.002322934102267027, + "step": 714 + }, + { + "epoch": 0.20562225896901287, + "grad_norm": 0.09610775858163834, + "learning_rate": 4.981228512667057e-05, + "loss": 0.8553, + "step": 715 + }, + { + "ce_ib": 7.444067478179932, + "ce_orig": 0.8634759783744812, + "epoch": 0.20562225896901287, + "kl_loss": 0.11479885876178741, + "loss_ib": 0.0018923953175544739, + "step": 715 + }, + { + "ce_ib": 6.535699367523193, + "ce_orig": 0.7095260620117188, + "epoch": 0.20562225896901287, + "kl_loss": 0.08906295150518417, + "loss_ib": 0.0015441994182765484, + "step": 715 + }, + { + "ce_ib": 9.0155029296875, + "ce_orig": 0.5211726427078247, + "epoch": 0.20562225896901287, + "kl_loss": 0.2069450318813324, + "loss_ib": 0.0029710005037486553, + "step": 715 + }, + { + "ce_ib": 7.391125679016113, + "ce_orig": 1.0465866327285767, + "epoch": 0.20562225896901287, + "kl_loss": 0.14382626116275787, + "loss_ib": 0.0021773751359432936, + "step": 715 + }, + { + "ce_ib": 5.707086086273193, + "ce_orig": 0.7374852895736694, + "epoch": 0.2059098425479905, + "kl_loss": 0.1332034170627594, + "loss_ib": 0.0019027426606044173, + "step": 716 + }, + { + "ce_ib": 5.903783798217773, + "ce_orig": 0.7245094180107117, + "epoch": 0.2059098425479905, + "kl_loss": 0.09125680476427078, + "loss_ib": 0.0015029464848339558, + "step": 716 + }, + { + "ce_ib": 9.94698715209961, + "ce_orig": 1.195637583732605, + "epoch": 0.2059098425479905, + "kl_loss": 0.13582003116607666, + "loss_ib": 0.0023528989404439926, + "step": 716 + }, + { + "ce_ib": 7.605693340301514, + "ce_orig": 0.8307034969329834, + "epoch": 0.2059098425479905, + "kl_loss": 0.09922278672456741, + "loss_ib": 0.001752797164954245, + "step": 716 + }, + { + "ce_ib": 9.15215015411377, + "ce_orig": 0.9594472646713257, + "epoch": 0.20619742612696815, + "kl_loss": 0.21580404043197632, + "loss_ib": 0.0030732552986592054, + "step": 717 + }, + { + "ce_ib": 8.207721710205078, + "ce_orig": 0.5845073461532593, + "epoch": 0.20619742612696815, + "kl_loss": 0.3212279975414276, + "loss_ib": 0.004033051896840334, + "step": 717 + }, + { + "ce_ib": 12.13454818725586, + "ce_orig": 1.3720506429672241, + "epoch": 0.20619742612696815, + "kl_loss": 0.17790672183036804, + "loss_ib": 0.002992521971464157, + "step": 717 + }, + { + "ce_ib": 4.388705253601074, + "ce_orig": 0.47980645298957825, + "epoch": 0.20619742612696815, + "kl_loss": 0.12024472653865814, + "loss_ib": 0.0016413177363574505, + "step": 717 + }, + { + "ce_ib": 11.236103057861328, + "ce_orig": 0.76035475730896, + "epoch": 0.2064850097059458, + "kl_loss": 0.1305321604013443, + "loss_ib": 0.0024289318826049566, + "step": 718 + }, + { + "ce_ib": 9.612162590026855, + "ce_orig": 0.898193895816803, + "epoch": 0.2064850097059458, + "kl_loss": 0.15393592417240143, + "loss_ib": 0.002500575501471758, + "step": 718 + }, + { + "ce_ib": 9.815557479858398, + "ce_orig": 1.1573611497879028, + "epoch": 0.2064850097059458, + "kl_loss": 0.23672373592853546, + "loss_ib": 0.003348792903125286, + "step": 718 + }, + { + "ce_ib": 8.44405746459961, + "ce_orig": 0.4511740803718567, + "epoch": 0.2064850097059458, + "kl_loss": 0.21269428730010986, + "loss_ib": 0.002971348585560918, + "step": 718 + }, + { + "ce_ib": 12.143657684326172, + "ce_orig": 1.3069207668304443, + "epoch": 0.20677259328492342, + "kl_loss": 0.13651582598686218, + "loss_ib": 0.0025795239489525557, + "step": 719 + }, + { + "ce_ib": 11.6182279586792, + "ce_orig": 1.6872526407241821, + "epoch": 0.20677259328492342, + "kl_loss": 0.14566782116889954, + "loss_ib": 0.002618500730022788, + "step": 719 + }, + { + "ce_ib": 5.154110431671143, + "ce_orig": 0.5514587759971619, + "epoch": 0.20677259328492342, + "kl_loss": 0.0836012214422226, + "loss_ib": 0.001351423212327063, + "step": 719 + }, + { + "ce_ib": 9.787772178649902, + "ce_orig": 0.9540395736694336, + "epoch": 0.20677259328492342, + "kl_loss": 0.1440960168838501, + "loss_ib": 0.0024197371676564217, + "step": 719 + }, + { + "epoch": 0.20706017686390107, + "grad_norm": 0.09736236929893494, + "learning_rate": 4.980750892012711e-05, + "loss": 0.8556, + "step": 720 + }, + { + "ce_ib": 6.094231605529785, + "ce_orig": 0.432678759098053, + "epoch": 0.20706017686390107, + "kl_loss": 0.07415103167295456, + "loss_ib": 0.0013509334530681372, + "step": 720 + }, + { + "ce_ib": 11.028846740722656, + "ce_orig": 1.3781291246414185, + "epoch": 0.20706017686390107, + "kl_loss": 0.13459554314613342, + "loss_ib": 0.0024488400667905807, + "step": 720 + }, + { + "ce_ib": 7.907367706298828, + "ce_orig": 0.524237871170044, + "epoch": 0.20706017686390107, + "kl_loss": 0.11885979026556015, + "loss_ib": 0.0019793345127254725, + "step": 720 + }, + { + "ce_ib": 6.396885395050049, + "ce_orig": 0.748169481754303, + "epoch": 0.20706017686390107, + "kl_loss": 0.10846064984798431, + "loss_ib": 0.001724294968880713, + "step": 720 + }, + { + "ce_ib": 7.320477485656738, + "ce_orig": 0.5779712796211243, + "epoch": 0.2073477604428787, + "kl_loss": 0.1938854306936264, + "loss_ib": 0.0026709020603448153, + "step": 721 + }, + { + "ce_ib": 8.144287109375, + "ce_orig": 0.6583978533744812, + "epoch": 0.2073477604428787, + "kl_loss": 0.1584765613079071, + "loss_ib": 0.002399194287136197, + "step": 721 + }, + { + "ce_ib": 8.83871078491211, + "ce_orig": 1.5272163152694702, + "epoch": 0.2073477604428787, + "kl_loss": 0.12005112320184708, + "loss_ib": 0.0020843823440372944, + "step": 721 + }, + { + "ce_ib": 11.102349281311035, + "ce_orig": 1.1282752752304077, + "epoch": 0.2073477604428787, + "kl_loss": 0.14990922808647156, + "loss_ib": 0.002609326969832182, + "step": 721 + }, + { + "ce_ib": 6.203333854675293, + "ce_orig": 0.46742182970046997, + "epoch": 0.20763534402185635, + "kl_loss": 0.1615336537361145, + "loss_ib": 0.0022356698755174875, + "step": 722 + }, + { + "ce_ib": 7.080376148223877, + "ce_orig": 0.8276212215423584, + "epoch": 0.20763534402185635, + "kl_loss": 0.0906413346529007, + "loss_ib": 0.0016144509427249432, + "step": 722 + }, + { + "ce_ib": 4.1770477294921875, + "ce_orig": 0.48553168773651123, + "epoch": 0.20763534402185635, + "kl_loss": 0.08101706951856613, + "loss_ib": 0.0012278754729777575, + "step": 722 + }, + { + "ce_ib": 4.8663129806518555, + "ce_orig": 0.35782018303871155, + "epoch": 0.20763534402185635, + "kl_loss": 0.10124047100543976, + "loss_ib": 0.0014990359777584672, + "step": 722 + }, + { + "ce_ib": 6.2184247970581055, + "ce_orig": 0.5856457948684692, + "epoch": 0.207922927600834, + "kl_loss": 0.10668499767780304, + "loss_ib": 0.0016886923694983125, + "step": 723 + }, + { + "ce_ib": 8.754106521606445, + "ce_orig": 0.7017520070075989, + "epoch": 0.207922927600834, + "kl_loss": 0.1465778797864914, + "loss_ib": 0.002341189421713352, + "step": 723 + }, + { + "ce_ib": 7.906350612640381, + "ce_orig": 0.8050028085708618, + "epoch": 0.207922927600834, + "kl_loss": 0.19620084762573242, + "loss_ib": 0.002752643544226885, + "step": 723 + }, + { + "ce_ib": 4.550933361053467, + "ce_orig": 0.5352786779403687, + "epoch": 0.207922927600834, + "kl_loss": 0.12208560854196548, + "loss_ib": 0.0016759493155404925, + "step": 723 + }, + { + "ce_ib": 9.424193382263184, + "ce_orig": 0.30670666694641113, + "epoch": 0.20821051117981162, + "kl_loss": 0.4818356931209564, + "loss_ib": 0.005760776344686747, + "step": 724 + }, + { + "ce_ib": 11.714197158813477, + "ce_orig": 0.9081876277923584, + "epoch": 0.20821051117981162, + "kl_loss": 0.18002018332481384, + "loss_ib": 0.00297162146307528, + "step": 724 + }, + { + "ce_ib": 3.9292664527893066, + "ce_orig": 0.1352614313364029, + "epoch": 0.20821051117981162, + "kl_loss": 0.4230746030807495, + "loss_ib": 0.00462367245927453, + "step": 724 + }, + { + "ce_ib": 10.647526741027832, + "ce_orig": 1.4817237854003906, + "epoch": 0.20821051117981162, + "kl_loss": 0.2258502095937729, + "loss_ib": 0.003323254408314824, + "step": 724 + }, + { + "epoch": 0.20849809475878928, + "grad_norm": 0.08137502521276474, + "learning_rate": 4.980267294653671e-05, + "loss": 0.8851, + "step": 725 + }, + { + "ce_ib": 6.349156856536865, + "ce_orig": 0.7236438393592834, + "epoch": 0.20849809475878928, + "kl_loss": 0.12380913645029068, + "loss_ib": 0.0018730070441961288, + "step": 725 + }, + { + "ce_ib": 6.632828235626221, + "ce_orig": 0.9744963049888611, + "epoch": 0.20849809475878928, + "kl_loss": 0.08201053738594055, + "loss_ib": 0.0014833882451057434, + "step": 725 + }, + { + "ce_ib": 7.85797643661499, + "ce_orig": 0.7217217683792114, + "epoch": 0.20849809475878928, + "kl_loss": 0.10760220885276794, + "loss_ib": 0.0018618195317685604, + "step": 725 + }, + { + "ce_ib": 5.568864822387695, + "ce_orig": 0.5582032799720764, + "epoch": 0.20849809475878928, + "kl_loss": 0.10348623245954514, + "loss_ib": 0.0015917486744001508, + "step": 725 + }, + { + "ce_ib": 6.322106838226318, + "ce_orig": 0.6340025663375854, + "epoch": 0.2087856783377669, + "kl_loss": 0.1141437366604805, + "loss_ib": 0.001773647964000702, + "step": 726 + }, + { + "ce_ib": 9.839473724365234, + "ce_orig": 0.8133766651153564, + "epoch": 0.2087856783377669, + "kl_loss": 0.1660975217819214, + "loss_ib": 0.002644922584295273, + "step": 726 + }, + { + "ce_ib": 11.004530906677246, + "ce_orig": 1.3718172311782837, + "epoch": 0.2087856783377669, + "kl_loss": 0.1735047996044159, + "loss_ib": 0.0028355009853839874, + "step": 726 + }, + { + "ce_ib": 12.823655128479004, + "ce_orig": 1.5520806312561035, + "epoch": 0.2087856783377669, + "kl_loss": 0.15558293461799622, + "loss_ib": 0.0028381948359310627, + "step": 726 + }, + { + "ce_ib": 9.323108673095703, + "ce_orig": 0.8158657550811768, + "epoch": 0.20907326191674455, + "kl_loss": 0.2026049792766571, + "loss_ib": 0.002958360593765974, + "step": 727 + }, + { + "ce_ib": 10.1639404296875, + "ce_orig": 0.658790111541748, + "epoch": 0.20907326191674455, + "kl_loss": 0.11903582513332367, + "loss_ib": 0.002206752309575677, + "step": 727 + }, + { + "ce_ib": 8.439207077026367, + "ce_orig": 0.5992903113365173, + "epoch": 0.20907326191674455, + "kl_loss": 0.12204043567180634, + "loss_ib": 0.002064324915409088, + "step": 727 + }, + { + "ce_ib": 5.409970760345459, + "ce_orig": 0.5538503527641296, + "epoch": 0.20907326191674455, + "kl_loss": 0.09466078877449036, + "loss_ib": 0.0014876049244776368, + "step": 727 + }, + { + "ce_ib": 6.850286960601807, + "ce_orig": 0.5898058414459229, + "epoch": 0.2093608454957222, + "kl_loss": 0.0770832896232605, + "loss_ib": 0.001455861609429121, + "step": 728 + }, + { + "ce_ib": 7.669926643371582, + "ce_orig": 0.7111138105392456, + "epoch": 0.2093608454957222, + "kl_loss": 0.1397348940372467, + "loss_ib": 0.0021643415093421936, + "step": 728 + }, + { + "ce_ib": 8.357053756713867, + "ce_orig": 0.866073727607727, + "epoch": 0.2093608454957222, + "kl_loss": 0.12581852078437805, + "loss_ib": 0.0020938904490321875, + "step": 728 + }, + { + "ce_ib": 9.84300422668457, + "ce_orig": 0.9218365550041199, + "epoch": 0.2093608454957222, + "kl_loss": 0.20025497674942017, + "loss_ib": 0.002986849984154105, + "step": 728 + }, + { + "ce_ib": 7.854262828826904, + "ce_orig": 0.6762682199478149, + "epoch": 0.20964842907469983, + "kl_loss": 0.1409384310245514, + "loss_ib": 0.00219481042586267, + "step": 729 + }, + { + "ce_ib": 8.109271049499512, + "ce_orig": 0.639650821685791, + "epoch": 0.20964842907469983, + "kl_loss": 0.15809345245361328, + "loss_ib": 0.002391861518844962, + "step": 729 + }, + { + "ce_ib": 7.235316276550293, + "ce_orig": 0.2508554458618164, + "epoch": 0.20964842907469983, + "kl_loss": 0.2572559714317322, + "loss_ib": 0.0032960912212729454, + "step": 729 + }, + { + "ce_ib": 7.926023960113525, + "ce_orig": 0.7422289848327637, + "epoch": 0.20964842907469983, + "kl_loss": 0.16561353206634521, + "loss_ib": 0.002448737621307373, + "step": 729 + }, + { + "epoch": 0.20993601265367748, + "grad_norm": 0.08178116381168365, + "learning_rate": 4.9797777217550367e-05, + "loss": 0.861, + "step": 730 + }, + { + "ce_ib": 8.730643272399902, + "ce_orig": 0.867591142654419, + "epoch": 0.20993601265367748, + "kl_loss": 0.11806496232748032, + "loss_ib": 0.0020537138916552067, + "step": 730 + }, + { + "ce_ib": 9.545483589172363, + "ce_orig": 0.9179695844650269, + "epoch": 0.20993601265367748, + "kl_loss": 0.36148688197135925, + "loss_ib": 0.004569417331367731, + "step": 730 + }, + { + "ce_ib": 11.301115989685059, + "ce_orig": 1.146166443824768, + "epoch": 0.20993601265367748, + "kl_loss": 0.16140805184841156, + "loss_ib": 0.002744192024692893, + "step": 730 + }, + { + "ce_ib": 11.486992835998535, + "ce_orig": 1.2627525329589844, + "epoch": 0.20993601265367748, + "kl_loss": 0.16576717793941498, + "loss_ib": 0.002806370845064521, + "step": 730 + }, + { + "ce_ib": 9.185856819152832, + "ce_orig": 0.7200559377670288, + "epoch": 0.2102235962326551, + "kl_loss": 0.20296761393547058, + "loss_ib": 0.002948261797428131, + "step": 731 + }, + { + "ce_ib": 10.240104675292969, + "ce_orig": 0.6126185059547424, + "epoch": 0.2102235962326551, + "kl_loss": 0.16589286923408508, + "loss_ib": 0.0026829391717910767, + "step": 731 + }, + { + "ce_ib": 6.059067249298096, + "ce_orig": 0.5460329055786133, + "epoch": 0.2102235962326551, + "kl_loss": 0.11571324616670609, + "loss_ib": 0.0017630391521379352, + "step": 731 + }, + { + "ce_ib": 4.1030120849609375, + "ce_orig": 0.14751267433166504, + "epoch": 0.2102235962326551, + "kl_loss": 0.3347550630569458, + "loss_ib": 0.0037578516639769077, + "step": 731 + }, + { + "ce_ib": 5.566345691680908, + "ce_orig": 0.47219768166542053, + "epoch": 0.21051117981163275, + "kl_loss": 0.10999684035778046, + "loss_ib": 0.0016566028352826834, + "step": 732 + }, + { + "ce_ib": 6.692113876342773, + "ce_orig": 0.8962125778198242, + "epoch": 0.21051117981163275, + "kl_loss": 0.10143055766820908, + "loss_ib": 0.0016835168935358524, + "step": 732 + }, + { + "ce_ib": 10.237395286560059, + "ce_orig": 0.8566427826881409, + "epoch": 0.21051117981163275, + "kl_loss": 0.13674971461296082, + "loss_ib": 0.002391236601397395, + "step": 732 + }, + { + "ce_ib": 7.358321189880371, + "ce_orig": 0.7953360080718994, + "epoch": 0.21051117981163275, + "kl_loss": 0.11545369774103165, + "loss_ib": 0.0018903689924627542, + "step": 732 + }, + { + "ce_ib": 7.0491719245910645, + "ce_orig": 0.5768455862998962, + "epoch": 0.2107987633906104, + "kl_loss": 0.14890553057193756, + "loss_ib": 0.0021939724683761597, + "step": 733 + }, + { + "ce_ib": 7.68407678604126, + "ce_orig": 0.8678704500198364, + "epoch": 0.2107987633906104, + "kl_loss": 0.19429832696914673, + "loss_ib": 0.002711390843614936, + "step": 733 + }, + { + "ce_ib": 9.173813819885254, + "ce_orig": 0.9225053191184998, + "epoch": 0.2107987633906104, + "kl_loss": 0.3797130286693573, + "loss_ib": 0.004714511334896088, + "step": 733 + }, + { + "ce_ib": 9.308879852294922, + "ce_orig": 0.5995781421661377, + "epoch": 0.2107987633906104, + "kl_loss": 0.145217627286911, + "loss_ib": 0.0023830642458051443, + "step": 733 + }, + { + "ce_ib": 9.65007495880127, + "ce_orig": 0.7233938574790955, + "epoch": 0.21108634696958803, + "kl_loss": 0.2051672786474228, + "loss_ib": 0.0030166800133883953, + "step": 734 + }, + { + "ce_ib": 8.14127254486084, + "ce_orig": 0.6786958575248718, + "epoch": 0.21108634696958803, + "kl_loss": 0.16025173664093018, + "loss_ib": 0.002416644711047411, + "step": 734 + }, + { + "ce_ib": 9.92184066772461, + "ce_orig": 0.828167200088501, + "epoch": 0.21108634696958803, + "kl_loss": 0.1490582823753357, + "loss_ib": 0.0024827667511999607, + "step": 734 + }, + { + "ce_ib": 9.757160186767578, + "ce_orig": 1.001948595046997, + "epoch": 0.21108634696958803, + "kl_loss": 0.17845755815505981, + "loss_ib": 0.002760291565209627, + "step": 734 + }, + { + "epoch": 0.21137393054856568, + "grad_norm": 0.09134446084499359, + "learning_rate": 4.979282174496302e-05, + "loss": 0.883, + "step": 735 + }, + { + "ce_ib": 7.697344779968262, + "ce_orig": 0.613048255443573, + "epoch": 0.21137393054856568, + "kl_loss": 0.1581844836473465, + "loss_ib": 0.0023515792563557625, + "step": 735 + }, + { + "ce_ib": 9.774848937988281, + "ce_orig": 0.9369478821754456, + "epoch": 0.21137393054856568, + "kl_loss": 0.1405380368232727, + "loss_ib": 0.0023828651756048203, + "step": 735 + }, + { + "ce_ib": 10.879542350769043, + "ce_orig": 1.151315450668335, + "epoch": 0.21137393054856568, + "kl_loss": 0.14188644289970398, + "loss_ib": 0.0025068186223506927, + "step": 735 + }, + { + "ce_ib": 7.50467586517334, + "ce_orig": 0.8081782460212708, + "epoch": 0.21137393054856568, + "kl_loss": 0.13665321469306946, + "loss_ib": 0.002116999588906765, + "step": 735 + }, + { + "ce_ib": 8.551518440246582, + "ce_orig": 1.0012387037277222, + "epoch": 0.2116615141275433, + "kl_loss": 0.09218282997608185, + "loss_ib": 0.0017769801197573543, + "step": 736 + }, + { + "ce_ib": 12.103117942810059, + "ce_orig": 1.6258405447006226, + "epoch": 0.2116615141275433, + "kl_loss": 0.2883414626121521, + "loss_ib": 0.0040937261655926704, + "step": 736 + }, + { + "ce_ib": 9.884223937988281, + "ce_orig": 1.289268970489502, + "epoch": 0.2116615141275433, + "kl_loss": 0.11228608340024948, + "loss_ib": 0.002111283130943775, + "step": 736 + }, + { + "ce_ib": 12.174933433532715, + "ce_orig": 1.3357943296432495, + "epoch": 0.2116615141275433, + "kl_loss": 0.1535387933254242, + "loss_ib": 0.0027528812643140554, + "step": 736 + }, + { + "ce_ib": 5.271547317504883, + "ce_orig": 0.6658823490142822, + "epoch": 0.21194909770652096, + "kl_loss": 0.09715841710567474, + "loss_ib": 0.001498738769441843, + "step": 737 + }, + { + "ce_ib": 4.523626327514648, + "ce_orig": 0.43075889348983765, + "epoch": 0.21194909770652096, + "kl_loss": 0.11853388696908951, + "loss_ib": 0.0016377015272155404, + "step": 737 + }, + { + "ce_ib": 7.904093265533447, + "ce_orig": 0.9350758194923401, + "epoch": 0.21194909770652096, + "kl_loss": 0.11697270721197128, + "loss_ib": 0.001960136229172349, + "step": 737 + }, + { + "ce_ib": 8.9533109664917, + "ce_orig": 0.7697760462760925, + "epoch": 0.21194909770652096, + "kl_loss": 0.3157818913459778, + "loss_ib": 0.004053149838000536, + "step": 737 + }, + { + "ce_ib": 8.857293128967285, + "ce_orig": 0.7813129425048828, + "epoch": 0.2122366812854986, + "kl_loss": 0.16268154978752136, + "loss_ib": 0.0025125446263700724, + "step": 738 + }, + { + "ce_ib": 8.424586296081543, + "ce_orig": 0.9425109624862671, + "epoch": 0.2122366812854986, + "kl_loss": 0.12331412732601166, + "loss_ib": 0.0020755997393280268, + "step": 738 + }, + { + "ce_ib": 9.702607154846191, + "ce_orig": 1.580017328262329, + "epoch": 0.2122366812854986, + "kl_loss": 0.09408050775527954, + "loss_ib": 0.0019110658904537559, + "step": 738 + }, + { + "ce_ib": 7.1836347579956055, + "ce_orig": 0.4377358555793762, + "epoch": 0.2122366812854986, + "kl_loss": 0.14707301557064056, + "loss_ib": 0.002189093502238393, + "step": 738 + }, + { + "ce_ib": 3.4647040367126465, + "ce_orig": 0.35970985889434814, + "epoch": 0.21252426486447623, + "kl_loss": 0.2587149143218994, + "loss_ib": 0.0029336195439100266, + "step": 739 + }, + { + "ce_ib": 6.37571907043457, + "ce_orig": 0.5920301675796509, + "epoch": 0.21252426486447623, + "kl_loss": 0.11138466000556946, + "loss_ib": 0.0017514183418825269, + "step": 739 + }, + { + "ce_ib": 5.618171215057373, + "ce_orig": 0.9007509350776672, + "epoch": 0.21252426486447623, + "kl_loss": 0.11110688745975494, + "loss_ib": 0.0016728859627619386, + "step": 739 + }, + { + "ce_ib": 8.733031272888184, + "ce_orig": 1.0930041074752808, + "epoch": 0.21252426486447623, + "kl_loss": 0.11575108021497726, + "loss_ib": 0.0020308138336986303, + "step": 739 + }, + { + "epoch": 0.21281184844345388, + "grad_norm": 0.08747689425945282, + "learning_rate": 4.9787806540713546e-05, + "loss": 0.8554, + "step": 740 + }, + { + "ce_ib": 8.982237815856934, + "ce_orig": 0.755938708782196, + "epoch": 0.21281184844345388, + "kl_loss": 0.15475055575370789, + "loss_ib": 0.0024457292165607214, + "step": 740 + }, + { + "ce_ib": 7.661835193634033, + "ce_orig": 0.7428460121154785, + "epoch": 0.21281184844345388, + "kl_loss": 0.11066774278879166, + "loss_ib": 0.001872860942967236, + "step": 740 + }, + { + "ce_ib": 6.563145637512207, + "ce_orig": 0.6415489912033081, + "epoch": 0.21281184844345388, + "kl_loss": 0.08858776092529297, + "loss_ib": 0.0015421920688822865, + "step": 740 + }, + { + "ce_ib": 9.22264575958252, + "ce_orig": 1.1372004747390747, + "epoch": 0.21281184844345388, + "kl_loss": 0.1382768154144287, + "loss_ib": 0.002305032452568412, + "step": 740 + }, + { + "ce_ib": 5.523608207702637, + "ce_orig": 0.5463114976882935, + "epoch": 0.2130994320224315, + "kl_loss": 0.12450896203517914, + "loss_ib": 0.0017974504735320807, + "step": 741 + }, + { + "ce_ib": 6.9053497314453125, + "ce_orig": 0.566726803779602, + "epoch": 0.2130994320224315, + "kl_loss": 0.11424671858549118, + "loss_ib": 0.001833002083003521, + "step": 741 + }, + { + "ce_ib": 8.808889389038086, + "ce_orig": 0.666398823261261, + "epoch": 0.2130994320224315, + "kl_loss": 0.19304627180099487, + "loss_ib": 0.0028113515581935644, + "step": 741 + }, + { + "ce_ib": 9.8599214553833, + "ce_orig": 1.0360751152038574, + "epoch": 0.2130994320224315, + "kl_loss": 0.3881288170814514, + "loss_ib": 0.004867279902100563, + "step": 741 + }, + { + "ce_ib": 10.459798812866211, + "ce_orig": 1.267624855041504, + "epoch": 0.21338701560140916, + "kl_loss": 0.1840190589427948, + "loss_ib": 0.0028861702885478735, + "step": 742 + }, + { + "ce_ib": 9.469386100769043, + "ce_orig": 1.4054698944091797, + "epoch": 0.21338701560140916, + "kl_loss": 0.10489386320114136, + "loss_ib": 0.0019958773627877235, + "step": 742 + }, + { + "ce_ib": 7.87640380859375, + "ce_orig": 0.8942380547523499, + "epoch": 0.21338701560140916, + "kl_loss": 0.17778579890727997, + "loss_ib": 0.0025654982309788465, + "step": 742 + }, + { + "ce_ib": 7.7208356857299805, + "ce_orig": 0.5113922953605652, + "epoch": 0.21338701560140916, + "kl_loss": 0.1375960260629654, + "loss_ib": 0.00214804382994771, + "step": 742 + }, + { + "ce_ib": 9.136881828308105, + "ce_orig": 0.6699970960617065, + "epoch": 0.2136745991803868, + "kl_loss": 0.15580974519252777, + "loss_ib": 0.0024717855267226696, + "step": 743 + }, + { + "ce_ib": 4.8074541091918945, + "ce_orig": 0.4599434435367584, + "epoch": 0.2136745991803868, + "kl_loss": 0.08423975110054016, + "loss_ib": 0.0013231429038569331, + "step": 743 + }, + { + "ce_ib": 8.024765968322754, + "ce_orig": 0.4649258255958557, + "epoch": 0.2136745991803868, + "kl_loss": 0.1424439400434494, + "loss_ib": 0.0022269159089773893, + "step": 743 + }, + { + "ce_ib": 9.357501983642578, + "ce_orig": 0.8131682276725769, + "epoch": 0.2136745991803868, + "kl_loss": 0.13159029185771942, + "loss_ib": 0.0022516530007123947, + "step": 743 + }, + { + "ce_ib": 8.306589126586914, + "ce_orig": 0.8594993352890015, + "epoch": 0.21396218275936443, + "kl_loss": 0.19585707783699036, + "loss_ib": 0.0027892296202480793, + "step": 744 + }, + { + "ce_ib": 6.1273651123046875, + "ce_orig": 0.8473232388496399, + "epoch": 0.21396218275936443, + "kl_loss": 0.08322134613990784, + "loss_ib": 0.0014449498848989606, + "step": 744 + }, + { + "ce_ib": 7.272594451904297, + "ce_orig": 0.9642258882522583, + "epoch": 0.21396218275936443, + "kl_loss": 0.15547984838485718, + "loss_ib": 0.0022820578888058662, + "step": 744 + }, + { + "ce_ib": 11.65031909942627, + "ce_orig": 1.2790560722351074, + "epoch": 0.21396218275936443, + "kl_loss": 0.1697477102279663, + "loss_ib": 0.002862508874386549, + "step": 744 + }, + { + "epoch": 0.21424976633834208, + "grad_norm": 0.10272146761417389, + "learning_rate": 4.9782731616884736e-05, + "loss": 0.8185, + "step": 745 + }, + { + "ce_ib": 12.1670503616333, + "ce_orig": 0.6025158762931824, + "epoch": 0.21424976633834208, + "kl_loss": 0.17272210121154785, + "loss_ib": 0.002943925792351365, + "step": 745 + }, + { + "ce_ib": 8.464635848999023, + "ce_orig": 0.9119886755943298, + "epoch": 0.21424976633834208, + "kl_loss": 0.09585784375667572, + "loss_ib": 0.001805042033083737, + "step": 745 + }, + { + "ce_ib": 7.6008076667785645, + "ce_orig": 0.9770063161849976, + "epoch": 0.21424976633834208, + "kl_loss": 0.1800667643547058, + "loss_ib": 0.0025607484858483076, + "step": 745 + }, + { + "ce_ib": 9.016298294067383, + "ce_orig": 0.9819609522819519, + "epoch": 0.21424976633834208, + "kl_loss": 0.15516135096549988, + "loss_ib": 0.002453243127092719, + "step": 745 + }, + { + "ce_ib": 7.594570636749268, + "ce_orig": 0.8066179156303406, + "epoch": 0.2145373499173197, + "kl_loss": 0.16977733373641968, + "loss_ib": 0.0024572303518652916, + "step": 746 + }, + { + "ce_ib": 6.28651237487793, + "ce_orig": 0.46179917454719543, + "epoch": 0.2145373499173197, + "kl_loss": 0.1124720424413681, + "loss_ib": 0.001753371674567461, + "step": 746 + }, + { + "ce_ib": 6.5596394538879395, + "ce_orig": 0.6535757780075073, + "epoch": 0.2145373499173197, + "kl_loss": 0.14391300082206726, + "loss_ib": 0.0020950939506292343, + "step": 746 + }, + { + "ce_ib": 10.635780334472656, + "ce_orig": 1.0490766763687134, + "epoch": 0.2145373499173197, + "kl_loss": 0.11995188891887665, + "loss_ib": 0.002263096859678626, + "step": 746 + }, + { + "ce_ib": 7.497809886932373, + "ce_orig": 0.743564784526825, + "epoch": 0.21482493349629736, + "kl_loss": 0.15132027864456177, + "loss_ib": 0.0022629837039858103, + "step": 747 + }, + { + "ce_ib": 11.009923934936523, + "ce_orig": 1.1343742609024048, + "epoch": 0.21482493349629736, + "kl_loss": 0.13817675411701202, + "loss_ib": 0.0024827599991112947, + "step": 747 + }, + { + "ce_ib": 8.217153549194336, + "ce_orig": 0.9935986995697021, + "epoch": 0.21482493349629736, + "kl_loss": 0.08536257594823837, + "loss_ib": 0.0016753410454839468, + "step": 747 + }, + { + "ce_ib": 7.1405253410339355, + "ce_orig": 0.6493523716926575, + "epoch": 0.21482493349629736, + "kl_loss": 0.12277388572692871, + "loss_ib": 0.0019417913863435388, + "step": 747 + }, + { + "ce_ib": 8.954559326171875, + "ce_orig": 0.9859476089477539, + "epoch": 0.215112517075275, + "kl_loss": 0.09736071527004242, + "loss_ib": 0.0018690630095079541, + "step": 748 + }, + { + "ce_ib": 9.049851417541504, + "ce_orig": 0.5681316256523132, + "epoch": 0.215112517075275, + "kl_loss": 0.2084466814994812, + "loss_ib": 0.002989451866596937, + "step": 748 + }, + { + "ce_ib": 10.671403884887695, + "ce_orig": 1.2401421070098877, + "epoch": 0.215112517075275, + "kl_loss": 0.12268570065498352, + "loss_ib": 0.0022939974442124367, + "step": 748 + }, + { + "ce_ib": 9.870866775512695, + "ce_orig": 0.7144081592559814, + "epoch": 0.215112517075275, + "kl_loss": 0.1475132703781128, + "loss_ib": 0.002462219214066863, + "step": 748 + }, + { + "ce_ib": 7.921173095703125, + "ce_orig": 0.7684431076049805, + "epoch": 0.21540010065425264, + "kl_loss": 0.18796128034591675, + "loss_ib": 0.0026717297732830048, + "step": 749 + }, + { + "ce_ib": 5.337991714477539, + "ce_orig": 0.701056718826294, + "epoch": 0.21540010065425264, + "kl_loss": 0.13435563445091248, + "loss_ib": 0.0018773555057123303, + "step": 749 + }, + { + "ce_ib": 8.451627731323242, + "ce_orig": 1.0293208360671997, + "epoch": 0.21540010065425264, + "kl_loss": 0.13992534577846527, + "loss_ib": 0.002244416158646345, + "step": 749 + }, + { + "ce_ib": 9.440494537353516, + "ce_orig": 0.6093024015426636, + "epoch": 0.21540010065425264, + "kl_loss": 0.16520021855831146, + "loss_ib": 0.0025960516650229692, + "step": 749 + }, + { + "epoch": 0.2156876842332303, + "grad_norm": 0.0997624471783638, + "learning_rate": 4.977759698570328e-05, + "loss": 0.8982, + "step": 750 + }, + { + "ce_ib": 7.538696765899658, + "ce_orig": 0.8382879495620728, + "epoch": 0.2156876842332303, + "kl_loss": 0.13652510941028595, + "loss_ib": 0.0021191206760704517, + "step": 750 + }, + { + "ce_ib": 12.170858383178711, + "ce_orig": 0.9007084369659424, + "epoch": 0.2156876842332303, + "kl_loss": 0.1638849973678589, + "loss_ib": 0.002855935599654913, + "step": 750 + }, + { + "ce_ib": 11.650144577026367, + "ce_orig": 1.1218953132629395, + "epoch": 0.2156876842332303, + "kl_loss": 0.15194326639175415, + "loss_ib": 0.0026844472158700228, + "step": 750 + }, + { + "ce_ib": 10.354049682617188, + "ce_orig": 0.8704142570495605, + "epoch": 0.2156876842332303, + "kl_loss": 0.14096030592918396, + "loss_ib": 0.0024450079072266817, + "step": 750 + }, + { + "ce_ib": 8.408703804016113, + "ce_orig": 0.4949643015861511, + "epoch": 0.2159752678122079, + "kl_loss": 0.09109346568584442, + "loss_ib": 0.0017518049571663141, + "step": 751 + }, + { + "ce_ib": 9.992585182189941, + "ce_orig": 0.7481764554977417, + "epoch": 0.2159752678122079, + "kl_loss": 0.22328117489814758, + "loss_ib": 0.003232070244848728, + "step": 751 + }, + { + "ce_ib": 6.670741081237793, + "ce_orig": 0.8640801906585693, + "epoch": 0.2159752678122079, + "kl_loss": 0.16885803639888763, + "loss_ib": 0.002355654491111636, + "step": 751 + }, + { + "ce_ib": 8.197010040283203, + "ce_orig": 0.8291086554527283, + "epoch": 0.2159752678122079, + "kl_loss": 0.20890876650810242, + "loss_ib": 0.0029087886214256287, + "step": 751 + }, + { + "ce_ib": 9.433305740356445, + "ce_orig": 1.1036256551742554, + "epoch": 0.21626285139118556, + "kl_loss": 0.0969347208738327, + "loss_ib": 0.0019126776605844498, + "step": 752 + }, + { + "ce_ib": 10.305464744567871, + "ce_orig": 0.9490968585014343, + "epoch": 0.21626285139118556, + "kl_loss": 0.12411025166511536, + "loss_ib": 0.0022716489620506763, + "step": 752 + }, + { + "ce_ib": 6.960268497467041, + "ce_orig": 0.6807049512863159, + "epoch": 0.21626285139118556, + "kl_loss": 0.2037605196237564, + "loss_ib": 0.0027336319908499718, + "step": 752 + }, + { + "ce_ib": 14.218523025512695, + "ce_orig": 1.866547703742981, + "epoch": 0.21626285139118556, + "kl_loss": 0.1546570360660553, + "loss_ib": 0.0029684226028621197, + "step": 752 + }, + { + "ce_ib": 5.236147403717041, + "ce_orig": 0.6408563256263733, + "epoch": 0.2165504349701632, + "kl_loss": 0.11485590040683746, + "loss_ib": 0.0016721737338230014, + "step": 753 + }, + { + "ce_ib": 13.076562881469727, + "ce_orig": 1.6438502073287964, + "epoch": 0.2165504349701632, + "kl_loss": 0.18503537774085999, + "loss_ib": 0.0031580100767314434, + "step": 753 + }, + { + "ce_ib": 7.389747142791748, + "ce_orig": 1.122937798500061, + "epoch": 0.2165504349701632, + "kl_loss": 0.12777473032474518, + "loss_ib": 0.0020167219918221235, + "step": 753 + }, + { + "ce_ib": 9.260527610778809, + "ce_orig": 0.832629919052124, + "epoch": 0.2165504349701632, + "kl_loss": 0.17178930342197418, + "loss_ib": 0.002643945859745145, + "step": 753 + }, + { + "ce_ib": 8.497020721435547, + "ce_orig": 1.3572919368743896, + "epoch": 0.21683801854914084, + "kl_loss": 0.14210839569568634, + "loss_ib": 0.0022707858588546515, + "step": 754 + }, + { + "ce_ib": 8.139203071594238, + "ce_orig": 0.7181857824325562, + "epoch": 0.21683801854914084, + "kl_loss": 0.13708284497261047, + "loss_ib": 0.002184748649597168, + "step": 754 + }, + { + "ce_ib": 7.636706829071045, + "ce_orig": 1.157459020614624, + "epoch": 0.21683801854914084, + "kl_loss": 0.1138184517621994, + "loss_ib": 0.0019018551101908088, + "step": 754 + }, + { + "ce_ib": 7.350226879119873, + "ce_orig": 0.8966202735900879, + "epoch": 0.21683801854914084, + "kl_loss": 0.12013451755046844, + "loss_ib": 0.0019363677129149437, + "step": 754 + }, + { + "epoch": 0.2171256021281185, + "grad_norm": 0.10654988884925842, + "learning_rate": 4.9772402659539674e-05, + "loss": 0.9059, + "step": 755 + }, + { + "ce_ib": 6.863460540771484, + "ce_orig": 0.7471085786819458, + "epoch": 0.2171256021281185, + "kl_loss": 0.12676385045051575, + "loss_ib": 0.0019539843779057264, + "step": 755 + }, + { + "ce_ib": 5.62061882019043, + "ce_orig": 0.4919710159301758, + "epoch": 0.2171256021281185, + "kl_loss": 0.1688041090965271, + "loss_ib": 0.0022501028142869473, + "step": 755 + }, + { + "ce_ib": 7.917287349700928, + "ce_orig": 0.42383748292922974, + "epoch": 0.2171256021281185, + "kl_loss": 0.15159422159194946, + "loss_ib": 0.0023076708894222975, + "step": 755 + }, + { + "ce_ib": 7.0679731369018555, + "ce_orig": 0.9794975519180298, + "epoch": 0.2171256021281185, + "kl_loss": 0.12907591462135315, + "loss_ib": 0.0019975563045591116, + "step": 755 + }, + { + "ce_ib": 10.093253135681152, + "ce_orig": 0.6868664622306824, + "epoch": 0.2174131857070961, + "kl_loss": 0.1843118667602539, + "loss_ib": 0.0028524440713226795, + "step": 756 + }, + { + "ce_ib": 8.960195541381836, + "ce_orig": 0.9731432795524597, + "epoch": 0.2174131857070961, + "kl_loss": 0.15665888786315918, + "loss_ib": 0.0024626085069030523, + "step": 756 + }, + { + "ce_ib": 8.387116432189941, + "ce_orig": 0.6317304372787476, + "epoch": 0.2174131857070961, + "kl_loss": 0.1664927899837494, + "loss_ib": 0.002503639319911599, + "step": 756 + }, + { + "ce_ib": 7.766140937805176, + "ce_orig": 0.6035816073417664, + "epoch": 0.2174131857070961, + "kl_loss": 0.12087871134281158, + "loss_ib": 0.0019854011479765177, + "step": 756 + }, + { + "ce_ib": 9.264239311218262, + "ce_orig": 0.9195747375488281, + "epoch": 0.21770076928607376, + "kl_loss": 0.1662123203277588, + "loss_ib": 0.0025885470677167177, + "step": 757 + }, + { + "ce_ib": 10.044254302978516, + "ce_orig": 1.1064798831939697, + "epoch": 0.21770076928607376, + "kl_loss": 0.17693579196929932, + "loss_ib": 0.0027737831696867943, + "step": 757 + }, + { + "ce_ib": 6.537443161010742, + "ce_orig": 0.634574294090271, + "epoch": 0.21770076928607376, + "kl_loss": 0.2076174020767212, + "loss_ib": 0.0027299183420836926, + "step": 757 + }, + { + "ce_ib": 7.090516567230225, + "ce_orig": 0.8195549249649048, + "epoch": 0.21770076928607376, + "kl_loss": 0.0990576520562172, + "loss_ib": 0.001699628192000091, + "step": 757 + }, + { + "ce_ib": 7.096627712249756, + "ce_orig": 0.6805820465087891, + "epoch": 0.21798835286505142, + "kl_loss": 0.09520435333251953, + "loss_ib": 0.0016617062501609325, + "step": 758 + }, + { + "ce_ib": 6.252221584320068, + "ce_orig": 0.5069523453712463, + "epoch": 0.21798835286505142, + "kl_loss": 0.11282311379909515, + "loss_ib": 0.0017534532817080617, + "step": 758 + }, + { + "ce_ib": 9.68701171875, + "ce_orig": 1.14574134349823, + "epoch": 0.21798835286505142, + "kl_loss": 0.139566570520401, + "loss_ib": 0.0023643667809665203, + "step": 758 + }, + { + "ce_ib": 7.7025065422058105, + "ce_orig": 0.6887799501419067, + "epoch": 0.21798835286505142, + "kl_loss": 0.1532985270023346, + "loss_ib": 0.002303235698491335, + "step": 758 + }, + { + "ce_ib": 5.569612503051758, + "ce_orig": 0.7203543186187744, + "epoch": 0.21827593644402904, + "kl_loss": 0.17881646752357483, + "loss_ib": 0.002345125889405608, + "step": 759 + }, + { + "ce_ib": 7.569202899932861, + "ce_orig": 0.6448748707771301, + "epoch": 0.21827593644402904, + "kl_loss": 0.15759307146072388, + "loss_ib": 0.0023328508250415325, + "step": 759 + }, + { + "ce_ib": 7.562583923339844, + "ce_orig": 0.7065814733505249, + "epoch": 0.21827593644402904, + "kl_loss": 0.20230957865715027, + "loss_ib": 0.0027793541084975004, + "step": 759 + }, + { + "ce_ib": 7.415134429931641, + "ce_orig": 0.8418720960617065, + "epoch": 0.21827593644402904, + "kl_loss": 0.16802442073822021, + "loss_ib": 0.0024217574391514063, + "step": 759 + }, + { + "epoch": 0.2185635200230067, + "grad_norm": 0.09956327825784683, + "learning_rate": 4.976714865090827e-05, + "loss": 0.871, + "step": 760 + }, + { + "ce_ib": 8.476737976074219, + "ce_orig": 0.9896325469017029, + "epoch": 0.2185635200230067, + "kl_loss": 0.10956001281738281, + "loss_ib": 0.001943273819051683, + "step": 760 + }, + { + "ce_ib": 7.939772605895996, + "ce_orig": 0.4322037100791931, + "epoch": 0.2185635200230067, + "kl_loss": 0.19129762053489685, + "loss_ib": 0.0027069533243775368, + "step": 760 + }, + { + "ce_ib": 4.769078254699707, + "ce_orig": 0.621930718421936, + "epoch": 0.2185635200230067, + "kl_loss": 0.08950284123420715, + "loss_ib": 0.0013719361741095781, + "step": 760 + }, + { + "ce_ib": 11.311341285705566, + "ce_orig": 1.3912595510482788, + "epoch": 0.2185635200230067, + "kl_loss": 0.10796618461608887, + "loss_ib": 0.0022107958793640137, + "step": 760 + }, + { + "ce_ib": 11.38323974609375, + "ce_orig": 1.3958052396774292, + "epoch": 0.21885110360198431, + "kl_loss": 0.1116107627749443, + "loss_ib": 0.00225443160161376, + "step": 761 + }, + { + "ce_ib": 7.089112758636475, + "ce_orig": 0.39750197529792786, + "epoch": 0.21885110360198431, + "kl_loss": 0.22810953855514526, + "loss_ib": 0.0029900067020207644, + "step": 761 + }, + { + "ce_ib": 9.000521659851074, + "ce_orig": 0.47054076194763184, + "epoch": 0.21885110360198431, + "kl_loss": 0.1665971875190735, + "loss_ib": 0.002566023962572217, + "step": 761 + }, + { + "ce_ib": 7.182977676391602, + "ce_orig": 0.8364933133125305, + "epoch": 0.21885110360198431, + "kl_loss": 0.11331808567047119, + "loss_ib": 0.0018514784751459956, + "step": 761 + }, + { + "ce_ib": 12.852972030639648, + "ce_orig": 1.7782992124557495, + "epoch": 0.21913868718096197, + "kl_loss": 0.1779230237007141, + "loss_ib": 0.0030645274091511965, + "step": 762 + }, + { + "ce_ib": 12.079954147338867, + "ce_orig": 1.306889533996582, + "epoch": 0.21913868718096197, + "kl_loss": 0.15216562151908875, + "loss_ib": 0.002729651518166065, + "step": 762 + }, + { + "ce_ib": 7.572328567504883, + "ce_orig": 1.0523992776870728, + "epoch": 0.21913868718096197, + "kl_loss": 0.1251816600561142, + "loss_ib": 0.002009049290791154, + "step": 762 + }, + { + "ce_ib": 9.228711128234863, + "ce_orig": 0.7844505310058594, + "epoch": 0.21913868718096197, + "kl_loss": 0.18018165230751038, + "loss_ib": 0.002724687336012721, + "step": 762 + }, + { + "ce_ib": 11.388764381408691, + "ce_orig": 0.7564859390258789, + "epoch": 0.21942627075993962, + "kl_loss": 0.3528626263141632, + "loss_ib": 0.004667502362281084, + "step": 763 + }, + { + "ce_ib": 10.088685989379883, + "ce_orig": 1.3417243957519531, + "epoch": 0.21942627075993962, + "kl_loss": 0.1274024248123169, + "loss_ib": 0.002282892819494009, + "step": 763 + }, + { + "ce_ib": 5.146511554718018, + "ce_orig": 0.5599508285522461, + "epoch": 0.21942627075993962, + "kl_loss": 0.1776261031627655, + "loss_ib": 0.0022909119725227356, + "step": 763 + }, + { + "ce_ib": 7.239597797393799, + "ce_orig": 0.857480525970459, + "epoch": 0.21942627075993962, + "kl_loss": 0.11923994868993759, + "loss_ib": 0.0019163591787219048, + "step": 763 + }, + { + "ce_ib": 6.657400608062744, + "ce_orig": 0.642911970615387, + "epoch": 0.21971385433891724, + "kl_loss": 0.09522129595279694, + "loss_ib": 0.001617952948436141, + "step": 764 + }, + { + "ce_ib": 5.648870944976807, + "ce_orig": 0.5404942035675049, + "epoch": 0.21971385433891724, + "kl_loss": 0.2369777113199234, + "loss_ib": 0.002934664022177458, + "step": 764 + }, + { + "ce_ib": 5.368570327758789, + "ce_orig": 0.2739843726158142, + "epoch": 0.21971385433891724, + "kl_loss": 0.0845465362071991, + "loss_ib": 0.00138232228346169, + "step": 764 + }, + { + "ce_ib": 10.863349914550781, + "ce_orig": 0.7003340721130371, + "epoch": 0.21971385433891724, + "kl_loss": 0.4336280822753906, + "loss_ib": 0.00542261591181159, + "step": 764 + }, + { + "epoch": 0.2200014379178949, + "grad_norm": 0.08464247733354568, + "learning_rate": 4.9761834972467185e-05, + "loss": 0.8365, + "step": 765 + }, + { + "ce_ib": 9.576865196228027, + "ce_orig": 1.0863311290740967, + "epoch": 0.2200014379178949, + "kl_loss": 0.08513174206018448, + "loss_ib": 0.0018090038793161511, + "step": 765 + }, + { + "ce_ib": 4.838799953460693, + "ce_orig": 0.6935594081878662, + "epoch": 0.2200014379178949, + "kl_loss": 0.08847616612911224, + "loss_ib": 0.001368641504086554, + "step": 765 + }, + { + "ce_ib": 9.570419311523438, + "ce_orig": 1.0386337041854858, + "epoch": 0.2200014379178949, + "kl_loss": 0.16993722319602966, + "loss_ib": 0.002656414173543453, + "step": 765 + }, + { + "ce_ib": 6.40081262588501, + "ce_orig": 0.8035165667533875, + "epoch": 0.2200014379178949, + "kl_loss": 0.0860595852136612, + "loss_ib": 0.0015006770845502615, + "step": 765 + }, + { + "ce_ib": 5.312402248382568, + "ce_orig": 0.509066641330719, + "epoch": 0.22028902149687252, + "kl_loss": 0.09284783899784088, + "loss_ib": 0.0014597185654565692, + "step": 766 + }, + { + "ce_ib": 7.273090839385986, + "ce_orig": 0.7035940289497375, + "epoch": 0.22028902149687252, + "kl_loss": 0.1298186182975769, + "loss_ib": 0.0020254950504750013, + "step": 766 + }, + { + "ce_ib": 8.391249656677246, + "ce_orig": 1.0880930423736572, + "epoch": 0.22028902149687252, + "kl_loss": 0.1667131632566452, + "loss_ib": 0.0025062565691769123, + "step": 766 + }, + { + "ce_ib": 8.774653434753418, + "ce_orig": 0.9759833216667175, + "epoch": 0.22028902149687252, + "kl_loss": 0.13655591011047363, + "loss_ib": 0.0022430242970585823, + "step": 766 + }, + { + "ce_ib": 9.209096908569336, + "ce_orig": 0.836510956287384, + "epoch": 0.22057660507585017, + "kl_loss": 0.2317269891500473, + "loss_ib": 0.0032381797209382057, + "step": 767 + }, + { + "ce_ib": 8.248529434204102, + "ce_orig": 0.668925404548645, + "epoch": 0.22057660507585017, + "kl_loss": 0.15469834208488464, + "loss_ib": 0.00237183622084558, + "step": 767 + }, + { + "ce_ib": 9.433579444885254, + "ce_orig": 1.0252512693405151, + "epoch": 0.22057660507585017, + "kl_loss": 0.11495402455329895, + "loss_ib": 0.0020928981248289347, + "step": 767 + }, + { + "ce_ib": 4.54541015625, + "ce_orig": 0.5551728010177612, + "epoch": 0.22057660507585017, + "kl_loss": 0.14444425702095032, + "loss_ib": 0.0018989834934473038, + "step": 767 + }, + { + "ce_ib": 10.690596580505371, + "ce_orig": 0.8334415555000305, + "epoch": 0.22086418865482782, + "kl_loss": 0.14985781908035278, + "loss_ib": 0.002567637711763382, + "step": 768 + }, + { + "ce_ib": 9.497960090637207, + "ce_orig": 1.0465785264968872, + "epoch": 0.22086418865482782, + "kl_loss": 0.10572009533643723, + "loss_ib": 0.002006996888667345, + "step": 768 + }, + { + "ce_ib": 9.094498634338379, + "ce_orig": 0.8460086584091187, + "epoch": 0.22086418865482782, + "kl_loss": 0.12992800772190094, + "loss_ib": 0.002208729973062873, + "step": 768 + }, + { + "ce_ib": 7.6064934730529785, + "ce_orig": 0.7029877305030823, + "epoch": 0.22086418865482782, + "kl_loss": 0.2135726511478424, + "loss_ib": 0.0028963754884898663, + "step": 768 + }, + { + "ce_ib": 10.828954696655273, + "ce_orig": 0.9486677646636963, + "epoch": 0.22115177223380544, + "kl_loss": 0.1448005884885788, + "loss_ib": 0.0025309014599770308, + "step": 769 + }, + { + "ce_ib": 6.5569987297058105, + "ce_orig": 0.5725328326225281, + "epoch": 0.22115177223380544, + "kl_loss": 0.14763084053993225, + "loss_ib": 0.0021320083178579807, + "step": 769 + }, + { + "ce_ib": 8.761213302612305, + "ce_orig": 0.9140744209289551, + "epoch": 0.22115177223380544, + "kl_loss": 0.10454612970352173, + "loss_ib": 0.0019215825013816357, + "step": 769 + }, + { + "ce_ib": 12.391948699951172, + "ce_orig": 1.4241856336593628, + "epoch": 0.22115177223380544, + "kl_loss": 0.16774173080921173, + "loss_ib": 0.0029166119638830423, + "step": 769 + }, + { + "epoch": 0.2214393558127831, + "grad_norm": 0.09514256566762924, + "learning_rate": 4.975646163701831e-05, + "loss": 0.9192, + "step": 770 + }, + { + "ce_ib": 9.242008209228516, + "ce_orig": 1.029587984085083, + "epoch": 0.2214393558127831, + "kl_loss": 0.16982057690620422, + "loss_ib": 0.0026224064640700817, + "step": 770 + }, + { + "ce_ib": 6.082286357879639, + "ce_orig": 0.7157595753669739, + "epoch": 0.2214393558127831, + "kl_loss": 0.12317483127117157, + "loss_ib": 0.0018399768741801381, + "step": 770 + }, + { + "ce_ib": 14.0202054977417, + "ce_orig": 2.0901854038238525, + "epoch": 0.2214393558127831, + "kl_loss": 0.15905095636844635, + "loss_ib": 0.0029925298877060413, + "step": 770 + }, + { + "ce_ib": 8.374448776245117, + "ce_orig": 0.7532703280448914, + "epoch": 0.2214393558127831, + "kl_loss": 0.14771360158920288, + "loss_ib": 0.002314580837264657, + "step": 770 + }, + { + "ce_ib": 7.05565881729126, + "ce_orig": 0.4825326204299927, + "epoch": 0.22172693939176072, + "kl_loss": 0.13400578498840332, + "loss_ib": 0.0020456237252801657, + "step": 771 + }, + { + "ce_ib": 9.674530982971191, + "ce_orig": 0.9703323245048523, + "epoch": 0.22172693939176072, + "kl_loss": 0.21550621092319489, + "loss_ib": 0.003122515045106411, + "step": 771 + }, + { + "ce_ib": 7.9153361320495605, + "ce_orig": 0.7711546421051025, + "epoch": 0.22172693939176072, + "kl_loss": 0.1621248424053192, + "loss_ib": 0.0024127820506691933, + "step": 771 + }, + { + "ce_ib": 5.088393211364746, + "ce_orig": 0.41473719477653503, + "epoch": 0.22172693939176072, + "kl_loss": 0.2381688356399536, + "loss_ib": 0.002890527481213212, + "step": 771 + }, + { + "ce_ib": 5.727818965911865, + "ce_orig": 0.47517552971839905, + "epoch": 0.22201452297073837, + "kl_loss": 0.09936833381652832, + "loss_ib": 0.0015664651291444898, + "step": 772 + }, + { + "ce_ib": 8.125673294067383, + "ce_orig": 0.8591288924217224, + "epoch": 0.22201452297073837, + "kl_loss": 0.16502976417541504, + "loss_ib": 0.0024628648534417152, + "step": 772 + }, + { + "ce_ib": 5.820714473724365, + "ce_orig": 0.6233316659927368, + "epoch": 0.22201452297073837, + "kl_loss": 0.1810447871685028, + "loss_ib": 0.0023925192654132843, + "step": 772 + }, + { + "ce_ib": 4.671726703643799, + "ce_orig": 0.31970176100730896, + "epoch": 0.22201452297073837, + "kl_loss": 0.06134669482707977, + "loss_ib": 0.001080639660358429, + "step": 772 + }, + { + "ce_ib": 7.360152721405029, + "ce_orig": 0.5947248935699463, + "epoch": 0.22230210654971602, + "kl_loss": 0.1539122462272644, + "loss_ib": 0.002275137696415186, + "step": 773 + }, + { + "ce_ib": 6.672601222991943, + "ce_orig": 0.6609217524528503, + "epoch": 0.22230210654971602, + "kl_loss": 0.1591755449771881, + "loss_ib": 0.0022590155713260174, + "step": 773 + }, + { + "ce_ib": 5.962435245513916, + "ce_orig": 0.6456025242805481, + "epoch": 0.22230210654971602, + "kl_loss": 0.08421975374221802, + "loss_ib": 0.0014384409878402948, + "step": 773 + }, + { + "ce_ib": 10.913044929504395, + "ce_orig": 1.3626434803009033, + "epoch": 0.22230210654971602, + "kl_loss": 0.18016685545444489, + "loss_ib": 0.0028929731342941523, + "step": 773 + }, + { + "ce_ib": 5.439235687255859, + "ce_orig": 0.5556134581565857, + "epoch": 0.22258969012869365, + "kl_loss": 0.14146508276462555, + "loss_ib": 0.001958574401214719, + "step": 774 + }, + { + "ce_ib": 11.287528038024902, + "ce_orig": 1.369814395904541, + "epoch": 0.22258969012869365, + "kl_loss": 0.1479172706604004, + "loss_ib": 0.0026079255621880293, + "step": 774 + }, + { + "ce_ib": 10.662454605102539, + "ce_orig": 1.3886735439300537, + "epoch": 0.22258969012869365, + "kl_loss": 0.1496671736240387, + "loss_ib": 0.0025629170704632998, + "step": 774 + }, + { + "ce_ib": 6.370062351226807, + "ce_orig": 0.7246447801589966, + "epoch": 0.22258969012869365, + "kl_loss": 0.10358047485351562, + "loss_ib": 0.0016728108748793602, + "step": 774 + }, + { + "epoch": 0.2228772737076713, + "grad_norm": 0.09098898619413376, + "learning_rate": 4.975102865750725e-05, + "loss": 0.8571, + "step": 775 + }, + { + "ce_ib": 9.296186447143555, + "ce_orig": 0.8976593613624573, + "epoch": 0.2228772737076713, + "kl_loss": 0.11623773723840714, + "loss_ib": 0.0020919961389154196, + "step": 775 + }, + { + "ce_ib": 12.612628936767578, + "ce_orig": 1.3324798345565796, + "epoch": 0.2228772737076713, + "kl_loss": 0.14951446652412415, + "loss_ib": 0.0027564074844121933, + "step": 775 + }, + { + "ce_ib": 7.314347267150879, + "ce_orig": 0.8472996354103088, + "epoch": 0.2228772737076713, + "kl_loss": 0.3302251100540161, + "loss_ib": 0.00403368566185236, + "step": 775 + }, + { + "ce_ib": 7.56318473815918, + "ce_orig": 0.82821124792099, + "epoch": 0.2228772737076713, + "kl_loss": 0.13697001338005066, + "loss_ib": 0.0021260185167193413, + "step": 775 + }, + { + "ce_ib": 8.542595863342285, + "ce_orig": 1.0312063694000244, + "epoch": 0.22316485728664892, + "kl_loss": 0.11569753289222717, + "loss_ib": 0.0020112348720431328, + "step": 776 + }, + { + "ce_ib": 7.049263000488281, + "ce_orig": 0.8572791814804077, + "epoch": 0.22316485728664892, + "kl_loss": 0.12043007463216782, + "loss_ib": 0.0019092269940301776, + "step": 776 + }, + { + "ce_ib": 9.571832656860352, + "ce_orig": 0.8414773941040039, + "epoch": 0.22316485728664892, + "kl_loss": 0.22330361604690552, + "loss_ib": 0.0031902194023132324, + "step": 776 + }, + { + "ce_ib": 6.793343544006348, + "ce_orig": 0.8694676160812378, + "epoch": 0.22316485728664892, + "kl_loss": 0.10456552356481552, + "loss_ib": 0.0017249895026907325, + "step": 776 + }, + { + "ce_ib": 8.001526832580566, + "ce_orig": 0.7133891582489014, + "epoch": 0.22345244086562657, + "kl_loss": 0.12774983048439026, + "loss_ib": 0.0020776509772986174, + "step": 777 + }, + { + "ce_ib": 6.953750133514404, + "ce_orig": 0.7376645803451538, + "epoch": 0.22345244086562657, + "kl_loss": 0.11668390780687332, + "loss_ib": 0.001862214063294232, + "step": 777 + }, + { + "ce_ib": 14.96567440032959, + "ce_orig": 2.0463645458221436, + "epoch": 0.22345244086562657, + "kl_loss": 0.23001086711883545, + "loss_ib": 0.003796675940975547, + "step": 777 + }, + { + "ce_ib": 6.386999130249023, + "ce_orig": 0.5333241820335388, + "epoch": 0.22345244086562657, + "kl_loss": 0.1477910727262497, + "loss_ib": 0.002116610761731863, + "step": 777 + }, + { + "ce_ib": 4.84302282333374, + "ce_orig": 0.6310755014419556, + "epoch": 0.22374002444460422, + "kl_loss": 0.08825128525495529, + "loss_ib": 0.0013668150641024113, + "step": 778 + }, + { + "ce_ib": 11.377039909362793, + "ce_orig": 1.1023659706115723, + "epoch": 0.22374002444460422, + "kl_loss": 0.14939387142658234, + "loss_ib": 0.0026316428557038307, + "step": 778 + }, + { + "ce_ib": 8.388514518737793, + "ce_orig": 0.8287127614021301, + "epoch": 0.22374002444460422, + "kl_loss": 0.15204116702079773, + "loss_ib": 0.002359262900426984, + "step": 778 + }, + { + "ce_ib": 7.143672943115234, + "ce_orig": 1.143507719039917, + "epoch": 0.22374002444460422, + "kl_loss": 0.15379482507705688, + "loss_ib": 0.0022523156367242336, + "step": 778 + }, + { + "ce_ib": 6.243022918701172, + "ce_orig": 0.6409770846366882, + "epoch": 0.22402760802358185, + "kl_loss": 0.1467035710811615, + "loss_ib": 0.00209133792668581, + "step": 779 + }, + { + "ce_ib": 6.419865608215332, + "ce_orig": 0.5069408416748047, + "epoch": 0.22402760802358185, + "kl_loss": 0.11442327499389648, + "loss_ib": 0.0017862193053588271, + "step": 779 + }, + { + "ce_ib": 8.781683921813965, + "ce_orig": 0.7263645529747009, + "epoch": 0.22402760802358185, + "kl_loss": 0.1588398516178131, + "loss_ib": 0.0024665668606758118, + "step": 779 + }, + { + "ce_ib": 5.928696155548096, + "ce_orig": 0.7882704734802246, + "epoch": 0.22402760802358185, + "kl_loss": 0.11991754919290543, + "loss_ib": 0.0017920450773090124, + "step": 779 + }, + { + "epoch": 0.2243151916025595, + "grad_norm": 0.10398890823125839, + "learning_rate": 4.9745536047023324e-05, + "loss": 0.8706, + "step": 780 + }, + { + "ce_ib": 8.916302680969238, + "ce_orig": 0.8575690984725952, + "epoch": 0.2243151916025595, + "kl_loss": 0.15519979596138, + "loss_ib": 0.002443628152832389, + "step": 780 + }, + { + "ce_ib": 9.42071533203125, + "ce_orig": 0.7921391725540161, + "epoch": 0.2243151916025595, + "kl_loss": 0.1433313935995102, + "loss_ib": 0.0023753854911774397, + "step": 780 + }, + { + "ce_ib": 8.125630378723145, + "ce_orig": 1.126109004020691, + "epoch": 0.2243151916025595, + "kl_loss": 0.1137545108795166, + "loss_ib": 0.001950108096934855, + "step": 780 + }, + { + "ce_ib": 9.454373359680176, + "ce_orig": 0.7673488259315491, + "epoch": 0.2243151916025595, + "kl_loss": 0.1790568232536316, + "loss_ib": 0.0027360054664313793, + "step": 780 + }, + { + "ce_ib": 8.905075073242188, + "ce_orig": 1.072008490562439, + "epoch": 0.22460277518153712, + "kl_loss": 0.2111148089170456, + "loss_ib": 0.0030016556847840548, + "step": 781 + }, + { + "ce_ib": 6.170800685882568, + "ce_orig": 0.6450889706611633, + "epoch": 0.22460277518153712, + "kl_loss": 0.14403045177459717, + "loss_ib": 0.00205738446675241, + "step": 781 + }, + { + "ce_ib": 4.165629863739014, + "ce_orig": 0.602213442325592, + "epoch": 0.22460277518153712, + "kl_loss": 0.09626461565494537, + "loss_ib": 0.001379209104925394, + "step": 781 + }, + { + "ce_ib": 11.429503440856934, + "ce_orig": 1.4089714288711548, + "epoch": 0.22460277518153712, + "kl_loss": 0.07960424572229385, + "loss_ib": 0.0019389926455914974, + "step": 781 + }, + { + "ce_ib": 5.01399040222168, + "ce_orig": 0.8204609751701355, + "epoch": 0.22489035876051477, + "kl_loss": 0.13064663112163544, + "loss_ib": 0.0018078653374686837, + "step": 782 + }, + { + "ce_ib": 8.626324653625488, + "ce_orig": 1.1237170696258545, + "epoch": 0.22489035876051477, + "kl_loss": 0.10630378127098083, + "loss_ib": 0.0019256701925769448, + "step": 782 + }, + { + "ce_ib": 8.674455642700195, + "ce_orig": 1.2192915678024292, + "epoch": 0.22489035876051477, + "kl_loss": 0.11441108584403992, + "loss_ib": 0.002011556178331375, + "step": 782 + }, + { + "ce_ib": 4.90167760848999, + "ce_orig": 0.7894431352615356, + "epoch": 0.22489035876051477, + "kl_loss": 0.13594220578670502, + "loss_ib": 0.0018495897529646754, + "step": 782 + }, + { + "ce_ib": 8.894073486328125, + "ce_orig": 0.8344406485557556, + "epoch": 0.22517794233949243, + "kl_loss": 0.2180064618587494, + "loss_ib": 0.0030694720335304737, + "step": 783 + }, + { + "ce_ib": 7.178038597106934, + "ce_orig": 0.6856690645217896, + "epoch": 0.22517794233949243, + "kl_loss": 0.14548270404338837, + "loss_ib": 0.0021726307459175587, + "step": 783 + }, + { + "ce_ib": 5.8297576904296875, + "ce_orig": 0.644056499004364, + "epoch": 0.22517794233949243, + "kl_loss": 0.1136014312505722, + "loss_ib": 0.0017189900390803814, + "step": 783 + }, + { + "ce_ib": 6.987138271331787, + "ce_orig": 0.2856312394142151, + "epoch": 0.22517794233949243, + "kl_loss": 0.25451338291168213, + "loss_ib": 0.003243847517296672, + "step": 783 + }, + { + "ce_ib": 7.792212009429932, + "ce_orig": 1.1498944759368896, + "epoch": 0.22546552591847005, + "kl_loss": 0.09925955533981323, + "loss_ib": 0.0017718167509883642, + "step": 784 + }, + { + "ce_ib": 10.821850776672363, + "ce_orig": 1.169548511505127, + "epoch": 0.22546552591847005, + "kl_loss": 0.1603054404258728, + "loss_ib": 0.0026852393057197332, + "step": 784 + }, + { + "ce_ib": 11.060176849365234, + "ce_orig": 0.6884800791740417, + "epoch": 0.22546552591847005, + "kl_loss": 0.23210455477237701, + "loss_ib": 0.00342706311494112, + "step": 784 + }, + { + "ce_ib": 7.353107929229736, + "ce_orig": 1.0316444635391235, + "epoch": 0.22546552591847005, + "kl_loss": 0.11858627945184708, + "loss_ib": 0.0019211735343560576, + "step": 784 + }, + { + "epoch": 0.2257531094974477, + "grad_norm": 0.09250946342945099, + "learning_rate": 4.973998381879949e-05, + "loss": 0.8962, + "step": 785 + }, + { + "ce_ib": 10.600707054138184, + "ce_orig": 1.2356044054031372, + "epoch": 0.2257531094974477, + "kl_loss": 0.09297977387905121, + "loss_ib": 0.0019898684695363045, + "step": 785 + }, + { + "ce_ib": 9.557263374328613, + "ce_orig": 0.9233797788619995, + "epoch": 0.2257531094974477, + "kl_loss": 0.13532377779483795, + "loss_ib": 0.002308964030817151, + "step": 785 + }, + { + "ce_ib": 11.174012184143066, + "ce_orig": 1.6008797883987427, + "epoch": 0.2257531094974477, + "kl_loss": 0.20560556650161743, + "loss_ib": 0.003173456760123372, + "step": 785 + }, + { + "ce_ib": 7.452459812164307, + "ce_orig": 0.6774175763130188, + "epoch": 0.2257531094974477, + "kl_loss": 0.15653352439403534, + "loss_ib": 0.002310581039637327, + "step": 785 + }, + { + "ce_ib": 4.595323085784912, + "ce_orig": 0.48615762591362, + "epoch": 0.22604069307642533, + "kl_loss": 0.10117632150650024, + "loss_ib": 0.0014712954871356487, + "step": 786 + }, + { + "ce_ib": 9.364824295043945, + "ce_orig": 1.0560548305511475, + "epoch": 0.22604069307642533, + "kl_loss": 0.16391177475452423, + "loss_ib": 0.002575600054115057, + "step": 786 + }, + { + "ce_ib": 5.813327789306641, + "ce_orig": 0.7324556708335876, + "epoch": 0.22604069307642533, + "kl_loss": 0.14763601124286652, + "loss_ib": 0.0020576927345246077, + "step": 786 + }, + { + "ce_ib": 8.11837100982666, + "ce_orig": 0.6141310334205627, + "epoch": 0.22604069307642533, + "kl_loss": 0.19747400283813477, + "loss_ib": 0.0027865769807249308, + "step": 786 + }, + { + "ce_ib": 7.725346565246582, + "ce_orig": 0.5023128390312195, + "epoch": 0.22632827665540298, + "kl_loss": 0.1939527690410614, + "loss_ib": 0.0027120623271912336, + "step": 787 + }, + { + "ce_ib": 6.31313943862915, + "ce_orig": 0.5808637738227844, + "epoch": 0.22632827665540298, + "kl_loss": 0.1817108541727066, + "loss_ib": 0.0024484223686158657, + "step": 787 + }, + { + "ce_ib": 7.112109184265137, + "ce_orig": 0.41836559772491455, + "epoch": 0.22632827665540298, + "kl_loss": 0.1097191721200943, + "loss_ib": 0.0018084024777635932, + "step": 787 + }, + { + "ce_ib": 6.42437744140625, + "ce_orig": 0.45045945048332214, + "epoch": 0.22632827665540298, + "kl_loss": 0.4161728620529175, + "loss_ib": 0.004804166033864021, + "step": 787 + }, + { + "ce_ib": 6.569514274597168, + "ce_orig": 0.7998676300048828, + "epoch": 0.22661586023438063, + "kl_loss": 0.15867647528648376, + "loss_ib": 0.002243716036900878, + "step": 788 + }, + { + "ce_ib": 11.660774230957031, + "ce_orig": 1.3734692335128784, + "epoch": 0.22661586023438063, + "kl_loss": 0.1902739703655243, + "loss_ib": 0.0030688170809298754, + "step": 788 + }, + { + "ce_ib": 11.077461242675781, + "ce_orig": 1.3770290613174438, + "epoch": 0.22661586023438063, + "kl_loss": 0.1206635981798172, + "loss_ib": 0.0023143819998949766, + "step": 788 + }, + { + "ce_ib": 4.626669406890869, + "ce_orig": 0.5129048228263855, + "epoch": 0.22661586023438063, + "kl_loss": 0.13894721865653992, + "loss_ib": 0.0018521390156820416, + "step": 788 + }, + { + "ce_ib": 9.747454643249512, + "ce_orig": 0.8806663155555725, + "epoch": 0.22690344381335825, + "kl_loss": 0.216609925031662, + "loss_ib": 0.003140844637528062, + "step": 789 + }, + { + "ce_ib": 4.014584541320801, + "ce_orig": 0.5087191462516785, + "epoch": 0.22690344381335825, + "kl_loss": 0.10901661217212677, + "loss_ib": 0.0014916245127096772, + "step": 789 + }, + { + "ce_ib": 6.954845905303955, + "ce_orig": 0.5227988362312317, + "epoch": 0.22690344381335825, + "kl_loss": 0.13825106620788574, + "loss_ib": 0.0020779951009899378, + "step": 789 + }, + { + "ce_ib": 9.023216247558594, + "ce_orig": 0.9189206957817078, + "epoch": 0.22690344381335825, + "kl_loss": 0.14568957686424255, + "loss_ib": 0.0023592172656208277, + "step": 789 + }, + { + "epoch": 0.2271910273923359, + "grad_norm": 0.09999674558639526, + "learning_rate": 4.973437198621237e-05, + "loss": 0.8786, + "step": 790 + }, + { + "ce_ib": 9.979926109313965, + "ce_orig": 0.7355432510375977, + "epoch": 0.2271910273923359, + "kl_loss": 0.16418962180614471, + "loss_ib": 0.002639888785779476, + "step": 790 + }, + { + "ce_ib": 10.314345359802246, + "ce_orig": 1.2523021697998047, + "epoch": 0.2271910273923359, + "kl_loss": 0.14147615432739258, + "loss_ib": 0.0024461960420012474, + "step": 790 + }, + { + "ce_ib": 7.049835205078125, + "ce_orig": 0.5150954723358154, + "epoch": 0.2271910273923359, + "kl_loss": 0.21324026584625244, + "loss_ib": 0.002837385982275009, + "step": 790 + }, + { + "ce_ib": 6.2608137130737305, + "ce_orig": 0.8024607300758362, + "epoch": 0.2271910273923359, + "kl_loss": 0.11846175044775009, + "loss_ib": 0.0018106987699866295, + "step": 790 + }, + { + "ce_ib": 10.44774341583252, + "ce_orig": 0.47436025738716125, + "epoch": 0.22747861097131353, + "kl_loss": 0.30650708079338074, + "loss_ib": 0.004109845031052828, + "step": 791 + }, + { + "ce_ib": 6.676301956176758, + "ce_orig": 0.7750266194343567, + "epoch": 0.22747861097131353, + "kl_loss": 0.13108739256858826, + "loss_ib": 0.001978504005819559, + "step": 791 + }, + { + "ce_ib": 9.86817455291748, + "ce_orig": 0.7998197078704834, + "epoch": 0.22747861097131353, + "kl_loss": 0.158120796084404, + "loss_ib": 0.002568025141954422, + "step": 791 + }, + { + "ce_ib": 9.6447172164917, + "ce_orig": 1.2034450769424438, + "epoch": 0.22747861097131353, + "kl_loss": 0.15354730188846588, + "loss_ib": 0.0024999447632580996, + "step": 791 + }, + { + "ce_ib": 10.113997459411621, + "ce_orig": 0.657515823841095, + "epoch": 0.22776619455029118, + "kl_loss": 0.17590703070163727, + "loss_ib": 0.0027704699896275997, + "step": 792 + }, + { + "ce_ib": 9.48293685913086, + "ce_orig": 1.4745361804962158, + "epoch": 0.22776619455029118, + "kl_loss": 0.1749359369277954, + "loss_ib": 0.0026976531371474266, + "step": 792 + }, + { + "ce_ib": 12.24776554107666, + "ce_orig": 1.7418227195739746, + "epoch": 0.22776619455029118, + "kl_loss": 0.17912819981575012, + "loss_ib": 0.0030160583555698395, + "step": 792 + }, + { + "ce_ib": 5.070426940917969, + "ce_orig": 0.35741880536079407, + "epoch": 0.22776619455029118, + "kl_loss": 0.12080200016498566, + "loss_ib": 0.001715062651783228, + "step": 792 + }, + { + "ce_ib": 10.660409927368164, + "ce_orig": 0.9658419489860535, + "epoch": 0.22805377812926883, + "kl_loss": 0.17371653020381927, + "loss_ib": 0.002803206443786621, + "step": 793 + }, + { + "ce_ib": 5.962430477142334, + "ce_orig": 0.6669458150863647, + "epoch": 0.22805377812926883, + "kl_loss": 0.16286441683769226, + "loss_ib": 0.0022248870227485895, + "step": 793 + }, + { + "ce_ib": 7.378296852111816, + "ce_orig": 0.938609778881073, + "epoch": 0.22805377812926883, + "kl_loss": 0.15764188766479492, + "loss_ib": 0.002314248587936163, + "step": 793 + }, + { + "ce_ib": 10.278300285339355, + "ce_orig": 0.8079096078872681, + "epoch": 0.22805377812926883, + "kl_loss": 0.20027652382850647, + "loss_ib": 0.003030595136806369, + "step": 793 + }, + { + "ce_ib": 3.596778154373169, + "ce_orig": 0.483516126871109, + "epoch": 0.22834136170824645, + "kl_loss": 0.4539979100227356, + "loss_ib": 0.004899656865745783, + "step": 794 + }, + { + "ce_ib": 7.560880661010742, + "ce_orig": 0.12549665570259094, + "epoch": 0.22834136170824645, + "kl_loss": 0.4688561260700226, + "loss_ib": 0.005444649141281843, + "step": 794 + }, + { + "ce_ib": 6.17995548248291, + "ce_orig": 0.4187643826007843, + "epoch": 0.22834136170824645, + "kl_loss": 0.12410400807857513, + "loss_ib": 0.001859035575762391, + "step": 794 + }, + { + "ce_ib": 9.39510726928711, + "ce_orig": 0.9322624206542969, + "epoch": 0.22834136170824645, + "kl_loss": 0.22806903719902039, + "loss_ib": 0.003220201004296541, + "step": 794 + }, + { + "epoch": 0.2286289452872241, + "grad_norm": 0.09920623153448105, + "learning_rate": 4.972870056278216e-05, + "loss": 0.8659, + "step": 795 + }, + { + "ce_ib": 8.673847198486328, + "ce_orig": 0.805489718914032, + "epoch": 0.2286289452872241, + "kl_loss": 0.12766921520233154, + "loss_ib": 0.0021440768614411354, + "step": 795 + }, + { + "ce_ib": 7.604935646057129, + "ce_orig": 0.6243016719818115, + "epoch": 0.2286289452872241, + "kl_loss": 0.19819331169128418, + "loss_ib": 0.0027424267027527094, + "step": 795 + }, + { + "ce_ib": 6.0254225730896, + "ce_orig": 0.33990374207496643, + "epoch": 0.2286289452872241, + "kl_loss": 0.10447593778371811, + "loss_ib": 0.0016473016003146768, + "step": 795 + }, + { + "ce_ib": 9.746012687683105, + "ce_orig": 1.2364401817321777, + "epoch": 0.2286289452872241, + "kl_loss": 0.12592259049415588, + "loss_ib": 0.002233827020972967, + "step": 795 + }, + { + "ce_ib": 11.734527587890625, + "ce_orig": 1.7873170375823975, + "epoch": 0.22891652886620173, + "kl_loss": 0.1425134390592575, + "loss_ib": 0.00259858719073236, + "step": 796 + }, + { + "ce_ib": 8.280077934265137, + "ce_orig": 1.111116886138916, + "epoch": 0.22891652886620173, + "kl_loss": 0.15820619463920593, + "loss_ib": 0.0024100695736706257, + "step": 796 + }, + { + "ce_ib": 7.481040000915527, + "ce_orig": 0.8186451196670532, + "epoch": 0.22891652886620173, + "kl_loss": 0.13088949024677277, + "loss_ib": 0.002056998899206519, + "step": 796 + }, + { + "ce_ib": 9.56103801727295, + "ce_orig": 0.745765209197998, + "epoch": 0.22891652886620173, + "kl_loss": 0.18324166536331177, + "loss_ib": 0.0027885204181075096, + "step": 796 + }, + { + "ce_ib": 6.580921173095703, + "ce_orig": 0.7819819450378418, + "epoch": 0.22920411244517938, + "kl_loss": 0.11691413819789886, + "loss_ib": 0.0018272333545610309, + "step": 797 + }, + { + "ce_ib": 6.933665752410889, + "ce_orig": 0.7905831336975098, + "epoch": 0.22920411244517938, + "kl_loss": 0.15403807163238525, + "loss_ib": 0.002233747160062194, + "step": 797 + }, + { + "ce_ib": 10.086153984069824, + "ce_orig": 0.8927478790283203, + "epoch": 0.22920411244517938, + "kl_loss": 0.22950385510921478, + "loss_ib": 0.0033036537934094667, + "step": 797 + }, + { + "ce_ib": 8.20946216583252, + "ce_orig": 0.8975688815116882, + "epoch": 0.22920411244517938, + "kl_loss": 0.22808901965618134, + "loss_ib": 0.003101836424320936, + "step": 797 + }, + { + "ce_ib": 6.896512985229492, + "ce_orig": 0.6491000652313232, + "epoch": 0.22949169602415703, + "kl_loss": 0.1170637458562851, + "loss_ib": 0.0018602886702865362, + "step": 798 + }, + { + "ce_ib": 5.728438377380371, + "ce_orig": 0.8358505368232727, + "epoch": 0.22949169602415703, + "kl_loss": 0.1260027289390564, + "loss_ib": 0.0018328711157664657, + "step": 798 + }, + { + "ce_ib": 5.766465187072754, + "ce_orig": 0.6195739507675171, + "epoch": 0.22949169602415703, + "kl_loss": 0.16632917523384094, + "loss_ib": 0.0022399381268769503, + "step": 798 + }, + { + "ce_ib": 4.202791690826416, + "ce_orig": 0.4293147325515747, + "epoch": 0.22949169602415703, + "kl_loss": 0.10201328992843628, + "loss_ib": 0.0014404120156541467, + "step": 798 + }, + { + "ce_ib": 10.78468132019043, + "ce_orig": 1.314720869064331, + "epoch": 0.22977927960313466, + "kl_loss": 0.08088640868663788, + "loss_ib": 0.0018873321823775768, + "step": 799 + }, + { + "ce_ib": 7.550471305847168, + "ce_orig": 0.5705544352531433, + "epoch": 0.22977927960313466, + "kl_loss": 0.17705941200256348, + "loss_ib": 0.0025256413500756025, + "step": 799 + }, + { + "ce_ib": 7.8654398918151855, + "ce_orig": 0.6908157467842102, + "epoch": 0.22977927960313466, + "kl_loss": 0.15278248488903046, + "loss_ib": 0.0023143687285482883, + "step": 799 + }, + { + "ce_ib": 9.453792572021484, + "ce_orig": 0.9354315996170044, + "epoch": 0.22977927960313466, + "kl_loss": 0.15325641632080078, + "loss_ib": 0.0024779431987553835, + "step": 799 + }, + { + "epoch": 0.2300668631821123, + "grad_norm": 0.08366382122039795, + "learning_rate": 4.972296956217265e-05, + "loss": 0.8533, + "step": 800 + }, + { + "ce_ib": 6.754378795623779, + "ce_orig": 0.6593028903007507, + "epoch": 0.2300668631821123, + "kl_loss": 0.27594077587127686, + "loss_ib": 0.0034348457120358944, + "step": 800 + }, + { + "ce_ib": 9.11864185333252, + "ce_orig": 1.0368092060089111, + "epoch": 0.2300668631821123, + "kl_loss": 0.2670667767524719, + "loss_ib": 0.0035825318191200495, + "step": 800 + }, + { + "ce_ib": 9.781817436218262, + "ce_orig": 1.2738507986068726, + "epoch": 0.2300668631821123, + "kl_loss": 0.14330054819583893, + "loss_ib": 0.0024111871607601643, + "step": 800 + }, + { + "ce_ib": 11.391901969909668, + "ce_orig": 1.4393359422683716, + "epoch": 0.2300668631821123, + "kl_loss": 0.17502886056900024, + "loss_ib": 0.002889478811994195, + "step": 800 + }, + { + "ce_ib": 5.774404525756836, + "ce_orig": 0.4819744825363159, + "epoch": 0.23035444676108993, + "kl_loss": 0.12495163083076477, + "loss_ib": 0.0018269566353410482, + "step": 801 + }, + { + "ce_ib": 8.93179702758789, + "ce_orig": 0.9988149404525757, + "epoch": 0.23035444676108993, + "kl_loss": 0.15840879082679749, + "loss_ib": 0.0024772675242275, + "step": 801 + }, + { + "ce_ib": 6.787378787994385, + "ce_orig": 0.6689311861991882, + "epoch": 0.23035444676108993, + "kl_loss": 0.12284412235021591, + "loss_ib": 0.0019071790156885982, + "step": 801 + }, + { + "ce_ib": 6.773270130157471, + "ce_orig": 0.8712309002876282, + "epoch": 0.23035444676108993, + "kl_loss": 0.1521710455417633, + "loss_ib": 0.0021990372333675623, + "step": 801 + }, + { + "ce_ib": 11.580021858215332, + "ce_orig": 0.9791839122772217, + "epoch": 0.23064203034006758, + "kl_loss": 0.12627840042114258, + "loss_ib": 0.0024207860697060823, + "step": 802 + }, + { + "ce_ib": 9.938983917236328, + "ce_orig": 1.2283821105957031, + "epoch": 0.23064203034006758, + "kl_loss": 0.27586644887924194, + "loss_ib": 0.00375256291590631, + "step": 802 + }, + { + "ce_ib": 7.058392524719238, + "ce_orig": 0.5756127834320068, + "epoch": 0.23064203034006758, + "kl_loss": 0.1477137804031372, + "loss_ib": 0.0021829770412296057, + "step": 802 + }, + { + "ce_ib": 3.1069231033325195, + "ce_orig": 0.1339166760444641, + "epoch": 0.23064203034006758, + "kl_loss": 0.3081814646720886, + "loss_ib": 0.0033925068564713, + "step": 802 + }, + { + "ce_ib": 10.960715293884277, + "ce_orig": 1.3200205564498901, + "epoch": 0.23092961391904523, + "kl_loss": 0.16258013248443604, + "loss_ib": 0.002721872879192233, + "step": 803 + }, + { + "ce_ib": 9.877068519592285, + "ce_orig": 0.7766249179840088, + "epoch": 0.23092961391904523, + "kl_loss": 0.11411984264850616, + "loss_ib": 0.0021289053838700056, + "step": 803 + }, + { + "ce_ib": 8.853331565856934, + "ce_orig": 1.3422200679779053, + "epoch": 0.23092961391904523, + "kl_loss": 0.2864736318588257, + "loss_ib": 0.003750069299712777, + "step": 803 + }, + { + "ce_ib": 6.893815994262695, + "ce_orig": 0.8211920261383057, + "epoch": 0.23092961391904523, + "kl_loss": 0.1476418375968933, + "loss_ib": 0.0021657999604940414, + "step": 803 + }, + { + "ce_ib": 5.529889106750488, + "ce_orig": 0.5488899946212769, + "epoch": 0.23121719749802286, + "kl_loss": 0.09762927889823914, + "loss_ib": 0.0015292817261070013, + "step": 804 + }, + { + "ce_ib": 7.026906967163086, + "ce_orig": 0.7769794464111328, + "epoch": 0.23121719749802286, + "kl_loss": 0.09977978467941284, + "loss_ib": 0.001700488617643714, + "step": 804 + }, + { + "ce_ib": 4.3003740310668945, + "ce_orig": 0.3249621093273163, + "epoch": 0.23121719749802286, + "kl_loss": 0.32431769371032715, + "loss_ib": 0.0036732142325490713, + "step": 804 + }, + { + "ce_ib": 8.477049827575684, + "ce_orig": 0.7804782390594482, + "epoch": 0.23121719749802286, + "kl_loss": 0.10728403180837631, + "loss_ib": 0.0019205452408641577, + "step": 804 + }, + { + "epoch": 0.2315047810770005, + "grad_norm": 0.0931197851896286, + "learning_rate": 4.971717899819113e-05, + "loss": 0.8871, + "step": 805 + }, + { + "ce_ib": 9.26341438293457, + "ce_orig": 1.0961662530899048, + "epoch": 0.2315047810770005, + "kl_loss": 0.17699837684631348, + "loss_ib": 0.002696325071156025, + "step": 805 + }, + { + "ce_ib": 9.548799514770508, + "ce_orig": 1.2269480228424072, + "epoch": 0.2315047810770005, + "kl_loss": 0.18628443777561188, + "loss_ib": 0.0028177243657410145, + "step": 805 + }, + { + "ce_ib": 9.232553482055664, + "ce_orig": 1.10349440574646, + "epoch": 0.2315047810770005, + "kl_loss": 0.17955434322357178, + "loss_ib": 0.002718798816204071, + "step": 805 + }, + { + "ce_ib": 9.328289985656738, + "ce_orig": 1.420828938484192, + "epoch": 0.2315047810770005, + "kl_loss": 0.1031360924243927, + "loss_ib": 0.0019641900435090065, + "step": 805 + }, + { + "ce_ib": 8.62269401550293, + "ce_orig": 0.6604496836662292, + "epoch": 0.23179236465597813, + "kl_loss": 0.14411967992782593, + "loss_ib": 0.0023034662008285522, + "step": 806 + }, + { + "ce_ib": 11.635430335998535, + "ce_orig": 1.4418182373046875, + "epoch": 0.23179236465597813, + "kl_loss": 0.14442530274391174, + "loss_ib": 0.0026077961083501577, + "step": 806 + }, + { + "ce_ib": 10.606904983520508, + "ce_orig": 1.4939662218093872, + "epoch": 0.23179236465597813, + "kl_loss": 0.10982929170131683, + "loss_ib": 0.002158983377739787, + "step": 806 + }, + { + "ce_ib": 9.094931602478027, + "ce_orig": 1.313065528869629, + "epoch": 0.23179236465597813, + "kl_loss": 0.11446712166070938, + "loss_ib": 0.002054164418950677, + "step": 806 + }, + { + "ce_ib": 8.538125038146973, + "ce_orig": 1.2573816776275635, + "epoch": 0.23207994823495579, + "kl_loss": 0.15647096931934357, + "loss_ib": 0.0024185222573578358, + "step": 807 + }, + { + "ce_ib": 6.605203151702881, + "ce_orig": 0.6826567053794861, + "epoch": 0.23207994823495579, + "kl_loss": 0.14014863967895508, + "loss_ib": 0.0020620066206902266, + "step": 807 + }, + { + "ce_ib": 8.780204772949219, + "ce_orig": 1.2285255193710327, + "epoch": 0.23207994823495579, + "kl_loss": 0.10112646222114563, + "loss_ib": 0.0018892850494012237, + "step": 807 + }, + { + "ce_ib": 8.27536392211914, + "ce_orig": 0.528601884841919, + "epoch": 0.23207994823495579, + "kl_loss": 0.2035016268491745, + "loss_ib": 0.002862552646547556, + "step": 807 + }, + { + "ce_ib": 6.892450332641602, + "ce_orig": 0.8363218903541565, + "epoch": 0.23236753181393344, + "kl_loss": 0.14483782649040222, + "loss_ib": 0.0021376232616603374, + "step": 808 + }, + { + "ce_ib": 9.606917381286621, + "ce_orig": 1.0296199321746826, + "epoch": 0.23236753181393344, + "kl_loss": 0.2421770691871643, + "loss_ib": 0.003382462076842785, + "step": 808 + }, + { + "ce_ib": 8.105813026428223, + "ce_orig": 0.7836432456970215, + "epoch": 0.23236753181393344, + "kl_loss": 0.1529703140258789, + "loss_ib": 0.0023402844090014696, + "step": 808 + }, + { + "ce_ib": 9.704617500305176, + "ce_orig": 1.174484372138977, + "epoch": 0.23236753181393344, + "kl_loss": 0.13153155148029327, + "loss_ib": 0.002285777358338237, + "step": 808 + }, + { + "ce_ib": 6.115880012512207, + "ce_orig": 0.7601160407066345, + "epoch": 0.23265511539291106, + "kl_loss": 0.10592241585254669, + "loss_ib": 0.0016708120238035917, + "step": 809 + }, + { + "ce_ib": 6.771764278411865, + "ce_orig": 0.5327339172363281, + "epoch": 0.23265511539291106, + "kl_loss": 0.20717453956604004, + "loss_ib": 0.0027489217463880777, + "step": 809 + }, + { + "ce_ib": 11.804062843322754, + "ce_orig": 0.6770117282867432, + "epoch": 0.23265511539291106, + "kl_loss": 0.11569809913635254, + "loss_ib": 0.002337387064471841, + "step": 809 + }, + { + "ce_ib": 8.106520652770996, + "ce_orig": 1.036596655845642, + "epoch": 0.23265511539291106, + "kl_loss": 0.13593992590904236, + "loss_ib": 0.0021700512152165174, + "step": 809 + }, + { + "epoch": 0.2329426989718887, + "grad_norm": 0.10620737075805664, + "learning_rate": 4.9711328884788434e-05, + "loss": 0.8885, + "step": 810 + }, + { + "ce_ib": 9.17796802520752, + "ce_orig": 1.0972896814346313, + "epoch": 0.2329426989718887, + "kl_loss": 0.13107869029045105, + "loss_ib": 0.002228583674877882, + "step": 810 + }, + { + "ce_ib": 10.562804222106934, + "ce_orig": 0.9702454209327698, + "epoch": 0.2329426989718887, + "kl_loss": 0.14495953917503357, + "loss_ib": 0.0025058756582438946, + "step": 810 + }, + { + "ce_ib": 6.212893962860107, + "ce_orig": 0.783852219581604, + "epoch": 0.2329426989718887, + "kl_loss": 0.1454237699508667, + "loss_ib": 0.002075526863336563, + "step": 810 + }, + { + "ce_ib": 11.819994926452637, + "ce_orig": 1.4107239246368408, + "epoch": 0.2329426989718887, + "kl_loss": 0.1865510791540146, + "loss_ib": 0.0030475102830678225, + "step": 810 + }, + { + "ce_ib": 6.329524517059326, + "ce_orig": 0.6930771470069885, + "epoch": 0.23323028255086634, + "kl_loss": 0.14692606031894684, + "loss_ib": 0.002102212980389595, + "step": 811 + }, + { + "ce_ib": 7.555665969848633, + "ce_orig": 1.0139789581298828, + "epoch": 0.23323028255086634, + "kl_loss": 0.11456457525491714, + "loss_ib": 0.0019012122647836804, + "step": 811 + }, + { + "ce_ib": 6.150085926055908, + "ce_orig": 0.5341890454292297, + "epoch": 0.23323028255086634, + "kl_loss": 0.19206318259239197, + "loss_ib": 0.002535640262067318, + "step": 811 + }, + { + "ce_ib": 9.82231330871582, + "ce_orig": 0.7214909195899963, + "epoch": 0.23323028255086634, + "kl_loss": 0.08180269598960876, + "loss_ib": 0.0018002580618485808, + "step": 811 + }, + { + "ce_ib": 10.72133731842041, + "ce_orig": 1.3293489217758179, + "epoch": 0.233517866129844, + "kl_loss": 0.1370580643415451, + "loss_ib": 0.0024427142925560474, + "step": 812 + }, + { + "ce_ib": 7.326596736907959, + "ce_orig": 1.0470627546310425, + "epoch": 0.233517866129844, + "kl_loss": 0.1406283676624298, + "loss_ib": 0.002138943411409855, + "step": 812 + }, + { + "ce_ib": 12.790474891662598, + "ce_orig": 1.9125604629516602, + "epoch": 0.233517866129844, + "kl_loss": 0.17688024044036865, + "loss_ib": 0.00304784975014627, + "step": 812 + }, + { + "ce_ib": 6.637095928192139, + "ce_orig": 0.4109753668308258, + "epoch": 0.233517866129844, + "kl_loss": 0.1006464958190918, + "loss_ib": 0.0016701745335012674, + "step": 812 + }, + { + "ce_ib": 9.163846015930176, + "ce_orig": 1.499794363975525, + "epoch": 0.23380544970882164, + "kl_loss": 0.13409070670604706, + "loss_ib": 0.002257291693240404, + "step": 813 + }, + { + "ce_ib": 7.725475311279297, + "ce_orig": 0.6007325649261475, + "epoch": 0.23380544970882164, + "kl_loss": 0.11577519029378891, + "loss_ib": 0.001930299331434071, + "step": 813 + }, + { + "ce_ib": 10.204571723937988, + "ce_orig": 1.0934782028198242, + "epoch": 0.23380544970882164, + "kl_loss": 0.11761227995157242, + "loss_ib": 0.0021965799387544394, + "step": 813 + }, + { + "ce_ib": 6.080852031707764, + "ce_orig": 0.566545844078064, + "epoch": 0.23380544970882164, + "kl_loss": 0.07402099668979645, + "loss_ib": 0.0013482951326295733, + "step": 813 + }, + { + "ce_ib": 7.153719425201416, + "ce_orig": 0.8980187773704529, + "epoch": 0.23409303328779926, + "kl_loss": 0.10498209297657013, + "loss_ib": 0.0017651927191764116, + "step": 814 + }, + { + "ce_ib": 6.541371822357178, + "ce_orig": 0.9704118371009827, + "epoch": 0.23409303328779926, + "kl_loss": 0.09279821068048477, + "loss_ib": 0.0015821191482245922, + "step": 814 + }, + { + "ce_ib": 6.066292762756348, + "ce_orig": 0.668864905834198, + "epoch": 0.23409303328779926, + "kl_loss": 0.10827763378620148, + "loss_ib": 0.0016894055297598243, + "step": 814 + }, + { + "ce_ib": 7.872057914733887, + "ce_orig": 0.6739667654037476, + "epoch": 0.23409303328779926, + "kl_loss": 0.11645969748497009, + "loss_ib": 0.0019518026383593678, + "step": 814 + }, + { + "epoch": 0.23438061686677691, + "grad_norm": 0.11317208409309387, + "learning_rate": 4.9705419236058825e-05, + "loss": 0.9053, + "step": 815 + }, + { + "ce_ib": 7.275731086730957, + "ce_orig": 0.5088436007499695, + "epoch": 0.23438061686677691, + "kl_loss": 0.1330629289150238, + "loss_ib": 0.0020582021679729223, + "step": 815 + }, + { + "ce_ib": 11.397271156311035, + "ce_orig": 0.6790756583213806, + "epoch": 0.23438061686677691, + "kl_loss": 0.18960818648338318, + "loss_ib": 0.0030358086805790663, + "step": 815 + }, + { + "ce_ib": 7.752520561218262, + "ce_orig": 0.5414913892745972, + "epoch": 0.23438061686677691, + "kl_loss": 0.16944292187690735, + "loss_ib": 0.002469681203365326, + "step": 815 + }, + { + "ce_ib": 6.504335403442383, + "ce_orig": 0.5197587609291077, + "epoch": 0.23438061686677691, + "kl_loss": 0.10620582848787308, + "loss_ib": 0.001712491735816002, + "step": 815 + }, + { + "ce_ib": 6.495504856109619, + "ce_orig": 1.07676362991333, + "epoch": 0.23466820044575454, + "kl_loss": 0.09302103519439697, + "loss_ib": 0.001579760923050344, + "step": 816 + }, + { + "ce_ib": 6.134542942047119, + "ce_orig": 1.04434335231781, + "epoch": 0.23466820044575454, + "kl_loss": 0.08342786133289337, + "loss_ib": 0.0014477329095825553, + "step": 816 + }, + { + "ce_ib": 6.335164546966553, + "ce_orig": 0.625989556312561, + "epoch": 0.23466820044575454, + "kl_loss": 0.14174768328666687, + "loss_ib": 0.002050993265584111, + "step": 816 + }, + { + "ce_ib": 10.488992691040039, + "ce_orig": 1.2239924669265747, + "epoch": 0.23466820044575454, + "kl_loss": 0.1575402021408081, + "loss_ib": 0.002624301239848137, + "step": 816 + }, + { + "ce_ib": 6.741306781768799, + "ce_orig": 0.8143828511238098, + "epoch": 0.2349557840247322, + "kl_loss": 0.14506280422210693, + "loss_ib": 0.00212475867010653, + "step": 817 + }, + { + "ce_ib": 5.542083740234375, + "ce_orig": 0.7346311211585999, + "epoch": 0.2349557840247322, + "kl_loss": 0.083269402384758, + "loss_ib": 0.0013869022950530052, + "step": 817 + }, + { + "ce_ib": 7.235450267791748, + "ce_orig": 0.6082554459571838, + "epoch": 0.2349557840247322, + "kl_loss": 0.16868101060390472, + "loss_ib": 0.002410355256870389, + "step": 817 + }, + { + "ce_ib": 6.437801837921143, + "ce_orig": 0.3926742672920227, + "epoch": 0.2349557840247322, + "kl_loss": 0.32581275701522827, + "loss_ib": 0.0039019077084958553, + "step": 817 + }, + { + "ce_ib": 7.426485061645508, + "ce_orig": 0.40766289830207825, + "epoch": 0.23524336760370984, + "kl_loss": 0.1242557093501091, + "loss_ib": 0.0019852055702358484, + "step": 818 + }, + { + "ce_ib": 7.13580846786499, + "ce_orig": 0.6141798496246338, + "epoch": 0.23524336760370984, + "kl_loss": 0.14575709402561188, + "loss_ib": 0.0021711518056690693, + "step": 818 + }, + { + "ce_ib": 7.478182315826416, + "ce_orig": 0.9550573825836182, + "epoch": 0.23524336760370984, + "kl_loss": 0.134103924036026, + "loss_ib": 0.0020888573490083218, + "step": 818 + }, + { + "ce_ib": 9.941823959350586, + "ce_orig": 1.12644362449646, + "epoch": 0.23524336760370984, + "kl_loss": 0.19895997643470764, + "loss_ib": 0.002983782207593322, + "step": 818 + }, + { + "ce_ib": 9.203929901123047, + "ce_orig": 1.0872249603271484, + "epoch": 0.23553095118268746, + "kl_loss": 0.19965161383152008, + "loss_ib": 0.0029169090557843447, + "step": 819 + }, + { + "ce_ib": 8.64352035522461, + "ce_orig": 1.0518972873687744, + "epoch": 0.23553095118268746, + "kl_loss": 0.10414116084575653, + "loss_ib": 0.0019057635217905045, + "step": 819 + }, + { + "ce_ib": 2.9574663639068604, + "ce_orig": 0.15662173926830292, + "epoch": 0.23553095118268746, + "kl_loss": 0.2213229537010193, + "loss_ib": 0.0025089760310947895, + "step": 819 + }, + { + "ce_ib": 9.636175155639648, + "ce_orig": 1.2638198137283325, + "epoch": 0.23553095118268746, + "kl_loss": 0.28710055351257324, + "loss_ib": 0.003834622912108898, + "step": 819 + }, + { + "epoch": 0.23581853476166512, + "grad_norm": 0.09467697888612747, + "learning_rate": 4.969945006624003e-05, + "loss": 0.8118, + "step": 820 + }, + { + "ce_ib": 6.223392963409424, + "ce_orig": 0.4875457286834717, + "epoch": 0.23581853476166512, + "kl_loss": 0.09896323084831238, + "loss_ib": 0.001611971529200673, + "step": 820 + }, + { + "ce_ib": 8.920371055603027, + "ce_orig": 1.074205756187439, + "epoch": 0.23581853476166512, + "kl_loss": 0.09543949365615845, + "loss_ib": 0.0018464321037754416, + "step": 820 + }, + { + "ce_ib": 7.298362731933594, + "ce_orig": 0.6542941331863403, + "epoch": 0.23581853476166512, + "kl_loss": 0.1842930018901825, + "loss_ib": 0.0025727662723511457, + "step": 820 + }, + { + "ce_ib": 4.780843734741211, + "ce_orig": 0.4712677597999573, + "epoch": 0.23581853476166512, + "kl_loss": 0.09687883406877518, + "loss_ib": 0.0014468726003542542, + "step": 820 + }, + { + "ce_ib": 13.902252197265625, + "ce_orig": 1.981177806854248, + "epoch": 0.23610611834064274, + "kl_loss": 0.21506190299987793, + "loss_ib": 0.003540844190865755, + "step": 821 + }, + { + "ce_ib": 4.231637001037598, + "ce_orig": 0.4178099036216736, + "epoch": 0.23610611834064274, + "kl_loss": 0.12692990899085999, + "loss_ib": 0.0016924628289416432, + "step": 821 + }, + { + "ce_ib": 9.162365913391113, + "ce_orig": 0.6495018005371094, + "epoch": 0.23610611834064274, + "kl_loss": 0.1748168021440506, + "loss_ib": 0.0026644044555723667, + "step": 821 + }, + { + "ce_ib": 6.503030776977539, + "ce_orig": 0.7990416288375854, + "epoch": 0.23610611834064274, + "kl_loss": 0.143586203455925, + "loss_ib": 0.002086165128275752, + "step": 821 + }, + { + "ce_ib": 7.101401329040527, + "ce_orig": 0.7612082958221436, + "epoch": 0.2363937019196204, + "kl_loss": 0.15823988616466522, + "loss_ib": 0.002292538760229945, + "step": 822 + }, + { + "ce_ib": 5.6985883712768555, + "ce_orig": 0.7191292643547058, + "epoch": 0.2363937019196204, + "kl_loss": 0.10908360779285431, + "loss_ib": 0.0016606948338449001, + "step": 822 + }, + { + "ce_ib": 5.306804656982422, + "ce_orig": 0.5981192588806152, + "epoch": 0.2363937019196204, + "kl_loss": 0.12700998783111572, + "loss_ib": 0.0018007803009822965, + "step": 822 + }, + { + "ce_ib": 6.363889694213867, + "ce_orig": 0.5744786858558655, + "epoch": 0.2363937019196204, + "kl_loss": 0.09524562954902649, + "loss_ib": 0.0015888451598584652, + "step": 822 + }, + { + "ce_ib": 14.43346118927002, + "ce_orig": 0.6333121061325073, + "epoch": 0.23668128549859804, + "kl_loss": 0.18850618600845337, + "loss_ib": 0.003328407881781459, + "step": 823 + }, + { + "ce_ib": 4.770277500152588, + "ce_orig": 0.6905955076217651, + "epoch": 0.23668128549859804, + "kl_loss": 0.10428404062986374, + "loss_ib": 0.0015198680339381099, + "step": 823 + }, + { + "ce_ib": 6.385842323303223, + "ce_orig": 0.5164174437522888, + "epoch": 0.23668128549859804, + "kl_loss": 0.17304900288581848, + "loss_ib": 0.0023690741509199142, + "step": 823 + }, + { + "ce_ib": 7.28901481628418, + "ce_orig": 0.8119218349456787, + "epoch": 0.23668128549859804, + "kl_loss": 0.11210718750953674, + "loss_ib": 0.0018499733414500952, + "step": 823 + }, + { + "ce_ib": 7.684753894805908, + "ce_orig": 0.4133395552635193, + "epoch": 0.23696886907757567, + "kl_loss": 0.24416936933994293, + "loss_ib": 0.003210169030353427, + "step": 824 + }, + { + "ce_ib": 6.748510360717773, + "ce_orig": 0.4567016363143921, + "epoch": 0.23696886907757567, + "kl_loss": 0.19947892427444458, + "loss_ib": 0.0026696401182562113, + "step": 824 + }, + { + "ce_ib": 9.122523307800293, + "ce_orig": 1.1772557497024536, + "epoch": 0.23696886907757567, + "kl_loss": 0.10364347696304321, + "loss_ib": 0.0019486871315166354, + "step": 824 + }, + { + "ce_ib": 8.981727600097656, + "ce_orig": 1.1669374704360962, + "epoch": 0.23696886907757567, + "kl_loss": 0.18251746892929077, + "loss_ib": 0.002723347395658493, + "step": 824 + }, + { + "epoch": 0.23725645265655332, + "grad_norm": 0.1238342672586441, + "learning_rate": 4.9693421389713156e-05, + "loss": 0.8523, + "step": 825 + }, + { + "ce_ib": 8.044266700744629, + "ce_orig": 1.2204943895339966, + "epoch": 0.23725645265655332, + "kl_loss": 0.13153710961341858, + "loss_ib": 0.002119797747582197, + "step": 825 + }, + { + "ce_ib": 5.693978786468506, + "ce_orig": 0.6213144063949585, + "epoch": 0.23725645265655332, + "kl_loss": 0.14717428386211395, + "loss_ib": 0.002041140804067254, + "step": 825 + }, + { + "ce_ib": 8.591774940490723, + "ce_orig": 1.2455124855041504, + "epoch": 0.23725645265655332, + "kl_loss": 0.11206641793251038, + "loss_ib": 0.0019798416178673506, + "step": 825 + }, + { + "ce_ib": 9.281439781188965, + "ce_orig": 0.8546298742294312, + "epoch": 0.23725645265655332, + "kl_loss": 0.2340502291917801, + "loss_ib": 0.0032686463091522455, + "step": 825 + }, + { + "ce_ib": 9.022857666015625, + "ce_orig": 1.0518991947174072, + "epoch": 0.23754403623553094, + "kl_loss": 0.11388400197029114, + "loss_ib": 0.0020411256700754166, + "step": 826 + }, + { + "ce_ib": 9.080761909484863, + "ce_orig": 0.9453988075256348, + "epoch": 0.23754403623553094, + "kl_loss": 0.13348603248596191, + "loss_ib": 0.002242936519905925, + "step": 826 + }, + { + "ce_ib": 9.81284236907959, + "ce_orig": 1.3243674039840698, + "epoch": 0.23754403623553094, + "kl_loss": 0.13746213912963867, + "loss_ib": 0.002355905482545495, + "step": 826 + }, + { + "ce_ib": 11.309003829956055, + "ce_orig": 1.3904995918273926, + "epoch": 0.23754403623553094, + "kl_loss": 0.13674914836883545, + "loss_ib": 0.0024983917828649282, + "step": 826 + }, + { + "ce_ib": 9.344799041748047, + "ce_orig": 1.560760259628296, + "epoch": 0.2378316198145086, + "kl_loss": 0.11014031618833542, + "loss_ib": 0.0020358830224722624, + "step": 827 + }, + { + "ce_ib": 5.963412761688232, + "ce_orig": 0.8929498791694641, + "epoch": 0.2378316198145086, + "kl_loss": 0.10516968369483948, + "loss_ib": 0.001648038043640554, + "step": 827 + }, + { + "ce_ib": 7.594840049743652, + "ce_orig": 0.8408666253089905, + "epoch": 0.2378316198145086, + "kl_loss": 0.09780211001634598, + "loss_ib": 0.0017375051975250244, + "step": 827 + }, + { + "ce_ib": 9.385004997253418, + "ce_orig": 1.3767368793487549, + "epoch": 0.2378316198145086, + "kl_loss": 0.17203593254089355, + "loss_ib": 0.0026588598266243935, + "step": 827 + }, + { + "ce_ib": 12.047724723815918, + "ce_orig": 1.5393359661102295, + "epoch": 0.23811920339348625, + "kl_loss": 0.11752515286207199, + "loss_ib": 0.002380023943260312, + "step": 828 + }, + { + "ce_ib": 7.751171112060547, + "ce_orig": 0.89743971824646, + "epoch": 0.23811920339348625, + "kl_loss": 0.15309442579746246, + "loss_ib": 0.0023060613311827183, + "step": 828 + }, + { + "ce_ib": 7.59684419631958, + "ce_orig": 0.5676822066307068, + "epoch": 0.23811920339348625, + "kl_loss": 0.1733154058456421, + "loss_ib": 0.00249283853918314, + "step": 828 + }, + { + "ce_ib": 10.951656341552734, + "ce_orig": 1.7123820781707764, + "epoch": 0.23811920339348625, + "kl_loss": 0.15858086943626404, + "loss_ib": 0.0026809743139892817, + "step": 828 + }, + { + "ce_ib": 6.502542972564697, + "ce_orig": 0.8747706413269043, + "epoch": 0.23840678697246387, + "kl_loss": 0.09269970655441284, + "loss_ib": 0.001577251241542399, + "step": 829 + }, + { + "ce_ib": 6.223927021026611, + "ce_orig": 0.6755560040473938, + "epoch": 0.23840678697246387, + "kl_loss": 0.12796303629875183, + "loss_ib": 0.001902023097500205, + "step": 829 + }, + { + "ce_ib": 5.137899875640869, + "ce_orig": 0.7314615249633789, + "epoch": 0.23840678697246387, + "kl_loss": 0.08606921136379242, + "loss_ib": 0.0013744820607826114, + "step": 829 + }, + { + "ce_ib": 7.8740620613098145, + "ce_orig": 0.7106395959854126, + "epoch": 0.23840678697246387, + "kl_loss": 0.11989139020442963, + "loss_ib": 0.001986319897696376, + "step": 829 + }, + { + "epoch": 0.23869437055144152, + "grad_norm": 0.11228325217962265, + "learning_rate": 4.96873332210027e-05, + "loss": 0.8861, + "step": 830 + }, + { + "ce_ib": 4.807431697845459, + "ce_orig": 0.8043792247772217, + "epoch": 0.23869437055144152, + "kl_loss": 0.07121051847934723, + "loss_ib": 0.0011928483145311475, + "step": 830 + }, + { + "ce_ib": 12.73621940612793, + "ce_orig": 0.9692448973655701, + "epoch": 0.23869437055144152, + "kl_loss": 0.1215188279747963, + "loss_ib": 0.0024888101033866405, + "step": 830 + }, + { + "ce_ib": 7.984679222106934, + "ce_orig": 1.0848784446716309, + "epoch": 0.23869437055144152, + "kl_loss": 0.15019136667251587, + "loss_ib": 0.0023003816604614258, + "step": 830 + }, + { + "ce_ib": 10.544111251831055, + "ce_orig": 1.18326997756958, + "epoch": 0.23869437055144152, + "kl_loss": 0.18155136704444885, + "loss_ib": 0.0028699247632175684, + "step": 830 + }, + { + "ce_ib": 5.490835189819336, + "ce_orig": 0.4462292790412903, + "epoch": 0.23898195413041914, + "kl_loss": 0.1400134116411209, + "loss_ib": 0.001949217519722879, + "step": 831 + }, + { + "ce_ib": 8.376250267028809, + "ce_orig": 0.8037113547325134, + "epoch": 0.23898195413041914, + "kl_loss": 0.13274028897285461, + "loss_ib": 0.002165027894079685, + "step": 831 + }, + { + "ce_ib": 9.137575149536133, + "ce_orig": 0.9506906867027283, + "epoch": 0.23898195413041914, + "kl_loss": 0.12928739190101624, + "loss_ib": 0.002206631237640977, + "step": 831 + }, + { + "ce_ib": 8.357852935791016, + "ce_orig": 1.6927680969238281, + "epoch": 0.23898195413041914, + "kl_loss": 0.16599999368190765, + "loss_ib": 0.002495785243809223, + "step": 831 + }, + { + "ce_ib": 13.266925811767578, + "ce_orig": 1.9612349271774292, + "epoch": 0.2392695377093968, + "kl_loss": 0.16674180328845978, + "loss_ib": 0.0029941105749458075, + "step": 832 + }, + { + "ce_ib": 5.552142143249512, + "ce_orig": 0.7127636671066284, + "epoch": 0.2392695377093968, + "kl_loss": 0.16101816296577454, + "loss_ib": 0.0021653957664966583, + "step": 832 + }, + { + "ce_ib": 7.777743339538574, + "ce_orig": 0.6769495010375977, + "epoch": 0.2392695377093968, + "kl_loss": 0.15848492085933685, + "loss_ib": 0.00236262334510684, + "step": 832 + }, + { + "ce_ib": 8.940630912780762, + "ce_orig": 1.3365821838378906, + "epoch": 0.2392695377093968, + "kl_loss": 0.20301774144172668, + "loss_ib": 0.0029242404270917177, + "step": 832 + }, + { + "ce_ib": 9.560267448425293, + "ce_orig": 0.9713718891143799, + "epoch": 0.23955712128837445, + "kl_loss": 0.13948719203472137, + "loss_ib": 0.0023508986923843622, + "step": 833 + }, + { + "ce_ib": 8.09939193725586, + "ce_orig": 0.9209976196289062, + "epoch": 0.23955712128837445, + "kl_loss": 0.1723610758781433, + "loss_ib": 0.0025335499085485935, + "step": 833 + }, + { + "ce_ib": 5.985219478607178, + "ce_orig": 0.7876350283622742, + "epoch": 0.23955712128837445, + "kl_loss": 0.06713362783193588, + "loss_ib": 0.0012698580976575613, + "step": 833 + }, + { + "ce_ib": 7.734393119812012, + "ce_orig": 0.9870476126670837, + "epoch": 0.23955712128837445, + "kl_loss": 0.17694149911403656, + "loss_ib": 0.0025428542867302895, + "step": 833 + }, + { + "ce_ib": 7.501764297485352, + "ce_orig": 0.9713405966758728, + "epoch": 0.23984470486735207, + "kl_loss": 0.11347226798534393, + "loss_ib": 0.0018848991021513939, + "step": 834 + }, + { + "ce_ib": 5.259660243988037, + "ce_orig": 0.8085374236106873, + "epoch": 0.23984470486735207, + "kl_loss": 0.07916925847530365, + "loss_ib": 0.0013176585780456662, + "step": 834 + }, + { + "ce_ib": 10.007915496826172, + "ce_orig": 1.2824541330337524, + "epoch": 0.23984470486735207, + "kl_loss": 0.15162745118141174, + "loss_ib": 0.002517065964639187, + "step": 834 + }, + { + "ce_ib": 5.082655906677246, + "ce_orig": 0.43925535678863525, + "epoch": 0.23984470486735207, + "kl_loss": 0.24141454696655273, + "loss_ib": 0.0029224108438938856, + "step": 834 + }, + { + "epoch": 0.24013228844632972, + "grad_norm": 0.0987640768289566, + "learning_rate": 4.9681185574776446e-05, + "loss": 0.8962, + "step": 835 + }, + { + "ce_ib": 7.371253967285156, + "ce_orig": 1.0186665058135986, + "epoch": 0.24013228844632972, + "kl_loss": 0.1406937539577484, + "loss_ib": 0.0021440626587718725, + "step": 835 + }, + { + "ce_ib": 5.862243175506592, + "ce_orig": 0.4793251156806946, + "epoch": 0.24013228844632972, + "kl_loss": 0.11569086462259293, + "loss_ib": 0.0017431328305974603, + "step": 835 + }, + { + "ce_ib": 7.355347633361816, + "ce_orig": 0.7462732791900635, + "epoch": 0.24013228844632972, + "kl_loss": 0.18415525555610657, + "loss_ib": 0.0025770871434360743, + "step": 835 + }, + { + "ce_ib": 5.006382465362549, + "ce_orig": 0.30985262989997864, + "epoch": 0.24013228844632972, + "kl_loss": 0.1356092095375061, + "loss_ib": 0.0018567302031442523, + "step": 835 + }, + { + "ce_ib": 6.978796482086182, + "ce_orig": 0.8654583692550659, + "epoch": 0.24041987202530735, + "kl_loss": 0.10511042922735214, + "loss_ib": 0.001748983864672482, + "step": 836 + }, + { + "ce_ib": 8.150212287902832, + "ce_orig": 0.4995287358760834, + "epoch": 0.24041987202530735, + "kl_loss": 0.2076493203639984, + "loss_ib": 0.002891514217481017, + "step": 836 + }, + { + "ce_ib": 11.759064674377441, + "ce_orig": 1.6323509216308594, + "epoch": 0.24041987202530735, + "kl_loss": 0.19571126997470856, + "loss_ib": 0.0031330191995948553, + "step": 836 + }, + { + "ce_ib": 11.017194747924805, + "ce_orig": 1.2573686838150024, + "epoch": 0.24041987202530735, + "kl_loss": 0.12838904559612274, + "loss_ib": 0.002385609783232212, + "step": 836 + }, + { + "ce_ib": 2.2583088874816895, + "ce_orig": 0.16112041473388672, + "epoch": 0.240707455604285, + "kl_loss": 0.3453638553619385, + "loss_ib": 0.003679469460621476, + "step": 837 + }, + { + "ce_ib": 10.247907638549805, + "ce_orig": 1.4024429321289062, + "epoch": 0.240707455604285, + "kl_loss": 0.1429089903831482, + "loss_ib": 0.0024538806173950434, + "step": 837 + }, + { + "ce_ib": 5.830674648284912, + "ce_orig": 0.6212292909622192, + "epoch": 0.240707455604285, + "kl_loss": 0.15315499901771545, + "loss_ib": 0.0021146174985915422, + "step": 837 + }, + { + "ce_ib": 4.623894691467285, + "ce_orig": 0.4536569118499756, + "epoch": 0.240707455604285, + "kl_loss": 0.1346922367811203, + "loss_ib": 0.0018093117978423834, + "step": 837 + }, + { + "ce_ib": 8.1456937789917, + "ce_orig": 1.164689540863037, + "epoch": 0.24099503918326265, + "kl_loss": 0.10320104658603668, + "loss_ib": 0.00184657983481884, + "step": 838 + }, + { + "ce_ib": 6.376285552978516, + "ce_orig": 0.7863490581512451, + "epoch": 0.24099503918326265, + "kl_loss": 0.13553473353385925, + "loss_ib": 0.001992975827306509, + "step": 838 + }, + { + "ce_ib": 4.726283073425293, + "ce_orig": 0.357181191444397, + "epoch": 0.24099503918326265, + "kl_loss": 0.17762082815170288, + "loss_ib": 0.002248836448416114, + "step": 838 + }, + { + "ce_ib": 5.20059871673584, + "ce_orig": 0.41502946615219116, + "epoch": 0.24099503918326265, + "kl_loss": 0.08028832077980042, + "loss_ib": 0.001322943135164678, + "step": 838 + }, + { + "ce_ib": 5.245758533477783, + "ce_orig": 0.5355814695358276, + "epoch": 0.24128262276224027, + "kl_loss": 0.12484327703714371, + "loss_ib": 0.0017730086110532284, + "step": 839 + }, + { + "ce_ib": 6.892401218414307, + "ce_orig": 1.2295132875442505, + "epoch": 0.24128262276224027, + "kl_loss": 0.12067458033561707, + "loss_ib": 0.001895985915325582, + "step": 839 + }, + { + "ce_ib": 6.100953102111816, + "ce_orig": 0.762692391872406, + "epoch": 0.24128262276224027, + "kl_loss": 0.15519046783447266, + "loss_ib": 0.0021619999315589666, + "step": 839 + }, + { + "ce_ib": 5.918198585510254, + "ce_orig": 0.7303500771522522, + "epoch": 0.24128262276224027, + "kl_loss": 0.08662743866443634, + "loss_ib": 0.001458094222471118, + "step": 839 + }, + { + "epoch": 0.24157020634121792, + "grad_norm": 0.10592812299728394, + "learning_rate": 4.967497846584552e-05, + "loss": 0.8622, + "step": 840 + }, + { + "ce_ib": 8.441739082336426, + "ce_orig": 0.8219562768936157, + "epoch": 0.24157020634121792, + "kl_loss": 0.13953891396522522, + "loss_ib": 0.002239563036710024, + "step": 840 + }, + { + "ce_ib": 5.784753322601318, + "ce_orig": 0.7019002437591553, + "epoch": 0.24157020634121792, + "kl_loss": 0.10375018417835236, + "loss_ib": 0.0016159771475940943, + "step": 840 + }, + { + "ce_ib": 6.836467266082764, + "ce_orig": 0.8054057359695435, + "epoch": 0.24157020634121792, + "kl_loss": 0.1309238076210022, + "loss_ib": 0.0019928847905248404, + "step": 840 + }, + { + "ce_ib": 6.871645927429199, + "ce_orig": 0.4505913257598877, + "epoch": 0.24157020634121792, + "kl_loss": 0.1236223429441452, + "loss_ib": 0.0019233878701925278, + "step": 840 + }, + { + "ce_ib": 7.9782023429870605, + "ce_orig": 0.7888794541358948, + "epoch": 0.24185778992019555, + "kl_loss": 0.13207761943340302, + "loss_ib": 0.0021185963414609432, + "step": 841 + }, + { + "ce_ib": 12.381393432617188, + "ce_orig": 1.5155247449874878, + "epoch": 0.24185778992019555, + "kl_loss": 0.11220519244670868, + "loss_ib": 0.002360191196203232, + "step": 841 + }, + { + "ce_ib": 5.204415798187256, + "ce_orig": 0.7087980508804321, + "epoch": 0.24185778992019555, + "kl_loss": 0.11631835997104645, + "loss_ib": 0.0016836250433698297, + "step": 841 + }, + { + "ce_ib": 7.512041091918945, + "ce_orig": 0.9449790120124817, + "epoch": 0.24185778992019555, + "kl_loss": 0.1673145592212677, + "loss_ib": 0.0024243497755378485, + "step": 841 + }, + { + "ce_ib": 10.613036155700684, + "ce_orig": 1.0805548429489136, + "epoch": 0.2421453734991732, + "kl_loss": 0.14164038002490997, + "loss_ib": 0.002477707341313362, + "step": 842 + }, + { + "ce_ib": 7.113550186157227, + "ce_orig": 0.8366743326187134, + "epoch": 0.2421453734991732, + "kl_loss": 0.10816210508346558, + "loss_ib": 0.0017929759342223406, + "step": 842 + }, + { + "ce_ib": 11.691953659057617, + "ce_orig": 0.9221868515014648, + "epoch": 0.2421453734991732, + "kl_loss": 0.20221099257469177, + "loss_ib": 0.003191305324435234, + "step": 842 + }, + { + "ce_ib": 5.920987129211426, + "ce_orig": 0.6415844559669495, + "epoch": 0.2421453734991732, + "kl_loss": 0.12173058837652206, + "loss_ib": 0.0018094044644385576, + "step": 842 + }, + { + "ce_ib": 8.948143005371094, + "ce_orig": 0.6214312314987183, + "epoch": 0.24243295707815085, + "kl_loss": 0.47353020310401917, + "loss_ib": 0.005630116444081068, + "step": 843 + }, + { + "ce_ib": 9.252907752990723, + "ce_orig": 0.9344848990440369, + "epoch": 0.24243295707815085, + "kl_loss": 0.15891718864440918, + "loss_ib": 0.0025144624523818493, + "step": 843 + }, + { + "ce_ib": 8.631677627563477, + "ce_orig": 0.6140725612640381, + "epoch": 0.24243295707815085, + "kl_loss": 0.1363321989774704, + "loss_ib": 0.0022264898288995028, + "step": 843 + }, + { + "ce_ib": 7.94747257232666, + "ce_orig": 1.072943091392517, + "epoch": 0.24243295707815085, + "kl_loss": 0.22313611209392548, + "loss_ib": 0.0030261084903031588, + "step": 843 + }, + { + "ce_ib": 2.1055665016174316, + "ce_orig": 0.09904298186302185, + "epoch": 0.24272054065712848, + "kl_loss": 0.27453792095184326, + "loss_ib": 0.0029559358954429626, + "step": 844 + }, + { + "ce_ib": 9.203268051147461, + "ce_orig": 0.764379620552063, + "epoch": 0.24272054065712848, + "kl_loss": 0.1489913910627365, + "loss_ib": 0.0024102407041937113, + "step": 844 + }, + { + "ce_ib": 6.683197498321533, + "ce_orig": 0.6841699481010437, + "epoch": 0.24272054065712848, + "kl_loss": 0.12199226766824722, + "loss_ib": 0.0018882423173636198, + "step": 844 + }, + { + "ce_ib": 9.931221961975098, + "ce_orig": 0.8138086795806885, + "epoch": 0.24272054065712848, + "kl_loss": 0.12627676129341125, + "loss_ib": 0.002255889819934964, + "step": 844 + }, + { + "epoch": 0.24300812423610613, + "grad_norm": 0.09755532443523407, + "learning_rate": 4.96687119091643e-05, + "loss": 0.8374, + "step": 845 + }, + { + "ce_ib": 8.698118209838867, + "ce_orig": 0.943545401096344, + "epoch": 0.24300812423610613, + "kl_loss": 0.07969736307859421, + "loss_ib": 0.0016667854506522417, + "step": 845 + }, + { + "ce_ib": 8.357364654541016, + "ce_orig": 1.0275182723999023, + "epoch": 0.24300812423610613, + "kl_loss": 0.15439343452453613, + "loss_ib": 0.002379670739173889, + "step": 845 + }, + { + "ce_ib": 6.27513313293457, + "ce_orig": 0.6612330079078674, + "epoch": 0.24300812423610613, + "kl_loss": 0.0844767689704895, + "loss_ib": 0.0014722809428349137, + "step": 845 + }, + { + "ce_ib": 9.774298667907715, + "ce_orig": 1.1108872890472412, + "epoch": 0.24300812423610613, + "kl_loss": 0.1235627681016922, + "loss_ib": 0.0022130575962364674, + "step": 845 + }, + { + "ce_ib": 7.689428329467773, + "ce_orig": 1.050940990447998, + "epoch": 0.24329570781508375, + "kl_loss": 0.16950029134750366, + "loss_ib": 0.0024639456532895565, + "step": 846 + }, + { + "ce_ib": 11.597556114196777, + "ce_orig": 0.8476027846336365, + "epoch": 0.24329570781508375, + "kl_loss": 0.22601580619812012, + "loss_ib": 0.0034199135843664408, + "step": 846 + }, + { + "ce_ib": 9.092070579528809, + "ce_orig": 0.7816643118858337, + "epoch": 0.24329570781508375, + "kl_loss": 0.16418534517288208, + "loss_ib": 0.0025510601699352264, + "step": 846 + }, + { + "ce_ib": 8.94192123413086, + "ce_orig": 0.9194644093513489, + "epoch": 0.24329570781508375, + "kl_loss": 0.2698814570903778, + "loss_ib": 0.0035930066369473934, + "step": 846 + }, + { + "ce_ib": 6.7314372062683105, + "ce_orig": 0.6977682113647461, + "epoch": 0.2435832913940614, + "kl_loss": 0.12507882714271545, + "loss_ib": 0.0019239319954067469, + "step": 847 + }, + { + "ce_ib": 8.144753456115723, + "ce_orig": 0.8696945309638977, + "epoch": 0.2435832913940614, + "kl_loss": 0.18285347521305084, + "loss_ib": 0.0026430098805576563, + "step": 847 + }, + { + "ce_ib": 6.152936935424805, + "ce_orig": 0.5545529127120972, + "epoch": 0.2435832913940614, + "kl_loss": 0.08746962249279022, + "loss_ib": 0.0014899899251759052, + "step": 847 + }, + { + "ce_ib": 7.335354804992676, + "ce_orig": 0.7962471842765808, + "epoch": 0.2435832913940614, + "kl_loss": 0.08078482747077942, + "loss_ib": 0.0015413836808875203, + "step": 847 + }, + { + "ce_ib": 10.031454086303711, + "ce_orig": 1.227720022201538, + "epoch": 0.24387087497303903, + "kl_loss": 0.15805502235889435, + "loss_ib": 0.002583695575594902, + "step": 848 + }, + { + "ce_ib": 7.987579345703125, + "ce_orig": 0.7593538761138916, + "epoch": 0.24387087497303903, + "kl_loss": 0.0982484295964241, + "loss_ib": 0.0017812422011047602, + "step": 848 + }, + { + "ce_ib": 4.435133457183838, + "ce_orig": 0.5653334856033325, + "epoch": 0.24387087497303903, + "kl_loss": 0.14194026589393616, + "loss_ib": 0.0018629160476848483, + "step": 848 + }, + { + "ce_ib": 6.288003921508789, + "ce_orig": 0.6333604454994202, + "epoch": 0.24387087497303903, + "kl_loss": 0.14213651418685913, + "loss_ib": 0.0020501655526459217, + "step": 848 + }, + { + "ce_ib": 5.361220836639404, + "ce_orig": 0.5679022073745728, + "epoch": 0.24415845855201668, + "kl_loss": 0.12149189412593842, + "loss_ib": 0.001751040923409164, + "step": 849 + }, + { + "ce_ib": 8.351764678955078, + "ce_orig": 1.017719030380249, + "epoch": 0.24415845855201668, + "kl_loss": 0.18313950300216675, + "loss_ib": 0.0026665714103728533, + "step": 849 + }, + { + "ce_ib": 6.253474712371826, + "ce_orig": 0.9022907614707947, + "epoch": 0.24415845855201668, + "kl_loss": 0.1341339647769928, + "loss_ib": 0.0019666871521621943, + "step": 849 + }, + { + "ce_ib": 5.258345127105713, + "ce_orig": 0.5824132561683655, + "epoch": 0.24415845855201668, + "kl_loss": 0.1137775108218193, + "loss_ib": 0.001663609524257481, + "step": 849 + }, + { + "epoch": 0.24444604213099433, + "grad_norm": 0.10801159590482712, + "learning_rate": 4.9662385919830347e-05, + "loss": 0.8029, + "step": 850 + }, + { + "ce_ib": 5.604668617248535, + "ce_orig": 0.5457404255867004, + "epoch": 0.24444604213099433, + "kl_loss": 0.12972185015678406, + "loss_ib": 0.0018576852744445205, + "step": 850 + }, + { + "ce_ib": 9.36043643951416, + "ce_orig": 1.0641096830368042, + "epoch": 0.24444604213099433, + "kl_loss": 0.16872264444828033, + "loss_ib": 0.002623270032927394, + "step": 850 + }, + { + "ce_ib": 10.290757179260254, + "ce_orig": 1.258750319480896, + "epoch": 0.24444604213099433, + "kl_loss": 0.18471182882785797, + "loss_ib": 0.0028761939611285925, + "step": 850 + }, + { + "ce_ib": 8.235913276672363, + "ce_orig": 0.9114575982093811, + "epoch": 0.24444604213099433, + "kl_loss": 0.14525389671325684, + "loss_ib": 0.0022761302534490824, + "step": 850 + }, + { + "ce_ib": 5.972557544708252, + "ce_orig": 0.3176371157169342, + "epoch": 0.24473362570997195, + "kl_loss": 0.1236177608370781, + "loss_ib": 0.001833433285355568, + "step": 851 + }, + { + "ce_ib": 6.819241523742676, + "ce_orig": 0.4694307744503021, + "epoch": 0.24473362570997195, + "kl_loss": 0.13898760080337524, + "loss_ib": 0.0020718001760542393, + "step": 851 + }, + { + "ce_ib": 5.671989917755127, + "ce_orig": 0.7460182309150696, + "epoch": 0.24473362570997195, + "kl_loss": 0.09529311209917068, + "loss_ib": 0.0015201299684122205, + "step": 851 + }, + { + "ce_ib": 6.121541500091553, + "ce_orig": 0.580995500087738, + "epoch": 0.24473362570997195, + "kl_loss": 0.0963808223605156, + "loss_ib": 0.001575962291099131, + "step": 851 + }, + { + "ce_ib": 11.055216789245605, + "ce_orig": 1.4706339836120605, + "epoch": 0.2450212092889496, + "kl_loss": 0.21470844745635986, + "loss_ib": 0.0032526059076189995, + "step": 852 + }, + { + "ce_ib": 9.577301979064941, + "ce_orig": 0.6492410898208618, + "epoch": 0.2450212092889496, + "kl_loss": 0.15032362937927246, + "loss_ib": 0.0024609663523733616, + "step": 852 + }, + { + "ce_ib": 4.050168514251709, + "ce_orig": 0.366349995136261, + "epoch": 0.2450212092889496, + "kl_loss": 0.16982409358024597, + "loss_ib": 0.00210325769148767, + "step": 852 + }, + { + "ce_ib": 7.699157238006592, + "ce_orig": 0.9390696287155151, + "epoch": 0.2450212092889496, + "kl_loss": 0.1424662470817566, + "loss_ib": 0.0021945780608803034, + "step": 852 + }, + { + "ce_ib": 7.2492804527282715, + "ce_orig": 1.026281476020813, + "epoch": 0.24530879286792723, + "kl_loss": 0.12123741209506989, + "loss_ib": 0.0019373020622879267, + "step": 853 + }, + { + "ce_ib": 5.721704483032227, + "ce_orig": 0.6315010190010071, + "epoch": 0.24530879286792723, + "kl_loss": 0.15391187369823456, + "loss_ib": 0.00211128918454051, + "step": 853 + }, + { + "ce_ib": 3.785083293914795, + "ce_orig": 0.3414902687072754, + "epoch": 0.24530879286792723, + "kl_loss": 0.13184034824371338, + "loss_ib": 0.0016969117568805814, + "step": 853 + }, + { + "ce_ib": 4.662577152252197, + "ce_orig": 0.562096118927002, + "epoch": 0.24530879286792723, + "kl_loss": 0.08554943650960922, + "loss_ib": 0.0013217520900070667, + "step": 853 + }, + { + "ce_ib": 6.567530155181885, + "ce_orig": 0.8952581882476807, + "epoch": 0.24559637644690488, + "kl_loss": 0.27641063928604126, + "loss_ib": 0.0034208595752716064, + "step": 854 + }, + { + "ce_ib": 9.29099178314209, + "ce_orig": 0.5317293405532837, + "epoch": 0.24559637644690488, + "kl_loss": 0.11667747795581818, + "loss_ib": 0.0020958739332854748, + "step": 854 + }, + { + "ce_ib": 5.92125129699707, + "ce_orig": 0.8134939074516296, + "epoch": 0.24559637644690488, + "kl_loss": 0.10207337141036987, + "loss_ib": 0.0016128587303683162, + "step": 854 + }, + { + "ce_ib": 5.6100382804870605, + "ce_orig": 0.7054432034492493, + "epoch": 0.24559637644690488, + "kl_loss": 0.08496654033660889, + "loss_ib": 0.001410669181495905, + "step": 854 + }, + { + "epoch": 0.24588396002588253, + "grad_norm": 0.10967330634593964, + "learning_rate": 4.9656000513084455e-05, + "loss": 0.8293, + "step": 855 + }, + { + "ce_ib": 8.405036926269531, + "ce_orig": 0.5193880796432495, + "epoch": 0.24588396002588253, + "kl_loss": 0.16030897200107574, + "loss_ib": 0.0024435934610664845, + "step": 855 + }, + { + "ce_ib": 6.491952896118164, + "ce_orig": 0.8281493186950684, + "epoch": 0.24588396002588253, + "kl_loss": 0.09462558478116989, + "loss_ib": 0.0015954510308802128, + "step": 855 + }, + { + "ce_ib": 6.649583339691162, + "ce_orig": 0.7275351881980896, + "epoch": 0.24588396002588253, + "kl_loss": 0.11419142782688141, + "loss_ib": 0.0018068724311888218, + "step": 855 + }, + { + "ce_ib": 8.009108543395996, + "ce_orig": 1.0409449338912964, + "epoch": 0.24588396002588253, + "kl_loss": 0.12785518169403076, + "loss_ib": 0.002079462632536888, + "step": 855 + }, + { + "ce_ib": 5.276194095611572, + "ce_orig": 0.44244295358657837, + "epoch": 0.24617154360486015, + "kl_loss": 0.1523188054561615, + "loss_ib": 0.0020508074667304754, + "step": 856 + }, + { + "ce_ib": 7.91589879989624, + "ce_orig": 0.9207080006599426, + "epoch": 0.24617154360486015, + "kl_loss": 0.1827230304479599, + "loss_ib": 0.0026188199408352375, + "step": 856 + }, + { + "ce_ib": 8.683913230895996, + "ce_orig": 0.8899644017219543, + "epoch": 0.24617154360486015, + "kl_loss": 0.14581285417079926, + "loss_ib": 0.0023265196941792965, + "step": 856 + }, + { + "ce_ib": 8.30103874206543, + "ce_orig": 0.948857843875885, + "epoch": 0.24617154360486015, + "kl_loss": 0.16537992656230927, + "loss_ib": 0.0024839031975716352, + "step": 856 + }, + { + "ce_ib": 9.835265159606934, + "ce_orig": 1.3811537027359009, + "epoch": 0.2464591271838378, + "kl_loss": 0.13627251982688904, + "loss_ib": 0.0023462516255676746, + "step": 857 + }, + { + "ce_ib": 6.258663177490234, + "ce_orig": 0.6032944917678833, + "epoch": 0.2464591271838378, + "kl_loss": 0.1421205997467041, + "loss_ib": 0.00204707239754498, + "step": 857 + }, + { + "ce_ib": 4.90713357925415, + "ce_orig": 0.6079412698745728, + "epoch": 0.2464591271838378, + "kl_loss": 0.1172361895442009, + "loss_ib": 0.0016630751779302955, + "step": 857 + }, + { + "ce_ib": 12.298185348510742, + "ce_orig": 1.3265632390975952, + "epoch": 0.2464591271838378, + "kl_loss": 0.14415337145328522, + "loss_ib": 0.002671352354809642, + "step": 857 + }, + { + "ce_ib": 2.617100954055786, + "ce_orig": 0.17996713519096375, + "epoch": 0.24674671076281543, + "kl_loss": 0.3445594608783722, + "loss_ib": 0.00370730459690094, + "step": 858 + }, + { + "ce_ib": 7.720390796661377, + "ce_orig": 0.7807556986808777, + "epoch": 0.24674671076281543, + "kl_loss": 0.15011197328567505, + "loss_ib": 0.002273158635944128, + "step": 858 + }, + { + "ce_ib": 8.279568672180176, + "ce_orig": 0.8452048301696777, + "epoch": 0.24674671076281543, + "kl_loss": 0.19392766058444977, + "loss_ib": 0.0027672334108501673, + "step": 858 + }, + { + "ce_ib": 3.605633020401001, + "ce_orig": 0.3069818913936615, + "epoch": 0.24674671076281543, + "kl_loss": 0.2172875851392746, + "loss_ib": 0.0025334390811622143, + "step": 858 + }, + { + "ce_ib": 8.567093849182129, + "ce_orig": 0.745737612247467, + "epoch": 0.24703429434179308, + "kl_loss": 0.12669722735881805, + "loss_ib": 0.0021236815955489874, + "step": 859 + }, + { + "ce_ib": 3.890596389770508, + "ce_orig": 0.3934782147407532, + "epoch": 0.24703429434179308, + "kl_loss": 0.11800628155469894, + "loss_ib": 0.0015691223088651896, + "step": 859 + }, + { + "ce_ib": 5.680053234100342, + "ce_orig": 0.7315199375152588, + "epoch": 0.24703429434179308, + "kl_loss": 0.10949228703975677, + "loss_ib": 0.001662928145378828, + "step": 859 + }, + { + "ce_ib": 5.438368320465088, + "ce_orig": 0.7946762442588806, + "epoch": 0.24703429434179308, + "kl_loss": 0.1166611984372139, + "loss_ib": 0.0017104488797485828, + "step": 859 + }, + { + "epoch": 0.24732187792077073, + "grad_norm": 0.08520792424678802, + "learning_rate": 4.964955570431055e-05, + "loss": 0.8513, + "step": 860 + }, + { + "ce_ib": 8.245199203491211, + "ce_orig": 1.2291977405548096, + "epoch": 0.24732187792077073, + "kl_loss": 0.12287493050098419, + "loss_ib": 0.002053269185125828, + "step": 860 + }, + { + "ce_ib": 7.371245384216309, + "ce_orig": 0.7984662652015686, + "epoch": 0.24732187792077073, + "kl_loss": 0.0932641252875328, + "loss_ib": 0.0016697656828910112, + "step": 860 + }, + { + "ce_ib": 9.231488227844238, + "ce_orig": 1.1531388759613037, + "epoch": 0.24732187792077073, + "kl_loss": 0.17074084281921387, + "loss_ib": 0.0026305571664124727, + "step": 860 + }, + { + "ce_ib": 10.435407638549805, + "ce_orig": 1.37950599193573, + "epoch": 0.24732187792077073, + "kl_loss": 0.1756286323070526, + "loss_ib": 0.002799827139824629, + "step": 860 + }, + { + "ce_ib": 5.245795726776123, + "ce_orig": 0.8678305745124817, + "epoch": 0.24760946149974836, + "kl_loss": 0.10350409895181656, + "loss_ib": 0.0015596204902976751, + "step": 861 + }, + { + "ce_ib": 8.952935218811035, + "ce_orig": 0.7108750939369202, + "epoch": 0.24760946149974836, + "kl_loss": 0.11388901621103287, + "loss_ib": 0.002034183591604233, + "step": 861 + }, + { + "ce_ib": 9.032291412353516, + "ce_orig": 0.5742269158363342, + "epoch": 0.24760946149974836, + "kl_loss": 0.1395682990550995, + "loss_ib": 0.0022989120334386826, + "step": 861 + }, + { + "ce_ib": 4.5656280517578125, + "ce_orig": 0.6355873942375183, + "epoch": 0.24760946149974836, + "kl_loss": 0.15184998512268066, + "loss_ib": 0.0019750625360757113, + "step": 861 + }, + { + "ce_ib": 7.731656551361084, + "ce_orig": 0.8927205204963684, + "epoch": 0.247897045078726, + "kl_loss": 0.14044861495494843, + "loss_ib": 0.0021776517387479544, + "step": 862 + }, + { + "ce_ib": 7.413579940795898, + "ce_orig": 0.5944902300834656, + "epoch": 0.247897045078726, + "kl_loss": 0.19562631845474243, + "loss_ib": 0.0026976210065186024, + "step": 862 + }, + { + "ce_ib": 7.817788600921631, + "ce_orig": 0.5387774705886841, + "epoch": 0.247897045078726, + "kl_loss": 0.1566201001405716, + "loss_ib": 0.0023479796946048737, + "step": 862 + }, + { + "ce_ib": 7.256852149963379, + "ce_orig": 0.6728155612945557, + "epoch": 0.247897045078726, + "kl_loss": 0.18778842687606812, + "loss_ib": 0.002603569533675909, + "step": 862 + }, + { + "ce_ib": 5.361456394195557, + "ce_orig": 0.7745912075042725, + "epoch": 0.24818462865770363, + "kl_loss": 0.08714807778596878, + "loss_ib": 0.0014076264342293143, + "step": 863 + }, + { + "ce_ib": 5.879522800445557, + "ce_orig": 0.4841998219490051, + "epoch": 0.24818462865770363, + "kl_loss": 0.10464229434728622, + "loss_ib": 0.0016343750758096576, + "step": 863 + }, + { + "ce_ib": 11.785419464111328, + "ce_orig": 1.7475069761276245, + "epoch": 0.24818462865770363, + "kl_loss": 0.12912291288375854, + "loss_ib": 0.0024697710759937763, + "step": 863 + }, + { + "ce_ib": 12.430148124694824, + "ce_orig": 1.5259690284729004, + "epoch": 0.24818462865770363, + "kl_loss": 0.49463123083114624, + "loss_ib": 0.006189327221363783, + "step": 863 + }, + { + "ce_ib": 9.013341903686523, + "ce_orig": 0.6152147650718689, + "epoch": 0.24847221223668128, + "kl_loss": 0.2053050994873047, + "loss_ib": 0.002954385243356228, + "step": 864 + }, + { + "ce_ib": 9.306499481201172, + "ce_orig": 1.2726976871490479, + "epoch": 0.24847221223668128, + "kl_loss": 0.14018534123897552, + "loss_ib": 0.0023325032088905573, + "step": 864 + }, + { + "ce_ib": 9.57296371459961, + "ce_orig": 0.7869003415107727, + "epoch": 0.24847221223668128, + "kl_loss": 0.17775607109069824, + "loss_ib": 0.0027348571456968784, + "step": 864 + }, + { + "ce_ib": 7.739504814147949, + "ce_orig": 1.0867712497711182, + "epoch": 0.24847221223668128, + "kl_loss": 0.11266046017408371, + "loss_ib": 0.0019005549838766456, + "step": 864 + }, + { + "epoch": 0.24875979581565894, + "grad_norm": 0.09175752103328705, + "learning_rate": 4.964305150903566e-05, + "loss": 0.901, + "step": 865 + }, + { + "ce_ib": 9.967638969421387, + "ce_orig": 0.9802643656730652, + "epoch": 0.24875979581565894, + "kl_loss": 0.14677110314369202, + "loss_ib": 0.0024644748773425817, + "step": 865 + }, + { + "ce_ib": 10.069565773010254, + "ce_orig": 0.9357901811599731, + "epoch": 0.24875979581565894, + "kl_loss": 0.18884184956550598, + "loss_ib": 0.002895374782383442, + "step": 865 + }, + { + "ce_ib": 10.978303909301758, + "ce_orig": 1.3109445571899414, + "epoch": 0.24875979581565894, + "kl_loss": 0.17037174105644226, + "loss_ib": 0.0028015475254505873, + "step": 865 + }, + { + "ce_ib": 5.982998371124268, + "ce_orig": 0.558401882648468, + "epoch": 0.24875979581565894, + "kl_loss": 0.17013560235500336, + "loss_ib": 0.002299655694514513, + "step": 865 + }, + { + "ce_ib": 6.899177074432373, + "ce_orig": 0.7828370928764343, + "epoch": 0.24904737939463656, + "kl_loss": 0.19635936617851257, + "loss_ib": 0.0026535114739090204, + "step": 866 + }, + { + "ce_ib": 6.190727710723877, + "ce_orig": 0.6056550145149231, + "epoch": 0.24904737939463656, + "kl_loss": 0.16528858244419098, + "loss_ib": 0.002271958626806736, + "step": 866 + }, + { + "ce_ib": 10.652737617492676, + "ce_orig": 1.5506879091262817, + "epoch": 0.24904737939463656, + "kl_loss": 0.16518110036849976, + "loss_ib": 0.0027170847170054913, + "step": 866 + }, + { + "ce_ib": 8.15753173828125, + "ce_orig": 1.1266238689422607, + "epoch": 0.24904737939463656, + "kl_loss": 0.1654905378818512, + "loss_ib": 0.002470658626407385, + "step": 866 + }, + { + "ce_ib": 5.835992336273193, + "ce_orig": 0.5801675915718079, + "epoch": 0.2493349629736142, + "kl_loss": 0.15277384221553802, + "loss_ib": 0.00211133761331439, + "step": 867 + }, + { + "ce_ib": 6.383537769317627, + "ce_orig": 0.8467006087303162, + "epoch": 0.2493349629736142, + "kl_loss": 0.11249042302370071, + "loss_ib": 0.001763257896527648, + "step": 867 + }, + { + "ce_ib": 15.002373695373535, + "ce_orig": 2.252410650253296, + "epoch": 0.2493349629736142, + "kl_loss": 0.15410232543945312, + "loss_ib": 0.0030412604101002216, + "step": 867 + }, + { + "ce_ib": 7.483471393585205, + "ce_orig": 0.5316120982170105, + "epoch": 0.2493349629736142, + "kl_loss": 0.3930402994155884, + "loss_ib": 0.004678749945014715, + "step": 867 + }, + { + "ce_ib": 8.862229347229004, + "ce_orig": 0.9752901792526245, + "epoch": 0.24962254655259183, + "kl_loss": 0.1402927041053772, + "loss_ib": 0.002289149910211563, + "step": 868 + }, + { + "ce_ib": 12.469680786132812, + "ce_orig": 1.5079736709594727, + "epoch": 0.24962254655259183, + "kl_loss": 0.18042317032814026, + "loss_ib": 0.0030511999502778053, + "step": 868 + }, + { + "ce_ib": 5.774080753326416, + "ce_orig": 0.8587755560874939, + "epoch": 0.24962254655259183, + "kl_loss": 0.1097964197397232, + "loss_ib": 0.0016753722447901964, + "step": 868 + }, + { + "ce_ib": 9.14037036895752, + "ce_orig": 0.701475977897644, + "epoch": 0.24962254655259183, + "kl_loss": 0.3841056227684021, + "loss_ib": 0.00475509325042367, + "step": 868 + }, + { + "ce_ib": 8.243424415588379, + "ce_orig": 0.8065715432167053, + "epoch": 0.24991013013156949, + "kl_loss": 0.172191321849823, + "loss_ib": 0.002546255476772785, + "step": 869 + }, + { + "ce_ib": 5.66312313079834, + "ce_orig": 0.771187961101532, + "epoch": 0.24991013013156949, + "kl_loss": 0.06962478160858154, + "loss_ib": 0.0012625601375475526, + "step": 869 + }, + { + "ce_ib": 6.285440921783447, + "ce_orig": 0.7630317211151123, + "epoch": 0.24991013013156949, + "kl_loss": 0.10592949390411377, + "loss_ib": 0.001687839045189321, + "step": 869 + }, + { + "ce_ib": 6.893797397613525, + "ce_orig": 0.7610549926757812, + "epoch": 0.24991013013156949, + "kl_loss": 0.1999196857213974, + "loss_ib": 0.002688576467335224, + "step": 869 + }, + { + "epoch": 0.2501977137105471, + "grad_norm": 0.09903328865766525, + "learning_rate": 4.963648794292992e-05, + "loss": 0.8646, + "step": 870 + }, + { + "ce_ib": 9.630828857421875, + "ce_orig": 1.031785011291504, + "epoch": 0.2501977137105471, + "kl_loss": 0.17289333045482635, + "loss_ib": 0.0026920160744339228, + "step": 870 + }, + { + "ce_ib": 5.168337345123291, + "ce_orig": 0.5879364609718323, + "epoch": 0.2501977137105471, + "kl_loss": 0.13910838961601257, + "loss_ib": 0.0019079175544902682, + "step": 870 + }, + { + "ce_ib": 9.871175765991211, + "ce_orig": 0.9261044859886169, + "epoch": 0.2501977137105471, + "kl_loss": 0.08857513964176178, + "loss_ib": 0.001872868975624442, + "step": 870 + }, + { + "ce_ib": 6.347537994384766, + "ce_orig": 0.9719029068946838, + "epoch": 0.2501977137105471, + "kl_loss": 0.07947400212287903, + "loss_ib": 0.0014294936554506421, + "step": 870 + }, + { + "ce_ib": 9.052336692810059, + "ce_orig": 1.1934534311294556, + "epoch": 0.2504852972895248, + "kl_loss": 0.16040383279323578, + "loss_ib": 0.0025092719588428736, + "step": 871 + }, + { + "ce_ib": 8.992751121520996, + "ce_orig": 0.9628534317016602, + "epoch": 0.2504852972895248, + "kl_loss": 0.10795988142490387, + "loss_ib": 0.001978873973712325, + "step": 871 + }, + { + "ce_ib": 6.835910797119141, + "ce_orig": 0.41721463203430176, + "epoch": 0.2504852972895248, + "kl_loss": 0.22937476634979248, + "loss_ib": 0.002977338619530201, + "step": 871 + }, + { + "ce_ib": 6.061643123626709, + "ce_orig": 0.7965611815452576, + "epoch": 0.2504852972895248, + "kl_loss": 0.10941595584154129, + "loss_ib": 0.001700323773548007, + "step": 871 + }, + { + "ce_ib": 4.520278453826904, + "ce_orig": 0.34330859780311584, + "epoch": 0.2507728808685024, + "kl_loss": 0.3076925277709961, + "loss_ib": 0.0035289530642330647, + "step": 872 + }, + { + "ce_ib": 5.3499250411987305, + "ce_orig": 0.7291832566261292, + "epoch": 0.2507728808685024, + "kl_loss": 0.24808456003665924, + "loss_ib": 0.0030158378649502993, + "step": 872 + }, + { + "ce_ib": 6.289168357849121, + "ce_orig": 0.5440212488174438, + "epoch": 0.2507728808685024, + "kl_loss": 0.1381717026233673, + "loss_ib": 0.002010633936151862, + "step": 872 + }, + { + "ce_ib": 4.648695945739746, + "ce_orig": 0.5465182662010193, + "epoch": 0.2507728808685024, + "kl_loss": 0.11056109517812729, + "loss_ib": 0.0015704804100096226, + "step": 872 + }, + { + "ce_ib": 7.006433963775635, + "ce_orig": 1.0426201820373535, + "epoch": 0.25106046444748004, + "kl_loss": 0.10194164514541626, + "loss_ib": 0.001720059779472649, + "step": 873 + }, + { + "ce_ib": 6.2683305740356445, + "ce_orig": 0.6865673065185547, + "epoch": 0.25106046444748004, + "kl_loss": 0.15586382150650024, + "loss_ib": 0.0021854713559150696, + "step": 873 + }, + { + "ce_ib": 8.336287498474121, + "ce_orig": 0.7352637052536011, + "epoch": 0.25106046444748004, + "kl_loss": 0.14312681555747986, + "loss_ib": 0.002264896873384714, + "step": 873 + }, + { + "ce_ib": 5.228641986846924, + "ce_orig": 0.7911911010742188, + "epoch": 0.25106046444748004, + "kl_loss": 0.13875475525856018, + "loss_ib": 0.0019104116363450885, + "step": 873 + }, + { + "ce_ib": 7.886993885040283, + "ce_orig": 0.8877960443496704, + "epoch": 0.2513480480264577, + "kl_loss": 0.21264252066612244, + "loss_ib": 0.0029151246417313814, + "step": 874 + }, + { + "ce_ib": 8.536798477172852, + "ce_orig": 1.23441481590271, + "epoch": 0.2513480480264577, + "kl_loss": 0.12252427637577057, + "loss_ib": 0.002078922698274255, + "step": 874 + }, + { + "ce_ib": 5.5121893882751465, + "ce_orig": 0.7155554294586182, + "epoch": 0.2513480480264577, + "kl_loss": 0.10985850542783737, + "loss_ib": 0.0016498039476573467, + "step": 874 + }, + { + "ce_ib": 8.014837265014648, + "ce_orig": 0.9826061129570007, + "epoch": 0.2513480480264577, + "kl_loss": 0.12555983662605286, + "loss_ib": 0.0020570820197463036, + "step": 874 + }, + { + "epoch": 0.25163563160543534, + "grad_norm": 0.09154003113508224, + "learning_rate": 4.962986502180648e-05, + "loss": 0.8859, + "step": 875 + }, + { + "ce_ib": 10.280941009521484, + "ce_orig": 1.6837189197540283, + "epoch": 0.25163563160543534, + "kl_loss": 0.1310340017080307, + "loss_ib": 0.0023384341038763523, + "step": 875 + }, + { + "ce_ib": 4.716495513916016, + "ce_orig": 0.6339424848556519, + "epoch": 0.25163563160543534, + "kl_loss": 0.1124955490231514, + "loss_ib": 0.0015966049395501614, + "step": 875 + }, + { + "ce_ib": 3.884503126144409, + "ce_orig": 0.2738972306251526, + "epoch": 0.25163563160543534, + "kl_loss": 0.19628892838954926, + "loss_ib": 0.002351339440792799, + "step": 875 + }, + { + "ce_ib": 9.08601188659668, + "ce_orig": 0.8660534620285034, + "epoch": 0.25163563160543534, + "kl_loss": 0.15325835347175598, + "loss_ib": 0.0024411845952272415, + "step": 875 + }, + { + "ce_ib": 10.415492057800293, + "ce_orig": 0.7765947580337524, + "epoch": 0.25192321518441296, + "kl_loss": 0.1637212485074997, + "loss_ib": 0.0026787614915519953, + "step": 876 + }, + { + "ce_ib": 10.087233543395996, + "ce_orig": 1.2951592206954956, + "epoch": 0.25192321518441296, + "kl_loss": 0.14713844656944275, + "loss_ib": 0.0024801078252494335, + "step": 876 + }, + { + "ce_ib": 9.932022094726562, + "ce_orig": 1.3337452411651611, + "epoch": 0.25192321518441296, + "kl_loss": 0.10245153307914734, + "loss_ib": 0.0020177175756543875, + "step": 876 + }, + { + "ce_ib": 8.801789283752441, + "ce_orig": 1.1899961233139038, + "epoch": 0.25192321518441296, + "kl_loss": 0.13182136416435242, + "loss_ib": 0.002198392292484641, + "step": 876 + }, + { + "ce_ib": 4.309330463409424, + "ce_orig": 0.517691433429718, + "epoch": 0.2522107987633906, + "kl_loss": 0.09331747889518738, + "loss_ib": 0.001364107825793326, + "step": 877 + }, + { + "ce_ib": 13.823863983154297, + "ce_orig": 1.6123818159103394, + "epoch": 0.2522107987633906, + "kl_loss": 0.15045878291130066, + "loss_ib": 0.0028869742527604103, + "step": 877 + }, + { + "ce_ib": 5.531408786773682, + "ce_orig": 0.622874915599823, + "epoch": 0.2522107987633906, + "kl_loss": 0.09007586538791656, + "loss_ib": 0.001453899429179728, + "step": 877 + }, + { + "ce_ib": 7.4732136726379395, + "ce_orig": 0.40653085708618164, + "epoch": 0.2522107987633906, + "kl_loss": 0.1261965036392212, + "loss_ib": 0.0020092863123863935, + "step": 877 + }, + { + "ce_ib": 10.97634220123291, + "ce_orig": 1.196432113647461, + "epoch": 0.25249838234236827, + "kl_loss": 0.14364852011203766, + "loss_ib": 0.002534119412302971, + "step": 878 + }, + { + "ce_ib": 7.987130165100098, + "ce_orig": 0.8016747832298279, + "epoch": 0.25249838234236827, + "kl_loss": 0.14365743100643158, + "loss_ib": 0.0022352873347699642, + "step": 878 + }, + { + "ce_ib": 5.696638107299805, + "ce_orig": 0.7144477367401123, + "epoch": 0.25249838234236827, + "kl_loss": 0.12607070803642273, + "loss_ib": 0.0018303708638995886, + "step": 878 + }, + { + "ce_ib": 6.691961288452148, + "ce_orig": 0.7480747699737549, + "epoch": 0.25249838234236827, + "kl_loss": 0.18506991863250732, + "loss_ib": 0.0025198953226208687, + "step": 878 + }, + { + "ce_ib": 7.895229816436768, + "ce_orig": 0.43932151794433594, + "epoch": 0.2527859659213459, + "kl_loss": 0.13867847621440887, + "loss_ib": 0.0021763076074421406, + "step": 879 + }, + { + "ce_ib": 9.321691513061523, + "ce_orig": 0.8994438052177429, + "epoch": 0.2527859659213459, + "kl_loss": 0.10325250029563904, + "loss_ib": 0.001964694121852517, + "step": 879 + }, + { + "ce_ib": 10.132121086120605, + "ce_orig": 0.9195699095726013, + "epoch": 0.2527859659213459, + "kl_loss": 0.12802192568778992, + "loss_ib": 0.0022934312000870705, + "step": 879 + }, + { + "ce_ib": 4.005758285522461, + "ce_orig": 0.6068211793899536, + "epoch": 0.2527859659213459, + "kl_loss": 0.08329135924577713, + "loss_ib": 0.0012334893690422177, + "step": 879 + }, + { + "epoch": 0.2530735495003235, + "grad_norm": 0.09337525814771652, + "learning_rate": 4.962318276162148e-05, + "loss": 0.8195, + "step": 880 + }, + { + "ce_ib": 6.823204517364502, + "ce_orig": 1.1319113969802856, + "epoch": 0.2530735495003235, + "kl_loss": 0.14816519618034363, + "loss_ib": 0.0021639724727720022, + "step": 880 + }, + { + "ce_ib": 10.786904335021973, + "ce_orig": 1.6341019868850708, + "epoch": 0.2530735495003235, + "kl_loss": 0.28171437978744507, + "loss_ib": 0.0038958340883255005, + "step": 880 + }, + { + "ce_ib": 9.805071830749512, + "ce_orig": 1.169114589691162, + "epoch": 0.2530735495003235, + "kl_loss": 0.15983524918556213, + "loss_ib": 0.002578859683126211, + "step": 880 + }, + { + "ce_ib": 11.32461929321289, + "ce_orig": 1.619084358215332, + "epoch": 0.2530735495003235, + "kl_loss": 0.18278929591178894, + "loss_ib": 0.002960354555398226, + "step": 880 + }, + { + "ce_ib": 5.599311828613281, + "ce_orig": 0.800615668296814, + "epoch": 0.2533611330793012, + "kl_loss": 0.15062254667282104, + "loss_ib": 0.002066156594082713, + "step": 881 + }, + { + "ce_ib": 10.209794998168945, + "ce_orig": 0.7918082475662231, + "epoch": 0.2533611330793012, + "kl_loss": 0.22898060083389282, + "loss_ib": 0.0033107856288552284, + "step": 881 + }, + { + "ce_ib": 10.093332290649414, + "ce_orig": 1.3126697540283203, + "epoch": 0.2533611330793012, + "kl_loss": 0.13278324902057648, + "loss_ib": 0.002337165642529726, + "step": 881 + }, + { + "ce_ib": 7.731043815612793, + "ce_orig": 0.8154935836791992, + "epoch": 0.2533611330793012, + "kl_loss": 0.14035317301750183, + "loss_ib": 0.002176636131480336, + "step": 881 + }, + { + "ce_ib": 6.037160396575928, + "ce_orig": 0.728894829750061, + "epoch": 0.2536487166582788, + "kl_loss": 0.12200887501239777, + "loss_ib": 0.0018238048069179058, + "step": 882 + }, + { + "ce_ib": 5.499096870422363, + "ce_orig": 0.8592274188995361, + "epoch": 0.2536487166582788, + "kl_loss": 0.08512680232524872, + "loss_ib": 0.0014011776074767113, + "step": 882 + }, + { + "ce_ib": 8.627275466918945, + "ce_orig": 0.9206818342208862, + "epoch": 0.2536487166582788, + "kl_loss": 0.11716502904891968, + "loss_ib": 0.00203437777236104, + "step": 882 + }, + { + "ce_ib": 7.004565238952637, + "ce_orig": 0.9188127517700195, + "epoch": 0.2536487166582788, + "kl_loss": 0.17430272698402405, + "loss_ib": 0.0024434837978333235, + "step": 882 + }, + { + "ce_ib": 4.813985347747803, + "ce_orig": 0.5024008750915527, + "epoch": 0.25393630023725644, + "kl_loss": 0.11281563341617584, + "loss_ib": 0.0016095548635348678, + "step": 883 + }, + { + "ce_ib": 7.398796081542969, + "ce_orig": 1.046441674232483, + "epoch": 0.25393630023725644, + "kl_loss": 0.13350337743759155, + "loss_ib": 0.002074913354590535, + "step": 883 + }, + { + "ce_ib": 6.332355976104736, + "ce_orig": 0.6513664126396179, + "epoch": 0.25393630023725644, + "kl_loss": 0.12430296093225479, + "loss_ib": 0.0018762650433927774, + "step": 883 + }, + { + "ce_ib": 3.995943784713745, + "ce_orig": 0.4478011727333069, + "epoch": 0.25393630023725644, + "kl_loss": 0.1076059564948082, + "loss_ib": 0.0014756539603695273, + "step": 883 + }, + { + "ce_ib": 5.7259297370910645, + "ce_orig": 0.7168182730674744, + "epoch": 0.2542238838162341, + "kl_loss": 0.1538955271244049, + "loss_ib": 0.0021115480922162533, + "step": 884 + }, + { + "ce_ib": 9.420291900634766, + "ce_orig": 0.7637949585914612, + "epoch": 0.2542238838162341, + "kl_loss": 0.14228776097297668, + "loss_ib": 0.0023649067152291536, + "step": 884 + }, + { + "ce_ib": 5.211411952972412, + "ce_orig": 0.55478835105896, + "epoch": 0.2542238838162341, + "kl_loss": 0.1502072811126709, + "loss_ib": 0.0020232140086591244, + "step": 884 + }, + { + "ce_ib": 6.278275489807129, + "ce_orig": 0.6644842028617859, + "epoch": 0.2542238838162341, + "kl_loss": 0.10490093380212784, + "loss_ib": 0.001676836865954101, + "step": 884 + }, + { + "epoch": 0.25451146739521174, + "grad_norm": 0.09509612619876862, + "learning_rate": 4.9616441178474044e-05, + "loss": 0.883, + "step": 885 + }, + { + "ce_ib": 10.628482818603516, + "ce_orig": 1.298897385597229, + "epoch": 0.25451146739521174, + "kl_loss": 0.18016132712364197, + "loss_ib": 0.0028644613921642303, + "step": 885 + }, + { + "ce_ib": 8.307840347290039, + "ce_orig": 1.0710110664367676, + "epoch": 0.25451146739521174, + "kl_loss": 0.10537383705377579, + "loss_ib": 0.001884522382169962, + "step": 885 + }, + { + "ce_ib": 7.4360222816467285, + "ce_orig": 0.7180832028388977, + "epoch": 0.25451146739521174, + "kl_loss": 0.0949300155043602, + "loss_ib": 0.0016929024131968617, + "step": 885 + }, + { + "ce_ib": 5.020169258117676, + "ce_orig": 0.6514172554016113, + "epoch": 0.25451146739521174, + "kl_loss": 0.0820763111114502, + "loss_ib": 0.0013227799208834767, + "step": 885 + }, + { + "ce_ib": 9.22244644165039, + "ce_orig": 1.4875003099441528, + "epoch": 0.25479905097418937, + "kl_loss": 0.09314162284135818, + "loss_ib": 0.0018536609131842852, + "step": 886 + }, + { + "ce_ib": 5.483860492706299, + "ce_orig": 0.8450250625610352, + "epoch": 0.25479905097418937, + "kl_loss": 0.10762491822242737, + "loss_ib": 0.0016246350714936852, + "step": 886 + }, + { + "ce_ib": 3.3092868328094482, + "ce_orig": 0.42637813091278076, + "epoch": 0.25479905097418937, + "kl_loss": 0.07745881378650665, + "loss_ib": 0.0011055167997255921, + "step": 886 + }, + { + "ce_ib": 7.81026029586792, + "ce_orig": 0.7796308398246765, + "epoch": 0.25479905097418937, + "kl_loss": 0.14506946504116058, + "loss_ib": 0.0022317206021398306, + "step": 886 + }, + { + "ce_ib": 11.031375885009766, + "ce_orig": 1.4328182935714722, + "epoch": 0.255086634553167, + "kl_loss": 0.18477189540863037, + "loss_ib": 0.0029508566949516535, + "step": 887 + }, + { + "ce_ib": 6.726977825164795, + "ce_orig": 0.8052558302879333, + "epoch": 0.255086634553167, + "kl_loss": 0.14022012054920197, + "loss_ib": 0.0020748989190906286, + "step": 887 + }, + { + "ce_ib": 5.699434280395508, + "ce_orig": 0.6416595578193665, + "epoch": 0.255086634553167, + "kl_loss": 0.17253366112709045, + "loss_ib": 0.002295280108228326, + "step": 887 + }, + { + "ce_ib": 3.5943145751953125, + "ce_orig": 0.3237018585205078, + "epoch": 0.255086634553167, + "kl_loss": 0.13443215191364288, + "loss_ib": 0.001703753019683063, + "step": 887 + }, + { + "ce_ib": 8.55632209777832, + "ce_orig": 1.148319125175476, + "epoch": 0.25537421813214467, + "kl_loss": 0.12872056663036346, + "loss_ib": 0.002142837969586253, + "step": 888 + }, + { + "ce_ib": 8.510859489440918, + "ce_orig": 1.0213509798049927, + "epoch": 0.25537421813214467, + "kl_loss": 0.13635118305683136, + "loss_ib": 0.0022145977709442377, + "step": 888 + }, + { + "ce_ib": 7.644900321960449, + "ce_orig": 1.0376789569854736, + "epoch": 0.25537421813214467, + "kl_loss": 0.16750267148017883, + "loss_ib": 0.002439516829326749, + "step": 888 + }, + { + "ce_ib": 5.5938920974731445, + "ce_orig": 0.7180891036987305, + "epoch": 0.25537421813214467, + "kl_loss": 0.1146186962723732, + "loss_ib": 0.0017055762000381947, + "step": 888 + }, + { + "ce_ib": 7.188848972320557, + "ce_orig": 0.9195379018783569, + "epoch": 0.2556618017111223, + "kl_loss": 0.18785634636878967, + "loss_ib": 0.002597448183223605, + "step": 889 + }, + { + "ce_ib": 6.305577754974365, + "ce_orig": 0.7665948271751404, + "epoch": 0.2556618017111223, + "kl_loss": 0.1667861044406891, + "loss_ib": 0.0022984188981354237, + "step": 889 + }, + { + "ce_ib": 5.9111199378967285, + "ce_orig": 0.6647948026657104, + "epoch": 0.2556618017111223, + "kl_loss": 0.11888445913791656, + "loss_ib": 0.0017799565102905035, + "step": 889 + }, + { + "ce_ib": 7.212058067321777, + "ce_orig": 1.0140317678451538, + "epoch": 0.2556618017111223, + "kl_loss": 0.14530731737613678, + "loss_ib": 0.0021742789540439844, + "step": 889 + }, + { + "epoch": 0.2559493852900999, + "grad_norm": 0.08070097863674164, + "learning_rate": 4.9609640288606205e-05, + "loss": 0.8527, + "step": 890 + }, + { + "ce_ib": 7.837732315063477, + "ce_orig": 0.9493030905723572, + "epoch": 0.2559493852900999, + "kl_loss": 0.13700008392333984, + "loss_ib": 0.0021537740249186754, + "step": 890 + }, + { + "ce_ib": 8.325454711914062, + "ce_orig": 0.43459352850914, + "epoch": 0.2559493852900999, + "kl_loss": 0.18098728358745575, + "loss_ib": 0.002642418025061488, + "step": 890 + }, + { + "ce_ib": 4.257238864898682, + "ce_orig": 0.5357754826545715, + "epoch": 0.2559493852900999, + "kl_loss": 0.11766894906759262, + "loss_ib": 0.0016024133656173944, + "step": 890 + }, + { + "ce_ib": 5.715028285980225, + "ce_orig": 0.5111578702926636, + "epoch": 0.2559493852900999, + "kl_loss": 0.11944234371185303, + "loss_ib": 0.0017659261357039213, + "step": 890 + }, + { + "ce_ib": 7.341698169708252, + "ce_orig": 0.5357319712638855, + "epoch": 0.2562369688690776, + "kl_loss": 0.17682841420173645, + "loss_ib": 0.0025024539791047573, + "step": 891 + }, + { + "ce_ib": 8.842231750488281, + "ce_orig": 0.9881081581115723, + "epoch": 0.2562369688690776, + "kl_loss": 0.13770417869091034, + "loss_ib": 0.0022612649481743574, + "step": 891 + }, + { + "ce_ib": 8.00112247467041, + "ce_orig": 1.0665711164474487, + "epoch": 0.2562369688690776, + "kl_loss": 0.3117631673812866, + "loss_ib": 0.003917743917554617, + "step": 891 + }, + { + "ce_ib": 5.529026508331299, + "ce_orig": 0.5637280344963074, + "epoch": 0.2562369688690776, + "kl_loss": 0.18045048415660858, + "loss_ib": 0.002357407473027706, + "step": 891 + }, + { + "ce_ib": 6.365163803100586, + "ce_orig": 0.7977558970451355, + "epoch": 0.2565245524480552, + "kl_loss": 0.14809830486774445, + "loss_ib": 0.00211749947629869, + "step": 892 + }, + { + "ce_ib": 6.874579429626465, + "ce_orig": 0.4706336259841919, + "epoch": 0.2565245524480552, + "kl_loss": 0.14625059068202972, + "loss_ib": 0.00214996375143528, + "step": 892 + }, + { + "ce_ib": 7.569476127624512, + "ce_orig": 1.0135836601257324, + "epoch": 0.2565245524480552, + "kl_loss": 0.15987078845500946, + "loss_ib": 0.0023556554224342108, + "step": 892 + }, + { + "ce_ib": 10.080713272094727, + "ce_orig": 0.8937276601791382, + "epoch": 0.2565245524480552, + "kl_loss": 0.10532618314027786, + "loss_ib": 0.002061333041638136, + "step": 892 + }, + { + "ce_ib": 3.6760833263397217, + "ce_orig": 0.3286120891571045, + "epoch": 0.25681213602703284, + "kl_loss": 0.1290312111377716, + "loss_ib": 0.0016579204238951206, + "step": 893 + }, + { + "ce_ib": 8.532036781311035, + "ce_orig": 0.9751139283180237, + "epoch": 0.25681213602703284, + "kl_loss": 0.177982360124588, + "loss_ib": 0.0026330272667109966, + "step": 893 + }, + { + "ce_ib": 7.181567668914795, + "ce_orig": 0.8265181183815002, + "epoch": 0.25681213602703284, + "kl_loss": 0.11877339333295822, + "loss_ib": 0.0019058906473219395, + "step": 893 + }, + { + "ce_ib": 6.04387092590332, + "ce_orig": 0.7296001315116882, + "epoch": 0.25681213602703284, + "kl_loss": 0.07870463281869888, + "loss_ib": 0.0013914334122091532, + "step": 893 + }, + { + "ce_ib": 4.4211602210998535, + "ce_orig": 0.5287713408470154, + "epoch": 0.2570997196060105, + "kl_loss": 0.09574338793754578, + "loss_ib": 0.0013995497720316052, + "step": 894 + }, + { + "ce_ib": 6.459323883056641, + "ce_orig": 0.6177991628646851, + "epoch": 0.2570997196060105, + "kl_loss": 0.1003279983997345, + "loss_ib": 0.0016492123249918222, + "step": 894 + }, + { + "ce_ib": 8.493982315063477, + "ce_orig": 1.0795583724975586, + "epoch": 0.2570997196060105, + "kl_loss": 0.2378617227077484, + "loss_ib": 0.0032280152663588524, + "step": 894 + }, + { + "ce_ib": 8.2710599899292, + "ce_orig": 0.8991292715072632, + "epoch": 0.2570997196060105, + "kl_loss": 0.13314013183116913, + "loss_ib": 0.0021585074719041586, + "step": 894 + }, + { + "epoch": 0.25738730318498815, + "grad_norm": 0.08752463012933731, + "learning_rate": 4.96027801084029e-05, + "loss": 0.8476, + "step": 895 + }, + { + "ce_ib": 7.055402755737305, + "ce_orig": 0.7234551906585693, + "epoch": 0.25738730318498815, + "kl_loss": 0.10340514779090881, + "loss_ib": 0.0017395915929228067, + "step": 895 + }, + { + "ce_ib": 7.390566349029541, + "ce_orig": 0.6613413691520691, + "epoch": 0.25738730318498815, + "kl_loss": 0.19420111179351807, + "loss_ib": 0.002681067446246743, + "step": 895 + }, + { + "ce_ib": 8.047496795654297, + "ce_orig": 0.5606889724731445, + "epoch": 0.25738730318498815, + "kl_loss": 0.1564003825187683, + "loss_ib": 0.002368753543123603, + "step": 895 + }, + { + "ce_ib": 7.199287414550781, + "ce_orig": 0.4812185764312744, + "epoch": 0.25738730318498815, + "kl_loss": 0.15393242239952087, + "loss_ib": 0.0022592528257519007, + "step": 895 + }, + { + "ce_ib": 7.276932239532471, + "ce_orig": 1.0173298120498657, + "epoch": 0.25767488676396577, + "kl_loss": 0.11038987338542938, + "loss_ib": 0.0018315919442102313, + "step": 896 + }, + { + "ce_ib": 13.540701866149902, + "ce_orig": 1.9171059131622314, + "epoch": 0.25767488676396577, + "kl_loss": 0.1913508027791977, + "loss_ib": 0.0032675780821591616, + "step": 896 + }, + { + "ce_ib": 11.850728988647461, + "ce_orig": 1.4846457242965698, + "epoch": 0.25767488676396577, + "kl_loss": 0.15523210167884827, + "loss_ib": 0.0027373938355594873, + "step": 896 + }, + { + "ce_ib": 8.497753143310547, + "ce_orig": 1.1700297594070435, + "epoch": 0.25767488676396577, + "kl_loss": 0.1125074177980423, + "loss_ib": 0.0019748492632061243, + "step": 896 + }, + { + "ce_ib": 8.266855239868164, + "ce_orig": 0.8340995907783508, + "epoch": 0.2579624703429434, + "kl_loss": 0.09629837423563004, + "loss_ib": 0.0017896691570058465, + "step": 897 + }, + { + "ce_ib": 7.022110939025879, + "ce_orig": 1.0352916717529297, + "epoch": 0.2579624703429434, + "kl_loss": 0.08450064808130264, + "loss_ib": 0.0015472176019102335, + "step": 897 + }, + { + "ce_ib": 8.604351043701172, + "ce_orig": 0.8481941223144531, + "epoch": 0.2579624703429434, + "kl_loss": 0.14510974287986755, + "loss_ib": 0.002311532385647297, + "step": 897 + }, + { + "ce_ib": 7.79695987701416, + "ce_orig": 0.8436591029167175, + "epoch": 0.2579624703429434, + "kl_loss": 0.16441552340984344, + "loss_ib": 0.002423851052299142, + "step": 897 + }, + { + "ce_ib": 8.946906089782715, + "ce_orig": 1.135650873184204, + "epoch": 0.2582500539219211, + "kl_loss": 0.17562073469161987, + "loss_ib": 0.0026508979499340057, + "step": 898 + }, + { + "ce_ib": 7.635839939117432, + "ce_orig": 0.8321837782859802, + "epoch": 0.2582500539219211, + "kl_loss": 0.24018771946430206, + "loss_ib": 0.0031654611229896545, + "step": 898 + }, + { + "ce_ib": 5.402726173400879, + "ce_orig": 0.5606065392494202, + "epoch": 0.2582500539219211, + "kl_loss": 0.16093802452087402, + "loss_ib": 0.0021496526896953583, + "step": 898 + }, + { + "ce_ib": 12.75641918182373, + "ce_orig": 1.9636436700820923, + "epoch": 0.2582500539219211, + "kl_loss": 0.14146681129932404, + "loss_ib": 0.002690309891477227, + "step": 898 + }, + { + "ce_ib": 6.529972076416016, + "ce_orig": 0.9304379820823669, + "epoch": 0.2585376375008987, + "kl_loss": 0.09048961102962494, + "loss_ib": 0.001557893236167729, + "step": 899 + }, + { + "ce_ib": 9.07840633392334, + "ce_orig": 0.864215612411499, + "epoch": 0.2585376375008987, + "kl_loss": 0.1602395474910736, + "loss_ib": 0.0025102358777076006, + "step": 899 + }, + { + "ce_ib": 12.58299732208252, + "ce_orig": 1.5156524181365967, + "epoch": 0.2585376375008987, + "kl_loss": 0.13811329007148743, + "loss_ib": 0.0026394324377179146, + "step": 899 + }, + { + "ce_ib": 5.367837429046631, + "ce_orig": 0.3980950117111206, + "epoch": 0.2585376375008987, + "kl_loss": 0.15040189027786255, + "loss_ib": 0.0020408027339726686, + "step": 899 + }, + { + "epoch": 0.2588252210798763, + "grad_norm": 0.10310398787260056, + "learning_rate": 4.959586065439189e-05, + "loss": 0.8364, + "step": 900 + }, + { + "ce_ib": 7.380722522735596, + "ce_orig": 0.7507438063621521, + "epoch": 0.2588252210798763, + "kl_loss": 0.1838665008544922, + "loss_ib": 0.002576737431809306, + "step": 900 + }, + { + "ce_ib": 6.7126383781433105, + "ce_orig": 0.9561918377876282, + "epoch": 0.2588252210798763, + "kl_loss": 0.10371539741754532, + "loss_ib": 0.0017084177816286683, + "step": 900 + }, + { + "ce_ib": 7.612314224243164, + "ce_orig": 0.5231863856315613, + "epoch": 0.2588252210798763, + "kl_loss": 0.12142337113618851, + "loss_ib": 0.001975465100258589, + "step": 900 + }, + { + "ce_ib": 8.406620979309082, + "ce_orig": 0.7458207607269287, + "epoch": 0.2588252210798763, + "kl_loss": 0.18224099278450012, + "loss_ib": 0.0026630719657987356, + "step": 900 + }, + { + "ce_ib": 4.506711483001709, + "ce_orig": 0.4876461923122406, + "epoch": 0.259112804658854, + "kl_loss": 0.09765396267175674, + "loss_ib": 0.001427210634574294, + "step": 901 + }, + { + "ce_ib": 6.801796913146973, + "ce_orig": 0.6831690669059753, + "epoch": 0.259112804658854, + "kl_loss": 0.17530401051044464, + "loss_ib": 0.0024332196917384863, + "step": 901 + }, + { + "ce_ib": 4.268216609954834, + "ce_orig": 0.7346028089523315, + "epoch": 0.259112804658854, + "kl_loss": 0.08807877451181412, + "loss_ib": 0.0013076093746349216, + "step": 901 + }, + { + "ce_ib": 6.742536544799805, + "ce_orig": 0.497639924287796, + "epoch": 0.259112804658854, + "kl_loss": 0.13441647589206696, + "loss_ib": 0.002018418163061142, + "step": 901 + }, + { + "ce_ib": 5.220005512237549, + "ce_orig": 0.7627407312393188, + "epoch": 0.2594003882378316, + "kl_loss": 0.10395920276641846, + "loss_ib": 0.0015615924494341016, + "step": 902 + }, + { + "ce_ib": 6.868656158447266, + "ce_orig": 0.9283803701400757, + "epoch": 0.2594003882378316, + "kl_loss": 0.08269625902175903, + "loss_ib": 0.0015138281742110848, + "step": 902 + }, + { + "ce_ib": 6.296173095703125, + "ce_orig": 0.7904551029205322, + "epoch": 0.2594003882378316, + "kl_loss": 0.11948268860578537, + "loss_ib": 0.0018244441598653793, + "step": 902 + }, + { + "ce_ib": 8.819576263427734, + "ce_orig": 0.9299820065498352, + "epoch": 0.2594003882378316, + "kl_loss": 0.16270163655281067, + "loss_ib": 0.0025089739356189966, + "step": 902 + }, + { + "ce_ib": 4.031962871551514, + "ce_orig": 0.4961770176887512, + "epoch": 0.25968797181680925, + "kl_loss": 0.08937801420688629, + "loss_ib": 0.0012969764648005366, + "step": 903 + }, + { + "ce_ib": 6.408451080322266, + "ce_orig": 0.4822241961956024, + "epoch": 0.25968797181680925, + "kl_loss": 0.1613832265138626, + "loss_ib": 0.0022546774707734585, + "step": 903 + }, + { + "ce_ib": 5.839984893798828, + "ce_orig": 0.38266420364379883, + "epoch": 0.25968797181680925, + "kl_loss": 0.0887475237250328, + "loss_ib": 0.0014714737189933658, + "step": 903 + }, + { + "ce_ib": 7.462576866149902, + "ce_orig": 0.7355101108551025, + "epoch": 0.25968797181680925, + "kl_loss": 0.10309060662984848, + "loss_ib": 0.0017771637067198753, + "step": 903 + }, + { + "ce_ib": 5.721911430358887, + "ce_orig": 0.6964151263237, + "epoch": 0.25997555539578693, + "kl_loss": 0.1606331169605255, + "loss_ib": 0.002178522292524576, + "step": 904 + }, + { + "ce_ib": 8.619745254516602, + "ce_orig": 1.2370572090148926, + "epoch": 0.25997555539578693, + "kl_loss": 0.11422259360551834, + "loss_ib": 0.0020042003598064184, + "step": 904 + }, + { + "ce_ib": 9.898813247680664, + "ce_orig": 0.7860179543495178, + "epoch": 0.25997555539578693, + "kl_loss": 0.13293001055717468, + "loss_ib": 0.002319181337952614, + "step": 904 + }, + { + "ce_ib": 6.747185707092285, + "ce_orig": 0.9206150770187378, + "epoch": 0.25997555539578693, + "kl_loss": 0.08839345723390579, + "loss_ib": 0.0015586530789732933, + "step": 904 + }, + { + "epoch": 0.26026313897476455, + "grad_norm": 0.08432283997535706, + "learning_rate": 4.958888194324374e-05, + "loss": 0.7976, + "step": 905 + }, + { + "ce_ib": 9.342927932739258, + "ce_orig": 0.9755980372428894, + "epoch": 0.26026313897476455, + "kl_loss": 0.16350196301937103, + "loss_ib": 0.0025693124625831842, + "step": 905 + }, + { + "ce_ib": 3.104599952697754, + "ce_orig": 0.15370331704616547, + "epoch": 0.26026313897476455, + "kl_loss": 0.28929489850997925, + "loss_ib": 0.0032034090254455805, + "step": 905 + }, + { + "ce_ib": 7.122314453125, + "ce_orig": 0.8155995607376099, + "epoch": 0.26026313897476455, + "kl_loss": 0.11030598729848862, + "loss_ib": 0.001815291354432702, + "step": 905 + }, + { + "ce_ib": 7.569455146789551, + "ce_orig": 0.924788236618042, + "epoch": 0.26026313897476455, + "kl_loss": 0.16827288269996643, + "loss_ib": 0.002439674222841859, + "step": 905 + }, + { + "ce_ib": 8.322404861450195, + "ce_orig": 0.6664920449256897, + "epoch": 0.2605507225537422, + "kl_loss": 0.19735190272331238, + "loss_ib": 0.002805759198963642, + "step": 906 + }, + { + "ce_ib": 11.654561996459961, + "ce_orig": 1.7791378498077393, + "epoch": 0.2605507225537422, + "kl_loss": 0.16949772834777832, + "loss_ib": 0.0028604334220290184, + "step": 906 + }, + { + "ce_ib": 8.227044105529785, + "ce_orig": 1.3938320875167847, + "epoch": 0.2605507225537422, + "kl_loss": 0.12992843985557556, + "loss_ib": 0.00212198868393898, + "step": 906 + }, + { + "ce_ib": 7.226250171661377, + "ce_orig": 0.25086575746536255, + "epoch": 0.2605507225537422, + "kl_loss": 0.11621251702308655, + "loss_ib": 0.0018847500905394554, + "step": 906 + }, + { + "ce_ib": 8.640125274658203, + "ce_orig": 0.8351717591285706, + "epoch": 0.2608383061327198, + "kl_loss": 0.12283913791179657, + "loss_ib": 0.0020924038253724575, + "step": 907 + }, + { + "ce_ib": 5.74461555480957, + "ce_orig": 0.6159311532974243, + "epoch": 0.2608383061327198, + "kl_loss": 0.0854053944349289, + "loss_ib": 0.0014285154175013304, + "step": 907 + }, + { + "ce_ib": 6.865052700042725, + "ce_orig": 0.7871037721633911, + "epoch": 0.2608383061327198, + "kl_loss": 0.10555486381053925, + "loss_ib": 0.0017420538933947682, + "step": 907 + }, + { + "ce_ib": 10.412860870361328, + "ce_orig": 1.4906829595565796, + "epoch": 0.2608383061327198, + "kl_loss": 0.1556229293346405, + "loss_ib": 0.0025975152384489775, + "step": 907 + }, + { + "ce_ib": 6.144519805908203, + "ce_orig": 0.548992395401001, + "epoch": 0.2611258897116975, + "kl_loss": 0.17790073156356812, + "loss_ib": 0.002393459202721715, + "step": 908 + }, + { + "ce_ib": 4.397858142852783, + "ce_orig": 0.41396814584732056, + "epoch": 0.2611258897116975, + "kl_loss": 0.10652049630880356, + "loss_ib": 0.0015049907378852367, + "step": 908 + }, + { + "ce_ib": 9.191903114318848, + "ce_orig": 1.1926069259643555, + "epoch": 0.2611258897116975, + "kl_loss": 0.12529361248016357, + "loss_ib": 0.0021721264347434044, + "step": 908 + }, + { + "ce_ib": 8.310478210449219, + "ce_orig": 1.0322816371917725, + "epoch": 0.2611258897116975, + "kl_loss": 0.12497265636920929, + "loss_ib": 0.0020807741675525904, + "step": 908 + }, + { + "ce_ib": 6.497647762298584, + "ce_orig": 0.48664146661758423, + "epoch": 0.2614134732906751, + "kl_loss": 0.10144509375095367, + "loss_ib": 0.0016642155824229121, + "step": 909 + }, + { + "ce_ib": 8.37979793548584, + "ce_orig": 0.7440108060836792, + "epoch": 0.2614134732906751, + "kl_loss": 0.11546805500984192, + "loss_ib": 0.001992660341784358, + "step": 909 + }, + { + "ce_ib": 4.586841106414795, + "ce_orig": 0.7357204556465149, + "epoch": 0.2614134732906751, + "kl_loss": 0.11390420794487, + "loss_ib": 0.0015977261355146766, + "step": 909 + }, + { + "ce_ib": 5.848711967468262, + "ce_orig": 0.7831753492355347, + "epoch": 0.2614134732906751, + "kl_loss": 0.10904887318611145, + "loss_ib": 0.0016753599047660828, + "step": 909 + }, + { + "epoch": 0.2617010568696527, + "grad_norm": 0.09389620274305344, + "learning_rate": 4.958184399177178e-05, + "loss": 0.8516, + "step": 910 + }, + { + "ce_ib": 4.011692047119141, + "ce_orig": 0.5226119160652161, + "epoch": 0.2617010568696527, + "kl_loss": 0.0997629314661026, + "loss_ib": 0.001398798543959856, + "step": 910 + }, + { + "ce_ib": 5.39235782623291, + "ce_orig": 0.4774095416069031, + "epoch": 0.2617010568696527, + "kl_loss": 0.11160556972026825, + "loss_ib": 0.001655291416682303, + "step": 910 + }, + { + "ce_ib": 6.545243740081787, + "ce_orig": 0.877449631690979, + "epoch": 0.2617010568696527, + "kl_loss": 0.113972969353199, + "loss_ib": 0.0017942539416253567, + "step": 910 + }, + { + "ce_ib": 4.371427059173584, + "ce_orig": 0.677146315574646, + "epoch": 0.2617010568696527, + "kl_loss": 0.08138425648212433, + "loss_ib": 0.0012509851949289441, + "step": 910 + }, + { + "ce_ib": 6.525064945220947, + "ce_orig": 0.7258946299552917, + "epoch": 0.2619886404486304, + "kl_loss": 0.1372908651828766, + "loss_ib": 0.002025415189564228, + "step": 911 + }, + { + "ce_ib": 7.87410306930542, + "ce_orig": 1.1585533618927002, + "epoch": 0.2619886404486304, + "kl_loss": 0.11789745837450027, + "loss_ib": 0.0019663849379867315, + "step": 911 + }, + { + "ce_ib": 9.139239311218262, + "ce_orig": 0.9819349646568298, + "epoch": 0.2619886404486304, + "kl_loss": 0.16793784499168396, + "loss_ib": 0.0025933024007827044, + "step": 911 + }, + { + "ce_ib": 7.358417510986328, + "ce_orig": 0.6855488419532776, + "epoch": 0.2619886404486304, + "kl_loss": 0.14852701127529144, + "loss_ib": 0.0022211119066923857, + "step": 911 + }, + { + "ce_ib": 12.192155838012695, + "ce_orig": 1.4514764547348022, + "epoch": 0.26227622402760803, + "kl_loss": 0.09740576148033142, + "loss_ib": 0.0021932730451226234, + "step": 912 + }, + { + "ce_ib": 6.15526819229126, + "ce_orig": 0.7057509422302246, + "epoch": 0.26227622402760803, + "kl_loss": 0.09535631537437439, + "loss_ib": 0.0015690898289903998, + "step": 912 + }, + { + "ce_ib": 6.982746124267578, + "ce_orig": 0.739734411239624, + "epoch": 0.26227622402760803, + "kl_loss": 0.1276528239250183, + "loss_ib": 0.001974802929908037, + "step": 912 + }, + { + "ce_ib": 9.937287330627441, + "ce_orig": 0.5557059645652771, + "epoch": 0.26227622402760803, + "kl_loss": 0.12562233209609985, + "loss_ib": 0.0022499519400298595, + "step": 912 + }, + { + "ce_ib": 6.873095512390137, + "ce_orig": 0.4055517911911011, + "epoch": 0.26256380760658565, + "kl_loss": 0.15974824130535126, + "loss_ib": 0.00228479178622365, + "step": 913 + }, + { + "ce_ib": 10.225861549377441, + "ce_orig": 1.6037037372589111, + "epoch": 0.26256380760658565, + "kl_loss": 0.10312145203351974, + "loss_ib": 0.0020538007374852896, + "step": 913 + }, + { + "ce_ib": 11.041903495788574, + "ce_orig": 1.3984054327011108, + "epoch": 0.26256380760658565, + "kl_loss": 0.08899977803230286, + "loss_ib": 0.0019941881764680147, + "step": 913 + }, + { + "ce_ib": 7.812140464782715, + "ce_orig": 0.8241643309593201, + "epoch": 0.26256380760658565, + "kl_loss": 0.15096673369407654, + "loss_ib": 0.0022908812388777733, + "step": 913 + }, + { + "ce_ib": 4.639538764953613, + "ce_orig": 0.47728028893470764, + "epoch": 0.2628513911855633, + "kl_loss": 0.101595938205719, + "loss_ib": 0.0014799132477492094, + "step": 914 + }, + { + "ce_ib": 8.127906799316406, + "ce_orig": 0.6452031135559082, + "epoch": 0.2628513911855633, + "kl_loss": 0.19892573356628418, + "loss_ib": 0.0028020478785037994, + "step": 914 + }, + { + "ce_ib": 10.456913948059082, + "ce_orig": 1.3510535955429077, + "epoch": 0.2628513911855633, + "kl_loss": 0.19482071697711945, + "loss_ib": 0.002993898233398795, + "step": 914 + }, + { + "ce_ib": 3.9594027996063232, + "ce_orig": 0.4253597855567932, + "epoch": 0.2628513911855633, + "kl_loss": 0.19654974341392517, + "loss_ib": 0.002361437538638711, + "step": 914 + }, + { + "epoch": 0.26313897476454096, + "grad_norm": 0.0944618359208107, + "learning_rate": 4.9574746816932084e-05, + "loss": 0.902, + "step": 915 + }, + { + "ce_ib": 9.708820343017578, + "ce_orig": 0.7267554402351379, + "epoch": 0.26313897476454096, + "kl_loss": 0.18204952776432037, + "loss_ib": 0.0027913772501051426, + "step": 915 + }, + { + "ce_ib": 4.622426986694336, + "ce_orig": 0.5747974514961243, + "epoch": 0.26313897476454096, + "kl_loss": 0.11130377650260925, + "loss_ib": 0.0015752804465591908, + "step": 915 + }, + { + "ce_ib": 8.35964298248291, + "ce_orig": 1.0757566690444946, + "epoch": 0.26313897476454096, + "kl_loss": 0.11115144193172455, + "loss_ib": 0.00194747862406075, + "step": 915 + }, + { + "ce_ib": 7.628917217254639, + "ce_orig": 0.6007151007652283, + "epoch": 0.26313897476454096, + "kl_loss": 0.16418364644050598, + "loss_ib": 0.0024047282058745623, + "step": 915 + }, + { + "ce_ib": 4.411064624786377, + "ce_orig": 0.3923698663711548, + "epoch": 0.2634265583435186, + "kl_loss": 0.07581924647092819, + "loss_ib": 0.001199298887513578, + "step": 916 + }, + { + "ce_ib": 8.376907348632812, + "ce_orig": 0.9289705753326416, + "epoch": 0.2634265583435186, + "kl_loss": 0.11779868602752686, + "loss_ib": 0.0020156775135546923, + "step": 916 + }, + { + "ce_ib": 4.883301734924316, + "ce_orig": 0.49596890807151794, + "epoch": 0.2634265583435186, + "kl_loss": 0.15270809829235077, + "loss_ib": 0.0020154111552983522, + "step": 916 + }, + { + "ce_ib": 3.5627071857452393, + "ce_orig": 0.35683321952819824, + "epoch": 0.2634265583435186, + "kl_loss": 0.08917704224586487, + "loss_ib": 0.001248041051439941, + "step": 916 + }, + { + "ce_ib": 5.704797744750977, + "ce_orig": 0.5445812940597534, + "epoch": 0.2637141419224962, + "kl_loss": 0.126393124461174, + "loss_ib": 0.0018344109412282705, + "step": 917 + }, + { + "ce_ib": 8.156935691833496, + "ce_orig": 1.2448252439498901, + "epoch": 0.2637141419224962, + "kl_loss": 0.11635102331638336, + "loss_ib": 0.001979203661903739, + "step": 917 + }, + { + "ce_ib": 7.226184368133545, + "ce_orig": 0.8044269680976868, + "epoch": 0.2637141419224962, + "kl_loss": 0.11676135659217834, + "loss_ib": 0.001890231971628964, + "step": 917 + }, + { + "ce_ib": 6.512085914611816, + "ce_orig": 1.0719903707504272, + "epoch": 0.2637141419224962, + "kl_loss": 0.14638350903987885, + "loss_ib": 0.002115043578669429, + "step": 917 + }, + { + "ce_ib": 4.944369316101074, + "ce_orig": 0.7357708215713501, + "epoch": 0.2640017255014739, + "kl_loss": 0.10602225363254547, + "loss_ib": 0.0015546594513580203, + "step": 918 + }, + { + "ce_ib": 7.204721927642822, + "ce_orig": 0.7557398080825806, + "epoch": 0.2640017255014739, + "kl_loss": 0.12639883160591125, + "loss_ib": 0.001984460512176156, + "step": 918 + }, + { + "ce_ib": 8.273849487304688, + "ce_orig": 1.0668854713439941, + "epoch": 0.2640017255014739, + "kl_loss": 0.18592819571495056, + "loss_ib": 0.002686666790395975, + "step": 918 + }, + { + "ce_ib": 11.763835906982422, + "ce_orig": 0.6219754219055176, + "epoch": 0.2640017255014739, + "kl_loss": 0.1133304089307785, + "loss_ib": 0.0023096876684576273, + "step": 918 + }, + { + "ce_ib": 7.171568870544434, + "ce_orig": 0.7699258923530579, + "epoch": 0.2642893090804515, + "kl_loss": 0.10670562088489532, + "loss_ib": 0.0017842131201177835, + "step": 919 + }, + { + "ce_ib": 9.258209228515625, + "ce_orig": 0.8205808401107788, + "epoch": 0.2642893090804515, + "kl_loss": 0.16718566417694092, + "loss_ib": 0.002597677754238248, + "step": 919 + }, + { + "ce_ib": 7.402035236358643, + "ce_orig": 1.1611003875732422, + "epoch": 0.2642893090804515, + "kl_loss": 0.10410024970769882, + "loss_ib": 0.001781205995939672, + "step": 919 + }, + { + "ce_ib": 9.35278606414795, + "ce_orig": 1.3219811916351318, + "epoch": 0.2642893090804515, + "kl_loss": 0.11624269187450409, + "loss_ib": 0.0020977056119590998, + "step": 919 + }, + { + "epoch": 0.26457689265942913, + "grad_norm": 0.09958989173173904, + "learning_rate": 4.9567590435823383e-05, + "loss": 0.8282, + "step": 920 + }, + { + "ce_ib": 6.871311187744141, + "ce_orig": 0.5260292291641235, + "epoch": 0.26457689265942913, + "kl_loss": 0.20388224720954895, + "loss_ib": 0.002725953469052911, + "step": 920 + }, + { + "ce_ib": 8.602705955505371, + "ce_orig": 1.0546218156814575, + "epoch": 0.26457689265942913, + "kl_loss": 0.13618257641792297, + "loss_ib": 0.0022220963146537542, + "step": 920 + }, + { + "ce_ib": 8.520659446716309, + "ce_orig": 0.9017817378044128, + "epoch": 0.26457689265942913, + "kl_loss": 0.1527824103832245, + "loss_ib": 0.002379890065640211, + "step": 920 + }, + { + "ce_ib": 9.107587814331055, + "ce_orig": 0.9084448218345642, + "epoch": 0.26457689265942913, + "kl_loss": 0.17240267992019653, + "loss_ib": 0.002634785370901227, + "step": 920 + }, + { + "ce_ib": 9.081536293029785, + "ce_orig": 1.3624303340911865, + "epoch": 0.2648644762384068, + "kl_loss": 0.19510559737682343, + "loss_ib": 0.00285920943133533, + "step": 921 + }, + { + "ce_ib": 2.0994462966918945, + "ce_orig": 0.09127107262611389, + "epoch": 0.2648644762384068, + "kl_loss": 0.23770758509635925, + "loss_ib": 0.0025870203971862793, + "step": 921 + }, + { + "ce_ib": 9.57007122039795, + "ce_orig": 0.840829074382782, + "epoch": 0.2648644762384068, + "kl_loss": 0.13091081380844116, + "loss_ib": 0.002266115276142955, + "step": 921 + }, + { + "ce_ib": 6.87959623336792, + "ce_orig": 0.47246253490448, + "epoch": 0.2648644762384068, + "kl_loss": 0.1876753866672516, + "loss_ib": 0.0025647133588790894, + "step": 921 + }, + { + "ce_ib": 10.32319450378418, + "ce_orig": 1.2931030988693237, + "epoch": 0.26515205981738443, + "kl_loss": 0.1872691810131073, + "loss_ib": 0.0029050111770629883, + "step": 922 + }, + { + "ce_ib": 6.233536720275879, + "ce_orig": 0.5803642272949219, + "epoch": 0.26515205981738443, + "kl_loss": 0.11040376126766205, + "loss_ib": 0.0017273911507800221, + "step": 922 + }, + { + "ce_ib": 5.026586532592773, + "ce_orig": 0.42871662974357605, + "epoch": 0.26515205981738443, + "kl_loss": 0.10527107119560242, + "loss_ib": 0.001555369351990521, + "step": 922 + }, + { + "ce_ib": 8.718320846557617, + "ce_orig": 1.142681360244751, + "epoch": 0.26515205981738443, + "kl_loss": 0.11979550123214722, + "loss_ib": 0.002069787122309208, + "step": 922 + }, + { + "ce_ib": 5.136258125305176, + "ce_orig": 0.751221239566803, + "epoch": 0.26543964339636206, + "kl_loss": 0.09198145568370819, + "loss_ib": 0.0014334403676912189, + "step": 923 + }, + { + "ce_ib": 4.875478744506836, + "ce_orig": 0.6654835343360901, + "epoch": 0.26543964339636206, + "kl_loss": 0.11321474611759186, + "loss_ib": 0.001619695220142603, + "step": 923 + }, + { + "ce_ib": 4.8394036293029785, + "ce_orig": 0.2200663983821869, + "epoch": 0.26543964339636206, + "kl_loss": 0.20495754480361938, + "loss_ib": 0.0025335156824439764, + "step": 923 + }, + { + "ce_ib": 7.12498140335083, + "ce_orig": 0.9060279726982117, + "epoch": 0.26543964339636206, + "kl_loss": 0.10331471264362335, + "loss_ib": 0.0017456451896578074, + "step": 923 + }, + { + "ce_ib": 6.738324165344238, + "ce_orig": 0.7269691824913025, + "epoch": 0.2657272269753397, + "kl_loss": 0.12429259717464447, + "loss_ib": 0.0019167583668604493, + "step": 924 + }, + { + "ce_ib": 12.713713645935059, + "ce_orig": 1.8025732040405273, + "epoch": 0.2657272269753397, + "kl_loss": 0.18177086114883423, + "loss_ib": 0.0030890798661857843, + "step": 924 + }, + { + "ce_ib": 9.708475112915039, + "ce_orig": 1.155458927154541, + "epoch": 0.2657272269753397, + "kl_loss": 0.14343459904193878, + "loss_ib": 0.0024051934015005827, + "step": 924 + }, + { + "ce_ib": 7.697851181030273, + "ce_orig": 0.7456066608428955, + "epoch": 0.2657272269753397, + "kl_loss": 0.15187275409698486, + "loss_ib": 0.0022885126527398825, + "step": 924 + }, + { + "epoch": 0.26601481055431736, + "grad_norm": 0.08948251605033875, + "learning_rate": 4.956037486568706e-05, + "loss": 0.8789, + "step": 925 + }, + { + "ce_ib": 5.39132022857666, + "ce_orig": 0.6468148231506348, + "epoch": 0.26601481055431736, + "kl_loss": 0.11626164615154266, + "loss_ib": 0.001701748464256525, + "step": 925 + }, + { + "ce_ib": 7.801209926605225, + "ce_orig": 0.6465848684310913, + "epoch": 0.26601481055431736, + "kl_loss": 0.15845727920532227, + "loss_ib": 0.0023646936751902103, + "step": 925 + }, + { + "ce_ib": 8.216442108154297, + "ce_orig": 1.0202922821044922, + "epoch": 0.26601481055431736, + "kl_loss": 0.151662677526474, + "loss_ib": 0.002338270889595151, + "step": 925 + }, + { + "ce_ib": 7.495940685272217, + "ce_orig": 0.8159665465354919, + "epoch": 0.26601481055431736, + "kl_loss": 0.13693495094776154, + "loss_ib": 0.002118943491950631, + "step": 925 + }, + { + "ce_ib": 6.499263763427734, + "ce_orig": 0.7159230709075928, + "epoch": 0.266302394133295, + "kl_loss": 0.09872101247310638, + "loss_ib": 0.0016371364472433925, + "step": 926 + }, + { + "ce_ib": 6.620962619781494, + "ce_orig": 0.8149300813674927, + "epoch": 0.266302394133295, + "kl_loss": 0.13282713294029236, + "loss_ib": 0.0019903674256056547, + "step": 926 + }, + { + "ce_ib": 7.567070007324219, + "ce_orig": 0.9069898724555969, + "epoch": 0.266302394133295, + "kl_loss": 0.08865264058113098, + "loss_ib": 0.0016432332340627909, + "step": 926 + }, + { + "ce_ib": 4.736062526702881, + "ce_orig": 0.6141554117202759, + "epoch": 0.266302394133295, + "kl_loss": 0.10103703290224075, + "loss_ib": 0.001483976491726935, + "step": 926 + }, + { + "ce_ib": 4.775996208190918, + "ce_orig": 0.5475775003433228, + "epoch": 0.2665899777122726, + "kl_loss": 0.08474655449390411, + "loss_ib": 0.0013250651536509395, + "step": 927 + }, + { + "ce_ib": 5.6749396324157715, + "ce_orig": 0.5858866572380066, + "epoch": 0.2665899777122726, + "kl_loss": 0.12519749999046326, + "loss_ib": 0.0018194690346717834, + "step": 927 + }, + { + "ce_ib": 10.990907669067383, + "ce_orig": 1.7932952642440796, + "epoch": 0.2665899777122726, + "kl_loss": 0.09833800792694092, + "loss_ib": 0.002082470804452896, + "step": 927 + }, + { + "ce_ib": 9.858685493469238, + "ce_orig": 1.2990039587020874, + "epoch": 0.2665899777122726, + "kl_loss": 0.13593445718288422, + "loss_ib": 0.0023452129680663347, + "step": 927 + }, + { + "ce_ib": 7.085406303405762, + "ce_orig": 1.029772400856018, + "epoch": 0.2668775612912503, + "kl_loss": 0.08736493438482285, + "loss_ib": 0.001582189928740263, + "step": 928 + }, + { + "ce_ib": 5.06296968460083, + "ce_orig": 0.7175295948982239, + "epoch": 0.2668775612912503, + "kl_loss": 0.09772807359695435, + "loss_ib": 0.0014835776528343558, + "step": 928 + }, + { + "ce_ib": 8.964816093444824, + "ce_orig": 1.3526090383529663, + "epoch": 0.2668775612912503, + "kl_loss": 0.08428291231393814, + "loss_ib": 0.0017393105663359165, + "step": 928 + }, + { + "ce_ib": 5.652362823486328, + "ce_orig": 0.6536108255386353, + "epoch": 0.2668775612912503, + "kl_loss": 0.11744387447834015, + "loss_ib": 0.0017396750627085567, + "step": 928 + }, + { + "ce_ib": 5.928165435791016, + "ce_orig": 0.5609773993492126, + "epoch": 0.2671651448702279, + "kl_loss": 0.10269203037023544, + "loss_ib": 0.0016197367804124951, + "step": 929 + }, + { + "ce_ib": 7.495223522186279, + "ce_orig": 0.6944816708564758, + "epoch": 0.2671651448702279, + "kl_loss": 0.13109838962554932, + "loss_ib": 0.002060506260022521, + "step": 929 + }, + { + "ce_ib": 4.497866153717041, + "ce_orig": 0.3619462251663208, + "epoch": 0.2671651448702279, + "kl_loss": 0.2023397535085678, + "loss_ib": 0.0024731841403990984, + "step": 929 + }, + { + "ce_ib": 10.906074523925781, + "ce_orig": 1.6625550985336304, + "epoch": 0.2671651448702279, + "kl_loss": 0.20823043584823608, + "loss_ib": 0.0031729117035865784, + "step": 929 + }, + { + "epoch": 0.26745272844920553, + "grad_norm": 0.09865286946296692, + "learning_rate": 4.955310012390711e-05, + "loss": 0.9273, + "step": 930 + }, + { + "ce_ib": 10.8095121383667, + "ce_orig": 1.3800263404846191, + "epoch": 0.26745272844920553, + "kl_loss": 0.12191278487443924, + "loss_ib": 0.0023000789806246758, + "step": 930 + }, + { + "ce_ib": 7.083028316497803, + "ce_orig": 1.2210267782211304, + "epoch": 0.26745272844920553, + "kl_loss": 0.05867953598499298, + "loss_ib": 0.0012950979871675372, + "step": 930 + }, + { + "ce_ib": 7.6254425048828125, + "ce_orig": 0.8195652961730957, + "epoch": 0.26745272844920553, + "kl_loss": 0.1415867656469345, + "loss_ib": 0.0021784116979688406, + "step": 930 + }, + { + "ce_ib": 2.1736412048339844, + "ce_orig": 0.1812783181667328, + "epoch": 0.26745272844920553, + "kl_loss": 0.26977869868278503, + "loss_ib": 0.0029151509515941143, + "step": 930 + }, + { + "ce_ib": 6.303011894226074, + "ce_orig": 0.6517394185066223, + "epoch": 0.2677403120281832, + "kl_loss": 0.09915041923522949, + "loss_ib": 0.001621805364266038, + "step": 931 + }, + { + "ce_ib": 2.727393865585327, + "ce_orig": 0.37597399950027466, + "epoch": 0.2677403120281832, + "kl_loss": 0.21541434526443481, + "loss_ib": 0.002426882740110159, + "step": 931 + }, + { + "ce_ib": 10.125308990478516, + "ce_orig": 1.2238138914108276, + "epoch": 0.2677403120281832, + "kl_loss": 0.13982701301574707, + "loss_ib": 0.0024108008947223425, + "step": 931 + }, + { + "ce_ib": 5.368266582489014, + "ce_orig": 0.8618974089622498, + "epoch": 0.2677403120281832, + "kl_loss": 0.12464284151792526, + "loss_ib": 0.0017832550220191479, + "step": 931 + }, + { + "ce_ib": 5.595724105834961, + "ce_orig": 0.733282208442688, + "epoch": 0.26802789560716084, + "kl_loss": 0.14293760061264038, + "loss_ib": 0.0019889483228325844, + "step": 932 + }, + { + "ce_ib": 14.770402908325195, + "ce_orig": 2.0770435333251953, + "epoch": 0.26802789560716084, + "kl_loss": 0.15044143795967102, + "loss_ib": 0.002981454599648714, + "step": 932 + }, + { + "ce_ib": 6.786084175109863, + "ce_orig": 0.7236509323120117, + "epoch": 0.26802789560716084, + "kl_loss": 0.11320096254348755, + "loss_ib": 0.0018106179777532816, + "step": 932 + }, + { + "ce_ib": 6.113559246063232, + "ce_orig": 0.547592043876648, + "epoch": 0.26802789560716084, + "kl_loss": 0.10517580807209015, + "loss_ib": 0.0016631139442324638, + "step": 932 + }, + { + "ce_ib": 4.208815097808838, + "ce_orig": 0.6119555234909058, + "epoch": 0.26831547918613846, + "kl_loss": 0.08684396743774414, + "loss_ib": 0.0012893211096525192, + "step": 933 + }, + { + "ce_ib": 9.651471138000488, + "ce_orig": 1.3539904356002808, + "epoch": 0.26831547918613846, + "kl_loss": 0.15913406014442444, + "loss_ib": 0.002556487452238798, + "step": 933 + }, + { + "ce_ib": 8.182539939880371, + "ce_orig": 0.7589184641838074, + "epoch": 0.26831547918613846, + "kl_loss": 0.11191841959953308, + "loss_ib": 0.0019374381517991424, + "step": 933 + }, + { + "ce_ib": 9.856441497802734, + "ce_orig": 0.6627126336097717, + "epoch": 0.26831547918613846, + "kl_loss": 0.19334280490875244, + "loss_ib": 0.002919072052463889, + "step": 933 + }, + { + "ce_ib": 5.727910041809082, + "ce_orig": 0.5337786674499512, + "epoch": 0.2686030627651161, + "kl_loss": 0.08856379240751266, + "loss_ib": 0.0014584289165213704, + "step": 934 + }, + { + "ce_ib": 6.411639213562012, + "ce_orig": 0.7354600429534912, + "epoch": 0.2686030627651161, + "kl_loss": 0.11536514759063721, + "loss_ib": 0.0017948152963072062, + "step": 934 + }, + { + "ce_ib": 6.148459434509277, + "ce_orig": 0.7758929133415222, + "epoch": 0.2686030627651161, + "kl_loss": 0.12726286053657532, + "loss_ib": 0.0018874744419008493, + "step": 934 + }, + { + "ce_ib": 6.0549845695495605, + "ce_orig": 0.6095043420791626, + "epoch": 0.2686030627651161, + "kl_loss": 0.10193461179733276, + "loss_ib": 0.001624844502657652, + "step": 934 + }, + { + "epoch": 0.26889064634409376, + "grad_norm": 0.10580892115831375, + "learning_rate": 4.954576622801006e-05, + "loss": 0.8217, + "step": 935 + }, + { + "ce_ib": 8.449764251708984, + "ce_orig": 1.0514365434646606, + "epoch": 0.26889064634409376, + "kl_loss": 0.13770011067390442, + "loss_ib": 0.002221977338194847, + "step": 935 + }, + { + "ce_ib": 6.928380489349365, + "ce_orig": 0.6566464900970459, + "epoch": 0.26889064634409376, + "kl_loss": 0.19501274824142456, + "loss_ib": 0.002642965642735362, + "step": 935 + }, + { + "ce_ib": 8.186603546142578, + "ce_orig": 0.7874239683151245, + "epoch": 0.26889064634409376, + "kl_loss": 0.09358172118663788, + "loss_ib": 0.001754477620124817, + "step": 935 + }, + { + "ce_ib": 6.060873985290527, + "ce_orig": 0.624786913394928, + "epoch": 0.26889064634409376, + "kl_loss": 0.1422898769378662, + "loss_ib": 0.0020289861131459475, + "step": 935 + }, + { + "ce_ib": 5.951212406158447, + "ce_orig": 0.5704233050346375, + "epoch": 0.2691782299230714, + "kl_loss": 0.12384402006864548, + "loss_ib": 0.0018335613422095776, + "step": 936 + }, + { + "ce_ib": 4.107890605926514, + "ce_orig": 0.4098372459411621, + "epoch": 0.2691782299230714, + "kl_loss": 0.1344190239906311, + "loss_ib": 0.0017549792537465692, + "step": 936 + }, + { + "ce_ib": 7.884426116943359, + "ce_orig": 0.6171572804450989, + "epoch": 0.2691782299230714, + "kl_loss": 0.1363716423511505, + "loss_ib": 0.0021521588787436485, + "step": 936 + }, + { + "ce_ib": 8.209308624267578, + "ce_orig": 1.1194345951080322, + "epoch": 0.2691782299230714, + "kl_loss": 0.1114901453256607, + "loss_ib": 0.0019358322024345398, + "step": 936 + }, + { + "ce_ib": 5.281935214996338, + "ce_orig": 0.6868589520454407, + "epoch": 0.269465813502049, + "kl_loss": 0.11274972558021545, + "loss_ib": 0.0016556908376514912, + "step": 937 + }, + { + "ce_ib": 8.918413162231445, + "ce_orig": 0.9377288818359375, + "epoch": 0.269465813502049, + "kl_loss": 0.09959867596626282, + "loss_ib": 0.0018878281116485596, + "step": 937 + }, + { + "ce_ib": 5.661655902862549, + "ce_orig": 0.9097051024436951, + "epoch": 0.269465813502049, + "kl_loss": 0.08012732863426208, + "loss_ib": 0.00136743881739676, + "step": 937 + }, + { + "ce_ib": 6.918816089630127, + "ce_orig": 0.47837385535240173, + "epoch": 0.269465813502049, + "kl_loss": 0.1599336564540863, + "loss_ib": 0.0022912181448191404, + "step": 937 + }, + { + "ce_ib": 7.659276485443115, + "ce_orig": 0.4183506965637207, + "epoch": 0.2697533970810267, + "kl_loss": 0.13869816064834595, + "loss_ib": 0.002152909291908145, + "step": 938 + }, + { + "ce_ib": 7.376327991485596, + "ce_orig": 0.9017499685287476, + "epoch": 0.2697533970810267, + "kl_loss": 0.18247684836387634, + "loss_ib": 0.0025624013505876064, + "step": 938 + }, + { + "ce_ib": 6.123374938964844, + "ce_orig": 0.6197956800460815, + "epoch": 0.2697533970810267, + "kl_loss": 0.15362223982810974, + "loss_ib": 0.002148559782654047, + "step": 938 + }, + { + "ce_ib": 6.555751800537109, + "ce_orig": 0.4299125373363495, + "epoch": 0.2697533970810267, + "kl_loss": 0.1346968412399292, + "loss_ib": 0.0020025435369461775, + "step": 938 + }, + { + "ce_ib": 6.542535781860352, + "ce_orig": 0.549587070941925, + "epoch": 0.2700409806600043, + "kl_loss": 0.20072101056575775, + "loss_ib": 0.0026614635717123747, + "step": 939 + }, + { + "ce_ib": 7.215294361114502, + "ce_orig": 0.9843421578407288, + "epoch": 0.2700409806600043, + "kl_loss": 0.10044269263744354, + "loss_ib": 0.0017259563319385052, + "step": 939 + }, + { + "ce_ib": 5.538031578063965, + "ce_orig": 0.8232380747795105, + "epoch": 0.2700409806600043, + "kl_loss": 0.06865495443344116, + "loss_ib": 0.0012403526343405247, + "step": 939 + }, + { + "ce_ib": 10.024641990661621, + "ce_orig": 1.4815994501113892, + "epoch": 0.2700409806600043, + "kl_loss": 0.12842750549316406, + "loss_ib": 0.002286739181727171, + "step": 939 + }, + { + "epoch": 0.27032856423898194, + "grad_norm": 0.0914548933506012, + "learning_rate": 4.953837319566497e-05, + "loss": 0.8191, + "step": 940 + }, + { + "ce_ib": 3.5875911712646484, + "ce_orig": 0.4435073435306549, + "epoch": 0.27032856423898194, + "kl_loss": 0.07417052984237671, + "loss_ib": 0.001100464491173625, + "step": 940 + }, + { + "ce_ib": 5.263745307922363, + "ce_orig": 0.660015344619751, + "epoch": 0.27032856423898194, + "kl_loss": 0.07660418748855591, + "loss_ib": 0.0012924164766445756, + "step": 940 + }, + { + "ce_ib": 7.743003845214844, + "ce_orig": 1.1857998371124268, + "epoch": 0.27032856423898194, + "kl_loss": 0.14771535992622375, + "loss_ib": 0.0022514539305120707, + "step": 940 + }, + { + "ce_ib": 10.746769905090332, + "ce_orig": 1.7959994077682495, + "epoch": 0.27032856423898194, + "kl_loss": 0.11718818545341492, + "loss_ib": 0.002246558666229248, + "step": 940 + }, + { + "ce_ib": 6.919939041137695, + "ce_orig": 0.9838729500770569, + "epoch": 0.2706161478179596, + "kl_loss": 0.0781673863530159, + "loss_ib": 0.0014736676821485162, + "step": 941 + }, + { + "ce_ib": 5.7579755783081055, + "ce_orig": 0.6983100175857544, + "epoch": 0.2706161478179596, + "kl_loss": 0.09704221040010452, + "loss_ib": 0.0015462195733562112, + "step": 941 + }, + { + "ce_ib": 9.96190357208252, + "ce_orig": 1.2530534267425537, + "epoch": 0.2706161478179596, + "kl_loss": 0.21759331226348877, + "loss_ib": 0.0031721233390271664, + "step": 941 + }, + { + "ce_ib": 7.985954761505127, + "ce_orig": 0.3687174320220947, + "epoch": 0.2706161478179596, + "kl_loss": 0.12543964385986328, + "loss_ib": 0.002052991883829236, + "step": 941 + }, + { + "ce_ib": 7.240424633026123, + "ce_orig": 0.6854646801948547, + "epoch": 0.27090373139693724, + "kl_loss": 0.1816634237766266, + "loss_ib": 0.002540676621720195, + "step": 942 + }, + { + "ce_ib": 7.516676902770996, + "ce_orig": 0.7132022976875305, + "epoch": 0.27090373139693724, + "kl_loss": 0.11213131248950958, + "loss_ib": 0.0018729808507487178, + "step": 942 + }, + { + "ce_ib": 7.73270845413208, + "ce_orig": 0.9187069535255432, + "epoch": 0.27090373139693724, + "kl_loss": 0.14587682485580444, + "loss_ib": 0.0022320388816297054, + "step": 942 + }, + { + "ce_ib": 7.356476783752441, + "ce_orig": 0.8093485236167908, + "epoch": 0.27090373139693724, + "kl_loss": 0.1405678391456604, + "loss_ib": 0.002141325967386365, + "step": 942 + }, + { + "ce_ib": 7.718157768249512, + "ce_orig": 0.5669108033180237, + "epoch": 0.27119131497591487, + "kl_loss": 0.23648160696029663, + "loss_ib": 0.0031366317998617887, + "step": 943 + }, + { + "ce_ib": 9.620587348937988, + "ce_orig": 1.4407234191894531, + "epoch": 0.27119131497591487, + "kl_loss": 0.18117858469486237, + "loss_ib": 0.002773844636976719, + "step": 943 + }, + { + "ce_ib": 6.493579387664795, + "ce_orig": 0.7694388031959534, + "epoch": 0.27119131497591487, + "kl_loss": 0.11386668682098389, + "loss_ib": 0.0017880249070003629, + "step": 943 + }, + { + "ce_ib": 8.296916961669922, + "ce_orig": 1.0959537029266357, + "epoch": 0.27119131497591487, + "kl_loss": 0.14310365915298462, + "loss_ib": 0.0022607280407100916, + "step": 943 + }, + { + "ce_ib": 8.241883277893066, + "ce_orig": 1.090752124786377, + "epoch": 0.2714788985548925, + "kl_loss": 0.2852647304534912, + "loss_ib": 0.00367683544754982, + "step": 944 + }, + { + "ce_ib": 6.730658531188965, + "ce_orig": 0.7248155474662781, + "epoch": 0.2714788985548925, + "kl_loss": 0.14084209501743317, + "loss_ib": 0.002081486862152815, + "step": 944 + }, + { + "ce_ib": 4.981124401092529, + "ce_orig": 0.676002562046051, + "epoch": 0.2714788985548925, + "kl_loss": 0.08100677281618118, + "loss_ib": 0.0013081800425425172, + "step": 944 + }, + { + "ce_ib": 6.86767578125, + "ce_orig": 0.7279362678527832, + "epoch": 0.2714788985548925, + "kl_loss": 0.1707763522863388, + "loss_ib": 0.0023945309221744537, + "step": 944 + }, + { + "epoch": 0.27176648213387017, + "grad_norm": 0.0977693498134613, + "learning_rate": 4.9530921044683374e-05, + "loss": 0.8319, + "step": 945 + }, + { + "ce_ib": 6.423201560974121, + "ce_orig": 0.9425351619720459, + "epoch": 0.27176648213387017, + "kl_loss": 0.13374529778957367, + "loss_ib": 0.0019797729328274727, + "step": 945 + }, + { + "ce_ib": 7.035811901092529, + "ce_orig": 0.728024959564209, + "epoch": 0.27176648213387017, + "kl_loss": 0.17420685291290283, + "loss_ib": 0.0024456498213112354, + "step": 945 + }, + { + "ce_ib": 5.947598934173584, + "ce_orig": 0.43916672468185425, + "epoch": 0.27176648213387017, + "kl_loss": 0.1581525057554245, + "loss_ib": 0.0021762847900390625, + "step": 945 + }, + { + "ce_ib": 11.003464698791504, + "ce_orig": 1.16835355758667, + "epoch": 0.27176648213387017, + "kl_loss": 0.1289106160402298, + "loss_ib": 0.002389452653005719, + "step": 945 + }, + { + "ce_ib": 11.310383796691895, + "ce_orig": 1.4770395755767822, + "epoch": 0.2720540657128478, + "kl_loss": 0.1667083203792572, + "loss_ib": 0.0027981214225292206, + "step": 946 + }, + { + "ce_ib": 6.251741409301758, + "ce_orig": 0.6906797289848328, + "epoch": 0.2720540657128478, + "kl_loss": 0.1108119785785675, + "loss_ib": 0.0017332937568426132, + "step": 946 + }, + { + "ce_ib": 4.572713851928711, + "ce_orig": 0.5401942133903503, + "epoch": 0.2720540657128478, + "kl_loss": 0.11654126644134521, + "loss_ib": 0.0016226839506998658, + "step": 946 + }, + { + "ce_ib": 11.478056907653809, + "ce_orig": 1.915285587310791, + "epoch": 0.2720540657128478, + "kl_loss": 0.16052968800067902, + "loss_ib": 0.0027531024534255266, + "step": 946 + }, + { + "ce_ib": 8.262956619262695, + "ce_orig": 1.16330885887146, + "epoch": 0.2723416492918254, + "kl_loss": 0.10167140513658524, + "loss_ib": 0.0018430094933137298, + "step": 947 + }, + { + "ce_ib": 3.876277446746826, + "ce_orig": 0.4559311866760254, + "epoch": 0.2723416492918254, + "kl_loss": 0.1875893473625183, + "loss_ib": 0.002263521309942007, + "step": 947 + }, + { + "ce_ib": 6.256802082061768, + "ce_orig": 0.8145736455917358, + "epoch": 0.2723416492918254, + "kl_loss": 0.24971362948417664, + "loss_ib": 0.003122816327959299, + "step": 947 + }, + { + "ce_ib": 6.3700761795043945, + "ce_orig": 0.3787907361984253, + "epoch": 0.2723416492918254, + "kl_loss": 0.4455419182777405, + "loss_ib": 0.00509242620319128, + "step": 947 + }, + { + "ce_ib": 7.6757049560546875, + "ce_orig": 1.1953049898147583, + "epoch": 0.2726292328708031, + "kl_loss": 0.16150033473968506, + "loss_ib": 0.0023825736716389656, + "step": 948 + }, + { + "ce_ib": 6.165060520172119, + "ce_orig": 0.8224126696586609, + "epoch": 0.2726292328708031, + "kl_loss": 0.12725231051445007, + "loss_ib": 0.0018890290521085262, + "step": 948 + }, + { + "ce_ib": 8.268503189086914, + "ce_orig": 0.600581705570221, + "epoch": 0.2726292328708031, + "kl_loss": 0.15321186184883118, + "loss_ib": 0.002358968835324049, + "step": 948 + }, + { + "ce_ib": 7.921170234680176, + "ce_orig": 0.6710708737373352, + "epoch": 0.2726292328708031, + "kl_loss": 0.17998698353767395, + "loss_ib": 0.0025919866748154163, + "step": 948 + }, + { + "ce_ib": 8.306171417236328, + "ce_orig": 0.8242880702018738, + "epoch": 0.2729168164497807, + "kl_loss": 0.14044451713562012, + "loss_ib": 0.002235062187537551, + "step": 949 + }, + { + "ce_ib": 9.588052749633789, + "ce_orig": 1.178617000579834, + "epoch": 0.2729168164497807, + "kl_loss": 0.13992881774902344, + "loss_ib": 0.0023580933921039104, + "step": 949 + }, + { + "ce_ib": 3.2139103412628174, + "ce_orig": 0.4203823208808899, + "epoch": 0.2729168164497807, + "kl_loss": 0.08177628368139267, + "loss_ib": 0.0011391538428142667, + "step": 949 + }, + { + "ce_ib": 9.011565208435059, + "ce_orig": 1.113925814628601, + "epoch": 0.2729168164497807, + "kl_loss": 0.13560637831687927, + "loss_ib": 0.0022572202142328024, + "step": 949 + }, + { + "epoch": 0.27320440002875834, + "grad_norm": 0.10619111359119415, + "learning_rate": 4.952340979301924e-05, + "loss": 0.8482, + "step": 950 + }, + { + "ce_ib": 4.546792030334473, + "ce_orig": 0.5716915726661682, + "epoch": 0.27320440002875834, + "kl_loss": 0.12070260941982269, + "loss_ib": 0.001661705318838358, + "step": 950 + }, + { + "ce_ib": 5.78235387802124, + "ce_orig": 0.7140153050422668, + "epoch": 0.27320440002875834, + "kl_loss": 0.14742611348628998, + "loss_ib": 0.0020524964202195406, + "step": 950 + }, + { + "ce_ib": 6.821101665496826, + "ce_orig": 0.9626045227050781, + "epoch": 0.27320440002875834, + "kl_loss": 0.18677476048469543, + "loss_ib": 0.002549857832491398, + "step": 950 + }, + { + "ce_ib": 7.699038028717041, + "ce_orig": 0.9650766849517822, + "epoch": 0.27320440002875834, + "kl_loss": 0.1534407138824463, + "loss_ib": 0.0023043109104037285, + "step": 950 + }, + { + "ce_ib": 8.740656852722168, + "ce_orig": 1.00751531124115, + "epoch": 0.273491983607736, + "kl_loss": 0.14653702080249786, + "loss_ib": 0.0023394357413053513, + "step": 951 + }, + { + "ce_ib": 5.055346488952637, + "ce_orig": 0.7659692764282227, + "epoch": 0.273491983607736, + "kl_loss": 0.11174146831035614, + "loss_ib": 0.0016229492612183094, + "step": 951 + }, + { + "ce_ib": 9.451150894165039, + "ce_orig": 1.0679696798324585, + "epoch": 0.273491983607736, + "kl_loss": 0.07588327676057816, + "loss_ib": 0.0017039477825164795, + "step": 951 + }, + { + "ce_ib": 9.73048210144043, + "ce_orig": 1.5051500797271729, + "epoch": 0.273491983607736, + "kl_loss": 0.15752311050891876, + "loss_ib": 0.002548279007896781, + "step": 951 + }, + { + "ce_ib": 10.435773849487305, + "ce_orig": 1.525643229484558, + "epoch": 0.27377956718671365, + "kl_loss": 0.11705584824085236, + "loss_ib": 0.0022141358349472284, + "step": 952 + }, + { + "ce_ib": 5.055952548980713, + "ce_orig": 0.34516263008117676, + "epoch": 0.27377956718671365, + "kl_loss": 0.06728208065032959, + "loss_ib": 0.0011784159578382969, + "step": 952 + }, + { + "ce_ib": 7.771252632141113, + "ce_orig": 1.0997884273529053, + "epoch": 0.27377956718671365, + "kl_loss": 0.13861092925071716, + "loss_ib": 0.0021632343996316195, + "step": 952 + }, + { + "ce_ib": 8.088645935058594, + "ce_orig": 1.0756627321243286, + "epoch": 0.27377956718671365, + "kl_loss": 0.08951813727617264, + "loss_ib": 0.0017040459206327796, + "step": 952 + }, + { + "ce_ib": 6.806406021118164, + "ce_orig": 0.6910561919212341, + "epoch": 0.27406715076569127, + "kl_loss": 0.10122386366128922, + "loss_ib": 0.0016928791301324964, + "step": 953 + }, + { + "ce_ib": 8.417391777038574, + "ce_orig": 0.7060823440551758, + "epoch": 0.27406715076569127, + "kl_loss": 0.1868474781513214, + "loss_ib": 0.0027102138847112656, + "step": 953 + }, + { + "ce_ib": 5.739500522613525, + "ce_orig": 0.664655327796936, + "epoch": 0.27406715076569127, + "kl_loss": 0.12121591717004776, + "loss_ib": 0.0017861091764643788, + "step": 953 + }, + { + "ce_ib": 3.8396944999694824, + "ce_orig": 0.2927386164665222, + "epoch": 0.27406715076569127, + "kl_loss": 0.12834453582763672, + "loss_ib": 0.0016674146754667163, + "step": 953 + }, + { + "ce_ib": 10.94080924987793, + "ce_orig": 1.681357979774475, + "epoch": 0.2743547343446689, + "kl_loss": 0.14820876717567444, + "loss_ib": 0.002576168393716216, + "step": 954 + }, + { + "ce_ib": 9.138489723205566, + "ce_orig": 0.8752321600914001, + "epoch": 0.2743547343446689, + "kl_loss": 0.15767325460910797, + "loss_ib": 0.002490581478923559, + "step": 954 + }, + { + "ce_ib": 10.636330604553223, + "ce_orig": 1.0436869859695435, + "epoch": 0.2743547343446689, + "kl_loss": 0.10358019173145294, + "loss_ib": 0.0020994350779801607, + "step": 954 + }, + { + "ce_ib": 5.532519340515137, + "ce_orig": 0.5938249826431274, + "epoch": 0.2743547343446689, + "kl_loss": 0.16369004547595978, + "loss_ib": 0.0021901524160057306, + "step": 954 + }, + { + "epoch": 0.2746423179236466, + "grad_norm": 0.08476871252059937, + "learning_rate": 4.9515839458768905e-05, + "loss": 0.8402, + "step": 955 + }, + { + "ce_ib": 7.063848495483398, + "ce_orig": 0.47076311707496643, + "epoch": 0.2746423179236466, + "kl_loss": 0.19533999264240265, + "loss_ib": 0.0026597848627716303, + "step": 955 + }, + { + "ce_ib": 8.551830291748047, + "ce_orig": 0.8301795125007629, + "epoch": 0.2746423179236466, + "kl_loss": 0.14242975413799286, + "loss_ib": 0.002279480453580618, + "step": 955 + }, + { + "ce_ib": 8.352051734924316, + "ce_orig": 0.5644355416297913, + "epoch": 0.2746423179236466, + "kl_loss": 0.17968347668647766, + "loss_ib": 0.0026320398319512606, + "step": 955 + }, + { + "ce_ib": 6.1855645179748535, + "ce_orig": 0.8240520358085632, + "epoch": 0.2746423179236466, + "kl_loss": 0.20574909448623657, + "loss_ib": 0.002676047384738922, + "step": 955 + }, + { + "ce_ib": 4.310649871826172, + "ce_orig": 0.43459251523017883, + "epoch": 0.2749299015026242, + "kl_loss": 0.14753244817256927, + "loss_ib": 0.001906389370560646, + "step": 956 + }, + { + "ce_ib": 7.657717227935791, + "ce_orig": 0.8175066709518433, + "epoch": 0.2749299015026242, + "kl_loss": 0.08417732268571854, + "loss_ib": 0.0016075449530035257, + "step": 956 + }, + { + "ce_ib": 6.655043601989746, + "ce_orig": 0.46536022424697876, + "epoch": 0.2749299015026242, + "kl_loss": 0.4305586814880371, + "loss_ib": 0.004971091169863939, + "step": 956 + }, + { + "ce_ib": 4.4826507568359375, + "ce_orig": 0.7566420435905457, + "epoch": 0.2749299015026242, + "kl_loss": 0.10255283117294312, + "loss_ib": 0.0014737934106960893, + "step": 956 + }, + { + "ce_ib": 6.804374694824219, + "ce_orig": 0.8272842168807983, + "epoch": 0.2752174850816018, + "kl_loss": 0.17810380458831787, + "loss_ib": 0.002461475320160389, + "step": 957 + }, + { + "ce_ib": 7.224662780761719, + "ce_orig": 1.0401030778884888, + "epoch": 0.2752174850816018, + "kl_loss": 0.11128075420856476, + "loss_ib": 0.0018352738115936518, + "step": 957 + }, + { + "ce_ib": 6.040480613708496, + "ce_orig": 0.6835022568702698, + "epoch": 0.2752174850816018, + "kl_loss": 0.14640529453754425, + "loss_ib": 0.0020681009627878666, + "step": 957 + }, + { + "ce_ib": 9.293033599853516, + "ce_orig": 1.5635472536087036, + "epoch": 0.2752174850816018, + "kl_loss": 0.11437170207500458, + "loss_ib": 0.0020730202086269855, + "step": 957 + }, + { + "ce_ib": 7.540154933929443, + "ce_orig": 0.5447720289230347, + "epoch": 0.2755050686605795, + "kl_loss": 0.15002988278865814, + "loss_ib": 0.0022543142549693584, + "step": 958 + }, + { + "ce_ib": 6.4098615646362305, + "ce_orig": 0.6705033183097839, + "epoch": 0.2755050686605795, + "kl_loss": 0.12306762486696243, + "loss_ib": 0.0018716624472290277, + "step": 958 + }, + { + "ce_ib": 4.094173431396484, + "ce_orig": 0.6588820219039917, + "epoch": 0.2755050686605795, + "kl_loss": 0.13385936617851257, + "loss_ib": 0.0017480109818279743, + "step": 958 + }, + { + "ce_ib": 10.632689476013184, + "ce_orig": 1.3828617334365845, + "epoch": 0.2755050686605795, + "kl_loss": 0.11779659241437912, + "loss_ib": 0.0022412347607314587, + "step": 958 + }, + { + "ce_ib": 4.860330104827881, + "ce_orig": 0.7951819896697998, + "epoch": 0.2757926522395571, + "kl_loss": 0.06981615722179413, + "loss_ib": 0.0011841944651678205, + "step": 959 + }, + { + "ce_ib": 7.603151321411133, + "ce_orig": 0.838845431804657, + "epoch": 0.2757926522395571, + "kl_loss": 0.12948527932167053, + "loss_ib": 0.002055167919024825, + "step": 959 + }, + { + "ce_ib": 4.83817195892334, + "ce_orig": 0.35420238971710205, + "epoch": 0.2757926522395571, + "kl_loss": 0.12115734070539474, + "loss_ib": 0.0016953905578702688, + "step": 959 + }, + { + "ce_ib": 10.160306930541992, + "ce_orig": 1.2111716270446777, + "epoch": 0.2757926522395571, + "kl_loss": 0.2245725691318512, + "loss_ib": 0.0032617561519145966, + "step": 959 + }, + { + "epoch": 0.27608023581853475, + "grad_norm": 0.11242754757404327, + "learning_rate": 4.950821006017107e-05, + "loss": 0.7701, + "step": 960 + }, + { + "ce_ib": 12.709158897399902, + "ce_orig": 1.7171677350997925, + "epoch": 0.27608023581853475, + "kl_loss": 0.15762443840503693, + "loss_ib": 0.002847159979864955, + "step": 960 + }, + { + "ce_ib": 5.944572925567627, + "ce_orig": 0.7122161388397217, + "epoch": 0.27608023581853475, + "kl_loss": 0.11333119124174118, + "loss_ib": 0.0017277691513299942, + "step": 960 + }, + { + "ce_ib": 11.57944393157959, + "ce_orig": 1.2153781652450562, + "epoch": 0.27608023581853475, + "kl_loss": 0.11728625744581223, + "loss_ib": 0.002330806804820895, + "step": 960 + }, + { + "ce_ib": 8.180597305297852, + "ce_orig": 1.0113308429718018, + "epoch": 0.27608023581853475, + "kl_loss": 0.06936834752559662, + "loss_ib": 0.0015117431757971644, + "step": 960 + }, + { + "ce_ib": 7.877357006072998, + "ce_orig": 1.2754735946655273, + "epoch": 0.2763678193975124, + "kl_loss": 0.23580428957939148, + "loss_ib": 0.0031457783188670874, + "step": 961 + }, + { + "ce_ib": 9.339914321899414, + "ce_orig": 1.0975970029830933, + "epoch": 0.2763678193975124, + "kl_loss": 0.12502053380012512, + "loss_ib": 0.0021841966081410646, + "step": 961 + }, + { + "ce_ib": 9.925426483154297, + "ce_orig": 1.3003571033477783, + "epoch": 0.2763678193975124, + "kl_loss": 0.12838998436927795, + "loss_ib": 0.002276442479342222, + "step": 961 + }, + { + "ce_ib": 4.420860767364502, + "ce_orig": 0.4933626055717468, + "epoch": 0.2763678193975124, + "kl_loss": 0.07248329371213913, + "loss_ib": 0.0011669190134853125, + "step": 961 + }, + { + "ce_ib": 4.496903419494629, + "ce_orig": 0.4532707631587982, + "epoch": 0.27665540297649005, + "kl_loss": 0.21559402346611023, + "loss_ib": 0.0026056303177028894, + "step": 962 + }, + { + "ce_ib": 8.14924144744873, + "ce_orig": 0.8354823589324951, + "epoch": 0.27665540297649005, + "kl_loss": 0.1402096003293991, + "loss_ib": 0.0022170201409608126, + "step": 962 + }, + { + "ce_ib": 9.650452613830566, + "ce_orig": 1.246025562286377, + "epoch": 0.27665540297649005, + "kl_loss": 0.20398131012916565, + "loss_ib": 0.0030048585031181574, + "step": 962 + }, + { + "ce_ib": 5.663705825805664, + "ce_orig": 0.6232472658157349, + "epoch": 0.27665540297649005, + "kl_loss": 0.10515444725751877, + "loss_ib": 0.0016179149970412254, + "step": 962 + }, + { + "ce_ib": 6.809243202209473, + "ce_orig": 0.8654524087905884, + "epoch": 0.2769429865554677, + "kl_loss": 0.106082022190094, + "loss_ib": 0.001741744577884674, + "step": 963 + }, + { + "ce_ib": 4.1796956062316895, + "ce_orig": 0.4485793113708496, + "epoch": 0.2769429865554677, + "kl_loss": 0.11551377177238464, + "loss_ib": 0.0015731072053313255, + "step": 963 + }, + { + "ce_ib": 9.93834114074707, + "ce_orig": 1.3555216789245605, + "epoch": 0.2769429865554677, + "kl_loss": 0.14208224415779114, + "loss_ib": 0.0024146565701812506, + "step": 963 + }, + { + "ce_ib": 4.870518684387207, + "ce_orig": 0.9535307884216309, + "epoch": 0.2769429865554677, + "kl_loss": 0.08944790810346603, + "loss_ib": 0.0013815308921039104, + "step": 963 + }, + { + "ce_ib": 5.912570953369141, + "ce_orig": 0.6123507022857666, + "epoch": 0.2772305701344453, + "kl_loss": 0.10765205323696136, + "loss_ib": 0.0016677775420248508, + "step": 964 + }, + { + "ce_ib": 5.70536470413208, + "ce_orig": 0.5494059920310974, + "epoch": 0.2772305701344453, + "kl_loss": 0.14170430600643158, + "loss_ib": 0.0019875795114785433, + "step": 964 + }, + { + "ce_ib": 10.486671447753906, + "ce_orig": 1.401133418083191, + "epoch": 0.2772305701344453, + "kl_loss": 0.08173699676990509, + "loss_ib": 0.0018660370260477066, + "step": 964 + }, + { + "ce_ib": 4.862819671630859, + "ce_orig": 0.48107895255088806, + "epoch": 0.2772305701344453, + "kl_loss": 0.08526574820280075, + "loss_ib": 0.00133893929887563, + "step": 964 + }, + { + "epoch": 0.277518153713423, + "grad_norm": 0.09270508587360382, + "learning_rate": 4.9500521615606716e-05, + "loss": 0.8706, + "step": 965 + }, + { + "ce_ib": 3.7485735416412354, + "ce_orig": 0.6303088068962097, + "epoch": 0.277518153713423, + "kl_loss": 0.07887739688158035, + "loss_ib": 0.001163631328381598, + "step": 965 + }, + { + "ce_ib": 5.018900394439697, + "ce_orig": 0.42151200771331787, + "epoch": 0.277518153713423, + "kl_loss": 0.16141340136528015, + "loss_ib": 0.0021160240285098553, + "step": 965 + }, + { + "ce_ib": 6.465913772583008, + "ce_orig": 0.5550833344459534, + "epoch": 0.277518153713423, + "kl_loss": 0.0822734534740448, + "loss_ib": 0.0014693258563056588, + "step": 965 + }, + { + "ce_ib": 9.078096389770508, + "ce_orig": 1.2660472393035889, + "epoch": 0.277518153713423, + "kl_loss": 0.11909198760986328, + "loss_ib": 0.002098729368299246, + "step": 965 + }, + { + "ce_ib": 6.4851603507995605, + "ce_orig": 0.8773921132087708, + "epoch": 0.2778057372924006, + "kl_loss": 0.11482943594455719, + "loss_ib": 0.0017968103056773543, + "step": 966 + }, + { + "ce_ib": 8.88327407836914, + "ce_orig": 1.1675580739974976, + "epoch": 0.2778057372924006, + "kl_loss": 0.14929817616939545, + "loss_ib": 0.0023813091684132814, + "step": 966 + }, + { + "ce_ib": 9.463626861572266, + "ce_orig": 1.2751141786575317, + "epoch": 0.2778057372924006, + "kl_loss": 0.14539405703544617, + "loss_ib": 0.002400303026661277, + "step": 966 + }, + { + "ce_ib": 5.47840690612793, + "ce_orig": 0.7138165235519409, + "epoch": 0.2778057372924006, + "kl_loss": 0.10650286078453064, + "loss_ib": 0.0016128692077472806, + "step": 966 + }, + { + "ce_ib": 9.330538749694824, + "ce_orig": 1.5326582193374634, + "epoch": 0.2780933208713782, + "kl_loss": 0.12342990189790726, + "loss_ib": 0.0021673529408872128, + "step": 967 + }, + { + "ce_ib": 3.4421770572662354, + "ce_orig": 0.248417928814888, + "epoch": 0.2780933208713782, + "kl_loss": 0.25681114196777344, + "loss_ib": 0.0029123290441930294, + "step": 967 + }, + { + "ce_ib": 7.802358150482178, + "ce_orig": 0.8956292271614075, + "epoch": 0.2780933208713782, + "kl_loss": 0.2054874747991562, + "loss_ib": 0.0028351102955639362, + "step": 967 + }, + { + "ce_ib": 4.530078887939453, + "ce_orig": 0.43318378925323486, + "epoch": 0.2780933208713782, + "kl_loss": 0.06482702493667603, + "loss_ib": 0.0011012781178578734, + "step": 967 + }, + { + "ce_ib": 6.4143900871276855, + "ce_orig": 0.9001240134239197, + "epoch": 0.2783809044503559, + "kl_loss": 0.1251344531774521, + "loss_ib": 0.0018927834462374449, + "step": 968 + }, + { + "ce_ib": 9.123422622680664, + "ce_orig": 1.1014381647109985, + "epoch": 0.2783809044503559, + "kl_loss": 0.15173783898353577, + "loss_ib": 0.0024297204799950123, + "step": 968 + }, + { + "ce_ib": 2.160301685333252, + "ce_orig": 0.17562763392925262, + "epoch": 0.2783809044503559, + "kl_loss": 0.3059711456298828, + "loss_ib": 0.0032757415901869535, + "step": 968 + }, + { + "ce_ib": 5.158329963684082, + "ce_orig": 0.48985013365745544, + "epoch": 0.2783809044503559, + "kl_loss": 0.14261916279792786, + "loss_ib": 0.0019420244498178363, + "step": 968 + }, + { + "ce_ib": 5.991279125213623, + "ce_orig": 0.8108060359954834, + "epoch": 0.2786684880293335, + "kl_loss": 0.16150274872779846, + "loss_ib": 0.0022141553927212954, + "step": 969 + }, + { + "ce_ib": 6.686485767364502, + "ce_orig": 1.2489796876907349, + "epoch": 0.2786684880293335, + "kl_loss": 0.10450765490531921, + "loss_ib": 0.0017137250397354364, + "step": 969 + }, + { + "ce_ib": 4.417532920837402, + "ce_orig": 0.6061100363731384, + "epoch": 0.2786684880293335, + "kl_loss": 0.11191940307617188, + "loss_ib": 0.0015609472757205367, + "step": 969 + }, + { + "ce_ib": 6.771849632263184, + "ce_orig": 0.6890683770179749, + "epoch": 0.2786684880293335, + "kl_loss": 0.16166692972183228, + "loss_ib": 0.0022938542533665895, + "step": 969 + }, + { + "epoch": 0.27895607160831115, + "grad_norm": 0.10704389959573746, + "learning_rate": 4.94927741435991e-05, + "loss": 0.8406, + "step": 970 + }, + { + "ce_ib": 7.555027008056641, + "ce_orig": 1.2379882335662842, + "epoch": 0.27895607160831115, + "kl_loss": 0.13296280801296234, + "loss_ib": 0.002085130661725998, + "step": 970 + }, + { + "ce_ib": 6.707873821258545, + "ce_orig": 1.0386744737625122, + "epoch": 0.27895607160831115, + "kl_loss": 0.11403495073318481, + "loss_ib": 0.0018111368408426642, + "step": 970 + }, + { + "ce_ib": 7.210244178771973, + "ce_orig": 0.7288638949394226, + "epoch": 0.27895607160831115, + "kl_loss": 0.18328508734703064, + "loss_ib": 0.0025538753252476454, + "step": 970 + }, + { + "ce_ib": 8.974588394165039, + "ce_orig": 0.6134091019630432, + "epoch": 0.27895607160831115, + "kl_loss": 0.11008819192647934, + "loss_ib": 0.0019983407109975815, + "step": 970 + }, + { + "ce_ib": 6.788546562194824, + "ce_orig": 0.7252051830291748, + "epoch": 0.27924365518728883, + "kl_loss": 0.11143806576728821, + "loss_ib": 0.0017932351911440492, + "step": 971 + }, + { + "ce_ib": 7.9452314376831055, + "ce_orig": 0.6665673851966858, + "epoch": 0.27924365518728883, + "kl_loss": 0.08723453432321548, + "loss_ib": 0.0016668684547767043, + "step": 971 + }, + { + "ce_ib": 6.406195163726807, + "ce_orig": 0.4924929141998291, + "epoch": 0.27924365518728883, + "kl_loss": 0.10466066002845764, + "loss_ib": 0.0016872260021045804, + "step": 971 + }, + { + "ce_ib": 5.720137596130371, + "ce_orig": 0.6668531894683838, + "epoch": 0.27924365518728883, + "kl_loss": 0.10390918701887131, + "loss_ib": 0.0016111056320369244, + "step": 971 + }, + { + "ce_ib": 9.839632987976074, + "ce_orig": 1.026991844177246, + "epoch": 0.27953123876626645, + "kl_loss": 0.14091286063194275, + "loss_ib": 0.002393091795966029, + "step": 972 + }, + { + "ce_ib": 7.829953193664551, + "ce_orig": 0.9950670003890991, + "epoch": 0.27953123876626645, + "kl_loss": 0.1180608719587326, + "loss_ib": 0.0019636040087789297, + "step": 972 + }, + { + "ce_ib": 8.307772636413574, + "ce_orig": 1.106313705444336, + "epoch": 0.27953123876626645, + "kl_loss": 0.23758354783058167, + "loss_ib": 0.0032066127751022577, + "step": 972 + }, + { + "ce_ib": 7.722874164581299, + "ce_orig": 0.6765212416648865, + "epoch": 0.27953123876626645, + "kl_loss": 0.093751460313797, + "loss_ib": 0.0017098019598051906, + "step": 972 + }, + { + "ce_ib": 6.720781326293945, + "ce_orig": 0.8855082392692566, + "epoch": 0.2798188223452441, + "kl_loss": 0.16517385840415955, + "loss_ib": 0.0023238167632371187, + "step": 973 + }, + { + "ce_ib": 7.164329528808594, + "ce_orig": 0.5470868945121765, + "epoch": 0.2798188223452441, + "kl_loss": 0.06691183894872665, + "loss_ib": 0.0013855514116585255, + "step": 973 + }, + { + "ce_ib": 5.7693562507629395, + "ce_orig": 0.5427741408348083, + "epoch": 0.2798188223452441, + "kl_loss": 0.15940767526626587, + "loss_ib": 0.0021710123401135206, + "step": 973 + }, + { + "ce_ib": 7.7585530281066895, + "ce_orig": 1.0339730978012085, + "epoch": 0.2798188223452441, + "kl_loss": 0.17508529126644135, + "loss_ib": 0.0025267081800848246, + "step": 973 + }, + { + "ce_ib": 6.318320274353027, + "ce_orig": 0.6542577147483826, + "epoch": 0.2801064059242217, + "kl_loss": 0.13582909107208252, + "loss_ib": 0.001990122953429818, + "step": 974 + }, + { + "ce_ib": 8.61296558380127, + "ce_orig": 0.584928035736084, + "epoch": 0.2801064059242217, + "kl_loss": 0.12153268605470657, + "loss_ib": 0.002076623495668173, + "step": 974 + }, + { + "ce_ib": 9.057924270629883, + "ce_orig": 0.7471427917480469, + "epoch": 0.2801064059242217, + "kl_loss": 0.20692235231399536, + "loss_ib": 0.00297501590102911, + "step": 974 + }, + { + "ce_ib": 7.101727485656738, + "ce_orig": 0.6872411966323853, + "epoch": 0.2801064059242217, + "kl_loss": 0.12273427844047546, + "loss_ib": 0.0019375154515728354, + "step": 974 + }, + { + "epoch": 0.2803939895031994, + "grad_norm": 0.106829434633255, + "learning_rate": 4.948496766281368e-05, + "loss": 0.8025, + "step": 975 + }, + { + "ce_ib": 7.509278774261475, + "ce_orig": 0.8762276768684387, + "epoch": 0.2803939895031994, + "kl_loss": 0.1653478443622589, + "loss_ib": 0.002404406201094389, + "step": 975 + }, + { + "ce_ib": 8.116082191467285, + "ce_orig": 0.8653174638748169, + "epoch": 0.2803939895031994, + "kl_loss": 0.15266814827919006, + "loss_ib": 0.0023382895160466433, + "step": 975 + }, + { + "ce_ib": 4.437204360961914, + "ce_orig": 0.6615025997161865, + "epoch": 0.2803939895031994, + "kl_loss": 0.08827726542949677, + "loss_ib": 0.0013264929875731468, + "step": 975 + }, + { + "ce_ib": 7.19654655456543, + "ce_orig": 0.7415102124214172, + "epoch": 0.2803939895031994, + "kl_loss": 0.14185458421707153, + "loss_ib": 0.0021382004488259554, + "step": 975 + }, + { + "ce_ib": 5.182796955108643, + "ce_orig": 0.43733975291252136, + "epoch": 0.280681573082177, + "kl_loss": 0.0836355909705162, + "loss_ib": 0.0013546355767175555, + "step": 976 + }, + { + "ce_ib": 4.123630046844482, + "ce_orig": 0.45059525966644287, + "epoch": 0.280681573082177, + "kl_loss": 0.08219993859529495, + "loss_ib": 0.001234362367540598, + "step": 976 + }, + { + "ce_ib": 7.806309700012207, + "ce_orig": 0.784027636051178, + "epoch": 0.280681573082177, + "kl_loss": 0.18839582800865173, + "loss_ib": 0.002664589323103428, + "step": 976 + }, + { + "ce_ib": 9.582615852355957, + "ce_orig": 1.1754748821258545, + "epoch": 0.280681573082177, + "kl_loss": 0.08723856508731842, + "loss_ib": 0.0018306472338736057, + "step": 976 + }, + { + "ce_ib": 6.1553449630737305, + "ce_orig": 0.3517453074455261, + "epoch": 0.28096915666115463, + "kl_loss": 0.226718932390213, + "loss_ib": 0.0028827236965298653, + "step": 977 + }, + { + "ce_ib": 4.898242950439453, + "ce_orig": 0.7067365050315857, + "epoch": 0.28096915666115463, + "kl_loss": 0.09806376695632935, + "loss_ib": 0.0014704619534313679, + "step": 977 + }, + { + "ce_ib": 4.567273139953613, + "ce_orig": 0.6066350936889648, + "epoch": 0.28096915666115463, + "kl_loss": 0.12975236773490906, + "loss_ib": 0.0017542509594932199, + "step": 977 + }, + { + "ce_ib": 8.436737060546875, + "ce_orig": 1.028468132019043, + "epoch": 0.28096915666115463, + "kl_loss": 0.11548972129821777, + "loss_ib": 0.0019985707476735115, + "step": 977 + }, + { + "ce_ib": 5.896955966949463, + "ce_orig": 0.6225305199623108, + "epoch": 0.2812567402401323, + "kl_loss": 0.10841213911771774, + "loss_ib": 0.0016738170525059104, + "step": 978 + }, + { + "ce_ib": 8.217554092407227, + "ce_orig": 1.3426951169967651, + "epoch": 0.2812567402401323, + "kl_loss": 0.11771957576274872, + "loss_ib": 0.001998951192945242, + "step": 978 + }, + { + "ce_ib": 8.582084655761719, + "ce_orig": 0.990624189376831, + "epoch": 0.2812567402401323, + "kl_loss": 0.24700742959976196, + "loss_ib": 0.0033282828517258167, + "step": 978 + }, + { + "ce_ib": 11.665776252746582, + "ce_orig": 1.7566227912902832, + "epoch": 0.2812567402401323, + "kl_loss": 0.1942056119441986, + "loss_ib": 0.0031086336821317673, + "step": 978 + }, + { + "ce_ib": 6.679926872253418, + "ce_orig": 0.6784400343894958, + "epoch": 0.28154432381910993, + "kl_loss": 0.17957545816898346, + "loss_ib": 0.0024637472815811634, + "step": 979 + }, + { + "ce_ib": 5.655110836029053, + "ce_orig": 0.7223968505859375, + "epoch": 0.28154432381910993, + "kl_loss": 0.09845352172851562, + "loss_ib": 0.0015500461449846625, + "step": 979 + }, + { + "ce_ib": 9.194509506225586, + "ce_orig": 1.1544585227966309, + "epoch": 0.28154432381910993, + "kl_loss": 0.17654821276664734, + "loss_ib": 0.0026849329005926847, + "step": 979 + }, + { + "ce_ib": 5.0447773933410645, + "ce_orig": 0.8073973059654236, + "epoch": 0.28154432381910993, + "kl_loss": 0.1345929354429245, + "loss_ib": 0.0018504071049392223, + "step": 979 + }, + { + "epoch": 0.28183190739808756, + "grad_norm": 0.09493906795978546, + "learning_rate": 4.947710219205808e-05, + "loss": 0.8179, + "step": 980 + }, + { + "ce_ib": 4.477856159210205, + "ce_orig": 0.55965256690979, + "epoch": 0.28183190739808756, + "kl_loss": 0.09496183693408966, + "loss_ib": 0.0013974038884043694, + "step": 980 + }, + { + "ce_ib": 7.635720252990723, + "ce_orig": 0.7726288437843323, + "epoch": 0.28183190739808756, + "kl_loss": 0.12528853118419647, + "loss_ib": 0.002016457263380289, + "step": 980 + }, + { + "ce_ib": 5.129683494567871, + "ce_orig": 0.7097966074943542, + "epoch": 0.28183190739808756, + "kl_loss": 0.1135503426194191, + "loss_ib": 0.0016484718071296811, + "step": 980 + }, + { + "ce_ib": 6.113365173339844, + "ce_orig": 0.8547828197479248, + "epoch": 0.28183190739808756, + "kl_loss": 0.07798996567726135, + "loss_ib": 0.0013912362046539783, + "step": 980 + }, + { + "ce_ib": 6.860866069793701, + "ce_orig": 0.6225427985191345, + "epoch": 0.28211949097706523, + "kl_loss": 0.09530524909496307, + "loss_ib": 0.0016391390236094594, + "step": 981 + }, + { + "ce_ib": 5.513166427612305, + "ce_orig": 0.7645548582077026, + "epoch": 0.28211949097706523, + "kl_loss": 0.10374398529529572, + "loss_ib": 0.001588756451383233, + "step": 981 + }, + { + "ce_ib": 5.0253400802612305, + "ce_orig": 0.45561596751213074, + "epoch": 0.28211949097706523, + "kl_loss": 0.12157364189624786, + "loss_ib": 0.0017182704759761691, + "step": 981 + }, + { + "ce_ib": 8.435596466064453, + "ce_orig": 0.8526706695556641, + "epoch": 0.28211949097706523, + "kl_loss": 0.15986429154872894, + "loss_ib": 0.0024422025308012962, + "step": 981 + }, + { + "ce_ib": 6.921075344085693, + "ce_orig": 0.9211604595184326, + "epoch": 0.28240707455604286, + "kl_loss": 0.1525169461965561, + "loss_ib": 0.002217276953160763, + "step": 982 + }, + { + "ce_ib": 8.066728591918945, + "ce_orig": 1.1519079208374023, + "epoch": 0.28240707455604286, + "kl_loss": 0.18316397070884705, + "loss_ib": 0.0026383125223219395, + "step": 982 + }, + { + "ce_ib": 3.8123619556427, + "ce_orig": 0.5717668533325195, + "epoch": 0.28240707455604286, + "kl_loss": 0.14196446537971497, + "loss_ib": 0.001800880883820355, + "step": 982 + }, + { + "ce_ib": 9.549180030822754, + "ce_orig": 1.2062931060791016, + "epoch": 0.28240707455604286, + "kl_loss": 0.1640200912952423, + "loss_ib": 0.002595118712633848, + "step": 982 + }, + { + "ce_ib": 12.103609085083008, + "ce_orig": 1.862855076789856, + "epoch": 0.2826946581350205, + "kl_loss": 0.1695042848587036, + "loss_ib": 0.002905403496697545, + "step": 983 + }, + { + "ce_ib": 5.744802951812744, + "ce_orig": 0.559029757976532, + "epoch": 0.2826946581350205, + "kl_loss": 0.15063290297985077, + "loss_ib": 0.002080809324979782, + "step": 983 + }, + { + "ce_ib": 4.9721784591674805, + "ce_orig": 0.5969537496566772, + "epoch": 0.2826946581350205, + "kl_loss": 0.08739107847213745, + "loss_ib": 0.0013711284846067429, + "step": 983 + }, + { + "ce_ib": 7.158525466918945, + "ce_orig": 1.099306583404541, + "epoch": 0.2826946581350205, + "kl_loss": 0.11099478602409363, + "loss_ib": 0.0018258003983646631, + "step": 983 + }, + { + "ce_ib": 6.190611839294434, + "ce_orig": 0.5297380685806274, + "epoch": 0.2829822417139981, + "kl_loss": 0.1250627338886261, + "loss_ib": 0.0018696883926168084, + "step": 984 + }, + { + "ce_ib": 8.836004257202148, + "ce_orig": 1.0750619173049927, + "epoch": 0.2829822417139981, + "kl_loss": 0.11145786195993423, + "loss_ib": 0.001998178893700242, + "step": 984 + }, + { + "ce_ib": 4.149031639099121, + "ce_orig": 0.654617428779602, + "epoch": 0.2829822417139981, + "kl_loss": 0.10280697047710419, + "loss_ib": 0.0014429728034883738, + "step": 984 + }, + { + "ce_ib": 7.487407684326172, + "ce_orig": 0.9551166296005249, + "epoch": 0.2829822417139981, + "kl_loss": 0.10395655781030655, + "loss_ib": 0.0017883061664178967, + "step": 984 + }, + { + "epoch": 0.2832698252929758, + "grad_norm": 0.0821569636464119, + "learning_rate": 4.946917775028204e-05, + "loss": 0.8411, + "step": 985 + }, + { + "ce_ib": 6.546420574188232, + "ce_orig": 0.8366989493370056, + "epoch": 0.2832698252929758, + "kl_loss": 0.09713012725114822, + "loss_ib": 0.0016259433468803763, + "step": 985 + }, + { + "ce_ib": 5.233980178833008, + "ce_orig": 0.6575462818145752, + "epoch": 0.2832698252929758, + "kl_loss": 0.1284598708152771, + "loss_ib": 0.0018079965375363827, + "step": 985 + }, + { + "ce_ib": 7.609673976898193, + "ce_orig": 0.8251411318778992, + "epoch": 0.2832698252929758, + "kl_loss": 0.16431501507759094, + "loss_ib": 0.002404117491096258, + "step": 985 + }, + { + "ce_ib": 8.2174711227417, + "ce_orig": 1.1952818632125854, + "epoch": 0.2832698252929758, + "kl_loss": 0.1562347710132599, + "loss_ib": 0.0023840947542339563, + "step": 985 + }, + { + "ce_ib": 8.832913398742676, + "ce_orig": 0.8630291223526001, + "epoch": 0.2835574088719534, + "kl_loss": 0.14526310563087463, + "loss_ib": 0.0023359223268926144, + "step": 986 + }, + { + "ce_ib": 9.954623222351074, + "ce_orig": 1.3729417324066162, + "epoch": 0.2835574088719534, + "kl_loss": 0.18901260197162628, + "loss_ib": 0.0028855884447693825, + "step": 986 + }, + { + "ce_ib": 7.703253269195557, + "ce_orig": 1.0099605321884155, + "epoch": 0.2835574088719534, + "kl_loss": 0.1130913719534874, + "loss_ib": 0.0019012389238923788, + "step": 986 + }, + { + "ce_ib": 8.98315715789795, + "ce_orig": 0.9903598427772522, + "epoch": 0.2835574088719534, + "kl_loss": 0.1280607134103775, + "loss_ib": 0.002178922761231661, + "step": 986 + }, + { + "ce_ib": 7.346461296081543, + "ce_orig": 0.526503324508667, + "epoch": 0.28384499245093103, + "kl_loss": 0.20876693725585938, + "loss_ib": 0.0028223153203725815, + "step": 987 + }, + { + "ce_ib": 6.163048267364502, + "ce_orig": 0.6525804996490479, + "epoch": 0.28384499245093103, + "kl_loss": 0.10793370008468628, + "loss_ib": 0.0016956417821347713, + "step": 987 + }, + { + "ce_ib": 6.027551651000977, + "ce_orig": 0.6111074686050415, + "epoch": 0.28384499245093103, + "kl_loss": 0.08734263479709625, + "loss_ib": 0.0014761814381927252, + "step": 987 + }, + { + "ce_ib": 6.522339344024658, + "ce_orig": 0.47297385334968567, + "epoch": 0.28384499245093103, + "kl_loss": 0.19408410787582397, + "loss_ib": 0.0025930749252438545, + "step": 987 + }, + { + "ce_ib": 5.587515830993652, + "ce_orig": 0.7599571347236633, + "epoch": 0.2841325760299087, + "kl_loss": 0.15328392386436462, + "loss_ib": 0.002091590780764818, + "step": 988 + }, + { + "ce_ib": 5.406798362731934, + "ce_orig": 0.7001034021377563, + "epoch": 0.2841325760299087, + "kl_loss": 0.12634865939617157, + "loss_ib": 0.0018041663570329547, + "step": 988 + }, + { + "ce_ib": 5.6719746589660645, + "ce_orig": 0.6059353947639465, + "epoch": 0.2841325760299087, + "kl_loss": 0.17453685402870178, + "loss_ib": 0.0023125659208744764, + "step": 988 + }, + { + "ce_ib": 6.926519870758057, + "ce_orig": 0.9303135871887207, + "epoch": 0.2841325760299087, + "kl_loss": 0.15477648377418518, + "loss_ib": 0.002240416593849659, + "step": 988 + }, + { + "ce_ib": 8.383512496948242, + "ce_orig": 1.1582266092300415, + "epoch": 0.28442015960888634, + "kl_loss": 0.16042135655879974, + "loss_ib": 0.0024425648152828217, + "step": 989 + }, + { + "ce_ib": 9.473687171936035, + "ce_orig": 1.514312982559204, + "epoch": 0.28442015960888634, + "kl_loss": 0.09347137808799744, + "loss_ib": 0.0018820824334397912, + "step": 989 + }, + { + "ce_ib": 8.357083320617676, + "ce_orig": 1.2286492586135864, + "epoch": 0.28442015960888634, + "kl_loss": 0.14219217002391815, + "loss_ib": 0.002257629996165633, + "step": 989 + }, + { + "ce_ib": 8.783169746398926, + "ce_orig": 1.3666160106658936, + "epoch": 0.28442015960888634, + "kl_loss": 0.1416359841823578, + "loss_ib": 0.0022946768440306187, + "step": 989 + }, + { + "epoch": 0.28470774318786396, + "grad_norm": 0.1091771125793457, + "learning_rate": 4.946119435657738e-05, + "loss": 0.8971, + "step": 990 + }, + { + "ce_ib": 6.181784152984619, + "ce_orig": 0.6989761590957642, + "epoch": 0.28470774318786396, + "kl_loss": 0.11948366463184357, + "loss_ib": 0.0018130149692296982, + "step": 990 + }, + { + "ce_ib": 7.10377836227417, + "ce_orig": 0.8298677802085876, + "epoch": 0.28470774318786396, + "kl_loss": 0.12324854731559753, + "loss_ib": 0.0019428632222115993, + "step": 990 + }, + { + "ce_ib": 7.611861705780029, + "ce_orig": 1.172329306602478, + "epoch": 0.28470774318786396, + "kl_loss": 0.08517073839902878, + "loss_ib": 0.0016128936549648643, + "step": 990 + }, + { + "ce_ib": 7.055534362792969, + "ce_orig": 0.9290443062782288, + "epoch": 0.28470774318786396, + "kl_loss": 0.15436816215515137, + "loss_ib": 0.0022492350544780493, + "step": 990 + }, + { + "ce_ib": 8.848563194274902, + "ce_orig": 0.7931856513023376, + "epoch": 0.28499532676684164, + "kl_loss": 0.16488288342952728, + "loss_ib": 0.0025336849503219128, + "step": 991 + }, + { + "ce_ib": 10.097640991210938, + "ce_orig": 1.1702800989151, + "epoch": 0.28499532676684164, + "kl_loss": 0.13779164850711823, + "loss_ib": 0.0023876805789768696, + "step": 991 + }, + { + "ce_ib": 7.169426918029785, + "ce_orig": 0.8789024949073792, + "epoch": 0.28499532676684164, + "kl_loss": 0.1385534256696701, + "loss_ib": 0.0021024770103394985, + "step": 991 + }, + { + "ce_ib": 7.275373458862305, + "ce_orig": 0.854453444480896, + "epoch": 0.28499532676684164, + "kl_loss": 0.11206218600273132, + "loss_ib": 0.0018481591250747442, + "step": 991 + }, + { + "ce_ib": 7.726741790771484, + "ce_orig": 1.2789119482040405, + "epoch": 0.28528291034581926, + "kl_loss": 0.1516597718000412, + "loss_ib": 0.0022892719134688377, + "step": 992 + }, + { + "ce_ib": 8.328351974487305, + "ce_orig": 1.348406195640564, + "epoch": 0.28528291034581926, + "kl_loss": 0.12467220425605774, + "loss_ib": 0.0020795571617782116, + "step": 992 + }, + { + "ce_ib": 5.893527507781982, + "ce_orig": 0.49985817074775696, + "epoch": 0.28528291034581926, + "kl_loss": 0.1101238876581192, + "loss_ib": 0.001690591569058597, + "step": 992 + }, + { + "ce_ib": 6.144350528717041, + "ce_orig": 0.6708614230155945, + "epoch": 0.28528291034581926, + "kl_loss": 0.1393936723470688, + "loss_ib": 0.002008371753618121, + "step": 992 + }, + { + "ce_ib": 6.9593353271484375, + "ce_orig": 0.9741218090057373, + "epoch": 0.2855704939247969, + "kl_loss": 0.10788355767726898, + "loss_ib": 0.0017747690435498953, + "step": 993 + }, + { + "ce_ib": 3.7139055728912354, + "ce_orig": 0.7304720282554626, + "epoch": 0.2855704939247969, + "kl_loss": 0.08104556798934937, + "loss_ib": 0.0011818462517112494, + "step": 993 + }, + { + "ce_ib": 10.939255714416504, + "ce_orig": 1.5362738370895386, + "epoch": 0.2855704939247969, + "kl_loss": 0.15437957644462585, + "loss_ib": 0.0026377211324870586, + "step": 993 + }, + { + "ce_ib": 5.173558712005615, + "ce_orig": 0.7878844141960144, + "epoch": 0.2855704939247969, + "kl_loss": 0.11559354513883591, + "loss_ib": 0.00167329132091254, + "step": 993 + }, + { + "ce_ib": 4.338344573974609, + "ce_orig": 0.6058388352394104, + "epoch": 0.2858580775037745, + "kl_loss": 0.1061791256070137, + "loss_ib": 0.0014956255909055471, + "step": 994 + }, + { + "ce_ib": 6.763393878936768, + "ce_orig": 0.9681786298751831, + "epoch": 0.2858580775037745, + "kl_loss": 0.10215041786432266, + "loss_ib": 0.0016978434287011623, + "step": 994 + }, + { + "ce_ib": 6.972935199737549, + "ce_orig": 0.7812452912330627, + "epoch": 0.2858580775037745, + "kl_loss": 0.07625571638345718, + "loss_ib": 0.001459850580431521, + "step": 994 + }, + { + "ce_ib": 8.671821594238281, + "ce_orig": 1.0143651962280273, + "epoch": 0.2858580775037745, + "kl_loss": 0.1577187478542328, + "loss_ib": 0.002444369485601783, + "step": 994 + }, + { + "epoch": 0.2861456610827522, + "grad_norm": 0.10612454265356064, + "learning_rate": 4.945315203017795e-05, + "loss": 0.8991, + "step": 995 + }, + { + "ce_ib": 8.718454360961914, + "ce_orig": 1.094069480895996, + "epoch": 0.2861456610827522, + "kl_loss": 0.1519099771976471, + "loss_ib": 0.0023909450974315405, + "step": 995 + }, + { + "ce_ib": 9.989187240600586, + "ce_orig": 0.839239776134491, + "epoch": 0.2861456610827522, + "kl_loss": 0.1391148716211319, + "loss_ib": 0.0023900673259049654, + "step": 995 + }, + { + "ce_ib": 5.35410737991333, + "ce_orig": 0.7497389316558838, + "epoch": 0.2861456610827522, + "kl_loss": 0.10185503959655762, + "loss_ib": 0.0015539609594270587, + "step": 995 + }, + { + "ce_ib": 3.817272901535034, + "ce_orig": 0.5471851825714111, + "epoch": 0.2861456610827522, + "kl_loss": 0.14413145184516907, + "loss_ib": 0.0018230417044833302, + "step": 995 + }, + { + "ce_ib": 4.422999382019043, + "ce_orig": 0.5248720645904541, + "epoch": 0.2864332446617298, + "kl_loss": 0.14078199863433838, + "loss_ib": 0.0018501197919249535, + "step": 996 + }, + { + "ce_ib": 6.990998268127441, + "ce_orig": 0.8086313605308533, + "epoch": 0.2864332446617298, + "kl_loss": 0.1101246327161789, + "loss_ib": 0.001800346071831882, + "step": 996 + }, + { + "ce_ib": 7.523761749267578, + "ce_orig": 0.8706881403923035, + "epoch": 0.2864332446617298, + "kl_loss": 0.11096677929162979, + "loss_ib": 0.0018620439805090427, + "step": 996 + }, + { + "ce_ib": 7.341949462890625, + "ce_orig": 0.8602787256240845, + "epoch": 0.2864332446617298, + "kl_loss": 0.10801561176776886, + "loss_ib": 0.001814351067878306, + "step": 996 + }, + { + "ce_ib": 5.043796062469482, + "ce_orig": 0.5980595350265503, + "epoch": 0.28672082824070744, + "kl_loss": 0.1614397168159485, + "loss_ib": 0.002118776785209775, + "step": 997 + }, + { + "ce_ib": 2.875126600265503, + "ce_orig": 0.45491382479667664, + "epoch": 0.28672082824070744, + "kl_loss": 0.10723177343606949, + "loss_ib": 0.001359830261208117, + "step": 997 + }, + { + "ce_ib": 6.1942009925842285, + "ce_orig": 0.8003867864608765, + "epoch": 0.28672082824070744, + "kl_loss": 0.08695151656866074, + "loss_ib": 0.0014889352023601532, + "step": 997 + }, + { + "ce_ib": 7.5594801902771, + "ce_orig": 1.2212069034576416, + "epoch": 0.28672082824070744, + "kl_loss": 0.09208080172538757, + "loss_ib": 0.0016767559573054314, + "step": 997 + }, + { + "ce_ib": 5.673044204711914, + "ce_orig": 0.5247984528541565, + "epoch": 0.2870084118196851, + "kl_loss": 0.15386104583740234, + "loss_ib": 0.0021059149876236916, + "step": 998 + }, + { + "ce_ib": 7.351710319519043, + "ce_orig": 1.040869116783142, + "epoch": 0.2870084118196851, + "kl_loss": 0.09939703345298767, + "loss_ib": 0.0017291413387283683, + "step": 998 + }, + { + "ce_ib": 7.567747116088867, + "ce_orig": 0.5997657179832458, + "epoch": 0.2870084118196851, + "kl_loss": 0.13425284624099731, + "loss_ib": 0.002099303063005209, + "step": 998 + }, + { + "ce_ib": 8.138786315917969, + "ce_orig": 1.1768231391906738, + "epoch": 0.2870084118196851, + "kl_loss": 0.1242784708738327, + "loss_ib": 0.002056663390249014, + "step": 998 + }, + { + "ce_ib": 4.522161483764648, + "ce_orig": 0.48833024501800537, + "epoch": 0.28729599539866274, + "kl_loss": 0.22905391454696655, + "loss_ib": 0.0027427554596215487, + "step": 999 + }, + { + "ce_ib": 5.886623859405518, + "ce_orig": 0.8945769667625427, + "epoch": 0.28729599539866274, + "kl_loss": 0.12364333122968674, + "loss_ib": 0.0018250956200063229, + "step": 999 + }, + { + "ce_ib": 8.664619445800781, + "ce_orig": 0.699190616607666, + "epoch": 0.28729599539866274, + "kl_loss": 0.18423990905284882, + "loss_ib": 0.002708860905840993, + "step": 999 + }, + { + "ce_ib": 8.612241744995117, + "ce_orig": 1.0460861921310425, + "epoch": 0.28729599539866274, + "kl_loss": 0.13352595269680023, + "loss_ib": 0.0021964835468679667, + "step": 999 + }, + { + "epoch": 0.28758357897764036, + "grad_norm": 0.0895831435918808, + "learning_rate": 4.944505079045958e-05, + "loss": 0.8976, + "step": 1000 + }, + { + "ce_ib": 8.301177024841309, + "ce_orig": 0.6439716815948486, + "epoch": 0.28758357897764036, + "kl_loss": 0.12516939640045166, + "loss_ib": 0.002081811660900712, + "step": 1000 + }, + { + "ce_ib": 7.792233467102051, + "ce_orig": 0.8878663778305054, + "epoch": 0.28758357897764036, + "kl_loss": 0.11768750846385956, + "loss_ib": 0.00195609824731946, + "step": 1000 + }, + { + "ce_ib": 6.912393569946289, + "ce_orig": 0.5264413952827454, + "epoch": 0.28758357897764036, + "kl_loss": 0.16177913546562195, + "loss_ib": 0.002309030620381236, + "step": 1000 + }, + { + "ce_ib": 6.791064739227295, + "ce_orig": 0.8416721224784851, + "epoch": 0.28758357897764036, + "kl_loss": 0.14784878492355347, + "loss_ib": 0.002157594310119748, + "step": 1000 + }, + { + "ce_ib": 7.734585762023926, + "ce_orig": 0.657818615436554, + "epoch": 0.28787116255661804, + "kl_loss": 0.13893797993659973, + "loss_ib": 0.0021628381218761206, + "step": 1001 + }, + { + "ce_ib": 7.315647602081299, + "ce_orig": 0.6256568431854248, + "epoch": 0.28787116255661804, + "kl_loss": 0.13449688255786896, + "loss_ib": 0.0020765336230397224, + "step": 1001 + }, + { + "ce_ib": 4.7383222579956055, + "ce_orig": 0.757487952709198, + "epoch": 0.28787116255661804, + "kl_loss": 0.11639707535505295, + "loss_ib": 0.0016378029249608517, + "step": 1001 + }, + { + "ce_ib": 6.276235580444336, + "ce_orig": 0.45473286509513855, + "epoch": 0.28787116255661804, + "kl_loss": 0.14961040019989014, + "loss_ib": 0.002123727463185787, + "step": 1001 + }, + { + "ce_ib": 10.06789493560791, + "ce_orig": 1.3343020677566528, + "epoch": 0.28815874613559567, + "kl_loss": 0.22612667083740234, + "loss_ib": 0.003268056083470583, + "step": 1002 + }, + { + "ce_ib": 6.92459774017334, + "ce_orig": 0.8351874351501465, + "epoch": 0.28815874613559567, + "kl_loss": 0.07932960987091064, + "loss_ib": 0.0014857558999210596, + "step": 1002 + }, + { + "ce_ib": 8.357982635498047, + "ce_orig": 0.896270751953125, + "epoch": 0.28815874613559567, + "kl_loss": 0.08860597014427185, + "loss_ib": 0.001721857930533588, + "step": 1002 + }, + { + "ce_ib": 7.726855278015137, + "ce_orig": 0.7626195549964905, + "epoch": 0.28815874613559567, + "kl_loss": 0.17634786665439606, + "loss_ib": 0.002536164131015539, + "step": 1002 + }, + { + "ce_ib": 3.290245532989502, + "ce_orig": 0.5970833897590637, + "epoch": 0.2884463297145733, + "kl_loss": 0.07192050665616989, + "loss_ib": 0.0010482296347618103, + "step": 1003 + }, + { + "ce_ib": 11.031519889831543, + "ce_orig": 1.4460707902908325, + "epoch": 0.2884463297145733, + "kl_loss": 0.1282051056623459, + "loss_ib": 0.002385202795267105, + "step": 1003 + }, + { + "ce_ib": 8.045802116394043, + "ce_orig": 1.0216474533081055, + "epoch": 0.2884463297145733, + "kl_loss": 0.1990397721529007, + "loss_ib": 0.0027949779760092497, + "step": 1003 + }, + { + "ce_ib": 4.998257160186768, + "ce_orig": 0.4931280016899109, + "epoch": 0.2884463297145733, + "kl_loss": 0.1314055621623993, + "loss_ib": 0.0018138813320547342, + "step": 1003 + }, + { + "ce_ib": 8.403632164001465, + "ce_orig": 0.9838512539863586, + "epoch": 0.2887339132935509, + "kl_loss": 0.14859150350093842, + "loss_ib": 0.0023262782488018274, + "step": 1004 + }, + { + "ce_ib": 4.052238941192627, + "ce_orig": 0.42797771096229553, + "epoch": 0.2887339132935509, + "kl_loss": 0.10543163865804672, + "loss_ib": 0.0014595402171835303, + "step": 1004 + }, + { + "ce_ib": 7.557340621948242, + "ce_orig": 0.8052495121955872, + "epoch": 0.2887339132935509, + "kl_loss": 0.1491703987121582, + "loss_ib": 0.0022474380675703287, + "step": 1004 + }, + { + "ce_ib": 3.635923147201538, + "ce_orig": 0.3550164997577667, + "epoch": 0.2887339132935509, + "kl_loss": 0.14298149943351746, + "loss_ib": 0.0017934072529897094, + "step": 1004 + }, + { + "epoch": 0.2890214968725286, + "grad_norm": 0.08703169226646423, + "learning_rate": 4.9436890656940045e-05, + "loss": 0.8759, + "step": 1005 + }, + { + "ce_ib": 6.973578453063965, + "ce_orig": 0.7662994265556335, + "epoch": 0.2890214968725286, + "kl_loss": 0.13552214205265045, + "loss_ib": 0.0020525790750980377, + "step": 1005 + }, + { + "ce_ib": 10.7005615234375, + "ce_orig": 1.4963085651397705, + "epoch": 0.2890214968725286, + "kl_loss": 0.17894543707370758, + "loss_ib": 0.0028595104813575745, + "step": 1005 + }, + { + "ce_ib": 7.626862049102783, + "ce_orig": 0.6037044525146484, + "epoch": 0.2890214968725286, + "kl_loss": 0.10745424032211304, + "loss_ib": 0.0018372285412624478, + "step": 1005 + }, + { + "ce_ib": 4.8975138664245605, + "ce_orig": 0.36308395862579346, + "epoch": 0.2890214968725286, + "kl_loss": 0.16487862169742584, + "loss_ib": 0.002138537587597966, + "step": 1005 + }, + { + "ce_ib": 7.104249477386475, + "ce_orig": 0.9271300435066223, + "epoch": 0.2893090804515062, + "kl_loss": 0.11113549023866653, + "loss_ib": 0.001821779878810048, + "step": 1006 + }, + { + "ce_ib": 6.194459915161133, + "ce_orig": 0.5316910743713379, + "epoch": 0.2893090804515062, + "kl_loss": 0.25875431299209595, + "loss_ib": 0.0032069890294224024, + "step": 1006 + }, + { + "ce_ib": 9.120644569396973, + "ce_orig": 0.7684857249259949, + "epoch": 0.2893090804515062, + "kl_loss": 0.05433555692434311, + "loss_ib": 0.0014554199296981096, + "step": 1006 + }, + { + "ce_ib": 7.741818904876709, + "ce_orig": 0.7994129657745361, + "epoch": 0.2893090804515062, + "kl_loss": 0.16110675036907196, + "loss_ib": 0.002385249361395836, + "step": 1006 + }, + { + "ce_ib": 8.448857307434082, + "ce_orig": 1.0468580722808838, + "epoch": 0.28959666403048384, + "kl_loss": 0.13067549467086792, + "loss_ib": 0.002151640597730875, + "step": 1007 + }, + { + "ce_ib": 4.776382923126221, + "ce_orig": 0.857218325138092, + "epoch": 0.28959666403048384, + "kl_loss": 0.10937680304050446, + "loss_ib": 0.0015714062610641122, + "step": 1007 + }, + { + "ce_ib": 4.323080539703369, + "ce_orig": 0.7916050553321838, + "epoch": 0.28959666403048384, + "kl_loss": 0.09697412699460983, + "loss_ib": 0.0014020493254065514, + "step": 1007 + }, + { + "ce_ib": 8.446868896484375, + "ce_orig": 1.1747890710830688, + "epoch": 0.28959666403048384, + "kl_loss": 0.13406559824943542, + "loss_ib": 0.0021853428333997726, + "step": 1007 + }, + { + "ce_ib": 7.49606990814209, + "ce_orig": 0.980958878993988, + "epoch": 0.2898842476094615, + "kl_loss": 0.12899692356586456, + "loss_ib": 0.0020395761821419, + "step": 1008 + }, + { + "ce_ib": 6.99376106262207, + "ce_orig": 0.8433418869972229, + "epoch": 0.2898842476094615, + "kl_loss": 0.12246361374855042, + "loss_ib": 0.0019240122055634856, + "step": 1008 + }, + { + "ce_ib": 9.26354694366455, + "ce_orig": 0.5582899451255798, + "epoch": 0.2898842476094615, + "kl_loss": 0.1489538997411728, + "loss_ib": 0.0024158935993909836, + "step": 1008 + }, + { + "ce_ib": 9.026296615600586, + "ce_orig": 1.4614930152893066, + "epoch": 0.2898842476094615, + "kl_loss": 0.09286147356033325, + "loss_ib": 0.001831244328059256, + "step": 1008 + }, + { + "ce_ib": 7.721127033233643, + "ce_orig": 0.7993571162223816, + "epoch": 0.29017183118843914, + "kl_loss": 0.13815072178840637, + "loss_ib": 0.0021536198910325766, + "step": 1009 + }, + { + "ce_ib": 4.602123260498047, + "ce_orig": 0.7283000946044922, + "epoch": 0.29017183118843914, + "kl_loss": 0.09357312321662903, + "loss_ib": 0.0013959434581920505, + "step": 1009 + }, + { + "ce_ib": 7.143542766571045, + "ce_orig": 1.1499704122543335, + "epoch": 0.29017183118843914, + "kl_loss": 0.13207654654979706, + "loss_ib": 0.002035119803622365, + "step": 1009 + }, + { + "ce_ib": 6.815535068511963, + "ce_orig": 1.1950119733810425, + "epoch": 0.29017183118843914, + "kl_loss": 0.10215964913368225, + "loss_ib": 0.0017031499883159995, + "step": 1009 + }, + { + "epoch": 0.29045941476741677, + "grad_norm": 0.08291789889335632, + "learning_rate": 4.942867164927899e-05, + "loss": 0.8737, + "step": 1010 + }, + { + "ce_ib": 7.340415000915527, + "ce_orig": 0.9612502455711365, + "epoch": 0.29045941476741677, + "kl_loss": 0.11465869843959808, + "loss_ib": 0.0018806284060701728, + "step": 1010 + }, + { + "ce_ib": 6.426059722900391, + "ce_orig": 0.9481909275054932, + "epoch": 0.29045941476741677, + "kl_loss": 0.11971482634544373, + "loss_ib": 0.001839754288084805, + "step": 1010 + }, + { + "ce_ib": 6.488442420959473, + "ce_orig": 0.889805793762207, + "epoch": 0.29045941476741677, + "kl_loss": 0.11869333684444427, + "loss_ib": 0.001835777540691197, + "step": 1010 + }, + { + "ce_ib": 2.905393362045288, + "ce_orig": 0.5455615520477295, + "epoch": 0.29045941476741677, + "kl_loss": 0.0905960351228714, + "loss_ib": 0.0011964996811002493, + "step": 1010 + }, + { + "ce_ib": 8.19021224975586, + "ce_orig": 1.2127585411071777, + "epoch": 0.29074699834639445, + "kl_loss": 0.25930172204971313, + "loss_ib": 0.0034120383206754923, + "step": 1011 + }, + { + "ce_ib": 5.116666793823242, + "ce_orig": 0.70909184217453, + "epoch": 0.29074699834639445, + "kl_loss": 0.11982348561286926, + "loss_ib": 0.0017099014949053526, + "step": 1011 + }, + { + "ce_ib": 6.482151031494141, + "ce_orig": 0.6337217688560486, + "epoch": 0.29074699834639445, + "kl_loss": 0.13345953822135925, + "loss_ib": 0.001982810441404581, + "step": 1011 + }, + { + "ce_ib": 7.245599746704102, + "ce_orig": 1.1574453115463257, + "epoch": 0.29074699834639445, + "kl_loss": 0.1491246074438095, + "loss_ib": 0.0022158059291541576, + "step": 1011 + }, + { + "ce_ib": 10.480112075805664, + "ce_orig": 1.089797854423523, + "epoch": 0.29103458192537207, + "kl_loss": 0.11675558984279633, + "loss_ib": 0.0022155670449137688, + "step": 1012 + }, + { + "ce_ib": 6.354058742523193, + "ce_orig": 0.7734960317611694, + "epoch": 0.29103458192537207, + "kl_loss": 0.1279613971710205, + "loss_ib": 0.001915019704028964, + "step": 1012 + }, + { + "ce_ib": 5.588810920715332, + "ce_orig": 0.7622073888778687, + "epoch": 0.29103458192537207, + "kl_loss": 0.10941126942634583, + "loss_ib": 0.0016529938438907266, + "step": 1012 + }, + { + "ce_ib": 7.674766540527344, + "ce_orig": 1.4139724969863892, + "epoch": 0.29103458192537207, + "kl_loss": 0.15296992659568787, + "loss_ib": 0.0022971758153289557, + "step": 1012 + }, + { + "ce_ib": 6.944582462310791, + "ce_orig": 0.6798045039176941, + "epoch": 0.2913221655043497, + "kl_loss": 0.13148388266563416, + "loss_ib": 0.0020092970225960016, + "step": 1013 + }, + { + "ce_ib": 9.336368560791016, + "ce_orig": 0.8694828748703003, + "epoch": 0.2913221655043497, + "kl_loss": 0.11502645909786224, + "loss_ib": 0.0020839013159275055, + "step": 1013 + }, + { + "ce_ib": 6.466903209686279, + "ce_orig": 1.046998143196106, + "epoch": 0.2913221655043497, + "kl_loss": 0.09708376228809357, + "loss_ib": 0.0016175279160961509, + "step": 1013 + }, + { + "ce_ib": 8.016132354736328, + "ce_orig": 1.270522952079773, + "epoch": 0.2913221655043497, + "kl_loss": 0.13104431331157684, + "loss_ib": 0.0021120563615113497, + "step": 1013 + }, + { + "ce_ib": 7.8399553298950195, + "ce_orig": 1.0986905097961426, + "epoch": 0.2916097490833273, + "kl_loss": 0.15167173743247986, + "loss_ib": 0.0023007127456367016, + "step": 1014 + }, + { + "ce_ib": 5.281484603881836, + "ce_orig": 0.8865677714347839, + "epoch": 0.2916097490833273, + "kl_loss": 0.09800760447978973, + "loss_ib": 0.0015082244062796235, + "step": 1014 + }, + { + "ce_ib": 5.640346527099609, + "ce_orig": 0.8331298232078552, + "epoch": 0.2916097490833273, + "kl_loss": 0.10224798321723938, + "loss_ib": 0.0015865144087001681, + "step": 1014 + }, + { + "ce_ib": 10.421279907226562, + "ce_orig": 1.338667631149292, + "epoch": 0.2916097490833273, + "kl_loss": 0.13115710020065308, + "loss_ib": 0.0023536989465355873, + "step": 1014 + }, + { + "epoch": 0.291897332662305, + "grad_norm": 0.0798521637916565, + "learning_rate": 4.9420393787277917e-05, + "loss": 0.8894, + "step": 1015 + }, + { + "ce_ib": 8.863430976867676, + "ce_orig": 1.5828720331192017, + "epoch": 0.291897332662305, + "kl_loss": 0.12114414572715759, + "loss_ib": 0.0020977845415472984, + "step": 1015 + }, + { + "ce_ib": 6.6428985595703125, + "ce_orig": 0.6384519338607788, + "epoch": 0.291897332662305, + "kl_loss": 0.14254824817180634, + "loss_ib": 0.002089772140607238, + "step": 1015 + }, + { + "ce_ib": 2.3048622608184814, + "ce_orig": 0.16694949567317963, + "epoch": 0.291897332662305, + "kl_loss": 0.17876015603542328, + "loss_ib": 0.002018087776377797, + "step": 1015 + }, + { + "ce_ib": 5.116494655609131, + "ce_orig": 0.802765965461731, + "epoch": 0.291897332662305, + "kl_loss": 0.11342580616474152, + "loss_ib": 0.0016459074104204774, + "step": 1015 + }, + { + "ce_ib": 7.742083549499512, + "ce_orig": 0.7786831855773926, + "epoch": 0.2921849162412826, + "kl_loss": 0.168631911277771, + "loss_ib": 0.002460527466610074, + "step": 1016 + }, + { + "ce_ib": 4.972054958343506, + "ce_orig": 0.7613850831985474, + "epoch": 0.2921849162412826, + "kl_loss": 0.10056689381599426, + "loss_ib": 0.001502874423749745, + "step": 1016 + }, + { + "ce_ib": 7.624302387237549, + "ce_orig": 1.0380606651306152, + "epoch": 0.2921849162412826, + "kl_loss": 0.12155883759260178, + "loss_ib": 0.0019780185539275408, + "step": 1016 + }, + { + "ce_ib": 7.306596755981445, + "ce_orig": 0.5652252435684204, + "epoch": 0.2921849162412826, + "kl_loss": 0.14521154761314392, + "loss_ib": 0.0021827751770615578, + "step": 1016 + }, + { + "ce_ib": 6.975562572479248, + "ce_orig": 0.8997268080711365, + "epoch": 0.29247249982026025, + "kl_loss": 0.11250243335962296, + "loss_ib": 0.001822580466978252, + "step": 1017 + }, + { + "ce_ib": 6.613406181335449, + "ce_orig": 0.8821706175804138, + "epoch": 0.29247249982026025, + "kl_loss": 0.0819438248872757, + "loss_ib": 0.0014807786792516708, + "step": 1017 + }, + { + "ce_ib": 4.1242146492004395, + "ce_orig": 0.7115670442581177, + "epoch": 0.29247249982026025, + "kl_loss": 0.42394354939460754, + "loss_ib": 0.004651857074350119, + "step": 1017 + }, + { + "ce_ib": 4.448148727416992, + "ce_orig": 0.862206757068634, + "epoch": 0.29247249982026025, + "kl_loss": 0.08045890927314758, + "loss_ib": 0.0012494039256125689, + "step": 1017 + }, + { + "ce_ib": 8.916611671447754, + "ce_orig": 0.8049607276916504, + "epoch": 0.2927600833992379, + "kl_loss": 0.1913137435913086, + "loss_ib": 0.0028047983068972826, + "step": 1018 + }, + { + "ce_ib": 6.2450175285339355, + "ce_orig": 0.7353116869926453, + "epoch": 0.2927600833992379, + "kl_loss": 0.09168928116559982, + "loss_ib": 0.0015413945075124502, + "step": 1018 + }, + { + "ce_ib": 6.868838310241699, + "ce_orig": 0.8467898964881897, + "epoch": 0.2927600833992379, + "kl_loss": 0.1104859858751297, + "loss_ib": 0.0017917435616254807, + "step": 1018 + }, + { + "ce_ib": 3.588919162750244, + "ce_orig": 0.4546128213405609, + "epoch": 0.2927600833992379, + "kl_loss": 0.08714288473129272, + "loss_ib": 0.0012303207768127322, + "step": 1018 + }, + { + "ce_ib": 2.2777414321899414, + "ce_orig": 0.2392132729291916, + "epoch": 0.29304766697821555, + "kl_loss": 0.3012656271457672, + "loss_ib": 0.003240430261939764, + "step": 1019 + }, + { + "ce_ib": 8.048588752746582, + "ce_orig": 0.9696701765060425, + "epoch": 0.29304766697821555, + "kl_loss": 0.16815456748008728, + "loss_ib": 0.0024864044971764088, + "step": 1019 + }, + { + "ce_ib": 9.795534133911133, + "ce_orig": 1.3076472282409668, + "epoch": 0.29304766697821555, + "kl_loss": 0.11489086598157883, + "loss_ib": 0.0021284620743244886, + "step": 1019 + }, + { + "ce_ib": 8.209424018859863, + "ce_orig": 0.8340076208114624, + "epoch": 0.29304766697821555, + "kl_loss": 0.12861773371696472, + "loss_ib": 0.0021071196533739567, + "step": 1019 + }, + { + "epoch": 0.2933352505571932, + "grad_norm": 0.08202947676181793, + "learning_rate": 4.941205709088011e-05, + "loss": 0.8508, + "step": 1020 + }, + { + "ce_ib": 3.581411361694336, + "ce_orig": 0.7349913716316223, + "epoch": 0.2933352505571932, + "kl_loss": 0.07525119930505753, + "loss_ib": 0.0011106531601399183, + "step": 1020 + }, + { + "ce_ib": 9.23768138885498, + "ce_orig": 1.4128665924072266, + "epoch": 0.2933352505571932, + "kl_loss": 0.06985117495059967, + "loss_ib": 0.0016222798731178045, + "step": 1020 + }, + { + "ce_ib": 5.13540506362915, + "ce_orig": 0.4822990894317627, + "epoch": 0.2933352505571932, + "kl_loss": 0.17090243101119995, + "loss_ib": 0.002222564769908786, + "step": 1020 + }, + { + "ce_ib": 7.272921085357666, + "ce_orig": 0.6506208777427673, + "epoch": 0.2933352505571932, + "kl_loss": 0.1548406183719635, + "loss_ib": 0.002275698119774461, + "step": 1020 + }, + { + "ce_ib": 9.71750259399414, + "ce_orig": 1.235391616821289, + "epoch": 0.29362283413617085, + "kl_loss": 0.09263736009597778, + "loss_ib": 0.0018981238827109337, + "step": 1021 + }, + { + "ce_ib": 7.791978359222412, + "ce_orig": 0.8824278116226196, + "epoch": 0.29362283413617085, + "kl_loss": 0.1360311508178711, + "loss_ib": 0.002139509189873934, + "step": 1021 + }, + { + "ce_ib": 6.253500938415527, + "ce_orig": 0.5768558382987976, + "epoch": 0.29362283413617085, + "kl_loss": 0.15343819558620453, + "loss_ib": 0.0021597319282591343, + "step": 1021 + }, + { + "ce_ib": 4.855053901672363, + "ce_orig": 0.3929537832736969, + "epoch": 0.29362283413617085, + "kl_loss": 0.07283283770084381, + "loss_ib": 0.0012138336896896362, + "step": 1021 + }, + { + "ce_ib": 6.272524833679199, + "ce_orig": 0.6167612671852112, + "epoch": 0.2939104177151485, + "kl_loss": 0.14953365921974182, + "loss_ib": 0.0021225889213383198, + "step": 1022 + }, + { + "ce_ib": 8.505683898925781, + "ce_orig": 1.1090301275253296, + "epoch": 0.2939104177151485, + "kl_loss": 0.1480690836906433, + "loss_ib": 0.0023312591947615147, + "step": 1022 + }, + { + "ce_ib": 7.212671279907227, + "ce_orig": 0.8089624047279358, + "epoch": 0.2939104177151485, + "kl_loss": 0.13812242448329926, + "loss_ib": 0.0021024912130087614, + "step": 1022 + }, + { + "ce_ib": 6.410940647125244, + "ce_orig": 0.7297493815422058, + "epoch": 0.2939104177151485, + "kl_loss": 0.15360087156295776, + "loss_ib": 0.0021771027240902185, + "step": 1022 + }, + { + "ce_ib": 6.700841426849365, + "ce_orig": 1.0308021306991577, + "epoch": 0.2941980012941261, + "kl_loss": 0.13294780254364014, + "loss_ib": 0.0019995621405541897, + "step": 1023 + }, + { + "ce_ib": 6.241668701171875, + "ce_orig": 0.6736411452293396, + "epoch": 0.2941980012941261, + "kl_loss": 0.11182098090648651, + "loss_ib": 0.0017423765966668725, + "step": 1023 + }, + { + "ce_ib": 3.8634262084960938, + "ce_orig": 0.48370254039764404, + "epoch": 0.2941980012941261, + "kl_loss": 0.13036774098873138, + "loss_ib": 0.0016900199698284268, + "step": 1023 + }, + { + "ce_ib": 8.329598426818848, + "ce_orig": 0.9687957167625427, + "epoch": 0.2941980012941261, + "kl_loss": 0.13080094754695892, + "loss_ib": 0.002140969270840287, + "step": 1023 + }, + { + "ce_ib": 7.702944755554199, + "ce_orig": 0.9619100689888, + "epoch": 0.2944855848731037, + "kl_loss": 0.09282632917165756, + "loss_ib": 0.0016985577531158924, + "step": 1024 + }, + { + "ce_ib": 7.137479305267334, + "ce_orig": 0.7066961526870728, + "epoch": 0.2944855848731037, + "kl_loss": 0.12376445531845093, + "loss_ib": 0.0019513923907652497, + "step": 1024 + }, + { + "ce_ib": 12.413986206054688, + "ce_orig": 1.8791048526763916, + "epoch": 0.2944855848731037, + "kl_loss": 0.11838547140359879, + "loss_ib": 0.002425253391265869, + "step": 1024 + }, + { + "ce_ib": 9.755016326904297, + "ce_orig": 1.334018349647522, + "epoch": 0.2944855848731037, + "kl_loss": 0.11656536161899567, + "loss_ib": 0.0021411553025245667, + "step": 1024 + }, + { + "epoch": 0.2947731684520814, + "grad_norm": 0.10105545818805695, + "learning_rate": 4.9403661580170626e-05, + "loss": 0.8413, + "step": 1025 + }, + { + "ce_ib": 5.8657989501953125, + "ce_orig": 0.4482717514038086, + "epoch": 0.2947731684520814, + "kl_loss": 0.16809025406837463, + "loss_ib": 0.0022674824576824903, + "step": 1025 + }, + { + "ce_ib": 4.58872127532959, + "ce_orig": 0.5492852926254272, + "epoch": 0.2947731684520814, + "kl_loss": 0.06326945126056671, + "loss_ib": 0.0010915666352957487, + "step": 1025 + }, + { + "ce_ib": 5.113560676574707, + "ce_orig": 0.8479498028755188, + "epoch": 0.2947731684520814, + "kl_loss": 0.10429667681455612, + "loss_ib": 0.0015543227782472968, + "step": 1025 + }, + { + "ce_ib": 8.390458106994629, + "ce_orig": 1.3818213939666748, + "epoch": 0.2947731684520814, + "kl_loss": 0.17246775329113007, + "loss_ib": 0.002563723362982273, + "step": 1025 + }, + { + "ce_ib": 6.4424309730529785, + "ce_orig": 0.8801824450492859, + "epoch": 0.295060752031059, + "kl_loss": 0.14136923849582672, + "loss_ib": 0.002057935344055295, + "step": 1026 + }, + { + "ce_ib": 6.539217472076416, + "ce_orig": 0.47349444031715393, + "epoch": 0.295060752031059, + "kl_loss": 0.09952758997678757, + "loss_ib": 0.001649197656661272, + "step": 1026 + }, + { + "ce_ib": 4.090629577636719, + "ce_orig": 0.6171263456344604, + "epoch": 0.295060752031059, + "kl_loss": 0.08213280886411667, + "loss_ib": 0.0012303909752517939, + "step": 1026 + }, + { + "ce_ib": 7.95316219329834, + "ce_orig": 1.1671141386032104, + "epoch": 0.295060752031059, + "kl_loss": 0.10801438242197037, + "loss_ib": 0.0018754599150270224, + "step": 1026 + }, + { + "ce_ib": 9.029088973999023, + "ce_orig": 0.8997545838356018, + "epoch": 0.29534833561003665, + "kl_loss": 0.09331157803535461, + "loss_ib": 0.0018360245740041137, + "step": 1027 + }, + { + "ce_ib": 6.017154693603516, + "ce_orig": 0.7910234928131104, + "epoch": 0.29534833561003665, + "kl_loss": 0.06913614273071289, + "loss_ib": 0.0012930769007652998, + "step": 1027 + }, + { + "ce_ib": 6.308470249176025, + "ce_orig": 0.4661364257335663, + "epoch": 0.29534833561003665, + "kl_loss": 0.11379620432853699, + "loss_ib": 0.0017688089283183217, + "step": 1027 + }, + { + "ce_ib": 5.081976413726807, + "ce_orig": 0.8399765491485596, + "epoch": 0.29534833561003665, + "kl_loss": 0.0796096995472908, + "loss_ib": 0.0013042945647612214, + "step": 1027 + }, + { + "ce_ib": 8.540277481079102, + "ce_orig": 1.1448512077331543, + "epoch": 0.29563591918901433, + "kl_loss": 0.2748773694038391, + "loss_ib": 0.003602801589295268, + "step": 1028 + }, + { + "ce_ib": 4.430081367492676, + "ce_orig": 0.38741615414619446, + "epoch": 0.29563591918901433, + "kl_loss": 0.11507681012153625, + "loss_ib": 0.0015937761636450887, + "step": 1028 + }, + { + "ce_ib": 9.555229187011719, + "ce_orig": 0.7497819066047668, + "epoch": 0.29563591918901433, + "kl_loss": 0.1984127163887024, + "loss_ib": 0.0029396500904113054, + "step": 1028 + }, + { + "ce_ib": 7.1798930168151855, + "ce_orig": 0.4140661656856537, + "epoch": 0.29563591918901433, + "kl_loss": 0.11173292994499207, + "loss_ib": 0.0018353185150772333, + "step": 1028 + }, + { + "ce_ib": 5.1883063316345215, + "ce_orig": 0.3764129877090454, + "epoch": 0.29592350276799195, + "kl_loss": 0.11982943117618561, + "loss_ib": 0.0017171248327940702, + "step": 1029 + }, + { + "ce_ib": 5.456558704376221, + "ce_orig": 0.44917434453964233, + "epoch": 0.29592350276799195, + "kl_loss": 0.06864330172538757, + "loss_ib": 0.0012320888927206397, + "step": 1029 + }, + { + "ce_ib": 5.454277992248535, + "ce_orig": 0.7155625224113464, + "epoch": 0.29592350276799195, + "kl_loss": 0.10746078193187714, + "loss_ib": 0.0016200356185436249, + "step": 1029 + }, + { + "ce_ib": 6.581263065338135, + "ce_orig": 1.0528942346572876, + "epoch": 0.29592350276799195, + "kl_loss": 0.11141886562108994, + "loss_ib": 0.0017723148921504617, + "step": 1029 + }, + { + "epoch": 0.2962110863469696, + "grad_norm": 0.08278331160545349, + "learning_rate": 4.9395207275376175e-05, + "loss": 0.8518, + "step": 1030 + }, + { + "ce_ib": 5.568306922912598, + "ce_orig": 0.6978300213813782, + "epoch": 0.2962110863469696, + "kl_loss": 0.1296089142560959, + "loss_ib": 0.001852919696830213, + "step": 1030 + }, + { + "ce_ib": 8.422260284423828, + "ce_orig": 0.5663745403289795, + "epoch": 0.2962110863469696, + "kl_loss": 0.13868647813796997, + "loss_ib": 0.00222909078001976, + "step": 1030 + }, + { + "ce_ib": 4.727840423583984, + "ce_orig": 0.5925155878067017, + "epoch": 0.2962110863469696, + "kl_loss": 0.10484916716814041, + "loss_ib": 0.0015212756115943193, + "step": 1030 + }, + { + "ce_ib": 11.636137008666992, + "ce_orig": 1.7095333337783813, + "epoch": 0.2962110863469696, + "kl_loss": 0.14001572132110596, + "loss_ib": 0.0025637708604335785, + "step": 1030 + }, + { + "ce_ib": 6.531741142272949, + "ce_orig": 0.6975887417793274, + "epoch": 0.29649866992594726, + "kl_loss": 0.14314265549182892, + "loss_ib": 0.002084600506350398, + "step": 1031 + }, + { + "ce_ib": 6.6402268409729, + "ce_orig": 0.5389083027839661, + "epoch": 0.29649866992594726, + "kl_loss": 0.1222705990076065, + "loss_ib": 0.001886728685349226, + "step": 1031 + }, + { + "ce_ib": 5.543909072875977, + "ce_orig": 0.39250531792640686, + "epoch": 0.29649866992594726, + "kl_loss": 0.10852260887622833, + "loss_ib": 0.001639616908505559, + "step": 1031 + }, + { + "ce_ib": 3.4556961059570312, + "ce_orig": 0.577335000038147, + "epoch": 0.29649866992594726, + "kl_loss": 0.07768696546554565, + "loss_ib": 0.001122439163736999, + "step": 1031 + }, + { + "ce_ib": 5.805666446685791, + "ce_orig": 0.9784355163574219, + "epoch": 0.2967862535049249, + "kl_loss": 0.08703579753637314, + "loss_ib": 0.0014509245520457625, + "step": 1032 + }, + { + "ce_ib": 5.100844383239746, + "ce_orig": 0.6642316579818726, + "epoch": 0.2967862535049249, + "kl_loss": 0.05223621428012848, + "loss_ib": 0.0010324466275051236, + "step": 1032 + }, + { + "ce_ib": 7.460930824279785, + "ce_orig": 1.1851062774658203, + "epoch": 0.2967862535049249, + "kl_loss": 0.08702461421489716, + "loss_ib": 0.001616339199244976, + "step": 1032 + }, + { + "ce_ib": 6.544296741485596, + "ce_orig": 0.8161260485649109, + "epoch": 0.2967862535049249, + "kl_loss": 0.06634721159934998, + "loss_ib": 0.0013179017696529627, + "step": 1032 + }, + { + "ce_ib": 5.057808876037598, + "ce_orig": 0.7515257596969604, + "epoch": 0.2970738370839025, + "kl_loss": 0.07609856128692627, + "loss_ib": 0.0012667664559558034, + "step": 1033 + }, + { + "ce_ib": 5.757803916931152, + "ce_orig": 0.6364601850509644, + "epoch": 0.2970738370839025, + "kl_loss": 0.2905520796775818, + "loss_ib": 0.003481301013380289, + "step": 1033 + }, + { + "ce_ib": 8.416952133178711, + "ce_orig": 1.2678064107894897, + "epoch": 0.2970738370839025, + "kl_loss": 0.10128816962242126, + "loss_ib": 0.0018545768689364195, + "step": 1033 + }, + { + "ce_ib": 8.29395866394043, + "ce_orig": 0.6720482707023621, + "epoch": 0.2970738370839025, + "kl_loss": 0.18802112340927124, + "loss_ib": 0.0027096071280539036, + "step": 1033 + }, + { + "ce_ib": 11.922521591186523, + "ce_orig": 2.0747408866882324, + "epoch": 0.2973614206628801, + "kl_loss": 0.22134807705879211, + "loss_ib": 0.003405732801184058, + "step": 1034 + }, + { + "ce_ib": 6.077800750732422, + "ce_orig": 0.9439600706100464, + "epoch": 0.2973614206628801, + "kl_loss": 0.1280437707901001, + "loss_ib": 0.0018882177537307143, + "step": 1034 + }, + { + "ce_ib": 10.979880332946777, + "ce_orig": 1.6073428392410278, + "epoch": 0.2973614206628801, + "kl_loss": 0.13061973452568054, + "loss_ib": 0.0024041852448135614, + "step": 1034 + }, + { + "ce_ib": 10.396629333496094, + "ce_orig": 1.6829266548156738, + "epoch": 0.2973614206628801, + "kl_loss": 0.15234152972698212, + "loss_ib": 0.0025630779564380646, + "step": 1034 + }, + { + "epoch": 0.2976490042418578, + "grad_norm": 0.09937281906604767, + "learning_rate": 4.938669419686516e-05, + "loss": 0.8606, + "step": 1035 + }, + { + "ce_ib": 7.982855319976807, + "ce_orig": 0.5196459293365479, + "epoch": 0.2976490042418578, + "kl_loss": 0.14941297471523285, + "loss_ib": 0.002292415127158165, + "step": 1035 + }, + { + "ce_ib": 5.798346519470215, + "ce_orig": 0.758726179599762, + "epoch": 0.2976490042418578, + "kl_loss": 0.11238957196474075, + "loss_ib": 0.0017037303186953068, + "step": 1035 + }, + { + "ce_ib": 6.345818042755127, + "ce_orig": 0.6751218438148499, + "epoch": 0.2976490042418578, + "kl_loss": 0.08302076905965805, + "loss_ib": 0.0014647895004600286, + "step": 1035 + }, + { + "ce_ib": 6.886514663696289, + "ce_orig": 0.911274790763855, + "epoch": 0.2976490042418578, + "kl_loss": 0.12200108170509338, + "loss_ib": 0.0019086622633039951, + "step": 1035 + }, + { + "ce_ib": 6.984527587890625, + "ce_orig": 0.8811057209968567, + "epoch": 0.29793658782083543, + "kl_loss": 0.15952670574188232, + "loss_ib": 0.0022937196772545576, + "step": 1036 + }, + { + "ce_ib": 7.364218235015869, + "ce_orig": 0.7897801399230957, + "epoch": 0.29793658782083543, + "kl_loss": 0.11431416869163513, + "loss_ib": 0.0018795634387061, + "step": 1036 + }, + { + "ce_ib": 5.554378509521484, + "ce_orig": 0.8424960970878601, + "epoch": 0.29793658782083543, + "kl_loss": 0.08800135552883148, + "loss_ib": 0.0014354513259604573, + "step": 1036 + }, + { + "ce_ib": 4.187707901000977, + "ce_orig": 0.5914511680603027, + "epoch": 0.29793658782083543, + "kl_loss": 0.08407006412744522, + "loss_ib": 0.0012594714062288404, + "step": 1036 + }, + { + "ce_ib": 7.833523750305176, + "ce_orig": 1.050671935081482, + "epoch": 0.29822417139981305, + "kl_loss": 0.12092088162899017, + "loss_ib": 0.0019925611559301615, + "step": 1037 + }, + { + "ce_ib": 7.959522724151611, + "ce_orig": 0.8137964010238647, + "epoch": 0.29822417139981305, + "kl_loss": 0.2204059660434723, + "loss_ib": 0.0030000119004398584, + "step": 1037 + }, + { + "ce_ib": 7.40364933013916, + "ce_orig": 0.7971987128257751, + "epoch": 0.29822417139981305, + "kl_loss": 0.1723841428756714, + "loss_ib": 0.002464206423610449, + "step": 1037 + }, + { + "ce_ib": 9.738763809204102, + "ce_orig": 1.3775016069412231, + "epoch": 0.29822417139981305, + "kl_loss": 0.20213446021080017, + "loss_ib": 0.0029952209442853928, + "step": 1037 + }, + { + "ce_ib": 11.012545585632324, + "ce_orig": 0.8159978985786438, + "epoch": 0.29851175497879073, + "kl_loss": 0.1513182371854782, + "loss_ib": 0.0026144366711378098, + "step": 1038 + }, + { + "ce_ib": 5.5462565422058105, + "ce_orig": 0.489400178194046, + "epoch": 0.29851175497879073, + "kl_loss": 0.11296382546424866, + "loss_ib": 0.0016842639306560159, + "step": 1038 + }, + { + "ce_ib": 8.526248931884766, + "ce_orig": 1.3904296159744263, + "epoch": 0.29851175497879073, + "kl_loss": 0.08981022238731384, + "loss_ib": 0.0017507269512861967, + "step": 1038 + }, + { + "ce_ib": 6.063345432281494, + "ce_orig": 0.8342424631118774, + "epoch": 0.29851175497879073, + "kl_loss": 0.10905717313289642, + "loss_ib": 0.0016969061689451337, + "step": 1038 + }, + { + "ce_ib": 6.95911979675293, + "ce_orig": 1.0990016460418701, + "epoch": 0.29879933855776836, + "kl_loss": 0.10464999079704285, + "loss_ib": 0.001742411870509386, + "step": 1039 + }, + { + "ce_ib": 6.887325286865234, + "ce_orig": 0.7486907243728638, + "epoch": 0.29879933855776836, + "kl_loss": 0.1422598958015442, + "loss_ib": 0.002111331559717655, + "step": 1039 + }, + { + "ce_ib": 4.417207717895508, + "ce_orig": 0.8200974464416504, + "epoch": 0.29879933855776836, + "kl_loss": 0.08904124051332474, + "loss_ib": 0.0013321330770850182, + "step": 1039 + }, + { + "ce_ib": 8.487707138061523, + "ce_orig": 1.3702133893966675, + "epoch": 0.29879933855776836, + "kl_loss": 0.13536900281906128, + "loss_ib": 0.002202460775151849, + "step": 1039 + }, + { + "epoch": 0.299086922136746, + "grad_norm": 0.10019273310899734, + "learning_rate": 4.9378122365147536e-05, + "loss": 0.8481, + "step": 1040 + }, + { + "ce_ib": 5.079341888427734, + "ce_orig": 0.6118531227111816, + "epoch": 0.299086922136746, + "kl_loss": 0.08346180617809296, + "loss_ib": 0.0013425522483885288, + "step": 1040 + }, + { + "ce_ib": 5.607305526733398, + "ce_orig": 0.753025472164154, + "epoch": 0.299086922136746, + "kl_loss": 0.05994057655334473, + "loss_ib": 0.0011601363075897098, + "step": 1040 + }, + { + "ce_ib": 5.748326301574707, + "ce_orig": 0.856826901435852, + "epoch": 0.299086922136746, + "kl_loss": 0.09018149971961975, + "loss_ib": 0.0014766475651413202, + "step": 1040 + }, + { + "ce_ib": 7.7500901222229, + "ce_orig": 0.7174243330955505, + "epoch": 0.299086922136746, + "kl_loss": 0.1808132529258728, + "loss_ib": 0.0025831416714936495, + "step": 1040 + }, + { + "ce_ib": 5.7866387367248535, + "ce_orig": 0.5503837466239929, + "epoch": 0.2993745057157236, + "kl_loss": 0.10648153722286224, + "loss_ib": 0.0016434791032224894, + "step": 1041 + }, + { + "ce_ib": 5.319000244140625, + "ce_orig": 0.6512373089790344, + "epoch": 0.2993745057157236, + "kl_loss": 0.09968775510787964, + "loss_ib": 0.001528777414932847, + "step": 1041 + }, + { + "ce_ib": 5.886722564697266, + "ce_orig": 0.8125132322311401, + "epoch": 0.2993745057157236, + "kl_loss": 0.09538036584854126, + "loss_ib": 0.0015424757730215788, + "step": 1041 + }, + { + "ce_ib": 6.476065158843994, + "ce_orig": 0.7084008455276489, + "epoch": 0.2993745057157236, + "kl_loss": 0.14250393211841583, + "loss_ib": 0.0020726455841213465, + "step": 1041 + }, + { + "ce_ib": 6.568038463592529, + "ce_orig": 0.975024402141571, + "epoch": 0.2996620892947013, + "kl_loss": 0.09894341975450516, + "loss_ib": 0.001646237913519144, + "step": 1042 + }, + { + "ce_ib": 3.8546910285949707, + "ce_orig": 0.7323302626609802, + "epoch": 0.2996620892947013, + "kl_loss": 0.051041483879089355, + "loss_ib": 0.0008958838880062103, + "step": 1042 + }, + { + "ce_ib": 9.201594352722168, + "ce_orig": 1.5363073348999023, + "epoch": 0.2996620892947013, + "kl_loss": 0.14216488599777222, + "loss_ib": 0.002341808285564184, + "step": 1042 + }, + { + "ce_ib": 7.402287483215332, + "ce_orig": 1.1694655418395996, + "epoch": 0.2996620892947013, + "kl_loss": 0.13332687318325043, + "loss_ib": 0.002073497511446476, + "step": 1042 + }, + { + "ce_ib": 5.4790544509887695, + "ce_orig": 0.8705706596374512, + "epoch": 0.2999496728736789, + "kl_loss": 0.10294032841920853, + "loss_ib": 0.0015773087507113814, + "step": 1043 + }, + { + "ce_ib": 5.983057022094727, + "ce_orig": 0.9364078640937805, + "epoch": 0.2999496728736789, + "kl_loss": 0.11039305478334427, + "loss_ib": 0.00170223624445498, + "step": 1043 + }, + { + "ce_ib": 6.705295562744141, + "ce_orig": 0.7721993923187256, + "epoch": 0.2999496728736789, + "kl_loss": 0.1317780315876007, + "loss_ib": 0.0019883099012076855, + "step": 1043 + }, + { + "ce_ib": 5.997276782989502, + "ce_orig": 0.9583907127380371, + "epoch": 0.2999496728736789, + "kl_loss": 0.14914849400520325, + "loss_ib": 0.0020912124309688807, + "step": 1043 + }, + { + "ce_ib": 8.191937446594238, + "ce_orig": 1.0112303495407104, + "epoch": 0.30023725645265653, + "kl_loss": 0.12284594774246216, + "loss_ib": 0.002047653077170253, + "step": 1044 + }, + { + "ce_ib": 10.865947723388672, + "ce_orig": 0.9174994826316833, + "epoch": 0.30023725645265653, + "kl_loss": 0.11603246629238129, + "loss_ib": 0.002246919320896268, + "step": 1044 + }, + { + "ce_ib": 10.262970924377441, + "ce_orig": 1.3994874954223633, + "epoch": 0.30023725645265653, + "kl_loss": 0.11475729942321777, + "loss_ib": 0.0021738701034337282, + "step": 1044 + }, + { + "ce_ib": 8.54948902130127, + "ce_orig": 0.46436163783073425, + "epoch": 0.30023725645265653, + "kl_loss": 0.12399379909038544, + "loss_ib": 0.002094886964187026, + "step": 1044 + }, + { + "epoch": 0.3005248400316342, + "grad_norm": 0.08161374926567078, + "learning_rate": 4.936949180087486e-05, + "loss": 0.8708, + "step": 1045 + }, + { + "ce_ib": 3.662956476211548, + "ce_orig": 0.6253257989883423, + "epoch": 0.3005248400316342, + "kl_loss": 0.05958956480026245, + "loss_ib": 0.0009621912613511086, + "step": 1045 + }, + { + "ce_ib": 4.044605731964111, + "ce_orig": 0.745154619216919, + "epoch": 0.3005248400316342, + "kl_loss": 0.10994522273540497, + "loss_ib": 0.0015039127320051193, + "step": 1045 + }, + { + "ce_ib": 7.987272262573242, + "ce_orig": 1.0346519947052002, + "epoch": 0.3005248400316342, + "kl_loss": 0.17194198071956635, + "loss_ib": 0.002518146764487028, + "step": 1045 + }, + { + "ce_ib": 5.648697376251221, + "ce_orig": 1.072109580039978, + "epoch": 0.3005248400316342, + "kl_loss": 0.11444295197725296, + "loss_ib": 0.00170929916203022, + "step": 1045 + }, + { + "ce_ib": 5.086002826690674, + "ce_orig": 0.6663815379142761, + "epoch": 0.30081242361061183, + "kl_loss": 0.09517970681190491, + "loss_ib": 0.001460397383198142, + "step": 1046 + }, + { + "ce_ib": 7.656687259674072, + "ce_orig": 1.0890663862228394, + "epoch": 0.30081242361061183, + "kl_loss": 0.10373960435390472, + "loss_ib": 0.0018030646024271846, + "step": 1046 + }, + { + "ce_ib": 7.713747024536133, + "ce_orig": 1.1821391582489014, + "epoch": 0.30081242361061183, + "kl_loss": 0.16192738711833954, + "loss_ib": 0.0023906484711915255, + "step": 1046 + }, + { + "ce_ib": 6.953602313995361, + "ce_orig": 1.0051076412200928, + "epoch": 0.30081242361061183, + "kl_loss": 0.10963944345712662, + "loss_ib": 0.0017917546210810542, + "step": 1046 + }, + { + "ce_ib": 7.628968238830566, + "ce_orig": 0.8743991255760193, + "epoch": 0.30110000718958946, + "kl_loss": 0.1502256691455841, + "loss_ib": 0.0022651534527540207, + "step": 1047 + }, + { + "ce_ib": 9.052668571472168, + "ce_orig": 1.0976598262786865, + "epoch": 0.30110000718958946, + "kl_loss": 0.22826503217220306, + "loss_ib": 0.003187917172908783, + "step": 1047 + }, + { + "ce_ib": 4.832150936126709, + "ce_orig": 0.648388683795929, + "epoch": 0.30110000718958946, + "kl_loss": 0.13129526376724243, + "loss_ib": 0.0017961676931008697, + "step": 1047 + }, + { + "ce_ib": 6.754344940185547, + "ce_orig": 0.9024156332015991, + "epoch": 0.30110000718958946, + "kl_loss": 0.22326111793518066, + "loss_ib": 0.002908045658841729, + "step": 1047 + }, + { + "ce_ib": 5.170908451080322, + "ce_orig": 0.4192695915699005, + "epoch": 0.30138759076856714, + "kl_loss": 0.10928487777709961, + "loss_ib": 0.0016099396161735058, + "step": 1048 + }, + { + "ce_ib": 6.1424407958984375, + "ce_orig": 0.7895532846450806, + "epoch": 0.30138759076856714, + "kl_loss": 0.08899113535881042, + "loss_ib": 0.0015041553415358067, + "step": 1048 + }, + { + "ce_ib": 5.934038162231445, + "ce_orig": 0.6904341578483582, + "epoch": 0.30138759076856714, + "kl_loss": 0.09739300608634949, + "loss_ib": 0.0015673339366912842, + "step": 1048 + }, + { + "ce_ib": 8.76187801361084, + "ce_orig": 1.6208300590515137, + "epoch": 0.30138759076856714, + "kl_loss": 0.13631777465343475, + "loss_ib": 0.0022393655963242054, + "step": 1048 + }, + { + "ce_ib": 6.55295467376709, + "ce_orig": 0.5757778882980347, + "epoch": 0.30167517434754476, + "kl_loss": 0.0959104374051094, + "loss_ib": 0.0016143998363986611, + "step": 1049 + }, + { + "ce_ib": 8.004493713378906, + "ce_orig": 1.1587669849395752, + "epoch": 0.30167517434754476, + "kl_loss": 0.09329302608966827, + "loss_ib": 0.0017333796713501215, + "step": 1049 + }, + { + "ce_ib": 11.593831062316895, + "ce_orig": 1.764388918876648, + "epoch": 0.30167517434754476, + "kl_loss": 0.20481374859809875, + "loss_ib": 0.003207520581781864, + "step": 1049 + }, + { + "ce_ib": 8.537985801696777, + "ce_orig": 1.107876181602478, + "epoch": 0.30167517434754476, + "kl_loss": 0.21293607354164124, + "loss_ib": 0.0029831593856215477, + "step": 1049 + }, + { + "epoch": 0.3019627579265224, + "grad_norm": 0.13020655512809753, + "learning_rate": 4.9360802524840156e-05, + "loss": 0.9349, + "step": 1050 + }, + { + "ce_ib": 7.10082483291626, + "ce_orig": 0.9058458805084229, + "epoch": 0.3019627579265224, + "kl_loss": 0.10159776359796524, + "loss_ib": 0.0017260601744055748, + "step": 1050 + }, + { + "ce_ib": 5.80906343460083, + "ce_orig": 0.597679078578949, + "epoch": 0.3019627579265224, + "kl_loss": 0.10538171231746674, + "loss_ib": 0.0016347235068678856, + "step": 1050 + }, + { + "ce_ib": 6.977040767669678, + "ce_orig": 0.5290160179138184, + "epoch": 0.3019627579265224, + "kl_loss": 0.13264355063438416, + "loss_ib": 0.0020241395104676485, + "step": 1050 + }, + { + "ce_ib": 7.416954517364502, + "ce_orig": 1.0477352142333984, + "epoch": 0.3019627579265224, + "kl_loss": 0.14112350344657898, + "loss_ib": 0.0021529304794967175, + "step": 1050 + }, + { + "ce_ib": 4.349487781524658, + "ce_orig": 0.6894300580024719, + "epoch": 0.3022503415055, + "kl_loss": 0.06405559182167053, + "loss_ib": 0.0010755046969279647, + "step": 1051 + }, + { + "ce_ib": 6.142280101776123, + "ce_orig": 0.8738764524459839, + "epoch": 0.3022503415055, + "kl_loss": 0.10166037082672119, + "loss_ib": 0.0016308316262438893, + "step": 1051 + }, + { + "ce_ib": 5.816431522369385, + "ce_orig": 0.8697793483734131, + "epoch": 0.3022503415055, + "kl_loss": 0.09778453409671783, + "loss_ib": 0.0015594884753227234, + "step": 1051 + }, + { + "ce_ib": 5.512357234954834, + "ce_orig": 0.618314802646637, + "epoch": 0.3022503415055, + "kl_loss": 0.12777763605117798, + "loss_ib": 0.0018290119478479028, + "step": 1051 + }, + { + "ce_ib": 8.661779403686523, + "ce_orig": 1.062333106994629, + "epoch": 0.3025379250844777, + "kl_loss": 0.14228984713554382, + "loss_ib": 0.0022890763357281685, + "step": 1052 + }, + { + "ce_ib": 6.235224723815918, + "ce_orig": 0.600572943687439, + "epoch": 0.3025379250844777, + "kl_loss": 0.110453762114048, + "loss_ib": 0.0017280600732192397, + "step": 1052 + }, + { + "ce_ib": 3.774848699569702, + "ce_orig": 0.46214503049850464, + "epoch": 0.3025379250844777, + "kl_loss": 0.23360615968704224, + "loss_ib": 0.0027135463897138834, + "step": 1052 + }, + { + "ce_ib": 6.417864799499512, + "ce_orig": 1.1410952806472778, + "epoch": 0.3025379250844777, + "kl_loss": 0.10604594647884369, + "loss_ib": 0.0017022459069266915, + "step": 1052 + }, + { + "ce_ib": 10.220464706420898, + "ce_orig": 1.548168659210205, + "epoch": 0.3028255086634553, + "kl_loss": 0.11154457926750183, + "loss_ib": 0.0021374921780079603, + "step": 1053 + }, + { + "ce_ib": 4.950402736663818, + "ce_orig": 0.6234180331230164, + "epoch": 0.3028255086634553, + "kl_loss": 0.11562936753034592, + "loss_ib": 0.0016513338778167963, + "step": 1053 + }, + { + "ce_ib": 6.0322699546813965, + "ce_orig": 1.0492932796478271, + "epoch": 0.3028255086634553, + "kl_loss": 0.12673497200012207, + "loss_ib": 0.001870576641522348, + "step": 1053 + }, + { + "ce_ib": 6.646451473236084, + "ce_orig": 1.0099207162857056, + "epoch": 0.3028255086634553, + "kl_loss": 0.115445077419281, + "loss_ib": 0.0018190959235653281, + "step": 1053 + }, + { + "ce_ib": 7.095217704772949, + "ce_orig": 0.9569370746612549, + "epoch": 0.30311309224243294, + "kl_loss": 0.15879088640213013, + "loss_ib": 0.002297430532053113, + "step": 1054 + }, + { + "ce_ib": 7.884916305541992, + "ce_orig": 1.0214117765426636, + "epoch": 0.30311309224243294, + "kl_loss": 0.14951792359352112, + "loss_ib": 0.0022836709395051003, + "step": 1054 + }, + { + "ce_ib": 4.689992904663086, + "ce_orig": 0.7026373147964478, + "epoch": 0.30311309224243294, + "kl_loss": 0.0819244235754013, + "loss_ib": 0.0012882434530183673, + "step": 1054 + }, + { + "ce_ib": 6.779373645782471, + "ce_orig": 0.4927053451538086, + "epoch": 0.30311309224243294, + "kl_loss": 0.15542671084403992, + "loss_ib": 0.0022322044242173433, + "step": 1054 + }, + { + "epoch": 0.3034006758214106, + "grad_norm": 0.09209935367107391, + "learning_rate": 4.9352054557977905e-05, + "loss": 0.85, + "step": 1055 + }, + { + "ce_ib": 7.405393123626709, + "ce_orig": 0.7001031637191772, + "epoch": 0.3034006758214106, + "kl_loss": 0.19542153179645538, + "loss_ib": 0.0026947546284645796, + "step": 1055 + }, + { + "ce_ib": 6.689201354980469, + "ce_orig": 0.8217753767967224, + "epoch": 0.3034006758214106, + "kl_loss": 0.0826321691274643, + "loss_ib": 0.0014952417695894837, + "step": 1055 + }, + { + "ce_ib": 4.1700825691223145, + "ce_orig": 0.6262937188148499, + "epoch": 0.3034006758214106, + "kl_loss": 0.0850646048784256, + "loss_ib": 0.0012676542392000556, + "step": 1055 + }, + { + "ce_ib": 3.701383590698242, + "ce_orig": 0.7168072462081909, + "epoch": 0.3034006758214106, + "kl_loss": 0.07148706912994385, + "loss_ib": 0.0010850090766325593, + "step": 1055 + }, + { + "ce_ib": 11.786508560180664, + "ce_orig": 1.2961478233337402, + "epoch": 0.30368825940038824, + "kl_loss": 0.12000411748886108, + "loss_ib": 0.0023786919191479683, + "step": 1056 + }, + { + "ce_ib": 5.3256402015686035, + "ce_orig": 0.5889015793800354, + "epoch": 0.30368825940038824, + "kl_loss": 0.10559151321649551, + "loss_ib": 0.0015884791500866413, + "step": 1056 + }, + { + "ce_ib": 6.689587593078613, + "ce_orig": 0.7410926222801208, + "epoch": 0.30368825940038824, + "kl_loss": 0.13415184617042542, + "loss_ib": 0.002010477241128683, + "step": 1056 + }, + { + "ce_ib": 4.489993095397949, + "ce_orig": 0.4996775984764099, + "epoch": 0.30368825940038824, + "kl_loss": 0.09554598480463028, + "loss_ib": 0.001404459122568369, + "step": 1056 + }, + { + "ce_ib": 4.924744129180908, + "ce_orig": 0.47715672850608826, + "epoch": 0.30397584297936586, + "kl_loss": 0.13373792171478271, + "loss_ib": 0.0018298536306247115, + "step": 1057 + }, + { + "ce_ib": 8.683158874511719, + "ce_orig": 0.9591328501701355, + "epoch": 0.30397584297936586, + "kl_loss": 0.08485978841781616, + "loss_ib": 0.0017169136554002762, + "step": 1057 + }, + { + "ce_ib": 8.079586029052734, + "ce_orig": 1.0162447690963745, + "epoch": 0.30397584297936586, + "kl_loss": 0.20607218146324158, + "loss_ib": 0.0028686802834272385, + "step": 1057 + }, + { + "ce_ib": 3.9310555458068848, + "ce_orig": 0.3155190646648407, + "epoch": 0.30397584297936586, + "kl_loss": 0.3025915026664734, + "loss_ib": 0.0034190204460173845, + "step": 1057 + }, + { + "ce_ib": 5.745120048522949, + "ce_orig": 0.41896164417266846, + "epoch": 0.30426342655834354, + "kl_loss": 0.12001323699951172, + "loss_ib": 0.0017746443627402186, + "step": 1058 + }, + { + "ce_ib": 4.246820449829102, + "ce_orig": 0.39408591389656067, + "epoch": 0.30426342655834354, + "kl_loss": 0.10306812822818756, + "loss_ib": 0.0014553633518517017, + "step": 1058 + }, + { + "ce_ib": 4.1398844718933105, + "ce_orig": 0.6241393685340881, + "epoch": 0.30426342655834354, + "kl_loss": 0.11600920557975769, + "loss_ib": 0.0015740805538371205, + "step": 1058 + }, + { + "ce_ib": 5.1157755851745605, + "ce_orig": 0.5613738298416138, + "epoch": 0.30426342655834354, + "kl_loss": 0.15554389357566833, + "loss_ib": 0.002067016437649727, + "step": 1058 + }, + { + "ce_ib": 8.72223949432373, + "ce_orig": 0.7960661053657532, + "epoch": 0.30455101013732117, + "kl_loss": 0.15092414617538452, + "loss_ib": 0.002381465397775173, + "step": 1059 + }, + { + "ce_ib": 7.334806442260742, + "ce_orig": 0.8918651342391968, + "epoch": 0.30455101013732117, + "kl_loss": 0.11382514238357544, + "loss_ib": 0.0018717319471761584, + "step": 1059 + }, + { + "ce_ib": 3.012343406677246, + "ce_orig": 0.585025429725647, + "epoch": 0.30455101013732117, + "kl_loss": 0.06840424239635468, + "loss_ib": 0.0009852767689153552, + "step": 1059 + }, + { + "ce_ib": 10.939859390258789, + "ce_orig": 1.4614068269729614, + "epoch": 0.30455101013732117, + "kl_loss": 0.16254915297031403, + "loss_ib": 0.0027194772846996784, + "step": 1059 + }, + { + "epoch": 0.3048385937162988, + "grad_norm": 0.08189968019723892, + "learning_rate": 4.934324792136399e-05, + "loss": 0.8205, + "step": 1060 + }, + { + "ce_ib": 11.700628280639648, + "ce_orig": 1.5280909538269043, + "epoch": 0.3048385937162988, + "kl_loss": 0.10970384627580643, + "loss_ib": 0.002267101313918829, + "step": 1060 + }, + { + "ce_ib": 5.980067253112793, + "ce_orig": 0.7644383311271667, + "epoch": 0.3048385937162988, + "kl_loss": 0.07235151529312134, + "loss_ib": 0.001321521820500493, + "step": 1060 + }, + { + "ce_ib": 7.974216938018799, + "ce_orig": 0.8650918006896973, + "epoch": 0.3048385937162988, + "kl_loss": 0.13781039416790009, + "loss_ib": 0.002175525762140751, + "step": 1060 + }, + { + "ce_ib": 2.881650686264038, + "ce_orig": 0.31516513228416443, + "epoch": 0.3048385937162988, + "kl_loss": 0.3138682246208191, + "loss_ib": 0.003426847280934453, + "step": 1060 + }, + { + "ce_ib": 10.01279354095459, + "ce_orig": 1.4876623153686523, + "epoch": 0.3051261772952764, + "kl_loss": 0.1276686042547226, + "loss_ib": 0.002277965424582362, + "step": 1061 + }, + { + "ce_ib": 7.159035682678223, + "ce_orig": 0.40885889530181885, + "epoch": 0.3051261772952764, + "kl_loss": 0.1310914009809494, + "loss_ib": 0.0020268175285309553, + "step": 1061 + }, + { + "ce_ib": 10.178457260131836, + "ce_orig": 1.53799569606781, + "epoch": 0.3051261772952764, + "kl_loss": 0.15176355838775635, + "loss_ib": 0.002535481471568346, + "step": 1061 + }, + { + "ce_ib": 8.092935562133789, + "ce_orig": 0.9259153604507446, + "epoch": 0.3051261772952764, + "kl_loss": 0.22177819907665253, + "loss_ib": 0.0030270754359662533, + "step": 1061 + }, + { + "ce_ib": 4.42587947845459, + "ce_orig": 0.5612567663192749, + "epoch": 0.3054137608742541, + "kl_loss": 0.11645185202360153, + "loss_ib": 0.0016071064164862037, + "step": 1062 + }, + { + "ce_ib": 5.609553337097168, + "ce_orig": 0.908251941204071, + "epoch": 0.3054137608742541, + "kl_loss": 0.11812228709459305, + "loss_ib": 0.0017421781085431576, + "step": 1062 + }, + { + "ce_ib": 6.579468250274658, + "ce_orig": 0.693250298500061, + "epoch": 0.3054137608742541, + "kl_loss": 0.08764868974685669, + "loss_ib": 0.0015344336861744523, + "step": 1062 + }, + { + "ce_ib": 10.069143295288086, + "ce_orig": 1.441835641860962, + "epoch": 0.3054137608742541, + "kl_loss": 0.2499997913837433, + "loss_ib": 0.003506912151351571, + "step": 1062 + }, + { + "ce_ib": 5.1232991218566895, + "ce_orig": 0.5163192749023438, + "epoch": 0.3057013444532317, + "kl_loss": 0.10996317863464355, + "loss_ib": 0.0016119616338983178, + "step": 1063 + }, + { + "ce_ib": 4.89235782623291, + "ce_orig": 0.40680912137031555, + "epoch": 0.3057013444532317, + "kl_loss": 0.09841237962245941, + "loss_ib": 0.0014733595307916403, + "step": 1063 + }, + { + "ce_ib": 6.575563430786133, + "ce_orig": 0.8432719111442566, + "epoch": 0.3057013444532317, + "kl_loss": 0.08154290169477463, + "loss_ib": 0.0014729853719472885, + "step": 1063 + }, + { + "ce_ib": 5.160298824310303, + "ce_orig": 0.6140801906585693, + "epoch": 0.3057013444532317, + "kl_loss": 0.06841346621513367, + "loss_ib": 0.001200164551846683, + "step": 1063 + }, + { + "ce_ib": 8.059441566467285, + "ce_orig": 0.7148778438568115, + "epoch": 0.30598892803220934, + "kl_loss": 0.11310233175754547, + "loss_ib": 0.0019369673682376742, + "step": 1064 + }, + { + "ce_ib": 5.755097389221191, + "ce_orig": 0.5257096290588379, + "epoch": 0.30598892803220934, + "kl_loss": 0.10900678485631943, + "loss_ib": 0.0016655775252729654, + "step": 1064 + }, + { + "ce_ib": 9.884034156799316, + "ce_orig": 1.1813336610794067, + "epoch": 0.30598892803220934, + "kl_loss": 0.12008243799209595, + "loss_ib": 0.002189227845519781, + "step": 1064 + }, + { + "ce_ib": 5.514344215393066, + "ce_orig": 0.6407434940338135, + "epoch": 0.30598892803220934, + "kl_loss": 0.07819973677396774, + "loss_ib": 0.0013334316899999976, + "step": 1064 + }, + { + "epoch": 0.306276511611187, + "grad_norm": 0.08639845997095108, + "learning_rate": 4.9334382636215646e-05, + "loss": 0.8505, + "step": 1065 + }, + { + "ce_ib": 7.71027135848999, + "ce_orig": 0.8334816694259644, + "epoch": 0.306276511611187, + "kl_loss": 0.1781705617904663, + "loss_ib": 0.002552732825279236, + "step": 1065 + }, + { + "ce_ib": 7.65843391418457, + "ce_orig": 1.1109449863433838, + "epoch": 0.306276511611187, + "kl_loss": 0.09281620383262634, + "loss_ib": 0.001694005448371172, + "step": 1065 + }, + { + "ce_ib": 5.949745178222656, + "ce_orig": 0.5543390512466431, + "epoch": 0.306276511611187, + "kl_loss": 0.10283538699150085, + "loss_ib": 0.001623328309506178, + "step": 1065 + }, + { + "ce_ib": 8.59256649017334, + "ce_orig": 0.7991865873336792, + "epoch": 0.306276511611187, + "kl_loss": 0.1408337950706482, + "loss_ib": 0.0022675946820527315, + "step": 1065 + }, + { + "ce_ib": 5.791524887084961, + "ce_orig": 0.5286558866500854, + "epoch": 0.30656409519016464, + "kl_loss": 0.09039951115846634, + "loss_ib": 0.0014831476146355271, + "step": 1066 + }, + { + "ce_ib": 11.240863800048828, + "ce_orig": 1.9939417839050293, + "epoch": 0.30656409519016464, + "kl_loss": 0.15476444363594055, + "loss_ib": 0.0026717307046055794, + "step": 1066 + }, + { + "ce_ib": 5.748254776000977, + "ce_orig": 0.6310633420944214, + "epoch": 0.30656409519016464, + "kl_loss": 0.1464613676071167, + "loss_ib": 0.002039439044892788, + "step": 1066 + }, + { + "ce_ib": 5.478874206542969, + "ce_orig": 0.9024366736412048, + "epoch": 0.30656409519016464, + "kl_loss": 0.0984884575009346, + "loss_ib": 0.0015327719738706946, + "step": 1066 + }, + { + "ce_ib": 8.004805564880371, + "ce_orig": 0.8263446688652039, + "epoch": 0.30685167876914227, + "kl_loss": 0.1449379026889801, + "loss_ib": 0.002249859506264329, + "step": 1067 + }, + { + "ce_ib": 6.605048179626465, + "ce_orig": 0.7789648175239563, + "epoch": 0.30685167876914227, + "kl_loss": 0.08828707039356232, + "loss_ib": 0.0015433755470439792, + "step": 1067 + }, + { + "ce_ib": 10.462506294250488, + "ce_orig": 1.3526828289031982, + "epoch": 0.30685167876914227, + "kl_loss": 0.1412639617919922, + "loss_ib": 0.0024588902015239, + "step": 1067 + }, + { + "ce_ib": 8.052318572998047, + "ce_orig": 1.0146571397781372, + "epoch": 0.30685167876914227, + "kl_loss": 0.14023709297180176, + "loss_ib": 0.0022076028399169445, + "step": 1067 + }, + { + "ce_ib": 7.057815074920654, + "ce_orig": 0.7951789498329163, + "epoch": 0.30713926234811995, + "kl_loss": 0.19950871169567108, + "loss_ib": 0.0027008685283362865, + "step": 1068 + }, + { + "ce_ib": 7.53904914855957, + "ce_orig": 0.5687925219535828, + "epoch": 0.30713926234811995, + "kl_loss": 0.17302866280078888, + "loss_ib": 0.0024841914419084787, + "step": 1068 + }, + { + "ce_ib": 4.6915178298950195, + "ce_orig": 0.6768220067024231, + "epoch": 0.30713926234811995, + "kl_loss": 0.0843624621629715, + "loss_ib": 0.0013127763522788882, + "step": 1068 + }, + { + "ce_ib": 6.781597137451172, + "ce_orig": 1.1427780389785767, + "epoch": 0.30713926234811995, + "kl_loss": 0.1272558867931366, + "loss_ib": 0.0019507184624671936, + "step": 1068 + }, + { + "ce_ib": 8.581425666809082, + "ce_orig": 1.328827977180481, + "epoch": 0.30742684592709757, + "kl_loss": 0.18549340963363647, + "loss_ib": 0.0027130763046443462, + "step": 1069 + }, + { + "ce_ib": 6.743143558502197, + "ce_orig": 1.0867711305618286, + "epoch": 0.30742684592709757, + "kl_loss": 0.13806043565273285, + "loss_ib": 0.002054918557405472, + "step": 1069 + }, + { + "ce_ib": 7.102622985839844, + "ce_orig": 1.0041338205337524, + "epoch": 0.30742684592709757, + "kl_loss": 0.1152433380484581, + "loss_ib": 0.00186269567348063, + "step": 1069 + }, + { + "ce_ib": 7.737115383148193, + "ce_orig": 0.7306109666824341, + "epoch": 0.30742684592709757, + "kl_loss": 0.10602246224880219, + "loss_ib": 0.0018339360831305385, + "step": 1069 + }, + { + "epoch": 0.3077144295060752, + "grad_norm": 0.10589238256216049, + "learning_rate": 4.9325458723891405e-05, + "loss": 0.8881, + "step": 1070 + }, + { + "ce_ib": 8.593700408935547, + "ce_orig": 1.2556962966918945, + "epoch": 0.3077144295060752, + "kl_loss": 0.12282206118106842, + "loss_ib": 0.0020875907503068447, + "step": 1070 + }, + { + "ce_ib": 7.65969181060791, + "ce_orig": 0.8783318996429443, + "epoch": 0.3077144295060752, + "kl_loss": 0.1287730187177658, + "loss_ib": 0.0020536992233246565, + "step": 1070 + }, + { + "ce_ib": 6.38693904876709, + "ce_orig": 0.8617218732833862, + "epoch": 0.3077144295060752, + "kl_loss": 0.10807206481695175, + "loss_ib": 0.0017194146057590842, + "step": 1070 + }, + { + "ce_ib": 6.419977188110352, + "ce_orig": 0.9678337574005127, + "epoch": 0.3077144295060752, + "kl_loss": 0.09231145679950714, + "loss_ib": 0.0015651121502742171, + "step": 1070 + }, + { + "ce_ib": 5.127742290496826, + "ce_orig": 0.3816016614437103, + "epoch": 0.3080020130850528, + "kl_loss": 0.16862201690673828, + "loss_ib": 0.00219899439252913, + "step": 1071 + }, + { + "ce_ib": 6.4471116065979, + "ce_orig": 1.1590927839279175, + "epoch": 0.3080020130850528, + "kl_loss": 0.11116702854633331, + "loss_ib": 0.0017563813598826528, + "step": 1071 + }, + { + "ce_ib": 8.026755332946777, + "ce_orig": 1.3219317197799683, + "epoch": 0.3080020130850528, + "kl_loss": 0.10397316515445709, + "loss_ib": 0.0018424070440232754, + "step": 1071 + }, + { + "ce_ib": 4.885175704956055, + "ce_orig": 0.6101803183555603, + "epoch": 0.3080020130850528, + "kl_loss": 0.14653921127319336, + "loss_ib": 0.0019539096392691135, + "step": 1071 + }, + { + "ce_ib": 8.081513404846191, + "ce_orig": 1.0486963987350464, + "epoch": 0.3082895966640305, + "kl_loss": 0.1061769351363182, + "loss_ib": 0.0018699206411838531, + "step": 1072 + }, + { + "ce_ib": 4.16774845123291, + "ce_orig": 0.545195996761322, + "epoch": 0.3082895966640305, + "kl_loss": 0.10797026008367538, + "loss_ib": 0.0014964774018153548, + "step": 1072 + }, + { + "ce_ib": 5.15360164642334, + "ce_orig": 0.7959751486778259, + "epoch": 0.3082895966640305, + "kl_loss": 0.1221914291381836, + "loss_ib": 0.0017372744623571634, + "step": 1072 + }, + { + "ce_ib": 8.818731307983398, + "ce_orig": 1.3822993040084839, + "epoch": 0.3082895966640305, + "kl_loss": 0.21638715267181396, + "loss_ib": 0.0030457444954663515, + "step": 1072 + }, + { + "ce_ib": 4.697595119476318, + "ce_orig": 0.648749589920044, + "epoch": 0.3085771802430081, + "kl_loss": 0.09851400554180145, + "loss_ib": 0.0014548995532095432, + "step": 1073 + }, + { + "ce_ib": 8.36463737487793, + "ce_orig": 1.2512192726135254, + "epoch": 0.3085771802430081, + "kl_loss": 0.1037246435880661, + "loss_ib": 0.0018737100763246417, + "step": 1073 + }, + { + "ce_ib": 6.351157188415527, + "ce_orig": 0.5725387334823608, + "epoch": 0.3085771802430081, + "kl_loss": 0.1913139820098877, + "loss_ib": 0.002548255492001772, + "step": 1073 + }, + { + "ce_ib": 8.301779747009277, + "ce_orig": 1.3257883787155151, + "epoch": 0.3085771802430081, + "kl_loss": 0.0830536037683487, + "loss_ib": 0.0016607139259576797, + "step": 1073 + }, + { + "ce_ib": 9.013845443725586, + "ce_orig": 1.4739587306976318, + "epoch": 0.30886476382198574, + "kl_loss": 0.08682604134082794, + "loss_ib": 0.001769644906744361, + "step": 1074 + }, + { + "ce_ib": 5.944709300994873, + "ce_orig": 0.6181069016456604, + "epoch": 0.30886476382198574, + "kl_loss": 0.08755936473608017, + "loss_ib": 0.0014700645115226507, + "step": 1074 + }, + { + "ce_ib": 4.296208381652832, + "ce_orig": 0.5747141242027283, + "epoch": 0.30886476382198574, + "kl_loss": 0.08138515800237656, + "loss_ib": 0.0012434723321348429, + "step": 1074 + }, + { + "ce_ib": 8.861016273498535, + "ce_orig": 0.9520947933197021, + "epoch": 0.30886476382198574, + "kl_loss": 0.11723033338785172, + "loss_ib": 0.002058404963463545, + "step": 1074 + }, + { + "epoch": 0.3091523474009634, + "grad_norm": 0.09396370500326157, + "learning_rate": 4.931647620589104e-05, + "loss": 0.9133, + "step": 1075 + }, + { + "ce_ib": 9.394133567810059, + "ce_orig": 1.0916211605072021, + "epoch": 0.3091523474009634, + "kl_loss": 0.11132264137268066, + "loss_ib": 0.0020526396110653877, + "step": 1075 + }, + { + "ce_ib": 5.052248001098633, + "ce_orig": 0.8652608394622803, + "epoch": 0.3091523474009634, + "kl_loss": 0.0959598571062088, + "loss_ib": 0.0014648232609033585, + "step": 1075 + }, + { + "ce_ib": 9.793839454650879, + "ce_orig": 1.2052462100982666, + "epoch": 0.3091523474009634, + "kl_loss": 0.1138681098818779, + "loss_ib": 0.002118065021932125, + "step": 1075 + }, + { + "ce_ib": 3.8647079467773438, + "ce_orig": 0.571103036403656, + "epoch": 0.3091523474009634, + "kl_loss": 0.12573961913585663, + "loss_ib": 0.0016438668826594949, + "step": 1075 + }, + { + "ce_ib": 4.259771823883057, + "ce_orig": 0.6271834373474121, + "epoch": 0.30943993097994105, + "kl_loss": 0.07756983488798141, + "loss_ib": 0.0012016755063086748, + "step": 1076 + }, + { + "ce_ib": 8.445744514465332, + "ce_orig": 0.760082483291626, + "epoch": 0.30943993097994105, + "kl_loss": 0.10391563922166824, + "loss_ib": 0.0018837308743968606, + "step": 1076 + }, + { + "ce_ib": 3.1767988204956055, + "ce_orig": 0.40816348791122437, + "epoch": 0.30943993097994105, + "kl_loss": 0.06804482638835907, + "loss_ib": 0.0009981280891224742, + "step": 1076 + }, + { + "ce_ib": 7.188921928405762, + "ce_orig": 1.1699033975601196, + "epoch": 0.30943993097994105, + "kl_loss": 0.18664929270744324, + "loss_ib": 0.0025853849947452545, + "step": 1076 + }, + { + "ce_ib": 9.287099838256836, + "ce_orig": 1.3222893476486206, + "epoch": 0.30972751455891867, + "kl_loss": 0.26636114716529846, + "loss_ib": 0.0035923211835324764, + "step": 1077 + }, + { + "ce_ib": 5.982432842254639, + "ce_orig": 0.6701129674911499, + "epoch": 0.30972751455891867, + "kl_loss": 0.08747244626283646, + "loss_ib": 0.0014729676768183708, + "step": 1077 + }, + { + "ce_ib": 6.506511211395264, + "ce_orig": 1.0417073965072632, + "epoch": 0.30972751455891867, + "kl_loss": 0.15278568863868713, + "loss_ib": 0.0021785080898553133, + "step": 1077 + }, + { + "ce_ib": 3.9755756855010986, + "ce_orig": 0.711229681968689, + "epoch": 0.30972751455891867, + "kl_loss": 0.055553995072841644, + "loss_ib": 0.0009530974784865975, + "step": 1077 + }, + { + "ce_ib": 6.50349760055542, + "ce_orig": 0.8074575066566467, + "epoch": 0.31001509813789635, + "kl_loss": 0.13486388325691223, + "loss_ib": 0.00199898867867887, + "step": 1078 + }, + { + "ce_ib": 4.8907952308654785, + "ce_orig": 0.7938336133956909, + "epoch": 0.31001509813789635, + "kl_loss": 0.09148908406496048, + "loss_ib": 0.0014039704110473394, + "step": 1078 + }, + { + "ce_ib": 4.82853889465332, + "ce_orig": 0.6393706202507019, + "epoch": 0.31001509813789635, + "kl_loss": 0.16172416508197784, + "loss_ib": 0.002100095385685563, + "step": 1078 + }, + { + "ce_ib": 8.266220092773438, + "ce_orig": 0.8295358419418335, + "epoch": 0.31001509813789635, + "kl_loss": 0.08213948458433151, + "loss_ib": 0.0016480168560519814, + "step": 1078 + }, + { + "ce_ib": 6.136702537536621, + "ce_orig": 0.36163759231567383, + "epoch": 0.310302681716874, + "kl_loss": 0.1180807575583458, + "loss_ib": 0.0017944778082892299, + "step": 1079 + }, + { + "ce_ib": 7.075868606567383, + "ce_orig": 0.8117911219596863, + "epoch": 0.310302681716874, + "kl_loss": 0.18309111893177032, + "loss_ib": 0.0025384980253875256, + "step": 1079 + }, + { + "ce_ib": 4.986417293548584, + "ce_orig": 0.5661327242851257, + "epoch": 0.310302681716874, + "kl_loss": 0.14899908006191254, + "loss_ib": 0.001988632371649146, + "step": 1079 + }, + { + "ce_ib": 7.7030439376831055, + "ce_orig": 0.769463062286377, + "epoch": 0.310302681716874, + "kl_loss": 0.12090402096509933, + "loss_ib": 0.0019793445244431496, + "step": 1079 + }, + { + "epoch": 0.3105902652958516, + "grad_norm": 0.08785349130630493, + "learning_rate": 4.9307435103855507e-05, + "loss": 0.8484, + "step": 1080 + }, + { + "ce_ib": 7.207575798034668, + "ce_orig": 0.9277626872062683, + "epoch": 0.3105902652958516, + "kl_loss": 0.09557029604911804, + "loss_ib": 0.0016764604952186346, + "step": 1080 + }, + { + "ce_ib": 5.483087062835693, + "ce_orig": 1.1695438623428345, + "epoch": 0.3105902652958516, + "kl_loss": 0.1018943339586258, + "loss_ib": 0.001567251980304718, + "step": 1080 + }, + { + "ce_ib": 9.196081161499023, + "ce_orig": 0.6741942763328552, + "epoch": 0.3105902652958516, + "kl_loss": 0.1952982246875763, + "loss_ib": 0.0028725904412567616, + "step": 1080 + }, + { + "ce_ib": 7.061942100524902, + "ce_orig": 0.766249418258667, + "epoch": 0.3105902652958516, + "kl_loss": 0.07882822304964066, + "loss_ib": 0.0014944764552637935, + "step": 1080 + }, + { + "ce_ib": 3.37656569480896, + "ce_orig": 0.1894591599702835, + "epoch": 0.3108778488748292, + "kl_loss": 0.3519730567932129, + "loss_ib": 0.0038573869969695807, + "step": 1081 + }, + { + "ce_ib": 9.07180404663086, + "ce_orig": 0.8924823999404907, + "epoch": 0.3108778488748292, + "kl_loss": 0.12007517367601395, + "loss_ib": 0.002107931999489665, + "step": 1081 + }, + { + "ce_ib": 7.9613938331604, + "ce_orig": 0.8214685320854187, + "epoch": 0.3108778488748292, + "kl_loss": 0.13357852399349213, + "loss_ib": 0.0021319244988262653, + "step": 1081 + }, + { + "ce_ib": 3.412221670150757, + "ce_orig": 0.38754329085350037, + "epoch": 0.3108778488748292, + "kl_loss": 0.1467348337173462, + "loss_ib": 0.0018085704650729895, + "step": 1081 + }, + { + "ce_ib": 10.103753089904785, + "ce_orig": 0.83680260181427, + "epoch": 0.3111654324538069, + "kl_loss": 0.1199386864900589, + "loss_ib": 0.0022097621113061905, + "step": 1082 + }, + { + "ce_ib": 7.5057172775268555, + "ce_orig": 1.240213394165039, + "epoch": 0.3111654324538069, + "kl_loss": 0.10108008980751038, + "loss_ib": 0.0017613726668059826, + "step": 1082 + }, + { + "ce_ib": 6.571592330932617, + "ce_orig": 0.8511654734611511, + "epoch": 0.3111654324538069, + "kl_loss": 0.10485070198774338, + "loss_ib": 0.0017056661890819669, + "step": 1082 + }, + { + "ce_ib": 8.051729202270508, + "ce_orig": 1.262660264968872, + "epoch": 0.3111654324538069, + "kl_loss": 0.16858017444610596, + "loss_ib": 0.002490974497050047, + "step": 1082 + }, + { + "ce_ib": 8.413061141967773, + "ce_orig": 1.2371526956558228, + "epoch": 0.3114530160327845, + "kl_loss": 0.10371717810630798, + "loss_ib": 0.0018784778658300638, + "step": 1083 + }, + { + "ce_ib": 4.668495178222656, + "ce_orig": 0.5106315016746521, + "epoch": 0.3114530160327845, + "kl_loss": 0.11991654336452484, + "loss_ib": 0.0016660148976370692, + "step": 1083 + }, + { + "ce_ib": 6.911253929138184, + "ce_orig": 0.8380542993545532, + "epoch": 0.3114530160327845, + "kl_loss": 0.13106882572174072, + "loss_ib": 0.0020018136128783226, + "step": 1083 + }, + { + "ce_ib": 5.011559009552002, + "ce_orig": 0.5626703500747681, + "epoch": 0.3114530160327845, + "kl_loss": 0.14843079447746277, + "loss_ib": 0.0019854637794196606, + "step": 1083 + }, + { + "ce_ib": 8.654923439025879, + "ce_orig": 1.2997244596481323, + "epoch": 0.31174059961176215, + "kl_loss": 0.15330947935581207, + "loss_ib": 0.002398587064817548, + "step": 1084 + }, + { + "ce_ib": 6.45515251159668, + "ce_orig": 1.1039117574691772, + "epoch": 0.31174059961176215, + "kl_loss": 0.15635761618614197, + "loss_ib": 0.0022090913262218237, + "step": 1084 + }, + { + "ce_ib": 6.613543510437012, + "ce_orig": 0.7879582643508911, + "epoch": 0.31174059961176215, + "kl_loss": 0.1213221549987793, + "loss_ib": 0.0018745758570730686, + "step": 1084 + }, + { + "ce_ib": 11.970968246459961, + "ce_orig": 1.7786223888397217, + "epoch": 0.31174059961176215, + "kl_loss": 0.20531702041625977, + "loss_ib": 0.0032502668909728527, + "step": 1084 + }, + { + "epoch": 0.3120281831907398, + "grad_norm": 0.10645577311515808, + "learning_rate": 4.9298335439566946e-05, + "loss": 0.9048, + "step": 1085 + }, + { + "ce_ib": 5.543363094329834, + "ce_orig": 0.7198330760002136, + "epoch": 0.3120281831907398, + "kl_loss": 0.09693525731563568, + "loss_ib": 0.0015236889012157917, + "step": 1085 + }, + { + "ce_ib": 4.480995178222656, + "ce_orig": 0.5485464930534363, + "epoch": 0.3120281831907398, + "kl_loss": 0.08271978050470352, + "loss_ib": 0.0012752973707392812, + "step": 1085 + }, + { + "ce_ib": 6.836972713470459, + "ce_orig": 0.661550760269165, + "epoch": 0.3120281831907398, + "kl_loss": 0.15786096453666687, + "loss_ib": 0.0022623068653047085, + "step": 1085 + }, + { + "ce_ib": 5.3108320236206055, + "ce_orig": 0.6704514622688293, + "epoch": 0.3120281831907398, + "kl_loss": 0.08122578263282776, + "loss_ib": 0.0013433409621939063, + "step": 1085 + }, + { + "ce_ib": 5.4588189125061035, + "ce_orig": 0.7251988649368286, + "epoch": 0.31231576676971745, + "kl_loss": 0.09487950801849365, + "loss_ib": 0.0014946769224479795, + "step": 1086 + }, + { + "ce_ib": 8.106192588806152, + "ce_orig": 1.0608155727386475, + "epoch": 0.31231576676971745, + "kl_loss": 0.13376294076442719, + "loss_ib": 0.002148248488083482, + "step": 1086 + }, + { + "ce_ib": 4.68678092956543, + "ce_orig": 0.4922115206718445, + "epoch": 0.31231576676971745, + "kl_loss": 0.13289615511894226, + "loss_ib": 0.0017976395320147276, + "step": 1086 + }, + { + "ce_ib": 7.68958854675293, + "ce_orig": 1.3556071519851685, + "epoch": 0.31231576676971745, + "kl_loss": 0.11010056734085083, + "loss_ib": 0.001869964529760182, + "step": 1086 + }, + { + "ce_ib": 4.1490583419799805, + "ce_orig": 0.5750277638435364, + "epoch": 0.3126033503486951, + "kl_loss": 0.07980804145336151, + "loss_ib": 0.0012129861861467361, + "step": 1087 + }, + { + "ce_ib": 4.900514125823975, + "ce_orig": 0.6933945417404175, + "epoch": 0.3126033503486951, + "kl_loss": 0.11946623772382736, + "loss_ib": 0.0016847137594595551, + "step": 1087 + }, + { + "ce_ib": 8.583094596862793, + "ce_orig": 1.2328472137451172, + "epoch": 0.3126033503486951, + "kl_loss": 0.23440392315387726, + "loss_ib": 0.003202348481863737, + "step": 1087 + }, + { + "ce_ib": 5.387114524841309, + "ce_orig": 0.8373020887374878, + "epoch": 0.3126033503486951, + "kl_loss": 0.05764302611351013, + "loss_ib": 0.0011151416692882776, + "step": 1087 + }, + { + "ce_ib": 5.245874881744385, + "ce_orig": 0.7845153212547302, + "epoch": 0.31289093392767275, + "kl_loss": 0.13156041502952576, + "loss_ib": 0.001840191544033587, + "step": 1088 + }, + { + "ce_ib": 6.439451217651367, + "ce_orig": 1.0369670391082764, + "epoch": 0.31289093392767275, + "kl_loss": 0.148685485124588, + "loss_ib": 0.002130799926817417, + "step": 1088 + }, + { + "ce_ib": 9.124055862426758, + "ce_orig": 1.409205675125122, + "epoch": 0.31289093392767275, + "kl_loss": 0.20957261323928833, + "loss_ib": 0.0030081316363066435, + "step": 1088 + }, + { + "ce_ib": 7.134158611297607, + "ce_orig": 0.9640491008758545, + "epoch": 0.31289093392767275, + "kl_loss": 0.11817365884780884, + "loss_ib": 0.0018951522652059793, + "step": 1088 + }, + { + "ce_ib": 8.807918548583984, + "ce_orig": 1.233889102935791, + "epoch": 0.3131785175066504, + "kl_loss": 0.12623171508312225, + "loss_ib": 0.0021431089844554663, + "step": 1089 + }, + { + "ce_ib": 4.509085178375244, + "ce_orig": 0.8164548277854919, + "epoch": 0.3131785175066504, + "kl_loss": 0.08705399185419083, + "loss_ib": 0.0013214483624324203, + "step": 1089 + }, + { + "ce_ib": 6.4740095138549805, + "ce_orig": 0.8254353404045105, + "epoch": 0.3131785175066504, + "kl_loss": 0.11919526755809784, + "loss_ib": 0.0018393535865470767, + "step": 1089 + }, + { + "ce_ib": 8.610673904418945, + "ce_orig": 0.8705013990402222, + "epoch": 0.3131785175066504, + "kl_loss": 0.18129542469978333, + "loss_ib": 0.0026740217581391335, + "step": 1089 + }, + { + "epoch": 0.313466101085628, + "grad_norm": 0.09719450771808624, + "learning_rate": 4.9289177234948535e-05, + "loss": 0.9306, + "step": 1090 + }, + { + "ce_ib": 6.462231159210205, + "ce_orig": 0.8928760290145874, + "epoch": 0.313466101085628, + "kl_loss": 0.14842715859413147, + "loss_ib": 0.002130494685843587, + "step": 1090 + }, + { + "ce_ib": 4.3265485763549805, + "ce_orig": 0.44724979996681213, + "epoch": 0.313466101085628, + "kl_loss": 0.22264862060546875, + "loss_ib": 0.0026591410860419273, + "step": 1090 + }, + { + "ce_ib": 7.629527568817139, + "ce_orig": 0.6570071578025818, + "epoch": 0.313466101085628, + "kl_loss": 0.09228412061929703, + "loss_ib": 0.0016857939772307873, + "step": 1090 + }, + { + "ce_ib": 4.116368770599365, + "ce_orig": 0.7895284295082092, + "epoch": 0.313466101085628, + "kl_loss": 0.0641932412981987, + "loss_ib": 0.0010535692563280463, + "step": 1090 + }, + { + "ce_ib": 5.082763671875, + "ce_orig": 0.6230810284614563, + "epoch": 0.3137536846646056, + "kl_loss": 0.09477473795413971, + "loss_ib": 0.001456023775972426, + "step": 1091 + }, + { + "ce_ib": 4.967955589294434, + "ce_orig": 0.4889741539955139, + "epoch": 0.3137536846646056, + "kl_loss": 0.1820719838142395, + "loss_ib": 0.0023175152018666267, + "step": 1091 + }, + { + "ce_ib": 7.032950401306152, + "ce_orig": 0.8132173418998718, + "epoch": 0.3137536846646056, + "kl_loss": 0.10906738042831421, + "loss_ib": 0.0017939688405022025, + "step": 1091 + }, + { + "ce_ib": 2.5505075454711914, + "ce_orig": 0.4912968873977661, + "epoch": 0.3137536846646056, + "kl_loss": 0.05822046473622322, + "loss_ib": 0.0008372553857043386, + "step": 1091 + }, + { + "ce_ib": 6.664619445800781, + "ce_orig": 0.940836489200592, + "epoch": 0.3140412682435833, + "kl_loss": 0.09742730855941772, + "loss_ib": 0.0016407349612563848, + "step": 1092 + }, + { + "ce_ib": 8.534614562988281, + "ce_orig": 1.0564587116241455, + "epoch": 0.3140412682435833, + "kl_loss": 0.19022688269615173, + "loss_ib": 0.002755730180069804, + "step": 1092 + }, + { + "ce_ib": 9.131776809692383, + "ce_orig": 1.1223782300949097, + "epoch": 0.3140412682435833, + "kl_loss": 0.12838055193424225, + "loss_ib": 0.002196982968598604, + "step": 1092 + }, + { + "ce_ib": 7.103231430053711, + "ce_orig": 1.214754581451416, + "epoch": 0.3140412682435833, + "kl_loss": 0.2429865449666977, + "loss_ib": 0.0031401882879436016, + "step": 1092 + }, + { + "ce_ib": 4.4372429847717285, + "ce_orig": 0.611566424369812, + "epoch": 0.31432885182256093, + "kl_loss": 0.10111133754253387, + "loss_ib": 0.0014548376202583313, + "step": 1093 + }, + { + "ce_ib": 6.007613182067871, + "ce_orig": 0.5547209978103638, + "epoch": 0.31432885182256093, + "kl_loss": 0.16770076751708984, + "loss_ib": 0.0022777689155191183, + "step": 1093 + }, + { + "ce_ib": 6.475616455078125, + "ce_orig": 0.9302380681037903, + "epoch": 0.31432885182256093, + "kl_loss": 0.10777818411588669, + "loss_ib": 0.0017253434052690864, + "step": 1093 + }, + { + "ce_ib": 6.58966064453125, + "ce_orig": 0.8381888270378113, + "epoch": 0.31432885182256093, + "kl_loss": 0.4541102647781372, + "loss_ib": 0.005200068932026625, + "step": 1093 + }, + { + "ce_ib": 4.925845146179199, + "ce_orig": 0.7416554689407349, + "epoch": 0.31461643540153855, + "kl_loss": 0.1399184912443161, + "loss_ib": 0.0018917694687843323, + "step": 1094 + }, + { + "ce_ib": 7.991376876831055, + "ce_orig": 0.6140057444572449, + "epoch": 0.31461643540153855, + "kl_loss": 0.11923287063837051, + "loss_ib": 0.0019914661534130573, + "step": 1094 + }, + { + "ce_ib": 7.087644100189209, + "ce_orig": 0.7012661695480347, + "epoch": 0.31461643540153855, + "kl_loss": 0.10533401370048523, + "loss_ib": 0.0017621045699343085, + "step": 1094 + }, + { + "ce_ib": 5.176535129547119, + "ce_orig": 0.5360772609710693, + "epoch": 0.31461643540153855, + "kl_loss": 0.10969047248363495, + "loss_ib": 0.0016145581612363458, + "step": 1094 + }, + { + "epoch": 0.31490401898051623, + "grad_norm": 0.08524588495492935, + "learning_rate": 4.927996051206454e-05, + "loss": 0.7879, + "step": 1095 + }, + { + "ce_ib": 7.138000965118408, + "ce_orig": 0.6079102158546448, + "epoch": 0.31490401898051623, + "kl_loss": 0.1486617475748062, + "loss_ib": 0.0022004174534231424, + "step": 1095 + }, + { + "ce_ib": 4.854630947113037, + "ce_orig": 0.11275182664394379, + "epoch": 0.31490401898051623, + "kl_loss": 0.1564667969942093, + "loss_ib": 0.0020501308608800173, + "step": 1095 + }, + { + "ce_ib": 5.65756368637085, + "ce_orig": 0.7183927893638611, + "epoch": 0.31490401898051623, + "kl_loss": 0.09430578351020813, + "loss_ib": 0.0015088141662999988, + "step": 1095 + }, + { + "ce_ib": 8.676382064819336, + "ce_orig": 0.9703883528709412, + "epoch": 0.31490401898051623, + "kl_loss": 0.11354076862335205, + "loss_ib": 0.0020030459854751825, + "step": 1095 + }, + { + "ce_ib": 9.102498054504395, + "ce_orig": 1.273013710975647, + "epoch": 0.31519160255949386, + "kl_loss": 0.1675441563129425, + "loss_ib": 0.0025856911670416594, + "step": 1096 + }, + { + "ce_ib": 5.018817901611328, + "ce_orig": 0.8406528234481812, + "epoch": 0.31519160255949386, + "kl_loss": 0.09821783006191254, + "loss_ib": 0.0014840599615126848, + "step": 1096 + }, + { + "ce_ib": 6.319728374481201, + "ce_orig": 0.7162959575653076, + "epoch": 0.31519160255949386, + "kl_loss": 0.08511405438184738, + "loss_ib": 0.00148311338853091, + "step": 1096 + }, + { + "ce_ib": 6.150030612945557, + "ce_orig": 0.9151242971420288, + "epoch": 0.31519160255949386, + "kl_loss": 0.15213200449943542, + "loss_ib": 0.0021363231353461742, + "step": 1096 + }, + { + "ce_ib": 2.826596975326538, + "ce_orig": 0.5897778868675232, + "epoch": 0.3154791861384715, + "kl_loss": 0.05788389965891838, + "loss_ib": 0.000861498701851815, + "step": 1097 + }, + { + "ce_ib": 6.561267852783203, + "ce_orig": 0.6017415523529053, + "epoch": 0.3154791861384715, + "kl_loss": 0.19087156653404236, + "loss_ib": 0.0025648423470556736, + "step": 1097 + }, + { + "ce_ib": 9.689705848693848, + "ce_orig": 0.8445053100585938, + "epoch": 0.3154791861384715, + "kl_loss": 0.12390824407339096, + "loss_ib": 0.0022080529015511274, + "step": 1097 + }, + { + "ce_ib": 3.773801803588867, + "ce_orig": 0.5975720286369324, + "epoch": 0.3154791861384715, + "kl_loss": 0.07443420588970184, + "loss_ib": 0.001121722161769867, + "step": 1097 + }, + { + "ce_ib": 7.749790668487549, + "ce_orig": 1.0078861713409424, + "epoch": 0.31576676971744916, + "kl_loss": 0.11658213287591934, + "loss_ib": 0.001940800342708826, + "step": 1098 + }, + { + "ce_ib": 7.260580539703369, + "ce_orig": 0.6796541213989258, + "epoch": 0.31576676971744916, + "kl_loss": 0.0810522586107254, + "loss_ib": 0.0015365806175395846, + "step": 1098 + }, + { + "ce_ib": 5.737454414367676, + "ce_orig": 0.9053107500076294, + "epoch": 0.31576676971744916, + "kl_loss": 0.12374285608530045, + "loss_ib": 0.001811173977330327, + "step": 1098 + }, + { + "ce_ib": 7.831982612609863, + "ce_orig": 1.2352817058563232, + "epoch": 0.31576676971744916, + "kl_loss": 0.1046704649925232, + "loss_ib": 0.0018299027578905225, + "step": 1098 + }, + { + "ce_ib": 4.12555456161499, + "ce_orig": 0.6692237854003906, + "epoch": 0.3160543532964268, + "kl_loss": 0.07782041281461716, + "loss_ib": 0.0011907594744116068, + "step": 1099 + }, + { + "ce_ib": 3.2291836738586426, + "ce_orig": 0.5272323489189148, + "epoch": 0.3160543532964268, + "kl_loss": 0.08906276524066925, + "loss_ib": 0.0012135460274294019, + "step": 1099 + }, + { + "ce_ib": 4.781414031982422, + "ce_orig": 0.6674253344535828, + "epoch": 0.3160543532964268, + "kl_loss": 0.11731104552745819, + "loss_ib": 0.0016512519214302301, + "step": 1099 + }, + { + "ce_ib": 4.504537582397461, + "ce_orig": 0.6338003277778625, + "epoch": 0.3160543532964268, + "kl_loss": 0.08345992863178253, + "loss_ib": 0.0012850529747083783, + "step": 1099 + }, + { + "epoch": 0.3163419368754044, + "grad_norm": 0.10443267971277237, + "learning_rate": 4.9270685293120164e-05, + "loss": 0.823, + "step": 1100 + }, + { + "ce_ib": 2.7047183513641357, + "ce_orig": 0.5154035091400146, + "epoch": 0.3163419368754044, + "kl_loss": 0.07203371077775955, + "loss_ib": 0.0009908088250085711, + "step": 1100 + }, + { + "ce_ib": 8.287751197814941, + "ce_orig": 1.131018877029419, + "epoch": 0.3163419368754044, + "kl_loss": 0.10799284279346466, + "loss_ib": 0.0019087033579126, + "step": 1100 + }, + { + "ce_ib": 9.141654968261719, + "ce_orig": 1.1767055988311768, + "epoch": 0.3163419368754044, + "kl_loss": 0.14867722988128662, + "loss_ib": 0.0024009377229958773, + "step": 1100 + }, + { + "ce_ib": 4.616094589233398, + "ce_orig": 0.7783187627792358, + "epoch": 0.3163419368754044, + "kl_loss": 0.07596094906330109, + "loss_ib": 0.0012212188448756933, + "step": 1100 + }, + { + "ce_ib": 2.9336156845092773, + "ce_orig": 0.1434953659772873, + "epoch": 0.31662952045438203, + "kl_loss": 0.3486359715461731, + "loss_ib": 0.003779721213504672, + "step": 1101 + }, + { + "ce_ib": 7.112979888916016, + "ce_orig": 0.8758820295333862, + "epoch": 0.31662952045438203, + "kl_loss": 0.06863940507173538, + "loss_ib": 0.0013976918999105692, + "step": 1101 + }, + { + "ce_ib": 9.88776969909668, + "ce_orig": 1.4645624160766602, + "epoch": 0.31662952045438203, + "kl_loss": 0.328406423330307, + "loss_ib": 0.004272840917110443, + "step": 1101 + }, + { + "ce_ib": 4.820501804351807, + "ce_orig": 0.47723662853240967, + "epoch": 0.31662952045438203, + "kl_loss": 0.09538324177265167, + "loss_ib": 0.0014358825283125043, + "step": 1101 + }, + { + "ce_ib": 5.636781215667725, + "ce_orig": 0.602377712726593, + "epoch": 0.3169171040333597, + "kl_loss": 0.11122557520866394, + "loss_ib": 0.0016759338323026896, + "step": 1102 + }, + { + "ce_ib": 6.5077972412109375, + "ce_orig": 0.5252118110656738, + "epoch": 0.3169171040333597, + "kl_loss": 0.19280904531478882, + "loss_ib": 0.002578869927674532, + "step": 1102 + }, + { + "ce_ib": 4.5214409828186035, + "ce_orig": 0.8415386080741882, + "epoch": 0.3169171040333597, + "kl_loss": 0.06283672153949738, + "loss_ib": 0.0010805112542584538, + "step": 1102 + }, + { + "ce_ib": 4.760388374328613, + "ce_orig": 0.5159482955932617, + "epoch": 0.3169171040333597, + "kl_loss": 0.13038045167922974, + "loss_ib": 0.001779843238182366, + "step": 1102 + }, + { + "ce_ib": 8.62554931640625, + "ce_orig": 1.2505888938903809, + "epoch": 0.31720468761233733, + "kl_loss": 0.12411917746067047, + "loss_ib": 0.002103746635839343, + "step": 1103 + }, + { + "ce_ib": 6.545633792877197, + "ce_orig": 1.0744396448135376, + "epoch": 0.31720468761233733, + "kl_loss": 0.1949809491634369, + "loss_ib": 0.002604372799396515, + "step": 1103 + }, + { + "ce_ib": 5.3097310066223145, + "ce_orig": 0.7355318069458008, + "epoch": 0.31720468761233733, + "kl_loss": 0.13482140004634857, + "loss_ib": 0.0018791870679706335, + "step": 1103 + }, + { + "ce_ib": 7.22735595703125, + "ce_orig": 0.7363559007644653, + "epoch": 0.31720468761233733, + "kl_loss": 0.13793884217739105, + "loss_ib": 0.0021021240390837193, + "step": 1103 + }, + { + "ce_ib": 3.681156873703003, + "ce_orig": 0.7018173336982727, + "epoch": 0.31749227119131496, + "kl_loss": 0.06493549793958664, + "loss_ib": 0.0010174706112593412, + "step": 1104 + }, + { + "ce_ib": 6.435656547546387, + "ce_orig": 1.0511516332626343, + "epoch": 0.31749227119131496, + "kl_loss": 0.12981092929840088, + "loss_ib": 0.00194167485460639, + "step": 1104 + }, + { + "ce_ib": 6.3591718673706055, + "ce_orig": 0.8714869022369385, + "epoch": 0.31749227119131496, + "kl_loss": 0.07830870151519775, + "loss_ib": 0.001419004169292748, + "step": 1104 + }, + { + "ce_ib": 4.863466262817383, + "ce_orig": 0.524034857749939, + "epoch": 0.31749227119131496, + "kl_loss": 0.09495489299297333, + "loss_ib": 0.001435895566828549, + "step": 1104 + }, + { + "epoch": 0.31777985477029264, + "grad_norm": 0.11291716247797012, + "learning_rate": 4.926135160046157e-05, + "loss": 0.8209, + "step": 1105 + }, + { + "ce_ib": 5.778404235839844, + "ce_orig": 0.6216086745262146, + "epoch": 0.31777985477029264, + "kl_loss": 0.13615110516548157, + "loss_ib": 0.0019393513211980462, + "step": 1105 + }, + { + "ce_ib": 8.00284481048584, + "ce_orig": 1.2720705270767212, + "epoch": 0.31777985477029264, + "kl_loss": 0.1311652958393097, + "loss_ib": 0.0021119373850524426, + "step": 1105 + }, + { + "ce_ib": 5.706634998321533, + "ce_orig": 0.5876952409744263, + "epoch": 0.31777985477029264, + "kl_loss": 0.12042839080095291, + "loss_ib": 0.0017749472754076123, + "step": 1105 + }, + { + "ce_ib": 4.406344890594482, + "ce_orig": 0.6132227778434753, + "epoch": 0.31777985477029264, + "kl_loss": 0.0869908332824707, + "loss_ib": 0.0013105428079143167, + "step": 1105 + }, + { + "ce_ib": 5.951411247253418, + "ce_orig": 0.6965615153312683, + "epoch": 0.31806743834927026, + "kl_loss": 0.0936947911977768, + "loss_ib": 0.0015320890815928578, + "step": 1106 + }, + { + "ce_ib": 4.328317165374756, + "ce_orig": 0.48852014541625977, + "epoch": 0.31806743834927026, + "kl_loss": 0.11511750519275665, + "loss_ib": 0.001584006822668016, + "step": 1106 + }, + { + "ce_ib": 5.561557769775391, + "ce_orig": 0.8280245661735535, + "epoch": 0.31806743834927026, + "kl_loss": 0.13165396451950073, + "loss_ib": 0.0018726954003795981, + "step": 1106 + }, + { + "ce_ib": 9.187907218933105, + "ce_orig": 1.1692800521850586, + "epoch": 0.31806743834927026, + "kl_loss": 0.1220262423157692, + "loss_ib": 0.002139053074643016, + "step": 1106 + }, + { + "ce_ib": 6.820673942565918, + "ce_orig": 0.9661232829093933, + "epoch": 0.3183550219282479, + "kl_loss": 0.08730831742286682, + "loss_ib": 0.0015551503747701645, + "step": 1107 + }, + { + "ce_ib": 6.703719615936279, + "ce_orig": 0.6939508318901062, + "epoch": 0.3183550219282479, + "kl_loss": 0.21232838928699493, + "loss_ib": 0.002793655963614583, + "step": 1107 + }, + { + "ce_ib": 9.952964782714844, + "ce_orig": 1.418191909790039, + "epoch": 0.3183550219282479, + "kl_loss": 0.10500174760818481, + "loss_ib": 0.002045314060524106, + "step": 1107 + }, + { + "ce_ib": 4.250537395477295, + "ce_orig": 0.5294516086578369, + "epoch": 0.3183550219282479, + "kl_loss": 0.09490614384412766, + "loss_ib": 0.0013741151196882129, + "step": 1107 + }, + { + "ce_ib": 2.295755624771118, + "ce_orig": 0.15018223226070404, + "epoch": 0.31864260550722556, + "kl_loss": 0.15408454835414886, + "loss_ib": 0.0017704209312796593, + "step": 1108 + }, + { + "ce_ib": 6.062755584716797, + "ce_orig": 0.8133069276809692, + "epoch": 0.31864260550722556, + "kl_loss": 0.13426713645458221, + "loss_ib": 0.0019489468540996313, + "step": 1108 + }, + { + "ce_ib": 5.790729999542236, + "ce_orig": 0.8639890551567078, + "epoch": 0.31864260550722556, + "kl_loss": 0.1315470188856125, + "loss_ib": 0.0018945431802421808, + "step": 1108 + }, + { + "ce_ib": 5.948178291320801, + "ce_orig": 1.021532654762268, + "epoch": 0.31864260550722556, + "kl_loss": 0.09913386404514313, + "loss_ib": 0.0015861564315855503, + "step": 1108 + }, + { + "ce_ib": 4.356767177581787, + "ce_orig": 0.6611163020133972, + "epoch": 0.3189301890862032, + "kl_loss": 0.12729594111442566, + "loss_ib": 0.0017086360603570938, + "step": 1109 + }, + { + "ce_ib": 5.728999137878418, + "ce_orig": 0.9258163571357727, + "epoch": 0.3189301890862032, + "kl_loss": 0.10851135849952698, + "loss_ib": 0.0016580134397372603, + "step": 1109 + }, + { + "ce_ib": 6.743879318237305, + "ce_orig": 0.4497535824775696, + "epoch": 0.3189301890862032, + "kl_loss": 0.1591167449951172, + "loss_ib": 0.0022655553184449673, + "step": 1109 + }, + { + "ce_ib": 9.192666053771973, + "ce_orig": 1.3244154453277588, + "epoch": 0.3189301890862032, + "kl_loss": 0.07153521478176117, + "loss_ib": 0.0016346186166629195, + "step": 1109 + }, + { + "epoch": 0.3192177726651808, + "grad_norm": 0.09862250834703445, + "learning_rate": 4.92519594565758e-05, + "loss": 0.7799, + "step": 1110 + }, + { + "ce_ib": 6.488005638122559, + "ce_orig": 0.6318284869194031, + "epoch": 0.3192177726651808, + "kl_loss": 0.10362868010997772, + "loss_ib": 0.0016850873362272978, + "step": 1110 + }, + { + "ce_ib": 7.06200647354126, + "ce_orig": 1.099223256111145, + "epoch": 0.3192177726651808, + "kl_loss": 0.15589573979377747, + "loss_ib": 0.00226515787653625, + "step": 1110 + }, + { + "ce_ib": 6.631660461425781, + "ce_orig": 1.0255488157272339, + "epoch": 0.3192177726651808, + "kl_loss": 0.08422824740409851, + "loss_ib": 0.0015054484829306602, + "step": 1110 + }, + { + "ce_ib": 4.097757816314697, + "ce_orig": 0.6223424077033997, + "epoch": 0.3192177726651808, + "kl_loss": 0.11507381498813629, + "loss_ib": 0.0015605139778926969, + "step": 1110 + }, + { + "ce_ib": 7.600182056427002, + "ce_orig": 1.1621298789978027, + "epoch": 0.31950535624415843, + "kl_loss": 0.1489579677581787, + "loss_ib": 0.002249597804620862, + "step": 1111 + }, + { + "ce_ib": 4.432876110076904, + "ce_orig": 0.43630632758140564, + "epoch": 0.31950535624415843, + "kl_loss": 0.22790104150772095, + "loss_ib": 0.0027222977951169014, + "step": 1111 + }, + { + "ce_ib": 4.863165855407715, + "ce_orig": 0.8000689148902893, + "epoch": 0.31950535624415843, + "kl_loss": 0.043151505291461945, + "loss_ib": 0.0009178316104225814, + "step": 1111 + }, + { + "ce_ib": 4.82898473739624, + "ce_orig": 0.652249813079834, + "epoch": 0.31950535624415843, + "kl_loss": 0.10808897018432617, + "loss_ib": 0.0015637881588190794, + "step": 1111 + }, + { + "ce_ib": 5.253769874572754, + "ce_orig": 0.9016509652137756, + "epoch": 0.3197929398231361, + "kl_loss": 0.09644834697246552, + "loss_ib": 0.0014898603549227118, + "step": 1112 + }, + { + "ce_ib": 7.7812886238098145, + "ce_orig": 0.9018441438674927, + "epoch": 0.3197929398231361, + "kl_loss": 0.14024998247623444, + "loss_ib": 0.0021806287113577127, + "step": 1112 + }, + { + "ce_ib": 4.902985095977783, + "ce_orig": 0.6490185856819153, + "epoch": 0.3197929398231361, + "kl_loss": 0.08993034064769745, + "loss_ib": 0.00138960184995085, + "step": 1112 + }, + { + "ce_ib": 4.417405128479004, + "ce_orig": 0.5911821126937866, + "epoch": 0.3197929398231361, + "kl_loss": 0.09331687539815903, + "loss_ib": 0.0013749093050137162, + "step": 1112 + }, + { + "ce_ib": 7.669703483581543, + "ce_orig": 1.1216349601745605, + "epoch": 0.32008052340211374, + "kl_loss": 0.08051162958145142, + "loss_ib": 0.0015720865922048688, + "step": 1113 + }, + { + "ce_ib": 4.903026103973389, + "ce_orig": 0.6339471936225891, + "epoch": 0.32008052340211374, + "kl_loss": 0.05427828058600426, + "loss_ib": 0.001033085398375988, + "step": 1113 + }, + { + "ce_ib": 7.180636405944824, + "ce_orig": 0.9941835403442383, + "epoch": 0.32008052340211374, + "kl_loss": 0.12049823254346848, + "loss_ib": 0.001923045958392322, + "step": 1113 + }, + { + "ce_ib": 8.455522537231445, + "ce_orig": 1.3635344505310059, + "epoch": 0.32008052340211374, + "kl_loss": 0.14797498285770416, + "loss_ib": 0.0023253019899129868, + "step": 1113 + }, + { + "ce_ib": 5.931006908416748, + "ce_orig": 0.658078134059906, + "epoch": 0.32036810698109136, + "kl_loss": 0.08447106182575226, + "loss_ib": 0.0014378111809492111, + "step": 1114 + }, + { + "ce_ib": 7.390595436096191, + "ce_orig": 0.950509250164032, + "epoch": 0.32036810698109136, + "kl_loss": 0.11157698929309845, + "loss_ib": 0.001854829490184784, + "step": 1114 + }, + { + "ce_ib": 5.800291061401367, + "ce_orig": 0.9057431817054749, + "epoch": 0.32036810698109136, + "kl_loss": 0.13156373798847198, + "loss_ib": 0.001895666355267167, + "step": 1114 + }, + { + "ce_ib": 4.867544174194336, + "ce_orig": 0.4064299762248993, + "epoch": 0.32036810698109136, + "kl_loss": 0.10979843884706497, + "loss_ib": 0.001584738725796342, + "step": 1114 + }, + { + "epoch": 0.32065569056006904, + "grad_norm": 0.09395995736122131, + "learning_rate": 4.924250888409069e-05, + "loss": 0.8017, + "step": 1115 + }, + { + "ce_ib": 8.88463020324707, + "ce_orig": 1.196564793586731, + "epoch": 0.32065569056006904, + "kl_loss": 0.14164546132087708, + "loss_ib": 0.002304917434230447, + "step": 1115 + }, + { + "ce_ib": 6.908357620239258, + "ce_orig": 1.1473089456558228, + "epoch": 0.32065569056006904, + "kl_loss": 0.12058570235967636, + "loss_ib": 0.0018966927891597152, + "step": 1115 + }, + { + "ce_ib": 6.580382347106934, + "ce_orig": 0.7484446167945862, + "epoch": 0.32065569056006904, + "kl_loss": 0.07694339752197266, + "loss_ib": 0.0014274722198024392, + "step": 1115 + }, + { + "ce_ib": 6.1991047859191895, + "ce_orig": 0.7058902382850647, + "epoch": 0.32065569056006904, + "kl_loss": 0.13657167553901672, + "loss_ib": 0.0019856272265315056, + "step": 1115 + }, + { + "ce_ib": 3.3146166801452637, + "ce_orig": 0.3218761086463928, + "epoch": 0.32094327413904666, + "kl_loss": 0.1967342048883438, + "loss_ib": 0.00229880353435874, + "step": 1116 + }, + { + "ce_ib": 7.196146011352539, + "ce_orig": 0.8870516419410706, + "epoch": 0.32094327413904666, + "kl_loss": 0.2019130289554596, + "loss_ib": 0.0027387449517846107, + "step": 1116 + }, + { + "ce_ib": 2.6767497062683105, + "ce_orig": 0.29367595911026, + "epoch": 0.32094327413904666, + "kl_loss": 0.19664621353149414, + "loss_ib": 0.0022341369185596704, + "step": 1116 + }, + { + "ce_ib": 7.518855571746826, + "ce_orig": 1.0005066394805908, + "epoch": 0.32094327413904666, + "kl_loss": 0.12175123393535614, + "loss_ib": 0.0019693979993462563, + "step": 1116 + }, + { + "ce_ib": 9.178970336914062, + "ce_orig": 1.2377865314483643, + "epoch": 0.3212308577180243, + "kl_loss": 0.1200728565454483, + "loss_ib": 0.0021186256781220436, + "step": 1117 + }, + { + "ce_ib": 8.34536361694336, + "ce_orig": 0.9707791209220886, + "epoch": 0.3212308577180243, + "kl_loss": 0.10708945989608765, + "loss_ib": 0.001905430806800723, + "step": 1117 + }, + { + "ce_ib": 10.378780364990234, + "ce_orig": 1.690807819366455, + "epoch": 0.3212308577180243, + "kl_loss": 0.13962461054325104, + "loss_ib": 0.002434124005958438, + "step": 1117 + }, + { + "ce_ib": 6.600798606872559, + "ce_orig": 0.7438252568244934, + "epoch": 0.3212308577180243, + "kl_loss": 0.10462431609630585, + "loss_ib": 0.0017063230043277144, + "step": 1117 + }, + { + "ce_ib": 4.960424423217773, + "ce_orig": 0.6897417306900024, + "epoch": 0.32151844129700197, + "kl_loss": 0.1169540211558342, + "loss_ib": 0.0016655826475471258, + "step": 1118 + }, + { + "ce_ib": 6.60028600692749, + "ce_orig": 0.9910760521888733, + "epoch": 0.32151844129700197, + "kl_loss": 0.1475672870874405, + "loss_ib": 0.0021357014775276184, + "step": 1118 + }, + { + "ce_ib": 3.456740617752075, + "ce_orig": 0.5141904950141907, + "epoch": 0.32151844129700197, + "kl_loss": 0.09396034479141235, + "loss_ib": 0.0012852774234488606, + "step": 1118 + }, + { + "ce_ib": 6.369697570800781, + "ce_orig": 0.9031816124916077, + "epoch": 0.32151844129700197, + "kl_loss": 0.1125561073422432, + "loss_ib": 0.001762530766427517, + "step": 1118 + }, + { + "ce_ib": 3.408698320388794, + "ce_orig": 0.41078513860702515, + "epoch": 0.3218060248759796, + "kl_loss": 0.10214546322822571, + "loss_ib": 0.001362324459478259, + "step": 1119 + }, + { + "ce_ib": 5.6393585205078125, + "ce_orig": 0.5337005853652954, + "epoch": 0.3218060248759796, + "kl_loss": 0.07566121220588684, + "loss_ib": 0.001320547889918089, + "step": 1119 + }, + { + "ce_ib": 6.25368595123291, + "ce_orig": 0.8251882791519165, + "epoch": 0.3218060248759796, + "kl_loss": 0.14441141486167908, + "loss_ib": 0.0020694828126579523, + "step": 1119 + }, + { + "ce_ib": 4.823451042175293, + "ce_orig": 0.5779725313186646, + "epoch": 0.3218060248759796, + "kl_loss": 0.09135837107896805, + "loss_ib": 0.0013959287898615003, + "step": 1119 + }, + { + "epoch": 0.3220936084549572, + "grad_norm": 0.08428891748189926, + "learning_rate": 4.923299990577488e-05, + "loss": 0.8434, + "step": 1120 + }, + { + "ce_ib": 5.67487907409668, + "ce_orig": 0.8123293519020081, + "epoch": 0.3220936084549572, + "kl_loss": 0.08672993630170822, + "loss_ib": 0.0014347871765494347, + "step": 1120 + }, + { + "ce_ib": 7.47396183013916, + "ce_orig": 0.6569360494613647, + "epoch": 0.3220936084549572, + "kl_loss": 0.15070360898971558, + "loss_ib": 0.002254432300105691, + "step": 1120 + }, + { + "ce_ib": 6.670865058898926, + "ce_orig": 0.4752155840396881, + "epoch": 0.3220936084549572, + "kl_loss": 0.18852224946022034, + "loss_ib": 0.002552309073507786, + "step": 1120 + }, + { + "ce_ib": 4.767125129699707, + "ce_orig": 0.691856861114502, + "epoch": 0.3220936084549572, + "kl_loss": 0.08532582223415375, + "loss_ib": 0.0013299706624820828, + "step": 1120 + }, + { + "ce_ib": 2.6434950828552246, + "ce_orig": 0.3332846462726593, + "epoch": 0.32238119203393484, + "kl_loss": 0.08305683732032776, + "loss_ib": 0.0010949178831651807, + "step": 1121 + }, + { + "ce_ib": 10.54102897644043, + "ce_orig": 1.667731523513794, + "epoch": 0.32238119203393484, + "kl_loss": 0.13803747296333313, + "loss_ib": 0.0024344774428755045, + "step": 1121 + }, + { + "ce_ib": 6.683977127075195, + "ce_orig": 0.6325282454490662, + "epoch": 0.32238119203393484, + "kl_loss": 0.12083125114440918, + "loss_ib": 0.0018767102155834436, + "step": 1121 + }, + { + "ce_ib": 9.683277130126953, + "ce_orig": 1.1499894857406616, + "epoch": 0.32238119203393484, + "kl_loss": 0.15769684314727783, + "loss_ib": 0.0025452959816902876, + "step": 1121 + }, + { + "ce_ib": 9.049882888793945, + "ce_orig": 1.3263394832611084, + "epoch": 0.3226687756129125, + "kl_loss": 0.10114380717277527, + "loss_ib": 0.001916426350362599, + "step": 1122 + }, + { + "ce_ib": 5.353783130645752, + "ce_orig": 0.6200342178344727, + "epoch": 0.3226687756129125, + "kl_loss": 0.07643938064575195, + "loss_ib": 0.0012997720623388886, + "step": 1122 + }, + { + "ce_ib": 6.108999729156494, + "ce_orig": 0.7126256227493286, + "epoch": 0.3226687756129125, + "kl_loss": 0.12426118552684784, + "loss_ib": 0.0018535117851570249, + "step": 1122 + }, + { + "ce_ib": 8.723121643066406, + "ce_orig": 1.3226583003997803, + "epoch": 0.3226687756129125, + "kl_loss": 0.14628058671951294, + "loss_ib": 0.0023351178970187902, + "step": 1122 + }, + { + "ce_ib": 9.964115142822266, + "ce_orig": 1.3997361660003662, + "epoch": 0.32295635919189014, + "kl_loss": 0.14625918865203857, + "loss_ib": 0.002459003357216716, + "step": 1123 + }, + { + "ce_ib": 4.798125267028809, + "ce_orig": 0.5897971391677856, + "epoch": 0.32295635919189014, + "kl_loss": 0.12656289339065552, + "loss_ib": 0.0017454413464292884, + "step": 1123 + }, + { + "ce_ib": 5.80087947845459, + "ce_orig": 1.046662449836731, + "epoch": 0.32295635919189014, + "kl_loss": 0.11369995772838593, + "loss_ib": 0.0017170874634757638, + "step": 1123 + }, + { + "ce_ib": 10.707456588745117, + "ce_orig": 1.7433552742004395, + "epoch": 0.32295635919189014, + "kl_loss": 0.19035384058952332, + "loss_ib": 0.002974283881485462, + "step": 1123 + }, + { + "ce_ib": 9.244044303894043, + "ce_orig": 1.1507402658462524, + "epoch": 0.32324394277086776, + "kl_loss": 0.13072650134563446, + "loss_ib": 0.0022316693793982267, + "step": 1124 + }, + { + "ce_ib": 5.5080389976501465, + "ce_orig": 0.8370607495307922, + "epoch": 0.32324394277086776, + "kl_loss": 0.06126859784126282, + "loss_ib": 0.0011634897673502564, + "step": 1124 + }, + { + "ce_ib": 7.186861038208008, + "ce_orig": 0.6832764744758606, + "epoch": 0.32324394277086776, + "kl_loss": 0.1627086102962494, + "loss_ib": 0.0023457719944417477, + "step": 1124 + }, + { + "ce_ib": 8.068244934082031, + "ce_orig": 0.9853929877281189, + "epoch": 0.32324394277086776, + "kl_loss": 0.1222214549779892, + "loss_ib": 0.002029038965702057, + "step": 1124 + }, + { + "epoch": 0.32353152634984544, + "grad_norm": 0.09608148038387299, + "learning_rate": 4.922343254453768e-05, + "loss": 0.846, + "step": 1125 + }, + { + "ce_ib": 10.975310325622559, + "ce_orig": 1.636178731918335, + "epoch": 0.32353152634984544, + "kl_loss": 0.0799994021654129, + "loss_ib": 0.001897525042295456, + "step": 1125 + }, + { + "ce_ib": 7.978636264801025, + "ce_orig": 0.6996608972549438, + "epoch": 0.32353152634984544, + "kl_loss": 0.15940502285957336, + "loss_ib": 0.0023919136729091406, + "step": 1125 + }, + { + "ce_ib": 6.658046722412109, + "ce_orig": 0.9537755250930786, + "epoch": 0.32353152634984544, + "kl_loss": 0.10534384846687317, + "loss_ib": 0.001719243242405355, + "step": 1125 + }, + { + "ce_ib": 6.551901340484619, + "ce_orig": 0.853861391544342, + "epoch": 0.32353152634984544, + "kl_loss": 0.10188733041286469, + "loss_ib": 0.001674063503742218, + "step": 1125 + }, + { + "ce_ib": 9.186063766479492, + "ce_orig": 0.34158048033714294, + "epoch": 0.32381910992882307, + "kl_loss": 0.15255634486675262, + "loss_ib": 0.0024441697169095278, + "step": 1126 + }, + { + "ce_ib": 7.030545234680176, + "ce_orig": 1.3549855947494507, + "epoch": 0.32381910992882307, + "kl_loss": 0.07460334897041321, + "loss_ib": 0.0014490879839286208, + "step": 1126 + }, + { + "ce_ib": 4.667653560638428, + "ce_orig": 0.8390925526618958, + "epoch": 0.32381910992882307, + "kl_loss": 0.10421881079673767, + "loss_ib": 0.0015089533990249038, + "step": 1126 + }, + { + "ce_ib": 9.643956184387207, + "ce_orig": 0.7430564761161804, + "epoch": 0.32381910992882307, + "kl_loss": 0.13251788914203644, + "loss_ib": 0.002289574360474944, + "step": 1126 + }, + { + "ce_ib": 7.423088073730469, + "ce_orig": 1.0442339181900024, + "epoch": 0.3241066935078007, + "kl_loss": 0.10334056615829468, + "loss_ib": 0.0017757144523784518, + "step": 1127 + }, + { + "ce_ib": 5.813991546630859, + "ce_orig": 0.9771583676338196, + "epoch": 0.3241066935078007, + "kl_loss": 0.1183927059173584, + "loss_ib": 0.0017653262475505471, + "step": 1127 + }, + { + "ce_ib": 4.857069969177246, + "ce_orig": 0.7142473459243774, + "epoch": 0.3241066935078007, + "kl_loss": 0.20338529348373413, + "loss_ib": 0.0025195598136633635, + "step": 1127 + }, + { + "ce_ib": 3.728487730026245, + "ce_orig": 0.4864121973514557, + "epoch": 0.3241066935078007, + "kl_loss": 0.2202530950307846, + "loss_ib": 0.0025753795634955168, + "step": 1127 + }, + { + "ce_ib": 5.813749313354492, + "ce_orig": 0.6549258232116699, + "epoch": 0.32439427708677837, + "kl_loss": 0.17920701205730438, + "loss_ib": 0.0023734450805932283, + "step": 1128 + }, + { + "ce_ib": 4.604310512542725, + "ce_orig": 1.2016593217849731, + "epoch": 0.32439427708677837, + "kl_loss": 0.08224816620349884, + "loss_ib": 0.001282912795431912, + "step": 1128 + }, + { + "ce_ib": 5.9080634117126465, + "ce_orig": 0.9900830388069153, + "epoch": 0.32439427708677837, + "kl_loss": 0.09860847145318985, + "loss_ib": 0.0015768910525366664, + "step": 1128 + }, + { + "ce_ib": 7.773683071136475, + "ce_orig": 1.154776692390442, + "epoch": 0.32439427708677837, + "kl_loss": 0.093455970287323, + "loss_ib": 0.001711927936412394, + "step": 1128 + }, + { + "ce_ib": 7.422971248626709, + "ce_orig": 1.3916915655136108, + "epoch": 0.324681860665756, + "kl_loss": 0.09650696814060211, + "loss_ib": 0.001707366667687893, + "step": 1129 + }, + { + "ce_ib": 5.6086297035217285, + "ce_orig": 0.8838496804237366, + "epoch": 0.324681860665756, + "kl_loss": 0.13013264536857605, + "loss_ib": 0.0018621893832460046, + "step": 1129 + }, + { + "ce_ib": 9.813456535339355, + "ce_orig": 1.1540263891220093, + "epoch": 0.324681860665756, + "kl_loss": 0.11320735514163971, + "loss_ib": 0.0021134191192686558, + "step": 1129 + }, + { + "ce_ib": 7.494252681732178, + "ce_orig": 1.1366146802902222, + "epoch": 0.324681860665756, + "kl_loss": 0.10920100659132004, + "loss_ib": 0.0018414352089166641, + "step": 1129 + }, + { + "epoch": 0.3249694442447336, + "grad_norm": 0.1281149685382843, + "learning_rate": 4.921380682342912e-05, + "loss": 0.8778, + "step": 1130 + }, + { + "ce_ib": 7.405741214752197, + "ce_orig": 1.4356822967529297, + "epoch": 0.3249694442447336, + "kl_loss": 0.08208800852298737, + "loss_ib": 0.0015614541480317712, + "step": 1130 + }, + { + "ce_ib": 5.840486526489258, + "ce_orig": 0.8860843777656555, + "epoch": 0.3249694442447336, + "kl_loss": 0.10055000334978104, + "loss_ib": 0.0015895485412329435, + "step": 1130 + }, + { + "ce_ib": 5.952149391174316, + "ce_orig": 0.6938628554344177, + "epoch": 0.3249694442447336, + "kl_loss": 0.13012732565402985, + "loss_ib": 0.0018964881310239434, + "step": 1130 + }, + { + "ce_ib": 5.876269340515137, + "ce_orig": 0.7113330364227295, + "epoch": 0.3249694442447336, + "kl_loss": 0.13111966848373413, + "loss_ib": 0.0018988236552104354, + "step": 1130 + }, + { + "ce_ib": 7.067512512207031, + "ce_orig": 0.7368212342262268, + "epoch": 0.32525702782371124, + "kl_loss": 0.13936303555965424, + "loss_ib": 0.0021003815345466137, + "step": 1131 + }, + { + "ce_ib": 4.883144378662109, + "ce_orig": 0.7645682096481323, + "epoch": 0.32525702782371124, + "kl_loss": 0.11572670936584473, + "loss_ib": 0.0016455815639346838, + "step": 1131 + }, + { + "ce_ib": 12.358269691467285, + "ce_orig": 2.1583681106567383, + "epoch": 0.32525702782371124, + "kl_loss": 0.10350771248340607, + "loss_ib": 0.002270903903990984, + "step": 1131 + }, + { + "ce_ib": 5.750890254974365, + "ce_orig": 0.5138911604881287, + "epoch": 0.32525702782371124, + "kl_loss": 0.18141251802444458, + "loss_ib": 0.0023892142344266176, + "step": 1131 + }, + { + "ce_ib": 5.482202529907227, + "ce_orig": 0.4194678068161011, + "epoch": 0.3255446114026889, + "kl_loss": 0.16889557242393494, + "loss_ib": 0.0022371760569512844, + "step": 1132 + }, + { + "ce_ib": 7.464616298675537, + "ce_orig": 0.6531148552894592, + "epoch": 0.3255446114026889, + "kl_loss": 0.1452089548110962, + "loss_ib": 0.002198551082983613, + "step": 1132 + }, + { + "ce_ib": 7.846071720123291, + "ce_orig": 1.1021744012832642, + "epoch": 0.3255446114026889, + "kl_loss": 0.2762680947780609, + "loss_ib": 0.0035472880117595196, + "step": 1132 + }, + { + "ce_ib": 5.043225288391113, + "ce_orig": 0.9153444766998291, + "epoch": 0.3255446114026889, + "kl_loss": 0.05944227799773216, + "loss_ib": 0.0010987452697008848, + "step": 1132 + }, + { + "ce_ib": 4.470819473266602, + "ce_orig": 0.5241734981536865, + "epoch": 0.32583219498166655, + "kl_loss": 0.10007923096418381, + "loss_ib": 0.0014478742377832532, + "step": 1133 + }, + { + "ce_ib": 7.373484134674072, + "ce_orig": 0.9568267464637756, + "epoch": 0.32583219498166655, + "kl_loss": 0.11206928640604019, + "loss_ib": 0.0018580412724986672, + "step": 1133 + }, + { + "ce_ib": 4.963825702667236, + "ce_orig": 0.6650660037994385, + "epoch": 0.32583219498166655, + "kl_loss": 0.17485421895980835, + "loss_ib": 0.002244924660772085, + "step": 1133 + }, + { + "ce_ib": 4.26594352722168, + "ce_orig": 0.5342496037483215, + "epoch": 0.32583219498166655, + "kl_loss": 0.12280713766813278, + "loss_ib": 0.0016546656843274832, + "step": 1133 + }, + { + "ce_ib": 4.813910961151123, + "ce_orig": 0.6347959041595459, + "epoch": 0.32611977856064417, + "kl_loss": 0.10050021857023239, + "loss_ib": 0.0014863931573927402, + "step": 1134 + }, + { + "ce_ib": 5.719820976257324, + "ce_orig": 0.7599513530731201, + "epoch": 0.32611977856064417, + "kl_loss": 0.14795079827308655, + "loss_ib": 0.0020514901261776686, + "step": 1134 + }, + { + "ce_ib": 7.878007888793945, + "ce_orig": 1.600778579711914, + "epoch": 0.32611977856064417, + "kl_loss": 0.08611056953668594, + "loss_ib": 0.001648906385526061, + "step": 1134 + }, + { + "ce_ib": 6.737755298614502, + "ce_orig": 0.7938176393508911, + "epoch": 0.32611977856064417, + "kl_loss": 0.10635479539632797, + "loss_ib": 0.0017373233567923307, + "step": 1134 + }, + { + "epoch": 0.32640736213962185, + "grad_norm": 0.08962231129407883, + "learning_rate": 4.920412276563977e-05, + "loss": 0.7977, + "step": 1135 + }, + { + "ce_ib": 6.567104816436768, + "ce_orig": 0.9490758180618286, + "epoch": 0.32640736213962185, + "kl_loss": 0.10037635266780853, + "loss_ib": 0.0016604738775640726, + "step": 1135 + }, + { + "ce_ib": 3.867321729660034, + "ce_orig": 0.6551004648208618, + "epoch": 0.32640736213962185, + "kl_loss": 0.12545770406723022, + "loss_ib": 0.0016413092380389571, + "step": 1135 + }, + { + "ce_ib": 5.023824691772461, + "ce_orig": 0.549644947052002, + "epoch": 0.32640736213962185, + "kl_loss": 0.10960295051336288, + "loss_ib": 0.001598411938175559, + "step": 1135 + }, + { + "ce_ib": 9.607556343078613, + "ce_orig": 1.194618821144104, + "epoch": 0.32640736213962185, + "kl_loss": 0.09962357580661774, + "loss_ib": 0.001956991385668516, + "step": 1135 + }, + { + "ce_ib": 5.548495292663574, + "ce_orig": 0.6250153183937073, + "epoch": 0.32669494571859947, + "kl_loss": 0.12165582925081253, + "loss_ib": 0.0017714076675474644, + "step": 1136 + }, + { + "ce_ib": 8.4020357131958, + "ce_orig": 0.9137877225875854, + "epoch": 0.32669494571859947, + "kl_loss": 0.12433409690856934, + "loss_ib": 0.002083544386550784, + "step": 1136 + }, + { + "ce_ib": 4.013749599456787, + "ce_orig": 0.5571913123130798, + "epoch": 0.32669494571859947, + "kl_loss": 0.157148078083992, + "loss_ib": 0.00197285576723516, + "step": 1136 + }, + { + "ce_ib": 5.231479644775391, + "ce_orig": 0.7425175905227661, + "epoch": 0.32669494571859947, + "kl_loss": 0.13806116580963135, + "loss_ib": 0.0019037595484405756, + "step": 1136 + }, + { + "ce_ib": 4.444094657897949, + "ce_orig": 0.5933840274810791, + "epoch": 0.3269825292975771, + "kl_loss": 0.08529434353113174, + "loss_ib": 0.0012973528355360031, + "step": 1137 + }, + { + "ce_ib": 6.240205764770508, + "ce_orig": 0.9035214781761169, + "epoch": 0.3269825292975771, + "kl_loss": 0.08053313940763474, + "loss_ib": 0.001429351861588657, + "step": 1137 + }, + { + "ce_ib": 10.129720687866211, + "ce_orig": 1.1901086568832397, + "epoch": 0.3269825292975771, + "kl_loss": 0.15460729598999023, + "loss_ib": 0.0025590448640286922, + "step": 1137 + }, + { + "ce_ib": 5.573910236358643, + "ce_orig": 0.6092091202735901, + "epoch": 0.3269825292975771, + "kl_loss": 0.2540702223777771, + "loss_ib": 0.003098093206062913, + "step": 1137 + }, + { + "ce_ib": 5.491816520690918, + "ce_orig": 0.702403724193573, + "epoch": 0.3272701128765548, + "kl_loss": 0.13322144746780396, + "loss_ib": 0.0018813961651176214, + "step": 1138 + }, + { + "ce_ib": 8.6897554397583, + "ce_orig": 1.1769925355911255, + "epoch": 0.3272701128765548, + "kl_loss": 0.1068679541349411, + "loss_ib": 0.001937655033543706, + "step": 1138 + }, + { + "ce_ib": 6.18388032913208, + "ce_orig": 0.5875335335731506, + "epoch": 0.3272701128765548, + "kl_loss": 0.13150034844875336, + "loss_ib": 0.001933391555212438, + "step": 1138 + }, + { + "ce_ib": 4.367621421813965, + "ce_orig": 0.771990954875946, + "epoch": 0.3272701128765548, + "kl_loss": 0.10100337862968445, + "loss_ib": 0.0014467958826571703, + "step": 1138 + }, + { + "ce_ib": 3.696974277496338, + "ce_orig": 0.5121734738349915, + "epoch": 0.3275576964555324, + "kl_loss": 0.09040553122758865, + "loss_ib": 0.0012737527722492814, + "step": 1139 + }, + { + "ce_ib": 8.0768404006958, + "ce_orig": 1.3299756050109863, + "epoch": 0.3275576964555324, + "kl_loss": 0.10132066160440445, + "loss_ib": 0.0018208905821666121, + "step": 1139 + }, + { + "ce_ib": 5.821397304534912, + "ce_orig": 0.8105670809745789, + "epoch": 0.3275576964555324, + "kl_loss": 0.10337543487548828, + "loss_ib": 0.0016158941434696317, + "step": 1139 + }, + { + "ce_ib": 7.536628723144531, + "ce_orig": 0.7028417587280273, + "epoch": 0.3275576964555324, + "kl_loss": 0.15450000762939453, + "loss_ib": 0.002298662904649973, + "step": 1139 + }, + { + "epoch": 0.32784528003451, + "grad_norm": 0.08544071763753891, + "learning_rate": 4.919438039450078e-05, + "loss": 0.862, + "step": 1140 + }, + { + "ce_ib": 5.5745720863342285, + "ce_orig": 1.0211468935012817, + "epoch": 0.32784528003451, + "kl_loss": 0.09475058317184448, + "loss_ib": 0.001504963031038642, + "step": 1140 + }, + { + "ce_ib": 8.765974044799805, + "ce_orig": 0.9023078680038452, + "epoch": 0.32784528003451, + "kl_loss": 0.1581837236881256, + "loss_ib": 0.0024584345519542694, + "step": 1140 + }, + { + "ce_ib": 6.658539772033691, + "ce_orig": 0.40172821283340454, + "epoch": 0.32784528003451, + "kl_loss": 0.16421042382717133, + "loss_ib": 0.0023079582024365664, + "step": 1140 + }, + { + "ce_ib": 5.496406078338623, + "ce_orig": 0.5838255286216736, + "epoch": 0.32784528003451, + "kl_loss": 0.11605791002511978, + "loss_ib": 0.0017102196579799056, + "step": 1140 + }, + { + "ce_ib": 9.511661529541016, + "ce_orig": 1.2888432741165161, + "epoch": 0.32813286361348765, + "kl_loss": 0.14782628417015076, + "loss_ib": 0.0024294289760291576, + "step": 1141 + }, + { + "ce_ib": 6.050619602203369, + "ce_orig": 0.732530951499939, + "epoch": 0.32813286361348765, + "kl_loss": 0.10595589131116867, + "loss_ib": 0.0016646209405735135, + "step": 1141 + }, + { + "ce_ib": 7.932619571685791, + "ce_orig": 0.702617347240448, + "epoch": 0.32813286361348765, + "kl_loss": 0.16292458772659302, + "loss_ib": 0.0024225078523159027, + "step": 1141 + }, + { + "ce_ib": 7.2254743576049805, + "ce_orig": 0.72353595495224, + "epoch": 0.32813286361348765, + "kl_loss": 0.13075268268585205, + "loss_ib": 0.0020300743635743856, + "step": 1141 + }, + { + "ce_ib": 5.295469284057617, + "ce_orig": 0.5043757557868958, + "epoch": 0.3284204471924653, + "kl_loss": 0.11717567592859268, + "loss_ib": 0.0017013036413118243, + "step": 1142 + }, + { + "ce_ib": 5.344768047332764, + "ce_orig": 0.623521625995636, + "epoch": 0.3284204471924653, + "kl_loss": 0.071539506316185, + "loss_ib": 0.0012498717987909913, + "step": 1142 + }, + { + "ce_ib": 7.219688415527344, + "ce_orig": 0.8054973483085632, + "epoch": 0.3284204471924653, + "kl_loss": 0.18407700955867767, + "loss_ib": 0.0025627389550209045, + "step": 1142 + }, + { + "ce_ib": 7.002214431762695, + "ce_orig": 0.7284294962882996, + "epoch": 0.3284204471924653, + "kl_loss": 0.07964880764484406, + "loss_ib": 0.0014967095339670777, + "step": 1142 + }, + { + "ce_ib": 8.63122272491455, + "ce_orig": 0.7607702016830444, + "epoch": 0.32870803077144295, + "kl_loss": 0.15003398060798645, + "loss_ib": 0.0023634620010852814, + "step": 1143 + }, + { + "ce_ib": 5.379940032958984, + "ce_orig": 0.7627663612365723, + "epoch": 0.32870803077144295, + "kl_loss": 0.1444094330072403, + "loss_ib": 0.0019820884335786104, + "step": 1143 + }, + { + "ce_ib": 4.847815036773682, + "ce_orig": 0.7149484157562256, + "epoch": 0.32870803077144295, + "kl_loss": 0.09257819503545761, + "loss_ib": 0.0014105633599683642, + "step": 1143 + }, + { + "ce_ib": 9.955081939697266, + "ce_orig": 0.9020329713821411, + "epoch": 0.32870803077144295, + "kl_loss": 0.1339617371559143, + "loss_ib": 0.002335125347599387, + "step": 1143 + }, + { + "ce_ib": 8.813331604003906, + "ce_orig": 0.7900945544242859, + "epoch": 0.3289956143504206, + "kl_loss": 0.16139021515846252, + "loss_ib": 0.002495235064998269, + "step": 1144 + }, + { + "ce_ib": 8.96036434173584, + "ce_orig": 1.1033498048782349, + "epoch": 0.3289956143504206, + "kl_loss": 0.1222626268863678, + "loss_ib": 0.002118662465363741, + "step": 1144 + }, + { + "ce_ib": 4.615363597869873, + "ce_orig": 0.3875173032283783, + "epoch": 0.3289956143504206, + "kl_loss": 0.10488709807395935, + "loss_ib": 0.0015104073099792004, + "step": 1144 + }, + { + "ce_ib": 9.573678970336914, + "ce_orig": 1.2343288660049438, + "epoch": 0.3289956143504206, + "kl_loss": 0.13394644856452942, + "loss_ib": 0.002296832390129566, + "step": 1144 + }, + { + "epoch": 0.32928319792939825, + "grad_norm": 0.0860443264245987, + "learning_rate": 4.9184579733483796e-05, + "loss": 0.854, + "step": 1145 + }, + { + "ce_ib": 5.269725322723389, + "ce_orig": 0.6295099854469299, + "epoch": 0.32928319792939825, + "kl_loss": 0.09791259467601776, + "loss_ib": 0.00150609842967242, + "step": 1145 + }, + { + "ce_ib": 6.847803592681885, + "ce_orig": 0.955610990524292, + "epoch": 0.32928319792939825, + "kl_loss": 0.1154763400554657, + "loss_ib": 0.00183954369276762, + "step": 1145 + }, + { + "ce_ib": 6.657177448272705, + "ce_orig": 0.5914230942726135, + "epoch": 0.32928319792939825, + "kl_loss": 0.09953590482473373, + "loss_ib": 0.0016610767925158143, + "step": 1145 + }, + { + "ce_ib": 7.972986221313477, + "ce_orig": 0.8305418491363525, + "epoch": 0.32928319792939825, + "kl_loss": 0.19533243775367737, + "loss_ib": 0.002750622807070613, + "step": 1145 + }, + { + "ce_ib": 8.388483047485352, + "ce_orig": 1.120553731918335, + "epoch": 0.3295707815083759, + "kl_loss": 0.10701696574687958, + "loss_ib": 0.0019090177956968546, + "step": 1146 + }, + { + "ce_ib": 4.477085590362549, + "ce_orig": 0.739358127117157, + "epoch": 0.3295707815083759, + "kl_loss": 0.11214235424995422, + "loss_ib": 0.0015691319713369012, + "step": 1146 + }, + { + "ce_ib": 6.1088972091674805, + "ce_orig": 0.8950079083442688, + "epoch": 0.3295707815083759, + "kl_loss": 0.14025771617889404, + "loss_ib": 0.0020134670194238424, + "step": 1146 + }, + { + "ce_ib": 7.437435626983643, + "ce_orig": 0.8652719855308533, + "epoch": 0.3295707815083759, + "kl_loss": 0.10425149649381638, + "loss_ib": 0.001786258420906961, + "step": 1146 + }, + { + "ce_ib": 6.148717880249023, + "ce_orig": 0.5317816138267517, + "epoch": 0.3298583650873535, + "kl_loss": 0.0778564065694809, + "loss_ib": 0.0013934358721598983, + "step": 1147 + }, + { + "ce_ib": 8.765717506408691, + "ce_orig": 0.6234492659568787, + "epoch": 0.3298583650873535, + "kl_loss": 0.10909809172153473, + "loss_ib": 0.0019675525836646557, + "step": 1147 + }, + { + "ce_ib": 6.933672904968262, + "ce_orig": 0.6965823769569397, + "epoch": 0.3298583650873535, + "kl_loss": 0.11135978251695633, + "loss_ib": 0.001806965097784996, + "step": 1147 + }, + { + "ce_ib": 4.914548873901367, + "ce_orig": 0.6677202582359314, + "epoch": 0.3298583650873535, + "kl_loss": 0.09731614589691162, + "loss_ib": 0.0014646162744611502, + "step": 1147 + }, + { + "ce_ib": 5.5134711265563965, + "ce_orig": 0.826160192489624, + "epoch": 0.3301459486663312, + "kl_loss": 0.07226017117500305, + "loss_ib": 0.001273948815651238, + "step": 1148 + }, + { + "ce_ib": 8.220447540283203, + "ce_orig": 1.2149168252944946, + "epoch": 0.3301459486663312, + "kl_loss": 0.1457306444644928, + "loss_ib": 0.00227935123257339, + "step": 1148 + }, + { + "ce_ib": 5.302207946777344, + "ce_orig": 0.7826932072639465, + "epoch": 0.3301459486663312, + "kl_loss": 0.10453931242227554, + "loss_ib": 0.001575613860040903, + "step": 1148 + }, + { + "ce_ib": 3.9958691596984863, + "ce_orig": 0.6564205288887024, + "epoch": 0.3301459486663312, + "kl_loss": 0.06783327460289001, + "loss_ib": 0.0010779196163639426, + "step": 1148 + }, + { + "ce_ib": 4.644093990325928, + "ce_orig": 0.4070664942264557, + "epoch": 0.3304335322453088, + "kl_loss": 0.14819657802581787, + "loss_ib": 0.0019463751232251525, + "step": 1149 + }, + { + "ce_ib": 4.468021392822266, + "ce_orig": 0.6336247324943542, + "epoch": 0.3304335322453088, + "kl_loss": 0.09321756660938263, + "loss_ib": 0.0013789776712656021, + "step": 1149 + }, + { + "ce_ib": 4.387839317321777, + "ce_orig": 0.30651336908340454, + "epoch": 0.3304335322453088, + "kl_loss": 0.1521071493625641, + "loss_ib": 0.0019598554354161024, + "step": 1149 + }, + { + "ce_ib": 7.417322635650635, + "ce_orig": 0.9508139491081238, + "epoch": 0.3304335322453088, + "kl_loss": 0.08571872115135193, + "loss_ib": 0.0015989193925634027, + "step": 1149 + }, + { + "epoch": 0.3307211158242864, + "grad_norm": 0.09715892374515533, + "learning_rate": 4.917472080620086e-05, + "loss": 0.8048, + "step": 1150 + }, + { + "ce_ib": 9.67892074584961, + "ce_orig": 1.3727235794067383, + "epoch": 0.3307211158242864, + "kl_loss": 0.1465449333190918, + "loss_ib": 0.0024333414621651173, + "step": 1150 + }, + { + "ce_ib": 4.275513648986816, + "ce_orig": 0.8803659677505493, + "epoch": 0.3307211158242864, + "kl_loss": 0.08852915465831757, + "loss_ib": 0.0013128429418429732, + "step": 1150 + }, + { + "ce_ib": 8.305662155151367, + "ce_orig": 0.8471624851226807, + "epoch": 0.3307211158242864, + "kl_loss": 0.19541916251182556, + "loss_ib": 0.002784757874906063, + "step": 1150 + }, + { + "ce_ib": 7.027608871459961, + "ce_orig": 0.49434998631477356, + "epoch": 0.3307211158242864, + "kl_loss": 0.13142399489879608, + "loss_ib": 0.002017000922933221, + "step": 1150 + }, + { + "ce_ib": 4.126051902770996, + "ce_orig": 0.47386792302131653, + "epoch": 0.33100869940326405, + "kl_loss": 0.08531898260116577, + "loss_ib": 0.0012657948536798358, + "step": 1151 + }, + { + "ce_ib": 4.111065864562988, + "ce_orig": 0.8229820132255554, + "epoch": 0.33100869940326405, + "kl_loss": 0.09980519860982895, + "loss_ib": 0.0014091585762798786, + "step": 1151 + }, + { + "ce_ib": 10.319543838500977, + "ce_orig": 1.7134897708892822, + "epoch": 0.33100869940326405, + "kl_loss": 0.14278292655944824, + "loss_ib": 0.0024597835727036, + "step": 1151 + }, + { + "ce_ib": 6.541288375854492, + "ce_orig": 1.1026983261108398, + "epoch": 0.33100869940326405, + "kl_loss": 0.12487839162349701, + "loss_ib": 0.0019029126269742846, + "step": 1151 + }, + { + "ce_ib": 8.641268730163574, + "ce_orig": 1.2764012813568115, + "epoch": 0.33129628298224173, + "kl_loss": 0.1464216709136963, + "loss_ib": 0.0023283434566110373, + "step": 1152 + }, + { + "ce_ib": 8.653627395629883, + "ce_orig": 0.9396832585334778, + "epoch": 0.33129628298224173, + "kl_loss": 0.12554971873760223, + "loss_ib": 0.002120859920978546, + "step": 1152 + }, + { + "ce_ib": 6.012033939361572, + "ce_orig": 0.34361323714256287, + "epoch": 0.33129628298224173, + "kl_loss": 0.13796600699424744, + "loss_ib": 0.0019808635115623474, + "step": 1152 + }, + { + "ce_ib": 5.108508586883545, + "ce_orig": 0.748778223991394, + "epoch": 0.33129628298224173, + "kl_loss": 0.10739289224147797, + "loss_ib": 0.001584779703989625, + "step": 1152 + }, + { + "ce_ib": 5.372162342071533, + "ce_orig": 0.6447109580039978, + "epoch": 0.33158386656121935, + "kl_loss": 0.11223471164703369, + "loss_ib": 0.0016595632769167423, + "step": 1153 + }, + { + "ce_ib": 6.9633612632751465, + "ce_orig": 0.930819571018219, + "epoch": 0.33158386656121935, + "kl_loss": 0.126115083694458, + "loss_ib": 0.0019574868492782116, + "step": 1153 + }, + { + "ce_ib": 8.455766677856445, + "ce_orig": 1.0738056898117065, + "epoch": 0.33158386656121935, + "kl_loss": 0.16140224039554596, + "loss_ib": 0.0024595989380031824, + "step": 1153 + }, + { + "ce_ib": 7.3503265380859375, + "ce_orig": 1.1345939636230469, + "epoch": 0.33158386656121935, + "kl_loss": 0.14327988028526306, + "loss_ib": 0.0021678314078599215, + "step": 1153 + }, + { + "ce_ib": 4.839994430541992, + "ce_orig": 0.5257171392440796, + "epoch": 0.331871450140197, + "kl_loss": 0.09663469344377518, + "loss_ib": 0.00145034643355757, + "step": 1154 + }, + { + "ce_ib": 6.527209281921387, + "ce_orig": 0.6023959517478943, + "epoch": 0.331871450140197, + "kl_loss": 0.10009995102882385, + "loss_ib": 0.0016537203919142485, + "step": 1154 + }, + { + "ce_ib": 7.900789260864258, + "ce_orig": 1.0611294507980347, + "epoch": 0.331871450140197, + "kl_loss": 0.14826743304729462, + "loss_ib": 0.0022727532777935266, + "step": 1154 + }, + { + "ce_ib": 4.652851104736328, + "ce_orig": 0.8940951824188232, + "epoch": 0.331871450140197, + "kl_loss": 0.09196630120277405, + "loss_ib": 0.0013849481474608183, + "step": 1154 + }, + { + "epoch": 0.33215903371917466, + "grad_norm": 0.08988802134990692, + "learning_rate": 4.916480363640443e-05, + "loss": 0.8431, + "step": 1155 + }, + { + "ce_ib": 5.224212646484375, + "ce_orig": 0.8306000232696533, + "epoch": 0.33215903371917466, + "kl_loss": 0.09435658156871796, + "loss_ib": 0.0014659870648756623, + "step": 1155 + }, + { + "ce_ib": 4.522388458251953, + "ce_orig": 0.5686253309249878, + "epoch": 0.33215903371917466, + "kl_loss": 0.08125274628400803, + "loss_ib": 0.0012647663243114948, + "step": 1155 + }, + { + "ce_ib": 7.296513080596924, + "ce_orig": 0.76470547914505, + "epoch": 0.33215903371917466, + "kl_loss": 0.1121765673160553, + "loss_ib": 0.0018514168914407492, + "step": 1155 + }, + { + "ce_ib": 5.358783721923828, + "ce_orig": 0.8416937589645386, + "epoch": 0.33215903371917466, + "kl_loss": 0.0656447559595108, + "loss_ib": 0.00119232595898211, + "step": 1155 + }, + { + "ce_ib": 7.572870254516602, + "ce_orig": 1.0910744667053223, + "epoch": 0.3324466172981523, + "kl_loss": 0.09344696253538132, + "loss_ib": 0.0016917565371841192, + "step": 1156 + }, + { + "ce_ib": 6.930093765258789, + "ce_orig": 1.1059174537658691, + "epoch": 0.3324466172981523, + "kl_loss": 0.10829582810401917, + "loss_ib": 0.0017759675392881036, + "step": 1156 + }, + { + "ce_ib": 6.122500896453857, + "ce_orig": 0.8133541941642761, + "epoch": 0.3324466172981523, + "kl_loss": 0.10187964141368866, + "loss_ib": 0.0016310465289279819, + "step": 1156 + }, + { + "ce_ib": 4.905231952667236, + "ce_orig": 0.46612951159477234, + "epoch": 0.3324466172981523, + "kl_loss": 0.09792307019233704, + "loss_ib": 0.0014697537990286946, + "step": 1156 + }, + { + "ce_ib": 4.672698974609375, + "ce_orig": 0.5849744081497192, + "epoch": 0.3327342008771299, + "kl_loss": 0.351125568151474, + "loss_ib": 0.0039785257540643215, + "step": 1157 + }, + { + "ce_ib": 9.564355850219727, + "ce_orig": 1.7009320259094238, + "epoch": 0.3327342008771299, + "kl_loss": 0.09928872436285019, + "loss_ib": 0.0019493227591738105, + "step": 1157 + }, + { + "ce_ib": 4.546131610870361, + "ce_orig": 0.8941453695297241, + "epoch": 0.3327342008771299, + "kl_loss": 0.06103827804327011, + "loss_ib": 0.0010649960022419691, + "step": 1157 + }, + { + "ce_ib": 7.061286926269531, + "ce_orig": 0.9295308589935303, + "epoch": 0.3327342008771299, + "kl_loss": 0.11667195707559586, + "loss_ib": 0.001872848253697157, + "step": 1157 + }, + { + "ce_ib": 9.296477317810059, + "ce_orig": 1.4580363035202026, + "epoch": 0.3330217844561076, + "kl_loss": 0.11636004596948624, + "loss_ib": 0.0020932480692863464, + "step": 1158 + }, + { + "ce_ib": 8.64181900024414, + "ce_orig": 1.3367538452148438, + "epoch": 0.3330217844561076, + "kl_loss": 0.1105409562587738, + "loss_ib": 0.001969591248780489, + "step": 1158 + }, + { + "ce_ib": 3.508538007736206, + "ce_orig": 0.4009856879711151, + "epoch": 0.3330217844561076, + "kl_loss": 0.13424761593341827, + "loss_ib": 0.0016933298902586102, + "step": 1158 + }, + { + "ce_ib": 7.230615139007568, + "ce_orig": 0.8530421257019043, + "epoch": 0.3330217844561076, + "kl_loss": 0.12663200497627258, + "loss_ib": 0.0019893816206604242, + "step": 1158 + }, + { + "ce_ib": 6.42070198059082, + "ce_orig": 0.6392161250114441, + "epoch": 0.3333093680350852, + "kl_loss": 0.12846484780311584, + "loss_ib": 0.0019267186289653182, + "step": 1159 + }, + { + "ce_ib": 9.253157615661621, + "ce_orig": 1.423343300819397, + "epoch": 0.3333093680350852, + "kl_loss": 0.15739576518535614, + "loss_ib": 0.002499273279681802, + "step": 1159 + }, + { + "ce_ib": 8.63932991027832, + "ce_orig": 1.5198041200637817, + "epoch": 0.3333093680350852, + "kl_loss": 0.13295453786849976, + "loss_ib": 0.002193478401750326, + "step": 1159 + }, + { + "ce_ib": 7.621316432952881, + "ce_orig": 0.928875744342804, + "epoch": 0.3333093680350852, + "kl_loss": 0.13527259230613708, + "loss_ib": 0.0021148575469851494, + "step": 1159 + }, + { + "epoch": 0.33359695161406283, + "grad_norm": 0.08410744369029999, + "learning_rate": 4.9154828247987275e-05, + "loss": 0.9149, + "step": 1160 + }, + { + "ce_ib": 5.347615718841553, + "ce_orig": 0.6876154541969299, + "epoch": 0.33359695161406283, + "kl_loss": 0.14489194750785828, + "loss_ib": 0.001983680995181203, + "step": 1160 + }, + { + "ce_ib": 5.55272102355957, + "ce_orig": 0.39458954334259033, + "epoch": 0.33359695161406283, + "kl_loss": 0.1338169127702713, + "loss_ib": 0.001893441192805767, + "step": 1160 + }, + { + "ce_ib": 5.986666679382324, + "ce_orig": 0.9985933899879456, + "epoch": 0.33359695161406283, + "kl_loss": 0.1406264454126358, + "loss_ib": 0.0020049309823662043, + "step": 1160 + }, + { + "ce_ib": 5.156513214111328, + "ce_orig": 0.8448862433433533, + "epoch": 0.33359695161406283, + "kl_loss": 0.12365314364433289, + "loss_ib": 0.0017521826084703207, + "step": 1160 + }, + { + "ce_ib": 2.9134364128112793, + "ce_orig": 0.5574214458465576, + "epoch": 0.33388453519304045, + "kl_loss": 0.05487529933452606, + "loss_ib": 0.0008400966180488467, + "step": 1161 + }, + { + "ce_ib": 5.857978343963623, + "ce_orig": 0.8570786118507385, + "epoch": 0.33388453519304045, + "kl_loss": 0.10978295654058456, + "loss_ib": 0.0016836273716762662, + "step": 1161 + }, + { + "ce_ib": 4.626968860626221, + "ce_orig": 0.6697400212287903, + "epoch": 0.33388453519304045, + "kl_loss": 0.142439603805542, + "loss_ib": 0.0018870928324759007, + "step": 1161 + }, + { + "ce_ib": 6.765016078948975, + "ce_orig": 1.0183452367782593, + "epoch": 0.33388453519304045, + "kl_loss": 0.21222910284996033, + "loss_ib": 0.0027987926732748747, + "step": 1161 + }, + { + "ce_ib": 3.8295748233795166, + "ce_orig": 0.6635098457336426, + "epoch": 0.33417211877201813, + "kl_loss": 0.10773129761219025, + "loss_ib": 0.0014602703740820289, + "step": 1162 + }, + { + "ce_ib": 7.3349928855896, + "ce_orig": 0.8590999841690063, + "epoch": 0.33417211877201813, + "kl_loss": 0.15064631402492523, + "loss_ib": 0.002239962574094534, + "step": 1162 + }, + { + "ce_ib": 7.64417839050293, + "ce_orig": 1.1145079135894775, + "epoch": 0.33417211877201813, + "kl_loss": 0.0957171767950058, + "loss_ib": 0.001721589476801455, + "step": 1162 + }, + { + "ce_ib": 7.106287479400635, + "ce_orig": 0.46655088663101196, + "epoch": 0.33417211877201813, + "kl_loss": 0.11425713449716568, + "loss_ib": 0.0018532000249251723, + "step": 1162 + }, + { + "ce_ib": 5.427411079406738, + "ce_orig": 0.7361214756965637, + "epoch": 0.33445970235099576, + "kl_loss": 0.10274486243724823, + "loss_ib": 0.0015701897209510207, + "step": 1163 + }, + { + "ce_ib": 6.271327018737793, + "ce_orig": 1.0419801473617554, + "epoch": 0.33445970235099576, + "kl_loss": 0.05931294336915016, + "loss_ib": 0.0012202620273455977, + "step": 1163 + }, + { + "ce_ib": 8.937814712524414, + "ce_orig": 1.2333555221557617, + "epoch": 0.33445970235099576, + "kl_loss": 0.07176488637924194, + "loss_ib": 0.0016114303143694997, + "step": 1163 + }, + { + "ce_ib": 5.644059658050537, + "ce_orig": 0.42556285858154297, + "epoch": 0.33445970235099576, + "kl_loss": 0.09541893005371094, + "loss_ib": 0.0015185951488092542, + "step": 1163 + }, + { + "ce_ib": 6.685428619384766, + "ce_orig": 0.4348882734775543, + "epoch": 0.3347472859299734, + "kl_loss": 0.09693093597888947, + "loss_ib": 0.0016378521686419845, + "step": 1164 + }, + { + "ce_ib": 4.863894939422607, + "ce_orig": 0.6920062899589539, + "epoch": 0.3347472859299734, + "kl_loss": 0.10376952588558197, + "loss_ib": 0.001524084829725325, + "step": 1164 + }, + { + "ce_ib": 8.66261100769043, + "ce_orig": 1.1178714036941528, + "epoch": 0.3347472859299734, + "kl_loss": 0.14406812191009521, + "loss_ib": 0.0023069423623383045, + "step": 1164 + }, + { + "ce_ib": 7.869550704956055, + "ce_orig": 1.0147631168365479, + "epoch": 0.3347472859299734, + "kl_loss": 0.11258833855390549, + "loss_ib": 0.0019128384301438928, + "step": 1164 + }, + { + "epoch": 0.33503486950895106, + "grad_norm": 0.09307786077260971, + "learning_rate": 4.9144794664982413e-05, + "loss": 0.8537, + "step": 1165 + }, + { + "ce_ib": 7.7619948387146, + "ce_orig": 1.240858793258667, + "epoch": 0.33503486950895106, + "kl_loss": 0.09256239235401154, + "loss_ib": 0.0017018234357237816, + "step": 1165 + }, + { + "ce_ib": 8.073474884033203, + "ce_orig": 1.4170767068862915, + "epoch": 0.33503486950895106, + "kl_loss": 0.09512190520763397, + "loss_ib": 0.0017585664754733443, + "step": 1165 + }, + { + "ce_ib": 6.9882378578186035, + "ce_orig": 1.0470027923583984, + "epoch": 0.33503486950895106, + "kl_loss": 0.12313065677881241, + "loss_ib": 0.0019301304128021002, + "step": 1165 + }, + { + "ce_ib": 5.528054237365723, + "ce_orig": 0.1656164973974228, + "epoch": 0.33503486950895106, + "kl_loss": 0.19854716956615448, + "loss_ib": 0.002538277069106698, + "step": 1165 + }, + { + "ce_ib": 6.298125743865967, + "ce_orig": 0.44228455424308777, + "epoch": 0.3353224530879287, + "kl_loss": 0.09168311953544617, + "loss_ib": 0.0015466436743736267, + "step": 1166 + }, + { + "ce_ib": 6.714663982391357, + "ce_orig": 0.5558184385299683, + "epoch": 0.3353224530879287, + "kl_loss": 0.1673613041639328, + "loss_ib": 0.0023450793232768774, + "step": 1166 + }, + { + "ce_ib": 4.478449821472168, + "ce_orig": 0.7729062438011169, + "epoch": 0.3353224530879287, + "kl_loss": 0.10390207916498184, + "loss_ib": 0.0014868656871840358, + "step": 1166 + }, + { + "ce_ib": 8.8925142288208, + "ce_orig": 1.3629997968673706, + "epoch": 0.3353224530879287, + "kl_loss": 0.12358202040195465, + "loss_ib": 0.002125071594491601, + "step": 1166 + }, + { + "ce_ib": 6.042492389678955, + "ce_orig": 0.8548057079315186, + "epoch": 0.3356100366669063, + "kl_loss": 0.10940991342067719, + "loss_ib": 0.0016983483219519258, + "step": 1167 + }, + { + "ce_ib": 3.1412997245788574, + "ce_orig": 0.34335675835609436, + "epoch": 0.3356100366669063, + "kl_loss": 0.08861685544252396, + "loss_ib": 0.0012002985458821058, + "step": 1167 + }, + { + "ce_ib": 7.260244846343994, + "ce_orig": 1.2476314306259155, + "epoch": 0.3356100366669063, + "kl_loss": 0.10213663429021835, + "loss_ib": 0.0017473907209932804, + "step": 1167 + }, + { + "ce_ib": 7.48936653137207, + "ce_orig": 1.0032376050949097, + "epoch": 0.3356100366669063, + "kl_loss": 0.1021503359079361, + "loss_ib": 0.0017704400233924389, + "step": 1167 + }, + { + "ce_ib": 6.35796594619751, + "ce_orig": 1.062179446220398, + "epoch": 0.335897620245884, + "kl_loss": 0.2645478844642639, + "loss_ib": 0.003281275276094675, + "step": 1168 + }, + { + "ce_ib": 7.562673568725586, + "ce_orig": 1.0366406440734863, + "epoch": 0.335897620245884, + "kl_loss": 0.13738609850406647, + "loss_ib": 0.0021301282104104757, + "step": 1168 + }, + { + "ce_ib": 7.556421279907227, + "ce_orig": 1.2071533203125, + "epoch": 0.335897620245884, + "kl_loss": 0.10314060747623444, + "loss_ib": 0.0017870481824502349, + "step": 1168 + }, + { + "ce_ib": 4.021859645843506, + "ce_orig": 0.38638952374458313, + "epoch": 0.335897620245884, + "kl_loss": 0.09852783381938934, + "loss_ib": 0.0013874642318114638, + "step": 1168 + }, + { + "ce_ib": 8.36083698272705, + "ce_orig": 0.5689254403114319, + "epoch": 0.3361852038248616, + "kl_loss": 0.19644485414028168, + "loss_ib": 0.0028005321510136127, + "step": 1169 + }, + { + "ce_ib": 4.525207996368408, + "ce_orig": 0.4924004077911377, + "epoch": 0.3361852038248616, + "kl_loss": 0.09402735531330109, + "loss_ib": 0.0013927941909059882, + "step": 1169 + }, + { + "ce_ib": 5.216853618621826, + "ce_orig": 0.7216584086418152, + "epoch": 0.3361852038248616, + "kl_loss": 0.11405150592327118, + "loss_ib": 0.001662200316786766, + "step": 1169 + }, + { + "ce_ib": 5.908849716186523, + "ce_orig": 0.6451670527458191, + "epoch": 0.3361852038248616, + "kl_loss": 0.09382858872413635, + "loss_ib": 0.0015291707823053002, + "step": 1169 + }, + { + "epoch": 0.33647278740383924, + "grad_norm": 0.07818438857793808, + "learning_rate": 4.913470291156308e-05, + "loss": 0.8495, + "step": 1170 + }, + { + "ce_ib": 7.972014904022217, + "ce_orig": 1.0323179960250854, + "epoch": 0.33647278740383924, + "kl_loss": 0.12066707015037537, + "loss_ib": 0.0020038720685988665, + "step": 1170 + }, + { + "ce_ib": 4.814333915710449, + "ce_orig": 1.0412518978118896, + "epoch": 0.33647278740383924, + "kl_loss": 0.09671176224946976, + "loss_ib": 0.0014485509600490332, + "step": 1170 + }, + { + "ce_ib": 7.407196044921875, + "ce_orig": 0.6773426532745361, + "epoch": 0.33647278740383924, + "kl_loss": 0.16202744841575623, + "loss_ib": 0.0023609939962625504, + "step": 1170 + }, + { + "ce_ib": 5.377325534820557, + "ce_orig": 0.7081344723701477, + "epoch": 0.33647278740383924, + "kl_loss": 0.12697473168373108, + "loss_ib": 0.001807479769922793, + "step": 1170 + }, + { + "ce_ib": 4.1743035316467285, + "ce_orig": 0.3675990104675293, + "epoch": 0.33676037098281686, + "kl_loss": 0.07218644767999649, + "loss_ib": 0.001139294821768999, + "step": 1171 + }, + { + "ce_ib": 5.744592666625977, + "ce_orig": 0.6870226263999939, + "epoch": 0.33676037098281686, + "kl_loss": 0.07420578598976135, + "loss_ib": 0.0013165171258151531, + "step": 1171 + }, + { + "ce_ib": 7.887357711791992, + "ce_orig": 1.7738786935806274, + "epoch": 0.33676037098281686, + "kl_loss": 0.11280599236488342, + "loss_ib": 0.001916795619763434, + "step": 1171 + }, + { + "ce_ib": 5.786555290222168, + "ce_orig": 0.8866435289382935, + "epoch": 0.33676037098281686, + "kl_loss": 0.0884263664484024, + "loss_ib": 0.0014629190554842353, + "step": 1171 + }, + { + "ce_ib": 7.102581024169922, + "ce_orig": 1.0043176412582397, + "epoch": 0.33704795456179454, + "kl_loss": 0.16740411520004272, + "loss_ib": 0.0023842991795390844, + "step": 1172 + }, + { + "ce_ib": 4.370709419250488, + "ce_orig": 0.5108112692832947, + "epoch": 0.33704795456179454, + "kl_loss": 0.1324308067560196, + "loss_ib": 0.0017613789532333612, + "step": 1172 + }, + { + "ce_ib": 6.938513278961182, + "ce_orig": 1.0623221397399902, + "epoch": 0.33704795456179454, + "kl_loss": 0.10967092216014862, + "loss_ib": 0.0017905604327097535, + "step": 1172 + }, + { + "ce_ib": 9.058341979980469, + "ce_orig": 0.9803903102874756, + "epoch": 0.33704795456179454, + "kl_loss": 0.12941080331802368, + "loss_ib": 0.0021999420132488012, + "step": 1172 + }, + { + "ce_ib": 6.757338523864746, + "ce_orig": 0.7687369585037231, + "epoch": 0.33733553814077216, + "kl_loss": 0.12288660556077957, + "loss_ib": 0.0019045999506488442, + "step": 1173 + }, + { + "ce_ib": 7.767806529998779, + "ce_orig": 0.9069968461990356, + "epoch": 0.33733553814077216, + "kl_loss": 0.12564904987812042, + "loss_ib": 0.0020332711283117533, + "step": 1173 + }, + { + "ce_ib": 10.300495147705078, + "ce_orig": 1.603036880493164, + "epoch": 0.33733553814077216, + "kl_loss": 0.12140758335590363, + "loss_ib": 0.0022441253531724215, + "step": 1173 + }, + { + "ce_ib": 5.464423656463623, + "ce_orig": 0.5434802770614624, + "epoch": 0.33733553814077216, + "kl_loss": 0.10957443714141846, + "loss_ib": 0.0016421866603195667, + "step": 1173 + }, + { + "ce_ib": 6.049656391143799, + "ce_orig": 0.7471191883087158, + "epoch": 0.3376231217197498, + "kl_loss": 0.14804700016975403, + "loss_ib": 0.0020854356698691845, + "step": 1174 + }, + { + "ce_ib": 7.153292655944824, + "ce_orig": 0.8166505098342896, + "epoch": 0.3376231217197498, + "kl_loss": 0.09506618976593018, + "loss_ib": 0.0016659912653267384, + "step": 1174 + }, + { + "ce_ib": 4.490015506744385, + "ce_orig": 0.6768890619277954, + "epoch": 0.3376231217197498, + "kl_loss": 0.15018723905086517, + "loss_ib": 0.0019508738769218326, + "step": 1174 + }, + { + "ce_ib": 7.776271343231201, + "ce_orig": 1.1005843877792358, + "epoch": 0.3376231217197498, + "kl_loss": 0.14981666207313538, + "loss_ib": 0.002275793580338359, + "step": 1174 + }, + { + "epoch": 0.33791070529872747, + "grad_norm": 0.08677754551172256, + "learning_rate": 4.912455301204264e-05, + "loss": 0.9106, + "step": 1175 + }, + { + "ce_ib": 6.626636505126953, + "ce_orig": 0.7961812615394592, + "epoch": 0.33791070529872747, + "kl_loss": 0.14855235815048218, + "loss_ib": 0.002148187020793557, + "step": 1175 + }, + { + "ce_ib": 5.3702616691589355, + "ce_orig": 0.35888469219207764, + "epoch": 0.33791070529872747, + "kl_loss": 0.056084148585796356, + "loss_ib": 0.0010978676145896316, + "step": 1175 + }, + { + "ce_ib": 9.21849536895752, + "ce_orig": 1.443953275680542, + "epoch": 0.33791070529872747, + "kl_loss": 0.11867404729127884, + "loss_ib": 0.002108589978888631, + "step": 1175 + }, + { + "ce_ib": 7.601804256439209, + "ce_orig": 1.0470943450927734, + "epoch": 0.33791070529872747, + "kl_loss": 0.18766771256923676, + "loss_ib": 0.0026368575636297464, + "step": 1175 + }, + { + "ce_ib": 4.885053634643555, + "ce_orig": 0.8753991723060608, + "epoch": 0.3381982888777051, + "kl_loss": 0.09170867502689362, + "loss_ib": 0.0014055920764803886, + "step": 1176 + }, + { + "ce_ib": 6.546106815338135, + "ce_orig": 0.5040268898010254, + "epoch": 0.3381982888777051, + "kl_loss": 0.15207909047603607, + "loss_ib": 0.0021754016634076834, + "step": 1176 + }, + { + "ce_ib": 7.3167853355407715, + "ce_orig": 1.3194838762283325, + "epoch": 0.3381982888777051, + "kl_loss": 0.07922293990850449, + "loss_ib": 0.0015239078784361482, + "step": 1176 + }, + { + "ce_ib": 9.338371276855469, + "ce_orig": 1.1496158838272095, + "epoch": 0.3381982888777051, + "kl_loss": 0.10646973550319672, + "loss_ib": 0.0019985344260931015, + "step": 1176 + }, + { + "ce_ib": 4.440505504608154, + "ce_orig": 0.5622341632843018, + "epoch": 0.3384858724566827, + "kl_loss": 0.0706862211227417, + "loss_ib": 0.0011509127216413617, + "step": 1177 + }, + { + "ce_ib": 4.418371200561523, + "ce_orig": 0.6522874236106873, + "epoch": 0.3384858724566827, + "kl_loss": 0.09112890809774399, + "loss_ib": 0.0013531261356547475, + "step": 1177 + }, + { + "ce_ib": 6.717861652374268, + "ce_orig": 0.743884265422821, + "epoch": 0.3384858724566827, + "kl_loss": 0.19413869082927704, + "loss_ib": 0.0026131730992347, + "step": 1177 + }, + { + "ce_ib": 4.397217273712158, + "ce_orig": 0.32822972536087036, + "epoch": 0.3384858724566827, + "kl_loss": 0.09809207916259766, + "loss_ib": 0.0014206423657014966, + "step": 1177 + }, + { + "ce_ib": 4.6463751792907715, + "ce_orig": 0.5614964962005615, + "epoch": 0.33877345603566034, + "kl_loss": 0.10629543662071228, + "loss_ib": 0.0015275919577106833, + "step": 1178 + }, + { + "ce_ib": 10.708198547363281, + "ce_orig": 2.025780439376831, + "epoch": 0.33877345603566034, + "kl_loss": 0.148756206035614, + "loss_ib": 0.002558381762355566, + "step": 1178 + }, + { + "ce_ib": 7.28439998626709, + "ce_orig": 0.9603209495544434, + "epoch": 0.33877345603566034, + "kl_loss": 0.07741572707891464, + "loss_ib": 0.0015025973552837968, + "step": 1178 + }, + { + "ce_ib": 5.248563289642334, + "ce_orig": 0.4768196642398834, + "epoch": 0.33877345603566034, + "kl_loss": 0.12366919219493866, + "loss_ib": 0.0017615482211112976, + "step": 1178 + }, + { + "ce_ib": 7.977797031402588, + "ce_orig": 1.0344756841659546, + "epoch": 0.339061039614638, + "kl_loss": 0.11780491471290588, + "loss_ib": 0.001975828781723976, + "step": 1179 + }, + { + "ce_ib": 7.562273979187012, + "ce_orig": 0.921010434627533, + "epoch": 0.339061039614638, + "kl_loss": 0.13255661725997925, + "loss_ib": 0.002081793500110507, + "step": 1179 + }, + { + "ce_ib": 4.792715072631836, + "ce_orig": 0.789234459400177, + "epoch": 0.339061039614638, + "kl_loss": 0.08384158462285995, + "loss_ib": 0.0013176873326301575, + "step": 1179 + }, + { + "ce_ib": 6.483942985534668, + "ce_orig": 1.0698708295822144, + "epoch": 0.339061039614638, + "kl_loss": 0.10677614063024521, + "loss_ib": 0.001716155675239861, + "step": 1179 + }, + { + "epoch": 0.33934862319361564, + "grad_norm": 0.09814277291297913, + "learning_rate": 4.911434499087457e-05, + "loss": 0.8816, + "step": 1180 + }, + { + "ce_ib": 6.803238868713379, + "ce_orig": 0.81837397813797, + "epoch": 0.33934862319361564, + "kl_loss": 0.15313437581062317, + "loss_ib": 0.0022116675972938538, + "step": 1180 + }, + { + "ce_ib": 4.936272144317627, + "ce_orig": 0.6542596817016602, + "epoch": 0.33934862319361564, + "kl_loss": 0.11818765103816986, + "loss_ib": 0.001675503677688539, + "step": 1180 + }, + { + "ce_ib": 6.393680572509766, + "ce_orig": 0.8188475966453552, + "epoch": 0.33934862319361564, + "kl_loss": 0.12543442845344543, + "loss_ib": 0.0018937122076749802, + "step": 1180 + }, + { + "ce_ib": 4.207862854003906, + "ce_orig": 0.602934718132019, + "epoch": 0.33934862319361564, + "kl_loss": 0.13584521412849426, + "loss_ib": 0.0017792383441701531, + "step": 1180 + }, + { + "ce_ib": 7.836092472076416, + "ce_orig": 1.3347225189208984, + "epoch": 0.33963620677259326, + "kl_loss": 0.1234862357378006, + "loss_ib": 0.0020184717141091824, + "step": 1181 + }, + { + "ce_ib": 5.5221757888793945, + "ce_orig": 0.4668470323085785, + "epoch": 0.33963620677259326, + "kl_loss": 0.1307273656129837, + "loss_ib": 0.0018594911089166999, + "step": 1181 + }, + { + "ce_ib": 8.858410835266113, + "ce_orig": 1.2801547050476074, + "epoch": 0.33963620677259326, + "kl_loss": 0.14932915568351746, + "loss_ib": 0.0023791324347257614, + "step": 1181 + }, + { + "ce_ib": 4.417026996612549, + "ce_orig": 0.6737905144691467, + "epoch": 0.33963620677259326, + "kl_loss": 0.08919131010770798, + "loss_ib": 0.001333615742623806, + "step": 1181 + }, + { + "ce_ib": 5.903581619262695, + "ce_orig": 0.5304061770439148, + "epoch": 0.33992379035157094, + "kl_loss": 0.09965941309928894, + "loss_ib": 0.0015869521303102374, + "step": 1182 + }, + { + "ce_ib": 9.489951133728027, + "ce_orig": 1.0668960809707642, + "epoch": 0.33992379035157094, + "kl_loss": 0.13894206285476685, + "loss_ib": 0.0023384157102555037, + "step": 1182 + }, + { + "ce_ib": 8.417776107788086, + "ce_orig": 1.4453734159469604, + "epoch": 0.33992379035157094, + "kl_loss": 0.15153531730175018, + "loss_ib": 0.0023571306373924017, + "step": 1182 + }, + { + "ce_ib": 9.380576133728027, + "ce_orig": 1.4487861394882202, + "epoch": 0.33992379035157094, + "kl_loss": 0.07837877422571182, + "loss_ib": 0.0017218452412635088, + "step": 1182 + }, + { + "ce_ib": 5.527102470397949, + "ce_orig": 0.7645937204360962, + "epoch": 0.34021137393054857, + "kl_loss": 0.3171248435974121, + "loss_ib": 0.0037239587400108576, + "step": 1183 + }, + { + "ce_ib": 5.285163879394531, + "ce_orig": 0.40946123003959656, + "epoch": 0.34021137393054857, + "kl_loss": 0.11538106203079224, + "loss_ib": 0.0016823268961161375, + "step": 1183 + }, + { + "ce_ib": 6.348681449890137, + "ce_orig": 1.2779086828231812, + "epoch": 0.34021137393054857, + "kl_loss": 0.11431214213371277, + "loss_ib": 0.0017779895570129156, + "step": 1183 + }, + { + "ce_ib": 7.705173969268799, + "ce_orig": 1.1170061826705933, + "epoch": 0.34021137393054857, + "kl_loss": 0.12195007503032684, + "loss_ib": 0.001990018179640174, + "step": 1183 + }, + { + "ce_ib": 6.010852813720703, + "ce_orig": 0.9313013553619385, + "epoch": 0.3404989575095262, + "kl_loss": 0.10329874604940414, + "loss_ib": 0.0016340726288035512, + "step": 1184 + }, + { + "ce_ib": 6.481108665466309, + "ce_orig": 0.8383347392082214, + "epoch": 0.3404989575095262, + "kl_loss": 0.12615548074245453, + "loss_ib": 0.0019096657633781433, + "step": 1184 + }, + { + "ce_ib": 5.928338527679443, + "ce_orig": 0.5312626361846924, + "epoch": 0.3404989575095262, + "kl_loss": 0.16932004690170288, + "loss_ib": 0.002286034170538187, + "step": 1184 + }, + { + "ce_ib": 6.648979187011719, + "ce_orig": 1.1185437440872192, + "epoch": 0.3404989575095262, + "kl_loss": 0.12974053621292114, + "loss_ib": 0.0019623031839728355, + "step": 1184 + }, + { + "epoch": 0.34078654108850387, + "grad_norm": 0.09255577623844147, + "learning_rate": 4.9104078872652356e-05, + "loss": 0.8955, + "step": 1185 + }, + { + "ce_ib": 6.759406089782715, + "ce_orig": 0.7852493524551392, + "epoch": 0.34078654108850387, + "kl_loss": 0.10004919767379761, + "loss_ib": 0.0016764324391260743, + "step": 1185 + }, + { + "ce_ib": 5.248275279998779, + "ce_orig": 0.7133815288543701, + "epoch": 0.34078654108850387, + "kl_loss": 0.07941492646932602, + "loss_ib": 0.0013189767487347126, + "step": 1185 + }, + { + "ce_ib": 6.77577543258667, + "ce_orig": 1.3345876932144165, + "epoch": 0.34078654108850387, + "kl_loss": 0.18256714940071106, + "loss_ib": 0.0025032490957528353, + "step": 1185 + }, + { + "ce_ib": 5.814952373504639, + "ce_orig": 0.705886960029602, + "epoch": 0.34078654108850387, + "kl_loss": 0.17423757910728455, + "loss_ib": 0.00232387101277709, + "step": 1185 + }, + { + "ce_ib": 7.930660247802734, + "ce_orig": 0.6901426911354065, + "epoch": 0.3410741246674815, + "kl_loss": 0.121292844414711, + "loss_ib": 0.0020059943199157715, + "step": 1186 + }, + { + "ce_ib": 7.607260227203369, + "ce_orig": 0.8918116092681885, + "epoch": 0.3410741246674815, + "kl_loss": 0.12813860177993774, + "loss_ib": 0.0020421119406819344, + "step": 1186 + }, + { + "ce_ib": 4.995139122009277, + "ce_orig": 0.812433123588562, + "epoch": 0.3410741246674815, + "kl_loss": 0.08000831305980682, + "loss_ib": 0.001299596973694861, + "step": 1186 + }, + { + "ce_ib": 7.650750160217285, + "ce_orig": 1.2010334730148315, + "epoch": 0.3410741246674815, + "kl_loss": 0.12894907593727112, + "loss_ib": 0.002054565818980336, + "step": 1186 + }, + { + "ce_ib": 8.260749816894531, + "ce_orig": 1.1590336561203003, + "epoch": 0.3413617082464591, + "kl_loss": 0.10733570158481598, + "loss_ib": 0.0018994319252669811, + "step": 1187 + }, + { + "ce_ib": 2.059155225753784, + "ce_orig": 0.17005686461925507, + "epoch": 0.3413617082464591, + "kl_loss": 0.29403194785118103, + "loss_ib": 0.0031462348997592926, + "step": 1187 + }, + { + "ce_ib": 5.506524085998535, + "ce_orig": 0.7021316885948181, + "epoch": 0.3413617082464591, + "kl_loss": 0.10228325426578522, + "loss_ib": 0.001573484973050654, + "step": 1187 + }, + { + "ce_ib": 7.717523574829102, + "ce_orig": 1.5595512390136719, + "epoch": 0.3413617082464591, + "kl_loss": 0.22363264858722687, + "loss_ib": 0.003008078783750534, + "step": 1187 + }, + { + "ce_ib": 5.724874973297119, + "ce_orig": 0.5891698598861694, + "epoch": 0.34164929182543674, + "kl_loss": 0.27257198095321655, + "loss_ib": 0.0032982071861624718, + "step": 1188 + }, + { + "ce_ib": 6.959388732910156, + "ce_orig": 0.865048348903656, + "epoch": 0.34164929182543674, + "kl_loss": 0.1387251764535904, + "loss_ib": 0.002083190716803074, + "step": 1188 + }, + { + "ce_ib": 5.347512722015381, + "ce_orig": 0.6758795976638794, + "epoch": 0.34164929182543674, + "kl_loss": 0.09549650549888611, + "loss_ib": 0.00148971623275429, + "step": 1188 + }, + { + "ce_ib": 5.358241081237793, + "ce_orig": 0.5190110802650452, + "epoch": 0.34164929182543674, + "kl_loss": 0.14641422033309937, + "loss_ib": 0.001999966334551573, + "step": 1188 + }, + { + "ce_ib": 6.069690704345703, + "ce_orig": 0.5385083556175232, + "epoch": 0.3419368754044144, + "kl_loss": 0.14202791452407837, + "loss_ib": 0.0020272480323910713, + "step": 1189 + }, + { + "ce_ib": 4.275310039520264, + "ce_orig": 0.9987708330154419, + "epoch": 0.3419368754044144, + "kl_loss": 0.09483664482831955, + "loss_ib": 0.0013758974382653832, + "step": 1189 + }, + { + "ce_ib": 6.6289262771606445, + "ce_orig": 0.8859413862228394, + "epoch": 0.3419368754044144, + "kl_loss": 0.14262329041957855, + "loss_ib": 0.002089125569909811, + "step": 1189 + }, + { + "ce_ib": 7.901646614074707, + "ce_orig": 1.298345685005188, + "epoch": 0.3419368754044144, + "kl_loss": 0.1411547064781189, + "loss_ib": 0.0022017115261405706, + "step": 1189 + }, + { + "epoch": 0.34222445898339204, + "grad_norm": 0.09175170212984085, + "learning_rate": 4.9093754682109474e-05, + "loss": 0.8886, + "step": 1190 + }, + { + "ce_ib": 8.60566234588623, + "ce_orig": 1.3086930513381958, + "epoch": 0.34222445898339204, + "kl_loss": 0.12472963333129883, + "loss_ib": 0.0021078623831272125, + "step": 1190 + }, + { + "ce_ib": 5.091987133026123, + "ce_orig": 0.3752136528491974, + "epoch": 0.34222445898339204, + "kl_loss": 0.16557569801807404, + "loss_ib": 0.0021649557165801525, + "step": 1190 + }, + { + "ce_ib": 5.3551025390625, + "ce_orig": 0.884888231754303, + "epoch": 0.34222445898339204, + "kl_loss": 0.22206942737102509, + "loss_ib": 0.002756204456090927, + "step": 1190 + }, + { + "ce_ib": 5.993772029876709, + "ce_orig": 0.8438685536384583, + "epoch": 0.34222445898339204, + "kl_loss": 0.11380597203969955, + "loss_ib": 0.001737436861731112, + "step": 1190 + }, + { + "ce_ib": 4.851998329162598, + "ce_orig": 0.5593371987342834, + "epoch": 0.34251204256236967, + "kl_loss": 0.16617000102996826, + "loss_ib": 0.002146899700164795, + "step": 1191 + }, + { + "ce_ib": 5.660830974578857, + "ce_orig": 0.27298951148986816, + "epoch": 0.34251204256236967, + "kl_loss": 0.20350384712219238, + "loss_ib": 0.0026011215522885323, + "step": 1191 + }, + { + "ce_ib": 10.260374069213867, + "ce_orig": 1.6395344734191895, + "epoch": 0.34251204256236967, + "kl_loss": 0.17721888422966003, + "loss_ib": 0.002798226196318865, + "step": 1191 + }, + { + "ce_ib": 7.967227935791016, + "ce_orig": 0.7440553307533264, + "epoch": 0.34251204256236967, + "kl_loss": 0.14830437302589417, + "loss_ib": 0.002279766369611025, + "step": 1191 + }, + { + "ce_ib": 13.623431205749512, + "ce_orig": 0.7643979787826538, + "epoch": 0.34279962614134735, + "kl_loss": 0.3118378520011902, + "loss_ib": 0.004480721428990364, + "step": 1192 + }, + { + "ce_ib": 5.249303817749023, + "ce_orig": 0.8421902060508728, + "epoch": 0.34279962614134735, + "kl_loss": 0.08752257376909256, + "loss_ib": 0.001400155946612358, + "step": 1192 + }, + { + "ce_ib": 3.79552960395813, + "ce_orig": 0.5759400129318237, + "epoch": 0.34279962614134735, + "kl_loss": 0.11850893497467041, + "loss_ib": 0.0015646422980353236, + "step": 1192 + }, + { + "ce_ib": 6.008509635925293, + "ce_orig": 0.8309887647628784, + "epoch": 0.34279962614134735, + "kl_loss": 0.14160123467445374, + "loss_ib": 0.0020168630871921778, + "step": 1192 + }, + { + "ce_ib": 5.897069454193115, + "ce_orig": 0.6842920780181885, + "epoch": 0.34308720972032497, + "kl_loss": 0.15098875761032104, + "loss_ib": 0.00209959433414042, + "step": 1193 + }, + { + "ce_ib": 5.2511305809021, + "ce_orig": 0.5834655165672302, + "epoch": 0.34308720972032497, + "kl_loss": 0.08498527109622955, + "loss_ib": 0.0013749657664448023, + "step": 1193 + }, + { + "ce_ib": 3.6320414543151855, + "ce_orig": 0.3882826268672943, + "epoch": 0.34308720972032497, + "kl_loss": 0.13914722204208374, + "loss_ib": 0.0017546763410791755, + "step": 1193 + }, + { + "ce_ib": 7.605522632598877, + "ce_orig": 1.264225721359253, + "epoch": 0.34308720972032497, + "kl_loss": 0.1367885321378708, + "loss_ib": 0.0021284373942762613, + "step": 1193 + }, + { + "ce_ib": 7.945181846618652, + "ce_orig": 1.2413365840911865, + "epoch": 0.3433747932993026, + "kl_loss": 0.13250866532325745, + "loss_ib": 0.002119604730978608, + "step": 1194 + }, + { + "ce_ib": 5.88801383972168, + "ce_orig": 0.8613604307174683, + "epoch": 0.3433747932993026, + "kl_loss": 0.07940033078193665, + "loss_ib": 0.001382804592140019, + "step": 1194 + }, + { + "ce_ib": 8.324530601501465, + "ce_orig": 1.506277084350586, + "epoch": 0.3433747932993026, + "kl_loss": 0.1109517514705658, + "loss_ib": 0.0019419705495238304, + "step": 1194 + }, + { + "ce_ib": 6.082563400268555, + "ce_orig": 0.7858896255493164, + "epoch": 0.3433747932993026, + "kl_loss": 0.09026671200990677, + "loss_ib": 0.001510923495516181, + "step": 1194 + }, + { + "epoch": 0.3436623768782803, + "grad_norm": 0.09013213962316513, + "learning_rate": 4.908337244411927e-05, + "loss": 0.9225, + "step": 1195 + }, + { + "ce_ib": 5.888381004333496, + "ce_orig": 0.5251995921134949, + "epoch": 0.3436623768782803, + "kl_loss": 0.09415371716022491, + "loss_ib": 0.0015303750988095999, + "step": 1195 + }, + { + "ce_ib": 3.702908754348755, + "ce_orig": 0.6380143165588379, + "epoch": 0.3436623768782803, + "kl_loss": 0.09093981981277466, + "loss_ib": 0.0012796890223398805, + "step": 1195 + }, + { + "ce_ib": 9.148548126220703, + "ce_orig": 0.8597902059555054, + "epoch": 0.3436623768782803, + "kl_loss": 0.12271426618099213, + "loss_ib": 0.0021419974509626627, + "step": 1195 + }, + { + "ce_ib": 5.375368118286133, + "ce_orig": 0.5654981732368469, + "epoch": 0.3436623768782803, + "kl_loss": 0.09991110116243362, + "loss_ib": 0.0015366477891802788, + "step": 1195 + }, + { + "ce_ib": 5.33026647567749, + "ce_orig": 0.785590648651123, + "epoch": 0.3439499604572579, + "kl_loss": 0.10818203538656235, + "loss_ib": 0.0016148469876497984, + "step": 1196 + }, + { + "ce_ib": 6.909395694732666, + "ce_orig": 0.8492533564567566, + "epoch": 0.3439499604572579, + "kl_loss": 0.14566785097122192, + "loss_ib": 0.002147617982700467, + "step": 1196 + }, + { + "ce_ib": 6.664641380310059, + "ce_orig": 0.9143601059913635, + "epoch": 0.3439499604572579, + "kl_loss": 0.0853910744190216, + "loss_ib": 0.0015203747898340225, + "step": 1196 + }, + { + "ce_ib": 4.416428565979004, + "ce_orig": 0.30311673879623413, + "epoch": 0.3439499604572579, + "kl_loss": 0.09974046051502228, + "loss_ib": 0.0014390473952516913, + "step": 1196 + }, + { + "ce_ib": 4.570541858673096, + "ce_orig": 0.5030437111854553, + "epoch": 0.3442375440362355, + "kl_loss": 0.08337417244911194, + "loss_ib": 0.0012907959753647447, + "step": 1197 + }, + { + "ce_ib": 4.714263916015625, + "ce_orig": 0.5458657145500183, + "epoch": 0.3442375440362355, + "kl_loss": 0.07602685689926147, + "loss_ib": 0.0012316949432715774, + "step": 1197 + }, + { + "ce_ib": 5.780970096588135, + "ce_orig": 0.5303636789321899, + "epoch": 0.3442375440362355, + "kl_loss": 0.12981534004211426, + "loss_ib": 0.0018762502586469054, + "step": 1197 + }, + { + "ce_ib": 5.22108793258667, + "ce_orig": 0.7108772993087769, + "epoch": 0.3442375440362355, + "kl_loss": 0.08436115086078644, + "loss_ib": 0.0013657202944159508, + "step": 1197 + }, + { + "ce_ib": 8.01385498046875, + "ce_orig": 1.0963972806930542, + "epoch": 0.34452512761521314, + "kl_loss": 0.13373345136642456, + "loss_ib": 0.002138719893991947, + "step": 1198 + }, + { + "ce_ib": 6.439573287963867, + "ce_orig": 1.1356852054595947, + "epoch": 0.34452512761521314, + "kl_loss": 0.11461237072944641, + "loss_ib": 0.0017900809179991484, + "step": 1198 + }, + { + "ce_ib": 6.095149517059326, + "ce_orig": 0.6653095483779907, + "epoch": 0.34452512761521314, + "kl_loss": 0.1102496087551117, + "loss_ib": 0.0017120110569521785, + "step": 1198 + }, + { + "ce_ib": 8.758551597595215, + "ce_orig": 1.2035597562789917, + "epoch": 0.34452512761521314, + "kl_loss": 0.1343771070241928, + "loss_ib": 0.0022196262143552303, + "step": 1198 + }, + { + "ce_ib": 6.242119789123535, + "ce_orig": 0.6952025294303894, + "epoch": 0.3448127111941908, + "kl_loss": 0.09257640689611435, + "loss_ib": 0.0015499759465456009, + "step": 1199 + }, + { + "ce_ib": 6.205290794372559, + "ce_orig": 0.7195900678634644, + "epoch": 0.3448127111941908, + "kl_loss": 0.19770804047584534, + "loss_ib": 0.0025976093020290136, + "step": 1199 + }, + { + "ce_ib": 7.515991687774658, + "ce_orig": 0.9152024388313293, + "epoch": 0.3448127111941908, + "kl_loss": 0.0929107517004013, + "loss_ib": 0.001680706744082272, + "step": 1199 + }, + { + "ce_ib": 6.326588153839111, + "ce_orig": 0.9795824885368347, + "epoch": 0.3448127111941908, + "kl_loss": 0.13213500380516052, + "loss_ib": 0.00195400882512331, + "step": 1199 + }, + { + "epoch": 0.34510029477316845, + "grad_norm": 0.09391340613365173, + "learning_rate": 4.907293218369499e-05, + "loss": 0.8149, + "step": 1200 + }, + { + "ce_ib": 2.401818037033081, + "ce_orig": 0.34821420907974243, + "epoch": 0.34510029477316845, + "kl_loss": 0.2819208800792694, + "loss_ib": 0.003059390466660261, + "step": 1200 + }, + { + "ce_ib": 3.6997857093811035, + "ce_orig": 0.4937102794647217, + "epoch": 0.34510029477316845, + "kl_loss": 0.07171545922756195, + "loss_ib": 0.0010871330741792917, + "step": 1200 + }, + { + "ce_ib": 2.2247281074523926, + "ce_orig": 0.21438711881637573, + "epoch": 0.34510029477316845, + "kl_loss": 0.10463137924671173, + "loss_ib": 0.0012687866110354662, + "step": 1200 + }, + { + "ce_ib": 3.6803174018859863, + "ce_orig": 0.5729905366897583, + "epoch": 0.34510029477316845, + "kl_loss": 0.06283712387084961, + "loss_ib": 0.0009964029304683208, + "step": 1200 + }, + { + "ce_ib": 6.305063247680664, + "ce_orig": 0.8172087073326111, + "epoch": 0.34538787835214607, + "kl_loss": 0.13369494676589966, + "loss_ib": 0.0019674557261168957, + "step": 1201 + }, + { + "ce_ib": 5.51954984664917, + "ce_orig": 1.072908878326416, + "epoch": 0.34538787835214607, + "kl_loss": 0.07258590310811996, + "loss_ib": 0.001277813920751214, + "step": 1201 + }, + { + "ce_ib": 8.108406066894531, + "ce_orig": 1.3059308528900146, + "epoch": 0.34538787835214607, + "kl_loss": 0.1217356026172638, + "loss_ib": 0.002028196584433317, + "step": 1201 + }, + { + "ce_ib": 7.712138652801514, + "ce_orig": 0.9945723414421082, + "epoch": 0.34538787835214607, + "kl_loss": 0.08213340491056442, + "loss_ib": 0.0015925478655844927, + "step": 1201 + }, + { + "ce_ib": 8.866911888122559, + "ce_orig": 0.5718355774879456, + "epoch": 0.34567546193112375, + "kl_loss": 0.17121396958827972, + "loss_ib": 0.002598830731585622, + "step": 1202 + }, + { + "ce_ib": 6.687133312225342, + "ce_orig": 1.0171817541122437, + "epoch": 0.34567546193112375, + "kl_loss": 0.1398601233959198, + "loss_ib": 0.002067314460873604, + "step": 1202 + }, + { + "ce_ib": 5.847391128540039, + "ce_orig": 0.6118772625923157, + "epoch": 0.34567546193112375, + "kl_loss": 0.18488171696662903, + "loss_ib": 0.00243355636484921, + "step": 1202 + }, + { + "ce_ib": 5.880655765533447, + "ce_orig": 0.7155951261520386, + "epoch": 0.34567546193112375, + "kl_loss": 0.1624474674463272, + "loss_ib": 0.0022125402465462685, + "step": 1202 + }, + { + "ce_ib": 3.520111083984375, + "ce_orig": 0.8310386538505554, + "epoch": 0.3459630455101014, + "kl_loss": 0.08313309401273727, + "loss_ib": 0.0011833419557660818, + "step": 1203 + }, + { + "ce_ib": 3.7372233867645264, + "ce_orig": 0.7661170959472656, + "epoch": 0.3459630455101014, + "kl_loss": 0.07800716161727905, + "loss_ib": 0.0011537938844412565, + "step": 1203 + }, + { + "ce_ib": 9.536810874938965, + "ce_orig": 1.6414639949798584, + "epoch": 0.3459630455101014, + "kl_loss": 0.12650559842586517, + "loss_ib": 0.002218737034127116, + "step": 1203 + }, + { + "ce_ib": 8.819865226745605, + "ce_orig": 1.2084544897079468, + "epoch": 0.3459630455101014, + "kl_loss": 0.12717044353485107, + "loss_ib": 0.002153690904378891, + "step": 1203 + }, + { + "ce_ib": 6.483260631561279, + "ce_orig": 0.8722451329231262, + "epoch": 0.346250629089079, + "kl_loss": 0.12065816670656204, + "loss_ib": 0.0018549077212810516, + "step": 1204 + }, + { + "ce_ib": 8.621737480163574, + "ce_orig": 1.4052873849868774, + "epoch": 0.346250629089079, + "kl_loss": 0.1244899183511734, + "loss_ib": 0.0021070728544145823, + "step": 1204 + }, + { + "ce_ib": 7.135779857635498, + "ce_orig": 0.940719723701477, + "epoch": 0.346250629089079, + "kl_loss": 0.1017652228474617, + "loss_ib": 0.001731230178847909, + "step": 1204 + }, + { + "ce_ib": 5.0328803062438965, + "ce_orig": 0.23511050641536713, + "epoch": 0.346250629089079, + "kl_loss": 0.06364941596984863, + "loss_ib": 0.0011397822527214885, + "step": 1204 + }, + { + "epoch": 0.3465382126680567, + "grad_norm": 0.08434471487998962, + "learning_rate": 4.906243392598962e-05, + "loss": 0.8444, + "step": 1205 + }, + { + "ce_ib": 5.679015636444092, + "ce_orig": 0.5664239525794983, + "epoch": 0.3465382126680567, + "kl_loss": 0.0821281373500824, + "loss_ib": 0.0013891828712075949, + "step": 1205 + }, + { + "ce_ib": 9.240987777709961, + "ce_orig": 1.2426046133041382, + "epoch": 0.3465382126680567, + "kl_loss": 0.130295991897583, + "loss_ib": 0.002227058634161949, + "step": 1205 + }, + { + "ce_ib": 6.624649524688721, + "ce_orig": 1.208556056022644, + "epoch": 0.3465382126680567, + "kl_loss": 0.06771036982536316, + "loss_ib": 0.0013395686401054263, + "step": 1205 + }, + { + "ce_ib": 7.402683258056641, + "ce_orig": 0.836148202419281, + "epoch": 0.3465382126680567, + "kl_loss": 0.17013391852378845, + "loss_ib": 0.002441607415676117, + "step": 1205 + }, + { + "ce_ib": 9.6786527633667, + "ce_orig": 1.5588017702102661, + "epoch": 0.3468257962470343, + "kl_loss": 0.12188644707202911, + "loss_ib": 0.0021867298055440187, + "step": 1206 + }, + { + "ce_ib": 6.250092506408691, + "ce_orig": 0.7235032320022583, + "epoch": 0.3468257962470343, + "kl_loss": 0.10161735117435455, + "loss_ib": 0.0016411826945841312, + "step": 1206 + }, + { + "ce_ib": 5.281203746795654, + "ce_orig": 0.6332097053527832, + "epoch": 0.3468257962470343, + "kl_loss": 0.1234881728887558, + "loss_ib": 0.0017630021320655942, + "step": 1206 + }, + { + "ce_ib": 6.397225856781006, + "ce_orig": 0.7792978286743164, + "epoch": 0.3468257962470343, + "kl_loss": 0.12359906733036041, + "loss_ib": 0.001875713118351996, + "step": 1206 + }, + { + "ce_ib": 5.468921661376953, + "ce_orig": 0.6424912214279175, + "epoch": 0.3471133798260119, + "kl_loss": 0.1228179931640625, + "loss_ib": 0.0017750720726326108, + "step": 1207 + }, + { + "ce_ib": 5.05369758605957, + "ce_orig": 0.4094921946525574, + "epoch": 0.3471133798260119, + "kl_loss": 0.10941329598426819, + "loss_ib": 0.0015995026333257556, + "step": 1207 + }, + { + "ce_ib": 7.615618705749512, + "ce_orig": 0.823706865310669, + "epoch": 0.3471133798260119, + "kl_loss": 0.11898506432771683, + "loss_ib": 0.0019514125306159258, + "step": 1207 + }, + { + "ce_ib": 5.8422112464904785, + "ce_orig": 0.4948480427265167, + "epoch": 0.3471133798260119, + "kl_loss": 0.0846252590417862, + "loss_ib": 0.0014304736396297812, + "step": 1207 + }, + { + "ce_ib": 8.193414688110352, + "ce_orig": 0.8717488646507263, + "epoch": 0.34740096340498955, + "kl_loss": 0.10458209365606308, + "loss_ib": 0.0018651623977348208, + "step": 1208 + }, + { + "ce_ib": 6.99772834777832, + "ce_orig": 0.9259209632873535, + "epoch": 0.34740096340498955, + "kl_loss": 0.12178865075111389, + "loss_ib": 0.001917659305036068, + "step": 1208 + }, + { + "ce_ib": 7.931736946105957, + "ce_orig": 1.271628975868225, + "epoch": 0.34740096340498955, + "kl_loss": 0.08694090694189072, + "loss_ib": 0.0016625827411189675, + "step": 1208 + }, + { + "ce_ib": 5.559422969818115, + "ce_orig": 0.6151393055915833, + "epoch": 0.34740096340498955, + "kl_loss": 0.07467767596244812, + "loss_ib": 0.0013027191162109375, + "step": 1208 + }, + { + "ce_ib": 3.7163591384887695, + "ce_orig": 0.48353731632232666, + "epoch": 0.34768854698396723, + "kl_loss": 0.11771485209465027, + "loss_ib": 0.0015487843193113804, + "step": 1209 + }, + { + "ce_ib": 8.291979789733887, + "ce_orig": 1.2653006315231323, + "epoch": 0.34768854698396723, + "kl_loss": 0.12108492106199265, + "loss_ib": 0.002040047198534012, + "step": 1209 + }, + { + "ce_ib": 7.506729602813721, + "ce_orig": 0.45977285504341125, + "epoch": 0.34768854698396723, + "kl_loss": 0.10264462232589722, + "loss_ib": 0.0017771191196516156, + "step": 1209 + }, + { + "ce_ib": 8.623528480529785, + "ce_orig": 1.3400567770004272, + "epoch": 0.34768854698396723, + "kl_loss": 0.13698822259902954, + "loss_ib": 0.002232234925031662, + "step": 1209 + }, + { + "epoch": 0.34797613056294485, + "grad_norm": 0.0997847318649292, + "learning_rate": 4.905187769629592e-05, + "loss": 0.8703, + "step": 1210 + }, + { + "ce_ib": 6.054376125335693, + "ce_orig": 0.675933301448822, + "epoch": 0.34797613056294485, + "kl_loss": 0.1163405105471611, + "loss_ib": 0.0017688425723463297, + "step": 1210 + }, + { + "ce_ib": 5.936535835266113, + "ce_orig": 0.8529510498046875, + "epoch": 0.34797613056294485, + "kl_loss": 0.08462969213724136, + "loss_ib": 0.0014399504289031029, + "step": 1210 + }, + { + "ce_ib": 5.637547492980957, + "ce_orig": 0.47358766198158264, + "epoch": 0.34797613056294485, + "kl_loss": 0.12910223007202148, + "loss_ib": 0.00185477698687464, + "step": 1210 + }, + { + "ce_ib": 7.844357490539551, + "ce_orig": 1.2808501720428467, + "epoch": 0.34797613056294485, + "kl_loss": 0.09163414686918259, + "loss_ib": 0.001700777094811201, + "step": 1210 + }, + { + "ce_ib": 4.482351779937744, + "ce_orig": 0.7072798013687134, + "epoch": 0.3482637141419225, + "kl_loss": 0.08652029931545258, + "loss_ib": 0.0013134380569681525, + "step": 1211 + }, + { + "ce_ib": 8.130898475646973, + "ce_orig": 1.357262372970581, + "epoch": 0.3482637141419225, + "kl_loss": 0.1302678883075714, + "loss_ib": 0.0021157688461244106, + "step": 1211 + }, + { + "ce_ib": 5.954352378845215, + "ce_orig": 0.5915926694869995, + "epoch": 0.3482637141419225, + "kl_loss": 0.19226759672164917, + "loss_ib": 0.002518111141398549, + "step": 1211 + }, + { + "ce_ib": 6.65979528427124, + "ce_orig": 0.9146237969398499, + "epoch": 0.3482637141419225, + "kl_loss": 0.15383297204971313, + "loss_ib": 0.002204309217631817, + "step": 1211 + }, + { + "ce_ib": 4.272495269775391, + "ce_orig": 0.6837877035140991, + "epoch": 0.34855129772090016, + "kl_loss": 0.0908074602484703, + "loss_ib": 0.0013353240210562944, + "step": 1212 + }, + { + "ce_ib": 4.43595552444458, + "ce_orig": 0.7194166779518127, + "epoch": 0.34855129772090016, + "kl_loss": 0.11665447056293488, + "loss_ib": 0.0016101401997730136, + "step": 1212 + }, + { + "ce_ib": 4.019759654998779, + "ce_orig": 0.540570080280304, + "epoch": 0.34855129772090016, + "kl_loss": 0.08123409003019333, + "loss_ib": 0.001214316813275218, + "step": 1212 + }, + { + "ce_ib": 7.188493728637695, + "ce_orig": 1.0330560207366943, + "epoch": 0.34855129772090016, + "kl_loss": 0.07747453451156616, + "loss_ib": 0.0014935946092009544, + "step": 1212 + }, + { + "ce_ib": 4.260969638824463, + "ce_orig": 0.3584219813346863, + "epoch": 0.3488388812998778, + "kl_loss": 0.08526185154914856, + "loss_ib": 0.0012787154410034418, + "step": 1213 + }, + { + "ce_ib": 4.712890148162842, + "ce_orig": 0.8325070142745972, + "epoch": 0.3488388812998778, + "kl_loss": 0.07010379433631897, + "loss_ib": 0.001172326970845461, + "step": 1213 + }, + { + "ce_ib": 6.134801387786865, + "ce_orig": 0.7156159281730652, + "epoch": 0.3488388812998778, + "kl_loss": 0.08078397810459137, + "loss_ib": 0.0014213197864592075, + "step": 1213 + }, + { + "ce_ib": 7.529421329498291, + "ce_orig": 0.9373047947883606, + "epoch": 0.3488388812998778, + "kl_loss": 0.16475126147270203, + "loss_ib": 0.0024004545994102955, + "step": 1213 + }, + { + "ce_ib": 5.597670078277588, + "ce_orig": 0.8782711625099182, + "epoch": 0.3491264648788554, + "kl_loss": 0.0708613395690918, + "loss_ib": 0.001268380437977612, + "step": 1214 + }, + { + "ce_ib": 7.250506401062012, + "ce_orig": 0.8862557411193848, + "epoch": 0.3491264648788554, + "kl_loss": 0.1195152997970581, + "loss_ib": 0.0019202035618945956, + "step": 1214 + }, + { + "ce_ib": 6.910298824310303, + "ce_orig": 1.1094839572906494, + "epoch": 0.3491264648788554, + "kl_loss": 0.11096344888210297, + "loss_ib": 0.0018006643513217568, + "step": 1214 + }, + { + "ce_ib": 7.909558296203613, + "ce_orig": 1.0560638904571533, + "epoch": 0.3491264648788554, + "kl_loss": 0.10399037599563599, + "loss_ib": 0.0018308594590052962, + "step": 1214 + }, + { + "epoch": 0.3494140484578331, + "grad_norm": 0.0876988098025322, + "learning_rate": 4.9041263520046286e-05, + "loss": 0.8914, + "step": 1215 + }, + { + "ce_ib": 7.24706506729126, + "ce_orig": 1.0320097208023071, + "epoch": 0.3494140484578331, + "kl_loss": 0.10548631846904755, + "loss_ib": 0.0017795696621760726, + "step": 1215 + }, + { + "ce_ib": 5.6692304611206055, + "ce_orig": 0.7191046476364136, + "epoch": 0.3494140484578331, + "kl_loss": 0.1607440710067749, + "loss_ib": 0.0021743637043982744, + "step": 1215 + }, + { + "ce_ib": 4.234683513641357, + "ce_orig": 0.5094934701919556, + "epoch": 0.3494140484578331, + "kl_loss": 0.09779174625873566, + "loss_ib": 0.0014013857580721378, + "step": 1215 + }, + { + "ce_ib": 5.873825550079346, + "ce_orig": 0.7927730679512024, + "epoch": 0.3494140484578331, + "kl_loss": 0.2151471972465515, + "loss_ib": 0.0027388546150177717, + "step": 1215 + }, + { + "ce_ib": 4.758713245391846, + "ce_orig": 0.526610255241394, + "epoch": 0.3497016320368107, + "kl_loss": 0.09265977144241333, + "loss_ib": 0.0014024690026417375, + "step": 1216 + }, + { + "ce_ib": 6.536386489868164, + "ce_orig": 0.8642744421958923, + "epoch": 0.3497016320368107, + "kl_loss": 0.12214571237564087, + "loss_ib": 0.0018750956514850259, + "step": 1216 + }, + { + "ce_ib": 6.256294250488281, + "ce_orig": 0.831453800201416, + "epoch": 0.3497016320368107, + "kl_loss": 0.2257319986820221, + "loss_ib": 0.002882949309423566, + "step": 1216 + }, + { + "ce_ib": 5.987385272979736, + "ce_orig": 0.956009566783905, + "epoch": 0.3497016320368107, + "kl_loss": 0.09195905178785324, + "loss_ib": 0.0015183290233835578, + "step": 1216 + }, + { + "ce_ib": 3.4908852577209473, + "ce_orig": 0.4334961473941803, + "epoch": 0.34998921561578833, + "kl_loss": 0.07544069737195969, + "loss_ib": 0.001103495480492711, + "step": 1217 + }, + { + "ce_ib": 5.128939628601074, + "ce_orig": 0.9095044136047363, + "epoch": 0.34998921561578833, + "kl_loss": 0.10558851063251495, + "loss_ib": 0.001568779000081122, + "step": 1217 + }, + { + "ce_ib": 3.9634666442871094, + "ce_orig": 0.8367429971694946, + "epoch": 0.34998921561578833, + "kl_loss": 0.05537325143814087, + "loss_ib": 0.0009500791784375906, + "step": 1217 + }, + { + "ce_ib": 3.037557601928711, + "ce_orig": 0.17842040956020355, + "epoch": 0.34998921561578833, + "kl_loss": 0.1399865448474884, + "loss_ib": 0.0017036211211234331, + "step": 1217 + }, + { + "ce_ib": 5.171171188354492, + "ce_orig": 0.7285898923873901, + "epoch": 0.35027679919476595, + "kl_loss": 0.07729800045490265, + "loss_ib": 0.0012900970177724957, + "step": 1218 + }, + { + "ce_ib": 7.920377254486084, + "ce_orig": 1.0457279682159424, + "epoch": 0.35027679919476595, + "kl_loss": 0.13923516869544983, + "loss_ib": 0.00218438939191401, + "step": 1218 + }, + { + "ce_ib": 2.397667646408081, + "ce_orig": 0.20700818300247192, + "epoch": 0.35027679919476595, + "kl_loss": 0.27611932158470154, + "loss_ib": 0.003000959986820817, + "step": 1218 + }, + { + "ce_ib": 6.343082904815674, + "ce_orig": 0.9448485374450684, + "epoch": 0.35027679919476595, + "kl_loss": 0.1542467325925827, + "loss_ib": 0.002176775597035885, + "step": 1218 + }, + { + "ce_ib": 5.304510593414307, + "ce_orig": 0.5575739741325378, + "epoch": 0.35056438277374363, + "kl_loss": 0.17677997052669525, + "loss_ib": 0.0022982507944107056, + "step": 1219 + }, + { + "ce_ib": 11.7191801071167, + "ce_orig": 1.7555348873138428, + "epoch": 0.35056438277374363, + "kl_loss": 0.1374063789844513, + "loss_ib": 0.0025459816679358482, + "step": 1219 + }, + { + "ce_ib": 6.514389991760254, + "ce_orig": 1.0503309965133667, + "epoch": 0.35056438277374363, + "kl_loss": 0.09809248894453049, + "loss_ib": 0.0016323637682944536, + "step": 1219 + }, + { + "ce_ib": 7.660717487335205, + "ce_orig": 1.3491915464401245, + "epoch": 0.35056438277374363, + "kl_loss": 0.11068889498710632, + "loss_ib": 0.00187296059448272, + "step": 1219 + }, + { + "epoch": 0.35085196635272126, + "grad_norm": 0.09520356357097626, + "learning_rate": 4.903059142281273e-05, + "loss": 0.8888, + "step": 1220 + }, + { + "ce_ib": 5.612930774688721, + "ce_orig": 1.0689162015914917, + "epoch": 0.35085196635272126, + "kl_loss": 0.09737086296081543, + "loss_ib": 0.001535001676529646, + "step": 1220 + }, + { + "ce_ib": 7.617819786071777, + "ce_orig": 0.7557920813560486, + "epoch": 0.35085196635272126, + "kl_loss": 0.10410600900650024, + "loss_ib": 0.0018028420163318515, + "step": 1220 + }, + { + "ce_ib": 5.905435085296631, + "ce_orig": 0.7370005249977112, + "epoch": 0.35085196635272126, + "kl_loss": 0.14456571638584137, + "loss_ib": 0.0020362006034702063, + "step": 1220 + }, + { + "ce_ib": 4.493112087249756, + "ce_orig": 0.5472113490104675, + "epoch": 0.35085196635272126, + "kl_loss": 0.40910571813583374, + "loss_ib": 0.0045403684489429, + "step": 1220 + }, + { + "ce_ib": 4.242155075073242, + "ce_orig": 0.4515751004219055, + "epoch": 0.3511395499316989, + "kl_loss": 0.0820990800857544, + "loss_ib": 0.0012452062219381332, + "step": 1221 + }, + { + "ce_ib": 8.886751174926758, + "ce_orig": 1.421873927116394, + "epoch": 0.3511395499316989, + "kl_loss": 0.11533261835575104, + "loss_ib": 0.002042001113295555, + "step": 1221 + }, + { + "ce_ib": 6.462634563446045, + "ce_orig": 0.9847761392593384, + "epoch": 0.3511395499316989, + "kl_loss": 0.2458612620830536, + "loss_ib": 0.0031048760283738375, + "step": 1221 + }, + { + "ce_ib": 6.834673881530762, + "ce_orig": 0.8003082871437073, + "epoch": 0.3511395499316989, + "kl_loss": 0.1408001333475113, + "loss_ib": 0.0020914687775075436, + "step": 1221 + }, + { + "ce_ib": 4.3196868896484375, + "ce_orig": 0.7693087458610535, + "epoch": 0.35142713351067656, + "kl_loss": 0.05976279079914093, + "loss_ib": 0.0010295965475961566, + "step": 1222 + }, + { + "ce_ib": 9.042092323303223, + "ce_orig": 1.5040372610092163, + "epoch": 0.35142713351067656, + "kl_loss": 0.1564899981021881, + "loss_ib": 0.0024691091384738684, + "step": 1222 + }, + { + "ce_ib": 5.826852798461914, + "ce_orig": 0.7353617548942566, + "epoch": 0.35142713351067656, + "kl_loss": 0.1619482934474945, + "loss_ib": 0.002202168107032776, + "step": 1222 + }, + { + "ce_ib": 5.496313571929932, + "ce_orig": 0.5784755349159241, + "epoch": 0.35142713351067656, + "kl_loss": 0.15220040082931519, + "loss_ib": 0.0020716353319585323, + "step": 1222 + }, + { + "ce_ib": 9.001765251159668, + "ce_orig": 1.6193560361862183, + "epoch": 0.3517147170896542, + "kl_loss": 0.12370666861534119, + "loss_ib": 0.0021372430492192507, + "step": 1223 + }, + { + "ce_ib": 7.717227935791016, + "ce_orig": 1.40751314163208, + "epoch": 0.3517147170896542, + "kl_loss": 0.12144547700881958, + "loss_ib": 0.0019861774053424597, + "step": 1223 + }, + { + "ce_ib": 3.555147409439087, + "ce_orig": 0.661032497882843, + "epoch": 0.3517147170896542, + "kl_loss": 0.09536592662334442, + "loss_ib": 0.0013091739965602756, + "step": 1223 + }, + { + "ce_ib": 6.92410945892334, + "ce_orig": 0.7192381024360657, + "epoch": 0.3517147170896542, + "kl_loss": 0.1294439435005188, + "loss_ib": 0.0019868502859026194, + "step": 1223 + }, + { + "ce_ib": 7.191586494445801, + "ce_orig": 1.0977661609649658, + "epoch": 0.3520023006686318, + "kl_loss": 0.11238360404968262, + "loss_ib": 0.001842994592152536, + "step": 1224 + }, + { + "ce_ib": 4.655055046081543, + "ce_orig": 0.49468541145324707, + "epoch": 0.3520023006686318, + "kl_loss": 0.06912495940923691, + "loss_ib": 0.0011567550245672464, + "step": 1224 + }, + { + "ce_ib": 7.1955413818359375, + "ce_orig": 0.7798792719841003, + "epoch": 0.3520023006686318, + "kl_loss": 0.13946586847305298, + "loss_ib": 0.002114212838932872, + "step": 1224 + }, + { + "ce_ib": 6.654889106750488, + "ce_orig": 0.7581393718719482, + "epoch": 0.3520023006686318, + "kl_loss": 0.1490647792816162, + "loss_ib": 0.0021561365574598312, + "step": 1224 + }, + { + "epoch": 0.3522898842476095, + "grad_norm": 0.09010814130306244, + "learning_rate": 4.9019861430306826e-05, + "loss": 0.8805, + "step": 1225 + }, + { + "ce_ib": 7.411494255065918, + "ce_orig": 1.2153384685516357, + "epoch": 0.3522898842476095, + "kl_loss": 0.15417200326919556, + "loss_ib": 0.0022828695364296436, + "step": 1225 + }, + { + "ce_ib": 8.146852493286133, + "ce_orig": 1.3873924016952515, + "epoch": 0.3522898842476095, + "kl_loss": 0.43568554520606995, + "loss_ib": 0.005171540658921003, + "step": 1225 + }, + { + "ce_ib": 7.1912007331848145, + "ce_orig": 1.285669207572937, + "epoch": 0.3522898842476095, + "kl_loss": 0.10966229438781738, + "loss_ib": 0.0018157429294660687, + "step": 1225 + }, + { + "ce_ib": 8.135693550109863, + "ce_orig": 1.074797511100769, + "epoch": 0.3522898842476095, + "kl_loss": 0.09935620427131653, + "loss_ib": 0.0018071314552798867, + "step": 1225 + }, + { + "ce_ib": 4.371519088745117, + "ce_orig": 0.5803431868553162, + "epoch": 0.3525774678265871, + "kl_loss": 0.077149398624897, + "loss_ib": 0.0012086458737030625, + "step": 1226 + }, + { + "ce_ib": 5.3036208152771, + "ce_orig": 0.7989436388015747, + "epoch": 0.3525774678265871, + "kl_loss": 0.06289440393447876, + "loss_ib": 0.00115930603351444, + "step": 1226 + }, + { + "ce_ib": 6.5967912673950195, + "ce_orig": 0.8013715744018555, + "epoch": 0.3525774678265871, + "kl_loss": 0.12518391013145447, + "loss_ib": 0.0019115182803943753, + "step": 1226 + }, + { + "ce_ib": 8.87879467010498, + "ce_orig": 1.0425429344177246, + "epoch": 0.3525774678265871, + "kl_loss": 0.1504276543855667, + "loss_ib": 0.0023921558167785406, + "step": 1226 + }, + { + "ce_ib": 4.603278160095215, + "ce_orig": 0.5779634118080139, + "epoch": 0.35286505140556473, + "kl_loss": 0.11667022109031677, + "loss_ib": 0.0016270300839096308, + "step": 1227 + }, + { + "ce_ib": 7.099244594573975, + "ce_orig": 1.0896610021591187, + "epoch": 0.35286505140556473, + "kl_loss": 0.0980406403541565, + "loss_ib": 0.0016903307987377048, + "step": 1227 + }, + { + "ce_ib": 6.287186145782471, + "ce_orig": 0.5730931758880615, + "epoch": 0.35286505140556473, + "kl_loss": 0.1500208079814911, + "loss_ib": 0.002128926571458578, + "step": 1227 + }, + { + "ce_ib": 8.6245756149292, + "ce_orig": 1.530518651008606, + "epoch": 0.35286505140556473, + "kl_loss": 0.141280397772789, + "loss_ib": 0.00227526156231761, + "step": 1227 + }, + { + "ce_ib": 3.7510311603546143, + "ce_orig": 0.5379764437675476, + "epoch": 0.35315263498454236, + "kl_loss": 0.09724898636341095, + "loss_ib": 0.0013475929154083133, + "step": 1228 + }, + { + "ce_ib": 8.606772422790527, + "ce_orig": 1.2441927194595337, + "epoch": 0.35315263498454236, + "kl_loss": 0.12222611159086227, + "loss_ib": 0.0020829380955547094, + "step": 1228 + }, + { + "ce_ib": 6.924900054931641, + "ce_orig": 1.4320727586746216, + "epoch": 0.35315263498454236, + "kl_loss": 0.08893856406211853, + "loss_ib": 0.0015818756073713303, + "step": 1228 + }, + { + "ce_ib": 11.847168922424316, + "ce_orig": 2.392171859741211, + "epoch": 0.35315263498454236, + "kl_loss": 0.08733272552490234, + "loss_ib": 0.0020580440759658813, + "step": 1228 + }, + { + "ce_ib": 6.087507724761963, + "ce_orig": 0.901356041431427, + "epoch": 0.35344021856352004, + "kl_loss": 0.1012146919965744, + "loss_ib": 0.0016208975575864315, + "step": 1229 + }, + { + "ce_ib": 3.3782029151916504, + "ce_orig": 0.6678668260574341, + "epoch": 0.35344021856352004, + "kl_loss": 0.0639922171831131, + "loss_ib": 0.000977742369286716, + "step": 1229 + }, + { + "ce_ib": 8.152597427368164, + "ce_orig": 1.268547534942627, + "epoch": 0.35344021856352004, + "kl_loss": 0.11389666050672531, + "loss_ib": 0.001954226288944483, + "step": 1229 + }, + { + "ce_ib": 7.217771053314209, + "ce_orig": 1.30826997756958, + "epoch": 0.35344021856352004, + "kl_loss": 0.09987150877714157, + "loss_ib": 0.0017204922623932362, + "step": 1229 + }, + { + "epoch": 0.35372780214249766, + "grad_norm": 0.13646991550922394, + "learning_rate": 4.900907356837961e-05, + "loss": 0.8732, + "step": 1230 + }, + { + "ce_ib": 8.69139575958252, + "ce_orig": 1.6489487886428833, + "epoch": 0.35372780214249766, + "kl_loss": 0.09994374215602875, + "loss_ib": 0.0018685769755393267, + "step": 1230 + }, + { + "ce_ib": 6.952277660369873, + "ce_orig": 1.0789859294891357, + "epoch": 0.35372780214249766, + "kl_loss": 0.08018951863050461, + "loss_ib": 0.0014971229247748852, + "step": 1230 + }, + { + "ce_ib": 6.629817485809326, + "ce_orig": 0.8269151449203491, + "epoch": 0.35372780214249766, + "kl_loss": 0.07821352034807205, + "loss_ib": 0.0014451169408857822, + "step": 1230 + }, + { + "ce_ib": 10.143780708312988, + "ce_orig": 1.7797720432281494, + "epoch": 0.35372780214249766, + "kl_loss": 0.1310126781463623, + "loss_ib": 0.002324504777789116, + "step": 1230 + }, + { + "ce_ib": 6.090595722198486, + "ce_orig": 1.0558695793151855, + "epoch": 0.3540153857214753, + "kl_loss": 0.10666792094707489, + "loss_ib": 0.0016757386038079858, + "step": 1231 + }, + { + "ce_ib": 6.059688091278076, + "ce_orig": 0.46506354212760925, + "epoch": 0.3540153857214753, + "kl_loss": 0.1932515949010849, + "loss_ib": 0.0025384845212101936, + "step": 1231 + }, + { + "ce_ib": 7.274968147277832, + "ce_orig": 0.6159886717796326, + "epoch": 0.3540153857214753, + "kl_loss": 0.08063336461782455, + "loss_ib": 0.0015338304219767451, + "step": 1231 + }, + { + "ce_ib": 6.708252906799316, + "ce_orig": 1.0086669921875, + "epoch": 0.3540153857214753, + "kl_loss": 0.07794315367937088, + "loss_ib": 0.0014502566773444414, + "step": 1231 + }, + { + "ce_ib": 3.0718331336975098, + "ce_orig": 0.4599364697933197, + "epoch": 0.35430296930045296, + "kl_loss": 0.08010546863079071, + "loss_ib": 0.001108237891457975, + "step": 1232 + }, + { + "ce_ib": 8.709245681762695, + "ce_orig": 1.4117929935455322, + "epoch": 0.35430296930045296, + "kl_loss": 0.09973221272230148, + "loss_ib": 0.0018682465888559818, + "step": 1232 + }, + { + "ce_ib": 6.9291229248046875, + "ce_orig": 0.7602047920227051, + "epoch": 0.35430296930045296, + "kl_loss": 0.1128532811999321, + "loss_ib": 0.0018214450683444738, + "step": 1232 + }, + { + "ce_ib": 9.679916381835938, + "ce_orig": 1.7955446243286133, + "epoch": 0.35430296930045296, + "kl_loss": 0.1334252804517746, + "loss_ib": 0.0023022443056106567, + "step": 1232 + }, + { + "ce_ib": 3.906406879425049, + "ce_orig": 0.3601726293563843, + "epoch": 0.3545905528794306, + "kl_loss": 0.15083280205726624, + "loss_ib": 0.00189896859228611, + "step": 1233 + }, + { + "ce_ib": 5.880161285400391, + "ce_orig": 0.9220426678657532, + "epoch": 0.3545905528794306, + "kl_loss": 0.12105061113834381, + "loss_ib": 0.0017985220765694976, + "step": 1233 + }, + { + "ce_ib": 5.015464782714844, + "ce_orig": 0.5734464526176453, + "epoch": 0.3545905528794306, + "kl_loss": 0.1499069184064865, + "loss_ib": 0.00200061546638608, + "step": 1233 + }, + { + "ce_ib": 8.755268096923828, + "ce_orig": 0.9812235832214355, + "epoch": 0.3545905528794306, + "kl_loss": 0.10746465623378754, + "loss_ib": 0.001950173289515078, + "step": 1233 + }, + { + "ce_ib": 5.069394111633301, + "ce_orig": 0.8159754276275635, + "epoch": 0.3548781364584082, + "kl_loss": 0.07964402437210083, + "loss_ib": 0.0013033796567469835, + "step": 1234 + }, + { + "ce_ib": 7.249039173126221, + "ce_orig": 0.8200325965881348, + "epoch": 0.3548781364584082, + "kl_loss": 0.13815820217132568, + "loss_ib": 0.002106485888361931, + "step": 1234 + }, + { + "ce_ib": 5.429012298583984, + "ce_orig": 0.533208966255188, + "epoch": 0.3548781364584082, + "kl_loss": 0.11810462921857834, + "loss_ib": 0.0017239474691450596, + "step": 1234 + }, + { + "ce_ib": 5.718509674072266, + "ce_orig": 0.4275233745574951, + "epoch": 0.3548781364584082, + "kl_loss": 0.2195826917886734, + "loss_ib": 0.0027676778845489025, + "step": 1234 + }, + { + "epoch": 0.3551657200373859, + "grad_norm": 0.11201301217079163, + "learning_rate": 4.899822786302154e-05, + "loss": 0.8977, + "step": 1235 + }, + { + "ce_ib": 8.761982917785645, + "ce_orig": 1.4217331409454346, + "epoch": 0.3551657200373859, + "kl_loss": 0.15910804271697998, + "loss_ib": 0.0024672786239534616, + "step": 1235 + }, + { + "ce_ib": 7.589676380157471, + "ce_orig": 0.7130719423294067, + "epoch": 0.3551657200373859, + "kl_loss": 0.10391992330551147, + "loss_ib": 0.0017981668934226036, + "step": 1235 + }, + { + "ce_ib": 7.034261703491211, + "ce_orig": 1.073976993560791, + "epoch": 0.3551657200373859, + "kl_loss": 0.10549747198820114, + "loss_ib": 0.0017584008164703846, + "step": 1235 + }, + { + "ce_ib": 6.313925743103027, + "ce_orig": 0.8495429754257202, + "epoch": 0.3551657200373859, + "kl_loss": 0.13002285361289978, + "loss_ib": 0.001931621110998094, + "step": 1235 + }, + { + "ce_ib": 9.567757606506348, + "ce_orig": 1.6718113422393799, + "epoch": 0.3554533036163635, + "kl_loss": 0.12206310778856277, + "loss_ib": 0.0021774068009108305, + "step": 1236 + }, + { + "ce_ib": 6.441582202911377, + "ce_orig": 0.9020083546638489, + "epoch": 0.3554533036163635, + "kl_loss": 0.1499362736940384, + "loss_ib": 0.00214352086186409, + "step": 1236 + }, + { + "ce_ib": 7.211685657501221, + "ce_orig": 0.8299764394760132, + "epoch": 0.3554533036163635, + "kl_loss": 0.11877041310071945, + "loss_ib": 0.0019088726257905364, + "step": 1236 + }, + { + "ce_ib": 6.965940475463867, + "ce_orig": 1.1821565628051758, + "epoch": 0.3554533036163635, + "kl_loss": 0.25949639081954956, + "loss_ib": 0.0032915580086410046, + "step": 1236 + }, + { + "ce_ib": 6.2936930656433105, + "ce_orig": 1.240807294845581, + "epoch": 0.35574088719534114, + "kl_loss": 0.11591099947690964, + "loss_ib": 0.0017884793924167752, + "step": 1237 + }, + { + "ce_ib": 7.929975986480713, + "ce_orig": 0.9705590605735779, + "epoch": 0.35574088719534114, + "kl_loss": 0.09377407282590866, + "loss_ib": 0.0017307382076978683, + "step": 1237 + }, + { + "ce_ib": 7.808045864105225, + "ce_orig": 0.6784370541572571, + "epoch": 0.35574088719534114, + "kl_loss": 0.1167113184928894, + "loss_ib": 0.0019479177426546812, + "step": 1237 + }, + { + "ce_ib": 5.054869651794434, + "ce_orig": 0.3603076636791229, + "epoch": 0.35574088719534114, + "kl_loss": 0.10277507454156876, + "loss_ib": 0.0015332376351580024, + "step": 1237 + }, + { + "ce_ib": 5.274099826812744, + "ce_orig": 0.7418103218078613, + "epoch": 0.35602847077431876, + "kl_loss": 0.15691673755645752, + "loss_ib": 0.002096577314659953, + "step": 1238 + }, + { + "ce_ib": 7.625789165496826, + "ce_orig": 1.1013562679290771, + "epoch": 0.35602847077431876, + "kl_loss": 0.1414494514465332, + "loss_ib": 0.002177073387429118, + "step": 1238 + }, + { + "ce_ib": 7.515286445617676, + "ce_orig": 0.9005677700042725, + "epoch": 0.35602847077431876, + "kl_loss": 0.11921834945678711, + "loss_ib": 0.0019437120063230395, + "step": 1238 + }, + { + "ce_ib": 4.873547554016113, + "ce_orig": 0.8680614829063416, + "epoch": 0.35602847077431876, + "kl_loss": 0.09163566678762436, + "loss_ib": 0.0014037113869562745, + "step": 1238 + }, + { + "ce_ib": 5.0864763259887695, + "ce_orig": 0.6006460189819336, + "epoch": 0.35631605435329644, + "kl_loss": 0.12252326309680939, + "loss_ib": 0.0017338802572339773, + "step": 1239 + }, + { + "ce_ib": 6.825660705566406, + "ce_orig": 1.0597938299179077, + "epoch": 0.35631605435329644, + "kl_loss": 0.12392257153987885, + "loss_ib": 0.0019217916997149587, + "step": 1239 + }, + { + "ce_ib": 6.830639839172363, + "ce_orig": 0.7523980140686035, + "epoch": 0.35631605435329644, + "kl_loss": 0.18000862002372742, + "loss_ib": 0.002483149990439415, + "step": 1239 + }, + { + "ce_ib": 7.899068355560303, + "ce_orig": 0.7786166071891785, + "epoch": 0.35631605435329644, + "kl_loss": 0.1359153687953949, + "loss_ib": 0.002149060368537903, + "step": 1239 + }, + { + "epoch": 0.35660363793227406, + "grad_norm": 0.11122456192970276, + "learning_rate": 4.898732434036244e-05, + "loss": 0.8958, + "step": 1240 + }, + { + "ce_ib": 6.576417446136475, + "ce_orig": 1.028070330619812, + "epoch": 0.35660363793227406, + "kl_loss": 0.08930405229330063, + "loss_ib": 0.001550682121887803, + "step": 1240 + }, + { + "ce_ib": 4.134368419647217, + "ce_orig": 0.7497093677520752, + "epoch": 0.35660363793227406, + "kl_loss": 0.07175867259502411, + "loss_ib": 0.0011310235131531954, + "step": 1240 + }, + { + "ce_ib": 5.499692440032959, + "ce_orig": 0.8722239136695862, + "epoch": 0.35660363793227406, + "kl_loss": 0.24425196647644043, + "loss_ib": 0.0029924886766821146, + "step": 1240 + }, + { + "ce_ib": 5.032848358154297, + "ce_orig": 0.7244812250137329, + "epoch": 0.35660363793227406, + "kl_loss": 0.11990936845541, + "loss_ib": 0.0017023785039782524, + "step": 1240 + }, + { + "ce_ib": 5.378225803375244, + "ce_orig": 0.744158148765564, + "epoch": 0.3568912215112517, + "kl_loss": 0.09403784573078156, + "loss_ib": 0.0014782010111957788, + "step": 1241 + }, + { + "ce_ib": 8.463665008544922, + "ce_orig": 1.0507760047912598, + "epoch": 0.3568912215112517, + "kl_loss": 0.16647399961948395, + "loss_ib": 0.0025111064314842224, + "step": 1241 + }, + { + "ce_ib": 4.823368549346924, + "ce_orig": 0.5231988430023193, + "epoch": 0.3568912215112517, + "kl_loss": 0.11122848093509674, + "loss_ib": 0.0015946216881275177, + "step": 1241 + }, + { + "ce_ib": 4.179252624511719, + "ce_orig": 0.29963740706443787, + "epoch": 0.3568912215112517, + "kl_loss": 0.11958669126033783, + "loss_ib": 0.0016137921484187245, + "step": 1241 + }, + { + "ce_ib": 3.577817440032959, + "ce_orig": 0.49526020884513855, + "epoch": 0.35717880509022937, + "kl_loss": 0.08421307802200317, + "loss_ib": 0.0011999125126749277, + "step": 1242 + }, + { + "ce_ib": 6.855809688568115, + "ce_orig": 0.7919394373893738, + "epoch": 0.35717880509022937, + "kl_loss": 0.12446132302284241, + "loss_ib": 0.0019301942083984613, + "step": 1242 + }, + { + "ce_ib": 7.013069152832031, + "ce_orig": 1.062800645828247, + "epoch": 0.35717880509022937, + "kl_loss": 0.14006005227565765, + "loss_ib": 0.0021019072737544775, + "step": 1242 + }, + { + "ce_ib": 3.7290921211242676, + "ce_orig": 0.384665846824646, + "epoch": 0.35717880509022937, + "kl_loss": 0.10171931236982346, + "loss_ib": 0.001390102319419384, + "step": 1242 + }, + { + "ce_ib": 7.695999622344971, + "ce_orig": 1.3372113704681396, + "epoch": 0.357466388669207, + "kl_loss": 0.13230851292610168, + "loss_ib": 0.0020926850847899914, + "step": 1243 + }, + { + "ce_ib": 5.93403959274292, + "ce_orig": 0.6903248429298401, + "epoch": 0.357466388669207, + "kl_loss": 0.09127384424209595, + "loss_ib": 0.0015061423182487488, + "step": 1243 + }, + { + "ce_ib": 3.7722675800323486, + "ce_orig": 0.5317506790161133, + "epoch": 0.357466388669207, + "kl_loss": 0.0804959237575531, + "loss_ib": 0.0011821859516203403, + "step": 1243 + }, + { + "ce_ib": 6.712879657745361, + "ce_orig": 0.9313479661941528, + "epoch": 0.357466388669207, + "kl_loss": 0.13765740394592285, + "loss_ib": 0.0020478619262576103, + "step": 1243 + }, + { + "ce_ib": 5.3466033935546875, + "ce_orig": 0.5497527718544006, + "epoch": 0.3577539722481846, + "kl_loss": 0.17618891596794128, + "loss_ib": 0.0022965495008975267, + "step": 1244 + }, + { + "ce_ib": 3.2458081245422363, + "ce_orig": 0.4139023721218109, + "epoch": 0.3577539722481846, + "kl_loss": 0.1082507073879242, + "loss_ib": 0.001407087896950543, + "step": 1244 + }, + { + "ce_ib": 6.442051410675049, + "ce_orig": 0.7312730550765991, + "epoch": 0.3577539722481846, + "kl_loss": 0.22959205508232117, + "loss_ib": 0.0029401257634162903, + "step": 1244 + }, + { + "ce_ib": 7.719517230987549, + "ce_orig": 0.7423241138458252, + "epoch": 0.3577539722481846, + "kl_loss": 0.05368277058005333, + "loss_ib": 0.0013087793486192822, + "step": 1244 + }, + { + "epoch": 0.3580415558271623, + "grad_norm": 0.09787463396787643, + "learning_rate": 4.897636302667142e-05, + "loss": 0.7935, + "step": 1245 + }, + { + "ce_ib": 4.224658012390137, + "ce_orig": 0.7047666311264038, + "epoch": 0.3580415558271623, + "kl_loss": 0.056395336985588074, + "loss_ib": 0.000986419152468443, + "step": 1245 + }, + { + "ce_ib": 10.30202579498291, + "ce_orig": 1.5900565385818481, + "epoch": 0.3580415558271623, + "kl_loss": 0.09452299028635025, + "loss_ib": 0.0019754325039684772, + "step": 1245 + }, + { + "ce_ib": 6.778504848480225, + "ce_orig": 0.9461814761161804, + "epoch": 0.3580415558271623, + "kl_loss": 0.1325336992740631, + "loss_ib": 0.002003187546506524, + "step": 1245 + }, + { + "ce_ib": 3.336076021194458, + "ce_orig": 0.41546472907066345, + "epoch": 0.3580415558271623, + "kl_loss": 0.08192800730466843, + "loss_ib": 0.0011528875911608338, + "step": 1245 + }, + { + "ce_ib": 7.632260322570801, + "ce_orig": 0.6931596398353577, + "epoch": 0.3583291394061399, + "kl_loss": 0.11511077731847763, + "loss_ib": 0.001914333668537438, + "step": 1246 + }, + { + "ce_ib": 7.651729106903076, + "ce_orig": 0.8976790308952332, + "epoch": 0.3583291394061399, + "kl_loss": 0.10843627899885178, + "loss_ib": 0.001849535619840026, + "step": 1246 + }, + { + "ce_ib": 5.129650592803955, + "ce_orig": 0.5657326579093933, + "epoch": 0.3583291394061399, + "kl_loss": 0.18685322999954224, + "loss_ib": 0.0023814972955733538, + "step": 1246 + }, + { + "ce_ib": 5.636063098907471, + "ce_orig": 0.6586743593215942, + "epoch": 0.3583291394061399, + "kl_loss": 0.06884510815143585, + "loss_ib": 0.0012520573800429702, + "step": 1246 + }, + { + "ce_ib": 3.2437944412231445, + "ce_orig": 0.5597171187400818, + "epoch": 0.35861672298511754, + "kl_loss": 0.09059131145477295, + "loss_ib": 0.0012302924878895283, + "step": 1247 + }, + { + "ce_ib": 5.134467601776123, + "ce_orig": 0.5335696339607239, + "epoch": 0.35861672298511754, + "kl_loss": 0.14224691689014435, + "loss_ib": 0.0019359159050509334, + "step": 1247 + }, + { + "ce_ib": 7.443212509155273, + "ce_orig": 0.22147001326084137, + "epoch": 0.35861672298511754, + "kl_loss": 0.07838533818721771, + "loss_ib": 0.0015281744999811053, + "step": 1247 + }, + { + "ce_ib": 3.532418966293335, + "ce_orig": 0.4406964182853699, + "epoch": 0.35861672298511754, + "kl_loss": 0.05399385839700699, + "loss_ib": 0.0008931804914027452, + "step": 1247 + }, + { + "ce_ib": 5.0010881423950195, + "ce_orig": 0.7583406567573547, + "epoch": 0.35890430656409517, + "kl_loss": 0.11531706154346466, + "loss_ib": 0.0016532792942598462, + "step": 1248 + }, + { + "ce_ib": 3.3550872802734375, + "ce_orig": 0.5256078243255615, + "epoch": 0.35890430656409517, + "kl_loss": 0.04886241257190704, + "loss_ib": 0.0008241328177973628, + "step": 1248 + }, + { + "ce_ib": 1.7382667064666748, + "ce_orig": 0.11116386950016022, + "epoch": 0.35890430656409517, + "kl_loss": 0.23131829500198364, + "loss_ib": 0.002487009624019265, + "step": 1248 + }, + { + "ce_ib": 6.645051002502441, + "ce_orig": 1.1898750066757202, + "epoch": 0.35890430656409517, + "kl_loss": 0.084653839468956, + "loss_ib": 0.0015110434032976627, + "step": 1248 + }, + { + "ce_ib": 5.560726642608643, + "ce_orig": 0.7055838704109192, + "epoch": 0.35919189014307285, + "kl_loss": 0.12825129926204681, + "loss_ib": 0.0018385857110843062, + "step": 1249 + }, + { + "ce_ib": 4.6536760330200195, + "ce_orig": 0.4988435208797455, + "epoch": 0.35919189014307285, + "kl_loss": 0.10604903101921082, + "loss_ib": 0.0015258578350767493, + "step": 1249 + }, + { + "ce_ib": 6.926509857177734, + "ce_orig": 0.18700924515724182, + "epoch": 0.35919189014307285, + "kl_loss": 0.22794826328754425, + "loss_ib": 0.0029721336904913187, + "step": 1249 + }, + { + "ce_ib": 9.14644718170166, + "ce_orig": 1.0003561973571777, + "epoch": 0.35919189014307285, + "kl_loss": 0.150540292263031, + "loss_ib": 0.0024200475309044123, + "step": 1249 + }, + { + "epoch": 0.35947947372205047, + "grad_norm": 0.0926978662610054, + "learning_rate": 4.8965343948356846e-05, + "loss": 0.8247, + "step": 1250 + }, + { + "ce_ib": 4.405333042144775, + "ce_orig": 0.620575487613678, + "epoch": 0.35947947372205047, + "kl_loss": 0.07144202291965485, + "loss_ib": 0.0011549534974619746, + "step": 1250 + }, + { + "ce_ib": 10.016497611999512, + "ce_orig": 1.3082729578018188, + "epoch": 0.35947947372205047, + "kl_loss": 0.10396544635295868, + "loss_ib": 0.0020413040183484554, + "step": 1250 + }, + { + "ce_ib": 5.4515485763549805, + "ce_orig": 0.6093593835830688, + "epoch": 0.35947947372205047, + "kl_loss": 0.10453015565872192, + "loss_ib": 0.00159045634791255, + "step": 1250 + }, + { + "ce_ib": 3.653608560562134, + "ce_orig": 0.5887627005577087, + "epoch": 0.35947947372205047, + "kl_loss": 0.13053785264492035, + "loss_ib": 0.00167073926422745, + "step": 1250 + }, + { + "ce_ib": 7.036374092102051, + "ce_orig": 0.972096860408783, + "epoch": 0.3597670573010281, + "kl_loss": 0.12440192699432373, + "loss_ib": 0.0019476565066725016, + "step": 1251 + }, + { + "ce_ib": 5.862993240356445, + "ce_orig": 0.731389582157135, + "epoch": 0.3597670573010281, + "kl_loss": 0.09715241193771362, + "loss_ib": 0.0015578233869746327, + "step": 1251 + }, + { + "ce_ib": 6.658885478973389, + "ce_orig": 0.7261188626289368, + "epoch": 0.3597670573010281, + "kl_loss": 0.17306064069271088, + "loss_ib": 0.0023964950814843178, + "step": 1251 + }, + { + "ce_ib": 4.695159435272217, + "ce_orig": 0.7654480934143066, + "epoch": 0.3597670573010281, + "kl_loss": 0.08340594172477722, + "loss_ib": 0.0013035753509029746, + "step": 1251 + }, + { + "ce_ib": 4.917471408843994, + "ce_orig": 0.7266609072685242, + "epoch": 0.36005464088000577, + "kl_loss": 0.1739560216665268, + "loss_ib": 0.002231307327747345, + "step": 1252 + }, + { + "ce_ib": 6.264862060546875, + "ce_orig": 0.6531610488891602, + "epoch": 0.36005464088000577, + "kl_loss": 0.13514098525047302, + "loss_ib": 0.001977896085008979, + "step": 1252 + }, + { + "ce_ib": 9.444012641906738, + "ce_orig": 1.688317894935608, + "epoch": 0.36005464088000577, + "kl_loss": 0.11596217751502991, + "loss_ib": 0.00210402300581336, + "step": 1252 + }, + { + "ce_ib": 7.3551716804504395, + "ce_orig": 1.08235502243042, + "epoch": 0.36005464088000577, + "kl_loss": 0.09125493466854095, + "loss_ib": 0.0016480664489790797, + "step": 1252 + }, + { + "ce_ib": 4.903650760650635, + "ce_orig": 0.46829381585121155, + "epoch": 0.3603422244589834, + "kl_loss": 0.18105095624923706, + "loss_ib": 0.002300874562934041, + "step": 1253 + }, + { + "ce_ib": 6.1150221824646, + "ce_orig": 0.7645502686500549, + "epoch": 0.3603422244589834, + "kl_loss": 0.11290599405765533, + "loss_ib": 0.001740562147460878, + "step": 1253 + }, + { + "ce_ib": 3.758120059967041, + "ce_orig": 0.5434147119522095, + "epoch": 0.3603422244589834, + "kl_loss": 0.04552270844578743, + "loss_ib": 0.0008310390985570848, + "step": 1253 + }, + { + "ce_ib": 8.314358711242676, + "ce_orig": 1.216098427772522, + "epoch": 0.3603422244589834, + "kl_loss": 0.09088920801877975, + "loss_ib": 0.001740327919833362, + "step": 1253 + }, + { + "ce_ib": 8.22514820098877, + "ce_orig": 1.1783727407455444, + "epoch": 0.360629808037961, + "kl_loss": 0.1134648323059082, + "loss_ib": 0.0019571632146835327, + "step": 1254 + }, + { + "ce_ib": 4.879001140594482, + "ce_orig": 0.7157498002052307, + "epoch": 0.360629808037961, + "kl_loss": 0.09442845731973648, + "loss_ib": 0.0014321847120299935, + "step": 1254 + }, + { + "ce_ib": 4.070014476776123, + "ce_orig": 0.5319107174873352, + "epoch": 0.360629808037961, + "kl_loss": 0.16356313228607178, + "loss_ib": 0.0020426325500011444, + "step": 1254 + }, + { + "ce_ib": 5.681893825531006, + "ce_orig": 0.7256168723106384, + "epoch": 0.360629808037961, + "kl_loss": 0.10619882494211197, + "loss_ib": 0.0016301776049658656, + "step": 1254 + }, + { + "epoch": 0.3609173916169387, + "grad_norm": 0.10951834917068481, + "learning_rate": 4.8954267131966225e-05, + "loss": 0.9201, + "step": 1255 + }, + { + "ce_ib": 4.51348352432251, + "ce_orig": 0.5521302223205566, + "epoch": 0.3609173916169387, + "kl_loss": 0.07769614458084106, + "loss_ib": 0.0012283098185434937, + "step": 1255 + }, + { + "ce_ib": 7.691596984863281, + "ce_orig": 0.7468867897987366, + "epoch": 0.3609173916169387, + "kl_loss": 0.12418323755264282, + "loss_ib": 0.002010992029681802, + "step": 1255 + }, + { + "ce_ib": 6.768327236175537, + "ce_orig": 0.841541051864624, + "epoch": 0.3609173916169387, + "kl_loss": 0.10890116542577744, + "loss_ib": 0.001765844295732677, + "step": 1255 + }, + { + "ce_ib": 4.823737144470215, + "ce_orig": 0.7781549692153931, + "epoch": 0.3609173916169387, + "kl_loss": 0.1455298513174057, + "loss_ib": 0.0019376721465960145, + "step": 1255 + }, + { + "ce_ib": 5.633728504180908, + "ce_orig": 0.8247250914573669, + "epoch": 0.3612049751959163, + "kl_loss": 0.11697879433631897, + "loss_ib": 0.001733160694129765, + "step": 1256 + }, + { + "ce_ib": 6.972712993621826, + "ce_orig": 0.6569220423698425, + "epoch": 0.3612049751959163, + "kl_loss": 0.09403789043426514, + "loss_ib": 0.001637650071643293, + "step": 1256 + }, + { + "ce_ib": 5.785305500030518, + "ce_orig": 0.9212448596954346, + "epoch": 0.3612049751959163, + "kl_loss": 0.11507164686918259, + "loss_ib": 0.0017292469274252653, + "step": 1256 + }, + { + "ce_ib": 4.723819255828857, + "ce_orig": 0.3406326472759247, + "epoch": 0.3612049751959163, + "kl_loss": 0.13277126848697662, + "loss_ib": 0.001800094498321414, + "step": 1256 + }, + { + "ce_ib": 6.737639427185059, + "ce_orig": 0.9714068174362183, + "epoch": 0.36149255877489395, + "kl_loss": 0.11174871772527695, + "loss_ib": 0.0017912510083988309, + "step": 1257 + }, + { + "ce_ib": 3.7057688236236572, + "ce_orig": 0.6621173024177551, + "epoch": 0.36149255877489395, + "kl_loss": 0.07731989026069641, + "loss_ib": 0.0011437757639214396, + "step": 1257 + }, + { + "ce_ib": 7.429227828979492, + "ce_orig": 1.174642562866211, + "epoch": 0.36149255877489395, + "kl_loss": 0.21665500104427338, + "loss_ib": 0.0029094729106873274, + "step": 1257 + }, + { + "ce_ib": 4.477489471435547, + "ce_orig": 0.6718473434448242, + "epoch": 0.36149255877489395, + "kl_loss": 0.16298699378967285, + "loss_ib": 0.002077618846669793, + "step": 1257 + }, + { + "ce_ib": 8.133766174316406, + "ce_orig": 0.9104800820350647, + "epoch": 0.36178014235387157, + "kl_loss": 0.16997173428535461, + "loss_ib": 0.002513093873858452, + "step": 1258 + }, + { + "ce_ib": 8.973519325256348, + "ce_orig": 1.3371100425720215, + "epoch": 0.36178014235387157, + "kl_loss": 0.07697136700153351, + "loss_ib": 0.0016670655459165573, + "step": 1258 + }, + { + "ce_ib": 7.888665676116943, + "ce_orig": 0.5700468420982361, + "epoch": 0.36178014235387157, + "kl_loss": 0.1357317864894867, + "loss_ib": 0.0021461844444274902, + "step": 1258 + }, + { + "ce_ib": 6.661389350891113, + "ce_orig": 0.5237452387809753, + "epoch": 0.36178014235387157, + "kl_loss": 0.09358532726764679, + "loss_ib": 0.0016019921749830246, + "step": 1258 + }, + { + "ce_ib": 5.645505905151367, + "ce_orig": 0.7846198678016663, + "epoch": 0.36206772593284925, + "kl_loss": 0.17019706964492798, + "loss_ib": 0.002266521332785487, + "step": 1259 + }, + { + "ce_ib": 5.187131881713867, + "ce_orig": 0.6325237154960632, + "epoch": 0.36206772593284925, + "kl_loss": 0.13489633798599243, + "loss_ib": 0.0018676765030249953, + "step": 1259 + }, + { + "ce_ib": 5.370206832885742, + "ce_orig": 0.6260020732879639, + "epoch": 0.36206772593284925, + "kl_loss": 0.11543045192956924, + "loss_ib": 0.0016913251020014286, + "step": 1259 + }, + { + "ce_ib": 8.244893074035645, + "ce_orig": 0.9257451295852661, + "epoch": 0.36206772593284925, + "kl_loss": 0.21367326378822327, + "loss_ib": 0.0029612218495458364, + "step": 1259 + }, + { + "epoch": 0.3623553095118269, + "grad_norm": 0.0853675901889801, + "learning_rate": 4.894313260418617e-05, + "loss": 0.8574, + "step": 1260 + }, + { + "ce_ib": 5.996172904968262, + "ce_orig": 0.9969424605369568, + "epoch": 0.3623553095118269, + "kl_loss": 0.07828725129365921, + "loss_ib": 0.001382489805109799, + "step": 1260 + }, + { + "ce_ib": 7.6174139976501465, + "ce_orig": 1.1654242277145386, + "epoch": 0.3623553095118269, + "kl_loss": 0.11603623628616333, + "loss_ib": 0.0019221036927774549, + "step": 1260 + }, + { + "ce_ib": 6.490176200866699, + "ce_orig": 0.9987762570381165, + "epoch": 0.3623553095118269, + "kl_loss": 0.07767187803983688, + "loss_ib": 0.0014257363509386778, + "step": 1260 + }, + { + "ce_ib": 7.6730122566223145, + "ce_orig": 1.5139427185058594, + "epoch": 0.3623553095118269, + "kl_loss": 0.0912584513425827, + "loss_ib": 0.0016798856668174267, + "step": 1260 + }, + { + "ce_ib": 7.314441680908203, + "ce_orig": 1.2183951139450073, + "epoch": 0.3626428930908045, + "kl_loss": 0.10375961661338806, + "loss_ib": 0.0017690402455627918, + "step": 1261 + }, + { + "ce_ib": 9.13668155670166, + "ce_orig": 1.484241008758545, + "epoch": 0.3626428930908045, + "kl_loss": 0.1495942771434784, + "loss_ib": 0.0024096108973026276, + "step": 1261 + }, + { + "ce_ib": 4.436420440673828, + "ce_orig": 0.7606337070465088, + "epoch": 0.3626428930908045, + "kl_loss": 0.09683829545974731, + "loss_ib": 0.0014120249543339014, + "step": 1261 + }, + { + "ce_ib": 5.428145885467529, + "ce_orig": 0.5036700963973999, + "epoch": 0.3626428930908045, + "kl_loss": 0.150125652551651, + "loss_ib": 0.002044070977717638, + "step": 1261 + }, + { + "ce_ib": 2.7986819744110107, + "ce_orig": 0.2716585695743561, + "epoch": 0.3629304766697822, + "kl_loss": 0.11242157220840454, + "loss_ib": 0.0014040839159861207, + "step": 1262 + }, + { + "ce_ib": 7.776880264282227, + "ce_orig": 1.1999222040176392, + "epoch": 0.3629304766697822, + "kl_loss": 0.08300190418958664, + "loss_ib": 0.0016077071195468307, + "step": 1262 + }, + { + "ce_ib": 7.587123870849609, + "ce_orig": 1.1394379138946533, + "epoch": 0.3629304766697822, + "kl_loss": 0.12824639678001404, + "loss_ib": 0.0020411761943250895, + "step": 1262 + }, + { + "ce_ib": 6.979325771331787, + "ce_orig": 0.7309539914131165, + "epoch": 0.3629304766697822, + "kl_loss": 0.08371274173259735, + "loss_ib": 0.0015350598841905594, + "step": 1262 + }, + { + "ce_ib": 4.161574840545654, + "ce_orig": 0.5990742444992065, + "epoch": 0.3632180602487598, + "kl_loss": 0.11062033474445343, + "loss_ib": 0.0015223607188090682, + "step": 1263 + }, + { + "ce_ib": 3.989654779434204, + "ce_orig": 0.4041389524936676, + "epoch": 0.3632180602487598, + "kl_loss": 0.08145566284656525, + "loss_ib": 0.0012135220458731055, + "step": 1263 + }, + { + "ce_ib": 6.267369270324707, + "ce_orig": 0.9718990921974182, + "epoch": 0.3632180602487598, + "kl_loss": 0.14130017161369324, + "loss_ib": 0.002039738465100527, + "step": 1263 + }, + { + "ce_ib": 6.479578971862793, + "ce_orig": 0.5610483288764954, + "epoch": 0.3632180602487598, + "kl_loss": 0.15332993865013123, + "loss_ib": 0.002181257354095578, + "step": 1263 + }, + { + "ce_ib": 5.573829650878906, + "ce_orig": 0.5869829058647156, + "epoch": 0.3635056438277374, + "kl_loss": 0.10969609767198563, + "loss_ib": 0.0016543439123779535, + "step": 1264 + }, + { + "ce_ib": 6.148459434509277, + "ce_orig": 0.9820066690444946, + "epoch": 0.3635056438277374, + "kl_loss": 0.1340855062007904, + "loss_ib": 0.0019557008054107428, + "step": 1264 + }, + { + "ce_ib": 7.5752763748168945, + "ce_orig": 1.008094072341919, + "epoch": 0.3635056438277374, + "kl_loss": 0.09246852993965149, + "loss_ib": 0.001682212925516069, + "step": 1264 + }, + { + "ce_ib": 6.046870231628418, + "ce_orig": 0.8507830500602722, + "epoch": 0.3635056438277374, + "kl_loss": 0.10141552239656448, + "loss_ib": 0.0016188421286642551, + "step": 1264 + }, + { + "epoch": 0.3637932274067151, + "grad_norm": 0.1048649325966835, + "learning_rate": 4.893194039184236e-05, + "loss": 0.8718, + "step": 1265 + }, + { + "ce_ib": 5.948637962341309, + "ce_orig": 0.7164708971977234, + "epoch": 0.3637932274067151, + "kl_loss": 0.18087129294872284, + "loss_ib": 0.0024035766255110502, + "step": 1265 + }, + { + "ce_ib": 4.5438103675842285, + "ce_orig": 0.716766357421875, + "epoch": 0.3637932274067151, + "kl_loss": 0.0770442932844162, + "loss_ib": 0.0012248239945620298, + "step": 1265 + }, + { + "ce_ib": 6.885209560394287, + "ce_orig": 1.285949468612671, + "epoch": 0.3637932274067151, + "kl_loss": 0.11101078242063522, + "loss_ib": 0.0017986288294196129, + "step": 1265 + }, + { + "ce_ib": 8.132999420166016, + "ce_orig": 0.8165818452835083, + "epoch": 0.3637932274067151, + "kl_loss": 0.10078869760036469, + "loss_ib": 0.0018211867427453399, + "step": 1265 + }, + { + "ce_ib": 4.9748334884643555, + "ce_orig": 0.5936683416366577, + "epoch": 0.3640808109856927, + "kl_loss": 0.09034372121095657, + "loss_ib": 0.0014009205624461174, + "step": 1266 + }, + { + "ce_ib": 4.65391206741333, + "ce_orig": 0.8400774002075195, + "epoch": 0.3640808109856927, + "kl_loss": 0.09988667815923691, + "loss_ib": 0.0014642579481005669, + "step": 1266 + }, + { + "ce_ib": 5.725048065185547, + "ce_orig": 0.9536218047142029, + "epoch": 0.3640808109856927, + "kl_loss": 0.11655110120773315, + "loss_ib": 0.0017380157951265574, + "step": 1266 + }, + { + "ce_ib": 7.574244976043701, + "ce_orig": 1.4102758169174194, + "epoch": 0.3640808109856927, + "kl_loss": 0.12130621075630188, + "loss_ib": 0.001970486482605338, + "step": 1266 + }, + { + "ce_ib": 2.4335663318634033, + "ce_orig": 0.11583693325519562, + "epoch": 0.36436839456467035, + "kl_loss": 0.32097506523132324, + "loss_ib": 0.0034531070850789547, + "step": 1267 + }, + { + "ce_ib": 6.081480026245117, + "ce_orig": 0.8565067052841187, + "epoch": 0.36436839456467035, + "kl_loss": 0.16416233777999878, + "loss_ib": 0.002249771263450384, + "step": 1267 + }, + { + "ce_ib": 6.017972946166992, + "ce_orig": 0.7665581107139587, + "epoch": 0.36436839456467035, + "kl_loss": 0.11499577015638351, + "loss_ib": 0.0017517550149932504, + "step": 1267 + }, + { + "ce_ib": 5.2015838623046875, + "ce_orig": 0.6542770862579346, + "epoch": 0.36436839456467035, + "kl_loss": 0.11964156478643417, + "loss_ib": 0.0017165739554911852, + "step": 1267 + }, + { + "ce_ib": 5.111125946044922, + "ce_orig": 0.7856875061988831, + "epoch": 0.364655978143648, + "kl_loss": 0.11653504520654678, + "loss_ib": 0.0016764630563557148, + "step": 1268 + }, + { + "ce_ib": 7.048427104949951, + "ce_orig": 0.9540700912475586, + "epoch": 0.364655978143648, + "kl_loss": 0.0664190873503685, + "loss_ib": 0.0013690335908904672, + "step": 1268 + }, + { + "ce_ib": 7.886468410491943, + "ce_orig": 1.0301730632781982, + "epoch": 0.364655978143648, + "kl_loss": 0.08469361811876297, + "loss_ib": 0.0016355830011889338, + "step": 1268 + }, + { + "ce_ib": 6.763402462005615, + "ce_orig": 1.0440094470977783, + "epoch": 0.364655978143648, + "kl_loss": 0.1510259360074997, + "loss_ib": 0.0021865996532142162, + "step": 1268 + }, + { + "ce_ib": 5.486627578735352, + "ce_orig": 0.4695490002632141, + "epoch": 0.36494356172262565, + "kl_loss": 0.15088656544685364, + "loss_ib": 0.002057528356090188, + "step": 1269 + }, + { + "ce_ib": 5.369198799133301, + "ce_orig": 0.9851084351539612, + "epoch": 0.36494356172262565, + "kl_loss": 0.11704091727733612, + "loss_ib": 0.001707328949123621, + "step": 1269 + }, + { + "ce_ib": 5.675687313079834, + "ce_orig": 0.6099631190299988, + "epoch": 0.36494356172262565, + "kl_loss": 0.0743517056107521, + "loss_ib": 0.0013110857689753175, + "step": 1269 + }, + { + "ce_ib": 5.100560665130615, + "ce_orig": 0.9225177764892578, + "epoch": 0.36494356172262565, + "kl_loss": 0.08952625840902328, + "loss_ib": 0.0014053186168894172, + "step": 1269 + }, + { + "epoch": 0.3652311453016033, + "grad_norm": 0.08690284192562103, + "learning_rate": 4.8920690521899425e-05, + "loss": 0.8222, + "step": 1270 + }, + { + "ce_ib": 4.783676624298096, + "ce_orig": 0.7209357023239136, + "epoch": 0.3652311453016033, + "kl_loss": 0.10517607629299164, + "loss_ib": 0.0015301284147426486, + "step": 1270 + }, + { + "ce_ib": 4.74617338180542, + "ce_orig": 0.6954101324081421, + "epoch": 0.3652311453016033, + "kl_loss": 0.11000341176986694, + "loss_ib": 0.0015746514545753598, + "step": 1270 + }, + { + "ce_ib": 7.062074661254883, + "ce_orig": 1.0939521789550781, + "epoch": 0.3652311453016033, + "kl_loss": 0.10517151653766632, + "loss_ib": 0.0017579225823283195, + "step": 1270 + }, + { + "ce_ib": 6.2351484298706055, + "ce_orig": 1.0175594091415405, + "epoch": 0.3652311453016033, + "kl_loss": 0.11980479955673218, + "loss_ib": 0.0018215627642348409, + "step": 1270 + }, + { + "ce_ib": 5.415085315704346, + "ce_orig": 0.7584806084632874, + "epoch": 0.3655187288805809, + "kl_loss": 0.08196678757667542, + "loss_ib": 0.0013611763715744019, + "step": 1271 + }, + { + "ce_ib": 5.287896633148193, + "ce_orig": 0.7736698985099792, + "epoch": 0.3655187288805809, + "kl_loss": 0.11232677102088928, + "loss_ib": 0.0016520572826266289, + "step": 1271 + }, + { + "ce_ib": 4.119640350341797, + "ce_orig": 0.7556843757629395, + "epoch": 0.3655187288805809, + "kl_loss": 0.09644928574562073, + "loss_ib": 0.0013764569303020835, + "step": 1271 + }, + { + "ce_ib": 7.895566940307617, + "ce_orig": 1.4006571769714355, + "epoch": 0.3655187288805809, + "kl_loss": 0.10593666881322861, + "loss_ib": 0.0018489232752472162, + "step": 1271 + }, + { + "ce_ib": 7.084376335144043, + "ce_orig": 1.3110722303390503, + "epoch": 0.3658063124595586, + "kl_loss": 0.10005828738212585, + "loss_ib": 0.0017090203473344445, + "step": 1272 + }, + { + "ce_ib": 2.2814648151397705, + "ce_orig": 0.22170490026474, + "epoch": 0.3658063124595586, + "kl_loss": 0.2755497395992279, + "loss_ib": 0.0029836439061909914, + "step": 1272 + }, + { + "ce_ib": 10.033036231994629, + "ce_orig": 1.714808464050293, + "epoch": 0.3658063124595586, + "kl_loss": 0.2861132025718689, + "loss_ib": 0.0038644354790449142, + "step": 1272 + }, + { + "ce_ib": 5.259953498840332, + "ce_orig": 0.7123859524726868, + "epoch": 0.3658063124595586, + "kl_loss": 0.11281996965408325, + "loss_ib": 0.0016541950171813369, + "step": 1272 + }, + { + "ce_ib": 4.626567840576172, + "ce_orig": 0.5473626852035522, + "epoch": 0.3660938960385362, + "kl_loss": 0.15294522047042847, + "loss_ib": 0.0019921089988201857, + "step": 1273 + }, + { + "ce_ib": 8.993037223815918, + "ce_orig": 1.4961568117141724, + "epoch": 0.3660938960385362, + "kl_loss": 0.20619803667068481, + "loss_ib": 0.0029612837824970484, + "step": 1273 + }, + { + "ce_ib": 9.032368659973145, + "ce_orig": 1.423782229423523, + "epoch": 0.3660938960385362, + "kl_loss": 0.20301368832588196, + "loss_ib": 0.002933373674750328, + "step": 1273 + }, + { + "ce_ib": 6.045455455780029, + "ce_orig": 0.7214930057525635, + "epoch": 0.3660938960385362, + "kl_loss": 0.0861063301563263, + "loss_ib": 0.0014656087150797248, + "step": 1273 + }, + { + "ce_ib": 5.263514518737793, + "ce_orig": 0.6097703576087952, + "epoch": 0.3663814796175138, + "kl_loss": 0.10200537741184235, + "loss_ib": 0.0015464052557945251, + "step": 1274 + }, + { + "ce_ib": 7.112468242645264, + "ce_orig": 0.9531702995300293, + "epoch": 0.3663814796175138, + "kl_loss": 0.14293800294399261, + "loss_ib": 0.0021406267769634724, + "step": 1274 + }, + { + "ce_ib": 5.52689790725708, + "ce_orig": 0.868090808391571, + "epoch": 0.3663814796175138, + "kl_loss": 0.1180892065167427, + "loss_ib": 0.0017335818847641349, + "step": 1274 + }, + { + "ce_ib": 4.249706268310547, + "ce_orig": 0.4917110800743103, + "epoch": 0.3663814796175138, + "kl_loss": 0.0959208756685257, + "loss_ib": 0.0013841792242601514, + "step": 1274 + }, + { + "epoch": 0.3666690631964915, + "grad_norm": 0.09557251632213593, + "learning_rate": 4.890938302146091e-05, + "loss": 0.8762, + "step": 1275 + }, + { + "ce_ib": 6.830130577087402, + "ce_orig": 0.6520479321479797, + "epoch": 0.3666690631964915, + "kl_loss": 0.07018201053142548, + "loss_ib": 0.0013848331291228533, + "step": 1275 + }, + { + "ce_ib": 6.835606098175049, + "ce_orig": 0.4915773570537567, + "epoch": 0.3666690631964915, + "kl_loss": 0.11724641919136047, + "loss_ib": 0.001856024842709303, + "step": 1275 + }, + { + "ce_ib": 6.633254528045654, + "ce_orig": 0.8117356300354004, + "epoch": 0.3666690631964915, + "kl_loss": 0.19811943173408508, + "loss_ib": 0.0026445197872817516, + "step": 1275 + }, + { + "ce_ib": 4.69884729385376, + "ce_orig": 0.622662365436554, + "epoch": 0.3666690631964915, + "kl_loss": 0.1123548075556755, + "loss_ib": 0.0015934327384456992, + "step": 1275 + }, + { + "ce_ib": 5.555670261383057, + "ce_orig": 0.9462684988975525, + "epoch": 0.36695664677546913, + "kl_loss": 0.18677571415901184, + "loss_ib": 0.002423324156552553, + "step": 1276 + }, + { + "ce_ib": 6.706098556518555, + "ce_orig": 1.0116491317749023, + "epoch": 0.36695664677546913, + "kl_loss": 0.16157668828964233, + "loss_ib": 0.002286376664415002, + "step": 1276 + }, + { + "ce_ib": 5.311933517456055, + "ce_orig": 0.6050148010253906, + "epoch": 0.36695664677546913, + "kl_loss": 0.11970527470111847, + "loss_ib": 0.0017282459884881973, + "step": 1276 + }, + { + "ce_ib": 9.499307632446289, + "ce_orig": 1.2046724557876587, + "epoch": 0.36695664677546913, + "kl_loss": 0.14970532059669495, + "loss_ib": 0.002446983940899372, + "step": 1276 + }, + { + "ce_ib": 7.650247097015381, + "ce_orig": 1.1416409015655518, + "epoch": 0.36724423035444675, + "kl_loss": 0.1492568850517273, + "loss_ib": 0.0022575934417545795, + "step": 1277 + }, + { + "ce_ib": 8.05804443359375, + "ce_orig": 1.2605984210968018, + "epoch": 0.36724423035444675, + "kl_loss": 0.06794284284114838, + "loss_ib": 0.0014852328458800912, + "step": 1277 + }, + { + "ce_ib": 5.465357303619385, + "ce_orig": 0.6775657534599304, + "epoch": 0.36724423035444675, + "kl_loss": 0.11054760962724686, + "loss_ib": 0.0016520118806511164, + "step": 1277 + }, + { + "ce_ib": 8.604063034057617, + "ce_orig": 0.8220978379249573, + "epoch": 0.36724423035444675, + "kl_loss": 0.08851812779903412, + "loss_ib": 0.001745587564073503, + "step": 1277 + }, + { + "ce_ib": 4.675428867340088, + "ce_orig": 0.9593977332115173, + "epoch": 0.3675318139334244, + "kl_loss": 0.10727477818727493, + "loss_ib": 0.0015402906574308872, + "step": 1278 + }, + { + "ce_ib": 5.886958122253418, + "ce_orig": 0.9332735538482666, + "epoch": 0.3675318139334244, + "kl_loss": 0.11322903633117676, + "loss_ib": 0.0017209862126037478, + "step": 1278 + }, + { + "ce_ib": 4.394830703735352, + "ce_orig": 0.9940218329429626, + "epoch": 0.3675318139334244, + "kl_loss": 0.07776137441396713, + "loss_ib": 0.0012170968111604452, + "step": 1278 + }, + { + "ce_ib": 6.783079624176025, + "ce_orig": 0.7369851469993591, + "epoch": 0.3675318139334244, + "kl_loss": 0.09048350155353546, + "loss_ib": 0.0015831427881494164, + "step": 1278 + }, + { + "ce_ib": 3.8256616592407227, + "ce_orig": 0.6405181884765625, + "epoch": 0.36781939751240206, + "kl_loss": 0.09209297597408295, + "loss_ib": 0.0013034958392381668, + "step": 1279 + }, + { + "ce_ib": 6.023566722869873, + "ce_orig": 0.6345555186271667, + "epoch": 0.36781939751240206, + "kl_loss": 0.11298112571239471, + "loss_ib": 0.001732167904265225, + "step": 1279 + }, + { + "ce_ib": 2.3995261192321777, + "ce_orig": 0.2504364252090454, + "epoch": 0.36781939751240206, + "kl_loss": 0.26038891077041626, + "loss_ib": 0.0028438414447009563, + "step": 1279 + }, + { + "ce_ib": 5.019238471984863, + "ce_orig": 0.7604730725288391, + "epoch": 0.36781939751240206, + "kl_loss": 0.08353875577449799, + "loss_ib": 0.001337311347015202, + "step": 1279 + }, + { + "epoch": 0.3681069810913797, + "grad_norm": 0.08959182351827621, + "learning_rate": 4.889801791776921e-05, + "loss": 0.879, + "step": 1280 + }, + { + "ce_ib": 5.5621867179870605, + "ce_orig": 0.7939660549163818, + "epoch": 0.3681069810913797, + "kl_loss": 0.16857171058654785, + "loss_ib": 0.002241935580968857, + "step": 1280 + }, + { + "ce_ib": 7.465915203094482, + "ce_orig": 0.9574486613273621, + "epoch": 0.3681069810913797, + "kl_loss": 0.09049826860427856, + "loss_ib": 0.001651574159041047, + "step": 1280 + }, + { + "ce_ib": 5.6863579750061035, + "ce_orig": 0.8255658745765686, + "epoch": 0.3681069810913797, + "kl_loss": 0.09053034335374832, + "loss_ib": 0.0014739392790943384, + "step": 1280 + }, + { + "ce_ib": 3.539290428161621, + "ce_orig": 0.4773739278316498, + "epoch": 0.3681069810913797, + "kl_loss": 0.13628683984279633, + "loss_ib": 0.001716797356493771, + "step": 1280 + }, + { + "ce_ib": 8.499439239501953, + "ce_orig": 1.0162585973739624, + "epoch": 0.3683945646703573, + "kl_loss": 0.16252519190311432, + "loss_ib": 0.002475195797160268, + "step": 1281 + }, + { + "ce_ib": 5.038724899291992, + "ce_orig": 1.0485836267471313, + "epoch": 0.3683945646703573, + "kl_loss": 0.07839176803827286, + "loss_ib": 0.001287790248170495, + "step": 1281 + }, + { + "ce_ib": 4.911864280700684, + "ce_orig": 0.656204104423523, + "epoch": 0.3683945646703573, + "kl_loss": 0.08212752640247345, + "loss_ib": 0.0013124615652486682, + "step": 1281 + }, + { + "ce_ib": 3.5285279750823975, + "ce_orig": 0.5432829260826111, + "epoch": 0.3683945646703573, + "kl_loss": 0.07672290503978729, + "loss_ib": 0.0011200818698853254, + "step": 1281 + }, + { + "ce_ib": 8.837839126586914, + "ce_orig": 1.1560882329940796, + "epoch": 0.368682148249335, + "kl_loss": 0.15169808268547058, + "loss_ib": 0.0024007647298276424, + "step": 1282 + }, + { + "ce_ib": 6.747598171234131, + "ce_orig": 0.9385497570037842, + "epoch": 0.368682148249335, + "kl_loss": 0.15202166140079498, + "loss_ib": 0.002194976434111595, + "step": 1282 + }, + { + "ce_ib": 5.9851531982421875, + "ce_orig": 0.6511201858520508, + "epoch": 0.368682148249335, + "kl_loss": 0.16669416427612305, + "loss_ib": 0.0022654568310827017, + "step": 1282 + }, + { + "ce_ib": 7.009302139282227, + "ce_orig": 1.0727838277816772, + "epoch": 0.368682148249335, + "kl_loss": 0.14475645124912262, + "loss_ib": 0.002148494590073824, + "step": 1282 + }, + { + "ce_ib": 7.189665794372559, + "ce_orig": 0.7218466401100159, + "epoch": 0.3689697318283126, + "kl_loss": 0.1025504320859909, + "loss_ib": 0.001744470908306539, + "step": 1283 + }, + { + "ce_ib": 4.639631748199463, + "ce_orig": 0.551937997341156, + "epoch": 0.3689697318283126, + "kl_loss": 0.07025028765201569, + "loss_ib": 0.0011664660414680839, + "step": 1283 + }, + { + "ce_ib": 6.67887544631958, + "ce_orig": 1.0466539859771729, + "epoch": 0.3689697318283126, + "kl_loss": 0.07730047404766083, + "loss_ib": 0.0014408922288566828, + "step": 1283 + }, + { + "ce_ib": 6.689807891845703, + "ce_orig": 0.7414644956588745, + "epoch": 0.3689697318283126, + "kl_loss": 0.15120118856430054, + "loss_ib": 0.0021809926256537437, + "step": 1283 + }, + { + "ce_ib": 5.785625457763672, + "ce_orig": 0.6918303370475769, + "epoch": 0.36925731540729023, + "kl_loss": 0.10686799883842468, + "loss_ib": 0.0016472425777465105, + "step": 1284 + }, + { + "ce_ib": 6.988125801086426, + "ce_orig": 0.45072489976882935, + "epoch": 0.36925731540729023, + "kl_loss": 0.15907561779022217, + "loss_ib": 0.0022895687725394964, + "step": 1284 + }, + { + "ce_ib": 5.028069972991943, + "ce_orig": 0.7821161150932312, + "epoch": 0.36925731540729023, + "kl_loss": 0.11405050754547119, + "loss_ib": 0.0016433119308203459, + "step": 1284 + }, + { + "ce_ib": 7.529943466186523, + "ce_orig": 1.2662677764892578, + "epoch": 0.36925731540729023, + "kl_loss": 0.0978567972779274, + "loss_ib": 0.0017315623117610812, + "step": 1284 + }, + { + "epoch": 0.3695448989862679, + "grad_norm": 0.09508899599313736, + "learning_rate": 4.888659523820549e-05, + "loss": 0.8243, + "step": 1285 + }, + { + "ce_ib": 8.22745418548584, + "ce_orig": 1.2493830919265747, + "epoch": 0.3695448989862679, + "kl_loss": 0.13128282129764557, + "loss_ib": 0.0021355736535042524, + "step": 1285 + }, + { + "ce_ib": 7.42917537689209, + "ce_orig": 0.8488349318504333, + "epoch": 0.3695448989862679, + "kl_loss": 0.09238451719284058, + "loss_ib": 0.0016667626332491636, + "step": 1285 + }, + { + "ce_ib": 7.670847415924072, + "ce_orig": 0.818599283695221, + "epoch": 0.3695448989862679, + "kl_loss": 0.1114754006266594, + "loss_ib": 0.0018818386597558856, + "step": 1285 + }, + { + "ce_ib": 4.175411701202393, + "ce_orig": 0.6316226720809937, + "epoch": 0.3695448989862679, + "kl_loss": 0.07252389937639236, + "loss_ib": 0.0011427801800891757, + "step": 1285 + }, + { + "ce_ib": 6.936399459838867, + "ce_orig": 0.947404682636261, + "epoch": 0.36983248256524553, + "kl_loss": 0.15562579035758972, + "loss_ib": 0.002249897923320532, + "step": 1286 + }, + { + "ce_ib": 3.784386396408081, + "ce_orig": 0.6171741485595703, + "epoch": 0.36983248256524553, + "kl_loss": 0.06299932301044464, + "loss_ib": 0.0010084318928420544, + "step": 1286 + }, + { + "ce_ib": 9.482007026672363, + "ce_orig": 1.793513298034668, + "epoch": 0.36983248256524553, + "kl_loss": 0.1756105124950409, + "loss_ib": 0.002704305574297905, + "step": 1286 + }, + { + "ce_ib": 8.821845054626465, + "ce_orig": 1.5388833284378052, + "epoch": 0.36983248256524553, + "kl_loss": 0.12496484071016312, + "loss_ib": 0.0021318327635526657, + "step": 1286 + }, + { + "ce_ib": 3.470980405807495, + "ce_orig": 0.477110356092453, + "epoch": 0.37012006614422316, + "kl_loss": 0.11538689583539963, + "loss_ib": 0.0015009669587016106, + "step": 1287 + }, + { + "ce_ib": 7.616337776184082, + "ce_orig": 0.6729000210762024, + "epoch": 0.37012006614422316, + "kl_loss": 0.12497438490390778, + "loss_ib": 0.002011377364397049, + "step": 1287 + }, + { + "ce_ib": 6.450989723205566, + "ce_orig": 1.0107957124710083, + "epoch": 0.37012006614422316, + "kl_loss": 0.08995261788368225, + "loss_ib": 0.0015446251491084695, + "step": 1287 + }, + { + "ce_ib": 8.401474952697754, + "ce_orig": 1.377687692642212, + "epoch": 0.37012006614422316, + "kl_loss": 0.11534081399440765, + "loss_ib": 0.001993555575609207, + "step": 1287 + }, + { + "ce_ib": 4.6463518142700195, + "ce_orig": 0.6928462386131287, + "epoch": 0.3704076497232008, + "kl_loss": 0.07001467049121857, + "loss_ib": 0.0011647818610072136, + "step": 1288 + }, + { + "ce_ib": 5.731445789337158, + "ce_orig": 0.7224513292312622, + "epoch": 0.3704076497232008, + "kl_loss": 0.08494200557470322, + "loss_ib": 0.001422564615495503, + "step": 1288 + }, + { + "ce_ib": 6.065194606781006, + "ce_orig": 0.7397277355194092, + "epoch": 0.3704076497232008, + "kl_loss": 0.06865018606185913, + "loss_ib": 0.0012930212542414665, + "step": 1288 + }, + { + "ce_ib": 8.368396759033203, + "ce_orig": 1.4819865226745605, + "epoch": 0.3704076497232008, + "kl_loss": 0.09231487661600113, + "loss_ib": 0.0017599883722141385, + "step": 1288 + }, + { + "ce_ib": 5.247856616973877, + "ce_orig": 0.9851112961769104, + "epoch": 0.37069523330217846, + "kl_loss": 0.10900059342384338, + "loss_ib": 0.0016147915739566088, + "step": 1289 + }, + { + "ce_ib": 5.296111583709717, + "ce_orig": 0.8326603770256042, + "epoch": 0.37069523330217846, + "kl_loss": 0.09622007608413696, + "loss_ib": 0.0014918118249624968, + "step": 1289 + }, + { + "ce_ib": 7.173306465148926, + "ce_orig": 0.4392928183078766, + "epoch": 0.37069523330217846, + "kl_loss": 0.14689943194389343, + "loss_ib": 0.0021863249130547047, + "step": 1289 + }, + { + "ce_ib": 6.673140525817871, + "ce_orig": 0.8259045481681824, + "epoch": 0.37069523330217846, + "kl_loss": 0.15683668851852417, + "loss_ib": 0.0022356808185577393, + "step": 1289 + }, + { + "epoch": 0.3709828168811561, + "grad_norm": 0.10334688425064087, + "learning_rate": 4.887511501028965e-05, + "loss": 0.8809, + "step": 1290 + }, + { + "ce_ib": 6.785250186920166, + "ce_orig": 0.667524516582489, + "epoch": 0.3709828168811561, + "kl_loss": 0.10752973705530167, + "loss_ib": 0.0017538222018629313, + "step": 1290 + }, + { + "ce_ib": 5.8398003578186035, + "ce_orig": 0.6426234841346741, + "epoch": 0.3709828168811561, + "kl_loss": 0.10408969968557358, + "loss_ib": 0.0016248769825324416, + "step": 1290 + }, + { + "ce_ib": 6.559090614318848, + "ce_orig": 0.9128428101539612, + "epoch": 0.3709828168811561, + "kl_loss": 0.1208682581782341, + "loss_ib": 0.0018645914969965816, + "step": 1290 + }, + { + "ce_ib": 5.339390754699707, + "ce_orig": 0.8618748784065247, + "epoch": 0.3709828168811561, + "kl_loss": 0.1005004346370697, + "loss_ib": 0.0015389432664960623, + "step": 1290 + }, + { + "ce_ib": 6.498045921325684, + "ce_orig": 0.6593946814537048, + "epoch": 0.3712704004601337, + "kl_loss": 0.10259787738323212, + "loss_ib": 0.0016757833072915673, + "step": 1291 + }, + { + "ce_ib": 3.9394936561584473, + "ce_orig": 0.6524549126625061, + "epoch": 0.3712704004601337, + "kl_loss": 0.0753093808889389, + "loss_ib": 0.0011470431927591562, + "step": 1291 + }, + { + "ce_ib": 4.098540782928467, + "ce_orig": 0.4942637085914612, + "epoch": 0.3712704004601337, + "kl_loss": 0.07822375744581223, + "loss_ib": 0.0011920916149392724, + "step": 1291 + }, + { + "ce_ib": 5.597634792327881, + "ce_orig": 0.8362662196159363, + "epoch": 0.3712704004601337, + "kl_loss": 0.15009665489196777, + "loss_ib": 0.0020607300102710724, + "step": 1291 + }, + { + "ce_ib": 7.818037986755371, + "ce_orig": 0.9996674060821533, + "epoch": 0.3715579840391114, + "kl_loss": 0.10779958218336105, + "loss_ib": 0.0018597996095195413, + "step": 1292 + }, + { + "ce_ib": 3.643066167831421, + "ce_orig": 0.5499973297119141, + "epoch": 0.3715579840391114, + "kl_loss": 0.10857464373111725, + "loss_ib": 0.0014500529505312443, + "step": 1292 + }, + { + "ce_ib": 3.60650634765625, + "ce_orig": 0.46445924043655396, + "epoch": 0.3715579840391114, + "kl_loss": 0.10870900750160217, + "loss_ib": 0.0014477407094091177, + "step": 1292 + }, + { + "ce_ib": 3.7332096099853516, + "ce_orig": 0.6267498731613159, + "epoch": 0.3715579840391114, + "kl_loss": 0.11489161849021912, + "loss_ib": 0.001522237085737288, + "step": 1292 + }, + { + "ce_ib": 4.233822345733643, + "ce_orig": 0.5855693817138672, + "epoch": 0.371845567618089, + "kl_loss": 0.11481663584709167, + "loss_ib": 0.0015715485205873847, + "step": 1293 + }, + { + "ce_ib": 8.881464958190918, + "ce_orig": 1.2566783428192139, + "epoch": 0.371845567618089, + "kl_loss": 0.12536683678627014, + "loss_ib": 0.0021418146789073944, + "step": 1293 + }, + { + "ce_ib": 5.271763801574707, + "ce_orig": 0.42172160744667053, + "epoch": 0.371845567618089, + "kl_loss": 0.1555880904197693, + "loss_ib": 0.00208305730484426, + "step": 1293 + }, + { + "ce_ib": 5.407679080963135, + "ce_orig": 0.797361433506012, + "epoch": 0.371845567618089, + "kl_loss": 0.0833439826965332, + "loss_ib": 0.0013742076698690653, + "step": 1293 + }, + { + "ce_ib": 6.48065710067749, + "ce_orig": 0.9504364132881165, + "epoch": 0.37213315119706664, + "kl_loss": 0.07627981901168823, + "loss_ib": 0.0014108639443293214, + "step": 1294 + }, + { + "ce_ib": 5.477166652679443, + "ce_orig": 0.8126649856567383, + "epoch": 0.37213315119706664, + "kl_loss": 0.10854905098676682, + "loss_ib": 0.0016332071973010898, + "step": 1294 + }, + { + "ce_ib": 2.3084990978240967, + "ce_orig": 0.2607325613498688, + "epoch": 0.37213315119706664, + "kl_loss": 0.16034573316574097, + "loss_ib": 0.0018343072151765227, + "step": 1294 + }, + { + "ce_ib": 5.476607799530029, + "ce_orig": 0.621596097946167, + "epoch": 0.37213315119706664, + "kl_loss": 0.11543017625808716, + "loss_ib": 0.001701962435618043, + "step": 1294 + }, + { + "epoch": 0.3724207347760443, + "grad_norm": 0.091304711997509, + "learning_rate": 4.8863577261680226e-05, + "loss": 0.8258, + "step": 1295 + }, + { + "ce_ib": 4.283220291137695, + "ce_orig": 0.6745275259017944, + "epoch": 0.3724207347760443, + "kl_loss": 0.0820574015378952, + "loss_ib": 0.001248896005563438, + "step": 1295 + }, + { + "ce_ib": 3.2045159339904785, + "ce_orig": 0.529264509677887, + "epoch": 0.3724207347760443, + "kl_loss": 0.08010916411876678, + "loss_ib": 0.001121543231420219, + "step": 1295 + }, + { + "ce_ib": 6.051677703857422, + "ce_orig": 0.7465357780456543, + "epoch": 0.3724207347760443, + "kl_loss": 0.21004080772399902, + "loss_ib": 0.002705575665459037, + "step": 1295 + }, + { + "ce_ib": 6.0474324226379395, + "ce_orig": 1.0841017961502075, + "epoch": 0.3724207347760443, + "kl_loss": 0.12120488286018372, + "loss_ib": 0.0018167919479310513, + "step": 1295 + }, + { + "ce_ib": 6.126406669616699, + "ce_orig": 1.0412760972976685, + "epoch": 0.37270831835502194, + "kl_loss": 0.10262041538953781, + "loss_ib": 0.001638844725675881, + "step": 1296 + }, + { + "ce_ib": 5.098903179168701, + "ce_orig": 0.6683371067047119, + "epoch": 0.37270831835502194, + "kl_loss": 0.11465729773044586, + "loss_ib": 0.0016564632533118129, + "step": 1296 + }, + { + "ce_ib": 4.434388160705566, + "ce_orig": 0.4733821749687195, + "epoch": 0.37270831835502194, + "kl_loss": 0.13180866837501526, + "loss_ib": 0.0017615255201235414, + "step": 1296 + }, + { + "ce_ib": 4.306613445281982, + "ce_orig": 0.6650025844573975, + "epoch": 0.37270831835502194, + "kl_loss": 0.08112086355686188, + "loss_ib": 0.0012418698752298951, + "step": 1296 + }, + { + "ce_ib": 7.580479621887207, + "ce_orig": 1.093042254447937, + "epoch": 0.37299590193399956, + "kl_loss": 0.09305058419704437, + "loss_ib": 0.0016885536024346948, + "step": 1297 + }, + { + "ce_ib": 4.302249908447266, + "ce_orig": 0.537747859954834, + "epoch": 0.37299590193399956, + "kl_loss": 0.2276405543088913, + "loss_ib": 0.002706630388274789, + "step": 1297 + }, + { + "ce_ib": 4.3821821212768555, + "ce_orig": 0.8077664971351624, + "epoch": 0.37299590193399956, + "kl_loss": 0.1235668808221817, + "loss_ib": 0.0016738870181143284, + "step": 1297 + }, + { + "ce_ib": 6.15134859085083, + "ce_orig": 0.8116844892501831, + "epoch": 0.37299590193399956, + "kl_loss": 0.13009323179721832, + "loss_ib": 0.001916067092679441, + "step": 1297 + }, + { + "ce_ib": 6.408975124359131, + "ce_orig": 1.0396615266799927, + "epoch": 0.3732834855129772, + "kl_loss": 0.1137128621339798, + "loss_ib": 0.001778026227839291, + "step": 1298 + }, + { + "ce_ib": 5.81378698348999, + "ce_orig": 0.9238652586936951, + "epoch": 0.3732834855129772, + "kl_loss": 0.11552828550338745, + "loss_ib": 0.0017366614192724228, + "step": 1298 + }, + { + "ce_ib": 4.731773376464844, + "ce_orig": 0.8088876008987427, + "epoch": 0.3732834855129772, + "kl_loss": 0.1280672252178192, + "loss_ib": 0.0017538494430482388, + "step": 1298 + }, + { + "ce_ib": 7.179605484008789, + "ce_orig": 0.9171648621559143, + "epoch": 0.3732834855129772, + "kl_loss": 0.08370693027973175, + "loss_ib": 0.0015550297684967518, + "step": 1298 + }, + { + "ce_ib": 9.204768180847168, + "ce_orig": 1.7088639736175537, + "epoch": 0.37357106909195487, + "kl_loss": 0.14535124599933624, + "loss_ib": 0.0023739892058074474, + "step": 1299 + }, + { + "ce_ib": 7.621387958526611, + "ce_orig": 0.7997857332229614, + "epoch": 0.37357106909195487, + "kl_loss": 0.5037106871604919, + "loss_ib": 0.0057992455549538136, + "step": 1299 + }, + { + "ce_ib": 4.995852947235107, + "ce_orig": 0.3401322066783905, + "epoch": 0.37357106909195487, + "kl_loss": 0.09861315041780472, + "loss_ib": 0.001485716667957604, + "step": 1299 + }, + { + "ce_ib": 7.1057305335998535, + "ce_orig": 0.9840693473815918, + "epoch": 0.37357106909195487, + "kl_loss": 0.10749038308858871, + "loss_ib": 0.001785476808436215, + "step": 1299 + }, + { + "epoch": 0.3738586526709325, + "grad_norm": 0.11028212308883667, + "learning_rate": 4.8851982020174316e-05, + "loss": 0.881, + "step": 1300 + }, + { + "ce_ib": 8.377426147460938, + "ce_orig": 1.3471934795379639, + "epoch": 0.3738586526709325, + "kl_loss": 0.09995093941688538, + "loss_ib": 0.0018372520571574569, + "step": 1300 + }, + { + "ce_ib": 4.688452243804932, + "ce_orig": 0.5826649069786072, + "epoch": 0.3738586526709325, + "kl_loss": 0.12547636032104492, + "loss_ib": 0.001723608816973865, + "step": 1300 + }, + { + "ce_ib": 3.959388017654419, + "ce_orig": 0.4809862971305847, + "epoch": 0.3738586526709325, + "kl_loss": 0.06880328059196472, + "loss_ib": 0.0010839715832844377, + "step": 1300 + }, + { + "ce_ib": 7.195967197418213, + "ce_orig": 0.8778854012489319, + "epoch": 0.3738586526709325, + "kl_loss": 0.1450991928577423, + "loss_ib": 0.0021705885883420706, + "step": 1300 + }, + { + "ce_ib": 5.772943019866943, + "ce_orig": 0.7366096377372742, + "epoch": 0.3741462362499101, + "kl_loss": 0.17550821602344513, + "loss_ib": 0.0023323765490204096, + "step": 1301 + }, + { + "ce_ib": 5.280375003814697, + "ce_orig": 0.5117489695549011, + "epoch": 0.3741462362499101, + "kl_loss": 0.12708105146884918, + "loss_ib": 0.0017988479230552912, + "step": 1301 + }, + { + "ce_ib": 6.551858901977539, + "ce_orig": 1.004411220550537, + "epoch": 0.3741462362499101, + "kl_loss": 0.16514693200588226, + "loss_ib": 0.0023066550493240356, + "step": 1301 + }, + { + "ce_ib": 7.748233318328857, + "ce_orig": 1.162596344947815, + "epoch": 0.3741462362499101, + "kl_loss": 0.11069048941135406, + "loss_ib": 0.0018817281816154718, + "step": 1301 + }, + { + "ce_ib": 2.3434817790985107, + "ce_orig": 0.2155807465314865, + "epoch": 0.3744338198288878, + "kl_loss": 0.24002450704574585, + "loss_ib": 0.0026345932856202126, + "step": 1302 + }, + { + "ce_ib": 4.517918586730957, + "ce_orig": 0.6924988627433777, + "epoch": 0.3744338198288878, + "kl_loss": 0.06375230848789215, + "loss_ib": 0.0010893149301409721, + "step": 1302 + }, + { + "ce_ib": 5.591329097747803, + "ce_orig": 0.7973899841308594, + "epoch": 0.3744338198288878, + "kl_loss": 0.11718559265136719, + "loss_ib": 0.00173098873347044, + "step": 1302 + }, + { + "ce_ib": 6.910216331481934, + "ce_orig": 0.9021272659301758, + "epoch": 0.3744338198288878, + "kl_loss": 0.1621193140745163, + "loss_ib": 0.0023122145794332027, + "step": 1302 + }, + { + "ce_ib": 4.825242042541504, + "ce_orig": 0.5692858099937439, + "epoch": 0.3747214034078654, + "kl_loss": 0.12797698378562927, + "loss_ib": 0.001762293977662921, + "step": 1303 + }, + { + "ce_ib": 3.828768730163574, + "ce_orig": 0.7801523208618164, + "epoch": 0.3747214034078654, + "kl_loss": 0.11873648315668106, + "loss_ib": 0.0015702417585998774, + "step": 1303 + }, + { + "ce_ib": 3.9736738204956055, + "ce_orig": 0.7081290483474731, + "epoch": 0.3747214034078654, + "kl_loss": 0.06060061603784561, + "loss_ib": 0.0010033735306933522, + "step": 1303 + }, + { + "ce_ib": 9.061676979064941, + "ce_orig": 1.2129696607589722, + "epoch": 0.3747214034078654, + "kl_loss": 0.0726584941148758, + "loss_ib": 0.0016327527118846774, + "step": 1303 + }, + { + "ce_ib": 4.078707218170166, + "ce_orig": 0.42766043543815613, + "epoch": 0.37500898698684304, + "kl_loss": 0.0907704085111618, + "loss_ib": 0.001315574743784964, + "step": 1304 + }, + { + "ce_ib": 8.35204792022705, + "ce_orig": 1.4698388576507568, + "epoch": 0.37500898698684304, + "kl_loss": 0.13164952397346497, + "loss_ib": 0.0021516999695450068, + "step": 1304 + }, + { + "ce_ib": 7.965769290924072, + "ce_orig": 1.2763580083847046, + "epoch": 0.37500898698684304, + "kl_loss": 0.09261095523834229, + "loss_ib": 0.0017226864583790302, + "step": 1304 + }, + { + "ce_ib": 6.983380317687988, + "ce_orig": 1.0021127462387085, + "epoch": 0.37500898698684304, + "kl_loss": 0.13547343015670776, + "loss_ib": 0.0020530722104012966, + "step": 1304 + }, + { + "epoch": 0.37529657056582066, + "grad_norm": 0.08439778536558151, + "learning_rate": 4.8840329313707556e-05, + "loss": 0.859, + "step": 1305 + }, + { + "ce_ib": 7.7243733406066895, + "ce_orig": 1.0866235494613647, + "epoch": 0.37529657056582066, + "kl_loss": 0.11019238829612732, + "loss_ib": 0.0018743611872196198, + "step": 1305 + }, + { + "ce_ib": 5.882345676422119, + "ce_orig": 0.7346667647361755, + "epoch": 0.37529657056582066, + "kl_loss": 0.09288327395915985, + "loss_ib": 0.0015170671977102757, + "step": 1305 + }, + { + "ce_ib": 3.667390823364258, + "ce_orig": 0.9194813966751099, + "epoch": 0.37529657056582066, + "kl_loss": 0.05089962109923363, + "loss_ib": 0.0008757352479733527, + "step": 1305 + }, + { + "ce_ib": 7.411571502685547, + "ce_orig": 0.9807973504066467, + "epoch": 0.37529657056582066, + "kl_loss": 0.08906038105487823, + "loss_ib": 0.001631760853342712, + "step": 1305 + }, + { + "ce_ib": 8.256385803222656, + "ce_orig": 1.1893174648284912, + "epoch": 0.37558415414479834, + "kl_loss": 0.1135597825050354, + "loss_ib": 0.0019612363539636135, + "step": 1306 + }, + { + "ce_ib": 6.674210071563721, + "ce_orig": 0.8289363980293274, + "epoch": 0.37558415414479834, + "kl_loss": 0.137071430683136, + "loss_ib": 0.0020381351932883263, + "step": 1306 + }, + { + "ce_ib": 7.589658260345459, + "ce_orig": 1.0683447122573853, + "epoch": 0.37558415414479834, + "kl_loss": 0.1060531884431839, + "loss_ib": 0.001819497556425631, + "step": 1306 + }, + { + "ce_ib": 8.912439346313477, + "ce_orig": 1.4486478567123413, + "epoch": 0.37558415414479834, + "kl_loss": 0.15647874772548676, + "loss_ib": 0.002456031274050474, + "step": 1306 + }, + { + "ce_ib": 9.321609497070312, + "ce_orig": 1.840979814529419, + "epoch": 0.37587173772377597, + "kl_loss": 0.0922217071056366, + "loss_ib": 0.0018543779151514173, + "step": 1307 + }, + { + "ce_ib": 4.2626566886901855, + "ce_orig": 0.5894846320152283, + "epoch": 0.37587173772377597, + "kl_loss": 0.14015614986419678, + "loss_ib": 0.0018278270727023482, + "step": 1307 + }, + { + "ce_ib": 10.738929748535156, + "ce_orig": 1.6773637533187866, + "epoch": 0.37587173772377597, + "kl_loss": 0.0642920583486557, + "loss_ib": 0.0017168134218081832, + "step": 1307 + }, + { + "ce_ib": 3.526549816131592, + "ce_orig": 0.6496335864067078, + "epoch": 0.37587173772377597, + "kl_loss": 0.08111678808927536, + "loss_ib": 0.0011638228315860033, + "step": 1307 + }, + { + "ce_ib": 6.587804317474365, + "ce_orig": 0.6036332845687866, + "epoch": 0.3761593213027536, + "kl_loss": 0.10333004593849182, + "loss_ib": 0.0016920807538554072, + "step": 1308 + }, + { + "ce_ib": 6.432805061340332, + "ce_orig": 0.7364359498023987, + "epoch": 0.3761593213027536, + "kl_loss": 0.07528705894947052, + "loss_ib": 0.0013961511431261897, + "step": 1308 + }, + { + "ce_ib": 4.427136421203613, + "ce_orig": 0.6185834407806396, + "epoch": 0.3761593213027536, + "kl_loss": 0.09119290858507156, + "loss_ib": 0.0013546427944675088, + "step": 1308 + }, + { + "ce_ib": 7.926487922668457, + "ce_orig": 1.2750617265701294, + "epoch": 0.3761593213027536, + "kl_loss": 0.10837486386299133, + "loss_ib": 0.0018763974076136947, + "step": 1308 + }, + { + "ce_ib": 7.6131205558776855, + "ce_orig": 1.4484436511993408, + "epoch": 0.37644690488173127, + "kl_loss": 0.12636443972587585, + "loss_ib": 0.0020249562803655863, + "step": 1309 + }, + { + "ce_ib": 5.368390083312988, + "ce_orig": 0.9395446181297302, + "epoch": 0.37644690488173127, + "kl_loss": 0.09168052673339844, + "loss_ib": 0.0014536442467942834, + "step": 1309 + }, + { + "ce_ib": 5.244813442230225, + "ce_orig": 1.0128648281097412, + "epoch": 0.37644690488173127, + "kl_loss": 0.07855847477912903, + "loss_ib": 0.0013100660871714354, + "step": 1309 + }, + { + "ce_ib": 5.962604999542236, + "ce_orig": 0.9374620914459229, + "epoch": 0.37644690488173127, + "kl_loss": 0.11523380875587463, + "loss_ib": 0.0017485985299572349, + "step": 1309 + }, + { + "epoch": 0.3767344884607089, + "grad_norm": 0.10580755770206451, + "learning_rate": 4.882861917035402e-05, + "loss": 0.8392, + "step": 1310 + }, + { + "ce_ib": 9.31829833984375, + "ce_orig": 1.440765142440796, + "epoch": 0.3767344884607089, + "kl_loss": 0.19183677434921265, + "loss_ib": 0.0028501974884420633, + "step": 1310 + }, + { + "ce_ib": 10.160751342773438, + "ce_orig": 1.4808740615844727, + "epoch": 0.3767344884607089, + "kl_loss": 0.1334453672170639, + "loss_ib": 0.0023505287244915962, + "step": 1310 + }, + { + "ce_ib": 5.5851545333862305, + "ce_orig": 1.0320905447006226, + "epoch": 0.3767344884607089, + "kl_loss": 0.1124086007475853, + "loss_ib": 0.0016826014034450054, + "step": 1310 + }, + { + "ce_ib": 6.999680519104004, + "ce_orig": 0.9789513349533081, + "epoch": 0.3767344884607089, + "kl_loss": 0.17336848378181458, + "loss_ib": 0.0024336527567356825, + "step": 1310 + }, + { + "ce_ib": 5.470460891723633, + "ce_orig": 0.8240450024604797, + "epoch": 0.3770220720396865, + "kl_loss": 0.05858692526817322, + "loss_ib": 0.001132915262132883, + "step": 1311 + }, + { + "ce_ib": 6.502416133880615, + "ce_orig": 0.784637451171875, + "epoch": 0.3770220720396865, + "kl_loss": 0.09532777965068817, + "loss_ib": 0.0016035193111747503, + "step": 1311 + }, + { + "ce_ib": 4.977993488311768, + "ce_orig": 0.3519165515899658, + "epoch": 0.3770220720396865, + "kl_loss": 0.13049781322479248, + "loss_ib": 0.0018027774058282375, + "step": 1311 + }, + { + "ce_ib": 5.470664978027344, + "ce_orig": 0.5675799250602722, + "epoch": 0.3770220720396865, + "kl_loss": 0.09750883281230927, + "loss_ib": 0.0015221547801047564, + "step": 1311 + }, + { + "ce_ib": 5.6255574226379395, + "ce_orig": 0.4978528320789337, + "epoch": 0.3773096556186642, + "kl_loss": 0.13040006160736084, + "loss_ib": 0.0018665563547983766, + "step": 1312 + }, + { + "ce_ib": 6.08099365234375, + "ce_orig": 0.6202380657196045, + "epoch": 0.3773096556186642, + "kl_loss": 0.12318846583366394, + "loss_ib": 0.0018399839755147696, + "step": 1312 + }, + { + "ce_ib": 5.541018486022949, + "ce_orig": 0.7439128160476685, + "epoch": 0.3773096556186642, + "kl_loss": 0.07084212452173233, + "loss_ib": 0.0012625230010598898, + "step": 1312 + }, + { + "ce_ib": 4.4165730476379395, + "ce_orig": 0.5543098449707031, + "epoch": 0.3773096556186642, + "kl_loss": 0.07426542043685913, + "loss_ib": 0.0011843114625662565, + "step": 1312 + }, + { + "ce_ib": 3.938666820526123, + "ce_orig": 0.7918110489845276, + "epoch": 0.3775972391976418, + "kl_loss": 0.0728941410779953, + "loss_ib": 0.0011228080838918686, + "step": 1313 + }, + { + "ce_ib": 8.127522468566895, + "ce_orig": 1.214548110961914, + "epoch": 0.3775972391976418, + "kl_loss": 0.12012702226638794, + "loss_ib": 0.002014022320508957, + "step": 1313 + }, + { + "ce_ib": 6.598063945770264, + "ce_orig": 1.1606128215789795, + "epoch": 0.3775972391976418, + "kl_loss": 0.11751188337802887, + "loss_ib": 0.0018349250312894583, + "step": 1313 + }, + { + "ce_ib": 6.119964122772217, + "ce_orig": 0.8887215256690979, + "epoch": 0.3775972391976418, + "kl_loss": 0.11058682203292847, + "loss_ib": 0.0017178645357489586, + "step": 1313 + }, + { + "ce_ib": 5.292278289794922, + "ce_orig": 0.8706098794937134, + "epoch": 0.37788482277661944, + "kl_loss": 0.11289675533771515, + "loss_ib": 0.0016581953968852758, + "step": 1314 + }, + { + "ce_ib": 4.4833784103393555, + "ce_orig": 0.7065367698669434, + "epoch": 0.37788482277661944, + "kl_loss": 0.08428777754306793, + "loss_ib": 0.001291215536184609, + "step": 1314 + }, + { + "ce_ib": 5.184488773345947, + "ce_orig": 0.5441928505897522, + "epoch": 0.37788482277661944, + "kl_loss": 0.11040713638067245, + "loss_ib": 0.0016225201543420553, + "step": 1314 + }, + { + "ce_ib": 7.428981304168701, + "ce_orig": 1.2260949611663818, + "epoch": 0.37788482277661944, + "kl_loss": 0.11685201525688171, + "loss_ib": 0.001911418279632926, + "step": 1314 + }, + { + "epoch": 0.37817240635559707, + "grad_norm": 0.093324214220047, + "learning_rate": 4.881685161832617e-05, + "loss": 0.8512, + "step": 1315 + }, + { + "ce_ib": 3.4923665523529053, + "ce_orig": 0.4155826270580292, + "epoch": 0.37817240635559707, + "kl_loss": 0.11499406397342682, + "loss_ib": 0.0014991771895438433, + "step": 1315 + }, + { + "ce_ib": 8.04870891571045, + "ce_orig": 1.0350520610809326, + "epoch": 0.37817240635559707, + "kl_loss": 0.24932055175304413, + "loss_ib": 0.0032980763353407383, + "step": 1315 + }, + { + "ce_ib": 6.75008487701416, + "ce_orig": 1.0719481706619263, + "epoch": 0.37817240635559707, + "kl_loss": 0.12588632106781006, + "loss_ib": 0.0019338716519996524, + "step": 1315 + }, + { + "ce_ib": 5.137867450714111, + "ce_orig": 0.939078152179718, + "epoch": 0.37817240635559707, + "kl_loss": 0.08320405334234238, + "loss_ib": 0.0013458272442221642, + "step": 1315 + }, + { + "ce_ib": 5.601523399353027, + "ce_orig": 0.730479896068573, + "epoch": 0.37845998993457475, + "kl_loss": 0.06329482793807983, + "loss_ib": 0.0011931005865335464, + "step": 1316 + }, + { + "ce_ib": 3.6545348167419434, + "ce_orig": 0.6439960598945618, + "epoch": 0.37845998993457475, + "kl_loss": 0.08682240545749664, + "loss_ib": 0.00123367749620229, + "step": 1316 + }, + { + "ce_ib": 6.024953842163086, + "ce_orig": 0.6438689231872559, + "epoch": 0.37845998993457475, + "kl_loss": 0.16080550849437714, + "loss_ib": 0.0022105504758656025, + "step": 1316 + }, + { + "ce_ib": 4.515985012054443, + "ce_orig": 0.544121265411377, + "epoch": 0.37845998993457475, + "kl_loss": 0.09402793645858765, + "loss_ib": 0.0013918777694925666, + "step": 1316 + }, + { + "ce_ib": 5.014019012451172, + "ce_orig": 0.4737093150615692, + "epoch": 0.37874757351355237, + "kl_loss": 0.1738872081041336, + "loss_ib": 0.002240273868665099, + "step": 1317 + }, + { + "ce_ib": 5.783313751220703, + "ce_orig": 0.6481950879096985, + "epoch": 0.37874757351355237, + "kl_loss": 0.14250211417675018, + "loss_ib": 0.002003352390602231, + "step": 1317 + }, + { + "ce_ib": 6.43743371963501, + "ce_orig": 0.850814163684845, + "epoch": 0.37874757351355237, + "kl_loss": 0.11499704420566559, + "loss_ib": 0.0017937137745320797, + "step": 1317 + }, + { + "ce_ib": 4.943159580230713, + "ce_orig": 0.6938974857330322, + "epoch": 0.37874757351355237, + "kl_loss": 0.1488955020904541, + "loss_ib": 0.0019832709804177284, + "step": 1317 + }, + { + "ce_ib": 4.182210922241211, + "ce_orig": 0.5845946669578552, + "epoch": 0.37903515709253, + "kl_loss": 0.0654899999499321, + "loss_ib": 0.0010731210932135582, + "step": 1318 + }, + { + "ce_ib": 9.34213924407959, + "ce_orig": 1.4825302362442017, + "epoch": 0.37903515709253, + "kl_loss": 0.18803195655345917, + "loss_ib": 0.002814533421769738, + "step": 1318 + }, + { + "ce_ib": 6.903960227966309, + "ce_orig": 1.253632664680481, + "epoch": 0.37903515709253, + "kl_loss": 0.1699225902557373, + "loss_ib": 0.0023896219208836555, + "step": 1318 + }, + { + "ce_ib": 6.289240837097168, + "ce_orig": 1.0074694156646729, + "epoch": 0.37903515709253, + "kl_loss": 0.0989120751619339, + "loss_ib": 0.0016180448001250625, + "step": 1318 + }, + { + "ce_ib": 5.321438789367676, + "ce_orig": 0.6448070406913757, + "epoch": 0.3793227406715077, + "kl_loss": 0.10022042691707611, + "loss_ib": 0.0015343481209129095, + "step": 1319 + }, + { + "ce_ib": 4.7291975021362305, + "ce_orig": 0.8807885050773621, + "epoch": 0.3793227406715077, + "kl_loss": 0.08091727644205093, + "loss_ib": 0.0012820924166589975, + "step": 1319 + }, + { + "ce_ib": 3.3131628036499023, + "ce_orig": 0.6549236178398132, + "epoch": 0.3793227406715077, + "kl_loss": 0.07724500447511673, + "loss_ib": 0.0011037662625312805, + "step": 1319 + }, + { + "ce_ib": 7.72067403793335, + "ce_orig": 1.068918228149414, + "epoch": 0.3793227406715077, + "kl_loss": 0.08838605880737305, + "loss_ib": 0.0016559278592467308, + "step": 1319 + }, + { + "epoch": 0.3796103242504853, + "grad_norm": 0.10508622229099274, + "learning_rate": 4.880502668597475e-05, + "loss": 0.867, + "step": 1320 + }, + { + "ce_ib": 3.1229352951049805, + "ce_orig": 0.6481002569198608, + "epoch": 0.3796103242504853, + "kl_loss": 0.07972665131092072, + "loss_ib": 0.0011095600202679634, + "step": 1320 + }, + { + "ce_ib": 5.122760772705078, + "ce_orig": 0.7584824562072754, + "epoch": 0.3796103242504853, + "kl_loss": 0.1352473497390747, + "loss_ib": 0.0018647494725883007, + "step": 1320 + }, + { + "ce_ib": 4.91263484954834, + "ce_orig": 0.8516531586647034, + "epoch": 0.3796103242504853, + "kl_loss": 0.08866020292043686, + "loss_ib": 0.0013778654392808676, + "step": 1320 + }, + { + "ce_ib": 4.272454738616943, + "ce_orig": 0.5512766242027283, + "epoch": 0.3796103242504853, + "kl_loss": 0.0826980322599411, + "loss_ib": 0.0012542258482426405, + "step": 1320 + }, + { + "ce_ib": 5.513332366943359, + "ce_orig": 0.5332241654396057, + "epoch": 0.3798979078294629, + "kl_loss": 0.10527944564819336, + "loss_ib": 0.0016041276976466179, + "step": 1321 + }, + { + "ce_ib": 7.259631633758545, + "ce_orig": 1.1701531410217285, + "epoch": 0.3798979078294629, + "kl_loss": 0.1175379604101181, + "loss_ib": 0.0019013426499441266, + "step": 1321 + }, + { + "ce_ib": 5.761963844299316, + "ce_orig": 1.0812888145446777, + "epoch": 0.3798979078294629, + "kl_loss": 0.06597635895013809, + "loss_ib": 0.001235959935002029, + "step": 1321 + }, + { + "ce_ib": 4.677454948425293, + "ce_orig": 0.7489356994628906, + "epoch": 0.3798979078294629, + "kl_loss": 0.12764066457748413, + "loss_ib": 0.001744152163155377, + "step": 1321 + }, + { + "ce_ib": 5.295727252960205, + "ce_orig": 0.7943225502967834, + "epoch": 0.3801854914084406, + "kl_loss": 0.121961809694767, + "loss_ib": 0.0017491908511146903, + "step": 1322 + }, + { + "ce_ib": 4.072883129119873, + "ce_orig": 0.8747122883796692, + "epoch": 0.3801854914084406, + "kl_loss": 0.1996590495109558, + "loss_ib": 0.002403878839686513, + "step": 1322 + }, + { + "ce_ib": 4.075475215911865, + "ce_orig": 0.5803650617599487, + "epoch": 0.3801854914084406, + "kl_loss": 0.05399131029844284, + "loss_ib": 0.0009474605903960764, + "step": 1322 + }, + { + "ce_ib": 6.995948791503906, + "ce_orig": 1.0964949131011963, + "epoch": 0.3801854914084406, + "kl_loss": 0.11446275562047958, + "loss_ib": 0.0018442223081365228, + "step": 1322 + }, + { + "ce_ib": 2.323742389678955, + "ce_orig": 0.1683950126171112, + "epoch": 0.3804730749874182, + "kl_loss": 0.312514990568161, + "loss_ib": 0.00335752428509295, + "step": 1323 + }, + { + "ce_ib": 7.685324192047119, + "ce_orig": 1.3328412771224976, + "epoch": 0.3804730749874182, + "kl_loss": 0.14166969060897827, + "loss_ib": 0.0021852292120456696, + "step": 1323 + }, + { + "ce_ib": 7.503167629241943, + "ce_orig": 0.979636549949646, + "epoch": 0.3804730749874182, + "kl_loss": 0.10989056527614594, + "loss_ib": 0.0018492224626243114, + "step": 1323 + }, + { + "ce_ib": 4.438648223876953, + "ce_orig": 0.644780158996582, + "epoch": 0.3804730749874182, + "kl_loss": 0.11196550726890564, + "loss_ib": 0.0015635198215022683, + "step": 1323 + }, + { + "ce_ib": 7.35436487197876, + "ce_orig": 1.2329034805297852, + "epoch": 0.38076065856639585, + "kl_loss": 0.08079203963279724, + "loss_ib": 0.0015433566877618432, + "step": 1324 + }, + { + "ce_ib": 5.28987979888916, + "ce_orig": 0.5954443216323853, + "epoch": 0.38076065856639585, + "kl_loss": 0.10509554296731949, + "loss_ib": 0.0015799434622749686, + "step": 1324 + }, + { + "ce_ib": 3.9158504009246826, + "ce_orig": 0.6997315287590027, + "epoch": 0.38076065856639585, + "kl_loss": 0.08156973123550415, + "loss_ib": 0.0012072824174538255, + "step": 1324 + }, + { + "ce_ib": 5.5528340339660645, + "ce_orig": 0.7494857907295227, + "epoch": 0.38076065856639585, + "kl_loss": 0.09654660522937775, + "loss_ib": 0.0015207494143396616, + "step": 1324 + }, + { + "epoch": 0.3810482421453735, + "grad_norm": 0.09944023191928864, + "learning_rate": 4.879314440178879e-05, + "loss": 0.866, + "step": 1325 + }, + { + "ce_ib": 6.527103424072266, + "ce_orig": 1.413140892982483, + "epoch": 0.3810482421453735, + "kl_loss": 0.10402053594589233, + "loss_ib": 0.00169291568454355, + "step": 1325 + }, + { + "ce_ib": 7.178508281707764, + "ce_orig": 0.9281395077705383, + "epoch": 0.3810482421453735, + "kl_loss": 0.12966975569725037, + "loss_ib": 0.0020145485177636147, + "step": 1325 + }, + { + "ce_ib": 5.939761161804199, + "ce_orig": 0.7400992512702942, + "epoch": 0.3810482421453735, + "kl_loss": 0.11570632457733154, + "loss_ib": 0.0017510392935946584, + "step": 1325 + }, + { + "ce_ib": 6.276197910308838, + "ce_orig": 0.8784988522529602, + "epoch": 0.3810482421453735, + "kl_loss": 0.11074468493461609, + "loss_ib": 0.0017350665293633938, + "step": 1325 + }, + { + "ce_ib": 7.449951648712158, + "ce_orig": 0.5432068705558777, + "epoch": 0.38133582572435115, + "kl_loss": 0.14821617305278778, + "loss_ib": 0.002227156888693571, + "step": 1326 + }, + { + "ce_ib": 6.362620830535889, + "ce_orig": 0.872480034828186, + "epoch": 0.38133582572435115, + "kl_loss": 0.11232534050941467, + "loss_ib": 0.0017595153767615557, + "step": 1326 + }, + { + "ce_ib": 4.248157978057861, + "ce_orig": 0.6296955943107605, + "epoch": 0.38133582572435115, + "kl_loss": 0.10103049874305725, + "loss_ib": 0.0014351207064464688, + "step": 1326 + }, + { + "ce_ib": 5.756939888000488, + "ce_orig": 0.7265815138816833, + "epoch": 0.38133582572435115, + "kl_loss": 0.1828758269548416, + "loss_ib": 0.0024044523015618324, + "step": 1326 + }, + { + "ce_ib": 7.763768196105957, + "ce_orig": 1.4668439626693726, + "epoch": 0.3816234093033288, + "kl_loss": 0.12431351840496063, + "loss_ib": 0.002019512001425028, + "step": 1327 + }, + { + "ce_ib": 4.5980119705200195, + "ce_orig": 0.7858704328536987, + "epoch": 0.3816234093033288, + "kl_loss": 0.09927660971879959, + "loss_ib": 0.0014525672886520624, + "step": 1327 + }, + { + "ce_ib": 4.350864887237549, + "ce_orig": 0.5971255898475647, + "epoch": 0.3816234093033288, + "kl_loss": 0.05036499351263046, + "loss_ib": 0.000938736426178366, + "step": 1327 + }, + { + "ce_ib": 12.01596736907959, + "ce_orig": 2.1367225646972656, + "epoch": 0.3816234093033288, + "kl_loss": 0.1739673614501953, + "loss_ib": 0.0029412703588604927, + "step": 1327 + }, + { + "ce_ib": 4.798478603363037, + "ce_orig": 0.35993748903274536, + "epoch": 0.3819109928823064, + "kl_loss": 0.14714062213897705, + "loss_ib": 0.0019512539729475975, + "step": 1328 + }, + { + "ce_ib": 4.909856796264648, + "ce_orig": 0.7560886740684509, + "epoch": 0.3819109928823064, + "kl_loss": 0.09511459618806839, + "loss_ib": 0.0014421317027881742, + "step": 1328 + }, + { + "ce_ib": 5.837655544281006, + "ce_orig": 0.7816391587257385, + "epoch": 0.3819109928823064, + "kl_loss": 0.16186535358428955, + "loss_ib": 0.0022024190984666348, + "step": 1328 + }, + { + "ce_ib": 5.54555606842041, + "ce_orig": 0.6967433094978333, + "epoch": 0.3819109928823064, + "kl_loss": 0.1746070683002472, + "loss_ib": 0.002300626365467906, + "step": 1328 + }, + { + "ce_ib": 5.7812042236328125, + "ce_orig": 0.5719181299209595, + "epoch": 0.3821985764612841, + "kl_loss": 0.11337076872587204, + "loss_ib": 0.0017118280520662665, + "step": 1329 + }, + { + "ce_ib": 4.787136077880859, + "ce_orig": 0.44207286834716797, + "epoch": 0.3821985764612841, + "kl_loss": 0.1073162704706192, + "loss_ib": 0.0015518763102591038, + "step": 1329 + }, + { + "ce_ib": 6.563467979431152, + "ce_orig": 0.9291197657585144, + "epoch": 0.3821985764612841, + "kl_loss": 0.07843677699565887, + "loss_ib": 0.0014407145790755749, + "step": 1329 + }, + { + "ce_ib": 4.12862491607666, + "ce_orig": 0.8059925436973572, + "epoch": 0.3821985764612841, + "kl_loss": 0.06666150689125061, + "loss_ib": 0.0010794774862006307, + "step": 1329 + }, + { + "epoch": 0.3824861600402617, + "grad_norm": 0.08576754480600357, + "learning_rate": 4.878120479439545e-05, + "loss": 0.8664, + "step": 1330 + }, + { + "ce_ib": 7.024020195007324, + "ce_orig": 1.1852842569351196, + "epoch": 0.3824861600402617, + "kl_loss": 0.08465856313705444, + "loss_ib": 0.0015489875804632902, + "step": 1330 + }, + { + "ce_ib": 4.564798355102539, + "ce_orig": 0.5595995187759399, + "epoch": 0.3824861600402617, + "kl_loss": 0.2181047797203064, + "loss_ib": 0.0026375274173915386, + "step": 1330 + }, + { + "ce_ib": 3.40017032623291, + "ce_orig": 0.4371066987514496, + "epoch": 0.3824861600402617, + "kl_loss": 0.13506951928138733, + "loss_ib": 0.0016907122917473316, + "step": 1330 + }, + { + "ce_ib": 7.465134143829346, + "ce_orig": 0.8330971002578735, + "epoch": 0.3824861600402617, + "kl_loss": 0.12516441941261292, + "loss_ib": 0.001998157473281026, + "step": 1330 + }, + { + "ce_ib": 7.266862392425537, + "ce_orig": 0.905171811580658, + "epoch": 0.3827737436192393, + "kl_loss": 0.08720521628856659, + "loss_ib": 0.0015987383667379618, + "step": 1331 + }, + { + "ce_ib": 5.786927700042725, + "ce_orig": 0.7975708246231079, + "epoch": 0.3827737436192393, + "kl_loss": 0.08120322972536087, + "loss_ib": 0.0013907250249758363, + "step": 1331 + }, + { + "ce_ib": 3.518803358078003, + "ce_orig": 0.5128543376922607, + "epoch": 0.3827737436192393, + "kl_loss": 0.06692732125520706, + "loss_ib": 0.0010211535263806581, + "step": 1331 + }, + { + "ce_ib": 4.056680679321289, + "ce_orig": 0.5318570137023926, + "epoch": 0.3827737436192393, + "kl_loss": 0.1076204776763916, + "loss_ib": 0.0014818727504462004, + "step": 1331 + }, + { + "ce_ib": 3.84483003616333, + "ce_orig": 0.6785795092582703, + "epoch": 0.383061327198217, + "kl_loss": 0.09602093696594238, + "loss_ib": 0.0013446924276649952, + "step": 1332 + }, + { + "ce_ib": 5.861329078674316, + "ce_orig": 0.8904538750648499, + "epoch": 0.383061327198217, + "kl_loss": 0.07035691291093826, + "loss_ib": 0.001289702020585537, + "step": 1332 + }, + { + "ce_ib": 6.79622220993042, + "ce_orig": 0.944312572479248, + "epoch": 0.383061327198217, + "kl_loss": 0.11347562074661255, + "loss_ib": 0.0018143784254789352, + "step": 1332 + }, + { + "ce_ib": 7.302602767944336, + "ce_orig": 1.311141848564148, + "epoch": 0.383061327198217, + "kl_loss": 0.07018446922302246, + "loss_ib": 0.001432104967534542, + "step": 1332 + }, + { + "ce_ib": 5.1223578453063965, + "ce_orig": 0.6385131478309631, + "epoch": 0.38334891077719463, + "kl_loss": 0.12935715913772583, + "loss_ib": 0.0018058073474094272, + "step": 1333 + }, + { + "ce_ib": 4.770997047424316, + "ce_orig": 0.5485048890113831, + "epoch": 0.38334891077719463, + "kl_loss": 0.1274576187133789, + "loss_ib": 0.001751675852574408, + "step": 1333 + }, + { + "ce_ib": 4.000513553619385, + "ce_orig": 0.7967947125434875, + "epoch": 0.38334891077719463, + "kl_loss": 0.08080033212900162, + "loss_ib": 0.0012080547166988254, + "step": 1333 + }, + { + "ce_ib": 8.564045906066895, + "ce_orig": 1.0766440629959106, + "epoch": 0.38334891077719463, + "kl_loss": 0.12121468037366867, + "loss_ib": 0.002068551490083337, + "step": 1333 + }, + { + "ce_ib": 6.580170631408691, + "ce_orig": 1.1447159051895142, + "epoch": 0.38363649435617225, + "kl_loss": 0.16854263842105865, + "loss_ib": 0.0023434432223439217, + "step": 1334 + }, + { + "ce_ib": 9.237170219421387, + "ce_orig": 1.8113460540771484, + "epoch": 0.38363649435617225, + "kl_loss": 0.14716824889183044, + "loss_ib": 0.0023953993804752827, + "step": 1334 + }, + { + "ce_ib": 3.421854019165039, + "ce_orig": 0.6700695753097534, + "epoch": 0.38363649435617225, + "kl_loss": 0.04630805179476738, + "loss_ib": 0.0008052659104578197, + "step": 1334 + }, + { + "ce_ib": 6.679248332977295, + "ce_orig": 1.0066481828689575, + "epoch": 0.38363649435617225, + "kl_loss": 0.1421959400177002, + "loss_ib": 0.0020898841321468353, + "step": 1334 + }, + { + "epoch": 0.3839240779351499, + "grad_norm": 0.09364533424377441, + "learning_rate": 4.876920789256003e-05, + "loss": 0.8202, + "step": 1335 + }, + { + "ce_ib": 3.789226531982422, + "ce_orig": 0.7831275463104248, + "epoch": 0.3839240779351499, + "kl_loss": 0.1242968887090683, + "loss_ib": 0.0016218915116041899, + "step": 1335 + }, + { + "ce_ib": 3.81172776222229, + "ce_orig": 0.7005083560943604, + "epoch": 0.3839240779351499, + "kl_loss": 0.06589465588331223, + "loss_ib": 0.0010401193285360932, + "step": 1335 + }, + { + "ce_ib": 7.375043869018555, + "ce_orig": 1.4221928119659424, + "epoch": 0.3839240779351499, + "kl_loss": 0.11741450428962708, + "loss_ib": 0.0019116493640467525, + "step": 1335 + }, + { + "ce_ib": 8.176351547241211, + "ce_orig": 1.0930140018463135, + "epoch": 0.3839240779351499, + "kl_loss": 0.08987772464752197, + "loss_ib": 0.0017164122546091676, + "step": 1335 + }, + { + "ce_ib": 8.336294174194336, + "ce_orig": 0.8998260498046875, + "epoch": 0.38421166151412756, + "kl_loss": 0.09345562011003494, + "loss_ib": 0.0017681854078546166, + "step": 1336 + }, + { + "ce_ib": 4.029426574707031, + "ce_orig": 0.7282453179359436, + "epoch": 0.38421166151412756, + "kl_loss": 0.09021305292844772, + "loss_ib": 0.0013050731504336, + "step": 1336 + }, + { + "ce_ib": 8.017088890075684, + "ce_orig": 0.8555032014846802, + "epoch": 0.38421166151412756, + "kl_loss": 0.12558424472808838, + "loss_ib": 0.002057551173493266, + "step": 1336 + }, + { + "ce_ib": 7.581797122955322, + "ce_orig": 1.004011631011963, + "epoch": 0.38421166151412756, + "kl_loss": 0.10867396742105484, + "loss_ib": 0.0018449192866683006, + "step": 1336 + }, + { + "ce_ib": 6.137843132019043, + "ce_orig": 1.3972002267837524, + "epoch": 0.3844992450931052, + "kl_loss": 0.11302444338798523, + "loss_ib": 0.0017440287629142404, + "step": 1337 + }, + { + "ce_ib": 5.292674541473389, + "ce_orig": 0.5526427626609802, + "epoch": 0.3844992450931052, + "kl_loss": 0.1625136137008667, + "loss_ib": 0.0021544035989791155, + "step": 1337 + }, + { + "ce_ib": 4.300665855407715, + "ce_orig": 1.0206716060638428, + "epoch": 0.3844992450931052, + "kl_loss": 0.07899816334247589, + "loss_ib": 0.0012200481723994017, + "step": 1337 + }, + { + "ce_ib": 5.7134904861450195, + "ce_orig": 1.067671537399292, + "epoch": 0.3844992450931052, + "kl_loss": 0.09659279882907867, + "loss_ib": 0.0015372770139947534, + "step": 1337 + }, + { + "ce_ib": 5.606064796447754, + "ce_orig": 0.9170923829078674, + "epoch": 0.3847868286720828, + "kl_loss": 0.10247627645730972, + "loss_ib": 0.0015853692311793566, + "step": 1338 + }, + { + "ce_ib": 5.497860908508301, + "ce_orig": 0.44184476137161255, + "epoch": 0.3847868286720828, + "kl_loss": 0.13341489434242249, + "loss_ib": 0.0018839349504560232, + "step": 1338 + }, + { + "ce_ib": 7.231921195983887, + "ce_orig": 0.9874067902565002, + "epoch": 0.3847868286720828, + "kl_loss": 0.2059246450662613, + "loss_ib": 0.0027824384160339832, + "step": 1338 + }, + { + "ce_ib": 7.948070049285889, + "ce_orig": 1.2974672317504883, + "epoch": 0.3847868286720828, + "kl_loss": 0.14574149250984192, + "loss_ib": 0.0022522220388054848, + "step": 1338 + }, + { + "ce_ib": 7.4506072998046875, + "ce_orig": 0.5894262790679932, + "epoch": 0.3850744122510605, + "kl_loss": 0.138429194688797, + "loss_ib": 0.0021293526515364647, + "step": 1339 + }, + { + "ce_ib": 8.624794006347656, + "ce_orig": 1.0650362968444824, + "epoch": 0.3850744122510605, + "kl_loss": 0.07382220774888992, + "loss_ib": 0.0016007014783099294, + "step": 1339 + }, + { + "ce_ib": 5.549061298370361, + "ce_orig": 0.5607146620750427, + "epoch": 0.3850744122510605, + "kl_loss": 0.12252309173345566, + "loss_ib": 0.001780137070454657, + "step": 1339 + }, + { + "ce_ib": 7.076119422912598, + "ce_orig": 0.8423207998275757, + "epoch": 0.3850744122510605, + "kl_loss": 0.10627346485853195, + "loss_ib": 0.0017703465418890119, + "step": 1339 + }, + { + "epoch": 0.3853619958300381, + "grad_norm": 0.08929329365491867, + "learning_rate": 4.875715372518585e-05, + "loss": 0.838, + "step": 1340 + }, + { + "ce_ib": 7.541855812072754, + "ce_orig": 0.8731908202171326, + "epoch": 0.3853619958300381, + "kl_loss": 0.1492196023464203, + "loss_ib": 0.0022463814821094275, + "step": 1340 + }, + { + "ce_ib": 8.010886192321777, + "ce_orig": 1.08771550655365, + "epoch": 0.3853619958300381, + "kl_loss": 0.10336001217365265, + "loss_ib": 0.0018346887081861496, + "step": 1340 + }, + { + "ce_ib": 5.893514156341553, + "ce_orig": 0.9211190938949585, + "epoch": 0.3853619958300381, + "kl_loss": 0.13187764585018158, + "loss_ib": 0.0019081278005614877, + "step": 1340 + }, + { + "ce_ib": 5.321976184844971, + "ce_orig": 0.9493192434310913, + "epoch": 0.3853619958300381, + "kl_loss": 0.1045265644788742, + "loss_ib": 0.0015774632338434458, + "step": 1340 + }, + { + "ce_ib": 11.51617431640625, + "ce_orig": 2.110943078994751, + "epoch": 0.38564957940901573, + "kl_loss": 0.1284325271844864, + "loss_ib": 0.0024359426461160183, + "step": 1341 + }, + { + "ce_ib": 7.60202169418335, + "ce_orig": 1.3231353759765625, + "epoch": 0.38564957940901573, + "kl_loss": 0.13365906476974487, + "loss_ib": 0.002096792683005333, + "step": 1341 + }, + { + "ce_ib": 7.612880229949951, + "ce_orig": 1.0295933485031128, + "epoch": 0.38564957940901573, + "kl_loss": 0.13664115965366364, + "loss_ib": 0.002127699553966522, + "step": 1341 + }, + { + "ce_ib": 3.26138973236084, + "ce_orig": 0.49690404534339905, + "epoch": 0.38564957940901573, + "kl_loss": 0.13129015266895294, + "loss_ib": 0.001639040419831872, + "step": 1341 + }, + { + "ce_ib": 5.174814701080322, + "ce_orig": 0.7611823081970215, + "epoch": 0.3859371629879934, + "kl_loss": 0.12216943502426147, + "loss_ib": 0.001739175757393241, + "step": 1342 + }, + { + "ce_ib": 3.6963768005371094, + "ce_orig": 0.6149874329566956, + "epoch": 0.3859371629879934, + "kl_loss": 0.10835998505353928, + "loss_ib": 0.00145323749165982, + "step": 1342 + }, + { + "ce_ib": 6.840673923492432, + "ce_orig": 0.946303129196167, + "epoch": 0.3859371629879934, + "kl_loss": 0.1355583667755127, + "loss_ib": 0.002039650920778513, + "step": 1342 + }, + { + "ce_ib": 6.208271026611328, + "ce_orig": 0.9422585368156433, + "epoch": 0.3859371629879934, + "kl_loss": 0.14368371665477753, + "loss_ib": 0.0020576640963554382, + "step": 1342 + }, + { + "ce_ib": 5.676513195037842, + "ce_orig": 0.9785840511322021, + "epoch": 0.38622474656697103, + "kl_loss": 0.12683354318141937, + "loss_ib": 0.001835986622609198, + "step": 1343 + }, + { + "ce_ib": 6.242606163024902, + "ce_orig": 0.5287706851959229, + "epoch": 0.38622474656697103, + "kl_loss": 0.10230138152837753, + "loss_ib": 0.001647274475544691, + "step": 1343 + }, + { + "ce_ib": 4.061772346496582, + "ce_orig": 0.5952426195144653, + "epoch": 0.38622474656697103, + "kl_loss": 0.05828586965799332, + "loss_ib": 0.0009890359360724688, + "step": 1343 + }, + { + "ce_ib": 6.636641025543213, + "ce_orig": 1.2166383266448975, + "epoch": 0.38622474656697103, + "kl_loss": 0.13114970922470093, + "loss_ib": 0.001975161023437977, + "step": 1343 + }, + { + "ce_ib": 5.310240268707275, + "ce_orig": 0.640544056892395, + "epoch": 0.38651233014594866, + "kl_loss": 0.23128513991832733, + "loss_ib": 0.002843875205144286, + "step": 1344 + }, + { + "ce_ib": 3.497588872909546, + "ce_orig": 0.634738028049469, + "epoch": 0.38651233014594866, + "kl_loss": 0.07952392846345901, + "loss_ib": 0.0011449981248006225, + "step": 1344 + }, + { + "ce_ib": 4.5079827308654785, + "ce_orig": 0.8078311681747437, + "epoch": 0.38651233014594866, + "kl_loss": 0.10402487218379974, + "loss_ib": 0.0014910469762980938, + "step": 1344 + }, + { + "ce_ib": 6.0893473625183105, + "ce_orig": 0.9539284110069275, + "epoch": 0.38651233014594866, + "kl_loss": 0.10625547170639038, + "loss_ib": 0.0016714894445613027, + "step": 1344 + }, + { + "epoch": 0.3867999137249263, + "grad_norm": 0.09451144933700562, + "learning_rate": 4.8745042321314186e-05, + "loss": 0.8364, + "step": 1345 + }, + { + "ce_ib": 4.948427200317383, + "ce_orig": 0.993834376335144, + "epoch": 0.3867999137249263, + "kl_loss": 0.10724367201328278, + "loss_ib": 0.0015672793379053473, + "step": 1345 + }, + { + "ce_ib": 4.9981207847595215, + "ce_orig": 0.7428358793258667, + "epoch": 0.3867999137249263, + "kl_loss": 0.10282860696315765, + "loss_ib": 0.0015280981315299869, + "step": 1345 + }, + { + "ce_ib": 6.816267013549805, + "ce_orig": 1.0093276500701904, + "epoch": 0.3867999137249263, + "kl_loss": 0.08815869688987732, + "loss_ib": 0.0015632137656211853, + "step": 1345 + }, + { + "ce_ib": 3.8050029277801514, + "ce_orig": 0.6337125897407532, + "epoch": 0.3867999137249263, + "kl_loss": 0.149078831076622, + "loss_ib": 0.0018712885212153196, + "step": 1345 + }, + { + "ce_ib": 4.451251029968262, + "ce_orig": 0.7932842969894409, + "epoch": 0.38708749730390396, + "kl_loss": 0.06703363358974457, + "loss_ib": 0.0011154614621773362, + "step": 1346 + }, + { + "ce_ib": 5.360795974731445, + "ce_orig": 0.6554229259490967, + "epoch": 0.38708749730390396, + "kl_loss": 0.08724645525217056, + "loss_ib": 0.001408544136211276, + "step": 1346 + }, + { + "ce_ib": 5.607513427734375, + "ce_orig": 0.7316446304321289, + "epoch": 0.38708749730390396, + "kl_loss": 0.12154290080070496, + "loss_ib": 0.0017761802300810814, + "step": 1346 + }, + { + "ce_ib": 3.820282220840454, + "ce_orig": 0.7290668487548828, + "epoch": 0.38708749730390396, + "kl_loss": 0.07640884816646576, + "loss_ib": 0.0011461166432127357, + "step": 1346 + }, + { + "ce_ib": 4.426151752471924, + "ce_orig": 0.4975670278072357, + "epoch": 0.3873750808828816, + "kl_loss": 0.09992469847202301, + "loss_ib": 0.001441862084902823, + "step": 1347 + }, + { + "ce_ib": 3.601679563522339, + "ce_orig": 0.6812854409217834, + "epoch": 0.3873750808828816, + "kl_loss": 0.07636852562427521, + "loss_ib": 0.001123853144235909, + "step": 1347 + }, + { + "ce_ib": 4.288147449493408, + "ce_orig": 0.7270475029945374, + "epoch": 0.3873750808828816, + "kl_loss": 0.09309622645378113, + "loss_ib": 0.0013597769429907203, + "step": 1347 + }, + { + "ce_ib": 6.769469261169434, + "ce_orig": 0.5788599252700806, + "epoch": 0.3873750808828816, + "kl_loss": 0.09748753160238266, + "loss_ib": 0.0016518222400918603, + "step": 1347 + }, + { + "ce_ib": 5.6365180015563965, + "ce_orig": 0.9563528895378113, + "epoch": 0.3876626644618592, + "kl_loss": 0.22810500860214233, + "loss_ib": 0.002844701986759901, + "step": 1348 + }, + { + "ce_ib": 6.9823126792907715, + "ce_orig": 0.7459795475006104, + "epoch": 0.3876626644618592, + "kl_loss": 0.16720719635486603, + "loss_ib": 0.002370303263887763, + "step": 1348 + }, + { + "ce_ib": 4.878572940826416, + "ce_orig": 0.6489146947860718, + "epoch": 0.3876626644618592, + "kl_loss": 0.07554194331169128, + "loss_ib": 0.0012432767543941736, + "step": 1348 + }, + { + "ce_ib": 5.692221641540527, + "ce_orig": 0.9350115060806274, + "epoch": 0.3876626644618592, + "kl_loss": 0.09622718393802643, + "loss_ib": 0.0015314939664676785, + "step": 1348 + }, + { + "ce_ib": 7.5320258140563965, + "ce_orig": 1.023134469985962, + "epoch": 0.3879502480408369, + "kl_loss": 0.13200296461582184, + "loss_ib": 0.0020732320845127106, + "step": 1349 + }, + { + "ce_ib": 6.651448726654053, + "ce_orig": 0.8953782320022583, + "epoch": 0.3879502480408369, + "kl_loss": 0.11706017702817917, + "loss_ib": 0.001835746574215591, + "step": 1349 + }, + { + "ce_ib": 4.424673080444336, + "ce_orig": 0.5250123739242554, + "epoch": 0.3879502480408369, + "kl_loss": 0.09768466651439667, + "loss_ib": 0.0014193139504641294, + "step": 1349 + }, + { + "ce_ib": 5.916868209838867, + "ce_orig": 0.8698412179946899, + "epoch": 0.3879502480408369, + "kl_loss": 0.05977011099457741, + "loss_ib": 0.0011893878690898418, + "step": 1349 + }, + { + "epoch": 0.3882378316198145, + "grad_norm": 0.10131075978279114, + "learning_rate": 4.8732873710124235e-05, + "loss": 0.8555, + "step": 1350 + }, + { + "ce_ib": 5.2028398513793945, + "ce_orig": 0.9036562442779541, + "epoch": 0.3882378316198145, + "kl_loss": 0.08852796256542206, + "loss_ib": 0.0014055636711418629, + "step": 1350 + }, + { + "ce_ib": 8.196189880371094, + "ce_orig": 1.0980186462402344, + "epoch": 0.3882378316198145, + "kl_loss": 0.14874669909477234, + "loss_ib": 0.002307086018845439, + "step": 1350 + }, + { + "ce_ib": 7.3839263916015625, + "ce_orig": 0.9047736525535583, + "epoch": 0.3882378316198145, + "kl_loss": 0.09531474858522415, + "loss_ib": 0.001691540121100843, + "step": 1350 + }, + { + "ce_ib": 10.06218433380127, + "ce_orig": 1.7880454063415527, + "epoch": 0.3882378316198145, + "kl_loss": 0.15133428573608398, + "loss_ib": 0.0025195612106472254, + "step": 1350 + }, + { + "ce_ib": 8.778315544128418, + "ce_orig": 1.6668694019317627, + "epoch": 0.38852541519879213, + "kl_loss": 0.11268506199121475, + "loss_ib": 0.0020046820864081383, + "step": 1351 + }, + { + "ce_ib": 7.385611057281494, + "ce_orig": 0.7190949320793152, + "epoch": 0.38852541519879213, + "kl_loss": 0.1424417793750763, + "loss_ib": 0.002162978984415531, + "step": 1351 + }, + { + "ce_ib": 4.4950337409973145, + "ce_orig": 0.4972643554210663, + "epoch": 0.38852541519879213, + "kl_loss": 0.0660613402724266, + "loss_ib": 0.0011101167183369398, + "step": 1351 + }, + { + "ce_ib": 6.403132915496826, + "ce_orig": 1.0407614707946777, + "epoch": 0.38852541519879213, + "kl_loss": 0.12615454196929932, + "loss_ib": 0.0019018587190657854, + "step": 1351 + }, + { + "ce_ib": 6.487383842468262, + "ce_orig": 0.7886338829994202, + "epoch": 0.3888129987777698, + "kl_loss": 0.12929624319076538, + "loss_ib": 0.0019417008152231574, + "step": 1352 + }, + { + "ce_ib": 7.565323829650879, + "ce_orig": 0.9060402512550354, + "epoch": 0.3888129987777698, + "kl_loss": 0.1719561070203781, + "loss_ib": 0.0024760933592915535, + "step": 1352 + }, + { + "ce_ib": 10.652222633361816, + "ce_orig": 2.0009143352508545, + "epoch": 0.3888129987777698, + "kl_loss": 0.12192914634943008, + "loss_ib": 0.0022845135536044836, + "step": 1352 + }, + { + "ce_ib": 3.8408257961273193, + "ce_orig": 0.7704099416732788, + "epoch": 0.3888129987777698, + "kl_loss": 0.2431485503911972, + "loss_ib": 0.002815568121150136, + "step": 1352 + }, + { + "ce_ib": 7.328763961791992, + "ce_orig": 1.0332540273666382, + "epoch": 0.38910058235674744, + "kl_loss": 0.16994734108448029, + "loss_ib": 0.0024323496036231518, + "step": 1353 + }, + { + "ce_ib": 6.36226224899292, + "ce_orig": 0.5826173424720764, + "epoch": 0.38910058235674744, + "kl_loss": 0.1299600452184677, + "loss_ib": 0.001935826614499092, + "step": 1353 + }, + { + "ce_ib": 7.506458282470703, + "ce_orig": 1.0630770921707153, + "epoch": 0.38910058235674744, + "kl_loss": 0.1475929617881775, + "loss_ib": 0.0022265755105763674, + "step": 1353 + }, + { + "ce_ib": 6.903659820556641, + "ce_orig": 0.50584876537323, + "epoch": 0.38910058235674744, + "kl_loss": 0.12740664184093475, + "loss_ib": 0.0019644321873784065, + "step": 1353 + }, + { + "ce_ib": 3.8798747062683105, + "ce_orig": 0.5197550058364868, + "epoch": 0.38938816593572506, + "kl_loss": 0.11910569667816162, + "loss_ib": 0.0015790443867444992, + "step": 1354 + }, + { + "ce_ib": 5.564952850341797, + "ce_orig": 0.758701741695404, + "epoch": 0.38938816593572506, + "kl_loss": 0.13943016529083252, + "loss_ib": 0.0019507968099787831, + "step": 1354 + }, + { + "ce_ib": 5.663549423217773, + "ce_orig": 0.8459624648094177, + "epoch": 0.38938816593572506, + "kl_loss": 0.1469283103942871, + "loss_ib": 0.0020356380846351385, + "step": 1354 + }, + { + "ce_ib": 4.190431118011475, + "ce_orig": 0.8275007009506226, + "epoch": 0.38938816593572506, + "kl_loss": 0.09464414417743683, + "loss_ib": 0.0013654845533892512, + "step": 1354 + }, + { + "epoch": 0.3896757495147027, + "grad_norm": 0.0893436148762703, + "learning_rate": 4.872064792093299e-05, + "loss": 0.8674, + "step": 1355 + }, + { + "ce_ib": 6.213205337524414, + "ce_orig": 1.2385234832763672, + "epoch": 0.3896757495147027, + "kl_loss": 0.10977844893932343, + "loss_ib": 0.0017191049410030246, + "step": 1355 + }, + { + "ce_ib": 2.949284553527832, + "ce_orig": 0.5540933012962341, + "epoch": 0.3896757495147027, + "kl_loss": 0.06217679753899574, + "loss_ib": 0.0009166963864117861, + "step": 1355 + }, + { + "ce_ib": 8.168116569519043, + "ce_orig": 1.1756985187530518, + "epoch": 0.3896757495147027, + "kl_loss": 0.1437525898218155, + "loss_ib": 0.002254337538033724, + "step": 1355 + }, + { + "ce_ib": 8.252790451049805, + "ce_orig": 1.310275673866272, + "epoch": 0.3896757495147027, + "kl_loss": 0.14780230820178986, + "loss_ib": 0.0023033020552247763, + "step": 1355 + }, + { + "ce_ib": 4.930200099945068, + "ce_orig": 1.1736384630203247, + "epoch": 0.38996333309368036, + "kl_loss": 0.11331868171691895, + "loss_ib": 0.0016262067947536707, + "step": 1356 + }, + { + "ce_ib": 5.806600093841553, + "ce_orig": 0.9677255749702454, + "epoch": 0.38996333309368036, + "kl_loss": 0.10435892641544342, + "loss_ib": 0.001624249154701829, + "step": 1356 + }, + { + "ce_ib": 4.6264801025390625, + "ce_orig": 0.6200498342514038, + "epoch": 0.38996333309368036, + "kl_loss": 0.08878219872713089, + "loss_ib": 0.0013504700036719441, + "step": 1356 + }, + { + "ce_ib": 5.591928005218506, + "ce_orig": 1.0976577997207642, + "epoch": 0.38996333309368036, + "kl_loss": 0.10171683132648468, + "loss_ib": 0.0015763610135763884, + "step": 1356 + }, + { + "ce_ib": 6.655910491943359, + "ce_orig": 1.143782615661621, + "epoch": 0.390250916672658, + "kl_loss": 0.07978525012731552, + "loss_ib": 0.0014634436229243875, + "step": 1357 + }, + { + "ce_ib": 5.03174352645874, + "ce_orig": 0.8346872925758362, + "epoch": 0.390250916672658, + "kl_loss": 0.11731187999248505, + "loss_ib": 0.0016762930899858475, + "step": 1357 + }, + { + "ce_ib": 8.140095710754395, + "ce_orig": 1.4168970584869385, + "epoch": 0.390250916672658, + "kl_loss": 0.1354292333126068, + "loss_ib": 0.002168301958590746, + "step": 1357 + }, + { + "ce_ib": 4.460680961608887, + "ce_orig": 0.4250698983669281, + "epoch": 0.390250916672658, + "kl_loss": 0.10067728161811829, + "loss_ib": 0.0014528408646583557, + "step": 1357 + }, + { + "ce_ib": 5.446596622467041, + "ce_orig": 0.7209237217903137, + "epoch": 0.3905385002516356, + "kl_loss": 0.13299855589866638, + "loss_ib": 0.0018746451241895556, + "step": 1358 + }, + { + "ce_ib": 4.268868446350098, + "ce_orig": 0.6162483096122742, + "epoch": 0.3905385002516356, + "kl_loss": 0.10890024900436401, + "loss_ib": 0.0015158893074840307, + "step": 1358 + }, + { + "ce_ib": 6.677936553955078, + "ce_orig": 1.0623646974563599, + "epoch": 0.3905385002516356, + "kl_loss": 0.263736367225647, + "loss_ib": 0.0033051574137061834, + "step": 1358 + }, + { + "ce_ib": 6.475733757019043, + "ce_orig": 1.0443817377090454, + "epoch": 0.3905385002516356, + "kl_loss": 0.11893180012702942, + "loss_ib": 0.001836891402490437, + "step": 1358 + }, + { + "ce_ib": 7.569817066192627, + "ce_orig": 1.2050386667251587, + "epoch": 0.3908260838306133, + "kl_loss": 0.16140955686569214, + "loss_ib": 0.0023710771929472685, + "step": 1359 + }, + { + "ce_ib": 7.5049567222595215, + "ce_orig": 0.826344907283783, + "epoch": 0.3908260838306133, + "kl_loss": 0.14760777354240417, + "loss_ib": 0.0022265734151005745, + "step": 1359 + }, + { + "ce_ib": 7.747564792633057, + "ce_orig": 0.8755878210067749, + "epoch": 0.3908260838306133, + "kl_loss": 0.10878226161003113, + "loss_ib": 0.0018625789089128375, + "step": 1359 + }, + { + "ce_ib": 8.582348823547363, + "ce_orig": 1.2040704488754272, + "epoch": 0.3908260838306133, + "kl_loss": 0.09434117376804352, + "loss_ib": 0.0018016466638073325, + "step": 1359 + }, + { + "epoch": 0.3911136674095909, + "grad_norm": 0.08629854023456573, + "learning_rate": 4.870836498319523e-05, + "loss": 0.8496, + "step": 1360 + }, + { + "ce_ib": 5.060675144195557, + "ce_orig": 0.8511158227920532, + "epoch": 0.3911136674095909, + "kl_loss": 0.05880500376224518, + "loss_ib": 0.0010941175278276205, + "step": 1360 + }, + { + "ce_ib": 1.9455045461654663, + "ce_orig": 0.21738861501216888, + "epoch": 0.3911136674095909, + "kl_loss": 0.2701791524887085, + "loss_ib": 0.00289634196087718, + "step": 1360 + }, + { + "ce_ib": 5.370924472808838, + "ce_orig": 1.0326734781265259, + "epoch": 0.3911136674095909, + "kl_loss": 0.08216007053852081, + "loss_ib": 0.0013586931163445115, + "step": 1360 + }, + { + "ce_ib": 6.79683256149292, + "ce_orig": 1.3858628273010254, + "epoch": 0.3911136674095909, + "kl_loss": 0.1067119687795639, + "loss_ib": 0.0017468029400333762, + "step": 1360 + }, + { + "ce_ib": 4.886713981628418, + "ce_orig": 0.43234968185424805, + "epoch": 0.39140125098856854, + "kl_loss": 0.11526788771152496, + "loss_ib": 0.0016413502162322402, + "step": 1361 + }, + { + "ce_ib": 3.7346255779266357, + "ce_orig": 0.4888816475868225, + "epoch": 0.39140125098856854, + "kl_loss": 0.049753397703170776, + "loss_ib": 0.0008709965622983873, + "step": 1361 + }, + { + "ce_ib": 3.8233001232147217, + "ce_orig": 0.5764029026031494, + "epoch": 0.39140125098856854, + "kl_loss": 0.0692904144525528, + "loss_ib": 0.0010752341477200389, + "step": 1361 + }, + { + "ce_ib": 3.1981613636016846, + "ce_orig": 0.5599377751350403, + "epoch": 0.39140125098856854, + "kl_loss": 0.07236847281455994, + "loss_ib": 0.0010435008443892002, + "step": 1361 + }, + { + "ce_ib": 7.6928181648254395, + "ce_orig": 1.2115129232406616, + "epoch": 0.3916888345675462, + "kl_loss": 0.11177465319633484, + "loss_ib": 0.0018870283383876085, + "step": 1362 + }, + { + "ce_ib": 5.256848335266113, + "ce_orig": 0.43086475133895874, + "epoch": 0.3916888345675462, + "kl_loss": 0.09716189652681351, + "loss_ib": 0.0014973038341850042, + "step": 1362 + }, + { + "ce_ib": 5.027642250061035, + "ce_orig": 0.5509925484657288, + "epoch": 0.3916888345675462, + "kl_loss": 0.185089111328125, + "loss_ib": 0.002353655407205224, + "step": 1362 + }, + { + "ce_ib": 6.421619892120361, + "ce_orig": 1.2486196756362915, + "epoch": 0.3916888345675462, + "kl_loss": 0.0833258405327797, + "loss_ib": 0.0014754203148186207, + "step": 1362 + }, + { + "ce_ib": 5.488348007202148, + "ce_orig": 0.9962913990020752, + "epoch": 0.39197641814652384, + "kl_loss": 0.08890173584222794, + "loss_ib": 0.0014378520427271724, + "step": 1363 + }, + { + "ce_ib": 4.584591865539551, + "ce_orig": 0.6659078598022461, + "epoch": 0.39197641814652384, + "kl_loss": 0.07413169741630554, + "loss_ib": 0.0011997760739177465, + "step": 1363 + }, + { + "ce_ib": 6.793995380401611, + "ce_orig": 0.8062725067138672, + "epoch": 0.39197641814652384, + "kl_loss": 0.12296256422996521, + "loss_ib": 0.0019090251298621297, + "step": 1363 + }, + { + "ce_ib": 12.83924674987793, + "ce_orig": 2.1623098850250244, + "epoch": 0.39197641814652384, + "kl_loss": 0.11011422425508499, + "loss_ib": 0.0023850665893405676, + "step": 1363 + }, + { + "ce_ib": 8.598384857177734, + "ce_orig": 1.3614503145217896, + "epoch": 0.39226400172550147, + "kl_loss": 0.13396115601062775, + "loss_ib": 0.0021994500420987606, + "step": 1364 + }, + { + "ce_ib": 6.677243232727051, + "ce_orig": 0.9641056656837463, + "epoch": 0.39226400172550147, + "kl_loss": 0.1300760805606842, + "loss_ib": 0.0019684850703924894, + "step": 1364 + }, + { + "ce_ib": 8.595512390136719, + "ce_orig": 1.4280240535736084, + "epoch": 0.39226400172550147, + "kl_loss": 0.09861013293266296, + "loss_ib": 0.0018456524703651667, + "step": 1364 + }, + { + "ce_ib": 4.735291004180908, + "ce_orig": 0.9403985738754272, + "epoch": 0.39226400172550147, + "kl_loss": 0.08944929391145706, + "loss_ib": 0.001368021941743791, + "step": 1364 + }, + { + "epoch": 0.3925515853044791, + "grad_norm": 0.09735099226236343, + "learning_rate": 4.8696024926503396e-05, + "loss": 0.8484, + "step": 1365 + }, + { + "ce_ib": 3.710118293762207, + "ce_orig": 0.45066648721694946, + "epoch": 0.3925515853044791, + "kl_loss": 0.14439572393894196, + "loss_ib": 0.0018149690004065633, + "step": 1365 + }, + { + "ce_ib": 3.6635892391204834, + "ce_orig": 0.4009067416191101, + "epoch": 0.3925515853044791, + "kl_loss": 0.06153864786028862, + "loss_ib": 0.000981745426543057, + "step": 1365 + }, + { + "ce_ib": 4.0791144371032715, + "ce_orig": 0.5721445083618164, + "epoch": 0.3925515853044791, + "kl_loss": 0.25153499841690063, + "loss_ib": 0.002923261374235153, + "step": 1365 + }, + { + "ce_ib": 3.123744487762451, + "ce_orig": 0.5085943341255188, + "epoch": 0.3925515853044791, + "kl_loss": 0.04723348468542099, + "loss_ib": 0.0007847092929296196, + "step": 1365 + }, + { + "ce_ib": 8.169525146484375, + "ce_orig": 1.6943016052246094, + "epoch": 0.39283916888345677, + "kl_loss": 0.08591040968894958, + "loss_ib": 0.001676056650467217, + "step": 1366 + }, + { + "ce_ib": 8.289563179016113, + "ce_orig": 1.3593651056289673, + "epoch": 0.39283916888345677, + "kl_loss": 0.14579978585243225, + "loss_ib": 0.0022869540844112635, + "step": 1366 + }, + { + "ce_ib": 4.006882190704346, + "ce_orig": 0.5923643112182617, + "epoch": 0.39283916888345677, + "kl_loss": 0.10397645831108093, + "loss_ib": 0.001440452761016786, + "step": 1366 + }, + { + "ce_ib": 6.16815710067749, + "ce_orig": 1.0465580224990845, + "epoch": 0.39283916888345677, + "kl_loss": 0.07608169317245483, + "loss_ib": 0.0013776326086372137, + "step": 1366 + }, + { + "ce_ib": 3.9323885440826416, + "ce_orig": 0.6568378210067749, + "epoch": 0.3931267524624344, + "kl_loss": 0.12474965304136276, + "loss_ib": 0.0016407354269176722, + "step": 1367 + }, + { + "ce_ib": 2.930619955062866, + "ce_orig": 0.5585191249847412, + "epoch": 0.3931267524624344, + "kl_loss": 0.06461334228515625, + "loss_ib": 0.0009391954517923295, + "step": 1367 + }, + { + "ce_ib": 3.9692986011505127, + "ce_orig": 0.5245909690856934, + "epoch": 0.3931267524624344, + "kl_loss": 0.14356094598770142, + "loss_ib": 0.001832539215683937, + "step": 1367 + }, + { + "ce_ib": 4.616350173950195, + "ce_orig": 0.6103743314743042, + "epoch": 0.3931267524624344, + "kl_loss": 0.1619236171245575, + "loss_ib": 0.0020808710251003504, + "step": 1367 + }, + { + "ce_ib": 5.887413501739502, + "ce_orig": 0.722166121006012, + "epoch": 0.393414336041412, + "kl_loss": 0.06748103350400925, + "loss_ib": 0.0012635516468435526, + "step": 1368 + }, + { + "ce_ib": 5.695414066314697, + "ce_orig": 0.6787561774253845, + "epoch": 0.393414336041412, + "kl_loss": 0.1149643212556839, + "loss_ib": 0.0017191844526678324, + "step": 1368 + }, + { + "ce_ib": 5.008102893829346, + "ce_orig": 0.9535930752754211, + "epoch": 0.393414336041412, + "kl_loss": 0.07903116941452026, + "loss_ib": 0.00129112193826586, + "step": 1368 + }, + { + "ce_ib": 3.252168893814087, + "ce_orig": 0.3988747000694275, + "epoch": 0.393414336041412, + "kl_loss": 0.13532951474189758, + "loss_ib": 0.0016785120824351907, + "step": 1368 + }, + { + "ce_ib": 5.28767728805542, + "ce_orig": 0.7203333377838135, + "epoch": 0.3937019196203897, + "kl_loss": 0.13053059577941895, + "loss_ib": 0.001834073569625616, + "step": 1369 + }, + { + "ce_ib": 7.581104278564453, + "ce_orig": 1.2444112300872803, + "epoch": 0.3937019196203897, + "kl_loss": 0.09139028191566467, + "loss_ib": 0.001672013197094202, + "step": 1369 + }, + { + "ce_ib": 3.3103346824645996, + "ce_orig": 0.5839555263519287, + "epoch": 0.3937019196203897, + "kl_loss": 0.05729393661022186, + "loss_ib": 0.0009039728320203722, + "step": 1369 + }, + { + "ce_ib": 3.8431239128112793, + "ce_orig": 0.6491223573684692, + "epoch": 0.3937019196203897, + "kl_loss": 0.09113702923059464, + "loss_ib": 0.001295682624913752, + "step": 1369 + }, + { + "epoch": 0.3939895031993673, + "grad_norm": 0.0974932387471199, + "learning_rate": 4.8683627780587546e-05, + "loss": 0.7966, + "step": 1370 + }, + { + "ce_ib": 4.4268012046813965, + "ce_orig": 0.7763320803642273, + "epoch": 0.3939895031993673, + "kl_loss": 0.11070152372121811, + "loss_ib": 0.001549695385619998, + "step": 1370 + }, + { + "ce_ib": 5.235965728759766, + "ce_orig": 0.7106630206108093, + "epoch": 0.3939895031993673, + "kl_loss": 0.05476393550634384, + "loss_ib": 0.0010712358634918928, + "step": 1370 + }, + { + "ce_ib": 7.041452407836914, + "ce_orig": 1.3121201992034912, + "epoch": 0.3939895031993673, + "kl_loss": 0.08947965502738953, + "loss_ib": 0.0015989416278898716, + "step": 1370 + }, + { + "ce_ib": 9.253966331481934, + "ce_orig": 1.2900464534759521, + "epoch": 0.3939895031993673, + "kl_loss": 0.10270275175571442, + "loss_ib": 0.00195242406334728, + "step": 1370 + }, + { + "ce_ib": 6.555151462554932, + "ce_orig": 0.9583280086517334, + "epoch": 0.39427708677834494, + "kl_loss": 0.14792247116565704, + "loss_ib": 0.0021347396541386843, + "step": 1371 + }, + { + "ce_ib": 5.8640851974487305, + "ce_orig": 1.2533522844314575, + "epoch": 0.39427708677834494, + "kl_loss": 0.12330082803964615, + "loss_ib": 0.0018194166477769613, + "step": 1371 + }, + { + "ce_ib": 5.276181697845459, + "ce_orig": 0.9645226001739502, + "epoch": 0.39427708677834494, + "kl_loss": 0.04578051716089249, + "loss_ib": 0.0009854233358055353, + "step": 1371 + }, + { + "ce_ib": 3.6213691234588623, + "ce_orig": 0.7831363081932068, + "epoch": 0.39427708677834494, + "kl_loss": 0.04129724204540253, + "loss_ib": 0.000775109336245805, + "step": 1371 + }, + { + "ce_ib": 5.293245315551758, + "ce_orig": 1.1399786472320557, + "epoch": 0.3945646703573226, + "kl_loss": 0.11456447839736938, + "loss_ib": 0.0016749693313613534, + "step": 1372 + }, + { + "ce_ib": 4.88385009765625, + "ce_orig": 0.5638843178749084, + "epoch": 0.3945646703573226, + "kl_loss": 0.1422284096479416, + "loss_ib": 0.0019106690306216478, + "step": 1372 + }, + { + "ce_ib": 3.2278645038604736, + "ce_orig": 0.5578948259353638, + "epoch": 0.3945646703573226, + "kl_loss": 0.06911308318376541, + "loss_ib": 0.0010139172663912177, + "step": 1372 + }, + { + "ce_ib": 6.677456855773926, + "ce_orig": 1.0336394309997559, + "epoch": 0.3945646703573226, + "kl_loss": 0.12147732079029083, + "loss_ib": 0.0018825187580659986, + "step": 1372 + }, + { + "ce_ib": 6.061744213104248, + "ce_orig": 0.6529628038406372, + "epoch": 0.39485225393630025, + "kl_loss": 0.09120302647352219, + "loss_ib": 0.0015182045754045248, + "step": 1373 + }, + { + "ce_ib": 7.936959266662598, + "ce_orig": 1.052087664604187, + "epoch": 0.39485225393630025, + "kl_loss": 0.11748115718364716, + "loss_ib": 0.00196850742213428, + "step": 1373 + }, + { + "ce_ib": 4.3993821144104, + "ce_orig": 0.602247953414917, + "epoch": 0.39485225393630025, + "kl_loss": 0.08443524688482285, + "loss_ib": 0.0012842906871810555, + "step": 1373 + }, + { + "ce_ib": 4.7374091148376465, + "ce_orig": 0.9693533182144165, + "epoch": 0.39485225393630025, + "kl_loss": 0.09733524918556213, + "loss_ib": 0.0014470933238044381, + "step": 1373 + }, + { + "ce_ib": 4.107109069824219, + "ce_orig": 0.6020128726959229, + "epoch": 0.39513983751527787, + "kl_loss": 0.09796494990587234, + "loss_ib": 0.0013903604121878743, + "step": 1374 + }, + { + "ce_ib": 4.63660192489624, + "ce_orig": 0.5517060160636902, + "epoch": 0.39513983751527787, + "kl_loss": 0.09080956876277924, + "loss_ib": 0.0013717558467760682, + "step": 1374 + }, + { + "ce_ib": 5.575448989868164, + "ce_orig": 0.6968877911567688, + "epoch": 0.39513983751527787, + "kl_loss": 0.14009396731853485, + "loss_ib": 0.0019584845285862684, + "step": 1374 + }, + { + "ce_ib": 8.282516479492188, + "ce_orig": 1.5205111503601074, + "epoch": 0.39513983751527787, + "kl_loss": 0.1510375738143921, + "loss_ib": 0.0023386271204799414, + "step": 1374 + }, + { + "epoch": 0.3954274210942555, + "grad_norm": 0.07745600491762161, + "learning_rate": 4.867117357531529e-05, + "loss": 0.8143, + "step": 1375 + }, + { + "ce_ib": 8.65802001953125, + "ce_orig": 1.6579588651657104, + "epoch": 0.3954274210942555, + "kl_loss": 0.1071658581495285, + "loss_ib": 0.0019374605035409331, + "step": 1375 + }, + { + "ce_ib": 7.021322727203369, + "ce_orig": 1.399658203125, + "epoch": 0.3954274210942555, + "kl_loss": 0.12391219288110733, + "loss_ib": 0.0019412541296333075, + "step": 1375 + }, + { + "ce_ib": 6.552249908447266, + "ce_orig": 1.0461649894714355, + "epoch": 0.3954274210942555, + "kl_loss": 0.10464093089103699, + "loss_ib": 0.001701634144410491, + "step": 1375 + }, + { + "ce_ib": 4.81926965713501, + "ce_orig": 0.7743741869926453, + "epoch": 0.3954274210942555, + "kl_loss": 0.09038940817117691, + "loss_ib": 0.0013858210295438766, + "step": 1375 + }, + { + "ce_ib": 5.60307502746582, + "ce_orig": 0.7910189628601074, + "epoch": 0.3957150046732332, + "kl_loss": 0.08102002739906311, + "loss_ib": 0.0013705077581107616, + "step": 1376 + }, + { + "ce_ib": 5.879729747772217, + "ce_orig": 0.9454348087310791, + "epoch": 0.3957150046732332, + "kl_loss": 0.10077565163373947, + "loss_ib": 0.001595729379914701, + "step": 1376 + }, + { + "ce_ib": 4.007662296295166, + "ce_orig": 0.909389078617096, + "epoch": 0.3957150046732332, + "kl_loss": 0.07688228785991669, + "loss_ib": 0.001169589115306735, + "step": 1376 + }, + { + "ce_ib": 5.555829048156738, + "ce_orig": 0.8984038233757019, + "epoch": 0.3957150046732332, + "kl_loss": 0.11498790234327316, + "loss_ib": 0.0017054618801921606, + "step": 1376 + }, + { + "ce_ib": 6.684864044189453, + "ce_orig": 1.4516613483428955, + "epoch": 0.3960025882522108, + "kl_loss": 0.08933814615011215, + "loss_ib": 0.0015618678880855441, + "step": 1377 + }, + { + "ce_ib": 7.712717533111572, + "ce_orig": 0.7735072374343872, + "epoch": 0.3960025882522108, + "kl_loss": 0.130492702126503, + "loss_ib": 0.002076198812574148, + "step": 1377 + }, + { + "ce_ib": 4.936470985412598, + "ce_orig": 0.5509382486343384, + "epoch": 0.3960025882522108, + "kl_loss": 0.10004114359617233, + "loss_ib": 0.0014940585242584348, + "step": 1377 + }, + { + "ce_ib": 3.2147631645202637, + "ce_orig": 0.4478761851787567, + "epoch": 0.3960025882522108, + "kl_loss": 0.092786505818367, + "loss_ib": 0.001249341294169426, + "step": 1377 + }, + { + "ce_ib": 5.846179962158203, + "ce_orig": 0.7358298897743225, + "epoch": 0.3962901718311884, + "kl_loss": 0.10214491933584213, + "loss_ib": 0.0016060670604929328, + "step": 1378 + }, + { + "ce_ib": 4.022353649139404, + "ce_orig": 0.6607013940811157, + "epoch": 0.3962901718311884, + "kl_loss": 0.0745079442858696, + "loss_ib": 0.0011473146732896566, + "step": 1378 + }, + { + "ce_ib": 5.024077892303467, + "ce_orig": 0.9707644581794739, + "epoch": 0.3962901718311884, + "kl_loss": 0.08207986503839493, + "loss_ib": 0.0013232063502073288, + "step": 1378 + }, + { + "ce_ib": 8.740952491760254, + "ce_orig": 1.6276335716247559, + "epoch": 0.3962901718311884, + "kl_loss": 0.10734099894762039, + "loss_ib": 0.0019475051667541265, + "step": 1378 + }, + { + "ce_ib": 7.33891487121582, + "ce_orig": 1.0861576795578003, + "epoch": 0.3965777554101661, + "kl_loss": 0.11862307786941528, + "loss_ib": 0.0019201221875846386, + "step": 1379 + }, + { + "ce_ib": 5.514404773712158, + "ce_orig": 0.8761352896690369, + "epoch": 0.3965777554101661, + "kl_loss": 0.10926657915115356, + "loss_ib": 0.0016441061161458492, + "step": 1379 + }, + { + "ce_ib": 2.858499765396118, + "ce_orig": 0.1799149066209793, + "epoch": 0.3965777554101661, + "kl_loss": 0.48103827238082886, + "loss_ib": 0.005096232984215021, + "step": 1379 + }, + { + "ce_ib": 6.853128433227539, + "ce_orig": 1.1733317375183105, + "epoch": 0.3965777554101661, + "kl_loss": 0.12556175887584686, + "loss_ib": 0.0019409304950386286, + "step": 1379 + }, + { + "epoch": 0.3968653389891437, + "grad_norm": 0.0979207307100296, + "learning_rate": 4.865866234069169e-05, + "loss": 0.9198, + "step": 1380 + }, + { + "ce_ib": 5.861191272735596, + "ce_orig": 0.8538046479225159, + "epoch": 0.3968653389891437, + "kl_loss": 0.16768991947174072, + "loss_ib": 0.0022630183957517147, + "step": 1380 + }, + { + "ce_ib": 4.218904972076416, + "ce_orig": 0.6871565580368042, + "epoch": 0.3968653389891437, + "kl_loss": 0.1207076832652092, + "loss_ib": 0.0016289673512801528, + "step": 1380 + }, + { + "ce_ib": 4.815241813659668, + "ce_orig": 0.8256959915161133, + "epoch": 0.3968653389891437, + "kl_loss": 0.08690144121646881, + "loss_ib": 0.0013505385722965002, + "step": 1380 + }, + { + "ce_ib": 5.396557807922363, + "ce_orig": 0.6450198292732239, + "epoch": 0.3968653389891437, + "kl_loss": 0.08772458136081696, + "loss_ib": 0.0014169015921652317, + "step": 1380 + }, + { + "ce_ib": 7.949460983276367, + "ce_orig": 1.2881579399108887, + "epoch": 0.39715292256812135, + "kl_loss": 0.11858942359685898, + "loss_ib": 0.001980840228497982, + "step": 1381 + }, + { + "ce_ib": 9.407849311828613, + "ce_orig": 1.7857203483581543, + "epoch": 0.39715292256812135, + "kl_loss": 0.11156527698040009, + "loss_ib": 0.0020564377773553133, + "step": 1381 + }, + { + "ce_ib": 4.317052841186523, + "ce_orig": 0.4395979344844818, + "epoch": 0.39715292256812135, + "kl_loss": 0.12274263799190521, + "loss_ib": 0.001659131608903408, + "step": 1381 + }, + { + "ce_ib": 4.266213893890381, + "ce_orig": 0.46749892830848694, + "epoch": 0.39715292256812135, + "kl_loss": 0.08208754658699036, + "loss_ib": 0.0012474968098104, + "step": 1381 + }, + { + "ce_ib": 3.835118293762207, + "ce_orig": 0.36215564608573914, + "epoch": 0.397440506147099, + "kl_loss": 0.1314554661512375, + "loss_ib": 0.001698066364042461, + "step": 1382 + }, + { + "ce_ib": 7.1647233963012695, + "ce_orig": 0.9199727773666382, + "epoch": 0.397440506147099, + "kl_loss": 0.07061807811260223, + "loss_ib": 0.0014226532075554132, + "step": 1382 + }, + { + "ce_ib": 6.599220275878906, + "ce_orig": 0.9260650277137756, + "epoch": 0.397440506147099, + "kl_loss": 0.08752088993787766, + "loss_ib": 0.0015351308975368738, + "step": 1382 + }, + { + "ce_ib": 6.4383392333984375, + "ce_orig": 0.8768721222877502, + "epoch": 0.397440506147099, + "kl_loss": 0.13634315133094788, + "loss_ib": 0.002007265342399478, + "step": 1382 + }, + { + "ce_ib": 5.870787143707275, + "ce_orig": 1.382786750793457, + "epoch": 0.39772808972607665, + "kl_loss": 0.17133145034313202, + "loss_ib": 0.0023003933019936085, + "step": 1383 + }, + { + "ce_ib": 2.237919569015503, + "ce_orig": 0.5899192094802856, + "epoch": 0.39772808972607665, + "kl_loss": 0.3677208423614502, + "loss_ib": 0.0039010001346468925, + "step": 1383 + }, + { + "ce_ib": 10.419187545776367, + "ce_orig": 1.5305267572402954, + "epoch": 0.39772808972607665, + "kl_loss": 0.13531950116157532, + "loss_ib": 0.0023951136972755194, + "step": 1383 + }, + { + "ce_ib": 4.558192729949951, + "ce_orig": 0.5665358304977417, + "epoch": 0.39772808972607665, + "kl_loss": 0.0764642208814621, + "loss_ib": 0.0012204614467918873, + "step": 1383 + }, + { + "ce_ib": 7.549960136413574, + "ce_orig": 1.4197278022766113, + "epoch": 0.3980156733050543, + "kl_loss": 0.110089972615242, + "loss_ib": 0.0018558957381173968, + "step": 1384 + }, + { + "ce_ib": 4.553320407867432, + "ce_orig": 0.8299973011016846, + "epoch": 0.3980156733050543, + "kl_loss": 0.07558947056531906, + "loss_ib": 0.0012112266849726439, + "step": 1384 + }, + { + "ce_ib": 6.357949733734131, + "ce_orig": 1.0268474817276, + "epoch": 0.3980156733050543, + "kl_loss": 0.1523449420928955, + "loss_ib": 0.002159244380891323, + "step": 1384 + }, + { + "ce_ib": 4.918845176696777, + "ce_orig": 0.48912161588668823, + "epoch": 0.3980156733050543, + "kl_loss": 0.1415548324584961, + "loss_ib": 0.0019074328010901809, + "step": 1384 + }, + { + "epoch": 0.3983032568840319, + "grad_norm": 0.09846891462802887, + "learning_rate": 4.864609410685922e-05, + "loss": 0.8625, + "step": 1385 + }, + { + "ce_ib": 5.032691478729248, + "ce_orig": 0.9307861328125, + "epoch": 0.3983032568840319, + "kl_loss": 0.10518186539411545, + "loss_ib": 0.0015550878597423434, + "step": 1385 + }, + { + "ce_ib": 7.9908952713012695, + "ce_orig": 1.7475234270095825, + "epoch": 0.3983032568840319, + "kl_loss": 0.09814848750829697, + "loss_ib": 0.001780574326403439, + "step": 1385 + }, + { + "ce_ib": 9.869513511657715, + "ce_orig": 1.627759575843811, + "epoch": 0.3983032568840319, + "kl_loss": 0.10387978702783585, + "loss_ib": 0.0020257493015378714, + "step": 1385 + }, + { + "ce_ib": 5.364109992980957, + "ce_orig": 0.8500359654426575, + "epoch": 0.3983032568840319, + "kl_loss": 0.1431843340396881, + "loss_ib": 0.0019682543352246284, + "step": 1385 + }, + { + "ce_ib": 6.705972194671631, + "ce_orig": 1.0989729166030884, + "epoch": 0.3985908404630096, + "kl_loss": 0.12062980979681015, + "loss_ib": 0.0018768951995298266, + "step": 1386 + }, + { + "ce_ib": 4.8373613357543945, + "ce_orig": 0.650296688079834, + "epoch": 0.3985908404630096, + "kl_loss": 0.10792292654514313, + "loss_ib": 0.0015629653353244066, + "step": 1386 + }, + { + "ce_ib": 5.477198123931885, + "ce_orig": 0.9474713206291199, + "epoch": 0.3985908404630096, + "kl_loss": 0.12362131476402283, + "loss_ib": 0.0017839329084381461, + "step": 1386 + }, + { + "ce_ib": 6.538940906524658, + "ce_orig": 1.1148537397384644, + "epoch": 0.3985908404630096, + "kl_loss": 0.07181133329868317, + "loss_ib": 0.0013720074202865362, + "step": 1386 + }, + { + "ce_ib": 4.820619583129883, + "ce_orig": 0.3477015793323517, + "epoch": 0.3988784240419872, + "kl_loss": 0.10620693862438202, + "loss_ib": 0.0015441313153132796, + "step": 1387 + }, + { + "ce_ib": 3.6405653953552246, + "ce_orig": 0.7365078926086426, + "epoch": 0.3988784240419872, + "kl_loss": 0.07146044075489044, + "loss_ib": 0.0010786609491333365, + "step": 1387 + }, + { + "ce_ib": 6.429567813873291, + "ce_orig": 1.3540785312652588, + "epoch": 0.3988784240419872, + "kl_loss": 0.08094966411590576, + "loss_ib": 0.0014524534344673157, + "step": 1387 + }, + { + "ce_ib": 4.190028667449951, + "ce_orig": 0.5769890546798706, + "epoch": 0.3988784240419872, + "kl_loss": 0.11371330171823502, + "loss_ib": 0.0015561358304694295, + "step": 1387 + }, + { + "ce_ib": 7.87295389175415, + "ce_orig": 1.2537925243377686, + "epoch": 0.3991660076209648, + "kl_loss": 0.16727596521377563, + "loss_ib": 0.0024600550532341003, + "step": 1388 + }, + { + "ce_ib": 6.019086837768555, + "ce_orig": 1.0837427377700806, + "epoch": 0.3991660076209648, + "kl_loss": 0.09157024323940277, + "loss_ib": 0.001517611090093851, + "step": 1388 + }, + { + "ce_ib": 5.586955547332764, + "ce_orig": 0.8558295369148254, + "epoch": 0.3991660076209648, + "kl_loss": 0.1290510892868042, + "loss_ib": 0.0018492063973098993, + "step": 1388 + }, + { + "ce_ib": 5.672644138336182, + "ce_orig": 0.9201558232307434, + "epoch": 0.3991660076209648, + "kl_loss": 0.12830939888954163, + "loss_ib": 0.0018503583269193769, + "step": 1388 + }, + { + "ce_ib": 4.650105953216553, + "ce_orig": 0.7557408809661865, + "epoch": 0.3994535911999425, + "kl_loss": 0.12210938334465027, + "loss_ib": 0.0016861043404787779, + "step": 1389 + }, + { + "ce_ib": 6.44442892074585, + "ce_orig": 1.0067079067230225, + "epoch": 0.3994535911999425, + "kl_loss": 0.10133783519268036, + "loss_ib": 0.001657821238040924, + "step": 1389 + }, + { + "ce_ib": 7.7318902015686035, + "ce_orig": 1.4324907064437866, + "epoch": 0.3994535911999425, + "kl_loss": 0.08357816934585571, + "loss_ib": 0.0016089706914499402, + "step": 1389 + }, + { + "ce_ib": 7.588091850280762, + "ce_orig": 1.3219072818756104, + "epoch": 0.3994535911999425, + "kl_loss": 0.08463755249977112, + "loss_ib": 0.0016051846323534846, + "step": 1389 + }, + { + "epoch": 0.3997411747789201, + "grad_norm": 0.11438465863466263, + "learning_rate": 4.863346890409767e-05, + "loss": 0.9591, + "step": 1390 + }, + { + "ce_ib": 5.451303005218506, + "ce_orig": 0.9760143160820007, + "epoch": 0.3997411747789201, + "kl_loss": 0.11309187114238739, + "loss_ib": 0.0016760488506406546, + "step": 1390 + }, + { + "ce_ib": 9.699860572814941, + "ce_orig": 1.3062336444854736, + "epoch": 0.3997411747789201, + "kl_loss": 0.11411392688751221, + "loss_ib": 0.002111125271767378, + "step": 1390 + }, + { + "ce_ib": 4.81472110748291, + "ce_orig": 0.6785033345222473, + "epoch": 0.3997411747789201, + "kl_loss": 0.07530829310417175, + "loss_ib": 0.0012345550348982215, + "step": 1390 + }, + { + "ce_ib": 4.894866466522217, + "ce_orig": 0.5927574634552002, + "epoch": 0.3997411747789201, + "kl_loss": 0.13816197216510773, + "loss_ib": 0.0018711063312366605, + "step": 1390 + }, + { + "ce_ib": 3.740232467651367, + "ce_orig": 0.7145044803619385, + "epoch": 0.40002875835789775, + "kl_loss": 0.07702134549617767, + "loss_ib": 0.0011442366521805525, + "step": 1391 + }, + { + "ce_ib": 6.7156219482421875, + "ce_orig": 1.1468040943145752, + "epoch": 0.40002875835789775, + "kl_loss": 0.07577596604824066, + "loss_ib": 0.0014293217100203037, + "step": 1391 + }, + { + "ce_ib": 4.911206245422363, + "ce_orig": 0.6374475359916687, + "epoch": 0.40002875835789775, + "kl_loss": 0.11240187287330627, + "loss_ib": 0.0016151393065229058, + "step": 1391 + }, + { + "ce_ib": 4.137016773223877, + "ce_orig": 0.5586443543434143, + "epoch": 0.40002875835789775, + "kl_loss": 0.09084071218967438, + "loss_ib": 0.0013221087865531445, + "step": 1391 + }, + { + "ce_ib": 7.145791530609131, + "ce_orig": 0.7220593690872192, + "epoch": 0.40031634193687543, + "kl_loss": 0.18535080552101135, + "loss_ib": 0.0025680873077362776, + "step": 1392 + }, + { + "ce_ib": 8.202098846435547, + "ce_orig": 0.8149229884147644, + "epoch": 0.40031634193687543, + "kl_loss": 0.1332991123199463, + "loss_ib": 0.0021532007958739996, + "step": 1392 + }, + { + "ce_ib": 10.058478355407715, + "ce_orig": 1.2291303873062134, + "epoch": 0.40031634193687543, + "kl_loss": 0.10170267522335052, + "loss_ib": 0.002022874541580677, + "step": 1392 + }, + { + "ce_ib": 6.339056015014648, + "ce_orig": 0.8793673515319824, + "epoch": 0.40031634193687543, + "kl_loss": 0.10560115426778793, + "loss_ib": 0.0016899170586839318, + "step": 1392 + }, + { + "ce_ib": 6.754205226898193, + "ce_orig": 0.8358240723609924, + "epoch": 0.40060392551585305, + "kl_loss": 0.08306322246789932, + "loss_ib": 0.001506052678450942, + "step": 1393 + }, + { + "ce_ib": 3.8460471630096436, + "ce_orig": 0.5023829340934753, + "epoch": 0.40060392551585305, + "kl_loss": 0.12775641679763794, + "loss_ib": 0.001662168768234551, + "step": 1393 + }, + { + "ce_ib": 5.579369068145752, + "ce_orig": 0.6354689598083496, + "epoch": 0.40060392551585305, + "kl_loss": 0.22461557388305664, + "loss_ib": 0.0028040925972163677, + "step": 1393 + }, + { + "ce_ib": 3.25400972366333, + "ce_orig": 0.5061582326889038, + "epoch": 0.40060392551585305, + "kl_loss": 0.23202760517597198, + "loss_ib": 0.0026456769555807114, + "step": 1393 + }, + { + "ce_ib": 5.32595682144165, + "ce_orig": 1.127187967300415, + "epoch": 0.4008915090948307, + "kl_loss": 0.12879681587219238, + "loss_ib": 0.001820563804358244, + "step": 1394 + }, + { + "ce_ib": 6.9711737632751465, + "ce_orig": 1.1765919923782349, + "epoch": 0.4008915090948307, + "kl_loss": 0.0906248465180397, + "loss_ib": 0.0016033657593652606, + "step": 1394 + }, + { + "ce_ib": 4.378665447235107, + "ce_orig": 0.5809182524681091, + "epoch": 0.4008915090948307, + "kl_loss": 0.11743704974651337, + "loss_ib": 0.0016122370725497603, + "step": 1394 + }, + { + "ce_ib": 5.15740442276001, + "ce_orig": 0.9412955641746521, + "epoch": 0.4008915090948307, + "kl_loss": 0.080172598361969, + "loss_ib": 0.0013174664927646518, + "step": 1394 + }, + { + "epoch": 0.4011790926738083, + "grad_norm": 0.09315615147352219, + "learning_rate": 4.862078676282409e-05, + "loss": 0.8388, + "step": 1395 + }, + { + "ce_ib": 8.188400268554688, + "ce_orig": 1.2357627153396606, + "epoch": 0.4011790926738083, + "kl_loss": 0.1002885177731514, + "loss_ib": 0.0018217251636087894, + "step": 1395 + }, + { + "ce_ib": 6.079811096191406, + "ce_orig": 1.1144137382507324, + "epoch": 0.4011790926738083, + "kl_loss": 0.07685106992721558, + "loss_ib": 0.0013764917384833097, + "step": 1395 + }, + { + "ce_ib": 7.778738021850586, + "ce_orig": 1.4507184028625488, + "epoch": 0.4011790926738083, + "kl_loss": 0.12209288775920868, + "loss_ib": 0.0019988026469945908, + "step": 1395 + }, + { + "ce_ib": 7.983975887298584, + "ce_orig": 1.0864462852478027, + "epoch": 0.4011790926738083, + "kl_loss": 0.11116814613342285, + "loss_ib": 0.001910079037770629, + "step": 1395 + }, + { + "ce_ib": 9.038493156433105, + "ce_orig": 1.6814976930618286, + "epoch": 0.401466676252786, + "kl_loss": 0.12096206843852997, + "loss_ib": 0.0021134698763489723, + "step": 1396 + }, + { + "ce_ib": 4.606503963470459, + "ce_orig": 0.38909873366355896, + "epoch": 0.401466676252786, + "kl_loss": 0.13731586933135986, + "loss_ib": 0.0018338089575991035, + "step": 1396 + }, + { + "ce_ib": 4.8945770263671875, + "ce_orig": 0.6191275119781494, + "epoch": 0.401466676252786, + "kl_loss": 0.0936412438750267, + "loss_ib": 0.0014258699957281351, + "step": 1396 + }, + { + "ce_ib": 7.423152923583984, + "ce_orig": 0.8506887555122375, + "epoch": 0.401466676252786, + "kl_loss": 0.14094209671020508, + "loss_ib": 0.0021517362911254168, + "step": 1396 + }, + { + "ce_ib": 9.513692855834961, + "ce_orig": 1.5091493129730225, + "epoch": 0.4017542598317636, + "kl_loss": 0.09959740936756134, + "loss_ib": 0.0019473433494567871, + "step": 1397 + }, + { + "ce_ib": 6.524710178375244, + "ce_orig": 0.8636010885238647, + "epoch": 0.4017542598317636, + "kl_loss": 0.13173584640026093, + "loss_ib": 0.001969829434528947, + "step": 1397 + }, + { + "ce_ib": 4.396295070648193, + "ce_orig": 0.619438648223877, + "epoch": 0.4017542598317636, + "kl_loss": 0.09558691829442978, + "loss_ib": 0.0013954986352473497, + "step": 1397 + }, + { + "ce_ib": 4.203965663909912, + "ce_orig": 0.7036296129226685, + "epoch": 0.4017542598317636, + "kl_loss": 0.07829403877258301, + "loss_ib": 0.001203336869366467, + "step": 1397 + }, + { + "ce_ib": 5.514439105987549, + "ce_orig": 0.520076334476471, + "epoch": 0.40204184341074123, + "kl_loss": 0.08803772926330566, + "loss_ib": 0.001431821146979928, + "step": 1398 + }, + { + "ce_ib": 1.4410609006881714, + "ce_orig": 0.13030670583248138, + "epoch": 0.40204184341074123, + "kl_loss": 0.16449351608753204, + "loss_ib": 0.001789041212759912, + "step": 1398 + }, + { + "ce_ib": 5.51670503616333, + "ce_orig": 0.3515918254852295, + "epoch": 0.40204184341074123, + "kl_loss": 0.19496026635169983, + "loss_ib": 0.002501273062080145, + "step": 1398 + }, + { + "ce_ib": 5.9502973556518555, + "ce_orig": 1.3472964763641357, + "epoch": 0.40204184341074123, + "kl_loss": 0.08377566188573837, + "loss_ib": 0.0014327862299978733, + "step": 1398 + }, + { + "ce_ib": 5.6048688888549805, + "ce_orig": 1.193996787071228, + "epoch": 0.4023294269897189, + "kl_loss": 0.0680387020111084, + "loss_ib": 0.0012408739421516657, + "step": 1399 + }, + { + "ce_ib": 7.806824684143066, + "ce_orig": 0.9458127021789551, + "epoch": 0.4023294269897189, + "kl_loss": 0.13807079195976257, + "loss_ib": 0.002161390380933881, + "step": 1399 + }, + { + "ce_ib": 4.314865589141846, + "ce_orig": 0.6677300930023193, + "epoch": 0.4023294269897189, + "kl_loss": 0.1180049255490303, + "loss_ib": 0.0016115357866510749, + "step": 1399 + }, + { + "ce_ib": 8.716057777404785, + "ce_orig": 1.3581658601760864, + "epoch": 0.4023294269897189, + "kl_loss": 0.09681472927331924, + "loss_ib": 0.001839753007516265, + "step": 1399 + }, + { + "epoch": 0.40261701056869653, + "grad_norm": 0.09289965033531189, + "learning_rate": 4.86080477135927e-05, + "loss": 0.9395, + "step": 1400 + }, + { + "ce_ib": 5.28957462310791, + "ce_orig": 0.6641263365745544, + "epoch": 0.40261701056869653, + "kl_loss": 0.1141863688826561, + "loss_ib": 0.0016708211041986942, + "step": 1400 + }, + { + "ce_ib": 5.174437046051025, + "ce_orig": 0.3216610848903656, + "epoch": 0.40261701056869653, + "kl_loss": 0.14265108108520508, + "loss_ib": 0.001943954499438405, + "step": 1400 + }, + { + "ce_ib": 4.932038307189941, + "ce_orig": 0.6975332498550415, + "epoch": 0.40261701056869653, + "kl_loss": 0.08903782814741135, + "loss_ib": 0.0013835820136591792, + "step": 1400 + }, + { + "ce_ib": 6.784277439117432, + "ce_orig": 1.5330122709274292, + "epoch": 0.40261701056869653, + "kl_loss": 0.08088655024766922, + "loss_ib": 0.0014872931642457843, + "step": 1400 + }, + { + "ce_ib": 5.346994876861572, + "ce_orig": 1.0070792436599731, + "epoch": 0.40290459414767416, + "kl_loss": 0.05356618016958237, + "loss_ib": 0.001070361235179007, + "step": 1401 + }, + { + "ce_ib": 1.9412345886230469, + "ce_orig": 0.25424134731292725, + "epoch": 0.40290459414767416, + "kl_loss": 0.1259712427854538, + "loss_ib": 0.0014538359828293324, + "step": 1401 + }, + { + "ce_ib": 4.817016124725342, + "ce_orig": 0.9583339095115662, + "epoch": 0.40290459414767416, + "kl_loss": 0.06185237318277359, + "loss_ib": 0.0011002252576872706, + "step": 1401 + }, + { + "ce_ib": 5.685846328735352, + "ce_orig": 0.7392550706863403, + "epoch": 0.40290459414767416, + "kl_loss": 0.07985693216323853, + "loss_ib": 0.0013671539491042495, + "step": 1401 + }, + { + "ce_ib": 7.376675605773926, + "ce_orig": 0.9400675892829895, + "epoch": 0.40319217772665183, + "kl_loss": 0.10595303773880005, + "loss_ib": 0.0017971978522837162, + "step": 1402 + }, + { + "ce_ib": 4.847853183746338, + "ce_orig": 0.5265023708343506, + "epoch": 0.40319217772665183, + "kl_loss": 0.08615823090076447, + "loss_ib": 0.001346367527730763, + "step": 1402 + }, + { + "ce_ib": 6.800249099731445, + "ce_orig": 0.6501719951629639, + "epoch": 0.40319217772665183, + "kl_loss": 0.12006954848766327, + "loss_ib": 0.001880720374174416, + "step": 1402 + }, + { + "ce_ib": 2.961667060852051, + "ce_orig": 0.4114344120025635, + "epoch": 0.40319217772665183, + "kl_loss": 0.13707970082759857, + "loss_ib": 0.0016669636825099587, + "step": 1402 + }, + { + "ce_ib": 10.904898643493652, + "ce_orig": 2.0329442024230957, + "epoch": 0.40347976130562946, + "kl_loss": 0.11201053857803345, + "loss_ib": 0.002210595179349184, + "step": 1403 + }, + { + "ce_ib": 4.756246566772461, + "ce_orig": 0.6134840846061707, + "epoch": 0.40347976130562946, + "kl_loss": 0.08142095059156418, + "loss_ib": 0.0012898340355604887, + "step": 1403 + }, + { + "ce_ib": 6.535433292388916, + "ce_orig": 0.9841384291648865, + "epoch": 0.40347976130562946, + "kl_loss": 0.08830951154232025, + "loss_ib": 0.0015366383595392108, + "step": 1403 + }, + { + "ce_ib": 3.6593120098114014, + "ce_orig": 0.5448381304740906, + "epoch": 0.40347976130562946, + "kl_loss": 0.11145228147506714, + "loss_ib": 0.0014804539969190955, + "step": 1403 + }, + { + "ce_ib": 4.8199782371521, + "ce_orig": 0.7326803207397461, + "epoch": 0.4037673448846071, + "kl_loss": 0.12226551026105881, + "loss_ib": 0.0017046529101207852, + "step": 1404 + }, + { + "ce_ib": 6.233541965484619, + "ce_orig": 0.5725308060646057, + "epoch": 0.4037673448846071, + "kl_loss": 0.1417505443096161, + "loss_ib": 0.00204085954464972, + "step": 1404 + }, + { + "ce_ib": 9.850833892822266, + "ce_orig": 1.7082631587982178, + "epoch": 0.4037673448846071, + "kl_loss": 0.12727659940719604, + "loss_ib": 0.002257849322631955, + "step": 1404 + }, + { + "ce_ib": 6.463018894195557, + "ce_orig": 0.44856294989585876, + "epoch": 0.4037673448846071, + "kl_loss": 0.09182668477296829, + "loss_ib": 0.001564568723551929, + "step": 1404 + }, + { + "epoch": 0.4040549284635847, + "grad_norm": 0.08630051463842392, + "learning_rate": 4.859525178709481e-05, + "loss": 0.877, + "step": 1405 + }, + { + "ce_ib": 5.258861064910889, + "ce_orig": 0.7958999872207642, + "epoch": 0.4040549284635847, + "kl_loss": 0.09843520820140839, + "loss_ib": 0.0015102381585165858, + "step": 1405 + }, + { + "ce_ib": 5.847157955169678, + "ce_orig": 0.846228301525116, + "epoch": 0.4040549284635847, + "kl_loss": 0.09086570888757706, + "loss_ib": 0.0014933728380128741, + "step": 1405 + }, + { + "ce_ib": 8.750494956970215, + "ce_orig": 1.657193899154663, + "epoch": 0.4040549284635847, + "kl_loss": 0.21177685260772705, + "loss_ib": 0.002992817899212241, + "step": 1405 + }, + { + "ce_ib": 5.047918796539307, + "ce_orig": 0.783190906047821, + "epoch": 0.4040549284635847, + "kl_loss": 0.10703468322753906, + "loss_ib": 0.0015751386526972055, + "step": 1405 + }, + { + "ce_ib": 7.651825428009033, + "ce_orig": 0.9800135493278503, + "epoch": 0.4043425120425624, + "kl_loss": 0.13863235712051392, + "loss_ib": 0.002151506021618843, + "step": 1406 + }, + { + "ce_ib": 4.351835250854492, + "ce_orig": 0.6803432106971741, + "epoch": 0.4043425120425624, + "kl_loss": 0.10353352129459381, + "loss_ib": 0.0014705186476930976, + "step": 1406 + }, + { + "ce_ib": 9.203539848327637, + "ce_orig": 1.6314523220062256, + "epoch": 0.4043425120425624, + "kl_loss": 0.10844020545482635, + "loss_ib": 0.0020047558937221766, + "step": 1406 + }, + { + "ce_ib": 5.073543071746826, + "ce_orig": 0.6256998777389526, + "epoch": 0.4043425120425624, + "kl_loss": 0.12684963643550873, + "loss_ib": 0.0017758506583049893, + "step": 1406 + }, + { + "ce_ib": 2.1065125465393066, + "ce_orig": 0.45548203587532043, + "epoch": 0.40463009562154, + "kl_loss": 0.047039005905389786, + "loss_ib": 0.0006810413324274123, + "step": 1407 + }, + { + "ce_ib": 7.553897857666016, + "ce_orig": 0.9205471873283386, + "epoch": 0.40463009562154, + "kl_loss": 0.17443448305130005, + "loss_ib": 0.00249973451718688, + "step": 1407 + }, + { + "ce_ib": 6.973599433898926, + "ce_orig": 0.46554118394851685, + "epoch": 0.40463009562154, + "kl_loss": 0.15451180934906006, + "loss_ib": 0.0022424780763685703, + "step": 1407 + }, + { + "ce_ib": 7.436211109161377, + "ce_orig": 1.1756176948547363, + "epoch": 0.40463009562154, + "kl_loss": 0.10663871467113495, + "loss_ib": 0.0018100081942975521, + "step": 1407 + }, + { + "ce_ib": 6.034943580627441, + "ce_orig": 0.30098381638526917, + "epoch": 0.40491767920051763, + "kl_loss": 0.15259245038032532, + "loss_ib": 0.0021294187754392624, + "step": 1408 + }, + { + "ce_ib": 7.934663772583008, + "ce_orig": 0.7295154929161072, + "epoch": 0.40491767920051763, + "kl_loss": 0.12445910274982452, + "loss_ib": 0.002038057427853346, + "step": 1408 + }, + { + "ce_ib": 7.308053493499756, + "ce_orig": 1.1165850162506104, + "epoch": 0.40491767920051763, + "kl_loss": 0.10732346028089523, + "loss_ib": 0.0018040399299934506, + "step": 1408 + }, + { + "ce_ib": 6.947267532348633, + "ce_orig": 0.9952074885368347, + "epoch": 0.40491767920051763, + "kl_loss": 0.12138228863477707, + "loss_ib": 0.0019085495732724667, + "step": 1408 + }, + { + "ce_ib": 5.599508285522461, + "ce_orig": 0.9172304272651672, + "epoch": 0.4052052627794953, + "kl_loss": 0.11137841641902924, + "loss_ib": 0.0016737348632887006, + "step": 1409 + }, + { + "ce_ib": 8.022232055664062, + "ce_orig": 1.4531203508377075, + "epoch": 0.4052052627794953, + "kl_loss": 0.12734998762607574, + "loss_ib": 0.0020757231395691633, + "step": 1409 + }, + { + "ce_ib": 7.1103196144104, + "ce_orig": 1.23030686378479, + "epoch": 0.4052052627794953, + "kl_loss": 0.09444105625152588, + "loss_ib": 0.0016554424073547125, + "step": 1409 + }, + { + "ce_ib": 5.490854263305664, + "ce_orig": 0.6522265672683716, + "epoch": 0.4052052627794953, + "kl_loss": 0.06643573939800262, + "loss_ib": 0.0012134427670389414, + "step": 1409 + }, + { + "epoch": 0.40549284635847294, + "grad_norm": 0.0967244878411293, + "learning_rate": 4.8582399014158794e-05, + "loss": 0.8162, + "step": 1410 + }, + { + "ce_ib": 5.121457576751709, + "ce_orig": 0.7949561476707458, + "epoch": 0.40549284635847294, + "kl_loss": 0.12705135345458984, + "loss_ib": 0.0017826592084020376, + "step": 1410 + }, + { + "ce_ib": 4.26038122177124, + "ce_orig": 0.7149118781089783, + "epoch": 0.40549284635847294, + "kl_loss": 0.10282117128372192, + "loss_ib": 0.001454249839298427, + "step": 1410 + }, + { + "ce_ib": 4.0952372550964355, + "ce_orig": 0.8095517158508301, + "epoch": 0.40549284635847294, + "kl_loss": 0.10190194100141525, + "loss_ib": 0.0014285431243479252, + "step": 1410 + }, + { + "ce_ib": 6.364243507385254, + "ce_orig": 0.6983405947685242, + "epoch": 0.40549284635847294, + "kl_loss": 0.09122467041015625, + "loss_ib": 0.0015486710472032428, + "step": 1410 + }, + { + "ce_ib": 6.581660747528076, + "ce_orig": 0.8682206869125366, + "epoch": 0.40578042993745056, + "kl_loss": 0.1153961569070816, + "loss_ib": 0.0018121275352314115, + "step": 1411 + }, + { + "ce_ib": 8.94925594329834, + "ce_orig": 1.347611665725708, + "epoch": 0.40578042993745056, + "kl_loss": 0.17726653814315796, + "loss_ib": 0.0026675909757614136, + "step": 1411 + }, + { + "ce_ib": 3.1764252185821533, + "ce_orig": 0.5812899470329285, + "epoch": 0.40578042993745056, + "kl_loss": 0.09140671789646149, + "loss_ib": 0.0012317097280174494, + "step": 1411 + }, + { + "ce_ib": 5.011621952056885, + "ce_orig": 0.7421811819076538, + "epoch": 0.40578042993745056, + "kl_loss": 0.08518759906291962, + "loss_ib": 0.0013530382420867682, + "step": 1411 + }, + { + "ce_ib": 5.735240459442139, + "ce_orig": 0.9989059567451477, + "epoch": 0.40606801351642824, + "kl_loss": 0.08036477863788605, + "loss_ib": 0.0013771718367934227, + "step": 1412 + }, + { + "ce_ib": 6.586531639099121, + "ce_orig": 0.8133410811424255, + "epoch": 0.40606801351642824, + "kl_loss": 0.09770262241363525, + "loss_ib": 0.0016356792766600847, + "step": 1412 + }, + { + "ce_ib": 5.184510707855225, + "ce_orig": 0.8797780871391296, + "epoch": 0.40606801351642824, + "kl_loss": 0.10436460375785828, + "loss_ib": 0.0015620969934388995, + "step": 1412 + }, + { + "ce_ib": 5.362227916717529, + "ce_orig": 1.1667356491088867, + "epoch": 0.40606801351642824, + "kl_loss": 0.09264987707138062, + "loss_ib": 0.001462721498683095, + "step": 1412 + }, + { + "ce_ib": 2.8289527893066406, + "ce_orig": 0.6173007488250732, + "epoch": 0.40635559709540586, + "kl_loss": 0.060516029596328735, + "loss_ib": 0.0008880555396899581, + "step": 1413 + }, + { + "ce_ib": 7.956472396850586, + "ce_orig": 1.0977833271026611, + "epoch": 0.40635559709540586, + "kl_loss": 0.12055166065692902, + "loss_ib": 0.0020011637825518847, + "step": 1413 + }, + { + "ce_ib": 4.129430770874023, + "ce_orig": 0.6660087704658508, + "epoch": 0.40635559709540586, + "kl_loss": 0.0664260983467102, + "loss_ib": 0.0010772040113806725, + "step": 1413 + }, + { + "ce_ib": 5.287503719329834, + "ce_orig": 0.6029794216156006, + "epoch": 0.40635559709540586, + "kl_loss": 0.1367393434047699, + "loss_ib": 0.0018961437745019794, + "step": 1413 + }, + { + "ce_ib": 3.3757805824279785, + "ce_orig": 0.5221848487854004, + "epoch": 0.4066431806743835, + "kl_loss": 0.11435914784669876, + "loss_ib": 0.0014811694854870439, + "step": 1414 + }, + { + "ce_ib": 9.696231842041016, + "ce_orig": 1.6972432136535645, + "epoch": 0.4066431806743835, + "kl_loss": 0.08618634939193726, + "loss_ib": 0.001831486588343978, + "step": 1414 + }, + { + "ce_ib": 4.770078659057617, + "ce_orig": 0.8775630593299866, + "epoch": 0.4066431806743835, + "kl_loss": 0.07194533944129944, + "loss_ib": 0.0011964612640440464, + "step": 1414 + }, + { + "ce_ib": 1.6736482381820679, + "ce_orig": 0.18917334079742432, + "epoch": 0.4066431806743835, + "kl_loss": 0.22798651456832886, + "loss_ib": 0.0024472298100590706, + "step": 1414 + }, + { + "epoch": 0.4069307642533611, + "grad_norm": 0.09320499002933502, + "learning_rate": 4.856948942574997e-05, + "loss": 0.8688, + "step": 1415 + }, + { + "ce_ib": 6.214433193206787, + "ce_orig": 0.8773539662361145, + "epoch": 0.4069307642533611, + "kl_loss": 0.10065476596355438, + "loss_ib": 0.0016279908595606685, + "step": 1415 + }, + { + "ce_ib": 5.722317218780518, + "ce_orig": 1.2242968082427979, + "epoch": 0.4069307642533611, + "kl_loss": 0.10353957861661911, + "loss_ib": 0.001607627491466701, + "step": 1415 + }, + { + "ce_ib": 7.316473007202148, + "ce_orig": 1.4600270986557007, + "epoch": 0.4069307642533611, + "kl_loss": 0.146986186504364, + "loss_ib": 0.0022015091963112354, + "step": 1415 + }, + { + "ce_ib": 4.994661331176758, + "ce_orig": 0.562883198261261, + "epoch": 0.4069307642533611, + "kl_loss": 0.09666267037391663, + "loss_ib": 0.0014660927699878812, + "step": 1415 + }, + { + "ce_ib": 3.781491994857788, + "ce_orig": 0.6928848624229431, + "epoch": 0.4072183478323388, + "kl_loss": 0.05718432366847992, + "loss_ib": 0.0009499923908151686, + "step": 1416 + }, + { + "ce_ib": 9.2673978805542, + "ce_orig": 1.2964565753936768, + "epoch": 0.4072183478323388, + "kl_loss": 0.10217346251010895, + "loss_ib": 0.0019484743243083358, + "step": 1416 + }, + { + "ce_ib": 6.029436111450195, + "ce_orig": 0.9657694697380066, + "epoch": 0.4072183478323388, + "kl_loss": 0.1299174427986145, + "loss_ib": 0.001902117975987494, + "step": 1416 + }, + { + "ce_ib": 5.017213821411133, + "ce_orig": 0.7008000612258911, + "epoch": 0.4072183478323388, + "kl_loss": 0.09423135966062546, + "loss_ib": 0.0014440348604694009, + "step": 1416 + }, + { + "ce_ib": 4.500516891479492, + "ce_orig": 0.34214717149734497, + "epoch": 0.4075059314113164, + "kl_loss": 0.11767271906137466, + "loss_ib": 0.0016267788596451283, + "step": 1417 + }, + { + "ce_ib": 3.1806514263153076, + "ce_orig": 0.6471096873283386, + "epoch": 0.4075059314113164, + "kl_loss": 0.0917370468378067, + "loss_ib": 0.0012354356003925204, + "step": 1417 + }, + { + "ce_ib": 4.803709983825684, + "ce_orig": 1.184767484664917, + "epoch": 0.4075059314113164, + "kl_loss": 0.056204065680503845, + "loss_ib": 0.0010424115462228656, + "step": 1417 + }, + { + "ce_ib": 4.624536514282227, + "ce_orig": 0.8192501664161682, + "epoch": 0.4075059314113164, + "kl_loss": 0.11898795515298843, + "loss_ib": 0.0016523330705240369, + "step": 1417 + }, + { + "ce_ib": 4.678039073944092, + "ce_orig": 0.4023797810077667, + "epoch": 0.40779351499029404, + "kl_loss": 0.09496867656707764, + "loss_ib": 0.001417490653693676, + "step": 1418 + }, + { + "ce_ib": 5.111543655395508, + "ce_orig": 0.7812928557395935, + "epoch": 0.40779351499029404, + "kl_loss": 0.08022376894950867, + "loss_ib": 0.0013133920729160309, + "step": 1418 + }, + { + "ce_ib": 5.8873138427734375, + "ce_orig": 0.6747121810913086, + "epoch": 0.40779351499029404, + "kl_loss": 0.13775616884231567, + "loss_ib": 0.0019662929698824883, + "step": 1418 + }, + { + "ce_ib": 8.242557525634766, + "ce_orig": 1.7442784309387207, + "epoch": 0.40779351499029404, + "kl_loss": 0.10227921605110168, + "loss_ib": 0.0018470477079972625, + "step": 1418 + }, + { + "ce_ib": 4.78331995010376, + "ce_orig": 0.8966225981712341, + "epoch": 0.4080810985692717, + "kl_loss": 0.1043175607919693, + "loss_ib": 0.0015215075109153986, + "step": 1419 + }, + { + "ce_ib": 5.467410087585449, + "ce_orig": 1.2548538446426392, + "epoch": 0.4080810985692717, + "kl_loss": 0.05958162248134613, + "loss_ib": 0.0011425572447478771, + "step": 1419 + }, + { + "ce_ib": 5.79073429107666, + "ce_orig": 0.5919792652130127, + "epoch": 0.4080810985692717, + "kl_loss": 0.08852129429578781, + "loss_ib": 0.0014642864698544145, + "step": 1419 + }, + { + "ce_ib": 8.342966079711914, + "ce_orig": 0.7671210169792175, + "epoch": 0.4080810985692717, + "kl_loss": 0.38292115926742554, + "loss_ib": 0.004663507919758558, + "step": 1419 + }, + { + "epoch": 0.40836868214824934, + "grad_norm": 0.10169852524995804, + "learning_rate": 4.855652305297052e-05, + "loss": 0.8024, + "step": 1420 + }, + { + "ce_ib": 6.110755443572998, + "ce_orig": 1.0325820446014404, + "epoch": 0.40836868214824934, + "kl_loss": 0.06489177793264389, + "loss_ib": 0.0012599932961165905, + "step": 1420 + }, + { + "ce_ib": 5.731692790985107, + "ce_orig": 1.1643919944763184, + "epoch": 0.40836868214824934, + "kl_loss": 0.11741343140602112, + "loss_ib": 0.001747303525917232, + "step": 1420 + }, + { + "ce_ib": 6.906783580780029, + "ce_orig": 0.9419347047805786, + "epoch": 0.40836868214824934, + "kl_loss": 0.1597922444343567, + "loss_ib": 0.0022886006627231836, + "step": 1420 + }, + { + "ce_ib": 3.428995132446289, + "ce_orig": 0.46887871623039246, + "epoch": 0.40836868214824934, + "kl_loss": 0.05160287767648697, + "loss_ib": 0.0008589282515458763, + "step": 1420 + }, + { + "ce_ib": 7.4091644287109375, + "ce_orig": 1.4213844537734985, + "epoch": 0.40865626572722696, + "kl_loss": 0.16700538992881775, + "loss_ib": 0.002410970162600279, + "step": 1421 + }, + { + "ce_ib": 7.73345947265625, + "ce_orig": 0.7962655425071716, + "epoch": 0.40865626572722696, + "kl_loss": 0.13736525177955627, + "loss_ib": 0.0021469981875270605, + "step": 1421 + }, + { + "ce_ib": 6.495778560638428, + "ce_orig": 1.1647603511810303, + "epoch": 0.40865626572722696, + "kl_loss": 0.07768432796001434, + "loss_ib": 0.0014264211058616638, + "step": 1421 + }, + { + "ce_ib": 5.779698848724365, + "ce_orig": 0.8063614964485168, + "epoch": 0.40865626572722696, + "kl_loss": 0.08508419990539551, + "loss_ib": 0.0014288118109107018, + "step": 1421 + }, + { + "ce_ib": 4.216153621673584, + "ce_orig": 0.5847137570381165, + "epoch": 0.40894384930620464, + "kl_loss": 0.06408952176570892, + "loss_ib": 0.001062510535120964, + "step": 1422 + }, + { + "ce_ib": 7.842023849487305, + "ce_orig": 1.3211596012115479, + "epoch": 0.40894384930620464, + "kl_loss": 0.12538115680217743, + "loss_ib": 0.0020380138885229826, + "step": 1422 + }, + { + "ce_ib": 6.368346691131592, + "ce_orig": 1.4015376567840576, + "epoch": 0.40894384930620464, + "kl_loss": 0.1733558028936386, + "loss_ib": 0.002370392670854926, + "step": 1422 + }, + { + "ce_ib": 4.551137924194336, + "ce_orig": 0.6162861585617065, + "epoch": 0.40894384930620464, + "kl_loss": 0.10174643993377686, + "loss_ib": 0.001472578151151538, + "step": 1422 + }, + { + "ce_ib": 6.886654376983643, + "ce_orig": 0.9630997180938721, + "epoch": 0.40923143288518227, + "kl_loss": 0.12046155333518982, + "loss_ib": 0.0018932810053229332, + "step": 1423 + }, + { + "ce_ib": 7.230597019195557, + "ce_orig": 1.1543656587600708, + "epoch": 0.40923143288518227, + "kl_loss": 0.0784279853105545, + "loss_ib": 0.0015073394170030951, + "step": 1423 + }, + { + "ce_ib": 5.933324813842773, + "ce_orig": 0.7128235697746277, + "epoch": 0.40923143288518227, + "kl_loss": 0.1509019285440445, + "loss_ib": 0.0021023517474532127, + "step": 1423 + }, + { + "ce_ib": 5.982076644897461, + "ce_orig": 1.2698583602905273, + "epoch": 0.40923143288518227, + "kl_loss": 0.06490836292505264, + "loss_ib": 0.0012472912203520536, + "step": 1423 + }, + { + "ce_ib": 3.7193098068237305, + "ce_orig": 0.6517233848571777, + "epoch": 0.4095190164641599, + "kl_loss": 0.08744284510612488, + "loss_ib": 0.0012463594321161509, + "step": 1424 + }, + { + "ce_ib": 6.195068359375, + "ce_orig": 1.2152502536773682, + "epoch": 0.4095190164641599, + "kl_loss": 0.11578704416751862, + "loss_ib": 0.001777377212420106, + "step": 1424 + }, + { + "ce_ib": 9.438179016113281, + "ce_orig": 1.714571475982666, + "epoch": 0.4095190164641599, + "kl_loss": 0.09612904489040375, + "loss_ib": 0.0019051083363592625, + "step": 1424 + }, + { + "ce_ib": 5.258744716644287, + "ce_orig": 0.7220935225486755, + "epoch": 0.4095190164641599, + "kl_loss": 0.08531811088323593, + "loss_ib": 0.0013790555531159043, + "step": 1424 + }, + { + "epoch": 0.4098066000431375, + "grad_norm": 0.1047549843788147, + "learning_rate": 4.8543499927059445e-05, + "loss": 0.8855, + "step": 1425 + }, + { + "ce_ib": 5.544464588165283, + "ce_orig": 0.5749992728233337, + "epoch": 0.4098066000431375, + "kl_loss": 0.15693159401416779, + "loss_ib": 0.0021237623877823353, + "step": 1425 + }, + { + "ce_ib": 2.2752645015716553, + "ce_orig": 0.35088467597961426, + "epoch": 0.4098066000431375, + "kl_loss": 0.07889710366725922, + "loss_ib": 0.00101649749558419, + "step": 1425 + }, + { + "ce_ib": 8.617973327636719, + "ce_orig": 1.2295658588409424, + "epoch": 0.4098066000431375, + "kl_loss": 0.08672799915075302, + "loss_ib": 0.0017290773103013635, + "step": 1425 + }, + { + "ce_ib": 4.017047882080078, + "ce_orig": 0.2988843619823456, + "epoch": 0.4098066000431375, + "kl_loss": 0.3063962459564209, + "loss_ib": 0.0034656673669815063, + "step": 1425 + }, + { + "ce_ib": 6.636280059814453, + "ce_orig": 0.8577524423599243, + "epoch": 0.4100941836221152, + "kl_loss": 0.13353979587554932, + "loss_ib": 0.001999025931581855, + "step": 1426 + }, + { + "ce_ib": 6.605571269989014, + "ce_orig": 0.9017672538757324, + "epoch": 0.4100941836221152, + "kl_loss": 0.057967811822891235, + "loss_ib": 0.0012402351712808013, + "step": 1426 + }, + { + "ce_ib": 5.672203540802002, + "ce_orig": 0.9474138021469116, + "epoch": 0.4100941836221152, + "kl_loss": 0.09774202108383179, + "loss_ib": 0.0015446405159309506, + "step": 1426 + }, + { + "ce_ib": 5.820497512817383, + "ce_orig": 0.7509312629699707, + "epoch": 0.4100941836221152, + "kl_loss": 0.12614545226097107, + "loss_ib": 0.0018435042584314942, + "step": 1426 + }, + { + "ce_ib": 5.530405521392822, + "ce_orig": 0.7405644059181213, + "epoch": 0.4103817672010928, + "kl_loss": 0.12352365255355835, + "loss_ib": 0.0017882769461721182, + "step": 1427 + }, + { + "ce_ib": 4.921144962310791, + "ce_orig": 0.7050065994262695, + "epoch": 0.4103817672010928, + "kl_loss": 0.08236676454544067, + "loss_ib": 0.0013157820794731379, + "step": 1427 + }, + { + "ce_ib": 5.164494514465332, + "ce_orig": 0.8321438431739807, + "epoch": 0.4103817672010928, + "kl_loss": 0.09443796426057816, + "loss_ib": 0.0014608290512114763, + "step": 1427 + }, + { + "ce_ib": 7.48508358001709, + "ce_orig": 1.5289044380187988, + "epoch": 0.4103817672010928, + "kl_loss": 0.09666682779788971, + "loss_ib": 0.0017151766223832965, + "step": 1427 + }, + { + "ce_ib": 8.054293632507324, + "ce_orig": 1.314834713935852, + "epoch": 0.41066935078007044, + "kl_loss": 0.07268555462360382, + "loss_ib": 0.0015322848921641707, + "step": 1428 + }, + { + "ce_ib": 5.46734619140625, + "ce_orig": 0.7332492470741272, + "epoch": 0.41066935078007044, + "kl_loss": 0.18445497751235962, + "loss_ib": 0.002391284331679344, + "step": 1428 + }, + { + "ce_ib": 6.619436264038086, + "ce_orig": 1.3106836080551147, + "epoch": 0.41066935078007044, + "kl_loss": 0.0980033203959465, + "loss_ib": 0.0016419767634943128, + "step": 1428 + }, + { + "ce_ib": 8.549188613891602, + "ce_orig": 1.5234109163284302, + "epoch": 0.41066935078007044, + "kl_loss": 0.10396061837673187, + "loss_ib": 0.001894524903036654, + "step": 1428 + }, + { + "ce_ib": 5.3329386711120605, + "ce_orig": 0.9647698402404785, + "epoch": 0.4109569343590481, + "kl_loss": 0.12533004581928253, + "loss_ib": 0.0017865943955257535, + "step": 1429 + }, + { + "ce_ib": 7.284860134124756, + "ce_orig": 0.9161247611045837, + "epoch": 0.4109569343590481, + "kl_loss": 0.08901812136173248, + "loss_ib": 0.0016186671564355493, + "step": 1429 + }, + { + "ce_ib": 4.861490249633789, + "ce_orig": 0.8348015546798706, + "epoch": 0.4109569343590481, + "kl_loss": 0.08867591619491577, + "loss_ib": 0.0013729081256315112, + "step": 1429 + }, + { + "ce_ib": 4.184719562530518, + "ce_orig": 0.5124539136886597, + "epoch": 0.4109569343590481, + "kl_loss": 0.1092284768819809, + "loss_ib": 0.001510756672360003, + "step": 1429 + }, + { + "epoch": 0.41124451793802574, + "grad_norm": 0.0929122045636177, + "learning_rate": 4.853042007939248e-05, + "loss": 0.8756, + "step": 1430 + }, + { + "ce_ib": 5.132282257080078, + "ce_orig": 0.7177113890647888, + "epoch": 0.41124451793802574, + "kl_loss": 0.11695947498083115, + "loss_ib": 0.001682822941802442, + "step": 1430 + }, + { + "ce_ib": 5.9083943367004395, + "ce_orig": 0.6927317380905151, + "epoch": 0.41124451793802574, + "kl_loss": 0.13192662596702576, + "loss_ib": 0.0019101055804640055, + "step": 1430 + }, + { + "ce_ib": 7.165446758270264, + "ce_orig": 0.8966021537780762, + "epoch": 0.41124451793802574, + "kl_loss": 0.12918083369731903, + "loss_ib": 0.002008352894335985, + "step": 1430 + }, + { + "ce_ib": 6.5472588539123535, + "ce_orig": 1.065467357635498, + "epoch": 0.41124451793802574, + "kl_loss": 0.1149042397737503, + "loss_ib": 0.0018037682166323066, + "step": 1430 + }, + { + "ce_ib": 4.255852222442627, + "ce_orig": 0.44100260734558105, + "epoch": 0.41153210151700337, + "kl_loss": 0.12202838808298111, + "loss_ib": 0.0016458689933642745, + "step": 1431 + }, + { + "ce_ib": 4.779837131500244, + "ce_orig": 0.7545100450515747, + "epoch": 0.41153210151700337, + "kl_loss": 0.12677210569381714, + "loss_ib": 0.001745704677887261, + "step": 1431 + }, + { + "ce_ib": 7.361011981964111, + "ce_orig": 0.9949771761894226, + "epoch": 0.41153210151700337, + "kl_loss": 0.07376141101121902, + "loss_ib": 0.0014737152960151434, + "step": 1431 + }, + { + "ce_ib": 6.311118125915527, + "ce_orig": 0.9260743856430054, + "epoch": 0.41153210151700337, + "kl_loss": 0.13274039328098297, + "loss_ib": 0.0019585154950618744, + "step": 1431 + }, + { + "ce_ib": 3.5083696842193604, + "ce_orig": 0.1979556828737259, + "epoch": 0.411819685095981, + "kl_loss": 0.1574898660182953, + "loss_ib": 0.0019257356179878116, + "step": 1432 + }, + { + "ce_ib": 5.359910011291504, + "ce_orig": 1.0081112384796143, + "epoch": 0.411819685095981, + "kl_loss": 0.06903597712516785, + "loss_ib": 0.00122635078150779, + "step": 1432 + }, + { + "ce_ib": 5.4759521484375, + "ce_orig": 0.9224492907524109, + "epoch": 0.411819685095981, + "kl_loss": 0.07184388488531113, + "loss_ib": 0.0012660340871661901, + "step": 1432 + }, + { + "ce_ib": 4.915720462799072, + "ce_orig": 0.6493841409683228, + "epoch": 0.411819685095981, + "kl_loss": 0.09995627403259277, + "loss_ib": 0.0014911347534507513, + "step": 1432 + }, + { + "ce_ib": 4.013033390045166, + "ce_orig": 0.6558687686920166, + "epoch": 0.41210726867495867, + "kl_loss": 0.18210524320602417, + "loss_ib": 0.0022223556879907846, + "step": 1433 + }, + { + "ce_ib": 7.42601203918457, + "ce_orig": 1.2959660291671753, + "epoch": 0.41210726867495867, + "kl_loss": 0.09777391701936722, + "loss_ib": 0.0017203402239829302, + "step": 1433 + }, + { + "ce_ib": 5.428860664367676, + "ce_orig": 0.4534249007701874, + "epoch": 0.41210726867495867, + "kl_loss": 0.1439604014158249, + "loss_ib": 0.0019824900664389133, + "step": 1433 + }, + { + "ce_ib": 7.018229007720947, + "ce_orig": 1.1912530660629272, + "epoch": 0.41210726867495867, + "kl_loss": 0.13741852343082428, + "loss_ib": 0.002076007891446352, + "step": 1433 + }, + { + "ce_ib": 5.00704288482666, + "ce_orig": 0.5389499068260193, + "epoch": 0.4123948522539363, + "kl_loss": 0.09618522226810455, + "loss_ib": 0.0014625564217567444, + "step": 1434 + }, + { + "ce_ib": 5.019461154937744, + "ce_orig": 0.7763354778289795, + "epoch": 0.4123948522539363, + "kl_loss": 0.08913825452327728, + "loss_ib": 0.0013933285372331738, + "step": 1434 + }, + { + "ce_ib": 5.427734375, + "ce_orig": 0.8100016713142395, + "epoch": 0.4123948522539363, + "kl_loss": 0.0638791173696518, + "loss_ib": 0.00118156464304775, + "step": 1434 + }, + { + "ce_ib": 4.386058330535889, + "ce_orig": 0.6597939133644104, + "epoch": 0.4123948522539363, + "kl_loss": 0.10749074816703796, + "loss_ib": 0.001513513270765543, + "step": 1434 + }, + { + "epoch": 0.4126824358329139, + "grad_norm": 0.08337324112653732, + "learning_rate": 4.851728354148203e-05, + "loss": 0.849, + "step": 1435 + }, + { + "ce_ib": 4.55735969543457, + "ce_orig": 0.6752052307128906, + "epoch": 0.4126824358329139, + "kl_loss": 0.0997617095708847, + "loss_ib": 0.0014533529756590724, + "step": 1435 + }, + { + "ce_ib": 5.787301540374756, + "ce_orig": 1.0791867971420288, + "epoch": 0.4126824358329139, + "kl_loss": 0.053296104073524475, + "loss_ib": 0.0011116911191493273, + "step": 1435 + }, + { + "ce_ib": 6.046157360076904, + "ce_orig": 0.8505937457084656, + "epoch": 0.4126824358329139, + "kl_loss": 0.07980112731456757, + "loss_ib": 0.0014026268618181348, + "step": 1435 + }, + { + "ce_ib": 4.417842388153076, + "ce_orig": 0.8308053612709045, + "epoch": 0.4126824358329139, + "kl_loss": 0.12036478519439697, + "loss_ib": 0.001645432086661458, + "step": 1435 + }, + { + "ce_ib": 9.31179428100586, + "ce_orig": 1.4481607675552368, + "epoch": 0.4129700194118916, + "kl_loss": 0.07177025079727173, + "loss_ib": 0.0016488818218931556, + "step": 1436 + }, + { + "ce_ib": 4.3464484214782715, + "ce_orig": 0.518915057182312, + "epoch": 0.4129700194118916, + "kl_loss": 0.07961948215961456, + "loss_ib": 0.0012308396399021149, + "step": 1436 + }, + { + "ce_ib": 4.7582173347473145, + "ce_orig": 0.32292982935905457, + "epoch": 0.4129700194118916, + "kl_loss": 0.13897094130516052, + "loss_ib": 0.0018655312014743686, + "step": 1436 + }, + { + "ce_ib": 6.521214962005615, + "ce_orig": 1.1188610792160034, + "epoch": 0.4129700194118916, + "kl_loss": 0.09975160658359528, + "loss_ib": 0.0016496374737471342, + "step": 1436 + }, + { + "ce_ib": 3.012716293334961, + "ce_orig": 0.3423144519329071, + "epoch": 0.4132576029908692, + "kl_loss": 0.20307299494743347, + "loss_ib": 0.002332001458853483, + "step": 1437 + }, + { + "ce_ib": 6.234538555145264, + "ce_orig": 0.7571339011192322, + "epoch": 0.4132576029908692, + "kl_loss": 0.10930953174829483, + "loss_ib": 0.0017165490426123142, + "step": 1437 + }, + { + "ce_ib": 7.31367301940918, + "ce_orig": 1.5435678958892822, + "epoch": 0.4132576029908692, + "kl_loss": 0.09054261445999146, + "loss_ib": 0.0016367933712899685, + "step": 1437 + }, + { + "ce_ib": 4.15863561630249, + "ce_orig": 0.6962539553642273, + "epoch": 0.4132576029908692, + "kl_loss": 0.09438318014144897, + "loss_ib": 0.0013596953358501196, + "step": 1437 + }, + { + "ce_ib": 3.0213708877563477, + "ce_orig": 0.6388891935348511, + "epoch": 0.41354518656984685, + "kl_loss": 0.07304428517818451, + "loss_ib": 0.0010325799230486155, + "step": 1438 + }, + { + "ce_ib": 2.410836935043335, + "ce_orig": 0.4856458008289337, + "epoch": 0.41354518656984685, + "kl_loss": 0.06359301507472992, + "loss_ib": 0.000877013779245317, + "step": 1438 + }, + { + "ce_ib": 6.516482353210449, + "ce_orig": 1.2926971912384033, + "epoch": 0.41354518656984685, + "kl_loss": 0.0939096137881279, + "loss_ib": 0.001590744243003428, + "step": 1438 + }, + { + "ce_ib": 6.705051898956299, + "ce_orig": 0.9979714751243591, + "epoch": 0.41354518656984685, + "kl_loss": 0.1320842057466507, + "loss_ib": 0.00199134717695415, + "step": 1438 + }, + { + "ce_ib": 5.880115985870361, + "ce_orig": 0.6562256813049316, + "epoch": 0.4138327701488245, + "kl_loss": 0.11906502395868301, + "loss_ib": 0.0017786618554964662, + "step": 1439 + }, + { + "ce_ib": 3.5629148483276367, + "ce_orig": 0.3119569420814514, + "epoch": 0.4138327701488245, + "kl_loss": 0.08793976902961731, + "loss_ib": 0.0012356891529634595, + "step": 1439 + }, + { + "ce_ib": 6.078202247619629, + "ce_orig": 0.9515578746795654, + "epoch": 0.4138327701488245, + "kl_loss": 0.10293766856193542, + "loss_ib": 0.0016371967503800988, + "step": 1439 + }, + { + "ce_ib": 5.050149917602539, + "ce_orig": 0.7735735774040222, + "epoch": 0.4138327701488245, + "kl_loss": 0.06617474555969238, + "loss_ib": 0.0011667624348774552, + "step": 1439 + }, + { + "epoch": 0.41412035372780215, + "grad_norm": 0.07793393731117249, + "learning_rate": 4.850409034497704e-05, + "loss": 0.8629, + "step": 1440 + }, + { + "ce_ib": 6.743407726287842, + "ce_orig": 1.0928657054901123, + "epoch": 0.41412035372780215, + "kl_loss": 0.08591713011264801, + "loss_ib": 0.0015335120260715485, + "step": 1440 + }, + { + "ce_ib": 5.080863952636719, + "ce_orig": 1.1487590074539185, + "epoch": 0.41412035372780215, + "kl_loss": 0.09081675112247467, + "loss_ib": 0.0014162538573145866, + "step": 1440 + }, + { + "ce_ib": 4.980000019073486, + "ce_orig": 0.7465852499008179, + "epoch": 0.41412035372780215, + "kl_loss": 0.07600586861371994, + "loss_ib": 0.0012580587062984705, + "step": 1440 + }, + { + "ce_ib": 5.498541831970215, + "ce_orig": 0.5793271660804749, + "epoch": 0.41412035372780215, + "kl_loss": 0.08694162964820862, + "loss_ib": 0.0014192704111337662, + "step": 1440 + }, + { + "ce_ib": 3.5841479301452637, + "ce_orig": 0.6023969054222107, + "epoch": 0.4144079373067798, + "kl_loss": 0.08362072706222534, + "loss_ib": 0.0011946220183745027, + "step": 1441 + }, + { + "ce_ib": 7.423813343048096, + "ce_orig": 1.302383303642273, + "epoch": 0.4144079373067798, + "kl_loss": 0.1895519644021988, + "loss_ib": 0.0026379008777439594, + "step": 1441 + }, + { + "ce_ib": 4.287861347198486, + "ce_orig": 0.546518862247467, + "epoch": 0.4144079373067798, + "kl_loss": 0.10317136347293854, + "loss_ib": 0.001460499712266028, + "step": 1441 + }, + { + "ce_ib": 3.742234945297241, + "ce_orig": 0.41573965549468994, + "epoch": 0.4144079373067798, + "kl_loss": 0.0966222956776619, + "loss_ib": 0.0013404464116320014, + "step": 1441 + }, + { + "ce_ib": 3.696802854537964, + "ce_orig": 0.582406759262085, + "epoch": 0.4146955208857574, + "kl_loss": 0.07445007562637329, + "loss_ib": 0.0011141810100525618, + "step": 1442 + }, + { + "ce_ib": 5.3360466957092285, + "ce_orig": 0.6130356192588806, + "epoch": 0.4146955208857574, + "kl_loss": 0.1057562604546547, + "loss_ib": 0.001591167296282947, + "step": 1442 + }, + { + "ce_ib": 5.148367404937744, + "ce_orig": 0.6853485107421875, + "epoch": 0.4146955208857574, + "kl_loss": 0.12549643218517303, + "loss_ib": 0.0017698010196909308, + "step": 1442 + }, + { + "ce_ib": 7.778355121612549, + "ce_orig": 1.2386947870254517, + "epoch": 0.4146955208857574, + "kl_loss": 0.11003492772579193, + "loss_ib": 0.0018781846156343818, + "step": 1442 + }, + { + "ce_ib": 4.685573577880859, + "ce_orig": 0.6230754852294922, + "epoch": 0.4149831044647351, + "kl_loss": 0.0866067036986351, + "loss_ib": 0.0013346243649721146, + "step": 1443 + }, + { + "ce_ib": 4.70703649520874, + "ce_orig": 0.354888379573822, + "epoch": 0.4149831044647351, + "kl_loss": 0.07406913489103317, + "loss_ib": 0.0012113949051126838, + "step": 1443 + }, + { + "ce_ib": 4.947773456573486, + "ce_orig": 0.8348574042320251, + "epoch": 0.4149831044647351, + "kl_loss": 0.221211776137352, + "loss_ib": 0.0027068951167166233, + "step": 1443 + }, + { + "ce_ib": 5.788531303405762, + "ce_orig": 0.5687450766563416, + "epoch": 0.4149831044647351, + "kl_loss": 0.0781509131193161, + "loss_ib": 0.0013603621628135443, + "step": 1443 + }, + { + "ce_ib": 4.952359199523926, + "ce_orig": 0.5754139423370361, + "epoch": 0.4152706880437127, + "kl_loss": 0.07508834451436996, + "loss_ib": 0.0012461193837225437, + "step": 1444 + }, + { + "ce_ib": 3.1481313705444336, + "ce_orig": 0.5248778462409973, + "epoch": 0.4152706880437127, + "kl_loss": 0.05129199102520943, + "loss_ib": 0.0008277330198325217, + "step": 1444 + }, + { + "ce_ib": 3.468945026397705, + "ce_orig": 0.585468590259552, + "epoch": 0.4152706880437127, + "kl_loss": 0.06292411684989929, + "loss_ib": 0.0009761356632225215, + "step": 1444 + }, + { + "ce_ib": 6.467720985412598, + "ce_orig": 0.6333016753196716, + "epoch": 0.4152706880437127, + "kl_loss": 0.15747122466564178, + "loss_ib": 0.002221484202891588, + "step": 1444 + }, + { + "epoch": 0.4155582716226903, + "grad_norm": 0.09773839265108109, + "learning_rate": 4.8490840521663e-05, + "loss": 0.831, + "step": 1445 + }, + { + "ce_ib": 4.575643539428711, + "ce_orig": 0.7352006435394287, + "epoch": 0.4155582716226903, + "kl_loss": 0.06750188022851944, + "loss_ib": 0.0011325831292197108, + "step": 1445 + }, + { + "ce_ib": 7.829094886779785, + "ce_orig": 1.3825410604476929, + "epoch": 0.4155582716226903, + "kl_loss": 0.10775604844093323, + "loss_ib": 0.001860469812527299, + "step": 1445 + }, + { + "ce_ib": 4.150346755981445, + "ce_orig": 0.8056705594062805, + "epoch": 0.4155582716226903, + "kl_loss": 0.06260480731725693, + "loss_ib": 0.0010410826653242111, + "step": 1445 + }, + { + "ce_ib": 3.7523932456970215, + "ce_orig": 0.6832635998725891, + "epoch": 0.4155582716226903, + "kl_loss": 0.04519576579332352, + "loss_ib": 0.0008271969854831696, + "step": 1445 + }, + { + "ce_ib": 4.2690205574035645, + "ce_orig": 0.7793135046958923, + "epoch": 0.415845855201668, + "kl_loss": 0.07240147888660431, + "loss_ib": 0.0011509167961776257, + "step": 1446 + }, + { + "ce_ib": 5.752465724945068, + "ce_orig": 0.8700240850448608, + "epoch": 0.415845855201668, + "kl_loss": 0.11336810141801834, + "loss_ib": 0.0017089275643229485, + "step": 1446 + }, + { + "ce_ib": 4.555610179901123, + "ce_orig": 0.3389473855495453, + "epoch": 0.415845855201668, + "kl_loss": 0.141608327627182, + "loss_ib": 0.001871644170023501, + "step": 1446 + }, + { + "ce_ib": 7.882659435272217, + "ce_orig": 1.145242691040039, + "epoch": 0.415845855201668, + "kl_loss": 0.08191496878862381, + "loss_ib": 0.001607415615580976, + "step": 1446 + }, + { + "ce_ib": 7.144163131713867, + "ce_orig": 0.7978856563568115, + "epoch": 0.4161334387806456, + "kl_loss": 0.14888674020767212, + "loss_ib": 0.0022032835986465216, + "step": 1447 + }, + { + "ce_ib": 4.994200706481934, + "ce_orig": 0.43828389048576355, + "epoch": 0.4161334387806456, + "kl_loss": 0.08625173568725586, + "loss_ib": 0.0013619373785331845, + "step": 1447 + }, + { + "ce_ib": 3.9171884059906006, + "ce_orig": 0.45851317048072815, + "epoch": 0.4161334387806456, + "kl_loss": 0.10175397992134094, + "loss_ib": 0.0014092584606260061, + "step": 1447 + }, + { + "ce_ib": 5.007274150848389, + "ce_orig": 0.895894467830658, + "epoch": 0.4161334387806456, + "kl_loss": 0.10107745230197906, + "loss_ib": 0.001511501963250339, + "step": 1447 + }, + { + "ce_ib": 10.265788078308105, + "ce_orig": 1.6524277925491333, + "epoch": 0.41642102235962325, + "kl_loss": 0.1213127076625824, + "loss_ib": 0.0022397057618945837, + "step": 1448 + }, + { + "ce_ib": 5.410793781280518, + "ce_orig": 0.3247361183166504, + "epoch": 0.41642102235962325, + "kl_loss": 0.15680810809135437, + "loss_ib": 0.002109160413965583, + "step": 1448 + }, + { + "ce_ib": 5.622844219207764, + "ce_orig": 0.9446114301681519, + "epoch": 0.41642102235962325, + "kl_loss": 0.08880884200334549, + "loss_ib": 0.0014503727434203029, + "step": 1448 + }, + { + "ce_ib": 2.635361909866333, + "ce_orig": 0.2937851846218109, + "epoch": 0.41642102235962325, + "kl_loss": 0.3179281949996948, + "loss_ib": 0.0034428180661052465, + "step": 1448 + }, + { + "ce_ib": 3.701805830001831, + "ce_orig": 0.5746484398841858, + "epoch": 0.41670860593860093, + "kl_loss": 0.13422581553459167, + "loss_ib": 0.0017124387668445706, + "step": 1449 + }, + { + "ce_ib": 4.573523044586182, + "ce_orig": 0.7001649737358093, + "epoch": 0.41670860593860093, + "kl_loss": 0.09929852932691574, + "loss_ib": 0.0014503375859931111, + "step": 1449 + }, + { + "ce_ib": 6.455227375030518, + "ce_orig": 0.9670495986938477, + "epoch": 0.41670860593860093, + "kl_loss": 0.11830205470323563, + "loss_ib": 0.0018285432597622275, + "step": 1449 + }, + { + "ce_ib": 4.639206409454346, + "ce_orig": 0.7645363211631775, + "epoch": 0.41670860593860093, + "kl_loss": 0.10847769677639008, + "loss_ib": 0.0015486975898966193, + "step": 1449 + }, + { + "epoch": 0.41699618951757855, + "grad_norm": 0.0861210972070694, + "learning_rate": 4.84775341034618e-05, + "loss": 0.823, + "step": 1450 + }, + { + "ce_ib": 6.17034912109375, + "ce_orig": 0.8687017560005188, + "epoch": 0.41699618951757855, + "kl_loss": 0.16763222217559814, + "loss_ib": 0.0022933571599423885, + "step": 1450 + }, + { + "ce_ib": 5.7684783935546875, + "ce_orig": 0.8275676965713501, + "epoch": 0.41699618951757855, + "kl_loss": 0.1634751409292221, + "loss_ib": 0.0022115991450846195, + "step": 1450 + }, + { + "ce_ib": 3.9640069007873535, + "ce_orig": 0.4531187117099762, + "epoch": 0.41699618951757855, + "kl_loss": 0.14157013595104218, + "loss_ib": 0.0018121020402759314, + "step": 1450 + }, + { + "ce_ib": 4.608716011047363, + "ce_orig": 0.34708335995674133, + "epoch": 0.41699618951757855, + "kl_loss": 0.10207530856132507, + "loss_ib": 0.0014816246693953872, + "step": 1450 + }, + { + "ce_ib": 5.21584939956665, + "ce_orig": 0.856076717376709, + "epoch": 0.4172837730965562, + "kl_loss": 0.09565088152885437, + "loss_ib": 0.0014780936762690544, + "step": 1451 + }, + { + "ce_ib": 5.448686599731445, + "ce_orig": 1.2174465656280518, + "epoch": 0.4172837730965562, + "kl_loss": 0.11567234247922897, + "loss_ib": 0.0017015920020639896, + "step": 1451 + }, + { + "ce_ib": 6.26071310043335, + "ce_orig": 0.8002637624740601, + "epoch": 0.4172837730965562, + "kl_loss": 0.12488710135221481, + "loss_ib": 0.0018749423325061798, + "step": 1451 + }, + { + "ce_ib": 5.369015216827393, + "ce_orig": 0.6488451957702637, + "epoch": 0.4172837730965562, + "kl_loss": 0.11009375005960464, + "loss_ib": 0.001637839013710618, + "step": 1451 + }, + { + "ce_ib": 7.470041275024414, + "ce_orig": 1.25552499294281, + "epoch": 0.4175713566755338, + "kl_loss": 0.08985939621925354, + "loss_ib": 0.0016455980949103832, + "step": 1452 + }, + { + "ce_ib": 6.61988639831543, + "ce_orig": 0.6664535403251648, + "epoch": 0.4175713566755338, + "kl_loss": 0.15661829710006714, + "loss_ib": 0.0022281715646386147, + "step": 1452 + }, + { + "ce_ib": 4.991513729095459, + "ce_orig": 0.8073134422302246, + "epoch": 0.4175713566755338, + "kl_loss": 0.11219026893377304, + "loss_ib": 0.001621054019778967, + "step": 1452 + }, + { + "ce_ib": 5.777252197265625, + "ce_orig": 0.6050467491149902, + "epoch": 0.4175713566755338, + "kl_loss": 0.10480629652738571, + "loss_ib": 0.001625788165256381, + "step": 1452 + }, + { + "ce_ib": 4.126240253448486, + "ce_orig": 0.8339105844497681, + "epoch": 0.4178589402545115, + "kl_loss": 0.11453627049922943, + "loss_ib": 0.001557986717671156, + "step": 1453 + }, + { + "ce_ib": 5.761976718902588, + "ce_orig": 0.6732054352760315, + "epoch": 0.4178589402545115, + "kl_loss": 0.08584851771593094, + "loss_ib": 0.0014346828684210777, + "step": 1453 + }, + { + "ce_ib": 6.47689962387085, + "ce_orig": 0.8545829653739929, + "epoch": 0.4178589402545115, + "kl_loss": 0.09251880645751953, + "loss_ib": 0.0015728779835626483, + "step": 1453 + }, + { + "ce_ib": 6.084139823913574, + "ce_orig": 0.8071439862251282, + "epoch": 0.4178589402545115, + "kl_loss": 0.13854151964187622, + "loss_ib": 0.0019938291516155005, + "step": 1453 + }, + { + "ce_ib": 5.937831401824951, + "ce_orig": 0.8069619536399841, + "epoch": 0.4181465238334891, + "kl_loss": 0.15720096230506897, + "loss_ib": 0.002165792742744088, + "step": 1454 + }, + { + "ce_ib": 3.6552929878234863, + "ce_orig": 0.6291875243186951, + "epoch": 0.4181465238334891, + "kl_loss": 0.07416625320911407, + "loss_ib": 0.001107191783376038, + "step": 1454 + }, + { + "ce_ib": 6.729911804199219, + "ce_orig": 1.3609896898269653, + "epoch": 0.4181465238334891, + "kl_loss": 0.11908746510744095, + "loss_ib": 0.0018638657638803124, + "step": 1454 + }, + { + "ce_ib": 6.7187981605529785, + "ce_orig": 1.2945940494537354, + "epoch": 0.4181465238334891, + "kl_loss": 0.10286684334278107, + "loss_ib": 0.0017005482222884893, + "step": 1454 + }, + { + "epoch": 0.4184341074124667, + "grad_norm": 0.09489905834197998, + "learning_rate": 4.8464171122431684e-05, + "loss": 0.8488, + "step": 1455 + }, + { + "ce_ib": 2.7830166816711426, + "ce_orig": 0.545401394367218, + "epoch": 0.4184341074124667, + "kl_loss": 0.052842650562524796, + "loss_ib": 0.0008067281451076269, + "step": 1455 + }, + { + "ce_ib": 4.6558709144592285, + "ce_orig": 0.6611546277999878, + "epoch": 0.4184341074124667, + "kl_loss": 0.11962183564901352, + "loss_ib": 0.001661805436015129, + "step": 1455 + }, + { + "ce_ib": 5.181333541870117, + "ce_orig": 0.7933509349822998, + "epoch": 0.4184341074124667, + "kl_loss": 0.09340114146471024, + "loss_ib": 0.0014521447010338306, + "step": 1455 + }, + { + "ce_ib": 5.011409282684326, + "ce_orig": 0.5729467868804932, + "epoch": 0.4184341074124667, + "kl_loss": 0.12782812118530273, + "loss_ib": 0.0017794221639633179, + "step": 1455 + }, + { + "ce_ib": 5.055473327636719, + "ce_orig": 0.7648478746414185, + "epoch": 0.4187216909914444, + "kl_loss": 0.1019359678030014, + "loss_ib": 0.0015249070711433887, + "step": 1456 + }, + { + "ce_ib": 4.663490295410156, + "ce_orig": 0.49775955080986023, + "epoch": 0.4187216909914444, + "kl_loss": 0.07127943634986877, + "loss_ib": 0.0011791433207690716, + "step": 1456 + }, + { + "ce_ib": 7.593672275543213, + "ce_orig": 1.1730624437332153, + "epoch": 0.4187216909914444, + "kl_loss": 0.14229080080986023, + "loss_ib": 0.0021822750568389893, + "step": 1456 + }, + { + "ce_ib": 4.291922569274902, + "ce_orig": 0.6264102458953857, + "epoch": 0.4187216909914444, + "kl_loss": 0.09957338124513626, + "loss_ib": 0.0014249259838834405, + "step": 1456 + }, + { + "ce_ib": 3.189561367034912, + "ce_orig": 0.4278847277164459, + "epoch": 0.41900927457042203, + "kl_loss": 0.13700458407402039, + "loss_ib": 0.0016890019178390503, + "step": 1457 + }, + { + "ce_ib": 3.4097647666931152, + "ce_orig": 0.49093571305274963, + "epoch": 0.41900927457042203, + "kl_loss": 0.09303900599479675, + "loss_ib": 0.001271366490982473, + "step": 1457 + }, + { + "ce_ib": 7.352541446685791, + "ce_orig": 0.9452694058418274, + "epoch": 0.41900927457042203, + "kl_loss": 0.08397386968135834, + "loss_ib": 0.0015749927842989564, + "step": 1457 + }, + { + "ce_ib": 4.712612152099609, + "ce_orig": 0.8468739986419678, + "epoch": 0.41900927457042203, + "kl_loss": 0.12789814174175262, + "loss_ib": 0.0017502426635473967, + "step": 1457 + }, + { + "ce_ib": 5.61114501953125, + "ce_orig": 0.8121333718299866, + "epoch": 0.41929685814939965, + "kl_loss": 0.10803937166929245, + "loss_ib": 0.0016415081918239594, + "step": 1458 + }, + { + "ce_ib": 4.783021450042725, + "ce_orig": 0.9681260585784912, + "epoch": 0.41929685814939965, + "kl_loss": 0.12566494941711426, + "loss_ib": 0.0017349515110254288, + "step": 1458 + }, + { + "ce_ib": 4.220062255859375, + "ce_orig": 0.6777926683425903, + "epoch": 0.41929685814939965, + "kl_loss": 0.10959968715906143, + "loss_ib": 0.0015180030604824424, + "step": 1458 + }, + { + "ce_ib": 8.752851486206055, + "ce_orig": 1.2470048666000366, + "epoch": 0.41929685814939965, + "kl_loss": 0.1120050922036171, + "loss_ib": 0.0019953360315412283, + "step": 1458 + }, + { + "ce_ib": 5.158299446105957, + "ce_orig": 0.9586217999458313, + "epoch": 0.41958444172837733, + "kl_loss": 0.08502523601055145, + "loss_ib": 0.0013660822296515107, + "step": 1459 + }, + { + "ce_ib": 5.161741256713867, + "ce_orig": 0.7361714243888855, + "epoch": 0.41958444172837733, + "kl_loss": 0.0770580917596817, + "loss_ib": 0.00128675508312881, + "step": 1459 + }, + { + "ce_ib": 4.91823673248291, + "ce_orig": 0.7018568515777588, + "epoch": 0.41958444172837733, + "kl_loss": 0.10236355662345886, + "loss_ib": 0.001515459269285202, + "step": 1459 + }, + { + "ce_ib": 4.538360118865967, + "ce_orig": 0.7272351980209351, + "epoch": 0.41958444172837733, + "kl_loss": 0.09524193406105042, + "loss_ib": 0.0014062552945688367, + "step": 1459 + }, + { + "epoch": 0.41987202530735496, + "grad_norm": 0.10186373442411423, + "learning_rate": 4.8450751610767194e-05, + "loss": 0.8158, + "step": 1460 + }, + { + "ce_ib": 4.1273512840271, + "ce_orig": 0.7310515642166138, + "epoch": 0.41987202530735496, + "kl_loss": 0.058533839881420135, + "loss_ib": 0.0009980734903365374, + "step": 1460 + }, + { + "ce_ib": 4.629617691040039, + "ce_orig": 0.4766985774040222, + "epoch": 0.41987202530735496, + "kl_loss": 0.1433141827583313, + "loss_ib": 0.001896103611215949, + "step": 1460 + }, + { + "ce_ib": 5.644496440887451, + "ce_orig": 0.7028821706771851, + "epoch": 0.41987202530735496, + "kl_loss": 0.09563258290290833, + "loss_ib": 0.001520775374956429, + "step": 1460 + }, + { + "ce_ib": 6.520239353179932, + "ce_orig": 1.4824178218841553, + "epoch": 0.41987202530735496, + "kl_loss": 0.07369743287563324, + "loss_ib": 0.0013889983529224992, + "step": 1460 + }, + { + "ce_ib": 2.592207193374634, + "ce_orig": 0.26936668157577515, + "epoch": 0.4201596088863326, + "kl_loss": 0.3481166660785675, + "loss_ib": 0.0037403872702270746, + "step": 1461 + }, + { + "ce_ib": 4.518808364868164, + "ce_orig": 0.6325695514678955, + "epoch": 0.4201596088863326, + "kl_loss": 0.11003083735704422, + "loss_ib": 0.0015521892346441746, + "step": 1461 + }, + { + "ce_ib": 3.3300020694732666, + "ce_orig": 0.5329961180686951, + "epoch": 0.4201596088863326, + "kl_loss": 0.0928843691945076, + "loss_ib": 0.0012618438340723515, + "step": 1461 + }, + { + "ce_ib": 3.862823963165283, + "ce_orig": 0.6123619675636292, + "epoch": 0.4201596088863326, + "kl_loss": 0.11784958839416504, + "loss_ib": 0.0015647781547158957, + "step": 1461 + }, + { + "ce_ib": 5.960676670074463, + "ce_orig": 1.0435458421707153, + "epoch": 0.4204471924653102, + "kl_loss": 0.10175187885761261, + "loss_ib": 0.0016135863261297345, + "step": 1462 + }, + { + "ce_ib": 4.466385364532471, + "ce_orig": 0.7635088562965393, + "epoch": 0.4204471924653102, + "kl_loss": 0.08558037132024765, + "loss_ib": 0.001302442280575633, + "step": 1462 + }, + { + "ce_ib": 3.277053117752075, + "ce_orig": 0.6193379759788513, + "epoch": 0.4204471924653102, + "kl_loss": 0.1026398092508316, + "loss_ib": 0.0013541033258661628, + "step": 1462 + }, + { + "ce_ib": 6.897516250610352, + "ce_orig": 1.2636864185333252, + "epoch": 0.4204471924653102, + "kl_loss": 0.10778491199016571, + "loss_ib": 0.0017676007701084018, + "step": 1462 + }, + { + "ce_ib": 7.378346920013428, + "ce_orig": 1.2946105003356934, + "epoch": 0.4207347760442879, + "kl_loss": 0.11343882977962494, + "loss_ib": 0.0018722229870036244, + "step": 1463 + }, + { + "ce_ib": 5.065412998199463, + "ce_orig": 0.7953489422798157, + "epoch": 0.4207347760442879, + "kl_loss": 0.14171633124351501, + "loss_ib": 0.001923704519867897, + "step": 1463 + }, + { + "ce_ib": 9.015141487121582, + "ce_orig": 0.8755373358726501, + "epoch": 0.4207347760442879, + "kl_loss": 0.20373690128326416, + "loss_ib": 0.0029388831462711096, + "step": 1463 + }, + { + "ce_ib": 1.8639222383499146, + "ce_orig": 0.34190768003463745, + "epoch": 0.4207347760442879, + "kl_loss": 0.035271838307380676, + "loss_ib": 0.0005391105660237372, + "step": 1463 + }, + { + "ce_ib": 6.010770797729492, + "ce_orig": 1.1248222589492798, + "epoch": 0.4210223596232655, + "kl_loss": 0.11656366288661957, + "loss_ib": 0.0017667136853560805, + "step": 1464 + }, + { + "ce_ib": 4.576229572296143, + "ce_orig": 0.4586426019668579, + "epoch": 0.4210223596232655, + "kl_loss": 0.13751602172851562, + "loss_ib": 0.0018327832221984863, + "step": 1464 + }, + { + "ce_ib": 4.836610317230225, + "ce_orig": 0.3597474992275238, + "epoch": 0.4210223596232655, + "kl_loss": 0.11420424282550812, + "loss_ib": 0.001625703414902091, + "step": 1464 + }, + { + "ce_ib": 4.152197360992432, + "ce_orig": 0.6041322946548462, + "epoch": 0.4210223596232655, + "kl_loss": 0.07142791152000427, + "loss_ib": 0.001129498821683228, + "step": 1464 + }, + { + "epoch": 0.42130994320224313, + "grad_norm": 0.08001025766134262, + "learning_rate": 4.8437275600799036e-05, + "loss": 0.8504, + "step": 1465 + }, + { + "ce_ib": 7.085323810577393, + "ce_orig": 1.2465083599090576, + "epoch": 0.42130994320224313, + "kl_loss": 0.08729834854602814, + "loss_ib": 0.0015815157676115632, + "step": 1465 + }, + { + "ce_ib": 8.220247268676758, + "ce_orig": 0.6453145146369934, + "epoch": 0.42130994320224313, + "kl_loss": 0.14087852835655212, + "loss_ib": 0.0022308097686618567, + "step": 1465 + }, + { + "ce_ib": 6.629300594329834, + "ce_orig": 1.267394781112671, + "epoch": 0.42130994320224313, + "kl_loss": 0.07615265995264053, + "loss_ib": 0.0014244564808905125, + "step": 1465 + }, + { + "ce_ib": 4.753538131713867, + "ce_orig": 0.4846392571926117, + "epoch": 0.42130994320224313, + "kl_loss": 0.07593982666730881, + "loss_ib": 0.0012347520096227527, + "step": 1465 + }, + { + "ce_ib": 2.3658440113067627, + "ce_orig": 0.26698946952819824, + "epoch": 0.4215975267812208, + "kl_loss": 0.23476958274841309, + "loss_ib": 0.002584280213341117, + "step": 1466 + }, + { + "ce_ib": 3.0916457176208496, + "ce_orig": 0.5870619416236877, + "epoch": 0.4215975267812208, + "kl_loss": 0.07413533329963684, + "loss_ib": 0.0010505177779123187, + "step": 1466 + }, + { + "ce_ib": 5.169697284698486, + "ce_orig": 1.0351004600524902, + "epoch": 0.4215975267812208, + "kl_loss": 0.09100858867168427, + "loss_ib": 0.0014270555693656206, + "step": 1466 + }, + { + "ce_ib": 9.962007522583008, + "ce_orig": 1.7318432331085205, + "epoch": 0.4215975267812208, + "kl_loss": 0.20365050435066223, + "loss_ib": 0.003032705746591091, + "step": 1466 + }, + { + "ce_ib": 6.981663227081299, + "ce_orig": 0.7525008320808411, + "epoch": 0.42188511036019843, + "kl_loss": 0.16806934773921967, + "loss_ib": 0.0023788597900420427, + "step": 1467 + }, + { + "ce_ib": 4.773881912231445, + "ce_orig": 0.6788235306739807, + "epoch": 0.42188511036019843, + "kl_loss": 0.13860733807086945, + "loss_ib": 0.0018634615698829293, + "step": 1467 + }, + { + "ce_ib": 7.603765964508057, + "ce_orig": 0.9796539545059204, + "epoch": 0.42188511036019843, + "kl_loss": 0.09102034568786621, + "loss_ib": 0.0016705800080671906, + "step": 1467 + }, + { + "ce_ib": 4.365150451660156, + "ce_orig": 0.2997058629989624, + "epoch": 0.42188511036019843, + "kl_loss": 0.1265721321105957, + "loss_ib": 0.0017022363608703017, + "step": 1467 + }, + { + "ce_ib": 6.895716667175293, + "ce_orig": 1.152829885482788, + "epoch": 0.42217269393917606, + "kl_loss": 0.09192080050706863, + "loss_ib": 0.0016087796539068222, + "step": 1468 + }, + { + "ce_ib": 6.535571575164795, + "ce_orig": 0.9287649393081665, + "epoch": 0.42217269393917606, + "kl_loss": 0.15220746397972107, + "loss_ib": 0.0021756317000836134, + "step": 1468 + }, + { + "ce_ib": 3.891815423965454, + "ce_orig": 0.3581300973892212, + "epoch": 0.42217269393917606, + "kl_loss": 0.1889212429523468, + "loss_ib": 0.0022783938329666853, + "step": 1468 + }, + { + "ce_ib": 2.487725019454956, + "ce_orig": 0.4007653594017029, + "epoch": 0.42217269393917606, + "kl_loss": 0.23008421063423157, + "loss_ib": 0.0025496145244687796, + "step": 1468 + }, + { + "ce_ib": 4.7796525955200195, + "ce_orig": 0.8040958642959595, + "epoch": 0.42246027751815374, + "kl_loss": 0.11338557302951813, + "loss_ib": 0.0016118210041895509, + "step": 1469 + }, + { + "ce_ib": 5.699153900146484, + "ce_orig": 0.7264449000358582, + "epoch": 0.42246027751815374, + "kl_loss": 0.1126704216003418, + "loss_ib": 0.0016966195544227958, + "step": 1469 + }, + { + "ce_ib": 5.746955394744873, + "ce_orig": 0.7303146123886108, + "epoch": 0.42246027751815374, + "kl_loss": 0.08956634998321533, + "loss_ib": 0.001470358925871551, + "step": 1469 + }, + { + "ce_ib": 4.594834804534912, + "ce_orig": 0.4276607632637024, + "epoch": 0.42246027751815374, + "kl_loss": 0.30717772245407104, + "loss_ib": 0.003531260648742318, + "step": 1469 + }, + { + "epoch": 0.42274786109713136, + "grad_norm": 0.09342991560697556, + "learning_rate": 4.842374312499405e-05, + "loss": 0.8164, + "step": 1470 + }, + { + "ce_ib": 7.28029203414917, + "ce_orig": 1.1571022272109985, + "epoch": 0.42274786109713136, + "kl_loss": 0.11070867627859116, + "loss_ib": 0.0018351158360019326, + "step": 1470 + }, + { + "ce_ib": 3.672571897506714, + "ce_orig": 0.4727495312690735, + "epoch": 0.42274786109713136, + "kl_loss": 0.09738650918006897, + "loss_ib": 0.0013411222025752068, + "step": 1470 + }, + { + "ce_ib": 7.971317768096924, + "ce_orig": 1.2943000793457031, + "epoch": 0.42274786109713136, + "kl_loss": 0.13336224853992462, + "loss_ib": 0.002130754292011261, + "step": 1470 + }, + { + "ce_ib": 4.77113151550293, + "ce_orig": 0.7445866465568542, + "epoch": 0.42274786109713136, + "kl_loss": 0.07389845699071884, + "loss_ib": 0.0012160976184532046, + "step": 1470 + }, + { + "ce_ib": 6.376780986785889, + "ce_orig": 1.117029070854187, + "epoch": 0.423035444676109, + "kl_loss": 0.11569513380527496, + "loss_ib": 0.0017946293810382485, + "step": 1471 + }, + { + "ce_ib": 7.349169731140137, + "ce_orig": 1.4872691631317139, + "epoch": 0.423035444676109, + "kl_loss": 0.09389686584472656, + "loss_ib": 0.0016738855047151446, + "step": 1471 + }, + { + "ce_ib": 3.8162693977355957, + "ce_orig": 0.4793437421321869, + "epoch": 0.423035444676109, + "kl_loss": 0.1309661865234375, + "loss_ib": 0.0016912887804210186, + "step": 1471 + }, + { + "ce_ib": 9.036438941955566, + "ce_orig": 1.5297104120254517, + "epoch": 0.423035444676109, + "kl_loss": 0.12823426723480225, + "loss_ib": 0.0021859866101294756, + "step": 1471 + }, + { + "ce_ib": 4.2849249839782715, + "ce_orig": 0.8074371218681335, + "epoch": 0.4233230282550866, + "kl_loss": 0.11331785470247269, + "loss_ib": 0.0015616710297763348, + "step": 1472 + }, + { + "ce_ib": 6.1896491050720215, + "ce_orig": 0.9406371712684631, + "epoch": 0.4233230282550866, + "kl_loss": 0.09268368035554886, + "loss_ib": 0.0015458017587661743, + "step": 1472 + }, + { + "ce_ib": 5.543100833892822, + "ce_orig": 0.6453739404678345, + "epoch": 0.4233230282550866, + "kl_loss": 0.13290368020534515, + "loss_ib": 0.0018833468202501535, + "step": 1472 + }, + { + "ce_ib": 5.75667667388916, + "ce_orig": 0.6321004033088684, + "epoch": 0.4233230282550866, + "kl_loss": 0.1326783448457718, + "loss_ib": 0.0019024510402232409, + "step": 1472 + }, + { + "ce_ib": 4.381680965423584, + "ce_orig": 0.7493016123771667, + "epoch": 0.4236106118340643, + "kl_loss": 0.07192018628120422, + "loss_ib": 0.0011573699302971363, + "step": 1473 + }, + { + "ce_ib": 3.2324516773223877, + "ce_orig": 0.4105515480041504, + "epoch": 0.4236106118340643, + "kl_loss": 0.09375610947608948, + "loss_ib": 0.0012608063407242298, + "step": 1473 + }, + { + "ce_ib": 5.543922424316406, + "ce_orig": 0.5035163164138794, + "epoch": 0.4236106118340643, + "kl_loss": 0.17082586884498596, + "loss_ib": 0.0022626507561653852, + "step": 1473 + }, + { + "ce_ib": 8.907859802246094, + "ce_orig": 1.4666497707366943, + "epoch": 0.4236106118340643, + "kl_loss": 0.07999315112829208, + "loss_ib": 0.001690717414021492, + "step": 1473 + }, + { + "ce_ib": 6.575271129608154, + "ce_orig": 0.5658308267593384, + "epoch": 0.4238981954130419, + "kl_loss": 0.3247229754924774, + "loss_ib": 0.0039047568570822477, + "step": 1474 + }, + { + "ce_ib": 3.7774455547332764, + "ce_orig": 0.7648996114730835, + "epoch": 0.4238981954130419, + "kl_loss": 0.06962984800338745, + "loss_ib": 0.0010740429861471057, + "step": 1474 + }, + { + "ce_ib": 5.892938137054443, + "ce_orig": 0.8106856942176819, + "epoch": 0.4238981954130419, + "kl_loss": 0.12563520669937134, + "loss_ib": 0.0018456458346918225, + "step": 1474 + }, + { + "ce_ib": 6.486325740814209, + "ce_orig": 0.46294519305229187, + "epoch": 0.4238981954130419, + "kl_loss": 0.28996336460113525, + "loss_ib": 0.0035482661332935095, + "step": 1474 + }, + { + "epoch": 0.42418577899201954, + "grad_norm": 0.09277452528476715, + "learning_rate": 4.841015421595511e-05, + "loss": 0.8851, + "step": 1475 + }, + { + "ce_ib": 6.460165977478027, + "ce_orig": 1.3669565916061401, + "epoch": 0.42418577899201954, + "kl_loss": 0.10979809612035751, + "loss_ib": 0.0017439975636079907, + "step": 1475 + }, + { + "ce_ib": 5.125516414642334, + "ce_orig": 0.6385084390640259, + "epoch": 0.42418577899201954, + "kl_loss": 0.09865008294582367, + "loss_ib": 0.0014990525087341666, + "step": 1475 + }, + { + "ce_ib": 5.793264865875244, + "ce_orig": 0.972436785697937, + "epoch": 0.42418577899201954, + "kl_loss": 0.08221779763698578, + "loss_ib": 0.0014015043852850795, + "step": 1475 + }, + { + "ce_ib": 4.9234795570373535, + "ce_orig": 0.5103410482406616, + "epoch": 0.42418577899201954, + "kl_loss": 0.12610337138175964, + "loss_ib": 0.0017533815698698163, + "step": 1475 + }, + { + "ce_ib": 6.956478595733643, + "ce_orig": 0.7556769251823425, + "epoch": 0.4244733625709972, + "kl_loss": 0.08710888028144836, + "loss_ib": 0.0015667366096749902, + "step": 1476 + }, + { + "ce_ib": 4.732303142547607, + "ce_orig": 0.6311543583869934, + "epoch": 0.4244733625709972, + "kl_loss": 0.11427406221628189, + "loss_ib": 0.0016159708611667156, + "step": 1476 + }, + { + "ce_ib": 8.348984718322754, + "ce_orig": 1.5093717575073242, + "epoch": 0.4244733625709972, + "kl_loss": 0.13694071769714355, + "loss_ib": 0.0022043054923415184, + "step": 1476 + }, + { + "ce_ib": 8.642187118530273, + "ce_orig": 1.3205996751785278, + "epoch": 0.4244733625709972, + "kl_loss": 0.12525543570518494, + "loss_ib": 0.0021167730446904898, + "step": 1476 + }, + { + "ce_ib": 3.667728900909424, + "ce_orig": 0.6192456483840942, + "epoch": 0.42476094614997484, + "kl_loss": 0.07596628367900848, + "loss_ib": 0.0011264357017353177, + "step": 1477 + }, + { + "ce_ib": 5.8655500411987305, + "ce_orig": 0.716063916683197, + "epoch": 0.42476094614997484, + "kl_loss": 0.12598375976085663, + "loss_ib": 0.0018463925225660205, + "step": 1477 + }, + { + "ce_ib": 8.43331241607666, + "ce_orig": 1.214815378189087, + "epoch": 0.42476094614997484, + "kl_loss": 0.09582893550395966, + "loss_ib": 0.0018016205867752433, + "step": 1477 + }, + { + "ce_ib": 3.889341354370117, + "ce_orig": 0.9948440194129944, + "epoch": 0.42476094614997484, + "kl_loss": 0.05919331684708595, + "loss_ib": 0.0009808673057705164, + "step": 1477 + }, + { + "ce_ib": 6.710433483123779, + "ce_orig": 1.4087427854537964, + "epoch": 0.42504852972895246, + "kl_loss": 0.08481179177761078, + "loss_ib": 0.0015191611601039767, + "step": 1478 + }, + { + "ce_ib": 5.514074325561523, + "ce_orig": 0.6335970163345337, + "epoch": 0.42504852972895246, + "kl_loss": 0.14356637001037598, + "loss_ib": 0.0019870710093528032, + "step": 1478 + }, + { + "ce_ib": 6.760642051696777, + "ce_orig": 0.7269102931022644, + "epoch": 0.42504852972895246, + "kl_loss": 0.12913690507411957, + "loss_ib": 0.0019674331415444613, + "step": 1478 + }, + { + "ce_ib": 5.520792484283447, + "ce_orig": 0.8305492997169495, + "epoch": 0.42504852972895246, + "kl_loss": 0.09097446501255035, + "loss_ib": 0.0014618238201364875, + "step": 1478 + }, + { + "ce_ib": 7.382885456085205, + "ce_orig": 0.9886298775672913, + "epoch": 0.42533611330793014, + "kl_loss": 0.09761233627796173, + "loss_ib": 0.0017144118901342154, + "step": 1479 + }, + { + "ce_ib": 3.056349039077759, + "ce_orig": 0.44829756021499634, + "epoch": 0.42533611330793014, + "kl_loss": 0.1604885309934616, + "loss_ib": 0.0019105201354250312, + "step": 1479 + }, + { + "ce_ib": 4.808676719665527, + "ce_orig": 0.206171452999115, + "epoch": 0.42533611330793014, + "kl_loss": 0.16769975423812866, + "loss_ib": 0.0021578650921583176, + "step": 1479 + }, + { + "ce_ib": 3.821026563644409, + "ce_orig": 0.6127325892448425, + "epoch": 0.42533611330793014, + "kl_loss": 0.054024383425712585, + "loss_ib": 0.0009223464876413345, + "step": 1479 + }, + { + "epoch": 0.42562369688690777, + "grad_norm": 0.09873180091381073, + "learning_rate": 4.839650890642104e-05, + "loss": 0.863, + "step": 1480 + }, + { + "ce_ib": 5.923498153686523, + "ce_orig": 0.9746879935264587, + "epoch": 0.42562369688690777, + "kl_loss": 0.10275821387767792, + "loss_ib": 0.001619931892491877, + "step": 1480 + }, + { + "ce_ib": 5.409031391143799, + "ce_orig": 0.6504145264625549, + "epoch": 0.42562369688690777, + "kl_loss": 0.14318981766700745, + "loss_ib": 0.001972801284864545, + "step": 1480 + }, + { + "ce_ib": 5.768213272094727, + "ce_orig": 1.4572311639785767, + "epoch": 0.42562369688690777, + "kl_loss": 0.07351214438676834, + "loss_ib": 0.0013119427021592855, + "step": 1480 + }, + { + "ce_ib": 7.788840293884277, + "ce_orig": 1.3698606491088867, + "epoch": 0.42562369688690777, + "kl_loss": 0.086025670170784, + "loss_ib": 0.001639140653423965, + "step": 1480 + }, + { + "ce_ib": 6.753962516784668, + "ce_orig": 0.8590308427810669, + "epoch": 0.4259112804658854, + "kl_loss": 0.20286300778388977, + "loss_ib": 0.002704026410356164, + "step": 1481 + }, + { + "ce_ib": 6.63479471206665, + "ce_orig": 1.1812267303466797, + "epoch": 0.4259112804658854, + "kl_loss": 0.13220015168190002, + "loss_ib": 0.001985481008887291, + "step": 1481 + }, + { + "ce_ib": 5.007169246673584, + "ce_orig": 0.7880942821502686, + "epoch": 0.4259112804658854, + "kl_loss": 0.07814208418130875, + "loss_ib": 0.0012821377022191882, + "step": 1481 + }, + { + "ce_ib": 5.508900165557861, + "ce_orig": 0.8839089870452881, + "epoch": 0.4259112804658854, + "kl_loss": 0.12156786024570465, + "loss_ib": 0.0017665685154497623, + "step": 1481 + }, + { + "ce_ib": 8.623146057128906, + "ce_orig": 1.9662206172943115, + "epoch": 0.426198864044863, + "kl_loss": 0.10497733950614929, + "loss_ib": 0.0019120879005640745, + "step": 1482 + }, + { + "ce_ib": 5.697625160217285, + "ce_orig": 0.9555112719535828, + "epoch": 0.426198864044863, + "kl_loss": 0.10451681166887283, + "loss_ib": 0.0016149305738508701, + "step": 1482 + }, + { + "ce_ib": 6.330455303192139, + "ce_orig": 0.987522304058075, + "epoch": 0.426198864044863, + "kl_loss": 0.11000921577215195, + "loss_ib": 0.0017331376438960433, + "step": 1482 + }, + { + "ce_ib": 4.160543441772461, + "ce_orig": 0.27671387791633606, + "epoch": 0.426198864044863, + "kl_loss": 0.17393755912780762, + "loss_ib": 0.0021554299164563417, + "step": 1482 + }, + { + "ce_ib": 4.079864025115967, + "ce_orig": 1.0547031164169312, + "epoch": 0.4264864476238407, + "kl_loss": 0.06679850071668625, + "loss_ib": 0.0010759714059531689, + "step": 1483 + }, + { + "ce_ib": 3.6775362491607666, + "ce_orig": 0.5392860770225525, + "epoch": 0.4264864476238407, + "kl_loss": 0.1283085197210312, + "loss_ib": 0.0016508387634530663, + "step": 1483 + }, + { + "ce_ib": 5.188068866729736, + "ce_orig": 0.8815830945968628, + "epoch": 0.4264864476238407, + "kl_loss": 0.07818719744682312, + "loss_ib": 0.0013006788212805986, + "step": 1483 + }, + { + "ce_ib": 5.105358123779297, + "ce_orig": 0.7038376331329346, + "epoch": 0.4264864476238407, + "kl_loss": 0.10735287517309189, + "loss_ib": 0.0015840644482523203, + "step": 1483 + }, + { + "ce_ib": 3.7244279384613037, + "ce_orig": 0.7728710174560547, + "epoch": 0.4267740312028183, + "kl_loss": 0.0835123211145401, + "loss_ib": 0.001207565888762474, + "step": 1484 + }, + { + "ce_ib": 5.630345821380615, + "ce_orig": 0.779137372970581, + "epoch": 0.4267740312028183, + "kl_loss": 0.14693334698677063, + "loss_ib": 0.00203236797824502, + "step": 1484 + }, + { + "ce_ib": 4.299570560455322, + "ce_orig": 1.070716142654419, + "epoch": 0.4267740312028183, + "kl_loss": 0.0551174022257328, + "loss_ib": 0.0009811309864744544, + "step": 1484 + }, + { + "ce_ib": 5.630002498626709, + "ce_orig": 1.0188994407653809, + "epoch": 0.4267740312028183, + "kl_loss": 0.10909400880336761, + "loss_ib": 0.0016539403004571795, + "step": 1484 + }, + { + "epoch": 0.42706161478179594, + "grad_norm": 0.10764925181865692, + "learning_rate": 4.8382807229266583e-05, + "loss": 0.9377, + "step": 1485 + }, + { + "ce_ib": 8.589742660522461, + "ce_orig": 1.2172328233718872, + "epoch": 0.42706161478179594, + "kl_loss": 0.12123409658670425, + "loss_ib": 0.0020713151898235083, + "step": 1485 + }, + { + "ce_ib": 8.910140991210938, + "ce_orig": 0.8911302089691162, + "epoch": 0.42706161478179594, + "kl_loss": 0.1684650480747223, + "loss_ib": 0.002575664548203349, + "step": 1485 + }, + { + "ce_ib": 6.011847019195557, + "ce_orig": 1.0063878297805786, + "epoch": 0.42706161478179594, + "kl_loss": 0.13005661964416504, + "loss_ib": 0.0019017508020624518, + "step": 1485 + }, + { + "ce_ib": 3.877028703689575, + "ce_orig": 0.4349795877933502, + "epoch": 0.42706161478179594, + "kl_loss": 0.07950502634048462, + "loss_ib": 0.0011827531270682812, + "step": 1485 + }, + { + "ce_ib": 3.6345038414001465, + "ce_orig": 0.6873906850814819, + "epoch": 0.4273491983607736, + "kl_loss": 0.090648353099823, + "loss_ib": 0.0012699338840320706, + "step": 1486 + }, + { + "ce_ib": 3.8409159183502197, + "ce_orig": 0.5949610471725464, + "epoch": 0.4273491983607736, + "kl_loss": 0.07319621741771698, + "loss_ib": 0.0011160537833347917, + "step": 1486 + }, + { + "ce_ib": 3.623453140258789, + "ce_orig": 0.7704918384552002, + "epoch": 0.4273491983607736, + "kl_loss": 0.12237702310085297, + "loss_ib": 0.0015861154533922672, + "step": 1486 + }, + { + "ce_ib": 4.723622798919678, + "ce_orig": 0.487240195274353, + "epoch": 0.4273491983607736, + "kl_loss": 0.17967435717582703, + "loss_ib": 0.002269105752930045, + "step": 1486 + }, + { + "ce_ib": 3.312293529510498, + "ce_orig": 0.7620286345481873, + "epoch": 0.42763678193975124, + "kl_loss": 0.0706988275051117, + "loss_ib": 0.0010382175678387284, + "step": 1487 + }, + { + "ce_ib": 3.4072983264923096, + "ce_orig": 0.6434142589569092, + "epoch": 0.42763678193975124, + "kl_loss": 0.09892557561397552, + "loss_ib": 0.0013299855636432767, + "step": 1487 + }, + { + "ce_ib": 3.0196757316589355, + "ce_orig": 0.33321601152420044, + "epoch": 0.42763678193975124, + "kl_loss": 0.22260043025016785, + "loss_ib": 0.002527971751987934, + "step": 1487 + }, + { + "ce_ib": 7.22377347946167, + "ce_orig": 1.249780297279358, + "epoch": 0.42763678193975124, + "kl_loss": 0.0948280617594719, + "loss_ib": 0.0016706580063328147, + "step": 1487 + }, + { + "ce_ib": 6.389316082000732, + "ce_orig": 0.8940539956092834, + "epoch": 0.42792436551872887, + "kl_loss": 0.11902444809675217, + "loss_ib": 0.0018291760934516788, + "step": 1488 + }, + { + "ce_ib": 7.263855457305908, + "ce_orig": 0.7269363403320312, + "epoch": 0.42792436551872887, + "kl_loss": 0.13229572772979736, + "loss_ib": 0.002049342729151249, + "step": 1488 + }, + { + "ce_ib": 4.953929901123047, + "ce_orig": 1.0663037300109863, + "epoch": 0.42792436551872887, + "kl_loss": 0.06149699166417122, + "loss_ib": 0.001110362820327282, + "step": 1488 + }, + { + "ce_ib": 8.461508750915527, + "ce_orig": 1.434740662574768, + "epoch": 0.42792436551872887, + "kl_loss": 0.15363231301307678, + "loss_ib": 0.002382473787292838, + "step": 1488 + }, + { + "ce_ib": 4.225872039794922, + "ce_orig": 0.5934211611747742, + "epoch": 0.42821194909770655, + "kl_loss": 0.11377574503421783, + "loss_ib": 0.0015603447100147605, + "step": 1489 + }, + { + "ce_ib": 6.648335933685303, + "ce_orig": 1.411804437637329, + "epoch": 0.42821194909770655, + "kl_loss": 0.07212527096271515, + "loss_ib": 0.0013860863400623202, + "step": 1489 + }, + { + "ce_ib": 6.111649513244629, + "ce_orig": 0.8437939286231995, + "epoch": 0.42821194909770655, + "kl_loss": 0.06988925486803055, + "loss_ib": 0.0013100574724376202, + "step": 1489 + }, + { + "ce_ib": 3.841252326965332, + "ce_orig": 0.5593860149383545, + "epoch": 0.42821194909770655, + "kl_loss": 0.09108671545982361, + "loss_ib": 0.0012949923984706402, + "step": 1489 + }, + { + "epoch": 0.42849953267668417, + "grad_norm": 0.10317344963550568, + "learning_rate": 4.836904921750223e-05, + "loss": 0.8231, + "step": 1490 + }, + { + "ce_ib": 5.85347318649292, + "ce_orig": 1.0941354036331177, + "epoch": 0.42849953267668417, + "kl_loss": 0.09418511390686035, + "loss_ib": 0.0015271983575075865, + "step": 1490 + }, + { + "ce_ib": 4.57334041595459, + "ce_orig": 0.8032926917076111, + "epoch": 0.42849953267668417, + "kl_loss": 0.06270309537649155, + "loss_ib": 0.0010843649506568909, + "step": 1490 + }, + { + "ce_ib": 3.4598300457000732, + "ce_orig": 0.3299408555030823, + "epoch": 0.42849953267668417, + "kl_loss": 0.07001639157533646, + "loss_ib": 0.0010461468482390046, + "step": 1490 + }, + { + "ce_ib": 4.194902420043945, + "ce_orig": 0.5485158562660217, + "epoch": 0.42849953267668417, + "kl_loss": 0.06838849186897278, + "loss_ib": 0.001103375107049942, + "step": 1490 + }, + { + "ce_ib": 4.9279961585998535, + "ce_orig": 0.8593989610671997, + "epoch": 0.4287871162556618, + "kl_loss": 0.09136204421520233, + "loss_ib": 0.0014064200222492218, + "step": 1491 + }, + { + "ce_ib": 5.720012664794922, + "ce_orig": 1.2061270475387573, + "epoch": 0.4287871162556618, + "kl_loss": 0.1045733168721199, + "loss_ib": 0.0016177344368770719, + "step": 1491 + }, + { + "ce_ib": 3.831967353820801, + "ce_orig": 0.6567938327789307, + "epoch": 0.4287871162556618, + "kl_loss": 0.07468820363283157, + "loss_ib": 0.001130078686401248, + "step": 1491 + }, + { + "ce_ib": 3.877784013748169, + "ce_orig": 0.8739439249038696, + "epoch": 0.4287871162556618, + "kl_loss": 0.08189292252063751, + "loss_ib": 0.001206707675009966, + "step": 1491 + }, + { + "ce_ib": 4.748837471008301, + "ce_orig": 1.0048332214355469, + "epoch": 0.4290746998346394, + "kl_loss": 0.06992149353027344, + "loss_ib": 0.001174098695628345, + "step": 1492 + }, + { + "ce_ib": 4.89517879486084, + "ce_orig": 0.5884690880775452, + "epoch": 0.4290746998346394, + "kl_loss": 0.07238373160362244, + "loss_ib": 0.0012133552227169275, + "step": 1492 + }, + { + "ce_ib": 7.266257286071777, + "ce_orig": 1.2749794721603394, + "epoch": 0.4290746998346394, + "kl_loss": 0.20466673374176025, + "loss_ib": 0.0027732928283512592, + "step": 1492 + }, + { + "ce_ib": 5.501678466796875, + "ce_orig": 0.3813422918319702, + "epoch": 0.4290746998346394, + "kl_loss": 0.10594455152750015, + "loss_ib": 0.0016096133040264249, + "step": 1492 + }, + { + "ce_ib": 6.912803649902344, + "ce_orig": 0.9841716289520264, + "epoch": 0.4293622834136171, + "kl_loss": 0.2044220119714737, + "loss_ib": 0.0027355002239346504, + "step": 1493 + }, + { + "ce_ib": 4.345367908477783, + "ce_orig": 0.7242448329925537, + "epoch": 0.4293622834136171, + "kl_loss": 0.0738978162407875, + "loss_ib": 0.001173514872789383, + "step": 1493 + }, + { + "ce_ib": 7.523437976837158, + "ce_orig": 0.5210906863212585, + "epoch": 0.4293622834136171, + "kl_loss": 0.1008140817284584, + "loss_ib": 0.001760484534315765, + "step": 1493 + }, + { + "ce_ib": 4.733566761016846, + "ce_orig": 0.6056400537490845, + "epoch": 0.4293622834136171, + "kl_loss": 0.1043701171875, + "loss_ib": 0.0015170578844845295, + "step": 1493 + }, + { + "ce_ib": 5.4491868019104, + "ce_orig": 0.9924566745758057, + "epoch": 0.4296498669925947, + "kl_loss": 0.07613148540258408, + "loss_ib": 0.0013062335783615708, + "step": 1494 + }, + { + "ce_ib": 2.647188186645508, + "ce_orig": 0.4178062677383423, + "epoch": 0.4296498669925947, + "kl_loss": 0.09912779927253723, + "loss_ib": 0.0012559967581182718, + "step": 1494 + }, + { + "ce_ib": 3.5450642108917236, + "ce_orig": 0.34603291749954224, + "epoch": 0.4296498669925947, + "kl_loss": 0.07269264757633209, + "loss_ib": 0.0010814327979460359, + "step": 1494 + }, + { + "ce_ib": 2.828864336013794, + "ce_orig": 0.5859595537185669, + "epoch": 0.4296498669925947, + "kl_loss": 0.05315869301557541, + "loss_ib": 0.0008144733146764338, + "step": 1494 + }, + { + "epoch": 0.42993745057157234, + "grad_norm": 0.10054890066385269, + "learning_rate": 4.835523490427425e-05, + "loss": 0.8296, + "step": 1495 + }, + { + "ce_ib": 4.736669063568115, + "ce_orig": 0.564308226108551, + "epoch": 0.42993745057157234, + "kl_loss": 0.07203033566474915, + "loss_ib": 0.0011939702089875937, + "step": 1495 + }, + { + "ce_ib": 3.487304925918579, + "ce_orig": 0.5112748742103577, + "epoch": 0.42993745057157234, + "kl_loss": 0.052416346967220306, + "loss_ib": 0.0008728938992135227, + "step": 1495 + }, + { + "ce_ib": 6.845876216888428, + "ce_orig": 0.859950065612793, + "epoch": 0.42993745057157234, + "kl_loss": 0.12004132568836212, + "loss_ib": 0.0018850007327273488, + "step": 1495 + }, + { + "ce_ib": 6.047791481018066, + "ce_orig": 1.0694940090179443, + "epoch": 0.42993745057157234, + "kl_loss": 0.06923744082450867, + "loss_ib": 0.0012971535325050354, + "step": 1495 + }, + { + "ce_ib": 7.036633491516113, + "ce_orig": 1.3809123039245605, + "epoch": 0.43022503415055, + "kl_loss": 0.12386683374643326, + "loss_ib": 0.0019423315534368157, + "step": 1496 + }, + { + "ce_ib": 9.137833595275879, + "ce_orig": 1.7173395156860352, + "epoch": 0.43022503415055, + "kl_loss": 0.11944153904914856, + "loss_ib": 0.0021081985905766487, + "step": 1496 + }, + { + "ce_ib": 5.539783477783203, + "ce_orig": 0.6790186762809753, + "epoch": 0.43022503415055, + "kl_loss": 0.16711831092834473, + "loss_ib": 0.0022251615300774574, + "step": 1496 + }, + { + "ce_ib": 6.523403167724609, + "ce_orig": 0.668170154094696, + "epoch": 0.43022503415055, + "kl_loss": 0.09907539188861847, + "loss_ib": 0.0016430941177532077, + "step": 1496 + }, + { + "ce_ib": 8.092485427856445, + "ce_orig": 1.5751919746398926, + "epoch": 0.43051261772952765, + "kl_loss": 0.10649112612009048, + "loss_ib": 0.0018741597887128592, + "step": 1497 + }, + { + "ce_ib": 8.320467948913574, + "ce_orig": 1.3753570318222046, + "epoch": 0.43051261772952765, + "kl_loss": 0.11326869577169418, + "loss_ib": 0.0019647337030619383, + "step": 1497 + }, + { + "ce_ib": 7.601989269256592, + "ce_orig": 1.6063616275787354, + "epoch": 0.43051261772952765, + "kl_loss": 0.10248681157827377, + "loss_ib": 0.0017850670265033841, + "step": 1497 + }, + { + "ce_ib": 5.250172138214111, + "ce_orig": 1.012228012084961, + "epoch": 0.43051261772952765, + "kl_loss": 0.08977590501308441, + "loss_ib": 0.0014227762585505843, + "step": 1497 + }, + { + "ce_ib": 7.121866703033447, + "ce_orig": 1.2347811460494995, + "epoch": 0.43080020130850527, + "kl_loss": 0.14125066995620728, + "loss_ib": 0.0021246932446956635, + "step": 1498 + }, + { + "ce_ib": 3.8643674850463867, + "ce_orig": 0.6364955902099609, + "epoch": 0.43080020130850527, + "kl_loss": 0.08937310427427292, + "loss_ib": 0.0012801677221432328, + "step": 1498 + }, + { + "ce_ib": 5.398726940155029, + "ce_orig": 1.1411463022232056, + "epoch": 0.43080020130850527, + "kl_loss": 0.08773718774318695, + "loss_ib": 0.001417244435288012, + "step": 1498 + }, + { + "ce_ib": 5.65582799911499, + "ce_orig": 1.0903723239898682, + "epoch": 0.43080020130850527, + "kl_loss": 0.08431953191757202, + "loss_ib": 0.0014087781310081482, + "step": 1498 + }, + { + "ce_ib": 8.064273834228516, + "ce_orig": 1.591813564300537, + "epoch": 0.43108778488748295, + "kl_loss": 0.12314474582672119, + "loss_ib": 0.0020378746557980776, + "step": 1499 + }, + { + "ce_ib": 7.066220760345459, + "ce_orig": 1.3731032609939575, + "epoch": 0.43108778488748295, + "kl_loss": 0.09561625123023987, + "loss_ib": 0.0016627844888716936, + "step": 1499 + }, + { + "ce_ib": 6.4272074699401855, + "ce_orig": 0.6825188994407654, + "epoch": 0.43108778488748295, + "kl_loss": 0.18424586951732635, + "loss_ib": 0.002485179342329502, + "step": 1499 + }, + { + "ce_ib": 7.436380863189697, + "ce_orig": 1.1504343748092651, + "epoch": 0.43108778488748295, + "kl_loss": 0.13248597085475922, + "loss_ib": 0.002068497706204653, + "step": 1499 + }, + { + "epoch": 0.4313753684664606, + "grad_norm": 0.08262129127979279, + "learning_rate": 4.8341364322864523e-05, + "loss": 0.9576, + "step": 1500 + } + ], + "logging_steps": 5, + "max_steps": 10434, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}