{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9732360097323601,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019464720194647202,
      "grad_norm": 3.141327142715454,
      "learning_rate": 1.964980544747082e-05,
      "loss": 0.8378,
      "step": 10
    },
    {
      "epoch": 0.038929440389294405,
      "grad_norm": 3.0160133838653564,
      "learning_rate": 1.926070038910506e-05,
      "loss": 0.6845,
      "step": 20
    },
    {
      "epoch": 0.058394160583941604,
      "grad_norm": 2.803372383117676,
      "learning_rate": 1.8871595330739302e-05,
      "loss": 0.7152,
      "step": 30
    },
    {
      "epoch": 0.07785888077858881,
      "grad_norm": 3.0655837059020996,
      "learning_rate": 1.8482490272373545e-05,
      "loss": 0.7378,
      "step": 40
    },
    {
      "epoch": 0.09732360097323602,
      "grad_norm": 3.110929250717163,
      "learning_rate": 1.8093385214007784e-05,
      "loss": 0.7893,
      "step": 50
    },
    {
      "epoch": 0.11678832116788321,
      "grad_norm": 2.8148601055145264,
      "learning_rate": 1.7704280155642024e-05,
      "loss": 0.6094,
      "step": 60
    },
    {
      "epoch": 0.1362530413625304,
      "grad_norm": 3.101245164871216,
      "learning_rate": 1.7315175097276267e-05,
      "loss": 0.7711,
      "step": 70
    },
    {
      "epoch": 0.15571776155717762,
      "grad_norm": 3.2842319011688232,
      "learning_rate": 1.6926070038910507e-05,
      "loss": 0.8186,
      "step": 80
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 3.251784563064575,
      "learning_rate": 1.6536964980544746e-05,
      "loss": 0.7434,
      "step": 90
    },
    {
      "epoch": 0.19464720194647203,
      "grad_norm": 2.3745484352111816,
      "learning_rate": 1.614785992217899e-05,
      "loss": 0.7038,
      "step": 100
    },
    {
      "epoch": 0.2141119221411192,
      "grad_norm": 3.1335856914520264,
      "learning_rate": 1.575875486381323e-05,
      "loss": 0.6951,
      "step": 110
    },
    {
      "epoch": 0.23357664233576642,
      "grad_norm": 2.7429134845733643,
      "learning_rate": 1.5369649805447472e-05,
      "loss": 0.7729,
      "step": 120
    },
    {
      "epoch": 0.25304136253041365,
      "grad_norm": 2.9923152923583984,
      "learning_rate": 1.4980544747081713e-05,
      "loss": 0.8028,
      "step": 130
    },
    {
      "epoch": 0.2725060827250608,
      "grad_norm": 2.421699047088623,
      "learning_rate": 1.4591439688715954e-05,
      "loss": 0.7149,
      "step": 140
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 2.9223577976226807,
      "learning_rate": 1.4202334630350196e-05,
      "loss": 0.6443,
      "step": 150
    },
    {
      "epoch": 0.31143552311435524,
      "grad_norm": 2.8819730281829834,
      "learning_rate": 1.3813229571984437e-05,
      "loss": 0.6637,
      "step": 160
    },
    {
      "epoch": 0.3309002433090024,
      "grad_norm": 4.547940254211426,
      "learning_rate": 1.3424124513618676e-05,
      "loss": 0.7544,
      "step": 170
    },
    {
      "epoch": 0.35036496350364965,
      "grad_norm": 2.5568296909332275,
      "learning_rate": 1.303501945525292e-05,
      "loss": 0.7072,
      "step": 180
    },
    {
      "epoch": 0.36982968369829683,
      "grad_norm": 3.3708319664001465,
      "learning_rate": 1.264591439688716e-05,
      "loss": 0.6297,
      "step": 190
    },
    {
      "epoch": 0.38929440389294406,
      "grad_norm": 2.7162747383117676,
      "learning_rate": 1.2256809338521402e-05,
      "loss": 0.7005,
      "step": 200
    },
    {
      "epoch": 0.40875912408759124,
      "grad_norm": 2.8566107749938965,
      "learning_rate": 1.1867704280155643e-05,
      "loss": 0.685,
      "step": 210
    },
    {
      "epoch": 0.4282238442822384,
      "grad_norm": 2.8077552318573,
      "learning_rate": 1.1478599221789883e-05,
      "loss": 0.6693,
      "step": 220
    },
    {
      "epoch": 0.44768856447688565,
      "grad_norm": 2.4068663120269775,
      "learning_rate": 1.1089494163424126e-05,
      "loss": 0.7102,
      "step": 230
    },
    {
      "epoch": 0.46715328467153283,
      "grad_norm": 2.695136547088623,
      "learning_rate": 1.0700389105058367e-05,
      "loss": 0.627,
      "step": 240
    },
    {
      "epoch": 0.48661800486618007,
      "grad_norm": 3.525002956390381,
      "learning_rate": 1.0311284046692607e-05,
      "loss": 0.8356,
      "step": 250
    },
    {
      "epoch": 0.5060827250608273,
      "grad_norm": 2.570307970046997,
      "learning_rate": 9.92217898832685e-06,
      "loss": 0.6801,
      "step": 260
    },
    {
      "epoch": 0.5255474452554745,
      "grad_norm": 4.270814418792725,
      "learning_rate": 9.533073929961091e-06,
      "loss": 0.69,
      "step": 270
    },
    {
      "epoch": 0.5450121654501217,
      "grad_norm": 2.613346576690674,
      "learning_rate": 9.14396887159533e-06,
      "loss": 0.7642,
      "step": 280
    },
    {
      "epoch": 0.5644768856447688,
      "grad_norm": 2.4436159133911133,
      "learning_rate": 8.754863813229572e-06,
      "loss": 0.6094,
      "step": 290
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 2.36502742767334,
      "learning_rate": 8.365758754863815e-06,
      "loss": 0.8154,
      "step": 300
    },
    {
      "epoch": 0.6034063260340633,
      "grad_norm": 3.1881520748138428,
      "learning_rate": 7.976653696498056e-06,
      "loss": 0.7275,
      "step": 310
    },
    {
      "epoch": 0.6228710462287105,
      "grad_norm": 4.3175506591796875,
      "learning_rate": 7.587548638132296e-06,
      "loss": 0.7597,
      "step": 320
    },
    {
      "epoch": 0.6423357664233577,
      "grad_norm": 2.3517050743103027,
      "learning_rate": 7.198443579766538e-06,
      "loss": 0.6347,
      "step": 330
    },
    {
      "epoch": 0.6618004866180048,
      "grad_norm": 2.738858938217163,
      "learning_rate": 6.809338521400779e-06,
      "loss": 0.6762,
      "step": 340
    },
    {
      "epoch": 0.681265206812652,
      "grad_norm": 2.875246524810791,
      "learning_rate": 6.4202334630350205e-06,
      "loss": 0.7455,
      "step": 350
    },
    {
      "epoch": 0.7007299270072993,
      "grad_norm": 2.4450690746307373,
      "learning_rate": 6.031128404669261e-06,
      "loss": 0.6319,
      "step": 360
    },
    {
      "epoch": 0.7201946472019465,
      "grad_norm": 2.8268823623657227,
      "learning_rate": 5.642023346303502e-06,
      "loss": 0.727,
      "step": 370
    },
    {
      "epoch": 0.7396593673965937,
      "grad_norm": 2.7376480102539062,
      "learning_rate": 5.2529182879377435e-06,
      "loss": 0.715,
      "step": 380
    },
    {
      "epoch": 0.7591240875912408,
      "grad_norm": 2.594489812850952,
      "learning_rate": 4.863813229571985e-06,
      "loss": 0.6903,
      "step": 390
    },
    {
      "epoch": 0.7785888077858881,
      "grad_norm": 2.7169930934906006,
      "learning_rate": 4.474708171206226e-06,
      "loss": 0.6588,
      "step": 400
    },
    {
      "epoch": 0.7980535279805353,
      "grad_norm": 2.7620320320129395,
      "learning_rate": 4.085603112840467e-06,
      "loss": 0.6841,
      "step": 410
    },
    {
      "epoch": 0.8175182481751825,
      "grad_norm": 2.7458746433258057,
      "learning_rate": 3.6964980544747086e-06,
      "loss": 0.6749,
      "step": 420
    },
    {
      "epoch": 0.8369829683698297,
      "grad_norm": 2.521542549133301,
      "learning_rate": 3.3073929961089495e-06,
      "loss": 0.6442,
      "step": 430
    },
    {
      "epoch": 0.8564476885644768,
      "grad_norm": 2.5402560234069824,
      "learning_rate": 2.918287937743191e-06,
      "loss": 0.6811,
      "step": 440
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 2.5818333625793457,
      "learning_rate": 2.529182879377432e-06,
      "loss": 0.6693,
      "step": 450
    },
    {
      "epoch": 0.8953771289537713,
      "grad_norm": 2.597168207168579,
      "learning_rate": 2.1400778210116734e-06,
      "loss": 0.6262,
      "step": 460
    },
    {
      "epoch": 0.9148418491484185,
      "grad_norm": 2.494330883026123,
      "learning_rate": 1.7509727626459144e-06,
      "loss": 0.6529,
      "step": 470
    },
    {
      "epoch": 0.9343065693430657,
      "grad_norm": 2.4246292114257812,
      "learning_rate": 1.3618677042801557e-06,
      "loss": 0.6761,
      "step": 480
    },
    {
      "epoch": 0.9537712895377128,
      "grad_norm": 3.0480525493621826,
      "learning_rate": 9.72762645914397e-07,
      "loss": 0.6473,
      "step": 490
    },
    {
      "epoch": 0.9732360097323601,
      "grad_norm": 2.6482596397399902,
      "learning_rate": 5.836575875486382e-07,
      "loss": 0.6354,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 514,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.674366707918438e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}