| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 11.91962675448914, | |
| "global_step": 76000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.712319219376199e-06, | |
| "loss": 3.9787, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 3.446138381958008, | |
| "eval_runtime": 172.8011, | |
| "eval_samples_per_second": 65.578, | |
| "eval_steps_per_second": 16.395, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.7424638438752397e-05, | |
| "loss": 3.6486, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 3.351425886154175, | |
| "eval_runtime": 164.1827, | |
| "eval_samples_per_second": 69.021, | |
| "eval_steps_per_second": 17.255, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6136957658128598e-05, | |
| "loss": 3.5509, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 3.2961513996124268, | |
| "eval_runtime": 164.8921, | |
| "eval_samples_per_second": 68.724, | |
| "eval_steps_per_second": 17.181, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.4849276877504794e-05, | |
| "loss": 3.488, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 3.2645070552825928, | |
| "eval_runtime": 175.5668, | |
| "eval_samples_per_second": 64.545, | |
| "eval_steps_per_second": 16.136, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.356159609688099e-05, | |
| "loss": 3.4485, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 3.236461877822876, | |
| "eval_runtime": 172.1625, | |
| "eval_samples_per_second": 65.822, | |
| "eval_steps_per_second": 16.455, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.985484032435679e-05, | |
| "loss": 3.4183, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 3.213158130645752, | |
| "eval_runtime": 165.7563, | |
| "eval_samples_per_second": 68.365, | |
| "eval_steps_per_second": 17.091, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.9298672984727646e-05, | |
| "loss": 3.3437, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 3.199575185775757, | |
| "eval_runtime": 155.1508, | |
| "eval_samples_per_second": 73.039, | |
| "eval_steps_per_second": 18.26, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.87425056450985e-05, | |
| "loss": 3.2964, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 3.1807236671447754, | |
| "eval_runtime": 156.1445, | |
| "eval_samples_per_second": 72.574, | |
| "eval_steps_per_second": 18.143, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.8186338305469355e-05, | |
| "loss": 3.2813, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 3.1617496013641357, | |
| "eval_runtime": 169.4771, | |
| "eval_samples_per_second": 66.864, | |
| "eval_steps_per_second": 16.716, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.76301709658402e-05, | |
| "loss": 3.2586, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 3.14561128616333, | |
| "eval_runtime": 169.05, | |
| "eval_samples_per_second": 67.033, | |
| "eval_steps_per_second": 16.758, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.7074003626211057e-05, | |
| "loss": 3.248, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 3.1361846923828125, | |
| "eval_runtime": 165.9904, | |
| "eval_samples_per_second": 68.269, | |
| "eval_steps_per_second": 17.067, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 4.651783628658191e-05, | |
| "loss": 3.2413, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 3.1259961128234863, | |
| "eval_runtime": 167.4544, | |
| "eval_samples_per_second": 67.672, | |
| "eval_steps_per_second": 16.918, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.5961668946952765e-05, | |
| "loss": 3.1974, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 3.121722459793091, | |
| "eval_runtime": 167.3385, | |
| "eval_samples_per_second": 67.719, | |
| "eval_steps_per_second": 16.93, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 4.540550160732361e-05, | |
| "loss": 3.1014, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 3.1170709133148193, | |
| "eval_runtime": 166.2081, | |
| "eval_samples_per_second": 68.18, | |
| "eval_steps_per_second": 17.045, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 4.484933426769447e-05, | |
| "loss": 3.1037, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 3.1060075759887695, | |
| "eval_runtime": 163.3667, | |
| "eval_samples_per_second": 69.365, | |
| "eval_steps_per_second": 17.341, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 4.4293166928065315e-05, | |
| "loss": 3.1047, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 3.1029844284057617, | |
| "eval_runtime": 166.2417, | |
| "eval_samples_per_second": 68.166, | |
| "eval_steps_per_second": 17.041, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 4.3736999588436176e-05, | |
| "loss": 3.1004, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 3.0984201431274414, | |
| "eval_runtime": 156.2961, | |
| "eval_samples_per_second": 72.503, | |
| "eval_steps_per_second": 18.126, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 4.3180832248807023e-05, | |
| "loss": 3.0932, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 3.0867185592651367, | |
| "eval_runtime": 163.2041, | |
| "eval_samples_per_second": 69.435, | |
| "eval_steps_per_second": 17.359, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.262466490917788e-05, | |
| "loss": 3.0966, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 3.0811350345611572, | |
| "eval_runtime": 165.6977, | |
| "eval_samples_per_second": 68.39, | |
| "eval_steps_per_second": 17.097, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 4.2068497569548725e-05, | |
| "loss": 2.9921, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "eval_loss": 3.089102029800415, | |
| "eval_runtime": 158.578, | |
| "eval_samples_per_second": 71.46, | |
| "eval_steps_per_second": 17.865, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 4.151233022991958e-05, | |
| "loss": 2.9753, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_loss": 3.0833535194396973, | |
| "eval_runtime": 156.4016, | |
| "eval_samples_per_second": 72.454, | |
| "eval_steps_per_second": 18.114, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 4.0956162890290434e-05, | |
| "loss": 2.9872, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_loss": 3.0813159942626953, | |
| "eval_runtime": 154.7129, | |
| "eval_samples_per_second": 73.245, | |
| "eval_steps_per_second": 18.311, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 4.039999555066128e-05, | |
| "loss": 2.9916, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "eval_loss": 3.069411516189575, | |
| "eval_runtime": 157.7992, | |
| "eval_samples_per_second": 71.813, | |
| "eval_steps_per_second": 17.953, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 3.9843828211032136e-05, | |
| "loss": 2.9885, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "eval_loss": 3.0638270378112793, | |
| "eval_runtime": 155.9919, | |
| "eval_samples_per_second": 72.645, | |
| "eval_steps_per_second": 18.161, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 3.928766087140299e-05, | |
| "loss": 2.9925, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "eval_loss": 3.0645134449005127, | |
| "eval_runtime": 154.9825, | |
| "eval_samples_per_second": 73.118, | |
| "eval_steps_per_second": 18.279, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.8731493531773845e-05, | |
| "loss": 2.9338, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_loss": 3.072267770767212, | |
| "eval_runtime": 160.362, | |
| "eval_samples_per_second": 70.665, | |
| "eval_steps_per_second": 17.666, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.817532619214469e-05, | |
| "loss": 2.8808, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_loss": 3.0753729343414307, | |
| "eval_runtime": 154.306, | |
| "eval_samples_per_second": 73.439, | |
| "eval_steps_per_second": 18.36, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 3.7619158852515547e-05, | |
| "loss": 2.8947, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "eval_loss": 3.0727920532226562, | |
| "eval_runtime": 155.5666, | |
| "eval_samples_per_second": 72.843, | |
| "eval_steps_per_second": 18.211, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 3.7062991512886394e-05, | |
| "loss": 2.8972, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "eval_loss": 3.0629560947418213, | |
| "eval_runtime": 155.1806, | |
| "eval_samples_per_second": 73.025, | |
| "eval_steps_per_second": 18.256, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.6506824173257255e-05, | |
| "loss": 2.8965, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "eval_loss": 3.063514471054077, | |
| "eval_runtime": 154.5456, | |
| "eval_samples_per_second": 73.325, | |
| "eval_steps_per_second": 18.331, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 3.59506568336281e-05, | |
| "loss": 2.9067, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_loss": 3.055640697479248, | |
| "eval_runtime": 154.9311, | |
| "eval_samples_per_second": 73.142, | |
| "eval_steps_per_second": 18.286, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 3.539448949399896e-05, | |
| "loss": 2.8863, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "eval_loss": 3.0722622871398926, | |
| "eval_runtime": 154.3548, | |
| "eval_samples_per_second": 73.415, | |
| "eval_steps_per_second": 18.354, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 3.4838322154369805e-05, | |
| "loss": 2.8005, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "eval_loss": 3.072225332260132, | |
| "eval_runtime": 168.3304, | |
| "eval_samples_per_second": 67.32, | |
| "eval_steps_per_second": 16.83, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 3.4282154814740666e-05, | |
| "loss": 2.8136, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_loss": 3.067979097366333, | |
| "eval_runtime": 164.2522, | |
| "eval_samples_per_second": 68.991, | |
| "eval_steps_per_second": 17.248, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 3.3725987475111513e-05, | |
| "loss": 2.8092, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "eval_loss": 3.0639047622680664, | |
| "eval_runtime": 167.1507, | |
| "eval_samples_per_second": 67.795, | |
| "eval_steps_per_second": 16.949, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 3.316982013548237e-05, | |
| "loss": 2.8186, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_loss": 3.0562849044799805, | |
| "eval_runtime": 158.9079, | |
| "eval_samples_per_second": 71.312, | |
| "eval_steps_per_second": 17.828, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 3.2613652795853215e-05, | |
| "loss": 2.8306, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "eval_loss": 3.0535264015197754, | |
| "eval_runtime": 150.4043, | |
| "eval_samples_per_second": 75.344, | |
| "eval_steps_per_second": 18.836, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 3.205748545622407e-05, | |
| "loss": 2.8327, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "eval_loss": 3.0540544986724854, | |
| "eval_runtime": 159.9431, | |
| "eval_samples_per_second": 70.85, | |
| "eval_steps_per_second": 17.713, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 3.1501318116594924e-05, | |
| "loss": 2.7548, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "eval_loss": 3.068983554840088, | |
| "eval_runtime": 157.2439, | |
| "eval_samples_per_second": 72.066, | |
| "eval_steps_per_second": 18.017, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 3.094515077696578e-05, | |
| "loss": 2.7369, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "eval_loss": 3.067845344543457, | |
| "eval_runtime": 151.1866, | |
| "eval_samples_per_second": 74.954, | |
| "eval_steps_per_second": 18.738, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 3.0388983437336626e-05, | |
| "loss": 2.7471, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "eval_loss": 3.063314914703369, | |
| "eval_runtime": 150.8738, | |
| "eval_samples_per_second": 75.109, | |
| "eval_steps_per_second": 18.777, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.9832816097707477e-05, | |
| "loss": 2.7576, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "eval_loss": 3.0629308223724365, | |
| "eval_runtime": 170.5929, | |
| "eval_samples_per_second": 66.427, | |
| "eval_steps_per_second": 16.607, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 2.9276648758078335e-05, | |
| "loss": 2.7566, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "eval_loss": 3.0592966079711914, | |
| "eval_runtime": 152.4939, | |
| "eval_samples_per_second": 74.311, | |
| "eval_steps_per_second": 18.578, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 2.8720481418449186e-05, | |
| "loss": 2.7642, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "eval_loss": 3.055969715118408, | |
| "eval_runtime": 152.0719, | |
| "eval_samples_per_second": 74.517, | |
| "eval_steps_per_second": 18.629, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 2.8164314078820037e-05, | |
| "loss": 2.7268, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "eval_loss": 3.0751819610595703, | |
| "eval_runtime": 150.8723, | |
| "eval_samples_per_second": 75.11, | |
| "eval_steps_per_second": 18.777, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 2.7608146739190888e-05, | |
| "loss": 2.6778, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "eval_loss": 3.0745816230773926, | |
| "eval_runtime": 150.773, | |
| "eval_samples_per_second": 75.159, | |
| "eval_steps_per_second": 18.79, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 2.705197939956174e-05, | |
| "loss": 2.6895, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "eval_loss": 3.071889877319336, | |
| "eval_runtime": 151.4074, | |
| "eval_samples_per_second": 74.844, | |
| "eval_steps_per_second": 18.711, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 2.6495812059932596e-05, | |
| "loss": 2.6908, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "eval_loss": 3.0682897567749023, | |
| "eval_runtime": 150.9084, | |
| "eval_samples_per_second": 75.092, | |
| "eval_steps_per_second": 18.773, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 2.5939644720303447e-05, | |
| "loss": 2.6985, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_loss": 3.06658935546875, | |
| "eval_runtime": 151.6999, | |
| "eval_samples_per_second": 74.7, | |
| "eval_steps_per_second": 18.675, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 2.5383477380674298e-05, | |
| "loss": 2.6969, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "eval_loss": 3.0591042041778564, | |
| "eval_runtime": 161.1961, | |
| "eval_samples_per_second": 70.299, | |
| "eval_steps_per_second": 17.575, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2.4827310041045153e-05, | |
| "loss": 2.7043, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 3.0611181259155273, | |
| "eval_runtime": 152.3504, | |
| "eval_samples_per_second": 74.381, | |
| "eval_steps_per_second": 18.595, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 2.4271142701416004e-05, | |
| "loss": 2.6225, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "eval_loss": 3.080932378768921, | |
| "eval_runtime": 150.5999, | |
| "eval_samples_per_second": 75.246, | |
| "eval_steps_per_second": 18.811, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 2.3714975361786858e-05, | |
| "loss": 2.636, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "eval_loss": 3.0787675380706787, | |
| "eval_runtime": 152.1353, | |
| "eval_samples_per_second": 74.486, | |
| "eval_steps_per_second": 18.622, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 2.315880802215771e-05, | |
| "loss": 2.6379, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "eval_loss": 3.0813567638397217, | |
| "eval_runtime": 150.7408, | |
| "eval_samples_per_second": 75.175, | |
| "eval_steps_per_second": 18.794, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 2.260264068252856e-05, | |
| "loss": 2.6423, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "eval_loss": 3.0757715702056885, | |
| "eval_runtime": 151.7054, | |
| "eval_samples_per_second": 74.697, | |
| "eval_steps_per_second": 18.674, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 2.2046473342899414e-05, | |
| "loss": 2.6417, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "eval_loss": 3.074010133743286, | |
| "eval_runtime": 151.6069, | |
| "eval_samples_per_second": 74.746, | |
| "eval_steps_per_second": 18.686, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 2.1490306003270265e-05, | |
| "loss": 2.6507, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "eval_loss": 3.07344388961792, | |
| "eval_runtime": 158.2388, | |
| "eval_samples_per_second": 71.613, | |
| "eval_steps_per_second": 17.903, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 2.0934138663641116e-05, | |
| "loss": 2.6058, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "eval_loss": 3.086090087890625, | |
| "eval_runtime": 151.6749, | |
| "eval_samples_per_second": 74.712, | |
| "eval_steps_per_second": 18.678, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 2.0377971324011967e-05, | |
| "loss": 2.5833, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "eval_loss": 3.086378812789917, | |
| "eval_runtime": 150.5584, | |
| "eval_samples_per_second": 75.266, | |
| "eval_steps_per_second": 18.817, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 1.982180398438282e-05, | |
| "loss": 2.5864, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "eval_loss": 3.081770896911621, | |
| "eval_runtime": 152.7908, | |
| "eval_samples_per_second": 74.167, | |
| "eval_steps_per_second": 18.542, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 1.9265636644753672e-05, | |
| "loss": 2.5952, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "eval_loss": 3.0846848487854004, | |
| "eval_runtime": 158.7675, | |
| "eval_samples_per_second": 71.375, | |
| "eval_steps_per_second": 17.844, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 1.8709469305124527e-05, | |
| "loss": 2.6003, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "eval_loss": 3.0796427726745605, | |
| "eval_runtime": 151.7749, | |
| "eval_samples_per_second": 74.663, | |
| "eval_steps_per_second": 18.666, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 1.8153301965495378e-05, | |
| "loss": 2.6024, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "eval_loss": 3.076544761657715, | |
| "eval_runtime": 150.8625, | |
| "eval_samples_per_second": 75.115, | |
| "eval_steps_per_second": 18.779, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 1.7597134625866232e-05, | |
| "loss": 2.5883, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "eval_loss": 3.0901451110839844, | |
| "eval_runtime": 150.7672, | |
| "eval_samples_per_second": 75.162, | |
| "eval_steps_per_second": 18.791, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 1.7040967286237083e-05, | |
| "loss": 2.5393, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "eval_loss": 3.0962793827056885, | |
| "eval_runtime": 158.5186, | |
| "eval_samples_per_second": 71.487, | |
| "eval_steps_per_second": 17.872, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "learning_rate": 1.6484799946607937e-05, | |
| "loss": 2.5485, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "eval_loss": 3.0939271450042725, | |
| "eval_runtime": 150.6091, | |
| "eval_samples_per_second": 75.241, | |
| "eval_steps_per_second": 18.81, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 1.5928632606978788e-05, | |
| "loss": 2.5496, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "eval_loss": 3.092724084854126, | |
| "eval_runtime": 151.4272, | |
| "eval_samples_per_second": 74.835, | |
| "eval_steps_per_second": 18.709, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 1.5372465267349643e-05, | |
| "loss": 2.5577, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "eval_loss": 3.0966575145721436, | |
| "eval_runtime": 152.0136, | |
| "eval_samples_per_second": 74.546, | |
| "eval_steps_per_second": 18.636, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "learning_rate": 1.4816297927720494e-05, | |
| "loss": 2.5598, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "eval_loss": 3.091947078704834, | |
| "eval_runtime": 150.5612, | |
| "eval_samples_per_second": 75.265, | |
| "eval_steps_per_second": 18.816, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 1.4260130588091345e-05, | |
| "loss": 2.5623, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "eval_loss": 3.090240955352783, | |
| "eval_runtime": 151.6176, | |
| "eval_samples_per_second": 74.741, | |
| "eval_steps_per_second": 18.685, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "learning_rate": 1.3703963248462199e-05, | |
| "loss": 2.5138, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "eval_loss": 3.103158473968506, | |
| "eval_runtime": 150.096, | |
| "eval_samples_per_second": 75.498, | |
| "eval_steps_per_second": 18.875, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "learning_rate": 1.314779590883305e-05, | |
| "loss": 2.5142, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "eval_loss": 3.1014111042022705, | |
| "eval_runtime": 151.6463, | |
| "eval_samples_per_second": 74.727, | |
| "eval_steps_per_second": 18.682, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "learning_rate": 1.2591628569203902e-05, | |
| "loss": 2.514, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "eval_loss": 3.1006739139556885, | |
| "eval_runtime": 158.2805, | |
| "eval_samples_per_second": 71.594, | |
| "eval_steps_per_second": 17.899, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 1.2035461229574755e-05, | |
| "loss": 2.5206, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "eval_loss": 3.1010029315948486, | |
| "eval_runtime": 150.3118, | |
| "eval_samples_per_second": 75.39, | |
| "eval_steps_per_second": 18.847, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "learning_rate": 1.1479293889945606e-05, | |
| "loss": 2.52, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "eval_loss": 3.098405361175537, | |
| "eval_runtime": 156.5194, | |
| "eval_samples_per_second": 72.4, | |
| "eval_steps_per_second": 18.1, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 1.0923126550316459e-05, | |
| "loss": 2.5204, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "eval_loss": 3.0984325408935547, | |
| "eval_runtime": 151.2164, | |
| "eval_samples_per_second": 74.939, | |
| "eval_steps_per_second": 18.735, | |
| "step": 76000 | |
| } | |
| ], | |
| "max_steps": 95640, | |
| "num_train_epochs": 15, | |
| "total_flos": 1.1223588676956365e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |