{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99889339727038,
  "eval_steps": 50,
  "global_step": 677,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.85936797,
      "epoch": 0.0014754703061600886,
      "grad_norm": 7.874454151515785,
      "learning_rate": 0.0,
      "loss": 0.68658942,
      "memory(GiB)": 24.89,
      "step": 1,
      "train_speed(iter/s)": 0.03037
    },
    {
      "acc": 0.84321463,
      "epoch": 0.0073773515308004425,
      "grad_norm": 8.79654818500605,
      "learning_rate": 7.628557760232497e-07,
      "loss": 0.79017758,
      "memory(GiB)": 31.87,
      "step": 5,
      "train_speed(iter/s)": 0.092709
    },
    {
      "acc": 0.85256624,
      "epoch": 0.014754703061600885,
      "grad_norm": 8.005772072681205,
      "learning_rate": 1.0913998759473501e-06,
      "loss": 0.70760584,
      "memory(GiB)": 33.75,
      "step": 10,
      "train_speed(iter/s)": 0.120868
    },
    {
      "acc": 0.85825052,
      "epoch": 0.022132054592401328,
      "grad_norm": 4.861872738410458,
      "learning_rate": 1.2835858542361333e-06,
      "loss": 0.64002485,
      "memory(GiB)": 33.01,
      "step": 15,
      "train_speed(iter/s)": 0.137764
    },
    {
      "acc": 0.8677763,
      "epoch": 0.02950940612320177,
      "grad_norm": 2.624090927434735,
      "learning_rate": 1.4199439758714505e-06,
      "loss": 0.5428031,
      "memory(GiB)": 34.84,
      "step": 20,
      "train_speed(iter/s)": 0.148523
    },
    {
      "acc": 0.88262272,
      "epoch": 0.03688675765400221,
      "grad_norm": 2.2979293864903276,
      "learning_rate": 1.5257115520464994e-06,
      "loss": 0.45293074,
      "memory(GiB)": 31.42,
      "step": 25,
      "train_speed(iter/s)": 0.152816
    },
    {
      "acc": 0.88684368,
      "epoch": 0.044264109184802655,
      "grad_norm": 2.321279166108657,
      "learning_rate": 1.6121299541602339e-06,
      "loss": 0.44487882,
      "memory(GiB)": 34.17,
      "step": 30,
      "train_speed(iter/s)": 0.158226
    },
    {
      "acc": 0.88785019,
      "epoch": 0.0516414607156031,
      "grad_norm": 1.6462078924259171,
      "learning_rate": 1.6851956720581583e-06,
      "loss": 0.42431307,
      "memory(GiB)": 33.89,
      "step": 35,
      "train_speed(iter/s)": 0.160915
    },
    {
      "acc": 0.88771706,
      "epoch": 0.05901881224640354,
      "grad_norm": 2.0535907435541323,
      "learning_rate": 1.7484880757955508e-06,
      "loss": 0.41692309,
      "memory(GiB)": 33.45,
      "step": 40,
      "train_speed(iter/s)": 0.162212
    },
    {
      "acc": 0.89934006,
      "epoch": 0.06639616377720399,
      "grad_norm": 1.880024272875225,
      "learning_rate": 1.8043159324490168e-06,
      "loss": 0.37824535,
      "memory(GiB)": 32.49,
      "step": 45,
      "train_speed(iter/s)": 0.164895
    },
    {
      "acc": 0.89317064,
      "epoch": 0.07377351530800443,
      "grad_norm": 2.4862794709135483,
      "learning_rate": 1.8542556519706e-06,
      "loss": 0.39434323,
      "memory(GiB)": 31.37,
      "step": 50,
      "train_speed(iter/s)": 0.166039
    },
    {
      "epoch": 0.07377351530800443,
      "eval_acc": 0.8897788969852836,
      "eval_loss": 0.3586576581001282,
      "eval_runtime": 9.1458,
      "eval_samples_per_second": 23.836,
      "eval_steps_per_second": 3.062,
      "step": 50
    },
    {
      "acc": 0.90738754,
      "epoch": 0.08115086683880487,
      "grad_norm": 1.818011862869067,
      "learning_rate": 1.8994316234174147e-06,
      "loss": 0.34018734,
      "memory(GiB)": 43.99,
      "step": 55,
      "train_speed(iter/s)": 0.163069
    },
    {
      "acc": 0.89877386,
      "epoch": 0.08852821836960531,
      "grad_norm": 2.769061395622785,
      "learning_rate": 1.940674054084334e-06,
      "loss": 0.3834722,
      "memory(GiB)": 33.18,
      "step": 60,
      "train_speed(iter/s)": 0.163587
    },
    {
      "acc": 0.89560518,
      "epoch": 0.09590556990040576,
      "grad_norm": 3.0254291124967776,
      "learning_rate": 1.9786134125433064e-06,
      "loss": 0.40774279,
      "memory(GiB)": 36.96,
      "step": 65,
      "train_speed(iter/s)": 0.163438
    },
    {
      "acc": 0.90745316,
      "epoch": 0.1032829214312062,
      "grad_norm": 1.9702664127406297,
      "learning_rate": 1.998444790046656e-06,
      "loss": 0.34646974,
      "memory(GiB)": 33.91,
      "step": 70,
      "train_speed(iter/s)": 0.165839
    },
    {
      "acc": 0.90453644,
      "epoch": 0.11066027296200664,
      "grad_norm": 1.956498769069037,
      "learning_rate": 1.990668740279938e-06,
      "loss": 0.34771657,
      "memory(GiB)": 32.4,
      "step": 75,
      "train_speed(iter/s)": 0.166283
    },
    {
      "acc": 0.90620461,
      "epoch": 0.11803762449280708,
      "grad_norm": 1.7929520466502804,
      "learning_rate": 1.9828926905132194e-06,
      "loss": 0.34979777,
      "memory(GiB)": 32.69,
      "step": 80,
      "train_speed(iter/s)": 0.166045
    },
    {
      "acc": 0.90826426,
      "epoch": 0.12541497602360752,
      "grad_norm": 2.255532399806791,
      "learning_rate": 1.975116640746501e-06,
      "loss": 0.34021211,
      "memory(GiB)": 32.39,
      "step": 85,
      "train_speed(iter/s)": 0.16736
    },
    {
      "acc": 0.90400352,
      "epoch": 0.13279232755440798,
      "grad_norm": 1.606426887028717,
      "learning_rate": 1.9673405909797823e-06,
      "loss": 0.3593976,
      "memory(GiB)": 33.28,
      "step": 90,
      "train_speed(iter/s)": 0.166086
    },
    {
      "acc": 0.90273075,
      "epoch": 0.14016967908520842,
      "grad_norm": 1.7550090784719037,
      "learning_rate": 1.959564541213064e-06,
      "loss": 0.34527693,
      "memory(GiB)": 32.74,
      "step": 95,
      "train_speed(iter/s)": 0.167937
    },
    {
      "acc": 0.90631161,
      "epoch": 0.14754703061600885,
      "grad_norm": 2.151177976553762,
      "learning_rate": 1.9517884914463452e-06,
      "loss": 0.34601164,
      "memory(GiB)": 34.44,
      "step": 100,
      "train_speed(iter/s)": 0.167745
    },
    {
      "epoch": 0.14754703061600885,
      "eval_acc": 0.8985658665523646,
      "eval_loss": 0.3217943012714386,
      "eval_runtime": 9.0118,
      "eval_samples_per_second": 24.19,
      "eval_steps_per_second": 3.107,
      "step": 100
    },
    {
      "acc": 0.90445766,
      "epoch": 0.1549243821468093,
      "grad_norm": 2.0562867995030527,
      "learning_rate": 1.9440124416796267e-06,
      "loss": 0.34789481,
      "memory(GiB)": 42.9,
      "step": 105,
      "train_speed(iter/s)": 0.164588
    },
    {
      "acc": 0.90358963,
      "epoch": 0.16230173367760975,
      "grad_norm": 1.8705476431194374,
      "learning_rate": 1.936236391912908e-06,
      "loss": 0.34220786,
      "memory(GiB)": 31.78,
      "step": 110,
      "train_speed(iter/s)": 0.165873
    },
    {
      "acc": 0.9085845,
      "epoch": 0.16967908520841019,
      "grad_norm": 1.8278699994168497,
      "learning_rate": 1.9284603421461896e-06,
      "loss": 0.3233917,
      "memory(GiB)": 31.86,
      "step": 115,
      "train_speed(iter/s)": 0.16598
    },
    {
      "acc": 0.90997429,
      "epoch": 0.17705643673921062,
      "grad_norm": 1.945716912044592,
      "learning_rate": 1.920684292379471e-06,
      "loss": 0.34307232,
      "memory(GiB)": 35.12,
      "step": 120,
      "train_speed(iter/s)": 0.166556
    },
    {
      "acc": 0.91014824,
      "epoch": 0.18443378827001106,
      "grad_norm": 1.7135397704667659,
      "learning_rate": 1.912908242612753e-06,
      "loss": 0.32152495,
      "memory(GiB)": 35.65,
      "step": 125,
      "train_speed(iter/s)": 0.167431
    },
    {
      "acc": 0.9074892,
      "epoch": 0.19181113980081152,
      "grad_norm": 1.7116721779311537,
      "learning_rate": 1.9051321928460342e-06,
      "loss": 0.32937753,
      "memory(GiB)": 33.19,
      "step": 130,
      "train_speed(iter/s)": 0.167152
    },
    {
      "acc": 0.90999937,
      "epoch": 0.19918849133161196,
      "grad_norm": 1.6389355962957932,
      "learning_rate": 1.8973561430793156e-06,
      "loss": 0.33004179,
      "memory(GiB)": 33.36,
      "step": 135,
      "train_speed(iter/s)": 0.168049
    },
    {
      "acc": 0.9056819,
      "epoch": 0.2065658428624124,
      "grad_norm": 1.618401896535921,
      "learning_rate": 1.889580093312597e-06,
      "loss": 0.32887373,
      "memory(GiB)": 31.72,
      "step": 140,
      "train_speed(iter/s)": 0.167987
    },
    {
      "acc": 0.90799198,
      "epoch": 0.21394319439321283,
      "grad_norm": 2.0697336354422076,
      "learning_rate": 1.8818040435458787e-06,
      "loss": 0.33212447,
      "memory(GiB)": 32.61,
      "step": 145,
      "train_speed(iter/s)": 0.168358
    },
    {
      "acc": 0.89975605,
      "epoch": 0.2213205459240133,
      "grad_norm": 1.645561918074026,
      "learning_rate": 1.8740279937791602e-06,
      "loss": 0.35846872,
      "memory(GiB)": 32.3,
      "step": 150,
      "train_speed(iter/s)": 0.169041
    },
    {
      "epoch": 0.2213205459240133,
      "eval_acc": 0.9009412058865552,
      "eval_loss": 0.31137242913246155,
      "eval_runtime": 8.9003,
      "eval_samples_per_second": 24.494,
      "eval_steps_per_second": 3.146,
      "step": 150
    },
    {
      "acc": 0.90751858,
      "epoch": 0.22869789745481373,
      "grad_norm": 1.717914687308357,
      "learning_rate": 1.8662519440124416e-06,
      "loss": 0.33635845,
      "memory(GiB)": 43.6,
      "step": 155,
      "train_speed(iter/s)": 0.167082
    },
    {
      "acc": 0.90450516,
      "epoch": 0.23607524898561416,
      "grad_norm": 1.6863266349964434,
      "learning_rate": 1.858475894245723e-06,
      "loss": 0.35405197,
      "memory(GiB)": 33.81,
      "step": 160,
      "train_speed(iter/s)": 0.167855
    },
    {
      "acc": 0.90395164,
      "epoch": 0.2434526005164146,
      "grad_norm": 2.1013428529714906,
      "learning_rate": 1.8506998444790045e-06,
      "loss": 0.34658258,
      "memory(GiB)": 32.9,
      "step": 165,
      "train_speed(iter/s)": 0.167867
    },
    {
      "acc": 0.91127558,
      "epoch": 0.25082995204721503,
      "grad_norm": 1.6631238092162342,
      "learning_rate": 1.842923794712286e-06,
      "loss": 0.32777104,
      "memory(GiB)": 33.53,
      "step": 170,
      "train_speed(iter/s)": 0.168028
    },
    {
      "acc": 0.90831413,
      "epoch": 0.25820730357801547,
      "grad_norm": 2.0857884493375756,
      "learning_rate": 1.8351477449455676e-06,
      "loss": 0.32164063,
      "memory(GiB)": 32.03,
      "step": 175,
      "train_speed(iter/s)": 0.169138
    },
    {
      "acc": 0.91539364,
      "epoch": 0.26558465510881596,
      "grad_norm": 2.0145344122511095,
      "learning_rate": 1.827371695178849e-06,
      "loss": 0.30975475,
      "memory(GiB)": 34.31,
      "step": 180,
      "train_speed(iter/s)": 0.168973
    },
    {
      "acc": 0.9064558,
      "epoch": 0.2729620066396164,
      "grad_norm": 1.6651879684580124,
      "learning_rate": 1.8195956454121305e-06,
      "loss": 0.3413609,
      "memory(GiB)": 32.63,
      "step": 185,
      "train_speed(iter/s)": 0.169312
    },
    {
      "acc": 0.90828686,
      "epoch": 0.28033935817041683,
      "grad_norm": 2.3469960245148056,
      "learning_rate": 1.811819595645412e-06,
      "loss": 0.32660947,
      "memory(GiB)": 33.41,
      "step": 190,
      "train_speed(iter/s)": 0.169856
    },
    {
      "acc": 0.91549397,
      "epoch": 0.28771670970121727,
      "grad_norm": 2.1806025367886117,
      "learning_rate": 1.8040435458786937e-06,
      "loss": 0.30616875,
      "memory(GiB)": 36.24,
      "step": 195,
      "train_speed(iter/s)": 0.169761
    },
    {
      "acc": 0.90924969,
      "epoch": 0.2950940612320177,
      "grad_norm": 1.5587292681869693,
      "learning_rate": 1.7962674961119751e-06,
      "loss": 0.32027857,
      "memory(GiB)": 32.62,
      "step": 200,
      "train_speed(iter/s)": 0.170581
    },
    {
      "epoch": 0.2950940612320177,
      "eval_acc": 0.901896699528504,
      "eval_loss": 0.3015853464603424,
      "eval_runtime": 9.0231,
      "eval_samples_per_second": 24.16,
      "eval_steps_per_second": 3.103,
      "step": 200
    },
    {
      "acc": 0.91348085,
      "epoch": 0.30247141276281814,
      "grad_norm": 1.7818986098446097,
      "learning_rate": 1.7884914463452566e-06,
      "loss": 0.30208986,
      "memory(GiB)": 44.06,
      "step": 205,
      "train_speed(iter/s)": 0.169194
    },
    {
      "acc": 0.90921364,
      "epoch": 0.3098487642936186,
      "grad_norm": 4.02077354284952,
      "learning_rate": 1.780715396578538e-06,
      "loss": 0.31497798,
      "memory(GiB)": 34.58,
      "step": 210,
      "train_speed(iter/s)": 0.169003
    },
    {
      "acc": 0.91234264,
      "epoch": 0.317226115824419,
      "grad_norm": 1.856976113207096,
      "learning_rate": 1.7729393468118195e-06,
      "loss": 0.30694566,
      "memory(GiB)": 33.8,
      "step": 215,
      "train_speed(iter/s)": 0.16984
    },
    {
      "acc": 0.91051998,
      "epoch": 0.3246034673552195,
      "grad_norm": 1.7185168230569432,
      "learning_rate": 1.765163297045101e-06,
      "loss": 0.30961909,
      "memory(GiB)": 32.79,
      "step": 220,
      "train_speed(iter/s)": 0.169666
    },
    {
      "acc": 0.90716095,
      "epoch": 0.33198081888601993,
      "grad_norm": 1.340608010048739,
      "learning_rate": 1.7573872472783826e-06,
      "loss": 0.32777991,
      "memory(GiB)": 32.43,
      "step": 225,
      "train_speed(iter/s)": 0.169965
    },
    {
      "acc": 0.91547451,
      "epoch": 0.33935817041682037,
      "grad_norm": 1.6059763623857688,
      "learning_rate": 1.749611197511664e-06,
      "loss": 0.30423913,
      "memory(GiB)": 34.95,
      "step": 230,
      "train_speed(iter/s)": 0.169935
    },
    {
      "acc": 0.917132,
      "epoch": 0.3467355219476208,
      "grad_norm": 2.0390121908637644,
      "learning_rate": 1.7418351477449455e-06,
      "loss": 0.30788417,
      "memory(GiB)": 34.18,
      "step": 235,
      "train_speed(iter/s)": 0.169583
    },
    {
      "acc": 0.92253389,
      "epoch": 0.35411287347842124,
      "grad_norm": 1.7323441045370742,
      "learning_rate": 1.734059097978227e-06,
      "loss": 0.27823753,
      "memory(GiB)": 31.85,
      "step": 240,
      "train_speed(iter/s)": 0.17024
    },
    {
      "acc": 0.91325512,
      "epoch": 0.3614902250092217,
      "grad_norm": 1.6955182367729624,
      "learning_rate": 1.7262830482115086e-06,
      "loss": 0.31402481,
      "memory(GiB)": 32.14,
      "step": 245,
      "train_speed(iter/s)": 0.169973
    },
    {
      "acc": 0.91568565,
      "epoch": 0.3688675765400221,
      "grad_norm": 1.5212817841417117,
      "learning_rate": 1.71850699844479e-06,
      "loss": 0.29354782,
      "memory(GiB)": 33.28,
      "step": 250,
      "train_speed(iter/s)": 0.169891
    },
    {
      "epoch": 0.3688675765400221,
      "eval_acc": 0.903888055436491,
      "eval_loss": 0.2949393689632416,
      "eval_runtime": 8.8569,
      "eval_samples_per_second": 24.614,
      "eval_steps_per_second": 3.161,
      "step": 250
    },
    {
      "acc": 0.91542091,
      "epoch": 0.37624492807082255,
      "grad_norm": 1.872512089057089,
      "learning_rate": 1.7107309486780715e-06,
      "loss": 0.29765024,
      "memory(GiB)": 43.8,
      "step": 255,
      "train_speed(iter/s)": 0.169287
    },
    {
      "acc": 0.90894642,
      "epoch": 0.38362227960162304,
      "grad_norm": 2.118992381164901,
      "learning_rate": 1.702954898911353e-06,
      "loss": 0.32009149,
      "memory(GiB)": 33.0,
      "step": 260,
      "train_speed(iter/s)": 0.169108
    },
    {
      "acc": 0.91895199,
      "epoch": 0.3909996311324235,
      "grad_norm": 1.8087446200238866,
      "learning_rate": 1.6951788491446344e-06,
      "loss": 0.28518291,
      "memory(GiB)": 33.64,
      "step": 265,
      "train_speed(iter/s)": 0.169659
    },
    {
      "acc": 0.91831837,
      "epoch": 0.3983769826632239,
      "grad_norm": 2.295227865477349,
      "learning_rate": 1.6874027993779158e-06,
      "loss": 0.29493954,
      "memory(GiB)": 32.16,
      "step": 270,
      "train_speed(iter/s)": 0.16921
    },
    {
      "acc": 0.91772842,
      "epoch": 0.40575433419402435,
      "grad_norm": 1.8335936104899577,
      "learning_rate": 1.6796267496111975e-06,
      "loss": 0.29295368,
      "memory(GiB)": 32.48,
      "step": 275,
      "train_speed(iter/s)": 0.169211
    },
    {
      "acc": 0.9184288,
      "epoch": 0.4131316857248248,
      "grad_norm": 1.9183997806679902,
      "learning_rate": 1.671850699844479e-06,
      "loss": 0.29449196,
      "memory(GiB)": 32.65,
      "step": 280,
      "train_speed(iter/s)": 0.169821
    },
    {
      "acc": 0.91275759,
      "epoch": 0.4205090372556252,
      "grad_norm": 1.5737005817463792,
      "learning_rate": 1.6640746500777604e-06,
      "loss": 0.30824404,
      "memory(GiB)": 32.27,
      "step": 285,
      "train_speed(iter/s)": 0.169618
    },
    {
      "acc": 0.91761837,
      "epoch": 0.42788638878642565,
      "grad_norm": 1.6411868652328097,
      "learning_rate": 1.6562986003110419e-06,
      "loss": 0.28589807,
      "memory(GiB)": 33.9,
      "step": 290,
      "train_speed(iter/s)": 0.16978
    },
    {
      "acc": 0.91096239,
      "epoch": 0.4352637403172261,
      "grad_norm": 1.4763719992796571,
      "learning_rate": 1.6485225505443235e-06,
      "loss": 0.31501875,
      "memory(GiB)": 33.9,
      "step": 295,
      "train_speed(iter/s)": 0.170116
    },
    {
      "acc": 0.92102461,
      "epoch": 0.4426410918480266,
      "grad_norm": 1.7038633862826587,
      "learning_rate": 1.640746500777605e-06,
      "loss": 0.28700156,
      "memory(GiB)": 33.12,
      "step": 300,
      "train_speed(iter/s)": 0.16999
    },
    {
      "epoch": 0.4426410918480266,
      "eval_acc": 0.904986426632376,
      "eval_loss": 0.28871360421180725,
      "eval_runtime": 8.8172,
      "eval_samples_per_second": 24.724,
      "eval_steps_per_second": 3.176,
      "step": 300
    },
    {
      "acc": 0.9137413,
      "epoch": 0.450018443378827,
      "grad_norm": 1.5572757830459178,
      "learning_rate": 1.6329704510108864e-06,
      "loss": 0.3066596,
      "memory(GiB)": 44.77,
      "step": 305,
      "train_speed(iter/s)": 0.169643
    },
    {
      "acc": 0.92225361,
      "epoch": 0.45739579490962745,
      "grad_norm": 1.7973596806557957,
      "learning_rate": 1.6251944012441679e-06,
      "loss": 0.28060098,
      "memory(GiB)": 34.38,
      "step": 310,
      "train_speed(iter/s)": 0.169469
    },
    {
      "acc": 0.91542816,
      "epoch": 0.4647731464404279,
      "grad_norm": 1.7774091029439925,
      "learning_rate": 1.6174183514774493e-06,
      "loss": 0.29976537,
      "memory(GiB)": 33.81,
      "step": 315,
      "train_speed(iter/s)": 0.169523
    },
    {
      "acc": 0.91291943,
      "epoch": 0.4721504979712283,
      "grad_norm": 1.3755306649838441,
      "learning_rate": 1.6096423017107308e-06,
      "loss": 0.30613976,
      "memory(GiB)": 33.81,
      "step": 320,
      "train_speed(iter/s)": 0.169769
    },
    {
      "acc": 0.90916691,
      "epoch": 0.47952784950202876,
      "grad_norm": 1.9213831375809023,
      "learning_rate": 1.6018662519440122e-06,
      "loss": 0.32510529,
      "memory(GiB)": 34.44,
      "step": 325,
      "train_speed(iter/s)": 0.169545
    },
    {
      "acc": 0.91636696,
      "epoch": 0.4869052010328292,
      "grad_norm": 1.8837685149781478,
      "learning_rate": 1.5940902021772939e-06,
      "loss": 0.30537646,
      "memory(GiB)": 31.2,
      "step": 330,
      "train_speed(iter/s)": 0.170038
    },
    {
      "acc": 0.91307325,
      "epoch": 0.4942825525636297,
      "grad_norm": 1.8595782698159422,
      "learning_rate": 1.5863141524105753e-06,
      "loss": 0.30300996,
      "memory(GiB)": 30.74,
      "step": 335,
      "train_speed(iter/s)": 0.169983
    },
    {
      "acc": 0.91927223,
      "epoch": 0.5016599040944301,
      "grad_norm": 1.8693944311229003,
      "learning_rate": 1.5785381026438568e-06,
      "loss": 0.28294766,
      "memory(GiB)": 31.5,
      "step": 340,
      "train_speed(iter/s)": 0.170169
    },
    {
      "acc": 0.92018118,
      "epoch": 0.5090372556252305,
      "grad_norm": 1.6240951695142463,
      "learning_rate": 1.5707620528771385e-06,
      "loss": 0.27536349,
      "memory(GiB)": 32.84,
      "step": 345,
      "train_speed(iter/s)": 0.170494
    },
    {
      "acc": 0.91428967,
      "epoch": 0.5164146071560309,
      "grad_norm": 2.0654305075288653,
      "learning_rate": 1.56298600311042e-06,
      "loss": 0.30193062,
      "memory(GiB)": 33.88,
      "step": 350,
      "train_speed(iter/s)": 0.170499
    },
    {
      "epoch": 0.5164146071560309,
      "eval_acc": 0.906031218745535,
      "eval_loss": 0.2829771637916565,
      "eval_runtime": 8.9252,
      "eval_samples_per_second": 24.425,
      "eval_steps_per_second": 3.137,
      "step": 350
    },
    {
      "acc": 0.92116051,
      "epoch": 0.5237919586868315,
      "grad_norm": 2.2709862324112136,
      "learning_rate": 1.5552099533437014e-06,
      "loss": 0.277144,
      "memory(GiB)": 44.05,
      "step": 355,
      "train_speed(iter/s)": 0.169773
    },
    {
      "acc": 0.90278854,
      "epoch": 0.5311693102176319,
      "grad_norm": 1.9738153042801483,
      "learning_rate": 1.5474339035769828e-06,
      "loss": 0.33822517,
      "memory(GiB)": 31.78,
      "step": 360,
      "train_speed(iter/s)": 0.170163
    },
    {
      "acc": 0.92497654,
      "epoch": 0.5385466617484324,
      "grad_norm": 1.2430005126419985,
      "learning_rate": 1.5396578538102643e-06,
      "loss": 0.26646669,
      "memory(GiB)": 33.8,
      "step": 365,
      "train_speed(iter/s)": 0.16992
    },
    {
      "acc": 0.91328669,
      "epoch": 0.5459240132792328,
      "grad_norm": 1.732568460701246,
      "learning_rate": 1.5318818040435457e-06,
      "loss": 0.30124869,
      "memory(GiB)": 34.07,
      "step": 370,
      "train_speed(iter/s)": 0.170382
    },
    {
      "acc": 0.91603355,
      "epoch": 0.5533013648100332,
      "grad_norm": 1.6627563648419381,
      "learning_rate": 1.5241057542768272e-06,
      "loss": 0.29759171,
      "memory(GiB)": 32.61,
      "step": 375,
      "train_speed(iter/s)": 0.170197
    },
    {
      "acc": 0.90871716,
      "epoch": 0.5606787163408337,
      "grad_norm": 2.1331488669107492,
      "learning_rate": 1.5163297045101088e-06,
      "loss": 0.33630853,
      "memory(GiB)": 32.33,
      "step": 380,
      "train_speed(iter/s)": 0.17029
    },
    {
      "acc": 0.90700073,
      "epoch": 0.5680560678716341,
      "grad_norm": 2.080763753555995,
      "learning_rate": 1.5085536547433903e-06,
      "loss": 0.325877,
      "memory(GiB)": 32.95,
      "step": 385,
      "train_speed(iter/s)": 0.170474
    },
    {
      "acc": 0.91835623,
      "epoch": 0.5754334194024345,
      "grad_norm": 1.5911495384236254,
      "learning_rate": 1.500777604976672e-06,
      "loss": 0.28332872,
      "memory(GiB)": 31.78,
      "step": 390,
      "train_speed(iter/s)": 0.170283
    },
    {
      "acc": 0.91712914,
      "epoch": 0.582810770933235,
      "grad_norm": 1.6237776507352246,
      "learning_rate": 1.4930015552099534e-06,
      "loss": 0.28782868,
      "memory(GiB)": 33.13,
      "step": 395,
      "train_speed(iter/s)": 0.170424
    },
    {
      "acc": 0.92452984,
      "epoch": 0.5901881224640354,
      "grad_norm": 1.9617693211652296,
      "learning_rate": 1.4852255054432348e-06,
      "loss": 0.25721183,
      "memory(GiB)": 34.52,
      "step": 400,
      "train_speed(iter/s)": 0.170549
    },
    {
      "epoch": 0.5901881224640354,
      "eval_acc": 0.9067634662094585,
      "eval_loss": 0.27780693769454956,
      "eval_runtime": 8.9713,
      "eval_samples_per_second": 24.3,
      "eval_steps_per_second": 3.121,
      "step": 400
    },
    {
      "acc": 0.91402645,
      "epoch": 0.5975654739948358,
      "grad_norm": 1.6283342820719429,
      "learning_rate": 1.4774494556765163e-06,
      "loss": 0.29935551,
      "memory(GiB)": 43.79,
      "step": 405,
      "train_speed(iter/s)": 0.169655
    },
    {
      "acc": 0.91232147,
      "epoch": 0.6049428255256363,
      "grad_norm": 1.7979698219270268,
      "learning_rate": 1.4696734059097977e-06,
      "loss": 0.29618566,
      "memory(GiB)": 34.75,
      "step": 410,
      "train_speed(iter/s)": 0.169867
    },
    {
      "acc": 0.91495514,
      "epoch": 0.6123201770564367,
      "grad_norm": 1.400313093548897,
      "learning_rate": 1.4618973561430792e-06,
      "loss": 0.30076814,
      "memory(GiB)": 33.36,
      "step": 415,
      "train_speed(iter/s)": 0.169686
    },
    {
      "acc": 0.91793385,
      "epoch": 0.6196975285872371,
      "grad_norm": 1.5440217170439645,
      "learning_rate": 1.4541213063763606e-06,
      "loss": 0.27723732,
      "memory(GiB)": 32.03,
      "step": 420,
      "train_speed(iter/s)": 0.169706
    },
    {
      "acc": 0.92025652,
      "epoch": 0.6270748801180376,
      "grad_norm": 1.7171089334482643,
      "learning_rate": 1.446345256609642e-06,
      "loss": 0.28218346,
      "memory(GiB)": 31.84,
      "step": 425,
      "train_speed(iter/s)": 0.169824
    },
    {
      "acc": 0.91456184,
      "epoch": 0.634452231648838,
      "grad_norm": 1.7617810648771757,
      "learning_rate": 1.4385692068429238e-06,
      "loss": 0.30232787,
      "memory(GiB)": 33.01,
      "step": 430,
      "train_speed(iter/s)": 0.169549
    },
    {
      "acc": 0.91554451,
      "epoch": 0.6418295831796386,
      "grad_norm": 2.1102714988825966,
      "learning_rate": 1.4307931570762052e-06,
      "loss": 0.29879627,
      "memory(GiB)": 33.18,
      "step": 435,
      "train_speed(iter/s)": 0.169677
    },
    {
      "acc": 0.92126179,
      "epoch": 0.649206934710439,
      "grad_norm": 2.046949703950944,
      "learning_rate": 1.4230171073094869e-06,
      "loss": 0.27905126,
      "memory(GiB)": 35.07,
      "step": 440,
      "train_speed(iter/s)": 0.169605
    },
    {
      "acc": 0.90152893,
      "epoch": 0.6565842862412394,
      "grad_norm": 2.001971595085909,
      "learning_rate": 1.4152410575427683e-06,
      "loss": 0.34060516,
      "memory(GiB)": 33.51,
      "step": 445,
      "train_speed(iter/s)": 0.169689
    },
    {
      "acc": 0.91629639,
      "epoch": 0.6639616377720399,
      "grad_norm": 2.0397672790155528,
      "learning_rate": 1.4074650077760498e-06,
      "loss": 0.28595252,
      "memory(GiB)": 34.12,
      "step": 450,
      "train_speed(iter/s)": 0.170047
    },
    {
      "epoch": 0.6639616377720399,
      "eval_acc": 0.9078082583226175,
      "eval_loss": 0.2715848386287689,
      "eval_runtime": 8.8964,
      "eval_samples_per_second": 24.504,
      "eval_steps_per_second": 3.147,
      "step": 450
    },
    {
      "acc": 0.92627125,
      "epoch": 0.6713389893028403,
      "grad_norm": 1.6378143906534044,
      "learning_rate": 1.3996889580093312e-06,
      "loss": 0.25918436,
      "memory(GiB)": 43.88,
      "step": 455,
      "train_speed(iter/s)": 0.169369
    },
    {
      "acc": 0.91979427,
      "epoch": 0.6787163408336407,
      "grad_norm": 1.7082862687854972,
      "learning_rate": 1.3919129082426127e-06,
      "loss": 0.27077117,
      "memory(GiB)": 32.33,
      "step": 460,
      "train_speed(iter/s)": 0.169438
    },
    {
      "acc": 0.91361713,
      "epoch": 0.6860936923644412,
      "grad_norm": 2.293000555161464,
      "learning_rate": 1.3841368584758941e-06,
      "loss": 0.30449131,
      "memory(GiB)": 32.93,
      "step": 465,
      "train_speed(iter/s)": 0.169581
    },
    {
      "acc": 0.91954422,
      "epoch": 0.6934710438952416,
      "grad_norm": 1.8478883729217541,
      "learning_rate": 1.3763608087091756e-06,
      "loss": 0.29147563,
      "memory(GiB)": 32.32,
      "step": 470,
      "train_speed(iter/s)": 0.169425
    },
    {
      "acc": 0.91925821,
      "epoch": 0.700848395426042,
      "grad_norm": 2.1771276083255833,
      "learning_rate": 1.368584758942457e-06,
      "loss": 0.27578421,
      "memory(GiB)": 31.55,
      "step": 475,
      "train_speed(iter/s)": 0.169717
    },
    {
      "acc": 0.91978226,
      "epoch": 0.7082257469568425,
      "grad_norm": 1.5525703471804124,
      "learning_rate": 1.3608087091757387e-06,
      "loss": 0.28457327,
      "memory(GiB)": 34.35,
      "step": 480,
      "train_speed(iter/s)": 0.169473
    },
    {
      "acc": 0.91358566,
      "epoch": 0.7156030984876429,
      "grad_norm": 1.6094545899681876,
      "learning_rate": 1.3530326594090201e-06,
      "loss": 0.29641771,
      "memory(GiB)": 34.35,
      "step": 485,
      "train_speed(iter/s)": 0.169292
    },
    {
      "acc": 0.9157114,
      "epoch": 0.7229804500184434,
      "grad_norm": 2.001462148706446,
      "learning_rate": 1.3452566096423018e-06,
      "loss": 0.30091541,
      "memory(GiB)": 33.0,
      "step": 490,
      "train_speed(iter/s)": 0.169539
    },
    {
      "acc": 0.9181448,
      "epoch": 0.7303578015492438,
      "grad_norm": 1.933852376850104,
      "learning_rate": 1.3374805598755833e-06,
      "loss": 0.28622799,
      "memory(GiB)": 31.96,
      "step": 495,
      "train_speed(iter/s)": 0.169315
    },
    {
      "acc": 0.91473122,
      "epoch": 0.7377351530800442,
      "grad_norm": 1.9036456322193762,
      "learning_rate": 1.3297045101088647e-06,
      "loss": 0.3094301,
      "memory(GiB)": 31.84,
      "step": 500,
      "train_speed(iter/s)": 0.169482
    },
    {
      "epoch": 0.7377351530800442,
      "eval_acc": 0.9090048578368338,
      "eval_loss": 0.2688305675983429,
      "eval_runtime": 8.8274,
      "eval_samples_per_second": 24.696,
      "eval_steps_per_second": 3.172,
      "step": 500
    },
    {
      "acc": 0.91458435,
      "epoch": 0.7451125046108447,
      "grad_norm": 1.9335752594206985,
      "learning_rate": 1.3219284603421462e-06,
      "loss": 0.29494238,
      "memory(GiB)": 43.4,
      "step": 505,
      "train_speed(iter/s)": 0.168821
    },
    {
      "acc": 0.9221386,
      "epoch": 0.7524898561416451,
      "grad_norm": 1.8197097143608403,
      "learning_rate": 1.3141524105754276e-06,
      "loss": 0.2647439,
      "memory(GiB)": 33.36,
      "step": 510,
      "train_speed(iter/s)": 0.168682
    },
    {
      "acc": 0.92193203,
      "epoch": 0.7598672076724456,
      "grad_norm": 1.901554742963865,
      "learning_rate": 1.306376360808709e-06,
      "loss": 0.27191839,
      "memory(GiB)": 30.47,
      "step": 515,
      "train_speed(iter/s)": 0.168924
    },
    {
      "acc": 0.91413088,
      "epoch": 0.7672445592032461,
      "grad_norm": 2.0670792917636236,
      "learning_rate": 1.2986003110419905e-06,
      "loss": 0.296503,
      "memory(GiB)": 32.43,
      "step": 520,
      "train_speed(iter/s)": 0.168732
    },
    {
      "acc": 0.92014456,
      "epoch": 0.7746219107340465,
      "grad_norm": 1.3940992355499904,
      "learning_rate": 1.290824261275272e-06,
      "loss": 0.27345006,
      "memory(GiB)": 31.88,
      "step": 525,
      "train_speed(iter/s)": 0.168564
    },
    {
      "acc": 0.91787033,
      "epoch": 0.781999262264847,
      "grad_norm": 1.7528498159038246,
      "learning_rate": 1.2830482115085536e-06,
      "loss": 0.27718287,
      "memory(GiB)": 32.83,
      "step": 530,
      "train_speed(iter/s)": 0.168633
    },
    {
      "acc": 0.91950254,
      "epoch": 0.7893766137956474,
      "grad_norm": 1.6045395248629215,
      "learning_rate": 1.275272161741835e-06,
      "loss": 0.27553134,
      "memory(GiB)": 30.99,
      "step": 535,
      "train_speed(iter/s)": 0.168504
    },
    {
      "acc": 0.91442375,
      "epoch": 0.7967539653264478,
      "grad_norm": 2.0480557410695686,
      "learning_rate": 1.2674961119751167e-06,
      "loss": 0.29672928,
      "memory(GiB)": 32.9,
      "step": 540,
      "train_speed(iter/s)": 0.168746
    },
    {
      "acc": 0.91783228,
      "epoch": 0.8041313168572483,
      "grad_norm": 1.7063380836356228,
      "learning_rate": 1.2597200622083982e-06,
      "loss": 0.28551073,
      "memory(GiB)": 32.64,
      "step": 545,
      "train_speed(iter/s)": 0.168632
    },
    {
      "acc": 0.91965294,
      "epoch": 0.8115086683880487,
      "grad_norm": 1.8091430299196016,
      "learning_rate": 1.2519440124416796e-06,
      "loss": 0.28367462,
      "memory(GiB)": 33.12,
      "step": 550,
      "train_speed(iter/s)": 0.168537
    },
    {
      "epoch": 0.8115086683880487,
      "eval_acc": 0.9094959994284898,
      "eval_loss": 0.265609472990036,
      "eval_runtime": 8.9354,
      "eval_samples_per_second": 24.397,
      "eval_steps_per_second": 3.134,
      "step": 550
    },
    {
      "acc": 0.91708422,
      "epoch": 0.8188860199188491,
      "grad_norm": 1.9338041082162762,
      "learning_rate": 1.244167962674961e-06,
      "loss": 0.30288501,
      "memory(GiB)": 44.46,
      "step": 555,
      "train_speed(iter/s)": 0.168246
    },
    {
      "acc": 0.91793032,
      "epoch": 0.8262633714496496,
      "grad_norm": 1.960186880981984,
      "learning_rate": 1.2363919129082425e-06,
      "loss": 0.29391913,
      "memory(GiB)": 33.02,
      "step": 560,
      "train_speed(iter/s)": 0.168119
    },
    {
      "acc": 0.92976294,
      "epoch": 0.83364072298045,
      "grad_norm": 1.7220525036525174,
      "learning_rate": 1.228615863141524e-06,
      "loss": 0.24753182,
      "memory(GiB)": 32.77,
      "step": 565,
      "train_speed(iter/s)": 0.16819
    },
    {
      "acc": 0.9202878,
      "epoch": 0.8410180745112504,
      "grad_norm": 1.9681280144249207,
      "learning_rate": 1.2208398133748054e-06,
      "loss": 0.27648234,
      "memory(GiB)": 32.36,
      "step": 570,
      "train_speed(iter/s)": 0.168331
    },
    {
      "acc": 0.91870079,
      "epoch": 0.8483954260420509,
      "grad_norm": 1.6402903494642216,
      "learning_rate": 1.2130637636080869e-06,
      "loss": 0.29140263,
      "memory(GiB)": 35.18,
      "step": 575,
      "train_speed(iter/s)": 0.168255
    },
    {
      "acc": 0.91364193,
      "epoch": 0.8557727775728513,
      "grad_norm": 2.146651599757078,
      "learning_rate": 1.2052877138413686e-06,
      "loss": 0.31224487,
      "memory(GiB)": 37.43,
      "step": 580,
      "train_speed(iter/s)": 0.168463
    },
    {
      "acc": 0.92091951,
      "epoch": 0.8631501291036517,
      "grad_norm": 2.110687395796676,
      "learning_rate": 1.19751166407465e-06,
      "loss": 0.27074888,
      "memory(GiB)": 30.34,
      "step": 585,
      "train_speed(iter/s)": 0.16837
    },
    {
      "acc": 0.92361298,
      "epoch": 0.8705274806344522,
      "grad_norm": 1.341809177582426,
      "learning_rate": 1.1897356143079317e-06,
      "loss": 0.26371779,
      "memory(GiB)": 32.35,
      "step": 590,
      "train_speed(iter/s)": 0.168375
    },
    {
      "acc": 0.92123985,
      "epoch": 0.8779048321652527,
      "grad_norm": 1.8270563745834436,
      "learning_rate": 1.1819595645412131e-06,
      "loss": 0.26702247,
      "memory(GiB)": 34.77,
      "step": 595,
      "train_speed(iter/s)": 0.168532
    },
    {
      "acc": 0.91653709,
      "epoch": 0.8852821836960532,
      "grad_norm": 1.6527432011832037,
      "learning_rate": 1.1741835147744946e-06,
      "loss": 0.29842911,
      "memory(GiB)": 33.87,
      "step": 600,
      "train_speed(iter/s)": 0.168424
    },
    {
      "epoch": 0.8852821836960532,
      "eval_acc": 0.9105765109301329,
      "eval_loss": 0.2623133361339569,
      "eval_runtime": 8.7796,
      "eval_samples_per_second": 24.83,
      "eval_steps_per_second": 3.189,
      "step": 600
    },
    {
      "acc": 0.91810665,
      "epoch": 0.8926595352268536,
      "grad_norm": 1.3239706750197222,
      "learning_rate": 1.166407465007776e-06,
      "loss": 0.29543982,
      "memory(GiB)": 43.63,
      "step": 605,
      "train_speed(iter/s)": 0.16811
    },
    {
      "acc": 0.92373562,
      "epoch": 0.900036886757654,
      "grad_norm": 1.589090709862595,
      "learning_rate": 1.1586314152410575e-06,
      "loss": 0.27000737,
      "memory(GiB)": 32.08,
      "step": 610,
      "train_speed(iter/s)": 0.168111
    },
    {
      "acc": 0.92571859,
      "epoch": 0.9074142382884545,
      "grad_norm": 1.786690071917202,
      "learning_rate": 1.150855365474339e-06,
      "loss": 0.26558821,
      "memory(GiB)": 34.26,
      "step": 615,
      "train_speed(iter/s)": 0.167944
    },
    {
      "acc": 0.92350941,
      "epoch": 0.9147915898192549,
      "grad_norm": 1.4482760998007842,
      "learning_rate": 1.1430793157076204e-06,
      "loss": 0.27038224,
      "memory(GiB)": 32.87,
      "step": 620,
      "train_speed(iter/s)": 0.168075
    },
    {
      "acc": 0.92567997,
      "epoch": 0.9221689413500553,
      "grad_norm": 1.5651995631831526,
      "learning_rate": 1.1353032659409018e-06,
      "loss": 0.25891747,
      "memory(GiB)": 32.63,
      "step": 625,
      "train_speed(iter/s)": 0.168015
    },
    {
      "acc": 0.91823616,
      "epoch": 0.9295462928808558,
      "grad_norm": 1.4462434724962336,
      "learning_rate": 1.1275272161741835e-06,
      "loss": 0.2788033,
      "memory(GiB)": 38.22,
      "step": 630,
      "train_speed(iter/s)": 0.167998
    },
    {
      "acc": 0.92322083,
      "epoch": 0.9369236444116562,
      "grad_norm": 1.4194043988299254,
      "learning_rate": 1.119751166407465e-06,
      "loss": 0.26030297,
      "memory(GiB)": 32.29,
      "step": 635,
      "train_speed(iter/s)": 0.168162
    },
    {
      "acc": 0.92457771,
      "epoch": 0.9443009959424566,
      "grad_norm": 1.8304569462755849,
      "learning_rate": 1.1119751166407466e-06,
      "loss": 0.27183619,
      "memory(GiB)": 35.33,
      "step": 640,
      "train_speed(iter/s)": 0.168086
    },
    {
      "acc": 0.9201807,
      "epoch": 0.9516783474732571,
      "grad_norm": 1.6355541683467607,
      "learning_rate": 1.104199066874028e-06,
      "loss": 0.27730408,
      "memory(GiB)": 31.4,
      "step": 645,
      "train_speed(iter/s)": 0.168284
    },
    {
      "acc": 0.92337418,
      "epoch": 0.9590556990040575,
      "grad_norm": 1.6309155055635356,
      "learning_rate": 1.0964230171073095e-06,
      "loss": 0.25860276,
      "memory(GiB)": 32.67,
      "step": 650,
      "train_speed(iter/s)": 0.168267
    },
    {
      "epoch": 0.9590556990040575,
      "eval_acc": 0.9113176882411773,
      "eval_loss": 0.2569684386253357,
      "eval_runtime": 8.8598,
      "eval_samples_per_second": 24.605,
      "eval_steps_per_second": 3.16,
      "step": 650
    },
    {
      "acc": 0.91919975,
      "epoch": 0.966433050534858,
      "grad_norm": 1.482378816274918,
      "learning_rate": 1.088646967340591e-06,
      "loss": 0.28527048,
      "memory(GiB)": 45.59,
      "step": 655,
      "train_speed(iter/s)": 0.167772
    },
    {
      "acc": 0.92037735,
      "epoch": 0.9738104020656584,
      "grad_norm": 2.2165369625767712,
      "learning_rate": 1.0808709175738724e-06,
      "loss": 0.28198528,
      "memory(GiB)": 32.93,
      "step": 660,
      "train_speed(iter/s)": 0.16789
    },
    {
      "acc": 0.92200727,
      "epoch": 0.9811877535964588,
      "grad_norm": 1.7151646172394919,
      "learning_rate": 1.0730948678071539e-06,
      "loss": 0.27098572,
      "memory(GiB)": 33.1,
      "step": 665,
      "train_speed(iter/s)": 0.167862
    },
    {
      "acc": 0.92197828,
      "epoch": 0.9885651051272594,
      "grad_norm": 2.076606131505725,
      "learning_rate": 1.0653188180404353e-06,
      "loss": 0.26747627,
      "memory(GiB)": 34.45,
      "step": 670,
      "train_speed(iter/s)": 0.167945
    },
    {
      "acc": 0.92063084,
      "epoch": 0.9959424566580598,
      "grad_norm": 1.7465662806523121,
      "learning_rate": 1.0575427682737168e-06,
      "loss": 0.27087922,
      "memory(GiB)": 39.51,
      "step": 675,
      "train_speed(iter/s)": 0.167951
    }
  ],
  "logging_steps": 5,
  "max_steps": 1354,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 66000591650816.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}