| { |
| "best_global_step": 1728, |
| "best_metric": 0.9399001064439532, |
| "best_model_checkpoint": "./my_unified_model_classification_4_6_10/checkpoint-1728", |
| "epoch": 2.0, |
| "eval_steps": 288, |
| "global_step": 1924, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05197505197505198, |
| "grad_norm": 42.5, |
| "learning_rate": 2.5520833333333334e-06, |
| "loss": 0.955, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10395010395010396, |
| "grad_norm": 37.5, |
| "learning_rate": 5.156250000000001e-06, |
| "loss": 0.6218, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15592515592515593, |
| "grad_norm": 154.0, |
| "learning_rate": 7.760416666666666e-06, |
| "loss": 0.5542, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2079002079002079, |
| "grad_norm": 107.0, |
| "learning_rate": 9.95958429561201e-06, |
| "loss": 0.581, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2598752598752599, |
| "grad_norm": 14.75, |
| "learning_rate": 9.670900692840648e-06, |
| "loss": 0.5363, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2993762993762994, |
| "eval_accuracy": 0.7968526466380543, |
| "eval_auc": 0.723546321417172, |
| "eval_f1": 0.882882207392967, |
| "eval_false_negatives": 275, |
| "eval_false_positives": 2849, |
| "eval_loss": 0.479750394821167, |
| "eval_precision": 0.8051832603938731, |
| "eval_recall": 0.9771784232365145, |
| "eval_runtime": 515.9489, |
| "eval_samples_per_second": 29.805, |
| "eval_specificity": 0.14393028846153846, |
| "eval_steps_per_second": 0.235, |
| "eval_true_negatives": 479, |
| "eval_true_positives": 11775, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.2993762993762994, |
| "step": 288, |
| "train_accuracy": 0.803, |
| "train_auc": 0.7153718677156178, |
| "train_f1": 0.8873642081189251, |
| "train_false_negatives": 16, |
| "train_false_positives": 181, |
| "train_loss": 0.4671786427497864, |
| "train_precision": 0.8108672936259144, |
| "train_recall": 0.9797979797979798, |
| "train_runtime": 33.5789, |
| "train_samples_per_second": 29.781, |
| "train_specificity": 0.12980769230769232, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 27, |
| "train_true_positives": 776 |
| }, |
| { |
| "epoch": 0.31185031185031187, |
| "grad_norm": 37.75, |
| "learning_rate": 9.382217090069284e-06, |
| "loss": 0.4916, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.36382536382536385, |
| "grad_norm": 8.75, |
| "learning_rate": 9.093533487297921e-06, |
| "loss": 0.4755, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4158004158004158, |
| "grad_norm": 48.0, |
| "learning_rate": 8.804849884526559e-06, |
| "loss": 0.4985, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4677754677754678, |
| "grad_norm": 18.625, |
| "learning_rate": 8.516166281755197e-06, |
| "loss": 0.4602, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5197505197505198, |
| "grad_norm": 33.0, |
| "learning_rate": 8.227482678983834e-06, |
| "loss": 0.4592, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5717255717255717, |
| "grad_norm": 45.5, |
| "learning_rate": 7.938799076212472e-06, |
| "loss": 0.4701, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5987525987525988, |
| "eval_accuracy": 0.807192092599818, |
| "eval_auc": 0.7876185465209066, |
| "eval_f1": 0.8887387894480093, |
| "eval_false_negatives": 208, |
| "eval_false_positives": 2757, |
| "eval_loss": 0.4651535153388977, |
| "eval_precision": 0.8111514487293651, |
| "eval_recall": 0.9827385892116183, |
| "eval_runtime": 516.3506, |
| "eval_samples_per_second": 29.782, |
| "eval_specificity": 0.17157451923076922, |
| "eval_steps_per_second": 0.234, |
| "eval_true_negatives": 571, |
| "eval_true_positives": 11842, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.5987525987525988, |
| "step": 576, |
| "train_accuracy": 0.824, |
| "train_auc": 0.8292374924653406, |
| "train_f1": 0.8983833718244804, |
| "train_false_negatives": 12, |
| "train_false_positives": 164, |
| "train_loss": 0.4171445071697235, |
| "train_precision": 0.8259023354564756, |
| "train_recall": 0.9848101265822785, |
| "train_runtime": 33.5723, |
| "train_samples_per_second": 29.786, |
| "train_specificity": 0.21904761904761905, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 46, |
| "train_true_positives": 778 |
| }, |
| { |
| "epoch": 0.6237006237006237, |
| "grad_norm": 9.5625, |
| "learning_rate": 7.650115473441108e-06, |
| "loss": 0.4696, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6756756756756757, |
| "grad_norm": 28.125, |
| "learning_rate": 7.3614318706697466e-06, |
| "loss": 0.4301, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7276507276507277, |
| "grad_norm": 20.5, |
| "learning_rate": 7.072748267898384e-06, |
| "loss": 0.4081, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7796257796257796, |
| "grad_norm": 23.375, |
| "learning_rate": 6.784064665127021e-06, |
| "loss": 0.3879, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8316008316008316, |
| "grad_norm": 21.5, |
| "learning_rate": 6.495381062355659e-06, |
| "loss": 0.3572, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8835758835758836, |
| "grad_norm": 62.25, |
| "learning_rate": 6.2066974595842965e-06, |
| "loss": 0.3478, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8981288981288982, |
| "eval_accuracy": 0.862205748471843, |
| "eval_auc": 0.8848185644749442, |
| "eval_f1": 0.915668404505114, |
| "eval_false_negatives": 546, |
| "eval_false_positives": 1573, |
| "eval_loss": 0.3326202929019928, |
| "eval_precision": 0.8797124722795748, |
| "eval_recall": 0.9546887966804979, |
| "eval_runtime": 516.3368, |
| "eval_samples_per_second": 29.783, |
| "eval_specificity": 0.52734375, |
| "eval_steps_per_second": 0.234, |
| "eval_true_negatives": 1755, |
| "eval_true_positives": 11504, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.8981288981288982, |
| "step": 864, |
| "train_accuracy": 0.902, |
| "train_auc": 0.9281183226495727, |
| "train_f1": 0.939877300613497, |
| "train_false_negatives": 26, |
| "train_false_positives": 72, |
| "train_loss": 0.2601640224456787, |
| "train_precision": 0.9140811455847255, |
| "train_recall": 0.9671717171717171, |
| "train_runtime": 33.5805, |
| "train_samples_per_second": 29.779, |
| "train_specificity": 0.6538461538461539, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 136, |
| "train_true_positives": 766 |
| }, |
| { |
| "epoch": 0.9355509355509356, |
| "grad_norm": 16.25, |
| "learning_rate": 5.918013856812933e-06, |
| "loss": 0.3391, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9875259875259875, |
| "grad_norm": 20.75, |
| "learning_rate": 5.629330254041571e-06, |
| "loss": 0.3058, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0395010395010396, |
| "grad_norm": 18.5, |
| "learning_rate": 5.340646651270208e-06, |
| "loss": 0.2592, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0914760914760915, |
| "grad_norm": 21.875, |
| "learning_rate": 5.0519630484988455e-06, |
| "loss": 0.2312, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1434511434511434, |
| "grad_norm": 17.375, |
| "learning_rate": 4.763279445727483e-06, |
| "loss": 0.2124, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1954261954261955, |
| "grad_norm": 13.5625, |
| "learning_rate": 4.47459584295612e-06, |
| "loss": 0.2181, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.1975051975051976, |
| "eval_accuracy": 0.889192352711666, |
| "eval_auc": 0.9218037947853494, |
| "eval_f1": 0.9310902620511161, |
| "eval_false_negatives": 538, |
| "eval_false_positives": 1166, |
| "eval_loss": 0.283740758895874, |
| "eval_precision": 0.9080296576747121, |
| "eval_recall": 0.9553526970954357, |
| "eval_runtime": 516.4754, |
| "eval_samples_per_second": 29.775, |
| "eval_specificity": 0.6496394230769231, |
| "eval_steps_per_second": 0.234, |
| "eval_true_negatives": 2162, |
| "eval_true_positives": 11512, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.1975051975051976, |
| "step": 1152, |
| "train_accuracy": 0.935, |
| "train_auc": 0.9640254927047857, |
| "train_f1": 0.9596523898199876, |
| "train_false_negatives": 16, |
| "train_false_positives": 49, |
| "train_loss": 0.1742788702249527, |
| "train_precision": 0.940389294403893, |
| "train_recall": 0.9797211660329531, |
| "train_runtime": 33.4133, |
| "train_samples_per_second": 29.928, |
| "train_specificity": 0.7677725118483413, |
| "train_steps_per_second": 0.239, |
| "train_true_negatives": 162, |
| "train_true_positives": 773 |
| }, |
| { |
| "epoch": 1.2474012474012475, |
| "grad_norm": 21.5, |
| "learning_rate": 4.185912240184758e-06, |
| "loss": 0.2036, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2993762993762994, |
| "grad_norm": 20.125, |
| "learning_rate": 3.897228637413395e-06, |
| "loss": 0.2052, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 13.9375, |
| "learning_rate": 3.6085450346420327e-06, |
| "loss": 0.2054, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4033264033264032, |
| "grad_norm": 25.125, |
| "learning_rate": 3.31986143187067e-06, |
| "loss": 0.196, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4553014553014554, |
| "grad_norm": 16.75, |
| "learning_rate": 3.0311778290993072e-06, |
| "loss": 0.1987, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.496881496881497, |
| "eval_accuracy": 0.9001820782936663, |
| "eval_auc": 0.9344650070818704, |
| "eval_f1": 0.9372829417773239, |
| "eval_false_negatives": 580, |
| "eval_false_positives": 955, |
| "eval_loss": 0.256587952375412, |
| "eval_precision": 0.9231388329979879, |
| "eval_recall": 0.9518672199170124, |
| "eval_runtime": 516.6281, |
| "eval_samples_per_second": 29.766, |
| "eval_specificity": 0.7130408653846154, |
| "eval_steps_per_second": 0.234, |
| "eval_true_negatives": 2373, |
| "eval_true_positives": 11470, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.496881496881497, |
| "step": 1440, |
| "train_accuracy": 0.931, |
| "train_auc": 0.9717205013621509, |
| "train_f1": 0.9559105431309904, |
| "train_false_negatives": 23, |
| "train_false_positives": 46, |
| "train_loss": 0.17544881999492645, |
| "train_precision": 0.9420654911838791, |
| "train_recall": 0.9701686121919585, |
| "train_runtime": 33.4556, |
| "train_samples_per_second": 29.89, |
| "train_specificity": 0.7991266375545851, |
| "train_steps_per_second": 0.239, |
| "train_true_negatives": 183, |
| "train_true_positives": 748 |
| }, |
| { |
| "epoch": 1.5072765072765073, |
| "grad_norm": 21.625, |
| "learning_rate": 2.742494226327945e-06, |
| "loss": 0.1915, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5592515592515592, |
| "grad_norm": 27.625, |
| "learning_rate": 2.453810623556582e-06, |
| "loss": 0.1904, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6112266112266114, |
| "grad_norm": 19.625, |
| "learning_rate": 2.1651270207852194e-06, |
| "loss": 0.1888, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6632016632016633, |
| "grad_norm": 18.5, |
| "learning_rate": 1.876443418013857e-06, |
| "loss": 0.1863, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7151767151767152, |
| "grad_norm": 23.25, |
| "learning_rate": 1.5877598152424944e-06, |
| "loss": 0.1854, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.7671517671517671, |
| "grad_norm": 12.5625, |
| "learning_rate": 1.2990762124711317e-06, |
| "loss": 0.1911, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.7962577962577964, |
| "eval_accuracy": 0.9045389517492521, |
| "eval_auc": 0.9388098093879668, |
| "eval_f1": 0.9399001064439532, |
| "eval_false_negatives": 571, |
| "eval_false_positives": 897, |
| "eval_loss": 0.24762538075447083, |
| "eval_precision": 0.9275210084033614, |
| "eval_recall": 0.9526141078838174, |
| "eval_runtime": 516.4828, |
| "eval_samples_per_second": 29.774, |
| "eval_specificity": 0.73046875, |
| "eval_steps_per_second": 0.234, |
| "eval_true_negatives": 2431, |
| "eval_true_positives": 11479, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.7962577962577964, |
| "step": 1728, |
| "train_accuracy": 0.952, |
| "train_auc": 0.9798224148344634, |
| "train_f1": 0.9702970297029703, |
| "train_false_negatives": 15, |
| "train_false_positives": 33, |
| "train_loss": 0.13925302028656006, |
| "train_precision": 0.9596083231334149, |
| "train_recall": 0.981226533166458, |
| "train_runtime": 33.5792, |
| "train_samples_per_second": 29.78, |
| "train_specificity": 0.835820895522388, |
| "train_steps_per_second": 0.238, |
| "train_true_negatives": 168, |
| "train_true_positives": 784 |
| }, |
| { |
| "epoch": 1.819126819126819, |
| "grad_norm": 18.125, |
| "learning_rate": 1.0103926096997691e-06, |
| "loss": 0.1843, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.871101871101871, |
| "grad_norm": 19.0, |
| "learning_rate": 7.217090069284065e-07, |
| "loss": 0.1866, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 19.125, |
| "learning_rate": 4.330254041570439e-07, |
| "loss": 0.1778, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.975051975051975, |
| "grad_norm": 26.875, |
| "learning_rate": 1.443418013856813e-07, |
| "loss": 0.1802, |
| "step": 1900 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1924, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 288, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.054906836624094e+19, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|