Text Generation
Transformers
Safetensors
Turkish
English
phi3
causal-lm
turkish
syko
text-generation-inference
Instructions to use SykoSLM/SykoLLM-V6.1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use SykoSLM/SykoLLM-V6.1 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="SykoSLM/SykoLLM-V6.1")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("SykoSLM/SykoLLM-V6.1")
model = AutoModelForCausalLM.from_pretrained("SykoSLM/SykoLLM-V6.1")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use SykoSLM/SykoLLM-V6.1 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "SykoSLM/SykoLLM-V6.1"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SykoSLM/SykoLLM-V6.1",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/SykoSLM/SykoLLM-V6.1
- SGLang
How to use SykoSLM/SykoLLM-V6.1 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "SykoSLM/SykoLLM-V6.1" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SykoSLM/SykoLLM-V6.1",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "SykoSLM/SykoLLM-V6.1" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SykoSLM/SykoLLM-V6.1",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use SykoSLM/SykoLLM-V6.1 with Docker Model Runner:
docker model run hf.co/SykoSLM/SykoLLM-V6.1
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.30526315789473685, | |
| "eval_steps": 500, | |
| "global_step": 2900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010526315789473684, | |
| "grad_norm": 0.34683918952941895, | |
| "learning_rate": 4.815e-06, | |
| "loss": 1.7081634521484375, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.002105263157894737, | |
| "grad_norm": 0.3531605899333954, | |
| "learning_rate": 1.0165e-05, | |
| "loss": 1.656758689880371, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.003157894736842105, | |
| "grad_norm": 0.3394385576248169, | |
| "learning_rate": 1.5515e-05, | |
| "loss": 1.633415985107422, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.004210526315789474, | |
| "grad_norm": 0.37144365906715393, | |
| "learning_rate": 2.0865e-05, | |
| "loss": 1.6722015380859374, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.005263157894736842, | |
| "grad_norm": 0.3785368800163269, | |
| "learning_rate": 2.6215e-05, | |
| "loss": 1.6771835327148437, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.00631578947368421, | |
| "grad_norm": 0.3667239546775818, | |
| "learning_rate": 3.1565e-05, | |
| "loss": 1.6657798767089844, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.007368421052631579, | |
| "grad_norm": 0.35704323649406433, | |
| "learning_rate": 3.6914999999999995e-05, | |
| "loss": 1.637792205810547, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.008421052631578947, | |
| "grad_norm": 0.3614155352115631, | |
| "learning_rate": 4.2265e-05, | |
| "loss": 1.6456287384033204, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.009473684210526316, | |
| "grad_norm": 0.3477347195148468, | |
| "learning_rate": 4.7615e-05, | |
| "loss": 1.6376474380493165, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.010526315789473684, | |
| "grad_norm": 0.3474464416503906, | |
| "learning_rate": 5.2965e-05, | |
| "loss": 1.6883708953857421, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.011578947368421053, | |
| "grad_norm": 0.3608642518520355, | |
| "learning_rate": 5.831500000000001e-05, | |
| "loss": 1.7032821655273438, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.01263157894736842, | |
| "grad_norm": 0.35934099555015564, | |
| "learning_rate": 6.3665e-05, | |
| "loss": 1.597799301147461, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.01368421052631579, | |
| "grad_norm": 0.38500702381134033, | |
| "learning_rate": 6.9015e-05, | |
| "loss": 1.6558387756347657, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.014736842105263158, | |
| "grad_norm": 0.3602914810180664, | |
| "learning_rate": 7.4365e-05, | |
| "loss": 1.6937145233154296, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.015789473684210527, | |
| "grad_norm": 0.36331596970558167, | |
| "learning_rate": 7.9715e-05, | |
| "loss": 1.5696943283081055, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.016842105263157894, | |
| "grad_norm": 0.3533744215965271, | |
| "learning_rate": 8.5065e-05, | |
| "loss": 1.702765655517578, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.017894736842105262, | |
| "grad_norm": 0.3546121120452881, | |
| "learning_rate": 9.0415e-05, | |
| "loss": 1.6325836181640625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.018947368421052633, | |
| "grad_norm": 0.3867342472076416, | |
| "learning_rate": 9.5765e-05, | |
| "loss": 1.636269760131836, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.3859454393386841, | |
| "learning_rate": 0.000101115, | |
| "loss": 1.7112407684326172, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.021052631578947368, | |
| "grad_norm": 0.3688015937805176, | |
| "learning_rate": 0.000106465, | |
| "loss": 1.6455875396728517, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.022105263157894735, | |
| "grad_norm": 0.3693976402282715, | |
| "learning_rate": 0.00010699975274657343, | |
| "loss": 1.6878833770751953, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.023157894736842106, | |
| "grad_norm": 0.3748058080673218, | |
| "learning_rate": 0.00010699889804630456, | |
| "loss": 1.6435226440429687, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.024210526315789474, | |
| "grad_norm": 0.3806576430797577, | |
| "learning_rate": 0.00010699743285643286, | |
| "loss": 1.7004669189453125, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.02526315789473684, | |
| "grad_norm": 0.3829317092895508, | |
| "learning_rate": 0.00010699535719367796, | |
| "loss": 1.6831859588623046, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.02631578947368421, | |
| "grad_norm": 0.4069920480251312, | |
| "learning_rate": 0.00010699267108172577, | |
| "loss": 1.6417667388916015, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02736842105263158, | |
| "grad_norm": 0.37535834312438965, | |
| "learning_rate": 0.00010698937455122825, | |
| "loss": 1.640174102783203, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.028421052631578948, | |
| "grad_norm": 0.3992610573768616, | |
| "learning_rate": 0.0001069854676398029, | |
| "loss": 1.6665351867675782, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.029473684210526315, | |
| "grad_norm": 0.3680964708328247, | |
| "learning_rate": 0.0001069809503920325, | |
| "loss": 1.7111568450927734, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.030526315789473683, | |
| "grad_norm": 0.4049525856971741, | |
| "learning_rate": 0.00010697582285946452, | |
| "loss": 1.6817201614379882, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.031578947368421054, | |
| "grad_norm": 0.38598954677581787, | |
| "learning_rate": 0.00010697008510061057, | |
| "loss": 1.6445945739746093, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03263157894736842, | |
| "grad_norm": 0.39688920974731445, | |
| "learning_rate": 0.00010696373718094565, | |
| "loss": 1.688629150390625, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.03368421052631579, | |
| "grad_norm": 0.3762621581554413, | |
| "learning_rate": 0.00010695677917290751, | |
| "loss": 1.6273818969726563, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.034736842105263156, | |
| "grad_norm": 0.3470601737499237, | |
| "learning_rate": 0.00010694921115589574, | |
| "loss": 1.690780258178711, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.035789473684210524, | |
| "grad_norm": 0.38783422112464905, | |
| "learning_rate": 0.00010694103321627094, | |
| "loss": 1.6885700225830078, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.03684210526315789, | |
| "grad_norm": 0.3837421238422394, | |
| "learning_rate": 0.00010693224544735366, | |
| "loss": 1.670220184326172, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.037894736842105266, | |
| "grad_norm": 0.3634503185749054, | |
| "learning_rate": 0.00010692284794942337, | |
| "loss": 1.6357498168945312, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.03894736842105263, | |
| "grad_norm": 0.39452844858169556, | |
| "learning_rate": 0.00010691284082971734, | |
| "loss": 1.6791454315185548, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.38304150104522705, | |
| "learning_rate": 0.00010690222420242937, | |
| "loss": 1.6702400207519532, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04105263157894737, | |
| "grad_norm": 0.3755001723766327, | |
| "learning_rate": 0.00010689099818870848, | |
| "loss": 1.6558124542236328, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.042105263157894736, | |
| "grad_norm": 0.3776380121707916, | |
| "learning_rate": 0.0001068791629166576, | |
| "loss": 1.6616518020629882, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0431578947368421, | |
| "grad_norm": 0.3697650134563446, | |
| "learning_rate": 0.00010686671852133208, | |
| "loss": 1.6540897369384766, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.04421052631578947, | |
| "grad_norm": 0.3718468248844147, | |
| "learning_rate": 0.00010685366514473802, | |
| "loss": 1.6041250228881836, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.045263157894736845, | |
| "grad_norm": 0.38397344946861267, | |
| "learning_rate": 0.0001068400029358309, | |
| "loss": 1.677585983276367, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.04631578947368421, | |
| "grad_norm": 0.37290486693382263, | |
| "learning_rate": 0.00010682573205051367, | |
| "loss": 1.6698143005371093, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.04736842105263158, | |
| "grad_norm": 0.37734609842300415, | |
| "learning_rate": 0.00010681085265163504, | |
| "loss": 1.6791515350341797, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04842105263157895, | |
| "grad_norm": 0.354443222284317, | |
| "learning_rate": 0.00010679536490898761, | |
| "loss": 1.6450014114379883, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.049473684210526316, | |
| "grad_norm": 0.3799300491809845, | |
| "learning_rate": 0.00010677926899930603, | |
| "loss": 1.6635103225708008, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.05052631578947368, | |
| "grad_norm": 0.3844967484474182, | |
| "learning_rate": 0.00010676256510626478, | |
| "loss": 1.6978870391845704, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.05157894736842105, | |
| "grad_norm": 0.38755500316619873, | |
| "learning_rate": 0.00010674525342047629, | |
| "loss": 1.6842260360717773, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.05263157894736842, | |
| "grad_norm": 0.39443737268447876, | |
| "learning_rate": 0.00010672733413948862, | |
| "loss": 1.6408458709716798, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05368421052631579, | |
| "grad_norm": 0.4008043110370636, | |
| "learning_rate": 0.00010670880746778328, | |
| "loss": 1.61962833404541, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.05473684210526316, | |
| "grad_norm": 0.3917809724807739, | |
| "learning_rate": 0.00010668967361677283, | |
| "loss": 1.718182373046875, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.05578947368421053, | |
| "grad_norm": 0.364409476518631, | |
| "learning_rate": 0.00010666993280479856, | |
| "loss": 1.7204322814941406, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.056842105263157895, | |
| "grad_norm": 0.39319396018981934, | |
| "learning_rate": 0.00010664958525712792, | |
| "loss": 1.6448682785034179, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.05789473684210526, | |
| "grad_norm": 0.3864227533340454, | |
| "learning_rate": 0.00010662863120595196, | |
| "loss": 1.7400585174560548, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.05894736842105263, | |
| "grad_norm": 0.37699612975120544, | |
| "learning_rate": 0.00010660707089038273, | |
| "loss": 1.6591960906982421, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.3808913230895996, | |
| "learning_rate": 0.00010658490455645052, | |
| "loss": 1.63150634765625, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.061052631578947365, | |
| "grad_norm": 0.38882365822792053, | |
| "learning_rate": 0.00010656213245710098, | |
| "loss": 1.6896860122680664, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.06210526315789474, | |
| "grad_norm": 0.3772079050540924, | |
| "learning_rate": 0.0001065387548521924, | |
| "loss": 1.7085845947265625, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.06315789473684211, | |
| "grad_norm": 0.37180712819099426, | |
| "learning_rate": 0.00010651477200849263, | |
| "loss": 1.7532657623291015, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06421052631578947, | |
| "grad_norm": 0.3878546357154846, | |
| "learning_rate": 0.00010649018419967597, | |
| "loss": 1.6636667251586914, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.06526315789473684, | |
| "grad_norm": 0.39751365780830383, | |
| "learning_rate": 0.00010646499170632023, | |
| "loss": 1.6579233169555665, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.06631578947368422, | |
| "grad_norm": 0.3831867277622223, | |
| "learning_rate": 0.00010643919481590337, | |
| "loss": 1.6426708221435546, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.06736842105263158, | |
| "grad_norm": 0.3749397099018097, | |
| "learning_rate": 0.00010641279382280032, | |
| "loss": 1.7154060363769532, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.06842105263157895, | |
| "grad_norm": 0.37839797139167786, | |
| "learning_rate": 0.00010638578902827957, | |
| "loss": 1.7217548370361329, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.06947368421052631, | |
| "grad_norm": 0.3703754246234894, | |
| "learning_rate": 0.00010635818074049972, | |
| "loss": 1.7110353469848634, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.07052631578947369, | |
| "grad_norm": 0.36747097969055176, | |
| "learning_rate": 0.00010632996927450597, | |
| "loss": 1.651369857788086, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.07157894736842105, | |
| "grad_norm": 0.36606892943382263, | |
| "learning_rate": 0.00010630115495222664, | |
| "loss": 1.6909339904785157, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.07263157894736842, | |
| "grad_norm": 0.3871472179889679, | |
| "learning_rate": 0.00010627173810246927, | |
| "loss": 1.6740509033203126, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.07368421052631578, | |
| "grad_norm": 0.3820892572402954, | |
| "learning_rate": 0.00010624171906091708, | |
| "loss": 1.7049301147460938, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.07473684210526316, | |
| "grad_norm": 0.38060277700424194, | |
| "learning_rate": 0.00010621109817012501, | |
| "loss": 1.7255819320678711, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.07578947368421053, | |
| "grad_norm": 0.37024298310279846, | |
| "learning_rate": 0.00010617987577951588, | |
| "loss": 1.707390594482422, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.07684210526315789, | |
| "grad_norm": 0.3976726233959198, | |
| "learning_rate": 0.0001061480522453764, | |
| "loss": 1.6445907592773437, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.07789473684210527, | |
| "grad_norm": 0.3904809057712555, | |
| "learning_rate": 0.00010611562793085301, | |
| "loss": 1.7427913665771484, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.07894736842105263, | |
| "grad_norm": 0.37776583433151245, | |
| "learning_rate": 0.00010608260320594787, | |
| "loss": 1.6211050033569336, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.382707804441452, | |
| "learning_rate": 0.00010604897844751458, | |
| "loss": 1.6817436218261719, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.08105263157894736, | |
| "grad_norm": 0.3894830048084259, | |
| "learning_rate": 0.00010601475403925381, | |
| "loss": 1.747372817993164, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.08210526315789474, | |
| "grad_norm": 0.38454341888427734, | |
| "learning_rate": 0.00010597993037170907, | |
| "loss": 1.667810821533203, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.08315789473684211, | |
| "grad_norm": 0.3924828767776489, | |
| "learning_rate": 0.00010594450784226211, | |
| "loss": 1.689559555053711, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 0.390747994184494, | |
| "learning_rate": 0.0001059084868551285, | |
| "loss": 1.687558364868164, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08526315789473685, | |
| "grad_norm": 0.38002100586891174, | |
| "learning_rate": 0.0001058718678213529, | |
| "loss": 1.7372432708740235, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.0863157894736842, | |
| "grad_norm": 0.3947979509830475, | |
| "learning_rate": 0.00010583465115880448, | |
| "loss": 1.7141420364379882, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.08736842105263158, | |
| "grad_norm": 0.38964593410491943, | |
| "learning_rate": 0.0001057968372921721, | |
| "loss": 1.6732599258422851, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.08842105263157894, | |
| "grad_norm": 0.3914567828178406, | |
| "learning_rate": 0.00010575842665295942, | |
| "loss": 1.697699737548828, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.08947368421052632, | |
| "grad_norm": 0.3780556917190552, | |
| "learning_rate": 0.00010571941967948013, | |
| "loss": 1.6859580993652343, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.09052631578947369, | |
| "grad_norm": 0.3804113268852234, | |
| "learning_rate": 0.00010567981681685271, | |
| "loss": 1.630574607849121, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.09157894736842105, | |
| "grad_norm": 0.3920338451862335, | |
| "learning_rate": 0.0001056396185169956, | |
| "loss": 1.701805877685547, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.09263157894736843, | |
| "grad_norm": 0.3645232021808624, | |
| "learning_rate": 0.00010559882523862185, | |
| "loss": 1.6626638412475585, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.09368421052631579, | |
| "grad_norm": 0.39647483825683594, | |
| "learning_rate": 0.000105557437447234, | |
| "loss": 1.657071876525879, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.09473684210526316, | |
| "grad_norm": 0.3784042298793793, | |
| "learning_rate": 0.00010551545561511872, | |
| "loss": 1.6789131164550781, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.09578947368421052, | |
| "grad_norm": 0.3799436390399933, | |
| "learning_rate": 0.00010547288022134141, | |
| "loss": 1.6874401092529296, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.0968421052631579, | |
| "grad_norm": 0.3979872465133667, | |
| "learning_rate": 0.00010542971175174078, | |
| "loss": 1.7372554779052733, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.09789473684210526, | |
| "grad_norm": 0.3869173228740692, | |
| "learning_rate": 0.0001053859506989233, | |
| "loss": 1.6965164184570312, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.09894736842105263, | |
| "grad_norm": 0.38553228974342346, | |
| "learning_rate": 0.0001053415975622575, | |
| "loss": 1.6804073333740235, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.37855857610702515, | |
| "learning_rate": 0.00010529665284786835, | |
| "loss": 1.7479766845703124, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.10105263157894737, | |
| "grad_norm": 0.36974212527275085, | |
| "learning_rate": 0.00010525111706863153, | |
| "loss": 1.6555421829223633, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.10210526315789474, | |
| "grad_norm": 0.3829262852668762, | |
| "learning_rate": 0.00010520499074416742, | |
| "loss": 1.7271907806396485, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1031578947368421, | |
| "grad_norm": 0.3871605396270752, | |
| "learning_rate": 0.0001051582744008353, | |
| "loss": 1.6716243743896484, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.10421052631578948, | |
| "grad_norm": 0.3923998475074768, | |
| "learning_rate": 0.00010511096857172731, | |
| "loss": 1.6450519561767578, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.38333484530448914, | |
| "learning_rate": 0.00010506307379666238, | |
| "loss": 1.6865043640136719, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.10631578947368421, | |
| "grad_norm": 0.38256773352622986, | |
| "learning_rate": 0.00010501459062218, | |
| "loss": 1.6601579666137696, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.10736842105263159, | |
| "grad_norm": 0.3737237751483917, | |
| "learning_rate": 0.00010496551960153409, | |
| "loss": 1.6208690643310546, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.10842105263157895, | |
| "grad_norm": 0.366969496011734, | |
| "learning_rate": 0.00010491586129468662, | |
| "loss": 1.6808839797973634, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.10947368421052632, | |
| "grad_norm": 0.3720376193523407, | |
| "learning_rate": 0.0001048656162683012, | |
| "loss": 1.6338840484619142, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.11052631578947368, | |
| "grad_norm": 0.39924025535583496, | |
| "learning_rate": 0.00010481478509573669, | |
| "loss": 1.652592086791992, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.11157894736842106, | |
| "grad_norm": 0.37709176540374756, | |
| "learning_rate": 0.00010476336835704059, | |
| "loss": 1.6794198989868163, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.11263157894736842, | |
| "grad_norm": 0.382405161857605, | |
| "learning_rate": 0.00010471136663894244, | |
| "loss": 1.702239990234375, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.11368421052631579, | |
| "grad_norm": 0.3955666720867157, | |
| "learning_rate": 0.00010465878053484715, | |
| "loss": 1.625558090209961, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.11473684210526315, | |
| "grad_norm": 0.3984505534172058, | |
| "learning_rate": 0.0001046056106448282, | |
| "loss": 1.7061031341552735, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.11578947368421053, | |
| "grad_norm": 0.37337619066238403, | |
| "learning_rate": 0.00010455185757562081, | |
| "loss": 1.6474536895751952, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.1168421052631579, | |
| "grad_norm": 0.4265633225440979, | |
| "learning_rate": 0.00010449752194061497, | |
| "loss": 1.6948539733886718, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.11789473684210526, | |
| "grad_norm": 0.39065343141555786, | |
| "learning_rate": 0.0001044426043598485, | |
| "loss": 1.6905693054199218, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.11894736842105263, | |
| "grad_norm": 0.3910517692565918, | |
| "learning_rate": 0.00010438710545999999, | |
| "loss": 1.6512699127197266, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.41286537051200867, | |
| "learning_rate": 0.00010433102587438154, | |
| "loss": 1.6904163360595703, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.12105263157894737, | |
| "grad_norm": 0.39058077335357666, | |
| "learning_rate": 0.00010427436624293164, | |
| "loss": 1.6889778137207032, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.12210526315789473, | |
| "grad_norm": 0.40376579761505127, | |
| "learning_rate": 0.00010421712721220786, | |
| "loss": 1.6660743713378907, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.1231578947368421, | |
| "grad_norm": 0.4065842628479004, | |
| "learning_rate": 0.00010415930943537937, | |
| "loss": 1.7282680511474608, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.12421052631578948, | |
| "grad_norm": 0.3935592770576477, | |
| "learning_rate": 0.00010410091357221965, | |
| "loss": 1.7208686828613282, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.12526315789473685, | |
| "grad_norm": 0.3769897520542145, | |
| "learning_rate": 0.00010404194028909876, | |
| "loss": 1.6730665206909179, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.12631578947368421, | |
| "grad_norm": 0.37976640462875366, | |
| "learning_rate": 0.00010398239025897598, | |
| "loss": 1.7071300506591798, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.12736842105263158, | |
| "grad_norm": 0.38293200731277466, | |
| "learning_rate": 0.0001039222641613919, | |
| "loss": 1.7225513458251953, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.12842105263157894, | |
| "grad_norm": 0.3943805694580078, | |
| "learning_rate": 0.00010386156268246077, | |
| "loss": 1.6900711059570312, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.12947368421052632, | |
| "grad_norm": 0.402694970369339, | |
| "learning_rate": 0.00010380028651486271, | |
| "loss": 1.6741355895996093, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.13052631578947368, | |
| "grad_norm": 0.4034770429134369, | |
| "learning_rate": 0.00010373843635783572, | |
| "loss": 1.7251928329467774, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.13157894736842105, | |
| "grad_norm": 0.4223957359790802, | |
| "learning_rate": 0.00010367601291716777, | |
| "loss": 1.7350204467773438, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.13263157894736843, | |
| "grad_norm": 0.3636983633041382, | |
| "learning_rate": 0.0001036130169051887, | |
| "loss": 1.6685359954833985, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1336842105263158, | |
| "grad_norm": 0.36913859844207764, | |
| "learning_rate": 0.00010354944904076209, | |
| "loss": 1.6918949127197265, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.13473684210526315, | |
| "grad_norm": 0.3916381597518921, | |
| "learning_rate": 0.00010348531004927711, | |
| "loss": 1.6259313583374024, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.13578947368421052, | |
| "grad_norm": 0.38772350549697876, | |
| "learning_rate": 0.00010342060066264016, | |
| "loss": 1.7148677825927734, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.1368421052631579, | |
| "grad_norm": 0.38373488187789917, | |
| "learning_rate": 0.00010335532161926664, | |
| "loss": 1.6328174591064453, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.13789473684210526, | |
| "grad_norm": 0.3877631723880768, | |
| "learning_rate": 0.00010328947366407237, | |
| "loss": 1.646784210205078, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.13894736842105262, | |
| "grad_norm": 0.39882156252861023, | |
| "learning_rate": 0.00010322305754846519, | |
| "loss": 1.6600376129150392, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.40457776188850403, | |
| "learning_rate": 0.00010315607403033641, | |
| "loss": 1.669814109802246, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.14105263157894737, | |
| "grad_norm": 0.3948962688446045, | |
| "learning_rate": 0.00010308852387405208, | |
| "loss": 1.715940284729004, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.14210526315789473, | |
| "grad_norm": 0.3921595513820648, | |
| "learning_rate": 0.00010302040785044425, | |
| "loss": 1.6944934844970703, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.1431578947368421, | |
| "grad_norm": 0.3857240676879883, | |
| "learning_rate": 0.00010295172673680234, | |
| "loss": 1.6900419235229491, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.14421052631578948, | |
| "grad_norm": 0.38249680399894714, | |
| "learning_rate": 0.00010288248131686406, | |
| "loss": 1.7138862609863281, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.14526315789473684, | |
| "grad_norm": 0.40845534205436707, | |
| "learning_rate": 0.00010281267238080664, | |
| "loss": 1.7212867736816406, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.1463157894736842, | |
| "grad_norm": 0.3911115229129791, | |
| "learning_rate": 0.00010274230072523764, | |
| "loss": 1.7087575912475585, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.14736842105263157, | |
| "grad_norm": 0.3967211842536926, | |
| "learning_rate": 0.00010267136715318605, | |
| "loss": 1.675175094604492, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.14842105263157895, | |
| "grad_norm": 0.3820992410182953, | |
| "learning_rate": 0.00010259987247409298, | |
| "loss": 1.665155792236328, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.14947368421052631, | |
| "grad_norm": 0.40317046642303467, | |
| "learning_rate": 0.00010252781750380252, | |
| "loss": 1.6777839660644531, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.15052631578947367, | |
| "grad_norm": 0.39026641845703125, | |
| "learning_rate": 0.00010245520306455232, | |
| "loss": 1.6641407012939453, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.15157894736842106, | |
| "grad_norm": 0.38703930377960205, | |
| "learning_rate": 0.00010238202998496432, | |
| "loss": 1.7006916046142577, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.15263157894736842, | |
| "grad_norm": 0.3920949697494507, | |
| "learning_rate": 0.00010230829910003525, | |
| "loss": 1.6237125396728516, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.15368421052631578, | |
| "grad_norm": 0.40310102701187134, | |
| "learning_rate": 0.00010223401125112709, | |
| "loss": 1.693703842163086, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.15473684210526314, | |
| "grad_norm": 0.3895237147808075, | |
| "learning_rate": 0.00010215916728595746, | |
| "loss": 1.6554393768310547, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.15578947368421053, | |
| "grad_norm": 0.3830355703830719, | |
| "learning_rate": 0.00010208376805858997, | |
| "loss": 1.6817665100097656, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.1568421052631579, | |
| "grad_norm": 0.4044099450111389, | |
| "learning_rate": 0.00010200781442942451, | |
| "loss": 1.740530776977539, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 0.37278082966804504, | |
| "learning_rate": 0.00010193130726518736, | |
| "loss": 1.7269683837890626, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.15894736842105264, | |
| "grad_norm": 0.3909358084201813, | |
| "learning_rate": 0.00010185424743892131, | |
| "loss": 1.674229049682617, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.3877439796924591, | |
| "learning_rate": 0.00010177663582997574, | |
| "loss": 1.6566276550292969, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.16105263157894736, | |
| "grad_norm": 0.3673596978187561, | |
| "learning_rate": 0.00010169847332399658, | |
| "loss": 1.6969722747802733, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.16210526315789472, | |
| "grad_norm": 0.428408145904541, | |
| "learning_rate": 0.00010161976081291614, | |
| "loss": 1.6617691040039062, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.1631578947368421, | |
| "grad_norm": 0.38442328572273254, | |
| "learning_rate": 0.00010154049919494305, | |
| "loss": 1.7180919647216797, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.16421052631578947, | |
| "grad_norm": 0.41423359513282776, | |
| "learning_rate": 0.00010146068937455184, | |
| "loss": 1.7110111236572265, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.16526315789473683, | |
| "grad_norm": 0.3815020024776459, | |
| "learning_rate": 0.00010138033226247282, | |
| "loss": 1.6620532989501953, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.16631578947368422, | |
| "grad_norm": 0.38987597823143005, | |
| "learning_rate": 0.00010129942877568153, | |
| "loss": 1.6376758575439454, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.16736842105263158, | |
| "grad_norm": 0.37103158235549927, | |
| "learning_rate": 0.00010121797983738831, | |
| "loss": 1.6269058227539062, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 0.39582741260528564, | |
| "learning_rate": 0.00010113598637702785, | |
| "loss": 1.6544437408447266, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.1694736842105263, | |
| "grad_norm": 0.3875832259654999, | |
| "learning_rate": 0.0001010534493302485, | |
| "loss": 1.69503173828125, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.1705263157894737, | |
| "grad_norm": 0.40506550669670105, | |
| "learning_rate": 0.00010097036963890156, | |
| "loss": 1.6826278686523437, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.17157894736842105, | |
| "grad_norm": 0.39827048778533936, | |
| "learning_rate": 0.00010088674825103067, | |
| "loss": 1.6500736236572267, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.1726315789473684, | |
| "grad_norm": 0.3786768913269043, | |
| "learning_rate": 0.00010080258612086083, | |
| "loss": 1.6809326171875, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.1736842105263158, | |
| "grad_norm": 0.40326225757598877, | |
| "learning_rate": 0.00010071788420878764, | |
| "loss": 1.7387603759765624, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.17473684210526316, | |
| "grad_norm": 0.3733818829059601, | |
| "learning_rate": 0.00010063264348136629, | |
| "loss": 1.6930301666259766, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.17578947368421052, | |
| "grad_norm": 0.4019014239311218, | |
| "learning_rate": 0.00010054686491130048, | |
| "loss": 1.665353012084961, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.17684210526315788, | |
| "grad_norm": 0.3994007110595703, | |
| "learning_rate": 0.00010046054947743142, | |
| "loss": 1.7481708526611328, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.17789473684210527, | |
| "grad_norm": 0.40046176314353943, | |
| "learning_rate": 0.00010037369816472658, | |
| "loss": 1.6684654235839844, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.17894736842105263, | |
| "grad_norm": 0.39062178134918213, | |
| "learning_rate": 0.00010028631196426851, | |
| "loss": 1.6636728286743163, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.40030282735824585, | |
| "learning_rate": 0.0001001983918732435, | |
| "loss": 1.6382123947143554, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.18105263157894738, | |
| "grad_norm": 0.38396012783050537, | |
| "learning_rate": 0.00010010993889493013, | |
| "loss": 1.6094409942626953, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.18210526315789474, | |
| "grad_norm": 0.3969299793243408, | |
| "learning_rate": 0.000100020954038688, | |
| "loss": 1.6550315856933593, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.1831578947368421, | |
| "grad_norm": 0.39174884557724, | |
| "learning_rate": 9.993143831994603e-05, | |
| "loss": 1.7123249053955079, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.18421052631578946, | |
| "grad_norm": 0.38760584592819214, | |
| "learning_rate": 9.984139276019098e-05, | |
| "loss": 1.6742156982421874, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.18526315789473685, | |
| "grad_norm": 0.3817841410636902, | |
| "learning_rate": 9.975081838695576e-05, | |
| "loss": 1.641263771057129, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1863157894736842, | |
| "grad_norm": 0.4085705578327179, | |
| "learning_rate": 9.965971623380768e-05, | |
| "loss": 1.7673213958740235, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.18736842105263157, | |
| "grad_norm": 0.38965287804603577, | |
| "learning_rate": 9.956808734033671e-05, | |
| "loss": 1.770319366455078, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.18842105263157893, | |
| "grad_norm": 0.3770400881767273, | |
| "learning_rate": 9.947593275214358e-05, | |
| "loss": 1.6587142944335938, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.18947368421052632, | |
| "grad_norm": 0.40959247946739197, | |
| "learning_rate": 9.938325352082786e-05, | |
| "loss": 1.6820697784423828, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.19052631578947368, | |
| "grad_norm": 0.37764784693717957, | |
| "learning_rate": 9.929005070397595e-05, | |
| "loss": 1.6965087890625, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.19157894736842104, | |
| "grad_norm": 0.37487778067588806, | |
| "learning_rate": 9.9196325365149e-05, | |
| "loss": 1.6261119842529297, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.19263157894736843, | |
| "grad_norm": 0.4048542380332947, | |
| "learning_rate": 9.910207857387085e-05, | |
| "loss": 1.7076032638549805, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.1936842105263158, | |
| "grad_norm": 0.37118133902549744, | |
| "learning_rate": 9.90073114056157e-05, | |
| "loss": 1.70123233795166, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.19473684210526315, | |
| "grad_norm": 0.38945528864860535, | |
| "learning_rate": 9.891202494179595e-05, | |
| "loss": 1.7137296676635743, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.1957894736842105, | |
| "grad_norm": 0.39081960916519165, | |
| "learning_rate": 9.881622026974978e-05, | |
| "loss": 1.6556056976318358, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.1968421052631579, | |
| "grad_norm": 0.4000365436077118, | |
| "learning_rate": 9.871989848272882e-05, | |
| "loss": 1.708022689819336, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.19789473684210526, | |
| "grad_norm": 0.38972243666648865, | |
| "learning_rate": 9.86230606798856e-05, | |
| "loss": 1.6936985015869142, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.19894736842105262, | |
| "grad_norm": 0.4023416340351105, | |
| "learning_rate": 9.852570796626104e-05, | |
| "loss": 1.6013282775878905, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.37790361046791077, | |
| "learning_rate": 9.842784145277185e-05, | |
| "loss": 1.678757095336914, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.20105263157894737, | |
| "grad_norm": 0.4072909653186798, | |
| "learning_rate": 9.832946225619782e-05, | |
| "loss": 1.6550043106079102, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.20210526315789473, | |
| "grad_norm": 0.4222109317779541, | |
| "learning_rate": 9.823057149916913e-05, | |
| "loss": 1.6794788360595703, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.2031578947368421, | |
| "grad_norm": 0.3997038006782532, | |
| "learning_rate": 9.813117031015348e-05, | |
| "loss": 1.708123779296875, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.20421052631578948, | |
| "grad_norm": 0.387678861618042, | |
| "learning_rate": 9.803125982344328e-05, | |
| "loss": 1.694279098510742, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.20526315789473684, | |
| "grad_norm": 0.41388800740242004, | |
| "learning_rate": 9.793084117914258e-05, | |
| "loss": 1.698614501953125, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.2063157894736842, | |
| "grad_norm": 0.38706713914871216, | |
| "learning_rate": 9.782991552315424e-05, | |
| "loss": 1.702214813232422, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.2073684210526316, | |
| "grad_norm": 0.3965074419975281, | |
| "learning_rate": 9.772848400716673e-05, | |
| "loss": 1.6214000701904296, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.20842105263157895, | |
| "grad_norm": 0.39218032360076904, | |
| "learning_rate": 9.762654778864099e-05, | |
| "loss": 1.681211280822754, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.2094736842105263, | |
| "grad_norm": 0.4117305874824524, | |
| "learning_rate": 9.752410803079726e-05, | |
| "loss": 1.6745601654052735, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.3973471224308014, | |
| "learning_rate": 9.742116590260185e-05, | |
| "loss": 1.6459293365478516, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21157894736842106, | |
| "grad_norm": 0.3847576975822449, | |
| "learning_rate": 9.731772257875366e-05, | |
| "loss": 1.6581769943237306, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.21263157894736842, | |
| "grad_norm": 0.4136882424354553, | |
| "learning_rate": 9.721377923967092e-05, | |
| "loss": 1.7314947128295899, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.21368421052631578, | |
| "grad_norm": 0.37820902466773987, | |
| "learning_rate": 9.710933707147764e-05, | |
| "loss": 1.7070299148559571, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.21473684210526317, | |
| "grad_norm": 0.39630916714668274, | |
| "learning_rate": 9.700439726599012e-05, | |
| "loss": 1.6553241729736328, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.21578947368421053, | |
| "grad_norm": 0.3991798758506775, | |
| "learning_rate": 9.68989610207033e-05, | |
| "loss": 1.7385829925537108, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.2168421052631579, | |
| "grad_norm": 0.4119565188884735, | |
| "learning_rate": 9.679302953877712e-05, | |
| "loss": 1.71380615234375, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.21789473684210525, | |
| "grad_norm": 0.40724804997444153, | |
| "learning_rate": 9.66866040290228e-05, | |
| "loss": 1.6676467895507812, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.21894736842105264, | |
| "grad_norm": 0.4088967442512512, | |
| "learning_rate": 9.657968570588905e-05, | |
| "loss": 1.674250030517578, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.40387439727783203, | |
| "learning_rate": 9.64722757894482e-05, | |
| "loss": 1.676458740234375, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.22105263157894736, | |
| "grad_norm": 0.4028227925300598, | |
| "learning_rate": 9.636437550538226e-05, | |
| "loss": 1.6708587646484374, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.22210526315789475, | |
| "grad_norm": 0.40027210116386414, | |
| "learning_rate": 9.625598608496895e-05, | |
| "loss": 1.6314043045043944, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.2231578947368421, | |
| "grad_norm": 0.386688768863678, | |
| "learning_rate": 9.614710876506763e-05, | |
| "loss": 1.725076675415039, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.22421052631578947, | |
| "grad_norm": 0.4061787724494934, | |
| "learning_rate": 9.603774478810528e-05, | |
| "loss": 1.6826349258422852, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.22526315789473683, | |
| "grad_norm": 0.40370142459869385, | |
| "learning_rate": 9.592789540206218e-05, | |
| "loss": 1.649374771118164, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.22631578947368422, | |
| "grad_norm": 0.40586093068122864, | |
| "learning_rate": 9.581756186045777e-05, | |
| "loss": 1.6614540100097657, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.22736842105263158, | |
| "grad_norm": 0.3933681547641754, | |
| "learning_rate": 9.570674542233628e-05, | |
| "loss": 1.6946598052978517, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.22842105263157894, | |
| "grad_norm": 0.3825010359287262, | |
| "learning_rate": 9.559544735225242e-05, | |
| "loss": 1.6574283599853517, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.2294736842105263, | |
| "grad_norm": 0.4000436067581177, | |
| "learning_rate": 9.548366892025693e-05, | |
| "loss": 1.673634910583496, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.2305263157894737, | |
| "grad_norm": 0.3942500054836273, | |
| "learning_rate": 9.537141140188206e-05, | |
| "loss": 1.621174430847168, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.23157894736842105, | |
| "grad_norm": 0.3846987783908844, | |
| "learning_rate": 9.525867607812708e-05, | |
| "loss": 1.6244104385375977, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.2326315789473684, | |
| "grad_norm": 0.38483455777168274, | |
| "learning_rate": 9.514546423544357e-05, | |
| "loss": 1.687708282470703, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2336842105263158, | |
| "grad_norm": 0.4134112000465393, | |
| "learning_rate": 9.503177716572082e-05, | |
| "loss": 1.7054229736328126, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.23473684210526316, | |
| "grad_norm": 0.3780292868614197, | |
| "learning_rate": 9.491761616627101e-05, | |
| "loss": 1.6283729553222657, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.23578947368421052, | |
| "grad_norm": 0.40246784687042236, | |
| "learning_rate": 9.480298253981456e-05, | |
| "loss": 1.7036407470703125, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.23684210526315788, | |
| "grad_norm": 0.4002091884613037, | |
| "learning_rate": 9.468787759446502e-05, | |
| "loss": 1.7064756393432616, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.23789473684210527, | |
| "grad_norm": 0.40926146507263184, | |
| "learning_rate": 9.457230264371439e-05, | |
| "loss": 1.6858642578125, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.23894736842105263, | |
| "grad_norm": 0.41373902559280396, | |
| "learning_rate": 9.445625900641796e-05, | |
| "loss": 1.655508804321289, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.38966718316078186, | |
| "learning_rate": 9.433974800677935e-05, | |
| "loss": 1.6741256713867188, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.24105263157894738, | |
| "grad_norm": 0.4069412648677826, | |
| "learning_rate": 9.422277097433537e-05, | |
| "loss": 1.6685916900634765, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.24210526315789474, | |
| "grad_norm": 0.3916907012462616, | |
| "learning_rate": 9.410532924394083e-05, | |
| "loss": 1.6491849899291993, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.2431578947368421, | |
| "grad_norm": 0.39959436655044556, | |
| "learning_rate": 9.398742415575336e-05, | |
| "loss": 1.670114517211914, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.24421052631578946, | |
| "grad_norm": 0.3950902223587036, | |
| "learning_rate": 9.386905705521803e-05, | |
| "loss": 1.6907678604125977, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.24526315789473685, | |
| "grad_norm": 0.38667526841163635, | |
| "learning_rate": 9.375022929305213e-05, | |
| "loss": 1.669590377807617, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.2463157894736842, | |
| "grad_norm": 0.39125263690948486, | |
| "learning_rate": 9.363094222522958e-05, | |
| "loss": 1.6502418518066406, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.24736842105263157, | |
| "grad_norm": 0.38178369402885437, | |
| "learning_rate": 9.351119721296566e-05, | |
| "loss": 1.7035490036010743, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.24842105263157896, | |
| "grad_norm": 0.37467339634895325, | |
| "learning_rate": 9.339099562270128e-05, | |
| "loss": 1.6536640167236327, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.24947368421052632, | |
| "grad_norm": 0.41233041882514954, | |
| "learning_rate": 9.327033882608754e-05, | |
| "loss": 1.6268924713134765, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.2505263157894737, | |
| "grad_norm": 0.3746933937072754, | |
| "learning_rate": 9.314922819996997e-05, | |
| "loss": 1.6240985870361329, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.25157894736842107, | |
| "grad_norm": 0.3932549059391022, | |
| "learning_rate": 9.302766512637293e-05, | |
| "loss": 1.6809700012207032, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 0.4058087468147278, | |
| "learning_rate": 9.290565099248368e-05, | |
| "loss": 1.6474214553833009, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2536842105263158, | |
| "grad_norm": 0.3873753547668457, | |
| "learning_rate": 9.278318719063673e-05, | |
| "loss": 1.6398870468139648, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.25473684210526315, | |
| "grad_norm": 0.41126886010169983, | |
| "learning_rate": 9.26602751182978e-05, | |
| "loss": 1.6111644744873046, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.2557894736842105, | |
| "grad_norm": 0.40002816915512085, | |
| "learning_rate": 9.2536916178048e-05, | |
| "loss": 1.6024229049682617, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.25684210526315787, | |
| "grad_norm": 0.4194015562534332, | |
| "learning_rate": 9.241311177756771e-05, | |
| "loss": 1.6467687606811523, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.2578947368421053, | |
| "grad_norm": 0.4181770980358124, | |
| "learning_rate": 9.228886332962062e-05, | |
| "loss": 1.6439130783081055, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.25894736842105265, | |
| "grad_norm": 0.40925332903862, | |
| "learning_rate": 9.216417225203754e-05, | |
| "loss": 1.6347824096679688, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.40195897221565247, | |
| "learning_rate": 9.203903996770019e-05, | |
| "loss": 1.6572818756103516, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.26105263157894737, | |
| "grad_norm": 0.4277157485485077, | |
| "learning_rate": 9.191346790452509e-05, | |
| "loss": 1.6013570785522462, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.26210526315789473, | |
| "grad_norm": 0.3951636552810669, | |
| "learning_rate": 9.178745749544716e-05, | |
| "loss": 1.694039535522461, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 0.3961932957172394, | |
| "learning_rate": 9.166101017840337e-05, | |
| "loss": 1.6311038970947265, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.26421052631578945, | |
| "grad_norm": 0.40256279706954956, | |
| "learning_rate": 9.15341273963164e-05, | |
| "loss": 1.7131736755371094, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.26526315789473687, | |
| "grad_norm": 0.40076208114624023, | |
| "learning_rate": 9.14068105970781e-05, | |
| "loss": 1.659266471862793, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.26631578947368423, | |
| "grad_norm": 0.39892420172691345, | |
| "learning_rate": 9.127906123353305e-05, | |
| "loss": 1.6891080856323242, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.2673684210526316, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 9.115088076346184e-05, | |
| "loss": 1.6869060516357421, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.26842105263157895, | |
| "grad_norm": 0.3876430094242096, | |
| "learning_rate": 9.102227064956465e-05, | |
| "loss": 1.623502540588379, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.2694736842105263, | |
| "grad_norm": 0.3828693628311157, | |
| "learning_rate": 9.08932323594443e-05, | |
| "loss": 1.6787071228027344, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.27052631578947367, | |
| "grad_norm": 0.3757915198802948, | |
| "learning_rate": 9.076376736558976e-05, | |
| "loss": 1.7229637145996093, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.27157894736842103, | |
| "grad_norm": 0.3994489312171936, | |
| "learning_rate": 9.063387714535916e-05, | |
| "loss": 1.6279123306274415, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.27263157894736845, | |
| "grad_norm": 0.40050971508026123, | |
| "learning_rate": 9.0503563180963e-05, | |
| "loss": 1.667708969116211, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.2736842105263158, | |
| "grad_norm": 0.4005604684352875, | |
| "learning_rate": 9.037282695944726e-05, | |
| "loss": 1.6468616485595704, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.27473684210526317, | |
| "grad_norm": 0.40057310461997986, | |
| "learning_rate": 9.024166997267636e-05, | |
| "loss": 1.6907684326171875, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.27578947368421053, | |
| "grad_norm": 0.4074793756008148, | |
| "learning_rate": 9.011009371731623e-05, | |
| "loss": 1.6792390823364258, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.2768421052631579, | |
| "grad_norm": 0.4014405310153961, | |
| "learning_rate": 8.997809969481715e-05, | |
| "loss": 1.640324592590332, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.27789473684210525, | |
| "grad_norm": 0.42860186100006104, | |
| "learning_rate": 8.984568941139665e-05, | |
| "loss": 1.6390762329101562, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.2789473684210526, | |
| "grad_norm": 0.41278424859046936, | |
| "learning_rate": 8.971286437802235e-05, | |
| "loss": 1.7043113708496094, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.38656142354011536, | |
| "learning_rate": 8.957962611039464e-05, | |
| "loss": 1.7256532669067384, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.2810526315789474, | |
| "grad_norm": 0.3984103202819824, | |
| "learning_rate": 8.944597612892944e-05, | |
| "loss": 1.6301074981689454, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.28210526315789475, | |
| "grad_norm": 0.3937322795391083, | |
| "learning_rate": 8.93119159587409e-05, | |
| "loss": 1.6612771987915038, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.2831578947368421, | |
| "grad_norm": 0.39241543412208557, | |
| "learning_rate": 8.917744712962387e-05, | |
| "loss": 1.6962703704833983, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.28421052631578947, | |
| "grad_norm": 0.407466858625412, | |
| "learning_rate": 8.904257117603653e-05, | |
| "loss": 1.721807861328125, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.28526315789473683, | |
| "grad_norm": 0.3965199589729309, | |
| "learning_rate": 8.890728963708288e-05, | |
| "loss": 1.6854072570800782, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.2863157894736842, | |
| "grad_norm": 0.3866688013076782, | |
| "learning_rate": 8.877160405649515e-05, | |
| "loss": 1.678403663635254, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.2873684210526316, | |
| "grad_norm": 0.40115654468536377, | |
| "learning_rate": 8.863551598261618e-05, | |
| "loss": 1.688330078125, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.28842105263157897, | |
| "grad_norm": 0.41881707310676575, | |
| "learning_rate": 8.849902696838176e-05, | |
| "loss": 1.685501480102539, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.2894736842105263, | |
| "grad_norm": 0.3956238329410553, | |
| "learning_rate": 8.836213857130296e-05, | |
| "loss": 1.6521308898925782, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.2905263157894737, | |
| "grad_norm": 0.3809671700000763, | |
| "learning_rate": 8.822485235344825e-05, | |
| "loss": 1.6597816467285156, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.29157894736842105, | |
| "grad_norm": 0.39534077048301697, | |
| "learning_rate": 8.808716988142575e-05, | |
| "loss": 1.6627084732055664, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.2926315789473684, | |
| "grad_norm": 0.37715721130371094, | |
| "learning_rate": 8.794909272636537e-05, | |
| "loss": 1.6618637084960937, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.29368421052631577, | |
| "grad_norm": 0.4065514802932739, | |
| "learning_rate": 8.781062246390083e-05, | |
| "loss": 1.6399276733398438, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.29473684210526313, | |
| "grad_norm": 0.3923916220664978, | |
| "learning_rate": 8.767176067415169e-05, | |
| "loss": 1.668557357788086, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.29578947368421055, | |
| "grad_norm": 0.3970358967781067, | |
| "learning_rate": 8.75325089417053e-05, | |
| "loss": 1.6664169311523438, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.2968421052631579, | |
| "grad_norm": 0.4063076078891754, | |
| "learning_rate": 8.739286885559882e-05, | |
| "loss": 1.718800163269043, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.29789473684210527, | |
| "grad_norm": 0.41235899925231934, | |
| "learning_rate": 8.725284200930096e-05, | |
| "loss": 1.6484018325805665, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.29894736842105263, | |
| "grad_norm": 0.41001883149147034, | |
| "learning_rate": 8.711243000069387e-05, | |
| "loss": 1.6729150772094727, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.40411022305488586, | |
| "learning_rate": 8.697163443205486e-05, | |
| "loss": 1.6615083694458008, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.30105263157894735, | |
| "grad_norm": 0.3862515389919281, | |
| "learning_rate": 8.683045691003816e-05, | |
| "loss": 1.6196592330932618, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.3021052631578947, | |
| "grad_norm": 0.385047972202301, | |
| "learning_rate": 8.668889904565657e-05, | |
| "loss": 1.6499458312988282, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.3031578947368421, | |
| "grad_norm": 0.385885626077652, | |
| "learning_rate": 8.654696245426309e-05, | |
| "loss": 1.6544832229614257, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.3042105263157895, | |
| "grad_norm": 0.39182907342910767, | |
| "learning_rate": 8.640464875553244e-05, | |
| "loss": 1.6151403427124023, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.30526315789473685, | |
| "grad_norm": 0.37692710757255554, | |
| "learning_rate": 8.626195957344259e-05, | |
| "loss": 1.7116943359375, | |
| "step": 2900 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 9500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.884603437744128e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |