Image-Text-to-Text
Transformers
Safetensors
lfm2_vl
Generated from Trainer
unsloth
trl
sft
conversational
Instructions to use Ba2han/l-cpt-1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ba2han/l-cpt-1 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="Ba2han/l-cpt-1") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("Ba2han/l-cpt-1") model = AutoModelForImageTextToText.from_pretrained("Ba2han/l-cpt-1") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Ba2han/l-cpt-1 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "Ba2han/l-cpt-1" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/l-cpt-1", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/Ba2han/l-cpt-1
- SGLang
How to use Ba2han/l-cpt-1 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "Ba2han/l-cpt-1" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/l-cpt-1", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "Ba2han/l-cpt-1" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/l-cpt-1", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Unsloth Studio
How to use Ba2han/l-cpt-1 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/l-cpt-1 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/l-cpt-1 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required # Open https://huggingface.co/spaces/unsloth/studio in your browser # Search for Ba2han/l-cpt-1 to start chatting
Load model with FastModel
pip install unsloth from unsloth import FastModel model, tokenizer = FastModel.from_pretrained( model_name="Ba2han/l-cpt-1", max_seq_length=2048, ) - Docker Model Runner
How to use Ba2han/l-cpt-1 with Docker Model Runner:
docker model run hf.co/Ba2han/l-cpt-1
Training in progress, step 11000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3193334216
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf476373257c3648ba4ab1aca756d1238ee8cdb9e819383637f2adb5e8fc9874
|
| 3 |
size 3193334216
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3464388157
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fb747f5bc3af38f67c6efd46e9f31d77e68f221662910efc6fcd741dcbe0bd6
|
| 3 |
size 3464388157
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1b7a8930db3356c00cbcb04b613e6045c1d0fef4335e71a16302360492013c2
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -73508,6 +73508,3506 @@
|
|
| 73508 |
"learning_rate": 0.0005964929933549236,
|
| 73509 |
"loss": 1.3805874586105347,
|
| 73510 |
"step": 10500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73511 |
}
|
| 73512 |
],
|
| 73513 |
"logging_steps": 1,
|
|
@@ -73527,7 +77027,7 @@
|
|
| 73527 |
"attributes": {}
|
| 73528 |
}
|
| 73529 |
},
|
| 73530 |
-
"total_flos":
|
| 73531 |
"train_batch_size": 1,
|
| 73532 |
"trial_name": null,
|
| 73533 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.9586435194200207,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 11000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 73508 |
"learning_rate": 0.0005964929933549236,
|
| 73509 |
"loss": 1.3805874586105347,
|
| 73510 |
"step": 10500
|
| 73511 |
+
},
|
| 73512 |
+
{
|
| 73513 |
+
"epoch": 0.9151559634026942,
|
| 73514 |
+
"grad_norm": 0.62890625,
|
| 73515 |
+
"learning_rate": 0.0005958120595250271,
|
| 73516 |
+
"loss": 1.3996551036834717,
|
| 73517 |
+
"step": 10501
|
| 73518 |
+
},
|
| 73519 |
+
{
|
| 73520 |
+
"epoch": 0.9152431128135506,
|
| 73521 |
+
"grad_norm": 0.55859375,
|
| 73522 |
+
"learning_rate": 0.0005951313502807226,
|
| 73523 |
+
"loss": 1.3089829683303833,
|
| 73524 |
+
"step": 10502
|
| 73525 |
+
},
|
| 73526 |
+
{
|
| 73527 |
+
"epoch": 0.915330262224407,
|
| 73528 |
+
"grad_norm": 0.62109375,
|
| 73529 |
+
"learning_rate": 0.0005944508659997757,
|
| 73530 |
+
"loss": 1.3411579132080078,
|
| 73531 |
+
"step": 10503
|
| 73532 |
+
},
|
| 73533 |
+
{
|
| 73534 |
+
"epoch": 0.9154174116352634,
|
| 73535 |
+
"grad_norm": 0.515625,
|
| 73536 |
+
"learning_rate": 0.0005937706070598278,
|
| 73537 |
+
"loss": 1.4157911539077759,
|
| 73538 |
+
"step": 10504
|
| 73539 |
+
},
|
| 73540 |
+
{
|
| 73541 |
+
"epoch": 0.9155045610461198,
|
| 73542 |
+
"grad_norm": 0.6328125,
|
| 73543 |
+
"learning_rate": 0.0005930905738383965,
|
| 73544 |
+
"loss": 1.318338394165039,
|
| 73545 |
+
"step": 10505
|
| 73546 |
+
},
|
| 73547 |
+
{
|
| 73548 |
+
"epoch": 0.9155917104569761,
|
| 73549 |
+
"grad_norm": 0.59375,
|
| 73550 |
+
"learning_rate": 0.0005924107667128726,
|
| 73551 |
+
"loss": 1.3024048805236816,
|
| 73552 |
+
"step": 10506
|
| 73553 |
+
},
|
| 73554 |
+
{
|
| 73555 |
+
"epoch": 0.9156788598678325,
|
| 73556 |
+
"grad_norm": 0.60546875,
|
| 73557 |
+
"learning_rate": 0.0005917311860605213,
|
| 73558 |
+
"loss": 1.3165955543518066,
|
| 73559 |
+
"step": 10507
|
| 73560 |
+
},
|
| 73561 |
+
{
|
| 73562 |
+
"epoch": 0.9157660092786888,
|
| 73563 |
+
"grad_norm": 0.64453125,
|
| 73564 |
+
"learning_rate": 0.0005910518322584838,
|
| 73565 |
+
"loss": 1.3904566764831543,
|
| 73566 |
+
"step": 10508
|
| 73567 |
+
},
|
| 73568 |
+
{
|
| 73569 |
+
"epoch": 0.9158531586895452,
|
| 73570 |
+
"grad_norm": 0.5859375,
|
| 73571 |
+
"learning_rate": 0.0005903727056837727,
|
| 73572 |
+
"loss": 1.4618806838989258,
|
| 73573 |
+
"step": 10509
|
| 73574 |
+
},
|
| 73575 |
+
{
|
| 73576 |
+
"epoch": 0.9159403081004016,
|
| 73577 |
+
"grad_norm": 0.640625,
|
| 73578 |
+
"learning_rate": 0.0005896938067132759,
|
| 73579 |
+
"loss": 1.2961794137954712,
|
| 73580 |
+
"step": 10510
|
| 73581 |
+
},
|
| 73582 |
+
{
|
| 73583 |
+
"epoch": 0.9160274575112579,
|
| 73584 |
+
"grad_norm": 0.609375,
|
| 73585 |
+
"learning_rate": 0.0005890151357237569,
|
| 73586 |
+
"loss": 1.3977863788604736,
|
| 73587 |
+
"step": 10511
|
| 73588 |
+
},
|
| 73589 |
+
{
|
| 73590 |
+
"epoch": 0.9161146069221143,
|
| 73591 |
+
"grad_norm": 0.6328125,
|
| 73592 |
+
"learning_rate": 0.00058833669309185,
|
| 73593 |
+
"loss": 1.3360893726348877,
|
| 73594 |
+
"step": 10512
|
| 73595 |
+
},
|
| 73596 |
+
{
|
| 73597 |
+
"epoch": 0.9162017563329706,
|
| 73598 |
+
"grad_norm": 0.6484375,
|
| 73599 |
+
"learning_rate": 0.0005876584791940637,
|
| 73600 |
+
"loss": 1.3887076377868652,
|
| 73601 |
+
"step": 10513
|
| 73602 |
+
},
|
| 73603 |
+
{
|
| 73604 |
+
"epoch": 0.916288905743827,
|
| 73605 |
+
"grad_norm": 0.625,
|
| 73606 |
+
"learning_rate": 0.0005869804944067799,
|
| 73607 |
+
"loss": 1.6108146905899048,
|
| 73608 |
+
"step": 10514
|
| 73609 |
+
},
|
| 73610 |
+
{
|
| 73611 |
+
"epoch": 0.9163760551546833,
|
| 73612 |
+
"grad_norm": 0.56640625,
|
| 73613 |
+
"learning_rate": 0.000586302739106251,
|
| 73614 |
+
"loss": 1.4226261377334595,
|
| 73615 |
+
"step": 10515
|
| 73616 |
+
},
|
| 73617 |
+
{
|
| 73618 |
+
"epoch": 0.9164632045655398,
|
| 73619 |
+
"grad_norm": 0.609375,
|
| 73620 |
+
"learning_rate": 0.0005856252136686069,
|
| 73621 |
+
"loss": 1.4893242120742798,
|
| 73622 |
+
"step": 10516
|
| 73623 |
+
},
|
| 73624 |
+
{
|
| 73625 |
+
"epoch": 0.9165503539763962,
|
| 73626 |
+
"grad_norm": 0.578125,
|
| 73627 |
+
"learning_rate": 0.0005849479184698455,
|
| 73628 |
+
"loss": 1.3504881858825684,
|
| 73629 |
+
"step": 10517
|
| 73630 |
+
},
|
| 73631 |
+
{
|
| 73632 |
+
"epoch": 0.9166375033872525,
|
| 73633 |
+
"grad_norm": 0.7265625,
|
| 73634 |
+
"learning_rate": 0.0005842708538858389,
|
| 73635 |
+
"loss": 1.3939337730407715,
|
| 73636 |
+
"step": 10518
|
| 73637 |
+
},
|
| 73638 |
+
{
|
| 73639 |
+
"epoch": 0.9167246527981089,
|
| 73640 |
+
"grad_norm": 0.59375,
|
| 73641 |
+
"learning_rate": 0.0005835940202923306,
|
| 73642 |
+
"loss": 1.3288497924804688,
|
| 73643 |
+
"step": 10519
|
| 73644 |
+
},
|
| 73645 |
+
{
|
| 73646 |
+
"epoch": 0.9168118022089652,
|
| 73647 |
+
"grad_norm": 0.63671875,
|
| 73648 |
+
"learning_rate": 0.0005829174180649356,
|
| 73649 |
+
"loss": 1.4457199573516846,
|
| 73650 |
+
"step": 10520
|
| 73651 |
+
},
|
| 73652 |
+
{
|
| 73653 |
+
"epoch": 0.9168989516198216,
|
| 73654 |
+
"grad_norm": 0.5703125,
|
| 73655 |
+
"learning_rate": 0.0005822410475791423,
|
| 73656 |
+
"loss": 1.4103813171386719,
|
| 73657 |
+
"step": 10521
|
| 73658 |
+
},
|
| 73659 |
+
{
|
| 73660 |
+
"epoch": 0.9169861010306779,
|
| 73661 |
+
"grad_norm": 0.5859375,
|
| 73662 |
+
"learning_rate": 0.0005815649092103085,
|
| 73663 |
+
"loss": 1.4175074100494385,
|
| 73664 |
+
"step": 10522
|
| 73665 |
+
},
|
| 73666 |
+
{
|
| 73667 |
+
"epoch": 0.9170732504415343,
|
| 73668 |
+
"grad_norm": 0.5546875,
|
| 73669 |
+
"learning_rate": 0.0005808890033336642,
|
| 73670 |
+
"loss": 1.3463209867477417,
|
| 73671 |
+
"step": 10523
|
| 73672 |
+
},
|
| 73673 |
+
{
|
| 73674 |
+
"epoch": 0.9171603998523907,
|
| 73675 |
+
"grad_norm": 0.61328125,
|
| 73676 |
+
"learning_rate": 0.0005802133303243096,
|
| 73677 |
+
"loss": 1.4614325761795044,
|
| 73678 |
+
"step": 10524
|
| 73679 |
+
},
|
| 73680 |
+
{
|
| 73681 |
+
"epoch": 0.917247549263247,
|
| 73682 |
+
"grad_norm": 0.62109375,
|
| 73683 |
+
"learning_rate": 0.0005795378905572166,
|
| 73684 |
+
"loss": 1.3758034706115723,
|
| 73685 |
+
"step": 10525
|
| 73686 |
+
},
|
| 73687 |
+
{
|
| 73688 |
+
"epoch": 0.9173346986741034,
|
| 73689 |
+
"grad_norm": 0.5625,
|
| 73690 |
+
"learning_rate": 0.0005788626844072265,
|
| 73691 |
+
"loss": 1.3858534097671509,
|
| 73692 |
+
"step": 10526
|
| 73693 |
+
},
|
| 73694 |
+
{
|
| 73695 |
+
"epoch": 0.9174218480849597,
|
| 73696 |
+
"grad_norm": 0.60546875,
|
| 73697 |
+
"learning_rate": 0.0005781877122490526,
|
| 73698 |
+
"loss": 1.4781090021133423,
|
| 73699 |
+
"step": 10527
|
| 73700 |
+
},
|
| 73701 |
+
{
|
| 73702 |
+
"epoch": 0.9175089974958162,
|
| 73703 |
+
"grad_norm": 0.609375,
|
| 73704 |
+
"learning_rate": 0.0005775129744572769,
|
| 73705 |
+
"loss": 1.4332325458526611,
|
| 73706 |
+
"step": 10528
|
| 73707 |
+
},
|
| 73708 |
+
{
|
| 73709 |
+
"epoch": 0.9175961469066725,
|
| 73710 |
+
"grad_norm": 0.55859375,
|
| 73711 |
+
"learning_rate": 0.000576838471406352,
|
| 73712 |
+
"loss": 1.375857949256897,
|
| 73713 |
+
"step": 10529
|
| 73714 |
+
},
|
| 73715 |
+
{
|
| 73716 |
+
"epoch": 0.9176832963175289,
|
| 73717 |
+
"grad_norm": 0.58984375,
|
| 73718 |
+
"learning_rate": 0.0005761642034705997,
|
| 73719 |
+
"loss": 1.4588314294815063,
|
| 73720 |
+
"step": 10530
|
| 73721 |
+
},
|
| 73722 |
+
{
|
| 73723 |
+
"epoch": 0.9177704457283853,
|
| 73724 |
+
"grad_norm": 0.58203125,
|
| 73725 |
+
"learning_rate": 0.0005754901710242113,
|
| 73726 |
+
"loss": 1.3294475078582764,
|
| 73727 |
+
"step": 10531
|
| 73728 |
+
},
|
| 73729 |
+
{
|
| 73730 |
+
"epoch": 0.9178575951392416,
|
| 73731 |
+
"grad_norm": 0.6484375,
|
| 73732 |
+
"learning_rate": 0.0005748163744412494,
|
| 73733 |
+
"loss": 1.5025684833526611,
|
| 73734 |
+
"step": 10532
|
| 73735 |
+
},
|
| 73736 |
+
{
|
| 73737 |
+
"epoch": 0.917944744550098,
|
| 73738 |
+
"grad_norm": 0.640625,
|
| 73739 |
+
"learning_rate": 0.000574142814095643,
|
| 73740 |
+
"loss": 1.3892470598220825,
|
| 73741 |
+
"step": 10533
|
| 73742 |
+
},
|
| 73743 |
+
{
|
| 73744 |
+
"epoch": 0.9180318939609543,
|
| 73745 |
+
"grad_norm": 0.58203125,
|
| 73746 |
+
"learning_rate": 0.0005734694903611913,
|
| 73747 |
+
"loss": 1.4492141008377075,
|
| 73748 |
+
"step": 10534
|
| 73749 |
+
},
|
| 73750 |
+
{
|
| 73751 |
+
"epoch": 0.9181190433718107,
|
| 73752 |
+
"grad_norm": 0.59765625,
|
| 73753 |
+
"learning_rate": 0.000572796403611562,
|
| 73754 |
+
"loss": 1.2923322916030884,
|
| 73755 |
+
"step": 10535
|
| 73756 |
+
},
|
| 73757 |
+
{
|
| 73758 |
+
"epoch": 0.918206192782667,
|
| 73759 |
+
"grad_norm": 0.65234375,
|
| 73760 |
+
"learning_rate": 0.0005721235542202904,
|
| 73761 |
+
"loss": 1.5104238986968994,
|
| 73762 |
+
"step": 10536
|
| 73763 |
+
},
|
| 73764 |
+
{
|
| 73765 |
+
"epoch": 0.9182933421935234,
|
| 73766 |
+
"grad_norm": 0.671875,
|
| 73767 |
+
"learning_rate": 0.0005714509425607825,
|
| 73768 |
+
"loss": 1.4007127285003662,
|
| 73769 |
+
"step": 10537
|
| 73770 |
+
},
|
| 73771 |
+
{
|
| 73772 |
+
"epoch": 0.9183804916043798,
|
| 73773 |
+
"grad_norm": 0.57421875,
|
| 73774 |
+
"learning_rate": 0.0005707785690063103,
|
| 73775 |
+
"loss": 1.3198069334030151,
|
| 73776 |
+
"step": 10538
|
| 73777 |
+
},
|
| 73778 |
+
{
|
| 73779 |
+
"epoch": 0.9184676410152361,
|
| 73780 |
+
"grad_norm": 0.62109375,
|
| 73781 |
+
"learning_rate": 0.0005701064339300137,
|
| 73782 |
+
"loss": 1.190271258354187,
|
| 73783 |
+
"step": 10539
|
| 73784 |
+
},
|
| 73785 |
+
{
|
| 73786 |
+
"epoch": 0.9185547904260926,
|
| 73787 |
+
"grad_norm": 0.59765625,
|
| 73788 |
+
"learning_rate": 0.0005694345377049013,
|
| 73789 |
+
"loss": 1.3525913953781128,
|
| 73790 |
+
"step": 10540
|
| 73791 |
+
},
|
| 73792 |
+
{
|
| 73793 |
+
"epoch": 0.9186419398369489,
|
| 73794 |
+
"grad_norm": 0.5546875,
|
| 73795 |
+
"learning_rate": 0.0005687628807038482,
|
| 73796 |
+
"loss": 1.1677380800247192,
|
| 73797 |
+
"step": 10541
|
| 73798 |
+
},
|
| 73799 |
+
{
|
| 73800 |
+
"epoch": 0.9187290892478053,
|
| 73801 |
+
"grad_norm": 0.60546875,
|
| 73802 |
+
"learning_rate": 0.0005680914632995964,
|
| 73803 |
+
"loss": 1.4719948768615723,
|
| 73804 |
+
"step": 10542
|
| 73805 |
+
},
|
| 73806 |
+
{
|
| 73807 |
+
"epoch": 0.9188162386586616,
|
| 73808 |
+
"grad_norm": 0.5625,
|
| 73809 |
+
"learning_rate": 0.0005674202858647577,
|
| 73810 |
+
"loss": 1.3417930603027344,
|
| 73811 |
+
"step": 10543
|
| 73812 |
+
},
|
| 73813 |
+
{
|
| 73814 |
+
"epoch": 0.918903388069518,
|
| 73815 |
+
"grad_norm": 0.61328125,
|
| 73816 |
+
"learning_rate": 0.0005667493487718071,
|
| 73817 |
+
"loss": 1.3469139337539673,
|
| 73818 |
+
"step": 10544
|
| 73819 |
+
},
|
| 73820 |
+
{
|
| 73821 |
+
"epoch": 0.9189905374803744,
|
| 73822 |
+
"grad_norm": 0.6171875,
|
| 73823 |
+
"learning_rate": 0.0005660786523930881,
|
| 73824 |
+
"loss": 1.4537931680679321,
|
| 73825 |
+
"step": 10545
|
| 73826 |
+
},
|
| 73827 |
+
{
|
| 73828 |
+
"epoch": 0.9190776868912307,
|
| 73829 |
+
"grad_norm": 0.5859375,
|
| 73830 |
+
"learning_rate": 0.0005654081971008105,
|
| 73831 |
+
"loss": 1.2847542762756348,
|
| 73832 |
+
"step": 10546
|
| 73833 |
+
},
|
| 73834 |
+
{
|
| 73835 |
+
"epoch": 0.9191648363020871,
|
| 73836 |
+
"grad_norm": 0.62109375,
|
| 73837 |
+
"learning_rate": 0.0005647379832670486,
|
| 73838 |
+
"loss": 1.3840348720550537,
|
| 73839 |
+
"step": 10547
|
| 73840 |
+
},
|
| 73841 |
+
{
|
| 73842 |
+
"epoch": 0.9192519857129434,
|
| 73843 |
+
"grad_norm": 0.6015625,
|
| 73844 |
+
"learning_rate": 0.000564068011263747,
|
| 73845 |
+
"loss": 1.2110737562179565,
|
| 73846 |
+
"step": 10548
|
| 73847 |
+
},
|
| 73848 |
+
{
|
| 73849 |
+
"epoch": 0.9193391351237998,
|
| 73850 |
+
"grad_norm": 0.5859375,
|
| 73851 |
+
"learning_rate": 0.0005633982814627125,
|
| 73852 |
+
"loss": 1.4413111209869385,
|
| 73853 |
+
"step": 10549
|
| 73854 |
+
},
|
| 73855 |
+
{
|
| 73856 |
+
"epoch": 0.9194262845346561,
|
| 73857 |
+
"grad_norm": 0.57421875,
|
| 73858 |
+
"learning_rate": 0.0005627287942356171,
|
| 73859 |
+
"loss": 1.4561347961425781,
|
| 73860 |
+
"step": 10550
|
| 73861 |
+
},
|
| 73862 |
+
{
|
| 73863 |
+
"epoch": 0.9195134339455125,
|
| 73864 |
+
"grad_norm": 0.60546875,
|
| 73865 |
+
"learning_rate": 0.000562059549954,
|
| 73866 |
+
"loss": 1.3817965984344482,
|
| 73867 |
+
"step": 10551
|
| 73868 |
+
},
|
| 73869 |
+
{
|
| 73870 |
+
"epoch": 0.919600583356369,
|
| 73871 |
+
"grad_norm": 0.63671875,
|
| 73872 |
+
"learning_rate": 0.0005613905489892645,
|
| 73873 |
+
"loss": 1.3496308326721191,
|
| 73874 |
+
"step": 10552
|
| 73875 |
+
},
|
| 73876 |
+
{
|
| 73877 |
+
"epoch": 0.9196877327672253,
|
| 73878 |
+
"grad_norm": 0.60546875,
|
| 73879 |
+
"learning_rate": 0.0005607217917126804,
|
| 73880 |
+
"loss": 1.4852162599563599,
|
| 73881 |
+
"step": 10553
|
| 73882 |
+
},
|
| 73883 |
+
{
|
| 73884 |
+
"epoch": 0.9197748821780817,
|
| 73885 |
+
"grad_norm": 0.59765625,
|
| 73886 |
+
"learning_rate": 0.0005600532784953802,
|
| 73887 |
+
"loss": 1.295851230621338,
|
| 73888 |
+
"step": 10554
|
| 73889 |
+
},
|
| 73890 |
+
{
|
| 73891 |
+
"epoch": 0.919862031588938,
|
| 73892 |
+
"grad_norm": 0.625,
|
| 73893 |
+
"learning_rate": 0.0005593850097083633,
|
| 73894 |
+
"loss": 1.326664924621582,
|
| 73895 |
+
"step": 10555
|
| 73896 |
+
},
|
| 73897 |
+
{
|
| 73898 |
+
"epoch": 0.9199491809997944,
|
| 73899 |
+
"grad_norm": 0.61328125,
|
| 73900 |
+
"learning_rate": 0.0005587169857224907,
|
| 73901 |
+
"loss": 1.4255481958389282,
|
| 73902 |
+
"step": 10556
|
| 73903 |
+
},
|
| 73904 |
+
{
|
| 73905 |
+
"epoch": 0.9200363304106507,
|
| 73906 |
+
"grad_norm": 0.66796875,
|
| 73907 |
+
"learning_rate": 0.0005580492069084894,
|
| 73908 |
+
"loss": 1.3229376077651978,
|
| 73909 |
+
"step": 10557
|
| 73910 |
+
},
|
| 73911 |
+
{
|
| 73912 |
+
"epoch": 0.9201234798215071,
|
| 73913 |
+
"grad_norm": 0.6171875,
|
| 73914 |
+
"learning_rate": 0.0005573816736369492,
|
| 73915 |
+
"loss": 1.453386902809143,
|
| 73916 |
+
"step": 10558
|
| 73917 |
+
},
|
| 73918 |
+
{
|
| 73919 |
+
"epoch": 0.9202106292323635,
|
| 73920 |
+
"grad_norm": 0.609375,
|
| 73921 |
+
"learning_rate": 0.0005567143862783259,
|
| 73922 |
+
"loss": 1.2901363372802734,
|
| 73923 |
+
"step": 10559
|
| 73924 |
+
},
|
| 73925 |
+
{
|
| 73926 |
+
"epoch": 0.9202977786432198,
|
| 73927 |
+
"grad_norm": 0.58203125,
|
| 73928 |
+
"learning_rate": 0.0005560473452029361,
|
| 73929 |
+
"loss": 1.40326726436615,
|
| 73930 |
+
"step": 10560
|
| 73931 |
+
},
|
| 73932 |
+
{
|
| 73933 |
+
"epoch": 0.9203849280540762,
|
| 73934 |
+
"grad_norm": 0.578125,
|
| 73935 |
+
"learning_rate": 0.0005553805507809616,
|
| 73936 |
+
"loss": 1.39547860622406,
|
| 73937 |
+
"step": 10561
|
| 73938 |
+
},
|
| 73939 |
+
{
|
| 73940 |
+
"epoch": 0.9204720774649325,
|
| 73941 |
+
"grad_norm": 0.59375,
|
| 73942 |
+
"learning_rate": 0.0005547140033824458,
|
| 73943 |
+
"loss": 1.3772691488265991,
|
| 73944 |
+
"step": 10562
|
| 73945 |
+
},
|
| 73946 |
+
{
|
| 73947 |
+
"epoch": 0.920559226875789,
|
| 73948 |
+
"grad_norm": 0.5546875,
|
| 73949 |
+
"learning_rate": 0.000554047703377295,
|
| 73950 |
+
"loss": 1.2665115594863892,
|
| 73951 |
+
"step": 10563
|
| 73952 |
+
},
|
| 73953 |
+
{
|
| 73954 |
+
"epoch": 0.9206463762866453,
|
| 73955 |
+
"grad_norm": 0.5859375,
|
| 73956 |
+
"learning_rate": 0.0005533816511352811,
|
| 73957 |
+
"loss": 1.4510654211044312,
|
| 73958 |
+
"step": 10564
|
| 73959 |
+
},
|
| 73960 |
+
{
|
| 73961 |
+
"epoch": 0.9207335256975017,
|
| 73962 |
+
"grad_norm": 0.625,
|
| 73963 |
+
"learning_rate": 0.0005527158470260357,
|
| 73964 |
+
"loss": 1.5079319477081299,
|
| 73965 |
+
"step": 10565
|
| 73966 |
+
},
|
| 73967 |
+
{
|
| 73968 |
+
"epoch": 0.9208206751083581,
|
| 73969 |
+
"grad_norm": 0.62109375,
|
| 73970 |
+
"learning_rate": 0.000552050291419053,
|
| 73971 |
+
"loss": 1.4127393960952759,
|
| 73972 |
+
"step": 10566
|
| 73973 |
+
},
|
| 73974 |
+
{
|
| 73975 |
+
"epoch": 0.9209078245192144,
|
| 73976 |
+
"grad_norm": 0.63671875,
|
| 73977 |
+
"learning_rate": 0.0005513849846836902,
|
| 73978 |
+
"loss": 1.3956263065338135,
|
| 73979 |
+
"step": 10567
|
| 73980 |
+
},
|
| 73981 |
+
{
|
| 73982 |
+
"epoch": 0.9209949739300708,
|
| 73983 |
+
"grad_norm": 0.6796875,
|
| 73984 |
+
"learning_rate": 0.0005507199271891644,
|
| 73985 |
+
"loss": 1.4360769987106323,
|
| 73986 |
+
"step": 10568
|
| 73987 |
+
},
|
| 73988 |
+
{
|
| 73989 |
+
"epoch": 0.9210821233409271,
|
| 73990 |
+
"grad_norm": 0.640625,
|
| 73991 |
+
"learning_rate": 0.0005500551193045577,
|
| 73992 |
+
"loss": 1.4644925594329834,
|
| 73993 |
+
"step": 10569
|
| 73994 |
+
},
|
| 73995 |
+
{
|
| 73996 |
+
"epoch": 0.9211692727517835,
|
| 73997 |
+
"grad_norm": 0.59765625,
|
| 73998 |
+
"learning_rate": 0.0005493905613988112,
|
| 73999 |
+
"loss": 1.3501391410827637,
|
| 74000 |
+
"step": 10570
|
| 74001 |
+
},
|
| 74002 |
+
{
|
| 74003 |
+
"epoch": 0.9212564221626398,
|
| 74004 |
+
"grad_norm": 0.59375,
|
| 74005 |
+
"learning_rate": 0.0005487262538407272,
|
| 74006 |
+
"loss": 1.3831145763397217,
|
| 74007 |
+
"step": 10571
|
| 74008 |
+
},
|
| 74009 |
+
{
|
| 74010 |
+
"epoch": 0.9213435715734962,
|
| 74011 |
+
"grad_norm": 0.66015625,
|
| 74012 |
+
"learning_rate": 0.000548062196998971,
|
| 74013 |
+
"loss": 1.3456957340240479,
|
| 74014 |
+
"step": 10572
|
| 74015 |
+
},
|
| 74016 |
+
{
|
| 74017 |
+
"epoch": 0.9214307209843526,
|
| 74018 |
+
"grad_norm": 0.6015625,
|
| 74019 |
+
"learning_rate": 0.0005473983912420673,
|
| 74020 |
+
"loss": 1.2185256481170654,
|
| 74021 |
+
"step": 10573
|
| 74022 |
+
},
|
| 74023 |
+
{
|
| 74024 |
+
"epoch": 0.9215178703952089,
|
| 74025 |
+
"grad_norm": 0.62890625,
|
| 74026 |
+
"learning_rate": 0.0005467348369384002,
|
| 74027 |
+
"loss": 1.3591208457946777,
|
| 74028 |
+
"step": 10574
|
| 74029 |
+
},
|
| 74030 |
+
{
|
| 74031 |
+
"epoch": 0.9216050198060654,
|
| 74032 |
+
"grad_norm": 0.62890625,
|
| 74033 |
+
"learning_rate": 0.0005460715344562175,
|
| 74034 |
+
"loss": 1.4143370389938354,
|
| 74035 |
+
"step": 10575
|
| 74036 |
+
},
|
| 74037 |
+
{
|
| 74038 |
+
"epoch": 0.9216921692169217,
|
| 74039 |
+
"grad_norm": 0.671875,
|
| 74040 |
+
"learning_rate": 0.0005454084841636253,
|
| 74041 |
+
"loss": 1.4549078941345215,
|
| 74042 |
+
"step": 10576
|
| 74043 |
+
},
|
| 74044 |
+
{
|
| 74045 |
+
"epoch": 0.9217793186277781,
|
| 74046 |
+
"grad_norm": 0.59765625,
|
| 74047 |
+
"learning_rate": 0.0005447456864285891,
|
| 74048 |
+
"loss": 1.3903030157089233,
|
| 74049 |
+
"step": 10577
|
| 74050 |
+
},
|
| 74051 |
+
{
|
| 74052 |
+
"epoch": 0.9218664680386344,
|
| 74053 |
+
"grad_norm": 0.59375,
|
| 74054 |
+
"learning_rate": 0.0005440831416189357,
|
| 74055 |
+
"loss": 1.5252403020858765,
|
| 74056 |
+
"step": 10578
|
| 74057 |
+
},
|
| 74058 |
+
{
|
| 74059 |
+
"epoch": 0.9219536174494908,
|
| 74060 |
+
"grad_norm": 0.6640625,
|
| 74061 |
+
"learning_rate": 0.0005434208501023498,
|
| 74062 |
+
"loss": 1.3850946426391602,
|
| 74063 |
+
"step": 10579
|
| 74064 |
+
},
|
| 74065 |
+
{
|
| 74066 |
+
"epoch": 0.9220407668603472,
|
| 74067 |
+
"grad_norm": 0.6328125,
|
| 74068 |
+
"learning_rate": 0.0005427588122463785,
|
| 74069 |
+
"loss": 1.3343126773834229,
|
| 74070 |
+
"step": 10580
|
| 74071 |
+
},
|
| 74072 |
+
{
|
| 74073 |
+
"epoch": 0.9221279162712035,
|
| 74074 |
+
"grad_norm": 0.6171875,
|
| 74075 |
+
"learning_rate": 0.0005420970284184253,
|
| 74076 |
+
"loss": 1.3930814266204834,
|
| 74077 |
+
"step": 10581
|
| 74078 |
+
},
|
| 74079 |
+
{
|
| 74080 |
+
"epoch": 0.9222150656820599,
|
| 74081 |
+
"grad_norm": 0.640625,
|
| 74082 |
+
"learning_rate": 0.000541435498985754,
|
| 74083 |
+
"loss": 1.4217712879180908,
|
| 74084 |
+
"step": 10582
|
| 74085 |
+
},
|
| 74086 |
+
{
|
| 74087 |
+
"epoch": 0.9223022150929162,
|
| 74088 |
+
"grad_norm": 0.61328125,
|
| 74089 |
+
"learning_rate": 0.0005407742243154865,
|
| 74090 |
+
"loss": 1.3846927881240845,
|
| 74091 |
+
"step": 10583
|
| 74092 |
+
},
|
| 74093 |
+
{
|
| 74094 |
+
"epoch": 0.9223893645037726,
|
| 74095 |
+
"grad_norm": 0.6171875,
|
| 74096 |
+
"learning_rate": 0.0005401132047746036,
|
| 74097 |
+
"loss": 1.378669023513794,
|
| 74098 |
+
"step": 10584
|
| 74099 |
+
},
|
| 74100 |
+
{
|
| 74101 |
+
"epoch": 0.922476513914629,
|
| 74102 |
+
"grad_norm": 0.70703125,
|
| 74103 |
+
"learning_rate": 0.0005394524407299454,
|
| 74104 |
+
"loss": 1.3566769361495972,
|
| 74105 |
+
"step": 10585
|
| 74106 |
+
},
|
| 74107 |
+
{
|
| 74108 |
+
"epoch": 0.9225636633254853,
|
| 74109 |
+
"grad_norm": 0.62890625,
|
| 74110 |
+
"learning_rate": 0.0005387919325482091,
|
| 74111 |
+
"loss": 1.3824747800827026,
|
| 74112 |
+
"step": 10586
|
| 74113 |
+
},
|
| 74114 |
+
{
|
| 74115 |
+
"epoch": 0.9226508127363418,
|
| 74116 |
+
"grad_norm": 0.58984375,
|
| 74117 |
+
"learning_rate": 0.0005381316805959499,
|
| 74118 |
+
"loss": 1.5102699995040894,
|
| 74119 |
+
"step": 10587
|
| 74120 |
+
},
|
| 74121 |
+
{
|
| 74122 |
+
"epoch": 0.922737962147198,
|
| 74123 |
+
"grad_norm": 0.640625,
|
| 74124 |
+
"learning_rate": 0.0005374716852395819,
|
| 74125 |
+
"loss": 1.500016212463379,
|
| 74126 |
+
"step": 10588
|
| 74127 |
+
},
|
| 74128 |
+
{
|
| 74129 |
+
"epoch": 0.9228251115580545,
|
| 74130 |
+
"grad_norm": 0.5546875,
|
| 74131 |
+
"learning_rate": 0.0005368119468453758,
|
| 74132 |
+
"loss": 1.219261646270752,
|
| 74133 |
+
"step": 10589
|
| 74134 |
+
},
|
| 74135 |
+
{
|
| 74136 |
+
"epoch": 0.9229122609689108,
|
| 74137 |
+
"grad_norm": 0.61328125,
|
| 74138 |
+
"learning_rate": 0.0005361524657794594,
|
| 74139 |
+
"loss": 1.3092141151428223,
|
| 74140 |
+
"step": 10590
|
| 74141 |
+
},
|
| 74142 |
+
{
|
| 74143 |
+
"epoch": 0.9229994103797672,
|
| 74144 |
+
"grad_norm": 0.58984375,
|
| 74145 |
+
"learning_rate": 0.0005354932424078192,
|
| 74146 |
+
"loss": 1.3061190843582153,
|
| 74147 |
+
"step": 10591
|
| 74148 |
+
},
|
| 74149 |
+
{
|
| 74150 |
+
"epoch": 0.9230865597906236,
|
| 74151 |
+
"grad_norm": 0.5546875,
|
| 74152 |
+
"learning_rate": 0.0005348342770962973,
|
| 74153 |
+
"loss": 1.3815596103668213,
|
| 74154 |
+
"step": 10592
|
| 74155 |
+
},
|
| 74156 |
+
{
|
| 74157 |
+
"epoch": 0.9231737092014799,
|
| 74158 |
+
"grad_norm": 0.69921875,
|
| 74159 |
+
"learning_rate": 0.0005341755702105931,
|
| 74160 |
+
"loss": 1.4937219619750977,
|
| 74161 |
+
"step": 10593
|
| 74162 |
+
},
|
| 74163 |
+
{
|
| 74164 |
+
"epoch": 0.9232608586123363,
|
| 74165 |
+
"grad_norm": 0.5859375,
|
| 74166 |
+
"learning_rate": 0.0005335171221162624,
|
| 74167 |
+
"loss": 1.3349745273590088,
|
| 74168 |
+
"step": 10594
|
| 74169 |
+
},
|
| 74170 |
+
{
|
| 74171 |
+
"epoch": 0.9233480080231926,
|
| 74172 |
+
"grad_norm": 0.70703125,
|
| 74173 |
+
"learning_rate": 0.0005328589331787166,
|
| 74174 |
+
"loss": 1.5849881172180176,
|
| 74175 |
+
"step": 10595
|
| 74176 |
+
},
|
| 74177 |
+
{
|
| 74178 |
+
"epoch": 0.923435157434049,
|
| 74179 |
+
"grad_norm": 0.59765625,
|
| 74180 |
+
"learning_rate": 0.0005322010037632264,
|
| 74181 |
+
"loss": 1.3104616403579712,
|
| 74182 |
+
"step": 10596
|
| 74183 |
+
},
|
| 74184 |
+
{
|
| 74185 |
+
"epoch": 0.9235223068449053,
|
| 74186 |
+
"grad_norm": 0.578125,
|
| 74187 |
+
"learning_rate": 0.0005315433342349149,
|
| 74188 |
+
"loss": 1.2885944843292236,
|
| 74189 |
+
"step": 10597
|
| 74190 |
+
},
|
| 74191 |
+
{
|
| 74192 |
+
"epoch": 0.9236094562557617,
|
| 74193 |
+
"grad_norm": 0.57421875,
|
| 74194 |
+
"learning_rate": 0.0005308859249587625,
|
| 74195 |
+
"loss": 1.2921130657196045,
|
| 74196 |
+
"step": 10598
|
| 74197 |
+
},
|
| 74198 |
+
{
|
| 74199 |
+
"epoch": 0.9236966056666182,
|
| 74200 |
+
"grad_norm": 0.66796875,
|
| 74201 |
+
"learning_rate": 0.0005302287762996049,
|
| 74202 |
+
"loss": 1.429068684577942,
|
| 74203 |
+
"step": 10599
|
| 74204 |
+
},
|
| 74205 |
+
{
|
| 74206 |
+
"epoch": 0.9237837550774745,
|
| 74207 |
+
"grad_norm": 0.65234375,
|
| 74208 |
+
"learning_rate": 0.0005295718886221327,
|
| 74209 |
+
"loss": 1.4047555923461914,
|
| 74210 |
+
"step": 10600
|
| 74211 |
+
},
|
| 74212 |
+
{
|
| 74213 |
+
"epoch": 0.9238709044883309,
|
| 74214 |
+
"grad_norm": 0.58984375,
|
| 74215 |
+
"learning_rate": 0.0005289152622908937,
|
| 74216 |
+
"loss": 1.5161041021347046,
|
| 74217 |
+
"step": 10601
|
| 74218 |
+
},
|
| 74219 |
+
{
|
| 74220 |
+
"epoch": 0.9239580538991872,
|
| 74221 |
+
"grad_norm": 0.609375,
|
| 74222 |
+
"learning_rate": 0.0005282588976702887,
|
| 74223 |
+
"loss": 1.2470475435256958,
|
| 74224 |
+
"step": 10602
|
| 74225 |
+
},
|
| 74226 |
+
{
|
| 74227 |
+
"epoch": 0.9240452033100436,
|
| 74228 |
+
"grad_norm": 0.6171875,
|
| 74229 |
+
"learning_rate": 0.0005276027951245727,
|
| 74230 |
+
"loss": 1.4848660230636597,
|
| 74231 |
+
"step": 10603
|
| 74232 |
+
},
|
| 74233 |
+
{
|
| 74234 |
+
"epoch": 0.9241323527208999,
|
| 74235 |
+
"grad_norm": 0.58984375,
|
| 74236 |
+
"learning_rate": 0.0005269469550178576,
|
| 74237 |
+
"loss": 1.2991611957550049,
|
| 74238 |
+
"step": 10604
|
| 74239 |
+
},
|
| 74240 |
+
{
|
| 74241 |
+
"epoch": 0.9242195021317563,
|
| 74242 |
+
"grad_norm": 0.640625,
|
| 74243 |
+
"learning_rate": 0.0005262913777141077,
|
| 74244 |
+
"loss": 1.3719066381454468,
|
| 74245 |
+
"step": 10605
|
| 74246 |
+
},
|
| 74247 |
+
{
|
| 74248 |
+
"epoch": 0.9243066515426127,
|
| 74249 |
+
"grad_norm": 0.63671875,
|
| 74250 |
+
"learning_rate": 0.0005256360635771416,
|
| 74251 |
+
"loss": 1.3915817737579346,
|
| 74252 |
+
"step": 10606
|
| 74253 |
+
},
|
| 74254 |
+
{
|
| 74255 |
+
"epoch": 0.924393800953469,
|
| 74256 |
+
"grad_norm": 0.640625,
|
| 74257 |
+
"learning_rate": 0.000524981012970634,
|
| 74258 |
+
"loss": 1.4314830303192139,
|
| 74259 |
+
"step": 10607
|
| 74260 |
+
},
|
| 74261 |
+
{
|
| 74262 |
+
"epoch": 0.9244809503643254,
|
| 74263 |
+
"grad_norm": 0.609375,
|
| 74264 |
+
"learning_rate": 0.0005243262262581111,
|
| 74265 |
+
"loss": 1.243580937385559,
|
| 74266 |
+
"step": 10608
|
| 74267 |
+
},
|
| 74268 |
+
{
|
| 74269 |
+
"epoch": 0.9245680997751817,
|
| 74270 |
+
"grad_norm": 0.65625,
|
| 74271 |
+
"learning_rate": 0.0005236717038029527,
|
| 74272 |
+
"loss": 1.568758249282837,
|
| 74273 |
+
"step": 10609
|
| 74274 |
+
},
|
| 74275 |
+
{
|
| 74276 |
+
"epoch": 0.9246552491860381,
|
| 74277 |
+
"grad_norm": 0.63671875,
|
| 74278 |
+
"learning_rate": 0.0005230174459683929,
|
| 74279 |
+
"loss": 1.4314721822738647,
|
| 74280 |
+
"step": 10610
|
| 74281 |
+
},
|
| 74282 |
+
{
|
| 74283 |
+
"epoch": 0.9247423985968944,
|
| 74284 |
+
"grad_norm": 0.625,
|
| 74285 |
+
"learning_rate": 0.0005223634531175173,
|
| 74286 |
+
"loss": 1.3028546571731567,
|
| 74287 |
+
"step": 10611
|
| 74288 |
+
},
|
| 74289 |
+
{
|
| 74290 |
+
"epoch": 0.9248295480077509,
|
| 74291 |
+
"grad_norm": 0.5859375,
|
| 74292 |
+
"learning_rate": 0.0005217097256132682,
|
| 74293 |
+
"loss": 1.4798426628112793,
|
| 74294 |
+
"step": 10612
|
| 74295 |
+
},
|
| 74296 |
+
{
|
| 74297 |
+
"epoch": 0.9249166974186073,
|
| 74298 |
+
"grad_norm": 0.67578125,
|
| 74299 |
+
"learning_rate": 0.0005210562638184369,
|
| 74300 |
+
"loss": 1.4144682884216309,
|
| 74301 |
+
"step": 10613
|
| 74302 |
+
},
|
| 74303 |
+
{
|
| 74304 |
+
"epoch": 0.9250038468294636,
|
| 74305 |
+
"grad_norm": 0.609375,
|
| 74306 |
+
"learning_rate": 0.0005204030680956691,
|
| 74307 |
+
"loss": 1.3268251419067383,
|
| 74308 |
+
"step": 10614
|
| 74309 |
+
},
|
| 74310 |
+
{
|
| 74311 |
+
"epoch": 0.92509099624032,
|
| 74312 |
+
"grad_norm": 0.5625,
|
| 74313 |
+
"learning_rate": 0.0005197501388074612,
|
| 74314 |
+
"loss": 1.3798325061798096,
|
| 74315 |
+
"step": 10615
|
| 74316 |
+
},
|
| 74317 |
+
{
|
| 74318 |
+
"epoch": 0.9251781456511763,
|
| 74319 |
+
"grad_norm": 0.55859375,
|
| 74320 |
+
"learning_rate": 0.0005190974763161626,
|
| 74321 |
+
"loss": 1.4024105072021484,
|
| 74322 |
+
"step": 10616
|
| 74323 |
+
},
|
| 74324 |
+
{
|
| 74325 |
+
"epoch": 0.9252652950620327,
|
| 74326 |
+
"grad_norm": 0.6640625,
|
| 74327 |
+
"learning_rate": 0.0005184450809839761,
|
| 74328 |
+
"loss": 1.3854591846466064,
|
| 74329 |
+
"step": 10617
|
| 74330 |
+
},
|
| 74331 |
+
{
|
| 74332 |
+
"epoch": 0.925352444472889,
|
| 74333 |
+
"grad_norm": 0.59375,
|
| 74334 |
+
"learning_rate": 0.0005177929531729546,
|
| 74335 |
+
"loss": 1.463329553604126,
|
| 74336 |
+
"step": 10618
|
| 74337 |
+
},
|
| 74338 |
+
{
|
| 74339 |
+
"epoch": 0.9254395938837454,
|
| 74340 |
+
"grad_norm": 0.5703125,
|
| 74341 |
+
"learning_rate": 0.0005171410932450028,
|
| 74342 |
+
"loss": 1.2485421895980835,
|
| 74343 |
+
"step": 10619
|
| 74344 |
+
},
|
| 74345 |
+
{
|
| 74346 |
+
"epoch": 0.9255267432946018,
|
| 74347 |
+
"grad_norm": 0.50390625,
|
| 74348 |
+
"learning_rate": 0.0005164895015618766,
|
| 74349 |
+
"loss": 1.3857693672180176,
|
| 74350 |
+
"step": 10620
|
| 74351 |
+
},
|
| 74352 |
+
{
|
| 74353 |
+
"epoch": 0.9256138927054581,
|
| 74354 |
+
"grad_norm": 0.59375,
|
| 74355 |
+
"learning_rate": 0.0005158381784851836,
|
| 74356 |
+
"loss": 1.2931714057922363,
|
| 74357 |
+
"step": 10621
|
| 74358 |
+
},
|
| 74359 |
+
{
|
| 74360 |
+
"epoch": 0.9257010421163145,
|
| 74361 |
+
"grad_norm": 0.6015625,
|
| 74362 |
+
"learning_rate": 0.0005151871243763809,
|
| 74363 |
+
"loss": 1.4780501127243042,
|
| 74364 |
+
"step": 10622
|
| 74365 |
+
},
|
| 74366 |
+
{
|
| 74367 |
+
"epoch": 0.9257881915271708,
|
| 74368 |
+
"grad_norm": 0.5859375,
|
| 74369 |
+
"learning_rate": 0.0005145363395967793,
|
| 74370 |
+
"loss": 1.1755802631378174,
|
| 74371 |
+
"step": 10623
|
| 74372 |
+
},
|
| 74373 |
+
{
|
| 74374 |
+
"epoch": 0.9258753409380273,
|
| 74375 |
+
"grad_norm": 0.59375,
|
| 74376 |
+
"learning_rate": 0.0005138858245075372,
|
| 74377 |
+
"loss": 1.4387574195861816,
|
| 74378 |
+
"step": 10624
|
| 74379 |
+
},
|
| 74380 |
+
{
|
| 74381 |
+
"epoch": 0.9259624903488836,
|
| 74382 |
+
"grad_norm": 0.6015625,
|
| 74383 |
+
"learning_rate": 0.0005132355794696646,
|
| 74384 |
+
"loss": 1.3815428018569946,
|
| 74385 |
+
"step": 10625
|
| 74386 |
+
},
|
| 74387 |
+
{
|
| 74388 |
+
"epoch": 0.92604963975974,
|
| 74389 |
+
"grad_norm": 0.6328125,
|
| 74390 |
+
"learning_rate": 0.000512585604844022,
|
| 74391 |
+
"loss": 1.5630664825439453,
|
| 74392 |
+
"step": 10626
|
| 74393 |
+
},
|
| 74394 |
+
{
|
| 74395 |
+
"epoch": 0.9261367891705964,
|
| 74396 |
+
"grad_norm": 0.53515625,
|
| 74397 |
+
"learning_rate": 0.000511935900991317,
|
| 74398 |
+
"loss": 1.2940813302993774,
|
| 74399 |
+
"step": 10627
|
| 74400 |
+
},
|
| 74401 |
+
{
|
| 74402 |
+
"epoch": 0.9262239385814527,
|
| 74403 |
+
"grad_norm": 0.62890625,
|
| 74404 |
+
"learning_rate": 0.0005112864682721121,
|
| 74405 |
+
"loss": 1.386154294013977,
|
| 74406 |
+
"step": 10628
|
| 74407 |
+
},
|
| 74408 |
+
{
|
| 74409 |
+
"epoch": 0.9263110879923091,
|
| 74410 |
+
"grad_norm": 0.546875,
|
| 74411 |
+
"learning_rate": 0.0005106373070468151,
|
| 74412 |
+
"loss": 1.374330997467041,
|
| 74413 |
+
"step": 10629
|
| 74414 |
+
},
|
| 74415 |
+
{
|
| 74416 |
+
"epoch": 0.9263982374031654,
|
| 74417 |
+
"grad_norm": 0.5546875,
|
| 74418 |
+
"learning_rate": 0.0005099884176756846,
|
| 74419 |
+
"loss": 1.3532607555389404,
|
| 74420 |
+
"step": 10630
|
| 74421 |
+
},
|
| 74422 |
+
{
|
| 74423 |
+
"epoch": 0.9264853868140218,
|
| 74424 |
+
"grad_norm": 0.60546875,
|
| 74425 |
+
"learning_rate": 0.0005093398005188283,
|
| 74426 |
+
"loss": 1.4316142797470093,
|
| 74427 |
+
"step": 10631
|
| 74428 |
+
},
|
| 74429 |
+
{
|
| 74430 |
+
"epoch": 0.9265725362248781,
|
| 74431 |
+
"grad_norm": 0.57421875,
|
| 74432 |
+
"learning_rate": 0.0005086914559362023,
|
| 74433 |
+
"loss": 1.2776588201522827,
|
| 74434 |
+
"step": 10632
|
| 74435 |
+
},
|
| 74436 |
+
{
|
| 74437 |
+
"epoch": 0.9266596856357345,
|
| 74438 |
+
"grad_norm": 0.65234375,
|
| 74439 |
+
"learning_rate": 0.0005080433842876125,
|
| 74440 |
+
"loss": 1.395501732826233,
|
| 74441 |
+
"step": 10633
|
| 74442 |
+
},
|
| 74443 |
+
{
|
| 74444 |
+
"epoch": 0.9267468350465909,
|
| 74445 |
+
"grad_norm": 0.609375,
|
| 74446 |
+
"learning_rate": 0.0005073955859327124,
|
| 74447 |
+
"loss": 1.4338113069534302,
|
| 74448 |
+
"step": 10634
|
| 74449 |
+
},
|
| 74450 |
+
{
|
| 74451 |
+
"epoch": 0.9268339844574472,
|
| 74452 |
+
"grad_norm": 0.65625,
|
| 74453 |
+
"learning_rate": 0.0005067480612310041,
|
| 74454 |
+
"loss": 1.4566060304641724,
|
| 74455 |
+
"step": 10635
|
| 74456 |
+
},
|
| 74457 |
+
{
|
| 74458 |
+
"epoch": 0.9269211338683037,
|
| 74459 |
+
"grad_norm": 0.56640625,
|
| 74460 |
+
"learning_rate": 0.000506100810541838,
|
| 74461 |
+
"loss": 1.3558721542358398,
|
| 74462 |
+
"step": 10636
|
| 74463 |
+
},
|
| 74464 |
+
{
|
| 74465 |
+
"epoch": 0.92700828327916,
|
| 74466 |
+
"grad_norm": 0.5859375,
|
| 74467 |
+
"learning_rate": 0.0005054538342244121,
|
| 74468 |
+
"loss": 1.5648887157440186,
|
| 74469 |
+
"step": 10637
|
| 74470 |
+
},
|
| 74471 |
+
{
|
| 74472 |
+
"epoch": 0.9270954326900164,
|
| 74473 |
+
"grad_norm": 0.58203125,
|
| 74474 |
+
"learning_rate": 0.0005048071326377716,
|
| 74475 |
+
"loss": 1.4658679962158203,
|
| 74476 |
+
"step": 10638
|
| 74477 |
+
},
|
| 74478 |
+
{
|
| 74479 |
+
"epoch": 0.9271825821008727,
|
| 74480 |
+
"grad_norm": 0.625,
|
| 74481 |
+
"learning_rate": 0.0005041607061408118,
|
| 74482 |
+
"loss": 1.4238897562026978,
|
| 74483 |
+
"step": 10639
|
| 74484 |
+
},
|
| 74485 |
+
{
|
| 74486 |
+
"epoch": 0.9272697315117291,
|
| 74487 |
+
"grad_norm": 0.609375,
|
| 74488 |
+
"learning_rate": 0.0005035145550922722,
|
| 74489 |
+
"loss": 1.4082732200622559,
|
| 74490 |
+
"step": 10640
|
| 74491 |
+
},
|
| 74492 |
+
{
|
| 74493 |
+
"epoch": 0.9273568809225855,
|
| 74494 |
+
"grad_norm": 0.75390625,
|
| 74495 |
+
"learning_rate": 0.0005028686798507411,
|
| 74496 |
+
"loss": 1.4516377449035645,
|
| 74497 |
+
"step": 10641
|
| 74498 |
+
},
|
| 74499 |
+
{
|
| 74500 |
+
"epoch": 0.9274440303334418,
|
| 74501 |
+
"grad_norm": 0.59765625,
|
| 74502 |
+
"learning_rate": 0.0005022230807746533,
|
| 74503 |
+
"loss": 1.372707724571228,
|
| 74504 |
+
"step": 10642
|
| 74505 |
+
},
|
| 74506 |
+
{
|
| 74507 |
+
"epoch": 0.9275311797442982,
|
| 74508 |
+
"grad_norm": 0.64453125,
|
| 74509 |
+
"learning_rate": 0.0005015777582222899,
|
| 74510 |
+
"loss": 1.4883637428283691,
|
| 74511 |
+
"step": 10643
|
| 74512 |
+
},
|
| 74513 |
+
{
|
| 74514 |
+
"epoch": 0.9276183291551545,
|
| 74515 |
+
"grad_norm": 0.65234375,
|
| 74516 |
+
"learning_rate": 0.0005009327125517801,
|
| 74517 |
+
"loss": 1.3155707120895386,
|
| 74518 |
+
"step": 10644
|
| 74519 |
+
},
|
| 74520 |
+
{
|
| 74521 |
+
"epoch": 0.9277054785660109,
|
| 74522 |
+
"grad_norm": 0.58984375,
|
| 74523 |
+
"learning_rate": 0.0005002879441210981,
|
| 74524 |
+
"loss": 1.371456503868103,
|
| 74525 |
+
"step": 10645
|
| 74526 |
+
},
|
| 74527 |
+
{
|
| 74528 |
+
"epoch": 0.9277926279768672,
|
| 74529 |
+
"grad_norm": 0.59765625,
|
| 74530 |
+
"learning_rate": 0.0004996434532880643,
|
| 74531 |
+
"loss": 1.4533182382583618,
|
| 74532 |
+
"step": 10646
|
| 74533 |
+
},
|
| 74534 |
+
{
|
| 74535 |
+
"epoch": 0.9278797773877236,
|
| 74536 |
+
"grad_norm": 0.6328125,
|
| 74537 |
+
"learning_rate": 0.0004989992404103452,
|
| 74538 |
+
"loss": 1.382372260093689,
|
| 74539 |
+
"step": 10647
|
| 74540 |
+
},
|
| 74541 |
+
{
|
| 74542 |
+
"epoch": 0.9279669267985801,
|
| 74543 |
+
"grad_norm": 0.69140625,
|
| 74544 |
+
"learning_rate": 0.0004983553058454533,
|
| 74545 |
+
"loss": 1.3707820177078247,
|
| 74546 |
+
"step": 10648
|
| 74547 |
+
},
|
| 74548 |
+
{
|
| 74549 |
+
"epoch": 0.9280540762094364,
|
| 74550 |
+
"grad_norm": 0.6796875,
|
| 74551 |
+
"learning_rate": 0.0004977116499507462,
|
| 74552 |
+
"loss": 1.3942557573318481,
|
| 74553 |
+
"step": 10649
|
| 74554 |
+
},
|
| 74555 |
+
{
|
| 74556 |
+
"epoch": 0.9281412256202928,
|
| 74557 |
+
"grad_norm": 0.6875,
|
| 74558 |
+
"learning_rate": 0.0004970682730834275,
|
| 74559 |
+
"loss": 1.5383590459823608,
|
| 74560 |
+
"step": 10650
|
| 74561 |
+
},
|
| 74562 |
+
{
|
| 74563 |
+
"epoch": 0.9282283750311491,
|
| 74564 |
+
"grad_norm": 0.55859375,
|
| 74565 |
+
"learning_rate": 0.0004964251756005457,
|
| 74566 |
+
"loss": 1.2526518106460571,
|
| 74567 |
+
"step": 10651
|
| 74568 |
+
},
|
| 74569 |
+
{
|
| 74570 |
+
"epoch": 0.9283155244420055,
|
| 74571 |
+
"grad_norm": 0.6328125,
|
| 74572 |
+
"learning_rate": 0.0004957823578589939,
|
| 74573 |
+
"loss": 1.3667023181915283,
|
| 74574 |
+
"step": 10652
|
| 74575 |
+
},
|
| 74576 |
+
{
|
| 74577 |
+
"epoch": 0.9284026738528618,
|
| 74578 |
+
"grad_norm": 0.5546875,
|
| 74579 |
+
"learning_rate": 0.00049513982021551,
|
| 74580 |
+
"loss": 1.3441028594970703,
|
| 74581 |
+
"step": 10653
|
| 74582 |
+
},
|
| 74583 |
+
{
|
| 74584 |
+
"epoch": 0.9284898232637182,
|
| 74585 |
+
"grad_norm": 0.63671875,
|
| 74586 |
+
"learning_rate": 0.0004944975630266757,
|
| 74587 |
+
"loss": 1.3417932987213135,
|
| 74588 |
+
"step": 10654
|
| 74589 |
+
},
|
| 74590 |
+
{
|
| 74591 |
+
"epoch": 0.9285769726745746,
|
| 74592 |
+
"grad_norm": 0.58203125,
|
| 74593 |
+
"learning_rate": 0.0004938555866489208,
|
| 74594 |
+
"loss": 1.3898224830627441,
|
| 74595 |
+
"step": 10655
|
| 74596 |
+
},
|
| 74597 |
+
{
|
| 74598 |
+
"epoch": 0.9286641220854309,
|
| 74599 |
+
"grad_norm": 0.578125,
|
| 74600 |
+
"learning_rate": 0.0004932138914385143,
|
| 74601 |
+
"loss": 1.3794687986373901,
|
| 74602 |
+
"step": 10656
|
| 74603 |
+
},
|
| 74604 |
+
{
|
| 74605 |
+
"epoch": 0.9287512714962873,
|
| 74606 |
+
"grad_norm": 0.59765625,
|
| 74607 |
+
"learning_rate": 0.000492572477751572,
|
| 74608 |
+
"loss": 1.456784963607788,
|
| 74609 |
+
"step": 10657
|
| 74610 |
+
},
|
| 74611 |
+
{
|
| 74612 |
+
"epoch": 0.9288384209071436,
|
| 74613 |
+
"grad_norm": 0.6015625,
|
| 74614 |
+
"learning_rate": 0.0004919313459440525,
|
| 74615 |
+
"loss": 1.3471949100494385,
|
| 74616 |
+
"step": 10658
|
| 74617 |
+
},
|
| 74618 |
+
{
|
| 74619 |
+
"epoch": 0.928925570318,
|
| 74620 |
+
"grad_norm": 0.5625,
|
| 74621 |
+
"learning_rate": 0.0004912904963717576,
|
| 74622 |
+
"loss": 1.3238189220428467,
|
| 74623 |
+
"step": 10659
|
| 74624 |
+
},
|
| 74625 |
+
{
|
| 74626 |
+
"epoch": 0.9290127197288564,
|
| 74627 |
+
"grad_norm": 0.65234375,
|
| 74628 |
+
"learning_rate": 0.0004906499293903348,
|
| 74629 |
+
"loss": 1.4607516527175903,
|
| 74630 |
+
"step": 10660
|
| 74631 |
+
},
|
| 74632 |
+
{
|
| 74633 |
+
"epoch": 0.9290998691397128,
|
| 74634 |
+
"grad_norm": 0.6796875,
|
| 74635 |
+
"learning_rate": 0.0004900096453552722,
|
| 74636 |
+
"loss": 1.3629095554351807,
|
| 74637 |
+
"step": 10661
|
| 74638 |
+
},
|
| 74639 |
+
{
|
| 74640 |
+
"epoch": 0.9291870185505692,
|
| 74641 |
+
"grad_norm": 0.65625,
|
| 74642 |
+
"learning_rate": 0.0004893696446219017,
|
| 74643 |
+
"loss": 1.4566161632537842,
|
| 74644 |
+
"step": 10662
|
| 74645 |
+
},
|
| 74646 |
+
{
|
| 74647 |
+
"epoch": 0.9292741679614255,
|
| 74648 |
+
"grad_norm": 0.6875,
|
| 74649 |
+
"learning_rate": 0.0004887299275453983,
|
| 74650 |
+
"loss": 1.304275631904602,
|
| 74651 |
+
"step": 10663
|
| 74652 |
+
},
|
| 74653 |
+
{
|
| 74654 |
+
"epoch": 0.9293613173722819,
|
| 74655 |
+
"grad_norm": 0.52734375,
|
| 74656 |
+
"learning_rate": 0.0004880904944807791,
|
| 74657 |
+
"loss": 1.2976535558700562,
|
| 74658 |
+
"step": 10664
|
| 74659 |
+
},
|
| 74660 |
+
{
|
| 74661 |
+
"epoch": 0.9294484667831382,
|
| 74662 |
+
"grad_norm": 0.53515625,
|
| 74663 |
+
"learning_rate": 0.00048745134578290325,
|
| 74664 |
+
"loss": 1.3486498594284058,
|
| 74665 |
+
"step": 10665
|
| 74666 |
+
},
|
| 74667 |
+
{
|
| 74668 |
+
"epoch": 0.9295356161939946,
|
| 74669 |
+
"grad_norm": 0.55078125,
|
| 74670 |
+
"learning_rate": 0.00048681248180647426,
|
| 74671 |
+
"loss": 1.172863483428955,
|
| 74672 |
+
"step": 10666
|
| 74673 |
+
},
|
| 74674 |
+
{
|
| 74675 |
+
"epoch": 0.9296227656048509,
|
| 74676 |
+
"grad_norm": 0.62890625,
|
| 74677 |
+
"learning_rate": 0.00048617390290603543,
|
| 74678 |
+
"loss": 1.3933426141738892,
|
| 74679 |
+
"step": 10667
|
| 74680 |
+
},
|
| 74681 |
+
{
|
| 74682 |
+
"epoch": 0.9297099150157073,
|
| 74683 |
+
"grad_norm": 0.578125,
|
| 74684 |
+
"learning_rate": 0.00048553560943597173,
|
| 74685 |
+
"loss": 1.4262231588363647,
|
| 74686 |
+
"step": 10668
|
| 74687 |
+
},
|
| 74688 |
+
{
|
| 74689 |
+
"epoch": 0.9297970644265637,
|
| 74690 |
+
"grad_norm": 0.5859375,
|
| 74691 |
+
"learning_rate": 0.00048489760175051105,
|
| 74692 |
+
"loss": 1.3945653438568115,
|
| 74693 |
+
"step": 10669
|
| 74694 |
+
},
|
| 74695 |
+
{
|
| 74696 |
+
"epoch": 0.92988421383742,
|
| 74697 |
+
"grad_norm": 0.61328125,
|
| 74698 |
+
"learning_rate": 0.0004842598802037208,
|
| 74699 |
+
"loss": 1.4212998151779175,
|
| 74700 |
+
"step": 10670
|
| 74701 |
+
},
|
| 74702 |
+
{
|
| 74703 |
+
"epoch": 0.9299713632482765,
|
| 74704 |
+
"grad_norm": 0.57421875,
|
| 74705 |
+
"learning_rate": 0.0004836224451495131,
|
| 74706 |
+
"loss": 1.2879077196121216,
|
| 74707 |
+
"step": 10671
|
| 74708 |
+
},
|
| 74709 |
+
{
|
| 74710 |
+
"epoch": 0.9300585126591328,
|
| 74711 |
+
"grad_norm": 0.65234375,
|
| 74712 |
+
"learning_rate": 0.00048298529694163785,
|
| 74713 |
+
"loss": 1.3234881162643433,
|
| 74714 |
+
"step": 10672
|
| 74715 |
+
},
|
| 74716 |
+
{
|
| 74717 |
+
"epoch": 0.9301456620699892,
|
| 74718 |
+
"grad_norm": 0.59375,
|
| 74719 |
+
"learning_rate": 0.00048234843593368716,
|
| 74720 |
+
"loss": 1.2626367807388306,
|
| 74721 |
+
"step": 10673
|
| 74722 |
+
},
|
| 74723 |
+
{
|
| 74724 |
+
"epoch": 0.9302328114808455,
|
| 74725 |
+
"grad_norm": 0.6484375,
|
| 74726 |
+
"learning_rate": 0.0004817118624790925,
|
| 74727 |
+
"loss": 1.3626574277877808,
|
| 74728 |
+
"step": 10674
|
| 74729 |
+
},
|
| 74730 |
+
{
|
| 74731 |
+
"epoch": 0.9303199608917019,
|
| 74732 |
+
"grad_norm": 0.61328125,
|
| 74733 |
+
"learning_rate": 0.00048107557693112627,
|
| 74734 |
+
"loss": 1.475213646888733,
|
| 74735 |
+
"step": 10675
|
| 74736 |
+
},
|
| 74737 |
+
{
|
| 74738 |
+
"epoch": 0.9304071103025583,
|
| 74739 |
+
"grad_norm": 0.5859375,
|
| 74740 |
+
"learning_rate": 0.0004804395796429031,
|
| 74741 |
+
"loss": 1.4251682758331299,
|
| 74742 |
+
"step": 10676
|
| 74743 |
+
},
|
| 74744 |
+
{
|
| 74745 |
+
"epoch": 0.9304942597134146,
|
| 74746 |
+
"grad_norm": 0.625,
|
| 74747 |
+
"learning_rate": 0.00047980387096737515,
|
| 74748 |
+
"loss": 1.5054700374603271,
|
| 74749 |
+
"step": 10677
|
| 74750 |
+
},
|
| 74751 |
+
{
|
| 74752 |
+
"epoch": 0.930581409124271,
|
| 74753 |
+
"grad_norm": 0.6328125,
|
| 74754 |
+
"learning_rate": 0.00047916845125733524,
|
| 74755 |
+
"loss": 1.3805677890777588,
|
| 74756 |
+
"step": 10678
|
| 74757 |
+
},
|
| 74758 |
+
{
|
| 74759 |
+
"epoch": 0.9306685585351273,
|
| 74760 |
+
"grad_norm": 0.578125,
|
| 74761 |
+
"learning_rate": 0.0004785333208654159,
|
| 74762 |
+
"loss": 1.4884077310562134,
|
| 74763 |
+
"step": 10679
|
| 74764 |
+
},
|
| 74765 |
+
{
|
| 74766 |
+
"epoch": 0.9307557079459837,
|
| 74767 |
+
"grad_norm": 0.625,
|
| 74768 |
+
"learning_rate": 0.000477898480144089,
|
| 74769 |
+
"loss": 1.3699536323547363,
|
| 74770 |
+
"step": 10680
|
| 74771 |
+
},
|
| 74772 |
+
{
|
| 74773 |
+
"epoch": 0.93084285735684,
|
| 74774 |
+
"grad_norm": 0.578125,
|
| 74775 |
+
"learning_rate": 0.0004772639294456651,
|
| 74776 |
+
"loss": 1.4125971794128418,
|
| 74777 |
+
"step": 10681
|
| 74778 |
+
},
|
| 74779 |
+
{
|
| 74780 |
+
"epoch": 0.9309300067676964,
|
| 74781 |
+
"grad_norm": 0.5703125,
|
| 74782 |
+
"learning_rate": 0.00047662966912229566,
|
| 74783 |
+
"loss": 1.4447113275527954,
|
| 74784 |
+
"step": 10682
|
| 74785 |
+
},
|
| 74786 |
+
{
|
| 74787 |
+
"epoch": 0.9310171561785529,
|
| 74788 |
+
"grad_norm": 0.6015625,
|
| 74789 |
+
"learning_rate": 0.00047599569952596916,
|
| 74790 |
+
"loss": 1.4526288509368896,
|
| 74791 |
+
"step": 10683
|
| 74792 |
+
},
|
| 74793 |
+
{
|
| 74794 |
+
"epoch": 0.9311043055894092,
|
| 74795 |
+
"grad_norm": 0.62109375,
|
| 74796 |
+
"learning_rate": 0.0004753620210085134,
|
| 74797 |
+
"loss": 1.3986235857009888,
|
| 74798 |
+
"step": 10684
|
| 74799 |
+
},
|
| 74800 |
+
{
|
| 74801 |
+
"epoch": 0.9311914550002656,
|
| 74802 |
+
"grad_norm": 0.66796875,
|
| 74803 |
+
"learning_rate": 0.00047472863392159474,
|
| 74804 |
+
"loss": 1.1874592304229736,
|
| 74805 |
+
"step": 10685
|
| 74806 |
+
},
|
| 74807 |
+
{
|
| 74808 |
+
"epoch": 0.9312786044111219,
|
| 74809 |
+
"grad_norm": 0.55859375,
|
| 74810 |
+
"learning_rate": 0.0004740955386167158,
|
| 74811 |
+
"loss": 1.3817155361175537,
|
| 74812 |
+
"step": 10686
|
| 74813 |
+
},
|
| 74814 |
+
{
|
| 74815 |
+
"epoch": 0.9313657538219783,
|
| 74816 |
+
"grad_norm": 0.61328125,
|
| 74817 |
+
"learning_rate": 0.0004734627354452219,
|
| 74818 |
+
"loss": 1.4633171558380127,
|
| 74819 |
+
"step": 10687
|
| 74820 |
+
},
|
| 74821 |
+
{
|
| 74822 |
+
"epoch": 0.9314529032328346,
|
| 74823 |
+
"grad_norm": 0.55859375,
|
| 74824 |
+
"learning_rate": 0.00047283022475829215,
|
| 74825 |
+
"loss": 1.349452018737793,
|
| 74826 |
+
"step": 10688
|
| 74827 |
+
},
|
| 74828 |
+
{
|
| 74829 |
+
"epoch": 0.931540052643691,
|
| 74830 |
+
"grad_norm": 0.59375,
|
| 74831 |
+
"learning_rate": 0.0004721980069069447,
|
| 74832 |
+
"loss": 1.320845603942871,
|
| 74833 |
+
"step": 10689
|
| 74834 |
+
},
|
| 74835 |
+
{
|
| 74836 |
+
"epoch": 0.9316272020545474,
|
| 74837 |
+
"grad_norm": 0.6328125,
|
| 74838 |
+
"learning_rate": 0.0004715660822420351,
|
| 74839 |
+
"loss": 1.4221367835998535,
|
| 74840 |
+
"step": 10690
|
| 74841 |
+
},
|
| 74842 |
+
{
|
| 74843 |
+
"epoch": 0.9317143514654037,
|
| 74844 |
+
"grad_norm": 0.625,
|
| 74845 |
+
"learning_rate": 0.0004709344511142559,
|
| 74846 |
+
"loss": 1.2934486865997314,
|
| 74847 |
+
"step": 10691
|
| 74848 |
+
},
|
| 74849 |
+
{
|
| 74850 |
+
"epoch": 0.9318015008762601,
|
| 74851 |
+
"grad_norm": 0.6171875,
|
| 74852 |
+
"learning_rate": 0.00047030311387413775,
|
| 74853 |
+
"loss": 1.4287840127944946,
|
| 74854 |
+
"step": 10692
|
| 74855 |
+
},
|
| 74856 |
+
{
|
| 74857 |
+
"epoch": 0.9318886502871164,
|
| 74858 |
+
"grad_norm": 0.5625,
|
| 74859 |
+
"learning_rate": 0.0004696720708720472,
|
| 74860 |
+
"loss": 1.3304390907287598,
|
| 74861 |
+
"step": 10693
|
| 74862 |
+
},
|
| 74863 |
+
{
|
| 74864 |
+
"epoch": 0.9319757996979728,
|
| 74865 |
+
"grad_norm": 0.57421875,
|
| 74866 |
+
"learning_rate": 0.0004690413224581879,
|
| 74867 |
+
"loss": 1.5018701553344727,
|
| 74868 |
+
"step": 10694
|
| 74869 |
+
},
|
| 74870 |
+
{
|
| 74871 |
+
"epoch": 0.9320629491088291,
|
| 74872 |
+
"grad_norm": 0.65625,
|
| 74873 |
+
"learning_rate": 0.0004684108689825998,
|
| 74874 |
+
"loss": 1.5002820491790771,
|
| 74875 |
+
"step": 10695
|
| 74876 |
+
},
|
| 74877 |
+
{
|
| 74878 |
+
"epoch": 0.9321500985196856,
|
| 74879 |
+
"grad_norm": 0.60546875,
|
| 74880 |
+
"learning_rate": 0.0004677807107951593,
|
| 74881 |
+
"loss": 1.5061651468276978,
|
| 74882 |
+
"step": 10696
|
| 74883 |
+
},
|
| 74884 |
+
{
|
| 74885 |
+
"epoch": 0.932237247930542,
|
| 74886 |
+
"grad_norm": 0.640625,
|
| 74887 |
+
"learning_rate": 0.00046715084824557827,
|
| 74888 |
+
"loss": 1.3442021608352661,
|
| 74889 |
+
"step": 10697
|
| 74890 |
+
},
|
| 74891 |
+
{
|
| 74892 |
+
"epoch": 0.9323243973413983,
|
| 74893 |
+
"grad_norm": 0.6484375,
|
| 74894 |
+
"learning_rate": 0.00046652128168340676,
|
| 74895 |
+
"loss": 1.4294358491897583,
|
| 74896 |
+
"step": 10698
|
| 74897 |
+
},
|
| 74898 |
+
{
|
| 74899 |
+
"epoch": 0.9324115467522547,
|
| 74900 |
+
"grad_norm": 0.5703125,
|
| 74901 |
+
"learning_rate": 0.00046589201145802813,
|
| 74902 |
+
"loss": 1.3213213682174683,
|
| 74903 |
+
"step": 10699
|
| 74904 |
+
},
|
| 74905 |
+
{
|
| 74906 |
+
"epoch": 0.932498696163111,
|
| 74907 |
+
"grad_norm": 0.65234375,
|
| 74908 |
+
"learning_rate": 0.0004652630379186621,
|
| 74909 |
+
"loss": 1.3561373949050903,
|
| 74910 |
+
"step": 10700
|
| 74911 |
+
},
|
| 74912 |
+
{
|
| 74913 |
+
"epoch": 0.9325858455739674,
|
| 74914 |
+
"grad_norm": 0.59765625,
|
| 74915 |
+
"learning_rate": 0.00046463436141436394,
|
| 74916 |
+
"loss": 1.4425005912780762,
|
| 74917 |
+
"step": 10701
|
| 74918 |
+
},
|
| 74919 |
+
{
|
| 74920 |
+
"epoch": 0.9326729949848237,
|
| 74921 |
+
"grad_norm": 0.609375,
|
| 74922 |
+
"learning_rate": 0.00046400598229402306,
|
| 74923 |
+
"loss": 1.3377797603607178,
|
| 74924 |
+
"step": 10702
|
| 74925 |
+
},
|
| 74926 |
+
{
|
| 74927 |
+
"epoch": 0.9327601443956801,
|
| 74928 |
+
"grad_norm": 0.55078125,
|
| 74929 |
+
"learning_rate": 0.0004633779009063662,
|
| 74930 |
+
"loss": 1.4136755466461182,
|
| 74931 |
+
"step": 10703
|
| 74932 |
+
},
|
| 74933 |
+
{
|
| 74934 |
+
"epoch": 0.9328472938065365,
|
| 74935 |
+
"grad_norm": 0.59375,
|
| 74936 |
+
"learning_rate": 0.00046275011759995267,
|
| 74937 |
+
"loss": 1.4134703874588013,
|
| 74938 |
+
"step": 10704
|
| 74939 |
+
},
|
| 74940 |
+
{
|
| 74941 |
+
"epoch": 0.9329344432173928,
|
| 74942 |
+
"grad_norm": 0.59765625,
|
| 74943 |
+
"learning_rate": 0.00046212263272317705,
|
| 74944 |
+
"loss": 1.5474461317062378,
|
| 74945 |
+
"step": 10705
|
| 74946 |
+
},
|
| 74947 |
+
{
|
| 74948 |
+
"epoch": 0.9330215926282492,
|
| 74949 |
+
"grad_norm": 0.61328125,
|
| 74950 |
+
"learning_rate": 0.00046149544662426825,
|
| 74951 |
+
"loss": 1.4391179084777832,
|
| 74952 |
+
"step": 10706
|
| 74953 |
+
},
|
| 74954 |
+
{
|
| 74955 |
+
"epoch": 0.9331087420391055,
|
| 74956 |
+
"grad_norm": 0.60546875,
|
| 74957 |
+
"learning_rate": 0.0004608685596512886,
|
| 74958 |
+
"loss": 1.2693322896957397,
|
| 74959 |
+
"step": 10707
|
| 74960 |
+
},
|
| 74961 |
+
{
|
| 74962 |
+
"epoch": 0.933195891449962,
|
| 74963 |
+
"grad_norm": 0.5390625,
|
| 74964 |
+
"learning_rate": 0.0004602419721521373,
|
| 74965 |
+
"loss": 1.3577617406845093,
|
| 74966 |
+
"step": 10708
|
| 74967 |
+
},
|
| 74968 |
+
{
|
| 74969 |
+
"epoch": 0.9332830408608183,
|
| 74970 |
+
"grad_norm": 0.57421875,
|
| 74971 |
+
"learning_rate": 0.0004596156844745437,
|
| 74972 |
+
"loss": 1.2863800525665283,
|
| 74973 |
+
"step": 10709
|
| 74974 |
+
},
|
| 74975 |
+
{
|
| 74976 |
+
"epoch": 0.9333701902716747,
|
| 74977 |
+
"grad_norm": 0.5546875,
|
| 74978 |
+
"learning_rate": 0.0004589896969660728,
|
| 74979 |
+
"loss": 1.260008692741394,
|
| 74980 |
+
"step": 10710
|
| 74981 |
+
},
|
| 74982 |
+
{
|
| 74983 |
+
"epoch": 0.9334573396825311,
|
| 74984 |
+
"grad_norm": 0.60546875,
|
| 74985 |
+
"learning_rate": 0.0004583640099741224,
|
| 74986 |
+
"loss": 1.4299962520599365,
|
| 74987 |
+
"step": 10711
|
| 74988 |
+
},
|
| 74989 |
+
{
|
| 74990 |
+
"epoch": 0.9335444890933874,
|
| 74991 |
+
"grad_norm": 0.609375,
|
| 74992 |
+
"learning_rate": 0.000457738623845924,
|
| 74993 |
+
"loss": 1.306565761566162,
|
| 74994 |
+
"step": 10712
|
| 74995 |
+
},
|
| 74996 |
+
{
|
| 74997 |
+
"epoch": 0.9336316385042438,
|
| 74998 |
+
"grad_norm": 0.6015625,
|
| 74999 |
+
"learning_rate": 0.00045711353892854056,
|
| 75000 |
+
"loss": 1.3982917070388794,
|
| 75001 |
+
"step": 10713
|
| 75002 |
+
},
|
| 75003 |
+
{
|
| 75004 |
+
"epoch": 0.9337187879151001,
|
| 75005 |
+
"grad_norm": 0.57421875,
|
| 75006 |
+
"learning_rate": 0.00045648875556887175,
|
| 75007 |
+
"loss": 1.2360150814056396,
|
| 75008 |
+
"step": 10714
|
| 75009 |
+
},
|
| 75010 |
+
{
|
| 75011 |
+
"epoch": 0.9338059373259565,
|
| 75012 |
+
"grad_norm": 0.58203125,
|
| 75013 |
+
"learning_rate": 0.0004558642741136455,
|
| 75014 |
+
"loss": 1.466596245765686,
|
| 75015 |
+
"step": 10715
|
| 75016 |
+
},
|
| 75017 |
+
{
|
| 75018 |
+
"epoch": 0.9338930867368128,
|
| 75019 |
+
"grad_norm": 0.6328125,
|
| 75020 |
+
"learning_rate": 0.00045524009490942443,
|
| 75021 |
+
"loss": 1.269097924232483,
|
| 75022 |
+
"step": 10716
|
| 75023 |
+
},
|
| 75024 |
+
{
|
| 75025 |
+
"epoch": 0.9339802361476692,
|
| 75026 |
+
"grad_norm": 0.60546875,
|
| 75027 |
+
"learning_rate": 0.00045461621830260285,
|
| 75028 |
+
"loss": 1.3210543394088745,
|
| 75029 |
+
"step": 10717
|
| 75030 |
+
},
|
| 75031 |
+
{
|
| 75032 |
+
"epoch": 0.9340673855585256,
|
| 75033 |
+
"grad_norm": 0.60546875,
|
| 75034 |
+
"learning_rate": 0.0004539926446394066,
|
| 75035 |
+
"loss": 1.4368460178375244,
|
| 75036 |
+
"step": 10718
|
| 75037 |
+
},
|
| 75038 |
+
{
|
| 75039 |
+
"epoch": 0.9341545349693819,
|
| 75040 |
+
"grad_norm": 0.6171875,
|
| 75041 |
+
"learning_rate": 0.0004533693742658955,
|
| 75042 |
+
"loss": 1.3695037364959717,
|
| 75043 |
+
"step": 10719
|
| 75044 |
+
},
|
| 75045 |
+
{
|
| 75046 |
+
"epoch": 0.9342416843802384,
|
| 75047 |
+
"grad_norm": 0.6328125,
|
| 75048 |
+
"learning_rate": 0.00045274640752795904,
|
| 75049 |
+
"loss": 1.4335851669311523,
|
| 75050 |
+
"step": 10720
|
| 75051 |
+
},
|
| 75052 |
+
{
|
| 75053 |
+
"epoch": 0.9343288337910947,
|
| 75054 |
+
"grad_norm": 0.63671875,
|
| 75055 |
+
"learning_rate": 0.0004521237447713187,
|
| 75056 |
+
"loss": 1.3742443323135376,
|
| 75057 |
+
"step": 10721
|
| 75058 |
+
},
|
| 75059 |
+
{
|
| 75060 |
+
"epoch": 0.9344159832019511,
|
| 75061 |
+
"grad_norm": 0.58203125,
|
| 75062 |
+
"learning_rate": 0.0004515013863415273,
|
| 75063 |
+
"loss": 1.4045860767364502,
|
| 75064 |
+
"step": 10722
|
| 75065 |
+
},
|
| 75066 |
+
{
|
| 75067 |
+
"epoch": 0.9345031326128074,
|
| 75068 |
+
"grad_norm": 0.6015625,
|
| 75069 |
+
"learning_rate": 0.00045087933258396824,
|
| 75070 |
+
"loss": 1.4241158962249756,
|
| 75071 |
+
"step": 10723
|
| 75072 |
+
},
|
| 75073 |
+
{
|
| 75074 |
+
"epoch": 0.9345902820236638,
|
| 75075 |
+
"grad_norm": 0.625,
|
| 75076 |
+
"learning_rate": 0.00045025758384385783,
|
| 75077 |
+
"loss": 1.3815301656723022,
|
| 75078 |
+
"step": 10724
|
| 75079 |
+
},
|
| 75080 |
+
{
|
| 75081 |
+
"epoch": 0.9346774314345202,
|
| 75082 |
+
"grad_norm": 0.546875,
|
| 75083 |
+
"learning_rate": 0.00044963614046624086,
|
| 75084 |
+
"loss": 1.3415958881378174,
|
| 75085 |
+
"step": 10725
|
| 75086 |
+
},
|
| 75087 |
+
{
|
| 75088 |
+
"epoch": 0.9347645808453765,
|
| 75089 |
+
"grad_norm": 0.62109375,
|
| 75090 |
+
"learning_rate": 0.0004490150027959941,
|
| 75091 |
+
"loss": 1.476638674736023,
|
| 75092 |
+
"step": 10726
|
| 75093 |
+
},
|
| 75094 |
+
{
|
| 75095 |
+
"epoch": 0.9348517302562329,
|
| 75096 |
+
"grad_norm": 0.58203125,
|
| 75097 |
+
"learning_rate": 0.0004483941711778232,
|
| 75098 |
+
"loss": 1.2964787483215332,
|
| 75099 |
+
"step": 10727
|
| 75100 |
+
},
|
| 75101 |
+
{
|
| 75102 |
+
"epoch": 0.9349388796670892,
|
| 75103 |
+
"grad_norm": 0.6875,
|
| 75104 |
+
"learning_rate": 0.0004477736459562652,
|
| 75105 |
+
"loss": 1.4656143188476562,
|
| 75106 |
+
"step": 10728
|
| 75107 |
+
},
|
| 75108 |
+
{
|
| 75109 |
+
"epoch": 0.9350260290779456,
|
| 75110 |
+
"grad_norm": 0.65234375,
|
| 75111 |
+
"learning_rate": 0.00044715342747568554,
|
| 75112 |
+
"loss": 1.3738725185394287,
|
| 75113 |
+
"step": 10729
|
| 75114 |
+
},
|
| 75115 |
+
{
|
| 75116 |
+
"epoch": 0.9351131784888019,
|
| 75117 |
+
"grad_norm": 0.6328125,
|
| 75118 |
+
"learning_rate": 0.00044653351608028304,
|
| 75119 |
+
"loss": 1.4470515251159668,
|
| 75120 |
+
"step": 10730
|
| 75121 |
+
},
|
| 75122 |
+
{
|
| 75123 |
+
"epoch": 0.9352003278996583,
|
| 75124 |
+
"grad_norm": 0.59375,
|
| 75125 |
+
"learning_rate": 0.00044591391211408254,
|
| 75126 |
+
"loss": 1.4029713869094849,
|
| 75127 |
+
"step": 10731
|
| 75128 |
+
},
|
| 75129 |
+
{
|
| 75130 |
+
"epoch": 0.9352874773105148,
|
| 75131 |
+
"grad_norm": 0.63671875,
|
| 75132 |
+
"learning_rate": 0.0004452946159209396,
|
| 75133 |
+
"loss": 1.4336940050125122,
|
| 75134 |
+
"step": 10732
|
| 75135 |
+
},
|
| 75136 |
+
{
|
| 75137 |
+
"epoch": 0.9353746267213711,
|
| 75138 |
+
"grad_norm": 0.62890625,
|
| 75139 |
+
"learning_rate": 0.00044467562784453825,
|
| 75140 |
+
"loss": 1.4127177000045776,
|
| 75141 |
+
"step": 10733
|
| 75142 |
+
},
|
| 75143 |
+
{
|
| 75144 |
+
"epoch": 0.9354617761322275,
|
| 75145 |
+
"grad_norm": 0.62890625,
|
| 75146 |
+
"learning_rate": 0.0004440569482283916,
|
| 75147 |
+
"loss": 1.3769465684890747,
|
| 75148 |
+
"step": 10734
|
| 75149 |
+
},
|
| 75150 |
+
{
|
| 75151 |
+
"epoch": 0.9355489255430838,
|
| 75152 |
+
"grad_norm": 0.57421875,
|
| 75153 |
+
"learning_rate": 0.00044343857741584354,
|
| 75154 |
+
"loss": 1.4565773010253906,
|
| 75155 |
+
"step": 10735
|
| 75156 |
+
},
|
| 75157 |
+
{
|
| 75158 |
+
"epoch": 0.9356360749539402,
|
| 75159 |
+
"grad_norm": 0.62109375,
|
| 75160 |
+
"learning_rate": 0.0004428205157500644,
|
| 75161 |
+
"loss": 1.4249544143676758,
|
| 75162 |
+
"step": 10736
|
| 75163 |
+
},
|
| 75164 |
+
{
|
| 75165 |
+
"epoch": 0.9357232243647965,
|
| 75166 |
+
"grad_norm": 0.57421875,
|
| 75167 |
+
"learning_rate": 0.0004422027635740537,
|
| 75168 |
+
"loss": 1.4344592094421387,
|
| 75169 |
+
"step": 10737
|
| 75170 |
+
},
|
| 75171 |
+
{
|
| 75172 |
+
"epoch": 0.9358103737756529,
|
| 75173 |
+
"grad_norm": 0.56640625,
|
| 75174 |
+
"learning_rate": 0.0004415853212306389,
|
| 75175 |
+
"loss": 1.4506667852401733,
|
| 75176 |
+
"step": 10738
|
| 75177 |
+
},
|
| 75178 |
+
{
|
| 75179 |
+
"epoch": 0.9358975231865093,
|
| 75180 |
+
"grad_norm": 0.6171875,
|
| 75181 |
+
"learning_rate": 0.0004409681890624753,
|
| 75182 |
+
"loss": 1.373611569404602,
|
| 75183 |
+
"step": 10739
|
| 75184 |
+
},
|
| 75185 |
+
{
|
| 75186 |
+
"epoch": 0.9359846725973656,
|
| 75187 |
+
"grad_norm": 0.62109375,
|
| 75188 |
+
"learning_rate": 0.00044035136741204774,
|
| 75189 |
+
"loss": 1.3849220275878906,
|
| 75190 |
+
"step": 10740
|
| 75191 |
+
},
|
| 75192 |
+
{
|
| 75193 |
+
"epoch": 0.936071822008222,
|
| 75194 |
+
"grad_norm": 0.65625,
|
| 75195 |
+
"learning_rate": 0.00043973485662166705,
|
| 75196 |
+
"loss": 1.4501248598098755,
|
| 75197 |
+
"step": 10741
|
| 75198 |
+
},
|
| 75199 |
+
{
|
| 75200 |
+
"epoch": 0.9361589714190783,
|
| 75201 |
+
"grad_norm": 0.609375,
|
| 75202 |
+
"learning_rate": 0.0004391186570334718,
|
| 75203 |
+
"loss": 1.188583493232727,
|
| 75204 |
+
"step": 10742
|
| 75205 |
+
},
|
| 75206 |
+
{
|
| 75207 |
+
"epoch": 0.9362461208299347,
|
| 75208 |
+
"grad_norm": 0.6015625,
|
| 75209 |
+
"learning_rate": 0.00043850276898942824,
|
| 75210 |
+
"loss": 1.5099093914031982,
|
| 75211 |
+
"step": 10743
|
| 75212 |
+
},
|
| 75213 |
+
{
|
| 75214 |
+
"epoch": 0.936333270240791,
|
| 75215 |
+
"grad_norm": 0.57421875,
|
| 75216 |
+
"learning_rate": 0.0004378871928313297,
|
| 75217 |
+
"loss": 1.4234578609466553,
|
| 75218 |
+
"step": 10744
|
| 75219 |
+
},
|
| 75220 |
+
{
|
| 75221 |
+
"epoch": 0.9364204196516475,
|
| 75222 |
+
"grad_norm": 0.61328125,
|
| 75223 |
+
"learning_rate": 0.00043727192890079464,
|
| 75224 |
+
"loss": 1.3401721715927124,
|
| 75225 |
+
"step": 10745
|
| 75226 |
+
},
|
| 75227 |
+
{
|
| 75228 |
+
"epoch": 0.9365075690625039,
|
| 75229 |
+
"grad_norm": 0.66796875,
|
| 75230 |
+
"learning_rate": 0.0004366569775392725,
|
| 75231 |
+
"loss": 1.4871634244918823,
|
| 75232 |
+
"step": 10746
|
| 75233 |
+
},
|
| 75234 |
+
{
|
| 75235 |
+
"epoch": 0.9365947184733602,
|
| 75236 |
+
"grad_norm": 0.6640625,
|
| 75237 |
+
"learning_rate": 0.0004360423390880354,
|
| 75238 |
+
"loss": 1.386480450630188,
|
| 75239 |
+
"step": 10747
|
| 75240 |
+
},
|
| 75241 |
+
{
|
| 75242 |
+
"epoch": 0.9366818678842166,
|
| 75243 |
+
"grad_norm": 0.578125,
|
| 75244 |
+
"learning_rate": 0.0004354280138881831,
|
| 75245 |
+
"loss": 1.4693528413772583,
|
| 75246 |
+
"step": 10748
|
| 75247 |
+
},
|
| 75248 |
+
{
|
| 75249 |
+
"epoch": 0.9367690172950729,
|
| 75250 |
+
"grad_norm": 0.59375,
|
| 75251 |
+
"learning_rate": 0.00043481400228064144,
|
| 75252 |
+
"loss": 1.3771522045135498,
|
| 75253 |
+
"step": 10749
|
| 75254 |
+
},
|
| 75255 |
+
{
|
| 75256 |
+
"epoch": 0.9368561667059293,
|
| 75257 |
+
"grad_norm": 0.56640625,
|
| 75258 |
+
"learning_rate": 0.00043420030460616205,
|
| 75259 |
+
"loss": 1.2081589698791504,
|
| 75260 |
+
"step": 10750
|
| 75261 |
+
},
|
| 75262 |
+
{
|
| 75263 |
+
"epoch": 0.9369433161167856,
|
| 75264 |
+
"grad_norm": 0.55859375,
|
| 75265 |
+
"learning_rate": 0.000433586921205323,
|
| 75266 |
+
"loss": 1.2728263139724731,
|
| 75267 |
+
"step": 10751
|
| 75268 |
+
},
|
| 75269 |
+
{
|
| 75270 |
+
"epoch": 0.937030465527642,
|
| 75271 |
+
"grad_norm": 0.58984375,
|
| 75272 |
+
"learning_rate": 0.0004329738524185274,
|
| 75273 |
+
"loss": 1.3980687856674194,
|
| 75274 |
+
"step": 10752
|
| 75275 |
+
},
|
| 75276 |
+
{
|
| 75277 |
+
"epoch": 0.9371176149384984,
|
| 75278 |
+
"grad_norm": 0.58203125,
|
| 75279 |
+
"learning_rate": 0.0004323610985860037,
|
| 75280 |
+
"loss": 1.2047626972198486,
|
| 75281 |
+
"step": 10753
|
| 75282 |
+
},
|
| 75283 |
+
{
|
| 75284 |
+
"epoch": 0.9372047643493547,
|
| 75285 |
+
"grad_norm": 0.65625,
|
| 75286 |
+
"learning_rate": 0.00043174866004780583,
|
| 75287 |
+
"loss": 1.4221163988113403,
|
| 75288 |
+
"step": 10754
|
| 75289 |
+
},
|
| 75290 |
+
{
|
| 75291 |
+
"epoch": 0.9372919137602111,
|
| 75292 |
+
"grad_norm": 0.55078125,
|
| 75293 |
+
"learning_rate": 0.0004311365371438119,
|
| 75294 |
+
"loss": 1.3433525562286377,
|
| 75295 |
+
"step": 10755
|
| 75296 |
+
},
|
| 75297 |
+
{
|
| 75298 |
+
"epoch": 0.9373790631710675,
|
| 75299 |
+
"grad_norm": 0.65234375,
|
| 75300 |
+
"learning_rate": 0.00043052473021372694,
|
| 75301 |
+
"loss": 1.3914741277694702,
|
| 75302 |
+
"step": 10756
|
| 75303 |
+
},
|
| 75304 |
+
{
|
| 75305 |
+
"epoch": 0.9374662125819239,
|
| 75306 |
+
"grad_norm": 0.75390625,
|
| 75307 |
+
"learning_rate": 0.00042991323959707864,
|
| 75308 |
+
"loss": 1.4577502012252808,
|
| 75309 |
+
"step": 10757
|
| 75310 |
+
},
|
| 75311 |
+
{
|
| 75312 |
+
"epoch": 0.9375533619927802,
|
| 75313 |
+
"grad_norm": 0.60546875,
|
| 75314 |
+
"learning_rate": 0.00042930206563321973,
|
| 75315 |
+
"loss": 1.304754614830017,
|
| 75316 |
+
"step": 10758
|
| 75317 |
+
},
|
| 75318 |
+
{
|
| 75319 |
+
"epoch": 0.9376405114036366,
|
| 75320 |
+
"grad_norm": 0.64453125,
|
| 75321 |
+
"learning_rate": 0.00042869120866132734,
|
| 75322 |
+
"loss": 1.356920599937439,
|
| 75323 |
+
"step": 10759
|
| 75324 |
+
},
|
| 75325 |
+
{
|
| 75326 |
+
"epoch": 0.937727660814493,
|
| 75327 |
+
"grad_norm": 0.6328125,
|
| 75328 |
+
"learning_rate": 0.00042808066902040267,
|
| 75329 |
+
"loss": 1.5599650144577026,
|
| 75330 |
+
"step": 10760
|
| 75331 |
+
},
|
| 75332 |
+
{
|
| 75333 |
+
"epoch": 0.9378148102253493,
|
| 75334 |
+
"grad_norm": 0.5703125,
|
| 75335 |
+
"learning_rate": 0.00042747044704926994,
|
| 75336 |
+
"loss": 1.2664642333984375,
|
| 75337 |
+
"step": 10761
|
| 75338 |
+
},
|
| 75339 |
+
{
|
| 75340 |
+
"epoch": 0.9379019596362057,
|
| 75341 |
+
"grad_norm": 0.62109375,
|
| 75342 |
+
"learning_rate": 0.0004268605430865794,
|
| 75343 |
+
"loss": 1.4127326011657715,
|
| 75344 |
+
"step": 10762
|
| 75345 |
+
},
|
| 75346 |
+
{
|
| 75347 |
+
"epoch": 0.937989109047062,
|
| 75348 |
+
"grad_norm": 0.6171875,
|
| 75349 |
+
"learning_rate": 0.00042625095747080233,
|
| 75350 |
+
"loss": 1.3692606687545776,
|
| 75351 |
+
"step": 10763
|
| 75352 |
+
},
|
| 75353 |
+
{
|
| 75354 |
+
"epoch": 0.9380762584579184,
|
| 75355 |
+
"grad_norm": 0.59375,
|
| 75356 |
+
"learning_rate": 0.00042564169054023473,
|
| 75357 |
+
"loss": 1.3256995677947998,
|
| 75358 |
+
"step": 10764
|
| 75359 |
+
},
|
| 75360 |
+
{
|
| 75361 |
+
"epoch": 0.9381634078687747,
|
| 75362 |
+
"grad_norm": 0.64453125,
|
| 75363 |
+
"learning_rate": 0.0004250327426329951,
|
| 75364 |
+
"loss": 1.527947187423706,
|
| 75365 |
+
"step": 10765
|
| 75366 |
+
},
|
| 75367 |
+
{
|
| 75368 |
+
"epoch": 0.9382505572796311,
|
| 75369 |
+
"grad_norm": 0.69140625,
|
| 75370 |
+
"learning_rate": 0.0004244241140870244,
|
| 75371 |
+
"loss": 1.5718119144439697,
|
| 75372 |
+
"step": 10766
|
| 75373 |
+
},
|
| 75374 |
+
{
|
| 75375 |
+
"epoch": 0.9383377066904875,
|
| 75376 |
+
"grad_norm": 0.59375,
|
| 75377 |
+
"learning_rate": 0.00042381580524008893,
|
| 75378 |
+
"loss": 1.3383100032806396,
|
| 75379 |
+
"step": 10767
|
| 75380 |
+
},
|
| 75381 |
+
{
|
| 75382 |
+
"epoch": 0.9384248561013439,
|
| 75383 |
+
"grad_norm": 0.56640625,
|
| 75384 |
+
"learning_rate": 0.00042320781642977436,
|
| 75385 |
+
"loss": 1.3786511421203613,
|
| 75386 |
+
"step": 10768
|
| 75387 |
+
},
|
| 75388 |
+
{
|
| 75389 |
+
"epoch": 0.9385120055122003,
|
| 75390 |
+
"grad_norm": 0.63671875,
|
| 75391 |
+
"learning_rate": 0.0004226001479934904,
|
| 75392 |
+
"loss": 1.4253110885620117,
|
| 75393 |
+
"step": 10769
|
| 75394 |
+
},
|
| 75395 |
+
{
|
| 75396 |
+
"epoch": 0.9385991549230566,
|
| 75397 |
+
"grad_norm": 0.5703125,
|
| 75398 |
+
"learning_rate": 0.00042199280026846873,
|
| 75399 |
+
"loss": 1.3506003618240356,
|
| 75400 |
+
"step": 10770
|
| 75401 |
+
},
|
| 75402 |
+
{
|
| 75403 |
+
"epoch": 0.938686304333913,
|
| 75404 |
+
"grad_norm": 0.66015625,
|
| 75405 |
+
"learning_rate": 0.0004213857735917617,
|
| 75406 |
+
"loss": 1.3676663637161255,
|
| 75407 |
+
"step": 10771
|
| 75408 |
+
},
|
| 75409 |
+
{
|
| 75410 |
+
"epoch": 0.9387734537447693,
|
| 75411 |
+
"grad_norm": 0.56640625,
|
| 75412 |
+
"learning_rate": 0.0004207790683002473,
|
| 75413 |
+
"loss": 1.4172930717468262,
|
| 75414 |
+
"step": 10772
|
| 75415 |
+
},
|
| 75416 |
+
{
|
| 75417 |
+
"epoch": 0.9388606031556257,
|
| 75418 |
+
"grad_norm": 0.6796875,
|
| 75419 |
+
"learning_rate": 0.00042017268473062166,
|
| 75420 |
+
"loss": 1.3800415992736816,
|
| 75421 |
+
"step": 10773
|
| 75422 |
+
},
|
| 75423 |
+
{
|
| 75424 |
+
"epoch": 0.9389477525664821,
|
| 75425 |
+
"grad_norm": 0.5625,
|
| 75426 |
+
"learning_rate": 0.0004195666232194029,
|
| 75427 |
+
"loss": 1.3398977518081665,
|
| 75428 |
+
"step": 10774
|
| 75429 |
+
},
|
| 75430 |
+
{
|
| 75431 |
+
"epoch": 0.9390349019773384,
|
| 75432 |
+
"grad_norm": 0.609375,
|
| 75433 |
+
"learning_rate": 0.000418960884102931,
|
| 75434 |
+
"loss": 1.2846004962921143,
|
| 75435 |
+
"step": 10775
|
| 75436 |
+
},
|
| 75437 |
+
{
|
| 75438 |
+
"epoch": 0.9391220513881948,
|
| 75439 |
+
"grad_norm": 0.6328125,
|
| 75440 |
+
"learning_rate": 0.000418355467717367,
|
| 75441 |
+
"loss": 1.348699688911438,
|
| 75442 |
+
"step": 10776
|
| 75443 |
+
},
|
| 75444 |
+
{
|
| 75445 |
+
"epoch": 0.9392092007990511,
|
| 75446 |
+
"grad_norm": 0.62890625,
|
| 75447 |
+
"learning_rate": 0.00041775037439869167,
|
| 75448 |
+
"loss": 1.4276093244552612,
|
| 75449 |
+
"step": 10777
|
| 75450 |
+
},
|
| 75451 |
+
{
|
| 75452 |
+
"epoch": 0.9392963502099075,
|
| 75453 |
+
"grad_norm": 0.640625,
|
| 75454 |
+
"learning_rate": 0.0004171456044827097,
|
| 75455 |
+
"loss": 1.4915839433670044,
|
| 75456 |
+
"step": 10778
|
| 75457 |
+
},
|
| 75458 |
+
{
|
| 75459 |
+
"epoch": 0.9393834996207638,
|
| 75460 |
+
"grad_norm": 0.73046875,
|
| 75461 |
+
"learning_rate": 0.0004165411583050434,
|
| 75462 |
+
"loss": 1.3433510065078735,
|
| 75463 |
+
"step": 10779
|
| 75464 |
+
},
|
| 75465 |
+
{
|
| 75466 |
+
"epoch": 0.9394706490316203,
|
| 75467 |
+
"grad_norm": 0.640625,
|
| 75468 |
+
"learning_rate": 0.0004159370362011356,
|
| 75469 |
+
"loss": 1.340151309967041,
|
| 75470 |
+
"step": 10780
|
| 75471 |
+
},
|
| 75472 |
+
{
|
| 75473 |
+
"epoch": 0.9395577984424767,
|
| 75474 |
+
"grad_norm": 0.59765625,
|
| 75475 |
+
"learning_rate": 0.00041533323850625,
|
| 75476 |
+
"loss": 1.4006013870239258,
|
| 75477 |
+
"step": 10781
|
| 75478 |
+
},
|
| 75479 |
+
{
|
| 75480 |
+
"epoch": 0.939644947853333,
|
| 75481 |
+
"grad_norm": 0.5703125,
|
| 75482 |
+
"learning_rate": 0.00041472976555546957,
|
| 75483 |
+
"loss": 1.4418479204177856,
|
| 75484 |
+
"step": 10782
|
| 75485 |
+
},
|
| 75486 |
+
{
|
| 75487 |
+
"epoch": 0.9397320972641894,
|
| 75488 |
+
"grad_norm": 0.62109375,
|
| 75489 |
+
"learning_rate": 0.000414126617683699,
|
| 75490 |
+
"loss": 1.4882111549377441,
|
| 75491 |
+
"step": 10783
|
| 75492 |
+
},
|
| 75493 |
+
{
|
| 75494 |
+
"epoch": 0.9398192466750457,
|
| 75495 |
+
"grad_norm": 0.58984375,
|
| 75496 |
+
"learning_rate": 0.00041352379522566045,
|
| 75497 |
+
"loss": 1.3524689674377441,
|
| 75498 |
+
"step": 10784
|
| 75499 |
+
},
|
| 75500 |
+
{
|
| 75501 |
+
"epoch": 0.9399063960859021,
|
| 75502 |
+
"grad_norm": 0.65625,
|
| 75503 |
+
"learning_rate": 0.0004129212985158967,
|
| 75504 |
+
"loss": 1.5079108476638794,
|
| 75505 |
+
"step": 10785
|
| 75506 |
+
},
|
| 75507 |
+
{
|
| 75508 |
+
"epoch": 0.9399935454967584,
|
| 75509 |
+
"grad_norm": 0.69140625,
|
| 75510 |
+
"learning_rate": 0.0004123191278887684,
|
| 75511 |
+
"loss": 1.4028526544570923,
|
| 75512 |
+
"step": 10786
|
| 75513 |
+
},
|
| 75514 |
+
{
|
| 75515 |
+
"epoch": 0.9400806949076148,
|
| 75516 |
+
"grad_norm": 0.65625,
|
| 75517 |
+
"learning_rate": 0.00041171728367845554,
|
| 75518 |
+
"loss": 1.3713098764419556,
|
| 75519 |
+
"step": 10787
|
| 75520 |
+
},
|
| 75521 |
+
{
|
| 75522 |
+
"epoch": 0.9401678443184712,
|
| 75523 |
+
"grad_norm": 0.59375,
|
| 75524 |
+
"learning_rate": 0.0004111157662189597,
|
| 75525 |
+
"loss": 1.4671008586883545,
|
| 75526 |
+
"step": 10788
|
| 75527 |
+
},
|
| 75528 |
+
{
|
| 75529 |
+
"epoch": 0.9402549937293275,
|
| 75530 |
+
"grad_norm": 0.53515625,
|
| 75531 |
+
"learning_rate": 0.00041051457584409767,
|
| 75532 |
+
"loss": 1.1971697807312012,
|
| 75533 |
+
"step": 10789
|
| 75534 |
+
},
|
| 75535 |
+
{
|
| 75536 |
+
"epoch": 0.9403421431401839,
|
| 75537 |
+
"grad_norm": 0.6171875,
|
| 75538 |
+
"learning_rate": 0.0004099137128875063,
|
| 75539 |
+
"loss": 1.4762513637542725,
|
| 75540 |
+
"step": 10790
|
| 75541 |
+
},
|
| 75542 |
+
{
|
| 75543 |
+
"epoch": 0.9404292925510402,
|
| 75544 |
+
"grad_norm": 0.546875,
|
| 75545 |
+
"learning_rate": 0.00040931317768264075,
|
| 75546 |
+
"loss": 1.3404229879379272,
|
| 75547 |
+
"step": 10791
|
| 75548 |
+
},
|
| 75549 |
+
{
|
| 75550 |
+
"epoch": 0.9405164419618967,
|
| 75551 |
+
"grad_norm": 0.625,
|
| 75552 |
+
"learning_rate": 0.0004087129705627731,
|
| 75553 |
+
"loss": 1.441551923751831,
|
| 75554 |
+
"step": 10792
|
| 75555 |
+
},
|
| 75556 |
+
{
|
| 75557 |
+
"epoch": 0.940603591372753,
|
| 75558 |
+
"grad_norm": 0.64453125,
|
| 75559 |
+
"learning_rate": 0.00040811309186099373,
|
| 75560 |
+
"loss": 1.3485558032989502,
|
| 75561 |
+
"step": 10793
|
| 75562 |
+
},
|
| 75563 |
+
{
|
| 75564 |
+
"epoch": 0.9406907407836094,
|
| 75565 |
+
"grad_norm": 0.58984375,
|
| 75566 |
+
"learning_rate": 0.00040751354191021334,
|
| 75567 |
+
"loss": 1.330207347869873,
|
| 75568 |
+
"step": 10794
|
| 75569 |
+
},
|
| 75570 |
+
{
|
| 75571 |
+
"epoch": 0.9407778901944658,
|
| 75572 |
+
"grad_norm": 0.65234375,
|
| 75573 |
+
"learning_rate": 0.00040691432104315715,
|
| 75574 |
+
"loss": 1.345668911933899,
|
| 75575 |
+
"step": 10795
|
| 75576 |
+
},
|
| 75577 |
+
{
|
| 75578 |
+
"epoch": 0.9408650396053221,
|
| 75579 |
+
"grad_norm": 0.58203125,
|
| 75580 |
+
"learning_rate": 0.0004063154295923686,
|
| 75581 |
+
"loss": 1.6343204975128174,
|
| 75582 |
+
"step": 10796
|
| 75583 |
+
},
|
| 75584 |
+
{
|
| 75585 |
+
"epoch": 0.9409521890161785,
|
| 75586 |
+
"grad_norm": 0.625,
|
| 75587 |
+
"learning_rate": 0.00040571686789020884,
|
| 75588 |
+
"loss": 1.3894102573394775,
|
| 75589 |
+
"step": 10797
|
| 75590 |
+
},
|
| 75591 |
+
{
|
| 75592 |
+
"epoch": 0.9410393384270348,
|
| 75593 |
+
"grad_norm": 0.57421875,
|
| 75594 |
+
"learning_rate": 0.00040511863626885433,
|
| 75595 |
+
"loss": 1.4507195949554443,
|
| 75596 |
+
"step": 10798
|
| 75597 |
+
},
|
| 75598 |
+
{
|
| 75599 |
+
"epoch": 0.9411264878378912,
|
| 75600 |
+
"grad_norm": 0.64453125,
|
| 75601 |
+
"learning_rate": 0.00040452073506030116,
|
| 75602 |
+
"loss": 1.2610223293304443,
|
| 75603 |
+
"step": 10799
|
| 75604 |
+
},
|
| 75605 |
+
{
|
| 75606 |
+
"epoch": 0.9412136372487475,
|
| 75607 |
+
"grad_norm": 0.57421875,
|
| 75608 |
+
"learning_rate": 0.00040392316459636013,
|
| 75609 |
+
"loss": 1.2862776517868042,
|
| 75610 |
+
"step": 10800
|
| 75611 |
+
},
|
| 75612 |
+
{
|
| 75613 |
+
"epoch": 0.9413007866596039,
|
| 75614 |
+
"grad_norm": 0.6328125,
|
| 75615 |
+
"learning_rate": 0.0004033259252086588,
|
| 75616 |
+
"loss": 1.297368049621582,
|
| 75617 |
+
"step": 10801
|
| 75618 |
+
},
|
| 75619 |
+
{
|
| 75620 |
+
"epoch": 0.9413879360704603,
|
| 75621 |
+
"grad_norm": 0.57421875,
|
| 75622 |
+
"learning_rate": 0.0004027290172286411,
|
| 75623 |
+
"loss": 1.4660052061080933,
|
| 75624 |
+
"step": 10802
|
| 75625 |
+
},
|
| 75626 |
+
{
|
| 75627 |
+
"epoch": 0.9414750854813166,
|
| 75628 |
+
"grad_norm": 0.67578125,
|
| 75629 |
+
"learning_rate": 0.00040213244098756616,
|
| 75630 |
+
"loss": 1.566052794456482,
|
| 75631 |
+
"step": 10803
|
| 75632 |
+
},
|
| 75633 |
+
{
|
| 75634 |
+
"epoch": 0.9415622348921731,
|
| 75635 |
+
"grad_norm": 0.53515625,
|
| 75636 |
+
"learning_rate": 0.0004015361968165116,
|
| 75637 |
+
"loss": 1.2307544946670532,
|
| 75638 |
+
"step": 10804
|
| 75639 |
+
},
|
| 75640 |
+
{
|
| 75641 |
+
"epoch": 0.9416493843030294,
|
| 75642 |
+
"grad_norm": 0.67578125,
|
| 75643 |
+
"learning_rate": 0.0004009402850463682,
|
| 75644 |
+
"loss": 1.464665174484253,
|
| 75645 |
+
"step": 10805
|
| 75646 |
+
},
|
| 75647 |
+
{
|
| 75648 |
+
"epoch": 0.9417365337138858,
|
| 75649 |
+
"grad_norm": 0.6015625,
|
| 75650 |
+
"learning_rate": 0.000400344706007843,
|
| 75651 |
+
"loss": 1.415521502494812,
|
| 75652 |
+
"step": 10806
|
| 75653 |
+
},
|
| 75654 |
+
{
|
| 75655 |
+
"epoch": 0.9418236831247421,
|
| 75656 |
+
"grad_norm": 0.62109375,
|
| 75657 |
+
"learning_rate": 0.0003997494600314587,
|
| 75658 |
+
"loss": 1.4694634675979614,
|
| 75659 |
+
"step": 10807
|
| 75660 |
+
},
|
| 75661 |
+
{
|
| 75662 |
+
"epoch": 0.9419108325355985,
|
| 75663 |
+
"grad_norm": 0.6796875,
|
| 75664 |
+
"learning_rate": 0.00039915454744755276,
|
| 75665 |
+
"loss": 1.4592629671096802,
|
| 75666 |
+
"step": 10808
|
| 75667 |
+
},
|
| 75668 |
+
{
|
| 75669 |
+
"epoch": 0.9419979819464549,
|
| 75670 |
+
"grad_norm": 0.5234375,
|
| 75671 |
+
"learning_rate": 0.00039855996858627736,
|
| 75672 |
+
"loss": 1.32602858543396,
|
| 75673 |
+
"step": 10809
|
| 75674 |
+
},
|
| 75675 |
+
{
|
| 75676 |
+
"epoch": 0.9420851313573112,
|
| 75677 |
+
"grad_norm": 0.5859375,
|
| 75678 |
+
"learning_rate": 0.00039796572377760124,
|
| 75679 |
+
"loss": 1.364354133605957,
|
| 75680 |
+
"step": 10810
|
| 75681 |
+
},
|
| 75682 |
+
{
|
| 75683 |
+
"epoch": 0.9421722807681676,
|
| 75684 |
+
"grad_norm": 0.490234375,
|
| 75685 |
+
"learning_rate": 0.00039737181335130567,
|
| 75686 |
+
"loss": 1.228593111038208,
|
| 75687 |
+
"step": 10811
|
| 75688 |
+
},
|
| 75689 |
+
{
|
| 75690 |
+
"epoch": 0.9422594301790239,
|
| 75691 |
+
"grad_norm": 0.625,
|
| 75692 |
+
"learning_rate": 0.0003967782376369872,
|
| 75693 |
+
"loss": 1.4669352769851685,
|
| 75694 |
+
"step": 10812
|
| 75695 |
+
},
|
| 75696 |
+
{
|
| 75697 |
+
"epoch": 0.9423465795898803,
|
| 75698 |
+
"grad_norm": 0.578125,
|
| 75699 |
+
"learning_rate": 0.00039618499696405667,
|
| 75700 |
+
"loss": 1.3811349868774414,
|
| 75701 |
+
"step": 10813
|
| 75702 |
+
},
|
| 75703 |
+
{
|
| 75704 |
+
"epoch": 0.9424337290007366,
|
| 75705 |
+
"grad_norm": 0.6015625,
|
| 75706 |
+
"learning_rate": 0.0003955920916617385,
|
| 75707 |
+
"loss": 1.3459770679473877,
|
| 75708 |
+
"step": 10814
|
| 75709 |
+
},
|
| 75710 |
+
{
|
| 75711 |
+
"epoch": 0.942520878411593,
|
| 75712 |
+
"grad_norm": 0.62109375,
|
| 75713 |
+
"learning_rate": 0.0003949995220590719,
|
| 75714 |
+
"loss": 1.3409168720245361,
|
| 75715 |
+
"step": 10815
|
| 75716 |
+
},
|
| 75717 |
+
{
|
| 75718 |
+
"epoch": 0.9426080278224495,
|
| 75719 |
+
"grad_norm": 0.58984375,
|
| 75720 |
+
"learning_rate": 0.000394407288484909,
|
| 75721 |
+
"loss": 1.4049094915390015,
|
| 75722 |
+
"step": 10816
|
| 75723 |
+
},
|
| 75724 |
+
{
|
| 75725 |
+
"epoch": 0.9426951772333058,
|
| 75726 |
+
"grad_norm": 0.56640625,
|
| 75727 |
+
"learning_rate": 0.00039381539126791574,
|
| 75728 |
+
"loss": 1.405827283859253,
|
| 75729 |
+
"step": 10817
|
| 75730 |
+
},
|
| 75731 |
+
{
|
| 75732 |
+
"epoch": 0.9427823266441622,
|
| 75733 |
+
"grad_norm": 0.578125,
|
| 75734 |
+
"learning_rate": 0.00039322383073657144,
|
| 75735 |
+
"loss": 1.349738597869873,
|
| 75736 |
+
"step": 10818
|
| 75737 |
+
},
|
| 75738 |
+
{
|
| 75739 |
+
"epoch": 0.9428694760550185,
|
| 75740 |
+
"grad_norm": 0.57421875,
|
| 75741 |
+
"learning_rate": 0.0003926326072191674,
|
| 75742 |
+
"loss": 1.3446109294891357,
|
| 75743 |
+
"step": 10819
|
| 75744 |
+
},
|
| 75745 |
+
{
|
| 75746 |
+
"epoch": 0.9429566254658749,
|
| 75747 |
+
"grad_norm": 0.65234375,
|
| 75748 |
+
"learning_rate": 0.00039204172104381134,
|
| 75749 |
+
"loss": 1.1995892524719238,
|
| 75750 |
+
"step": 10820
|
| 75751 |
+
},
|
| 75752 |
+
{
|
| 75753 |
+
"epoch": 0.9430437748767312,
|
| 75754 |
+
"grad_norm": 0.6484375,
|
| 75755 |
+
"learning_rate": 0.00039145117253841955,
|
| 75756 |
+
"loss": 1.3476479053497314,
|
| 75757 |
+
"step": 10821
|
| 75758 |
+
},
|
| 75759 |
+
{
|
| 75760 |
+
"epoch": 0.9431309242875876,
|
| 75761 |
+
"grad_norm": 0.5859375,
|
| 75762 |
+
"learning_rate": 0.00039086096203072346,
|
| 75763 |
+
"loss": 1.3362534046173096,
|
| 75764 |
+
"step": 10822
|
| 75765 |
+
},
|
| 75766 |
+
{
|
| 75767 |
+
"epoch": 0.943218073698444,
|
| 75768 |
+
"grad_norm": 0.56640625,
|
| 75769 |
+
"learning_rate": 0.0003902710898482662,
|
| 75770 |
+
"loss": 1.3388662338256836,
|
| 75771 |
+
"step": 10823
|
| 75772 |
+
},
|
| 75773 |
+
{
|
| 75774 |
+
"epoch": 0.9433052231093003,
|
| 75775 |
+
"grad_norm": 0.55859375,
|
| 75776 |
+
"learning_rate": 0.00038968155631840276,
|
| 75777 |
+
"loss": 1.3150140047073364,
|
| 75778 |
+
"step": 10824
|
| 75779 |
+
},
|
| 75780 |
+
{
|
| 75781 |
+
"epoch": 0.9433923725201567,
|
| 75782 |
+
"grad_norm": 0.6015625,
|
| 75783 |
+
"learning_rate": 0.00038909236176830044,
|
| 75784 |
+
"loss": 1.4378329515457153,
|
| 75785 |
+
"step": 10825
|
| 75786 |
+
},
|
| 75787 |
+
{
|
| 75788 |
+
"epoch": 0.943479521931013,
|
| 75789 |
+
"grad_norm": 0.5859375,
|
| 75790 |
+
"learning_rate": 0.00038850350652493983,
|
| 75791 |
+
"loss": 1.2804172039031982,
|
| 75792 |
+
"step": 10826
|
| 75793 |
+
},
|
| 75794 |
+
{
|
| 75795 |
+
"epoch": 0.9435666713418694,
|
| 75796 |
+
"grad_norm": 0.61328125,
|
| 75797 |
+
"learning_rate": 0.00038791499091511125,
|
| 75798 |
+
"loss": 1.577147126197815,
|
| 75799 |
+
"step": 10827
|
| 75800 |
+
},
|
| 75801 |
+
{
|
| 75802 |
+
"epoch": 0.9436538207527257,
|
| 75803 |
+
"grad_norm": 0.5546875,
|
| 75804 |
+
"learning_rate": 0.00038732681526541736,
|
| 75805 |
+
"loss": 1.384181022644043,
|
| 75806 |
+
"step": 10828
|
| 75807 |
+
},
|
| 75808 |
+
{
|
| 75809 |
+
"epoch": 0.9437409701635822,
|
| 75810 |
+
"grad_norm": 0.67578125,
|
| 75811 |
+
"learning_rate": 0.00038673897990227203,
|
| 75812 |
+
"loss": 1.451761245727539,
|
| 75813 |
+
"step": 10829
|
| 75814 |
+
},
|
| 75815 |
+
{
|
| 75816 |
+
"epoch": 0.9438281195744386,
|
| 75817 |
+
"grad_norm": 0.61328125,
|
| 75818 |
+
"learning_rate": 0.0003861514851518996,
|
| 75819 |
+
"loss": 1.3510509729385376,
|
| 75820 |
+
"step": 10830
|
| 75821 |
+
},
|
| 75822 |
+
{
|
| 75823 |
+
"epoch": 0.9439152689852949,
|
| 75824 |
+
"grad_norm": 0.5703125,
|
| 75825 |
+
"learning_rate": 0.00038556433134033757,
|
| 75826 |
+
"loss": 1.2669039964675903,
|
| 75827 |
+
"step": 10831
|
| 75828 |
+
},
|
| 75829 |
+
{
|
| 75830 |
+
"epoch": 0.9440024183961513,
|
| 75831 |
+
"grad_norm": 0.59765625,
|
| 75832 |
+
"learning_rate": 0.0003849775187934324,
|
| 75833 |
+
"loss": 1.3124220371246338,
|
| 75834 |
+
"step": 10832
|
| 75835 |
+
},
|
| 75836 |
+
{
|
| 75837 |
+
"epoch": 0.9440895678070076,
|
| 75838 |
+
"grad_norm": 0.6171875,
|
| 75839 |
+
"learning_rate": 0.00038439104783684087,
|
| 75840 |
+
"loss": 1.3466203212738037,
|
| 75841 |
+
"step": 10833
|
| 75842 |
+
},
|
| 75843 |
+
{
|
| 75844 |
+
"epoch": 0.944176717217864,
|
| 75845 |
+
"grad_norm": 0.61328125,
|
| 75846 |
+
"learning_rate": 0.0003838049187960312,
|
| 75847 |
+
"loss": 1.2957828044891357,
|
| 75848 |
+
"step": 10834
|
| 75849 |
+
},
|
| 75850 |
+
{
|
| 75851 |
+
"epoch": 0.9442638666287203,
|
| 75852 |
+
"grad_norm": 0.6484375,
|
| 75853 |
+
"learning_rate": 0.0003832191319962804,
|
| 75854 |
+
"loss": 1.3593542575836182,
|
| 75855 |
+
"step": 10835
|
| 75856 |
+
},
|
| 75857 |
+
{
|
| 75858 |
+
"epoch": 0.9443510160395767,
|
| 75859 |
+
"grad_norm": 0.55859375,
|
| 75860 |
+
"learning_rate": 0.0003826336877626789,
|
| 75861 |
+
"loss": 1.2994202375411987,
|
| 75862 |
+
"step": 10836
|
| 75863 |
+
},
|
| 75864 |
+
{
|
| 75865 |
+
"epoch": 0.9444381654504331,
|
| 75866 |
+
"grad_norm": 0.703125,
|
| 75867 |
+
"learning_rate": 0.0003820485864201238,
|
| 75868 |
+
"loss": 1.4568206071853638,
|
| 75869 |
+
"step": 10837
|
| 75870 |
+
},
|
| 75871 |
+
{
|
| 75872 |
+
"epoch": 0.9445253148612894,
|
| 75873 |
+
"grad_norm": 0.609375,
|
| 75874 |
+
"learning_rate": 0.00038146382829332294,
|
| 75875 |
+
"loss": 1.4523032903671265,
|
| 75876 |
+
"step": 10838
|
| 75877 |
+
},
|
| 75878 |
+
{
|
| 75879 |
+
"epoch": 0.9446124642721458,
|
| 75880 |
+
"grad_norm": 0.578125,
|
| 75881 |
+
"learning_rate": 0.00038087941370679325,
|
| 75882 |
+
"loss": 1.2172088623046875,
|
| 75883 |
+
"step": 10839
|
| 75884 |
+
},
|
| 75885 |
+
{
|
| 75886 |
+
"epoch": 0.9446996136830021,
|
| 75887 |
+
"grad_norm": 0.59375,
|
| 75888 |
+
"learning_rate": 0.0003802953429848616,
|
| 75889 |
+
"loss": 1.3055803775787354,
|
| 75890 |
+
"step": 10840
|
| 75891 |
+
},
|
| 75892 |
+
{
|
| 75893 |
+
"epoch": 0.9447867630938586,
|
| 75894 |
+
"grad_norm": 0.609375,
|
| 75895 |
+
"learning_rate": 0.00037971161645166323,
|
| 75896 |
+
"loss": 1.4756884574890137,
|
| 75897 |
+
"step": 10841
|
| 75898 |
+
},
|
| 75899 |
+
{
|
| 75900 |
+
"epoch": 0.944873912504715,
|
| 75901 |
+
"grad_norm": 0.67578125,
|
| 75902 |
+
"learning_rate": 0.0003791282344311443,
|
| 75903 |
+
"loss": 1.4392821788787842,
|
| 75904 |
+
"step": 10842
|
| 75905 |
+
},
|
| 75906 |
+
{
|
| 75907 |
+
"epoch": 0.9449610619155713,
|
| 75908 |
+
"grad_norm": 0.5546875,
|
| 75909 |
+
"learning_rate": 0.0003785451972470578,
|
| 75910 |
+
"loss": 1.3131775856018066,
|
| 75911 |
+
"step": 10843
|
| 75912 |
+
},
|
| 75913 |
+
{
|
| 75914 |
+
"epoch": 0.9450482113264277,
|
| 75915 |
+
"grad_norm": 0.625,
|
| 75916 |
+
"learning_rate": 0.000377962505222966,
|
| 75917 |
+
"loss": 1.3837449550628662,
|
| 75918 |
+
"step": 10844
|
| 75919 |
+
},
|
| 75920 |
+
{
|
| 75921 |
+
"epoch": 0.945135360737284,
|
| 75922 |
+
"grad_norm": 0.53125,
|
| 75923 |
+
"learning_rate": 0.0003773801586822397,
|
| 75924 |
+
"loss": 1.3735487461090088,
|
| 75925 |
+
"step": 10845
|
| 75926 |
+
},
|
| 75927 |
+
{
|
| 75928 |
+
"epoch": 0.9452225101481404,
|
| 75929 |
+
"grad_norm": 0.56640625,
|
| 75930 |
+
"learning_rate": 0.00037679815794805705,
|
| 75931 |
+
"loss": 1.4723949432373047,
|
| 75932 |
+
"step": 10846
|
| 75933 |
+
},
|
| 75934 |
+
{
|
| 75935 |
+
"epoch": 0.9453096595589967,
|
| 75936 |
+
"grad_norm": 0.59765625,
|
| 75937 |
+
"learning_rate": 0.0003762165033434066,
|
| 75938 |
+
"loss": 1.3084514141082764,
|
| 75939 |
+
"step": 10847
|
| 75940 |
+
},
|
| 75941 |
+
{
|
| 75942 |
+
"epoch": 0.9453968089698531,
|
| 75943 |
+
"grad_norm": 0.58203125,
|
| 75944 |
+
"learning_rate": 0.0003756351951910827,
|
| 75945 |
+
"loss": 1.396877408027649,
|
| 75946 |
+
"step": 10848
|
| 75947 |
+
},
|
| 75948 |
+
{
|
| 75949 |
+
"epoch": 0.9454839583807095,
|
| 75950 |
+
"grad_norm": 0.65234375,
|
| 75951 |
+
"learning_rate": 0.00037505423381368797,
|
| 75952 |
+
"loss": 1.3752583265304565,
|
| 75953 |
+
"step": 10849
|
| 75954 |
+
},
|
| 75955 |
+
{
|
| 75956 |
+
"epoch": 0.9455711077915658,
|
| 75957 |
+
"grad_norm": 0.57421875,
|
| 75958 |
+
"learning_rate": 0.00037447361953363317,
|
| 75959 |
+
"loss": 1.2257283926010132,
|
| 75960 |
+
"step": 10850
|
| 75961 |
+
},
|
| 75962 |
+
{
|
| 75963 |
+
"epoch": 0.9456582572024222,
|
| 75964 |
+
"grad_norm": 0.58203125,
|
| 75965 |
+
"learning_rate": 0.00037389335267313417,
|
| 75966 |
+
"loss": 1.363704800605774,
|
| 75967 |
+
"step": 10851
|
| 75968 |
+
},
|
| 75969 |
+
{
|
| 75970 |
+
"epoch": 0.9457454066132785,
|
| 75971 |
+
"grad_norm": 0.578125,
|
| 75972 |
+
"learning_rate": 0.00037331343355421825,
|
| 75973 |
+
"loss": 1.3016622066497803,
|
| 75974 |
+
"step": 10852
|
| 75975 |
+
},
|
| 75976 |
+
{
|
| 75977 |
+
"epoch": 0.945832556024135,
|
| 75978 |
+
"grad_norm": 0.57421875,
|
| 75979 |
+
"learning_rate": 0.0003727338624987162,
|
| 75980 |
+
"loss": 1.2846336364746094,
|
| 75981 |
+
"step": 10853
|
| 75982 |
+
},
|
| 75983 |
+
{
|
| 75984 |
+
"epoch": 0.9459197054349913,
|
| 75985 |
+
"grad_norm": 0.62109375,
|
| 75986 |
+
"learning_rate": 0.0003721546398282669,
|
| 75987 |
+
"loss": 1.4935826063156128,
|
| 75988 |
+
"step": 10854
|
| 75989 |
+
},
|
| 75990 |
+
{
|
| 75991 |
+
"epoch": 0.9460068548458477,
|
| 75992 |
+
"grad_norm": 0.5859375,
|
| 75993 |
+
"learning_rate": 0.00037157576586431545,
|
| 75994 |
+
"loss": 1.3338427543640137,
|
| 75995 |
+
"step": 10855
|
| 75996 |
+
},
|
| 75997 |
+
{
|
| 75998 |
+
"epoch": 0.9460940042567041,
|
| 75999 |
+
"grad_norm": 0.640625,
|
| 76000 |
+
"learning_rate": 0.00037099724092811434,
|
| 76001 |
+
"loss": 1.359696865081787,
|
| 76002 |
+
"step": 10856
|
| 76003 |
+
},
|
| 76004 |
+
{
|
| 76005 |
+
"epoch": 0.9461811536675604,
|
| 76006 |
+
"grad_norm": 0.5546875,
|
| 76007 |
+
"learning_rate": 0.00037041906534072017,
|
| 76008 |
+
"loss": 1.196385383605957,
|
| 76009 |
+
"step": 10857
|
| 76010 |
+
},
|
| 76011 |
+
{
|
| 76012 |
+
"epoch": 0.9462683030784168,
|
| 76013 |
+
"grad_norm": 0.546875,
|
| 76014 |
+
"learning_rate": 0.000369841239422999,
|
| 76015 |
+
"loss": 1.3450770378112793,
|
| 76016 |
+
"step": 10858
|
| 76017 |
+
},
|
| 76018 |
+
{
|
| 76019 |
+
"epoch": 0.9463554524892731,
|
| 76020 |
+
"grad_norm": 0.51953125,
|
| 76021 |
+
"learning_rate": 0.00036926376349562034,
|
| 76022 |
+
"loss": 1.392338514328003,
|
| 76023 |
+
"step": 10859
|
| 76024 |
+
},
|
| 76025 |
+
{
|
| 76026 |
+
"epoch": 0.9464426019001295,
|
| 76027 |
+
"grad_norm": 0.61328125,
|
| 76028 |
+
"learning_rate": 0.0003686866378790603,
|
| 76029 |
+
"loss": 1.3446500301361084,
|
| 76030 |
+
"step": 10860
|
| 76031 |
+
},
|
| 76032 |
+
{
|
| 76033 |
+
"epoch": 0.9465297513109858,
|
| 76034 |
+
"grad_norm": 0.59375,
|
| 76035 |
+
"learning_rate": 0.00036810986289360025,
|
| 76036 |
+
"loss": 1.4945433139801025,
|
| 76037 |
+
"step": 10861
|
| 76038 |
+
},
|
| 76039 |
+
{
|
| 76040 |
+
"epoch": 0.9466169007218422,
|
| 76041 |
+
"grad_norm": 0.5859375,
|
| 76042 |
+
"learning_rate": 0.0003675334388593267,
|
| 76043 |
+
"loss": 1.2035317420959473,
|
| 76044 |
+
"step": 10862
|
| 76045 |
+
},
|
| 76046 |
+
{
|
| 76047 |
+
"epoch": 0.9467040501326986,
|
| 76048 |
+
"grad_norm": 0.53515625,
|
| 76049 |
+
"learning_rate": 0.00036695736609613323,
|
| 76050 |
+
"loss": 1.3052306175231934,
|
| 76051 |
+
"step": 10863
|
| 76052 |
+
},
|
| 76053 |
+
{
|
| 76054 |
+
"epoch": 0.946791199543555,
|
| 76055 |
+
"grad_norm": 0.61328125,
|
| 76056 |
+
"learning_rate": 0.00036638164492371647,
|
| 76057 |
+
"loss": 1.3907655477523804,
|
| 76058 |
+
"step": 10864
|
| 76059 |
+
},
|
| 76060 |
+
{
|
| 76061 |
+
"epoch": 0.9468783489544114,
|
| 76062 |
+
"grad_norm": 0.62109375,
|
| 76063 |
+
"learning_rate": 0.0003658062756615786,
|
| 76064 |
+
"loss": 1.3140581846237183,
|
| 76065 |
+
"step": 10865
|
| 76066 |
+
},
|
| 76067 |
+
{
|
| 76068 |
+
"epoch": 0.9469654983652677,
|
| 76069 |
+
"grad_norm": 0.5703125,
|
| 76070 |
+
"learning_rate": 0.00036523125862902644,
|
| 76071 |
+
"loss": 1.3715146780014038,
|
| 76072 |
+
"step": 10866
|
| 76073 |
+
},
|
| 76074 |
+
{
|
| 76075 |
+
"epoch": 0.9470526477761241,
|
| 76076 |
+
"grad_norm": 0.68359375,
|
| 76077 |
+
"learning_rate": 0.00036465659414517066,
|
| 76078 |
+
"loss": 1.2546745538711548,
|
| 76079 |
+
"step": 10867
|
| 76080 |
+
},
|
| 76081 |
+
{
|
| 76082 |
+
"epoch": 0.9471397971869804,
|
| 76083 |
+
"grad_norm": 0.6328125,
|
| 76084 |
+
"learning_rate": 0.0003640822825289286,
|
| 76085 |
+
"loss": 1.4042918682098389,
|
| 76086 |
+
"step": 10868
|
| 76087 |
+
},
|
| 76088 |
+
{
|
| 76089 |
+
"epoch": 0.9472269465978368,
|
| 76090 |
+
"grad_norm": 0.6640625,
|
| 76091 |
+
"learning_rate": 0.00036350832409901934,
|
| 76092 |
+
"loss": 1.4622188806533813,
|
| 76093 |
+
"step": 10869
|
| 76094 |
+
},
|
| 76095 |
+
{
|
| 76096 |
+
"epoch": 0.9473140960086932,
|
| 76097 |
+
"grad_norm": 0.65234375,
|
| 76098 |
+
"learning_rate": 0.00036293471917396694,
|
| 76099 |
+
"loss": 1.4352420568466187,
|
| 76100 |
+
"step": 10870
|
| 76101 |
+
},
|
| 76102 |
+
{
|
| 76103 |
+
"epoch": 0.9474012454195495,
|
| 76104 |
+
"grad_norm": 0.62109375,
|
| 76105 |
+
"learning_rate": 0.0003623614680720991,
|
| 76106 |
+
"loss": 1.425258994102478,
|
| 76107 |
+
"step": 10871
|
| 76108 |
+
},
|
| 76109 |
+
{
|
| 76110 |
+
"epoch": 0.9474883948304059,
|
| 76111 |
+
"grad_norm": 0.625,
|
| 76112 |
+
"learning_rate": 0.0003617885711115471,
|
| 76113 |
+
"loss": 1.524860143661499,
|
| 76114 |
+
"step": 10872
|
| 76115 |
+
},
|
| 76116 |
+
{
|
| 76117 |
+
"epoch": 0.9475755442412622,
|
| 76118 |
+
"grad_norm": 0.66796875,
|
| 76119 |
+
"learning_rate": 0.0003612160286102456,
|
| 76120 |
+
"loss": 1.398781180381775,
|
| 76121 |
+
"step": 10873
|
| 76122 |
+
},
|
| 76123 |
+
{
|
| 76124 |
+
"epoch": 0.9476626936521186,
|
| 76125 |
+
"grad_norm": 0.5859375,
|
| 76126 |
+
"learning_rate": 0.0003606438408859333,
|
| 76127 |
+
"loss": 1.2950276136398315,
|
| 76128 |
+
"step": 10874
|
| 76129 |
+
},
|
| 76130 |
+
{
|
| 76131 |
+
"epoch": 0.9477498430629749,
|
| 76132 |
+
"grad_norm": 0.60546875,
|
| 76133 |
+
"learning_rate": 0.00036007200825615126,
|
| 76134 |
+
"loss": 1.3413006067276,
|
| 76135 |
+
"step": 10875
|
| 76136 |
+
},
|
| 76137 |
+
{
|
| 76138 |
+
"epoch": 0.9478369924738314,
|
| 76139 |
+
"grad_norm": 0.62109375,
|
| 76140 |
+
"learning_rate": 0.0003595005310382437,
|
| 76141 |
+
"loss": 1.5986438989639282,
|
| 76142 |
+
"step": 10876
|
| 76143 |
+
},
|
| 76144 |
+
{
|
| 76145 |
+
"epoch": 0.9479241418846878,
|
| 76146 |
+
"grad_norm": 0.62890625,
|
| 76147 |
+
"learning_rate": 0.00035892940954935734,
|
| 76148 |
+
"loss": 1.3787851333618164,
|
| 76149 |
+
"step": 10877
|
| 76150 |
+
},
|
| 76151 |
+
{
|
| 76152 |
+
"epoch": 0.9480112912955441,
|
| 76153 |
+
"grad_norm": 0.60546875,
|
| 76154 |
+
"learning_rate": 0.00035835864410644095,
|
| 76155 |
+
"loss": 1.3755840063095093,
|
| 76156 |
+
"step": 10878
|
| 76157 |
+
},
|
| 76158 |
+
{
|
| 76159 |
+
"epoch": 0.9480984407064005,
|
| 76160 |
+
"grad_norm": 0.6484375,
|
| 76161 |
+
"learning_rate": 0.0003577882350262488,
|
| 76162 |
+
"loss": 1.5901002883911133,
|
| 76163 |
+
"step": 10879
|
| 76164 |
+
},
|
| 76165 |
+
{
|
| 76166 |
+
"epoch": 0.9481855901172568,
|
| 76167 |
+
"grad_norm": 0.66015625,
|
| 76168 |
+
"learning_rate": 0.0003572181826253332,
|
| 76169 |
+
"loss": 1.480640172958374,
|
| 76170 |
+
"step": 10880
|
| 76171 |
+
},
|
| 76172 |
+
{
|
| 76173 |
+
"epoch": 0.9482727395281132,
|
| 76174 |
+
"grad_norm": 0.5234375,
|
| 76175 |
+
"learning_rate": 0.0003566484872200509,
|
| 76176 |
+
"loss": 1.4293866157531738,
|
| 76177 |
+
"step": 10881
|
| 76178 |
+
},
|
| 76179 |
+
{
|
| 76180 |
+
"epoch": 0.9483598889389695,
|
| 76181 |
+
"grad_norm": 0.59765625,
|
| 76182 |
+
"learning_rate": 0.00035607914912656014,
|
| 76183 |
+
"loss": 1.3817474842071533,
|
| 76184 |
+
"step": 10882
|
| 76185 |
+
},
|
| 76186 |
+
{
|
| 76187 |
+
"epoch": 0.9484470383498259,
|
| 76188 |
+
"grad_norm": 0.6171875,
|
| 76189 |
+
"learning_rate": 0.0003555101686608205,
|
| 76190 |
+
"loss": 1.5607452392578125,
|
| 76191 |
+
"step": 10883
|
| 76192 |
+
},
|
| 76193 |
+
{
|
| 76194 |
+
"epoch": 0.9485341877606823,
|
| 76195 |
+
"grad_norm": 0.546875,
|
| 76196 |
+
"learning_rate": 0.0003549415461385927,
|
| 76197 |
+
"loss": 1.4303282499313354,
|
| 76198 |
+
"step": 10884
|
| 76199 |
+
},
|
| 76200 |
+
{
|
| 76201 |
+
"epoch": 0.9486213371715386,
|
| 76202 |
+
"grad_norm": 0.59375,
|
| 76203 |
+
"learning_rate": 0.000354373281875441,
|
| 76204 |
+
"loss": 1.373576283454895,
|
| 76205 |
+
"step": 10885
|
| 76206 |
+
},
|
| 76207 |
+
{
|
| 76208 |
+
"epoch": 0.948708486582395,
|
| 76209 |
+
"grad_norm": 0.546875,
|
| 76210 |
+
"learning_rate": 0.0003538053761867284,
|
| 76211 |
+
"loss": 1.4354960918426514,
|
| 76212 |
+
"step": 10886
|
| 76213 |
+
},
|
| 76214 |
+
{
|
| 76215 |
+
"epoch": 0.9487956359932513,
|
| 76216 |
+
"grad_norm": 0.73828125,
|
| 76217 |
+
"learning_rate": 0.00035323782938762,
|
| 76218 |
+
"loss": 1.5275927782058716,
|
| 76219 |
+
"step": 10887
|
| 76220 |
+
},
|
| 76221 |
+
{
|
| 76222 |
+
"epoch": 0.9488827854041078,
|
| 76223 |
+
"grad_norm": 0.68359375,
|
| 76224 |
+
"learning_rate": 0.0003526706417930814,
|
| 76225 |
+
"loss": 1.4088103771209717,
|
| 76226 |
+
"step": 10888
|
| 76227 |
+
},
|
| 76228 |
+
{
|
| 76229 |
+
"epoch": 0.9489699348149641,
|
| 76230 |
+
"grad_norm": 0.6796875,
|
| 76231 |
+
"learning_rate": 0.0003521038137178784,
|
| 76232 |
+
"loss": 1.4045789241790771,
|
| 76233 |
+
"step": 10889
|
| 76234 |
+
},
|
| 76235 |
+
{
|
| 76236 |
+
"epoch": 0.9490570842258205,
|
| 76237 |
+
"grad_norm": 0.609375,
|
| 76238 |
+
"learning_rate": 0.00035153734547657895,
|
| 76239 |
+
"loss": 1.3758931159973145,
|
| 76240 |
+
"step": 10890
|
| 76241 |
+
},
|
| 76242 |
+
{
|
| 76243 |
+
"epoch": 0.9491442336366769,
|
| 76244 |
+
"grad_norm": 0.5703125,
|
| 76245 |
+
"learning_rate": 0.0003509712373835503,
|
| 76246 |
+
"loss": 1.4020464420318604,
|
| 76247 |
+
"step": 10891
|
| 76248 |
+
},
|
| 76249 |
+
{
|
| 76250 |
+
"epoch": 0.9492313830475332,
|
| 76251 |
+
"grad_norm": 0.6328125,
|
| 76252 |
+
"learning_rate": 0.00035040548975295887,
|
| 76253 |
+
"loss": 1.4836962223052979,
|
| 76254 |
+
"step": 10892
|
| 76255 |
+
},
|
| 76256 |
+
{
|
| 76257 |
+
"epoch": 0.9493185324583896,
|
| 76258 |
+
"grad_norm": 0.55859375,
|
| 76259 |
+
"learning_rate": 0.0003498401028987722,
|
| 76260 |
+
"loss": 1.1181589365005493,
|
| 76261 |
+
"step": 10893
|
| 76262 |
+
},
|
| 76263 |
+
{
|
| 76264 |
+
"epoch": 0.9494056818692459,
|
| 76265 |
+
"grad_norm": 0.6171875,
|
| 76266 |
+
"learning_rate": 0.00034927507713475645,
|
| 76267 |
+
"loss": 1.2913410663604736,
|
| 76268 |
+
"step": 10894
|
| 76269 |
+
},
|
| 76270 |
+
{
|
| 76271 |
+
"epoch": 0.9494928312801023,
|
| 76272 |
+
"grad_norm": 0.56640625,
|
| 76273 |
+
"learning_rate": 0.0003487104127744803,
|
| 76274 |
+
"loss": 1.4168663024902344,
|
| 76275 |
+
"step": 10895
|
| 76276 |
+
},
|
| 76277 |
+
{
|
| 76278 |
+
"epoch": 0.9495799806909586,
|
| 76279 |
+
"grad_norm": 0.625,
|
| 76280 |
+
"learning_rate": 0.0003481461101313087,
|
| 76281 |
+
"loss": 1.3656679391860962,
|
| 76282 |
+
"step": 10896
|
| 76283 |
+
},
|
| 76284 |
+
{
|
| 76285 |
+
"epoch": 0.949667130101815,
|
| 76286 |
+
"grad_norm": 0.60546875,
|
| 76287 |
+
"learning_rate": 0.0003475821695184075,
|
| 76288 |
+
"loss": 1.3501274585723877,
|
| 76289 |
+
"step": 10897
|
| 76290 |
+
},
|
| 76291 |
+
{
|
| 76292 |
+
"epoch": 0.9497542795126714,
|
| 76293 |
+
"grad_norm": 0.58984375,
|
| 76294 |
+
"learning_rate": 0.0003470185912487404,
|
| 76295 |
+
"loss": 1.3929165601730347,
|
| 76296 |
+
"step": 10898
|
| 76297 |
+
},
|
| 76298 |
+
{
|
| 76299 |
+
"epoch": 0.9498414289235277,
|
| 76300 |
+
"grad_norm": 0.66796875,
|
| 76301 |
+
"learning_rate": 0.0003464553756350711,
|
| 76302 |
+
"loss": 1.3516440391540527,
|
| 76303 |
+
"step": 10899
|
| 76304 |
+
},
|
| 76305 |
+
{
|
| 76306 |
+
"epoch": 0.9499285783343842,
|
| 76307 |
+
"grad_norm": 0.60546875,
|
| 76308 |
+
"learning_rate": 0.00034589252298996106,
|
| 76309 |
+
"loss": 1.3244688510894775,
|
| 76310 |
+
"step": 10900
|
| 76311 |
+
},
|
| 76312 |
+
{
|
| 76313 |
+
"epoch": 0.9500157277452405,
|
| 76314 |
+
"grad_norm": 0.62890625,
|
| 76315 |
+
"learning_rate": 0.0003453300336257722,
|
| 76316 |
+
"loss": 1.443703055381775,
|
| 76317 |
+
"step": 10901
|
| 76318 |
+
},
|
| 76319 |
+
{
|
| 76320 |
+
"epoch": 0.9501028771560969,
|
| 76321 |
+
"grad_norm": 0.60546875,
|
| 76322 |
+
"learning_rate": 0.000344767907854663,
|
| 76323 |
+
"loss": 1.4516948461532593,
|
| 76324 |
+
"step": 10902
|
| 76325 |
+
},
|
| 76326 |
+
{
|
| 76327 |
+
"epoch": 0.9501900265669532,
|
| 76328 |
+
"grad_norm": 0.58984375,
|
| 76329 |
+
"learning_rate": 0.0003442061459885907,
|
| 76330 |
+
"loss": 1.304955005645752,
|
| 76331 |
+
"step": 10903
|
| 76332 |
+
},
|
| 76333 |
+
{
|
| 76334 |
+
"epoch": 0.9502771759778096,
|
| 76335 |
+
"grad_norm": 0.58203125,
|
| 76336 |
+
"learning_rate": 0.00034364474833931065,
|
| 76337 |
+
"loss": 1.4244271516799927,
|
| 76338 |
+
"step": 10904
|
| 76339 |
+
},
|
| 76340 |
+
{
|
| 76341 |
+
"epoch": 0.950364325388666,
|
| 76342 |
+
"grad_norm": 0.61328125,
|
| 76343 |
+
"learning_rate": 0.00034308371521837535,
|
| 76344 |
+
"loss": 1.331715703010559,
|
| 76345 |
+
"step": 10905
|
| 76346 |
+
},
|
| 76347 |
+
{
|
| 76348 |
+
"epoch": 0.9504514747995223,
|
| 76349 |
+
"grad_norm": 0.5546875,
|
| 76350 |
+
"learning_rate": 0.00034252304693713697,
|
| 76351 |
+
"loss": 1.3917310237884521,
|
| 76352 |
+
"step": 10906
|
| 76353 |
+
},
|
| 76354 |
+
{
|
| 76355 |
+
"epoch": 0.9505386242103787,
|
| 76356 |
+
"grad_norm": 0.6328125,
|
| 76357 |
+
"learning_rate": 0.0003419627438067435,
|
| 76358 |
+
"loss": 1.3722879886627197,
|
| 76359 |
+
"step": 10907
|
| 76360 |
+
},
|
| 76361 |
+
{
|
| 76362 |
+
"epoch": 0.950625773621235,
|
| 76363 |
+
"grad_norm": 1.1640625,
|
| 76364 |
+
"learning_rate": 0.00034140280613814054,
|
| 76365 |
+
"loss": 1.4091254472732544,
|
| 76366 |
+
"step": 10908
|
| 76367 |
+
},
|
| 76368 |
+
{
|
| 76369 |
+
"epoch": 0.9507129230320914,
|
| 76370 |
+
"grad_norm": 0.609375,
|
| 76371 |
+
"learning_rate": 0.0003408432342420714,
|
| 76372 |
+
"loss": 1.4888665676116943,
|
| 76373 |
+
"step": 10909
|
| 76374 |
+
},
|
| 76375 |
+
{
|
| 76376 |
+
"epoch": 0.9508000724429477,
|
| 76377 |
+
"grad_norm": 0.6015625,
|
| 76378 |
+
"learning_rate": 0.00034028402842907436,
|
| 76379 |
+
"loss": 1.4189926385879517,
|
| 76380 |
+
"step": 10910
|
| 76381 |
+
},
|
| 76382 |
+
{
|
| 76383 |
+
"epoch": 0.9508872218538041,
|
| 76384 |
+
"grad_norm": 0.55078125,
|
| 76385 |
+
"learning_rate": 0.00033972518900948876,
|
| 76386 |
+
"loss": 1.3841023445129395,
|
| 76387 |
+
"step": 10911
|
| 76388 |
+
},
|
| 76389 |
+
{
|
| 76390 |
+
"epoch": 0.9509743712646606,
|
| 76391 |
+
"grad_norm": 0.57421875,
|
| 76392 |
+
"learning_rate": 0.00033916671629344693,
|
| 76393 |
+
"loss": 1.188202142715454,
|
| 76394 |
+
"step": 10912
|
| 76395 |
+
},
|
| 76396 |
+
{
|
| 76397 |
+
"epoch": 0.9510615206755169,
|
| 76398 |
+
"grad_norm": 0.68359375,
|
| 76399 |
+
"learning_rate": 0.00033860861059087897,
|
| 76400 |
+
"loss": 1.5412949323654175,
|
| 76401 |
+
"step": 10913
|
| 76402 |
+
},
|
| 76403 |
+
{
|
| 76404 |
+
"epoch": 0.9511486700863733,
|
| 76405 |
+
"grad_norm": 0.5625,
|
| 76406 |
+
"learning_rate": 0.000338050872211511,
|
| 76407 |
+
"loss": 1.497607946395874,
|
| 76408 |
+
"step": 10914
|
| 76409 |
+
},
|
| 76410 |
+
{
|
| 76411 |
+
"epoch": 0.9512358194972296,
|
| 76412 |
+
"grad_norm": 0.58984375,
|
| 76413 |
+
"learning_rate": 0.0003374935014648659,
|
| 76414 |
+
"loss": 1.2207014560699463,
|
| 76415 |
+
"step": 10915
|
| 76416 |
+
},
|
| 76417 |
+
{
|
| 76418 |
+
"epoch": 0.951322968908086,
|
| 76419 |
+
"grad_norm": 0.59375,
|
| 76420 |
+
"learning_rate": 0.0003369364986602608,
|
| 76421 |
+
"loss": 1.3597630262374878,
|
| 76422 |
+
"step": 10916
|
| 76423 |
+
},
|
| 76424 |
+
{
|
| 76425 |
+
"epoch": 0.9514101183189423,
|
| 76426 |
+
"grad_norm": 0.62890625,
|
| 76427 |
+
"learning_rate": 0.0003363798641068116,
|
| 76428 |
+
"loss": 1.4100755453109741,
|
| 76429 |
+
"step": 10917
|
| 76430 |
+
},
|
| 76431 |
+
{
|
| 76432 |
+
"epoch": 0.9514972677297987,
|
| 76433 |
+
"grad_norm": 0.5390625,
|
| 76434 |
+
"learning_rate": 0.00033582359811342745,
|
| 76435 |
+
"loss": 1.373608946800232,
|
| 76436 |
+
"step": 10918
|
| 76437 |
+
},
|
| 76438 |
+
{
|
| 76439 |
+
"epoch": 0.9515844171406551,
|
| 76440 |
+
"grad_norm": 0.59375,
|
| 76441 |
+
"learning_rate": 0.00033526770098881375,
|
| 76442 |
+
"loss": 1.3255009651184082,
|
| 76443 |
+
"step": 10919
|
| 76444 |
+
},
|
| 76445 |
+
{
|
| 76446 |
+
"epoch": 0.9516715665515114,
|
| 76447 |
+
"grad_norm": 0.62890625,
|
| 76448 |
+
"learning_rate": 0.00033471217304147116,
|
| 76449 |
+
"loss": 1.368410348892212,
|
| 76450 |
+
"step": 10920
|
| 76451 |
+
},
|
| 76452 |
+
{
|
| 76453 |
+
"epoch": 0.9517587159623678,
|
| 76454 |
+
"grad_norm": 0.5625,
|
| 76455 |
+
"learning_rate": 0.0003341570145796947,
|
| 76456 |
+
"loss": 1.2507421970367432,
|
| 76457 |
+
"step": 10921
|
| 76458 |
+
},
|
| 76459 |
+
{
|
| 76460 |
+
"epoch": 0.9518458653732241,
|
| 76461 |
+
"grad_norm": 0.6875,
|
| 76462 |
+
"learning_rate": 0.00033360222591157635,
|
| 76463 |
+
"loss": 1.4534509181976318,
|
| 76464 |
+
"step": 10922
|
| 76465 |
+
},
|
| 76466 |
+
{
|
| 76467 |
+
"epoch": 0.9519330147840805,
|
| 76468 |
+
"grad_norm": 0.58203125,
|
| 76469 |
+
"learning_rate": 0.000333047807345001,
|
| 76470 |
+
"loss": 1.3151963949203491,
|
| 76471 |
+
"step": 10923
|
| 76472 |
+
},
|
| 76473 |
+
{
|
| 76474 |
+
"epoch": 0.9520201641949368,
|
| 76475 |
+
"grad_norm": 0.6015625,
|
| 76476 |
+
"learning_rate": 0.0003324937591876487,
|
| 76477 |
+
"loss": 1.3997173309326172,
|
| 76478 |
+
"step": 10924
|
| 76479 |
+
},
|
| 76480 |
+
{
|
| 76481 |
+
"epoch": 0.9521073136057933,
|
| 76482 |
+
"grad_norm": 0.6015625,
|
| 76483 |
+
"learning_rate": 0.00033194008174699407,
|
| 76484 |
+
"loss": 1.302254557609558,
|
| 76485 |
+
"step": 10925
|
| 76486 |
+
},
|
| 76487 |
+
{
|
| 76488 |
+
"epoch": 0.9521944630166497,
|
| 76489 |
+
"grad_norm": 0.546875,
|
| 76490 |
+
"learning_rate": 0.0003313867753303052,
|
| 76491 |
+
"loss": 1.2490872144699097,
|
| 76492 |
+
"step": 10926
|
| 76493 |
+
},
|
| 76494 |
+
{
|
| 76495 |
+
"epoch": 0.952281612427506,
|
| 76496 |
+
"grad_norm": 0.58203125,
|
| 76497 |
+
"learning_rate": 0.00033083384024464615,
|
| 76498 |
+
"loss": 1.436303734779358,
|
| 76499 |
+
"step": 10927
|
| 76500 |
+
},
|
| 76501 |
+
{
|
| 76502 |
+
"epoch": 0.9523687618383624,
|
| 76503 |
+
"grad_norm": 0.55859375,
|
| 76504 |
+
"learning_rate": 0.00033028127679687343,
|
| 76505 |
+
"loss": 1.3244483470916748,
|
| 76506 |
+
"step": 10928
|
| 76507 |
+
},
|
| 76508 |
+
{
|
| 76509 |
+
"epoch": 0.9524559112492187,
|
| 76510 |
+
"grad_norm": 0.66796875,
|
| 76511 |
+
"learning_rate": 0.00032972908529363724,
|
| 76512 |
+
"loss": 1.3695107698440552,
|
| 76513 |
+
"step": 10929
|
| 76514 |
+
},
|
| 76515 |
+
{
|
| 76516 |
+
"epoch": 0.9525430606600751,
|
| 76517 |
+
"grad_norm": 0.65234375,
|
| 76518 |
+
"learning_rate": 0.0003291772660413819,
|
| 76519 |
+
"loss": 1.2084953784942627,
|
| 76520 |
+
"step": 10930
|
| 76521 |
+
},
|
| 76522 |
+
{
|
| 76523 |
+
"epoch": 0.9526302100709314,
|
| 76524 |
+
"grad_norm": 0.6015625,
|
| 76525 |
+
"learning_rate": 0.000328625819346345,
|
| 76526 |
+
"loss": 1.2497600317001343,
|
| 76527 |
+
"step": 10931
|
| 76528 |
+
},
|
| 76529 |
+
{
|
| 76530 |
+
"epoch": 0.9527173594817878,
|
| 76531 |
+
"grad_norm": 0.59375,
|
| 76532 |
+
"learning_rate": 0.000328074745514557,
|
| 76533 |
+
"loss": 1.3972818851470947,
|
| 76534 |
+
"step": 10932
|
| 76535 |
+
},
|
| 76536 |
+
{
|
| 76537 |
+
"epoch": 0.9528045088926442,
|
| 76538 |
+
"grad_norm": 0.56640625,
|
| 76539 |
+
"learning_rate": 0.0003275240448518425,
|
| 76540 |
+
"loss": 1.4092837572097778,
|
| 76541 |
+
"step": 10933
|
| 76542 |
+
},
|
| 76543 |
+
{
|
| 76544 |
+
"epoch": 0.9528916583035005,
|
| 76545 |
+
"grad_norm": 0.59765625,
|
| 76546 |
+
"learning_rate": 0.00032697371766381785,
|
| 76547 |
+
"loss": 1.4201757907867432,
|
| 76548 |
+
"step": 10934
|
| 76549 |
+
},
|
| 76550 |
+
{
|
| 76551 |
+
"epoch": 0.952978807714357,
|
| 76552 |
+
"grad_norm": 0.58203125,
|
| 76553 |
+
"learning_rate": 0.00032642376425589273,
|
| 76554 |
+
"loss": 1.4522488117218018,
|
| 76555 |
+
"step": 10935
|
| 76556 |
+
},
|
| 76557 |
+
{
|
| 76558 |
+
"epoch": 0.9530659571252132,
|
| 76559 |
+
"grad_norm": 0.59375,
|
| 76560 |
+
"learning_rate": 0.0003258741849332692,
|
| 76561 |
+
"loss": 1.4301923513412476,
|
| 76562 |
+
"step": 10936
|
| 76563 |
+
},
|
| 76564 |
+
{
|
| 76565 |
+
"epoch": 0.9531531065360697,
|
| 76566 |
+
"grad_norm": 0.5859375,
|
| 76567 |
+
"learning_rate": 0.0003253249800009408,
|
| 76568 |
+
"loss": 1.2767215967178345,
|
| 76569 |
+
"step": 10937
|
| 76570 |
+
},
|
| 76571 |
+
{
|
| 76572 |
+
"epoch": 0.953240255946926,
|
| 76573 |
+
"grad_norm": 0.58203125,
|
| 76574 |
+
"learning_rate": 0.00032477614976369643,
|
| 76575 |
+
"loss": 1.2616369724273682,
|
| 76576 |
+
"step": 10938
|
| 76577 |
+
},
|
| 76578 |
+
{
|
| 76579 |
+
"epoch": 0.9533274053577824,
|
| 76580 |
+
"grad_norm": 0.68359375,
|
| 76581 |
+
"learning_rate": 0.00032422769452611344,
|
| 76582 |
+
"loss": 1.4377288818359375,
|
| 76583 |
+
"step": 10939
|
| 76584 |
+
},
|
| 76585 |
+
{
|
| 76586 |
+
"epoch": 0.9534145547686388,
|
| 76587 |
+
"grad_norm": 0.58203125,
|
| 76588 |
+
"learning_rate": 0.0003236796145925626,
|
| 76589 |
+
"loss": 1.4595770835876465,
|
| 76590 |
+
"step": 10940
|
| 76591 |
+
},
|
| 76592 |
+
{
|
| 76593 |
+
"epoch": 0.9535017041794951,
|
| 76594 |
+
"grad_norm": 0.6796875,
|
| 76595 |
+
"learning_rate": 0.0003231319102672066,
|
| 76596 |
+
"loss": 1.4700055122375488,
|
| 76597 |
+
"step": 10941
|
| 76598 |
+
},
|
| 76599 |
+
{
|
| 76600 |
+
"epoch": 0.9535888535903515,
|
| 76601 |
+
"grad_norm": 0.59765625,
|
| 76602 |
+
"learning_rate": 0.00032258458185399847,
|
| 76603 |
+
"loss": 1.3998115062713623,
|
| 76604 |
+
"step": 10942
|
| 76605 |
+
},
|
| 76606 |
+
{
|
| 76607 |
+
"epoch": 0.9536760030012078,
|
| 76608 |
+
"grad_norm": 0.61328125,
|
| 76609 |
+
"learning_rate": 0.000322037629656685,
|
| 76610 |
+
"loss": 1.4860610961914062,
|
| 76611 |
+
"step": 10943
|
| 76612 |
+
},
|
| 76613 |
+
{
|
| 76614 |
+
"epoch": 0.9537631524120642,
|
| 76615 |
+
"grad_norm": 0.60546875,
|
| 76616 |
+
"learning_rate": 0.00032149105397880214,
|
| 76617 |
+
"loss": 1.2388815879821777,
|
| 76618 |
+
"step": 10944
|
| 76619 |
+
},
|
| 76620 |
+
{
|
| 76621 |
+
"epoch": 0.9538503018229205,
|
| 76622 |
+
"grad_norm": 0.62890625,
|
| 76623 |
+
"learning_rate": 0.0003209448551236773,
|
| 76624 |
+
"loss": 1.4937905073165894,
|
| 76625 |
+
"step": 10945
|
| 76626 |
+
},
|
| 76627 |
+
{
|
| 76628 |
+
"epoch": 0.9539374512337769,
|
| 76629 |
+
"grad_norm": 0.6796875,
|
| 76630 |
+
"learning_rate": 0.0003203990333944291,
|
| 76631 |
+
"loss": 1.3029348850250244,
|
| 76632 |
+
"step": 10946
|
| 76633 |
+
},
|
| 76634 |
+
{
|
| 76635 |
+
"epoch": 0.9540246006446333,
|
| 76636 |
+
"grad_norm": 0.55078125,
|
| 76637 |
+
"learning_rate": 0.0003198535890939668,
|
| 76638 |
+
"loss": 1.4672210216522217,
|
| 76639 |
+
"step": 10947
|
| 76640 |
+
},
|
| 76641 |
+
{
|
| 76642 |
+
"epoch": 0.9541117500554896,
|
| 76643 |
+
"grad_norm": 0.60546875,
|
| 76644 |
+
"learning_rate": 0.00031930852252498934,
|
| 76645 |
+
"loss": 1.2499611377716064,
|
| 76646 |
+
"step": 10948
|
| 76647 |
+
},
|
| 76648 |
+
{
|
| 76649 |
+
"epoch": 0.9541988994663461,
|
| 76650 |
+
"grad_norm": 0.5546875,
|
| 76651 |
+
"learning_rate": 0.00031876383398998814,
|
| 76652 |
+
"loss": 1.3740532398223877,
|
| 76653 |
+
"step": 10949
|
| 76654 |
+
},
|
| 76655 |
+
{
|
| 76656 |
+
"epoch": 0.9542860488772024,
|
| 76657 |
+
"grad_norm": 0.6484375,
|
| 76658 |
+
"learning_rate": 0.00031821952379124325,
|
| 76659 |
+
"loss": 1.4285303354263306,
|
| 76660 |
+
"step": 10950
|
| 76661 |
+
},
|
| 76662 |
+
{
|
| 76663 |
+
"epoch": 0.9543731982880588,
|
| 76664 |
+
"grad_norm": 0.625,
|
| 76665 |
+
"learning_rate": 0.00031767559223082434,
|
| 76666 |
+
"loss": 1.3995391130447388,
|
| 76667 |
+
"step": 10951
|
| 76668 |
+
},
|
| 76669 |
+
{
|
| 76670 |
+
"epoch": 0.9544603476989151,
|
| 76671 |
+
"grad_norm": 0.64453125,
|
| 76672 |
+
"learning_rate": 0.0003171320396105919,
|
| 76673 |
+
"loss": 1.4862656593322754,
|
| 76674 |
+
"step": 10952
|
| 76675 |
+
},
|
| 76676 |
+
{
|
| 76677 |
+
"epoch": 0.9545474971097715,
|
| 76678 |
+
"grad_norm": 0.60546875,
|
| 76679 |
+
"learning_rate": 0.00031658886623219494,
|
| 76680 |
+
"loss": 1.448495626449585,
|
| 76681 |
+
"step": 10953
|
| 76682 |
+
},
|
| 76683 |
+
{
|
| 76684 |
+
"epoch": 0.9546346465206279,
|
| 76685 |
+
"grad_norm": 0.59765625,
|
| 76686 |
+
"learning_rate": 0.00031604607239707494,
|
| 76687 |
+
"loss": 1.4339920282363892,
|
| 76688 |
+
"step": 10954
|
| 76689 |
+
},
|
| 76690 |
+
{
|
| 76691 |
+
"epoch": 0.9547217959314842,
|
| 76692 |
+
"grad_norm": 0.65625,
|
| 76693 |
+
"learning_rate": 0.00031550365840645963,
|
| 76694 |
+
"loss": 1.4500888586044312,
|
| 76695 |
+
"step": 10955
|
| 76696 |
+
},
|
| 76697 |
+
{
|
| 76698 |
+
"epoch": 0.9548089453423406,
|
| 76699 |
+
"grad_norm": 0.59375,
|
| 76700 |
+
"learning_rate": 0.00031496162456136746,
|
| 76701 |
+
"loss": 1.4123953580856323,
|
| 76702 |
+
"step": 10956
|
| 76703 |
+
},
|
| 76704 |
+
{
|
| 76705 |
+
"epoch": 0.9548960947531969,
|
| 76706 |
+
"grad_norm": 0.609375,
|
| 76707 |
+
"learning_rate": 0.0003144199711626048,
|
| 76708 |
+
"loss": 1.453245997428894,
|
| 76709 |
+
"step": 10957
|
| 76710 |
+
},
|
| 76711 |
+
{
|
| 76712 |
+
"epoch": 0.9549832441640533,
|
| 76713 |
+
"grad_norm": 0.546875,
|
| 76714 |
+
"learning_rate": 0.00031387869851076726,
|
| 76715 |
+
"loss": 1.387932300567627,
|
| 76716 |
+
"step": 10958
|
| 76717 |
+
},
|
| 76718 |
+
{
|
| 76719 |
+
"epoch": 0.9550703935749096,
|
| 76720 |
+
"grad_norm": 0.65234375,
|
| 76721 |
+
"learning_rate": 0.0003133378069062407,
|
| 76722 |
+
"loss": 1.4692347049713135,
|
| 76723 |
+
"step": 10959
|
| 76724 |
+
},
|
| 76725 |
+
{
|
| 76726 |
+
"epoch": 0.955157542985766,
|
| 76727 |
+
"grad_norm": 0.55859375,
|
| 76728 |
+
"learning_rate": 0.0003127972966491974,
|
| 76729 |
+
"loss": 1.3816195726394653,
|
| 76730 |
+
"step": 10960
|
| 76731 |
+
},
|
| 76732 |
+
{
|
| 76733 |
+
"epoch": 0.9552446923966225,
|
| 76734 |
+
"grad_norm": 0.5703125,
|
| 76735 |
+
"learning_rate": 0.0003122571680395997,
|
| 76736 |
+
"loss": 1.3278354406356812,
|
| 76737 |
+
"step": 10961
|
| 76738 |
+
},
|
| 76739 |
+
{
|
| 76740 |
+
"epoch": 0.9553318418074788,
|
| 76741 |
+
"grad_norm": 0.63671875,
|
| 76742 |
+
"learning_rate": 0.00031171742137719695,
|
| 76743 |
+
"loss": 1.4659311771392822,
|
| 76744 |
+
"step": 10962
|
| 76745 |
+
},
|
| 76746 |
+
{
|
| 76747 |
+
"epoch": 0.9554189912183352,
|
| 76748 |
+
"grad_norm": 0.5859375,
|
| 76749 |
+
"learning_rate": 0.00031117805696152665,
|
| 76750 |
+
"loss": 1.4021224975585938,
|
| 76751 |
+
"step": 10963
|
| 76752 |
+
},
|
| 76753 |
+
{
|
| 76754 |
+
"epoch": 0.9555061406291915,
|
| 76755 |
+
"grad_norm": 0.66015625,
|
| 76756 |
+
"learning_rate": 0.00031063907509191384,
|
| 76757 |
+
"loss": 1.3670710325241089,
|
| 76758 |
+
"step": 10964
|
| 76759 |
+
},
|
| 76760 |
+
{
|
| 76761 |
+
"epoch": 0.9555932900400479,
|
| 76762 |
+
"grad_norm": 0.59765625,
|
| 76763 |
+
"learning_rate": 0.00031010047606747337,
|
| 76764 |
+
"loss": 1.3548510074615479,
|
| 76765 |
+
"step": 10965
|
| 76766 |
+
},
|
| 76767 |
+
{
|
| 76768 |
+
"epoch": 0.9556804394509042,
|
| 76769 |
+
"grad_norm": 0.60546875,
|
| 76770 |
+
"learning_rate": 0.0003095622601871053,
|
| 76771 |
+
"loss": 1.4097130298614502,
|
| 76772 |
+
"step": 10966
|
| 76773 |
+
},
|
| 76774 |
+
{
|
| 76775 |
+
"epoch": 0.9557675888617606,
|
| 76776 |
+
"grad_norm": 0.6015625,
|
| 76777 |
+
"learning_rate": 0.0003090244277494979,
|
| 76778 |
+
"loss": 1.4018460512161255,
|
| 76779 |
+
"step": 10967
|
| 76780 |
+
},
|
| 76781 |
+
{
|
| 76782 |
+
"epoch": 0.955854738272617,
|
| 76783 |
+
"grad_norm": 0.640625,
|
| 76784 |
+
"learning_rate": 0.0003084869790531271,
|
| 76785 |
+
"loss": 1.3154217004776,
|
| 76786 |
+
"step": 10968
|
| 76787 |
+
},
|
| 76788 |
+
{
|
| 76789 |
+
"epoch": 0.9559418876834733,
|
| 76790 |
+
"grad_norm": 0.57421875,
|
| 76791 |
+
"learning_rate": 0.0003079499143962536,
|
| 76792 |
+
"loss": 1.2899959087371826,
|
| 76793 |
+
"step": 10969
|
| 76794 |
+
},
|
| 76795 |
+
{
|
| 76796 |
+
"epoch": 0.9560290370943297,
|
| 76797 |
+
"grad_norm": 0.609375,
|
| 76798 |
+
"learning_rate": 0.00030741323407692926,
|
| 76799 |
+
"loss": 1.2671692371368408,
|
| 76800 |
+
"step": 10970
|
| 76801 |
+
},
|
| 76802 |
+
{
|
| 76803 |
+
"epoch": 0.956116186505186,
|
| 76804 |
+
"grad_norm": 0.6640625,
|
| 76805 |
+
"learning_rate": 0.00030687693839298884,
|
| 76806 |
+
"loss": 1.4018245935440063,
|
| 76807 |
+
"step": 10971
|
| 76808 |
+
},
|
| 76809 |
+
{
|
| 76810 |
+
"epoch": 0.9562033359160425,
|
| 76811 |
+
"grad_norm": 0.60546875,
|
| 76812 |
+
"learning_rate": 0.00030634102764205516,
|
| 76813 |
+
"loss": 1.451886773109436,
|
| 76814 |
+
"step": 10972
|
| 76815 |
+
},
|
| 76816 |
+
{
|
| 76817 |
+
"epoch": 0.9562904853268988,
|
| 76818 |
+
"grad_norm": 0.5703125,
|
| 76819 |
+
"learning_rate": 0.00030580550212153725,
|
| 76820 |
+
"loss": 1.4861435890197754,
|
| 76821 |
+
"step": 10973
|
| 76822 |
+
},
|
| 76823 |
+
{
|
| 76824 |
+
"epoch": 0.9563776347377552,
|
| 76825 |
+
"grad_norm": 0.60546875,
|
| 76826 |
+
"learning_rate": 0.00030527036212863,
|
| 76827 |
+
"loss": 1.2825517654418945,
|
| 76828 |
+
"step": 10974
|
| 76829 |
+
},
|
| 76830 |
+
{
|
| 76831 |
+
"epoch": 0.9564647841486116,
|
| 76832 |
+
"grad_norm": 0.59375,
|
| 76833 |
+
"learning_rate": 0.0003047356079603151,
|
| 76834 |
+
"loss": 1.3305561542510986,
|
| 76835 |
+
"step": 10975
|
| 76836 |
+
},
|
| 76837 |
+
{
|
| 76838 |
+
"epoch": 0.9565519335594679,
|
| 76839 |
+
"grad_norm": 0.54296875,
|
| 76840 |
+
"learning_rate": 0.0003042012399133593,
|
| 76841 |
+
"loss": 1.2840900421142578,
|
| 76842 |
+
"step": 10976
|
| 76843 |
+
},
|
| 76844 |
+
{
|
| 76845 |
+
"epoch": 0.9566390829703243,
|
| 76846 |
+
"grad_norm": 0.578125,
|
| 76847 |
+
"learning_rate": 0.0003036672582843159,
|
| 76848 |
+
"loss": 1.4011597633361816,
|
| 76849 |
+
"step": 10977
|
| 76850 |
+
},
|
| 76851 |
+
{
|
| 76852 |
+
"epoch": 0.9567262323811806,
|
| 76853 |
+
"grad_norm": 0.66015625,
|
| 76854 |
+
"learning_rate": 0.00030313366336952313,
|
| 76855 |
+
"loss": 1.424378514289856,
|
| 76856 |
+
"step": 10978
|
| 76857 |
+
},
|
| 76858 |
+
{
|
| 76859 |
+
"epoch": 0.956813381792037,
|
| 76860 |
+
"grad_norm": 0.57421875,
|
| 76861 |
+
"learning_rate": 0.0003026004554651047,
|
| 76862 |
+
"loss": 1.375205159187317,
|
| 76863 |
+
"step": 10979
|
| 76864 |
+
},
|
| 76865 |
+
{
|
| 76866 |
+
"epoch": 0.9569005312028933,
|
| 76867 |
+
"grad_norm": 0.7109375,
|
| 76868 |
+
"learning_rate": 0.0003020676348669693,
|
| 76869 |
+
"loss": 1.6048765182495117,
|
| 76870 |
+
"step": 10980
|
| 76871 |
+
},
|
| 76872 |
+
{
|
| 76873 |
+
"epoch": 0.9569876806137497,
|
| 76874 |
+
"grad_norm": 0.65234375,
|
| 76875 |
+
"learning_rate": 0.0003015352018708118,
|
| 76876 |
+
"loss": 1.4007132053375244,
|
| 76877 |
+
"step": 10981
|
| 76878 |
+
},
|
| 76879 |
+
{
|
| 76880 |
+
"epoch": 0.9570748300246061,
|
| 76881 |
+
"grad_norm": 0.61328125,
|
| 76882 |
+
"learning_rate": 0.0003010031567721107,
|
| 76883 |
+
"loss": 1.3569376468658447,
|
| 76884 |
+
"step": 10982
|
| 76885 |
+
},
|
| 76886 |
+
{
|
| 76887 |
+
"epoch": 0.9571619794354624,
|
| 76888 |
+
"grad_norm": 0.65234375,
|
| 76889 |
+
"learning_rate": 0.00030047149986612984,
|
| 76890 |
+
"loss": 1.3657957315444946,
|
| 76891 |
+
"step": 10983
|
| 76892 |
+
},
|
| 76893 |
+
{
|
| 76894 |
+
"epoch": 0.9572491288463189,
|
| 76895 |
+
"grad_norm": 0.609375,
|
| 76896 |
+
"learning_rate": 0.0002999402314479173,
|
| 76897 |
+
"loss": 1.2507611513137817,
|
| 76898 |
+
"step": 10984
|
| 76899 |
+
},
|
| 76900 |
+
{
|
| 76901 |
+
"epoch": 0.9573362782571752,
|
| 76902 |
+
"grad_norm": 0.66015625,
|
| 76903 |
+
"learning_rate": 0.00029940935181230535,
|
| 76904 |
+
"loss": 1.4524526596069336,
|
| 76905 |
+
"step": 10985
|
| 76906 |
+
},
|
| 76907 |
+
{
|
| 76908 |
+
"epoch": 0.9574234276680316,
|
| 76909 |
+
"grad_norm": 0.67578125,
|
| 76910 |
+
"learning_rate": 0.000298878861253912,
|
| 76911 |
+
"loss": 1.3916499614715576,
|
| 76912 |
+
"step": 10986
|
| 76913 |
+
},
|
| 76914 |
+
{
|
| 76915 |
+
"epoch": 0.9575105770788879,
|
| 76916 |
+
"grad_norm": 0.6484375,
|
| 76917 |
+
"learning_rate": 0.0002983487600671378,
|
| 76918 |
+
"loss": 1.4126540422439575,
|
| 76919 |
+
"step": 10987
|
| 76920 |
+
},
|
| 76921 |
+
{
|
| 76922 |
+
"epoch": 0.9575977264897443,
|
| 76923 |
+
"grad_norm": 0.62890625,
|
| 76924 |
+
"learning_rate": 0.00029781904854616747,
|
| 76925 |
+
"loss": 1.3251489400863647,
|
| 76926 |
+
"step": 10988
|
| 76927 |
+
},
|
| 76928 |
+
{
|
| 76929 |
+
"epoch": 0.9576848759006007,
|
| 76930 |
+
"grad_norm": 0.578125,
|
| 76931 |
+
"learning_rate": 0.00029728972698496995,
|
| 76932 |
+
"loss": 1.2860187292099,
|
| 76933 |
+
"step": 10989
|
| 76934 |
+
},
|
| 76935 |
+
{
|
| 76936 |
+
"epoch": 0.957772025311457,
|
| 76937 |
+
"grad_norm": 0.58203125,
|
| 76938 |
+
"learning_rate": 0.0002967607956772966,
|
| 76939 |
+
"loss": 1.2345279455184937,
|
| 76940 |
+
"step": 10990
|
| 76941 |
+
},
|
| 76942 |
+
{
|
| 76943 |
+
"epoch": 0.9578591747223134,
|
| 76944 |
+
"grad_norm": 0.63671875,
|
| 76945 |
+
"learning_rate": 0.00029623225491668477,
|
| 76946 |
+
"loss": 1.4058338403701782,
|
| 76947 |
+
"step": 10991
|
| 76948 |
+
},
|
| 76949 |
+
{
|
| 76950 |
+
"epoch": 0.9579463241331697,
|
| 76951 |
+
"grad_norm": 0.58984375,
|
| 76952 |
+
"learning_rate": 0.0002957041049964522,
|
| 76953 |
+
"loss": 1.3918859958648682,
|
| 76954 |
+
"step": 10992
|
| 76955 |
+
},
|
| 76956 |
+
{
|
| 76957 |
+
"epoch": 0.9580334735440261,
|
| 76958 |
+
"grad_norm": 0.6015625,
|
| 76959 |
+
"learning_rate": 0.00029517634620970085,
|
| 76960 |
+
"loss": 1.2986831665039062,
|
| 76961 |
+
"step": 10993
|
| 76962 |
+
},
|
| 76963 |
+
{
|
| 76964 |
+
"epoch": 0.9581206229548824,
|
| 76965 |
+
"grad_norm": 0.66796875,
|
| 76966 |
+
"learning_rate": 0.0002946489788493164,
|
| 76967 |
+
"loss": 1.532016396522522,
|
| 76968 |
+
"step": 10994
|
| 76969 |
+
},
|
| 76970 |
+
{
|
| 76971 |
+
"epoch": 0.9582077723657388,
|
| 76972 |
+
"grad_norm": 0.6015625,
|
| 76973 |
+
"learning_rate": 0.00029412200320796624,
|
| 76974 |
+
"loss": 1.5022664070129395,
|
| 76975 |
+
"step": 10995
|
| 76976 |
+
},
|
| 76977 |
+
{
|
| 76978 |
+
"epoch": 0.9582949217765953,
|
| 76979 |
+
"grad_norm": 0.6015625,
|
| 76980 |
+
"learning_rate": 0.00029359541957810014,
|
| 76981 |
+
"loss": 1.206302285194397,
|
| 76982 |
+
"step": 10996
|
| 76983 |
+
},
|
| 76984 |
+
{
|
| 76985 |
+
"epoch": 0.9583820711874516,
|
| 76986 |
+
"grad_norm": 0.59765625,
|
| 76987 |
+
"learning_rate": 0.0002930692282519525,
|
| 76988 |
+
"loss": 1.3716161251068115,
|
| 76989 |
+
"step": 10997
|
| 76990 |
+
},
|
| 76991 |
+
{
|
| 76992 |
+
"epoch": 0.958469220598308,
|
| 76993 |
+
"grad_norm": 0.56640625,
|
| 76994 |
+
"learning_rate": 0.000292543429521537,
|
| 76995 |
+
"loss": 1.2581470012664795,
|
| 76996 |
+
"step": 10998
|
| 76997 |
+
},
|
| 76998 |
+
{
|
| 76999 |
+
"epoch": 0.9585563700091643,
|
| 77000 |
+
"grad_norm": 0.6171875,
|
| 77001 |
+
"learning_rate": 0.0002920180236786515,
|
| 77002 |
+
"loss": 1.293797254562378,
|
| 77003 |
+
"step": 10999
|
| 77004 |
+
},
|
| 77005 |
+
{
|
| 77006 |
+
"epoch": 0.9586435194200207,
|
| 77007 |
+
"grad_norm": 0.5859375,
|
| 77008 |
+
"learning_rate": 0.0002914930110148749,
|
| 77009 |
+
"loss": 1.3799270391464233,
|
| 77010 |
+
"step": 11000
|
| 77011 |
}
|
| 77012 |
],
|
| 77013 |
"logging_steps": 1,
|
|
|
|
| 77027 |
"attributes": {}
|
| 77028 |
}
|
| 77029 |
},
|
| 77030 |
+
"total_flos": 4.067990294700232e+18,
|
| 77031 |
"train_batch_size": 1,
|
| 77032 |
"trial_name": null,
|
| 77033 |
"trial_params": null
|