Training in progress, step 800
- .gitattributes +1 -0
- added_tokens.json +24 -0
- config.json +52 -0
- merges.txt +0 -0
- model-00001-of-00031.safetensors +3 -0
- model-00002-of-00031.safetensors +3 -0
- model-00003-of-00031.safetensors +3 -0
- model-00004-of-00031.safetensors +3 -0
- model-00005-of-00031.safetensors +3 -0
- model-00006-of-00031.safetensors +3 -0
- model-00007-of-00031.safetensors +3 -0
- model-00008-of-00031.safetensors +3 -0
- model-00009-of-00031.safetensors +3 -0
- model-00010-of-00031.safetensors +3 -0
- model-00011-of-00031.safetensors +3 -0
- model-00012-of-00031.safetensors +3 -0
- model-00013-of-00031.safetensors +3 -0
- model-00014-of-00031.safetensors +3 -0
- model-00015-of-00031.safetensors +3 -0
- model-00016-of-00031.safetensors +3 -0
- model-00017-of-00031.safetensors +3 -0
- model-00018-of-00031.safetensors +3 -0
- model-00019-of-00031.safetensors +3 -0
- model-00020-of-00031.safetensors +3 -0
- model-00021-of-00031.safetensors +3 -0
- model-00022-of-00031.safetensors +3 -0
- model-00023-of-00031.safetensors +3 -0
- model-00024-of-00031.safetensors +3 -0
- model-00025-of-00031.safetensors +3 -0
- model-00026-of-00031.safetensors +3 -0
- model-00027-of-00031.safetensors +3 -0
- model-00028-of-00031.safetensors +3 -0
- model-00029-of-00031.safetensors +3 -0
- model-00030-of-00031.safetensors +3 -0
- model-00031-of-00031.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +209 -0
- trainer_log.jsonl +80 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
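
added_tokens.json maps each of the 22 extra special-token strings to its fixed vocabulary id (151643-151664). A small, hedged sketch of cross-checking those ids against the loaded tokenizer; the local path ./checkpoint-800 is a placeholder, not the actual repository name:

# Sketch only: verify added_tokens.json against the tokenizer (placeholder path).
import json
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./checkpoint-800")
with open("./checkpoint-800/added_tokens.json") as f:
    added = json.load(f)

for token, expected_id in added.items():
    # each added token string should resolve to the id recorded above
    assert tokenizer.convert_tokens_to_ids(token) == expected_id
print(f"all {len(added)} added tokens resolve to their listed ids")
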
config.json
ADDED
@@ -0,0 +1,52 @@
+{
+  "_name_or_path": "Qwen/Qwen2.5-VL-72B-Instruct",
+  "architectures": [
+    "Qwen2_5_VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 29568,
+  "max_position_embeddings": 128000,
+  "max_window_layers": 80,
+  "model_type": "qwen2_5_vl",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 80,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0.dev0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
+    "hidden_size": 1280,
+    "in_chans": 3,
+    "intermediate_size": 3456,
+    "model_type": "qwen2_5_vl",
+    "out_hidden_size": 8192,
+    "spatial_patch_size": 14,
+    "tokens_per_second": 2,
+    "torch_dtype": "bfloat16"
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 152064
+}
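
config.json describes the full Qwen2.5-VL-72B-Instruct architecture (80 transformer layers, hidden size 8192, GQA with 8 KV heads, bfloat16 weights), whose parameters are sharded across the 31 safetensors files below. A minimal, hedged loading sketch; ./checkpoint-800 is a placeholder path and it assumes a transformers build recent enough to include the qwen2_5_vl model type:

# Sketch only: load the sharded checkpoint that this config.json describes.
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "./checkpoint-800",            # placeholder path to this checkpoint
    torch_dtype=torch.bfloat16,    # matches "torch_dtype": "bfloat16"
    device_map="auto",             # 72B of weights will not fit on one GPU
)
processor = AutoProcessor.from_pretrained("./checkpoint-800")
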
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
model-00001-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96f713c1e70f34cb0b049d7d2e4f92f469b531c612be0e7e48ce2a85edc715a
+size 4687059888
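
Each shard is committed as a Git LFS pointer: a three-line stub giving the LFS spec version, the SHA-256 of the real payload, and its size in bytes (here about 4.7 GB per shard), while the tensor data itself lives in LFS storage. A hedged sketch of reading such a pointer; parse_lfs_pointer is a hypothetical helper for illustration, not part of any library:

# Sketch only: parse a Git LFS pointer stub like the one above.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    # e.g. {"version": "https://git-lfs.github.com/spec/v1",
    #       "oid": "sha256:a96f713c...", "size": "4687059888"}
    return fields

pointer = parse_lfs_pointer("model-00001-of-00031.safetensors")
print(pointer["oid"], pointer["size"])
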
model-00002-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76213d5b81daf35ff8ba063fa64d6120f4172f41c80de9abee01bb3d556e8e69
+size 4781670320
model-00003-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1fbc63ab58dc8ef1c4346872dd1857c1f2a466f50e792d70a8137958917a9a8
+size 4964101384
model-00004-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdaff69bc14870f8f68623f5ff1610e49b83bf8b049b8ee3b98ececc5a26a67f
+size 4781637328
model-00005-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0846f0c5ab420347a7832838fc24746f4d0b70a720d82263a20ed3decb1a0651
+size 4781670344
model-00006-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a37cb82e34f9a9bec0b709ec440f0ee080791d5feae8aa9436914d57d7cb7cb9
+size 4781670360
model-00007-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5edbe4fa69f20d5d72ae4b2ec28bdb62d4a4a9d96bc8a4056abc186c8ef94f
+size 4964101416
model-00008-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6696bee96f301aa168ba49fa8f82df4919f6392edfd8a99d66e0e1bb620ec42c
+size 4781637360
model-00009-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18d4d94cda0c81e86584486cbbd745ffeada07a9c4b7d5ea07b49721a54d2cf5
+size 4781670360
model-00010-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4561b0c3387fbf3a688ff5a55e12d5db4e3933a564cb910fa7e13c04695a17e5
+size 4781670360
model-00011-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4681a2555447124a6addd4d379fe341627c9083a209446754626596444702fe9
+size 4964101416
model-00012-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4f436ad918a86c565959a0d471f514d0aaccb2e4ad3b0f472b11e34258597f9
+size 4781637360
model-00013-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960e8d668358012eb6cf6873f5e2eb3e45a2e693fd9600b28799dfb441ccc021
+size 4781670360
model-00014-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49085c12c1ef6b83b4fd8f57558ff57c4cadeda10397f83fbb125164a970a67f
+size 4781670360
model-00015-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9ef97570477a561fc8ed2639bf816ed1767eee77b728dbf22cf5d0373229f2c
+size 4964101416
model-00016-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d22ffabdaa38e7e810294a1ba00879325610517b3b66c857441db721bff6c1f
+size 4781637360
model-00017-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:072b3fbb1e36b38163421a202bb88ebc8e24a9a30fe84cfb69e4c1a5af27e348
+size 4781670360
model-00018-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44bbb34833747c921fb014061651f531b4090e92a66fb53c93ef207cae8ace9d
+size 4781670360
model-00019-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:858fe3b88c43dda194b514cefc74310bd1d7a8cefced19c6e120cb550557b34b
+size 4964101416
model-00020-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4716365820f9eea19c98053a14298223cb3e1790c1f41dcca1b516e6c6f2ebf6
+size 4781637360
model-00021-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c438a73a20f5ef5efff7bfc3e391c98074253900f00a294df732885c66e6977
+size 4781670360
model-00022-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:354246e7124794bc8bdb206f2712f05826d397901aed135e2997c8d18adc51f1
+size 4781670360
model-00023-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53b96f92bcd1e7e9a708fb9c7069eb6e0371644963d5086ee37b7268720e3beb
+size 4964101416
model-00024-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b8af3f5fb6a66303c295c6502da927dae0259536c0ee17ed13194641aa537b3
+size 4781637360
model-00025-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0154b0460b2ef8e31d0aa5ca34052842d407b133b85184680078d3d2337e0274
+size 4781670360
model-00026-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:891a2727b0d091bb7a51d76597a1cdaf9d45556ec0796191a8b7e38c1f6513f8
+size 4781670360
model-00027-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10fbaae9158e00e79a496cb295b9f561ee3a20acaff5a46d2ee1c84498c3cddb
+size 4964101416
model-00028-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5eb2e0a293bc618d08133a8880b07dc275ee51bf452ee9ad0de0e9f04d76a5d3
+size 4781637360
model-00029-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69e7d385f809cb32aae7ef9f1b34761a889afbe30d4b37f98e756d69561176fc
+size 4781670360
model-00030-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a72f0f534d9dbf7b1b77e63e0b297635531936bce6020a12ec279b78aadf2e3b
+size 4479675656
model-00031-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb7edc3f3f184306e22b2395a2d76b63ace7e2b166a519f744dee74c028f1395
+size 2491416704
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
special_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
tokenizer_config.json
ADDED
@@ -0,0 +1,209 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 4096,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
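
The chat_template entry is the ChatML-style template used by Qwen2.5-VL: it injects a default system prompt when none is given, wraps every turn in <|im_start|>/<|im_end|>, and expands image and video content items into <|vision_start|><|image_pad|><|vision_end|> (or <|video_pad|>) placeholders. A hedged usage sketch; ./checkpoint-800 is a placeholder path:

# Sketch only: render a prompt with the chat template from tokenizer_config.json.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("./checkpoint-800")
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ]},
]
prompt = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# prompt begins with the default system turn and contains
# <|vision_start|><|image_pad|><|vision_end|> where the image will be inserted.
print(prompt)
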
trainer_log.jsonl
ADDED
@@ -0,0 +1,80 @@
+{"current_steps": 10, "total_steps": 4905, "loss": 3.6483, "lr": 4.0650406504065046e-07, "epoch": 0.010193679918450561, "percentage": 0.2, "elapsed_time": "0:14:59", "remaining_time": "5 days, 2:16:31"}
+{"current_steps": 20, "total_steps": 4905, "loss": 1.9733, "lr": 8.130081300813009e-07, "epoch": 0.020387359836901122, "percentage": 0.41, "elapsed_time": "0:29:52", "remaining_time": "5 days, 1:37:54"}
+{"current_steps": 30, "total_steps": 4905, "loss": 1.1774, "lr": 1.2195121951219514e-06, "epoch": 0.03058103975535168, "percentage": 0.61, "elapsed_time": "0:44:44", "remaining_time": "5 days, 1:09:16"}
+{"current_steps": 40, "total_steps": 4905, "loss": 0.78, "lr": 1.6260162601626018e-06, "epoch": 0.040774719673802244, "percentage": 0.82, "elapsed_time": "0:59:21", "remaining_time": "5 days, 0:19:29"}
+{"current_steps": 50, "total_steps": 4905, "loss": 0.6473, "lr": 2.0325203252032523e-06, "epoch": 0.0509683995922528, "percentage": 1.02, "elapsed_time": "1:14:03", "remaining_time": "4 days, 23:50:22"}
+{"current_steps": 60, "total_steps": 4905, "loss": 0.5444, "lr": 2.4390243902439027e-06, "epoch": 0.06116207951070336, "percentage": 1.22, "elapsed_time": "1:28:42", "remaining_time": "4 days, 23:22:34"}
+{"current_steps": 70, "total_steps": 4905, "loss": 0.4696, "lr": 2.845528455284553e-06, "epoch": 0.07135575942915393, "percentage": 1.43, "elapsed_time": "1:43:37", "remaining_time": "4 days, 23:17:45"}
+{"current_steps": 80, "total_steps": 4905, "loss": 0.4324, "lr": 3.2520325203252037e-06, "epoch": 0.08154943934760449, "percentage": 1.63, "elapsed_time": "1:58:25", "remaining_time": "4 days, 23:02:19"}
+{"current_steps": 90, "total_steps": 4905, "loss": 0.389, "lr": 3.6585365853658537e-06, "epoch": 0.09174311926605505, "percentage": 1.83, "elapsed_time": "2:13:09", "remaining_time": "4 days, 22:43:40"}
+{"current_steps": 100, "total_steps": 4905, "loss": 0.352, "lr": 4.0650406504065046e-06, "epoch": 0.1019367991845056, "percentage": 2.04, "elapsed_time": "2:28:07", "remaining_time": "4 days, 22:37:45"}
+{"current_steps": 110, "total_steps": 4905, "loss": 0.3062, "lr": 4.471544715447155e-06, "epoch": 0.11213047910295616, "percentage": 2.24, "elapsed_time": "2:42:58", "remaining_time": "4 days, 22:24:25"}
+{"current_steps": 120, "total_steps": 4905, "loss": 0.2818, "lr": 4.8780487804878055e-06, "epoch": 0.12232415902140673, "percentage": 2.45, "elapsed_time": "2:57:43", "remaining_time": "4 days, 22:06:34"}
+{"current_steps": 130, "total_steps": 4905, "loss": 0.2502, "lr": 5.2845528455284555e-06, "epoch": 0.1325178389398573, "percentage": 2.65, "elapsed_time": "3:12:55", "remaining_time": "4 days, 22:06:32"}
+{"current_steps": 140, "total_steps": 4905, "loss": 0.2127, "lr": 5.691056910569106e-06, "epoch": 0.14271151885830785, "percentage": 2.85, "elapsed_time": "3:27:23", "remaining_time": "4 days, 21:38:57"}
+{"current_steps": 150, "total_steps": 4905, "loss": 0.1878, "lr": 6.0975609756097564e-06, "epoch": 0.1529051987767584, "percentage": 3.06, "elapsed_time": "3:42:02", "remaining_time": "4 days, 21:18:57"}
+{"current_steps": 160, "total_steps": 4905, "loss": 0.2214, "lr": 6.504065040650407e-06, "epoch": 0.16309887869520898, "percentage": 3.26, "elapsed_time": "3:56:51", "remaining_time": "4 days, 21:04:23"}
+{"current_steps": 170, "total_steps": 4905, "loss": 0.1763, "lr": 6.910569105691057e-06, "epoch": 0.17329255861365953, "percentage": 3.47, "elapsed_time": "4:11:11", "remaining_time": "4 days, 20:36:38"}
+{"current_steps": 180, "total_steps": 4905, "loss": 0.168, "lr": 7.317073170731707e-06, "epoch": 0.1834862385321101, "percentage": 3.67, "elapsed_time": "4:25:58", "remaining_time": "4 days, 20:21:58"}
+{"current_steps": 190, "total_steps": 4905, "loss": 0.1535, "lr": 7.723577235772358e-06, "epoch": 0.19367991845056065, "percentage": 3.87, "elapsed_time": "4:40:21", "remaining_time": "4 days, 19:57:22"}
+{"current_steps": 200, "total_steps": 4905, "loss": 0.1369, "lr": 8.130081300813009e-06, "epoch": 0.2038735983690112, "percentage": 4.08, "elapsed_time": "4:55:09", "remaining_time": "4 days, 19:43:24"}
+{"current_steps": 210, "total_steps": 4905, "loss": 0.1438, "lr": 8.536585365853658e-06, "epoch": 0.21406727828746178, "percentage": 4.28, "elapsed_time": "5:09:22", "remaining_time": "4 days, 19:16:45"}
+{"current_steps": 220, "total_steps": 4905, "loss": 0.1266, "lr": 8.94308943089431e-06, "epoch": 0.22426095820591233, "percentage": 4.49, "elapsed_time": "5:24:01", "remaining_time": "4 days, 19:00:19"}
+{"current_steps": 230, "total_steps": 4905, "loss": 0.128, "lr": 9.34959349593496e-06, "epoch": 0.2344546381243629, "percentage": 4.69, "elapsed_time": "5:38:51", "remaining_time": "4 days, 18:47:48"}
+{"current_steps": 240, "total_steps": 4905, "loss": 0.1287, "lr": 9.756097560975611e-06, "epoch": 0.24464831804281345, "percentage": 4.89, "elapsed_time": "5:53:16", "remaining_time": "4 days, 18:26:52"}
+{"current_steps": 250, "total_steps": 4905, "loss": 0.101, "lr": 9.999981812456187e-06, "epoch": 0.254841997961264, "percentage": 5.1, "elapsed_time": "6:08:14", "remaining_time": "4 days, 18:16:46"}
+{"current_steps": 260, "total_steps": 4905, "loss": 0.118, "lr": 9.99977720410784e-06, "epoch": 0.2650356778797146, "percentage": 5.3, "elapsed_time": "6:22:47", "remaining_time": "4 days, 17:58:47"}
+{"current_steps": 270, "total_steps": 4905, "loss": 0.099, "lr": 9.999345262315669e-06, "epoch": 0.27522935779816515, "percentage": 5.5, "elapsed_time": "6:37:31", "remaining_time": "4 days, 17:44:06"}
+{"current_steps": 280, "total_steps": 4905, "loss": 0.1091, "lr": 9.99868600671951e-06, "epoch": 0.2854230377166157, "percentage": 5.71, "elapsed_time": "6:51:58", "remaining_time": "4 days, 17:25:01"}
+{"current_steps": 290, "total_steps": 4905, "loss": 0.0988, "lr": 9.99779946729487e-06, "epoch": 0.29561671763506625, "percentage": 5.91, "elapsed_time": "7:06:46", "remaining_time": "4 days, 17:11:38"}
+{"current_steps": 300, "total_steps": 4905, "loss": 0.0814, "lr": 9.99668568435156e-06, "epoch": 0.3058103975535168, "percentage": 6.12, "elapsed_time": "7:21:23", "remaining_time": "4 days, 16:55:24"}
+{"current_steps": 310, "total_steps": 4905, "loss": 0.0847, "lr": 9.995344708531853e-06, "epoch": 0.3160040774719674, "percentage": 6.32, "elapsed_time": "7:36:12", "remaining_time": "4 days, 16:42:14"}
+{"current_steps": 320, "total_steps": 4905, "loss": 0.0739, "lr": 9.9937766008082e-06, "epoch": 0.32619775739041795, "percentage": 6.52, "elapsed_time": "7:51:05", "remaining_time": "4 days, 16:29:51"}
+{"current_steps": 330, "total_steps": 4905, "loss": 0.0732, "lr": 9.991981432480444e-06, "epoch": 0.3363914373088685, "percentage": 6.73, "elapsed_time": "8:05:23", "remaining_time": "4 days, 16:09:16"}
+{"current_steps": 340, "total_steps": 4905, "loss": 0.0762, "lr": 9.989959285172578e-06, "epoch": 0.34658511722731905, "percentage": 6.93, "elapsed_time": "8:19:52", "remaining_time": "4 days, 15:51:27"}
+{"current_steps": 350, "total_steps": 4905, "loss": 0.0634, "lr": 9.987710250829048e-06, "epoch": 0.3567787971457696, "percentage": 7.14, "elapsed_time": "8:34:16", "remaining_time": "4 days, 15:32:51"}
+{"current_steps": 360, "total_steps": 4905, "loss": 0.0558, "lr": 9.985234431710547e-06, "epoch": 0.3669724770642202, "percentage": 7.34, "elapsed_time": "8:49:01", "remaining_time": "4 days, 15:18:57"}
+{"current_steps": 370, "total_steps": 4905, "loss": 0.0625, "lr": 9.982531940389395e-06, "epoch": 0.37716615698267075, "percentage": 7.54, "elapsed_time": "9:03:48", "remaining_time": "4 days, 15:05:15"}
+{"current_steps": 380, "total_steps": 4905, "loss": 0.0596, "lr": 9.979602899744396e-06, "epoch": 0.3873598369011213, "percentage": 7.75, "elapsed_time": "9:18:32", "remaining_time": "4 days, 14:50:58"}
+{"current_steps": 390, "total_steps": 4905, "loss": 0.0708, "lr": 9.976447442955267e-06, "epoch": 0.39755351681957185, "percentage": 7.95, "elapsed_time": "9:32:46", "remaining_time": "4 days, 14:31:01"}
+{"current_steps": 400, "total_steps": 4905, "loss": 0.0575, "lr": 9.973065713496567e-06, "epoch": 0.4077471967380224, "percentage": 8.15, "elapsed_time": "9:47:37", "remaining_time": "4 days, 14:18:04"}
+{"current_steps": 410, "total_steps": 4905, "loss": 0.0513, "lr": 9.969457865131194e-06, "epoch": 0.417940876656473, "percentage": 8.36, "elapsed_time": "10:02:23", "remaining_time": "4 days, 14:04:11"}
+{"current_steps": 420, "total_steps": 4905, "loss": 0.0535, "lr": 9.965624061903372e-06, "epoch": 0.42813455657492355, "percentage": 8.56, "elapsed_time": "10:17:10", "remaining_time": "4 days, 13:50:33"}
+{"current_steps": 430, "total_steps": 4905, "loss": 0.039, "lr": 9.961564478131211e-06, "epoch": 0.4383282364933741, "percentage": 8.77, "elapsed_time": "10:32:05", "remaining_time": "4 days, 13:38:05"}
+{"current_steps": 440, "total_steps": 4905, "loss": 0.0538, "lr": 9.957279298398763e-06, "epoch": 0.44852191641182465, "percentage": 8.97, "elapsed_time": "10:46:54", "remaining_time": "4 days, 13:24:38"}
+{"current_steps": 450, "total_steps": 4905, "loss": 0.0559, "lr": 9.952768717547647e-06, "epoch": 0.45871559633027525, "percentage": 9.17, "elapsed_time": "11:01:52", "remaining_time": "4 days, 13:12:31"}
+{"current_steps": 460, "total_steps": 4905, "loss": 0.0449, "lr": 9.948032940668177e-06, "epoch": 0.4689092762487258, "percentage": 9.38, "elapsed_time": "11:16:43", "remaining_time": "4 days, 12:59:09"}
+{"current_steps": 470, "total_steps": 4905, "loss": 0.0464, "lr": 9.94307218309004e-06, "epoch": 0.47910295616717635, "percentage": 9.58, "elapsed_time": "11:31:18", "remaining_time": "4 days, 12:43:15"}
+{"current_steps": 480, "total_steps": 4905, "loss": 0.0434, "lr": 9.937886670372507e-06, "epoch": 0.4892966360856269, "percentage": 9.79, "elapsed_time": "11:45:41", "remaining_time": "4 days, 12:25:31"}
+{"current_steps": 490, "total_steps": 4905, "loss": 0.0403, "lr": 9.932476638294179e-06, "epoch": 0.49949031600407745, "percentage": 9.99, "elapsed_time": "12:00:04", "remaining_time": "4 days, 12:08:03"}
+{"current_steps": 500, "total_steps": 4905, "loss": 0.0438, "lr": 9.926842332842262e-06, "epoch": 0.509683995922528, "percentage": 10.19, "elapsed_time": "12:14:52", "remaining_time": "4 days, 11:54:16"}
+{"current_steps": 510, "total_steps": 4905, "loss": 0.0407, "lr": 9.92098401020138e-06, "epoch": 0.5198776758409785, "percentage": 10.4, "elapsed_time": "12:29:29", "remaining_time": "4 days, 11:38:55"}
+{"current_steps": 520, "total_steps": 4905, "loss": 0.0468, "lr": 9.914901936741937e-06, "epoch": 0.5300713557594292, "percentage": 10.6, "elapsed_time": "12:44:48", "remaining_time": "4 days, 11:29:23"}
+{"current_steps": 530, "total_steps": 4905, "loss": 0.0403, "lr": 9.908596389007997e-06, "epoch": 0.5402650356778798, "percentage": 10.81, "elapsed_time": "12:59:40", "remaining_time": "4 days, 11:16:01"}
+{"current_steps": 540, "total_steps": 4905, "loss": 0.0313, "lr": 9.902067653704712e-06, "epoch": 0.5504587155963303, "percentage": 11.01, "elapsed_time": "13:14:39", "remaining_time": "4 days, 11:03:29"}
+{"current_steps": 550, "total_steps": 4905, "loss": 0.0343, "lr": 9.895316027685284e-06, "epoch": 0.5606523955147809, "percentage": 11.21, "elapsed_time": "13:29:35", "remaining_time": "4 days, 10:50:32"}
+{"current_steps": 560, "total_steps": 4905, "loss": 0.046, "lr": 9.888341817937471e-06, "epoch": 0.5708460754332314, "percentage": 11.42, "elapsed_time": "13:44:38", "remaining_time": "4 days, 10:38:19"}
+{"current_steps": 570, "total_steps": 4905, "loss": 0.0403, "lr": 9.88114534156963e-06, "epoch": 0.581039755351682, "percentage": 11.62, "elapsed_time": "13:59:32", "remaining_time": "4 days, 10:24:56"}
+{"current_steps": 580, "total_steps": 4905, "loss": 0.0291, "lr": 9.873726925796287e-06, "epoch": 0.5912334352701325, "percentage": 11.82, "elapsed_time": "14:14:42", "remaining_time": "4 days, 10:13:31"}
+{"current_steps": 590, "total_steps": 4905, "loss": 0.0352, "lr": 9.866086907923278e-06, "epoch": 0.601427115188583, "percentage": 12.03, "elapsed_time": "14:29:44", "remaining_time": "4 days, 10:00:52"}
+{"current_steps": 600, "total_steps": 4905, "loss": 0.0271, "lr": 9.858225635332397e-06, "epoch": 0.6116207951070336, "percentage": 12.23, "elapsed_time": "14:44:21", "remaining_time": "4 days, 9:45:12"}
+{"current_steps": 610, "total_steps": 4905, "loss": 0.0336, "lr": 9.850143465465603e-06, "epoch": 0.6218144750254841, "percentage": 12.44, "elapsed_time": "14:59:03", "remaining_time": "4 days, 9:30:18"}
+{"current_steps": 620, "total_steps": 4905, "loss": 0.0335, "lr": 9.841840765808772e-06, "epoch": 0.6320081549439348, "percentage": 12.64, "elapsed_time": "15:14:01", "remaining_time": "4 days, 9:17:06"}
+{"current_steps": 630, "total_steps": 4905, "loss": 0.0405, "lr": 9.833317913874991e-06, "epoch": 0.6422018348623854, "percentage": 12.84, "elapsed_time": "15:28:51", "remaining_time": "4 days, 9:02:55"}
+{"current_steps": 640, "total_steps": 4905, "loss": 0.028, "lr": 9.82457529718738e-06, "epoch": 0.6523955147808359, "percentage": 13.05, "elapsed_time": "15:43:36", "remaining_time": "4 days, 8:48:15"}
+{"current_steps": 650, "total_steps": 4905, "loss": 0.0288, "lr": 9.815613313261485e-06, "epoch": 0.6625891946992865, "percentage": 13.25, "elapsed_time": "15:57:53", "remaining_time": "4 days, 8:30:31"}
+{"current_steps": 660, "total_steps": 4905, "loss": 0.0265, "lr": 9.8064323695872e-06, "epoch": 0.672782874617737, "percentage": 13.46, "elapsed_time": "16:12:29", "remaining_time": "4 days, 8:14:51"}
+{"current_steps": 670, "total_steps": 4905, "loss": 0.0229, "lr": 9.79703288361023e-06, "epoch": 0.6829765545361876, "percentage": 13.66, "elapsed_time": "16:26:32", "remaining_time": "4 days, 7:55:50"}
+{"current_steps": 680, "total_steps": 4905, "loss": 0.0325, "lr": 9.787415282713126e-06, "epoch": 0.6931702344546381, "percentage": 13.86, "elapsed_time": "16:41:04", "remaining_time": "4 days, 7:39:57"}
+{"current_steps": 690, "total_steps": 4905, "loss": 0.0248, "lr": 9.777580004195838e-06, "epoch": 0.7033639143730887, "percentage": 14.07, "elapsed_time": "16:55:49", "remaining_time": "4 days, 7:25:22"}
+{"current_steps": 700, "total_steps": 4905, "loss": 0.0349, "lr": 9.767527495255842e-06, "epoch": 0.7135575942915392, "percentage": 14.27, "elapsed_time": "17:11:04", "remaining_time": "4 days, 7:13:47"}
+{"current_steps": 710, "total_steps": 4905, "loss": 0.0266, "lr": 9.757258212967799e-06, "epoch": 0.7237512742099899, "percentage": 14.48, "elapsed_time": "17:25:45", "remaining_time": "4 days, 6:58:46"}
+{"current_steps": 720, "total_steps": 4905, "loss": 0.0266, "lr": 9.746772624262775e-06, "epoch": 0.7339449541284404, "percentage": 14.68, "elapsed_time": "17:40:24", "remaining_time": "4 days, 6:43:39"}
+{"current_steps": 730, "total_steps": 4905, "loss": 0.0222, "lr": 9.736071205907017e-06, "epoch": 0.744138634046891, "percentage": 14.88, "elapsed_time": "17:54:55", "remaining_time": "4 days, 6:27:41"}
+{"current_steps": 740, "total_steps": 4905, "loss": 0.0248, "lr": 9.72515444448026e-06, "epoch": 0.7543323139653415, "percentage": 15.09, "elapsed_time": "18:09:55", "remaining_time": "4 days, 6:14:28"}
+{"current_steps": 750, "total_steps": 4905, "loss": 0.0217, "lr": 9.714022836353618e-06, "epoch": 0.764525993883792, "percentage": 15.29, "elapsed_time": "18:24:29", "remaining_time": "4 days, 5:58:53"}
+{"current_steps": 760, "total_steps": 4905, "loss": 0.0192, "lr": 9.702676887667007e-06, "epoch": 0.7747196738022426, "percentage": 15.49, "elapsed_time": "18:39:10", "remaining_time": "4 days, 5:43:56"}
+{"current_steps": 770, "total_steps": 4905, "loss": 0.022, "lr": 9.69111711430613e-06, "epoch": 0.7849133537206932, "percentage": 15.7, "elapsed_time": "18:54:05", "remaining_time": "4 days, 5:30:12"}
+{"current_steps": 780, "total_steps": 4905, "loss": 0.0255, "lr": 9.679344041879026e-06, "epoch": 0.7951070336391437, "percentage": 15.9, "elapsed_time": "19:08:43", "remaining_time": "4 days, 5:15:00"}
+{"current_steps": 790, "total_steps": 4905, "loss": 0.032, "lr": 9.66735820569217e-06, "epoch": 0.8053007135575942, "percentage": 16.11, "elapsed_time": "19:23:18", "remaining_time": "4 days, 4:59:32"}
+{"current_steps": 800, "total_steps": 4905, "loss": 0.0247, "lr": 9.655160150726126e-06, "epoch": 0.8154943934760448, "percentage": 16.31, "elapsed_time": "19:38:19", "remaining_time": "4 days, 4:46:14"}
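
trainer_log.jsonl holds one JSON record per 10 optimizer steps (loss, learning rate, epoch, timing); the 80 entries above cover steps 10-800 of 4905, with the loss dropping from 3.6483 at step 10 to 0.0247 at step 800. A hedged sketch for summarizing it; the file path is a placeholder:

# Sketch only: summarize the per-step training log shown above.
import json

with open("trainer_log.jsonl") as f:          # placeholder path
    records = [json.loads(line) for line in f]

for rec in records[::10]:                     # every 100 optimizer steps
    print(f"step {rec['current_steps']:>4}  loss {rec['loss']:.4f}  lr {rec['lr']:.2e}")

last = records[-1]
print(f"latest: step {last['current_steps']}/{last['total_steps']}, "
      f"elapsed {last['elapsed_time']}, remaining {last['remaining_time']}")
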
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cfb7637d619ef70c5c5b817601dc5ea1695d82d34f73976ee7411413d46822a
+size 7825
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff