Upload folder using huggingface_hub
Browse files- LICENSE +202 -0
- README.md +109 -40
- config.json +39 -0
- generation_config.json +12 -0
- merges.txt +0 -0
- model-00001-of-00016.safetensors +3 -0
- model-00002-of-00016.safetensors +3 -0
- model-00003-of-00016.safetensors +3 -0
- model-00004-of-00016.safetensors +3 -0
- model-00005-of-00016.safetensors +3 -0
- model-00006-of-00016.safetensors +3 -0
- model-00007-of-00016.safetensors +3 -0
- model-00008-of-00016.safetensors +3 -0
- model-00009-of-00016.safetensors +3 -0
- model-00010-of-00016.safetensors +3 -0
- model-00011-of-00016.safetensors +3 -0
- model-00012-of-00016.safetensors +3 -0
- model-00013-of-00016.safetensors +3 -0
- model-00014-of-00016.safetensors +3 -0
- model-00015-of-00016.safetensors +3 -0
- model-00016-of-00016.safetensors +3 -0
- model.safetensors.index.json +0 -0
- qwen3coder_tool_parser.py +675 -0
- tokenizer.json +0 -0
- tokenizer_config.json +239 -0
- vocab.json +0 -0
LICENSE
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
Apache License
|
3 |
+
Version 2.0, January 2004
|
4 |
+
http://www.apache.org/licenses/
|
5 |
+
|
6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7 |
+
|
8 |
+
1. Definitions.
|
9 |
+
|
10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
12 |
+
|
13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14 |
+
the copyright owner that is granting the License.
|
15 |
+
|
16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
17 |
+
other entities that control, are controlled by, or are under common
|
18 |
+
control with that entity. For the purposes of this definition,
|
19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
20 |
+
direction or management of such entity, whether by contract or
|
21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23 |
+
|
24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25 |
+
exercising permissions granted by this License.
|
26 |
+
|
27 |
+
"Source" form shall mean the preferred form for making modifications,
|
28 |
+
including but not limited to software source code, documentation
|
29 |
+
source, and configuration files.
|
30 |
+
|
31 |
+
"Object" form shall mean any form resulting from mechanical
|
32 |
+
transformation or translation of a Source form, including but
|
33 |
+
not limited to compiled object code, generated documentation,
|
34 |
+
and conversions to other media types.
|
35 |
+
|
36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
37 |
+
Object form, made available under the License, as indicated by a
|
38 |
+
copyright notice that is included in or attached to the work
|
39 |
+
(an example is provided in the Appendix below).
|
40 |
+
|
41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42 |
+
form, that is based on (or derived from) the Work and for which the
|
43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
45 |
+
of this License, Derivative Works shall not include works that remain
|
46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47 |
+
the Work and Derivative Works thereof.
|
48 |
+
|
49 |
+
"Contribution" shall mean any work of authorship, including
|
50 |
+
the original version of the Work and any modifications or additions
|
51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
55 |
+
means any form of electronic, verbal, or written communication sent
|
56 |
+
to the Licensor or its representatives, including but not limited to
|
57 |
+
communication on electronic mailing lists, source code control systems,
|
58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
60 |
+
excluding communication that is conspicuously marked or otherwise
|
61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
62 |
+
|
63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
65 |
+
subsequently incorporated within the Work.
|
66 |
+
|
67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
72 |
+
Work and such Derivative Works in Source or Object form.
|
73 |
+
|
74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
+
(except as stated in this section) patent license to make, have made,
|
78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79 |
+
where such license applies only to those patent claims licensable
|
80 |
+
by such Contributor that are necessarily infringed by their
|
81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
83 |
+
institute patent litigation against any entity (including a
|
84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85 |
+
or a Contribution incorporated within the Work constitutes direct
|
86 |
+
or contributory patent infringement, then any patent licenses
|
87 |
+
granted to You under this License for that Work shall terminate
|
88 |
+
as of the date such litigation is filed.
|
89 |
+
|
90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
91 |
+
Work or Derivative Works thereof in any medium, with or without
|
92 |
+
modifications, and in Source or Object form, provided that You
|
93 |
+
meet the following conditions:
|
94 |
+
|
95 |
+
(a) You must give any other recipients of the Work or
|
96 |
+
Derivative Works a copy of this License; and
|
97 |
+
|
98 |
+
(b) You must cause any modified files to carry prominent notices
|
99 |
+
stating that You changed the files; and
|
100 |
+
|
101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
102 |
+
that You distribute, all copyright, patent, trademark, and
|
103 |
+
attribution notices from the Source form of the Work,
|
104 |
+
excluding those notices that do not pertain to any part of
|
105 |
+
the Derivative Works; and
|
106 |
+
|
107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108 |
+
distribution, then any Derivative Works that You distribute must
|
109 |
+
include a readable copy of the attribution notices contained
|
110 |
+
within such NOTICE file, excluding those notices that do not
|
111 |
+
pertain to any part of the Derivative Works, in at least one
|
112 |
+
of the following places: within a NOTICE text file distributed
|
113 |
+
as part of the Derivative Works; within the Source form or
|
114 |
+
documentation, if provided along with the Derivative Works; or,
|
115 |
+
within a display generated by the Derivative Works, if and
|
116 |
+
wherever such third-party notices normally appear. The contents
|
117 |
+
of the NOTICE file are for informational purposes only and
|
118 |
+
do not modify the License. You may add Your own attribution
|
119 |
+
notices within Derivative Works that You distribute, alongside
|
120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
121 |
+
that such additional attribution notices cannot be construed
|
122 |
+
as modifying the License.
|
123 |
+
|
124 |
+
You may add Your own copyright statement to Your modifications and
|
125 |
+
may provide additional or different license terms and conditions
|
126 |
+
for use, reproduction, or distribution of Your modifications, or
|
127 |
+
for any such Derivative Works as a whole, provided Your use,
|
128 |
+
reproduction, and distribution of the Work otherwise complies with
|
129 |
+
the conditions stated in this License.
|
130 |
+
|
131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
133 |
+
by You to the Licensor shall be under the terms and conditions of
|
134 |
+
this License, without any additional terms or conditions.
|
135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136 |
+
the terms of any separate license agreement you may have executed
|
137 |
+
with Licensor regarding such Contributions.
|
138 |
+
|
139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
141 |
+
except as required for reasonable and customary use in describing the
|
142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
143 |
+
|
144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145 |
+
agreed to in writing, Licensor provides the Work (and each
|
146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148 |
+
implied, including, without limitation, any warranties or conditions
|
149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151 |
+
appropriateness of using or redistributing the Work and assume any
|
152 |
+
risks associated with Your exercise of permissions under this License.
|
153 |
+
|
154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
155 |
+
whether in tort (including negligence), contract, or otherwise,
|
156 |
+
unless required by applicable law (such as deliberate and grossly
|
157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158 |
+
liable to You for damages, including any direct, indirect, special,
|
159 |
+
incidental, or consequential damages of any character arising as a
|
160 |
+
result of this License or out of the use or inability to use the
|
161 |
+
Work (including but not limited to damages for loss of goodwill,
|
162 |
+
work stoppage, computer failure or malfunction, or any and all
|
163 |
+
other commercial damages or losses), even if such Contributor
|
164 |
+
has been advised of the possibility of such damages.
|
165 |
+
|
166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169 |
+
or other liability obligations and/or rights consistent with this
|
170 |
+
License. However, in accepting such obligations, You may act only
|
171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172 |
+
of any other Contributor, and only if You agree to indemnify,
|
173 |
+
defend, and hold each Contributor harmless for any liability
|
174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
175 |
+
of your accepting any such warranty or additional liability.
|
176 |
+
|
177 |
+
END OF TERMS AND CONDITIONS
|
178 |
+
|
179 |
+
APPENDIX: How to apply the Apache License to your work.
|
180 |
+
|
181 |
+
To apply the Apache License to your work, attach the following
|
182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183 |
+
replaced with your own identifying information. (Don't include
|
184 |
+
the brackets!) The text should be enclosed in the appropriate
|
185 |
+
comment syntax for the file format. We also recommend that a
|
186 |
+
file or class name and description of purpose be included on the
|
187 |
+
same "printed page" as the copyright notice for easier
|
188 |
+
identification within third-party archives.
|
189 |
+
|
190 |
+
Copyright 2024 Alibaba Cloud
|
191 |
+
|
192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193 |
+
you may not use this file except in compliance with the License.
|
194 |
+
You may obtain a copy of the License at
|
195 |
+
|
196 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
197 |
+
|
198 |
+
Unless required by applicable law or agreed to in writing, software
|
199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201 |
+
See the License for the specific language governing permissions and
|
202 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,33 +1,56 @@
|
|
1 |
---
|
|
|
2 |
license: apache-2.0
|
3 |
-
|
4 |
-
|
|
|
5 |
|
|
|
|
|
|
|
|
|
6 |
|
|
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
|
|
10 |
|
11 |
-
## Model
|
12 |
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
|
|
|
18 |
|
19 |
-
**vLLM usage**
|
20 |
|
21 |
-
|
22 |
-
vllm serve Intel/Qwen3-Coder-30B-A3B-Instruct-int4-AutoRound --tensor-parallel-size 4 --max-model-len 65536
|
23 |
-
~~~
|
24 |
|
25 |
-
|
26 |
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
29 |
|
30 |
-
model_name = "
|
31 |
|
32 |
# load the tokenizer and the model
|
33 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
@@ -59,39 +82,85 @@ output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
|
59 |
content = tokenizer.decode(output_ids, skip_special_tokens=True)
|
60 |
|
61 |
print("content:", content)
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
|
|
82 |
|
83 |
-
|
84 |
|
85 |
-
|
86 |
|
87 |
-
|
|
|
88 |
|
89 |
-
|
90 |
|
91 |
-
The license on this model does not constitute legal advice. We are not responsible for the actions of third parties who use this model. Please consult an attorney before using this model for commercial purposes.
|
92 |
|
93 |
-
|
94 |
|
95 |
-
|
96 |
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
library_name: transformers
|
3 |
license: apache-2.0
|
4 |
+
license_link: https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct/blob/main/LICENSE
|
5 |
+
pipeline_tag: text-generation
|
6 |
+
---
|
7 |
|
8 |
+
# Qwen3-Coder-30B-A3B-Instruct
|
9 |
+
<a href="https://chat.qwen.ai/" target="_blank" style="margin: 2px;">
|
10 |
+
<img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
|
11 |
+
</a>
|
12 |
|
13 |
+
## Highlights
|
14 |
|
15 |
+
**Qwen3-Coder** is available in multiple sizes. Today, we're excited to introduce **Qwen3-Coder-30B-A3B-Instruct**. This streamlined model maintains impressive performance and efficiency, featuring the following key enhancements:
|
16 |
+
|
17 |
+
- **Significant Performance** among open models on **Agentic Coding**, **Agentic Browser-Use**, and other foundational coding tasks.
|
18 |
+
- **Long-context Capabilities** with native support for **256K** tokens, extendable up to **1M** tokens using Yarn, optimized for repository-scale understanding.
|
19 |
+
- **Agentic Coding** supporting for most platform such as **Qwen Code**, **CLINE**, featuring a specially designed function call format.
|
20 |
|
21 |
+

|
22 |
|
23 |
+
## Model Overview
|
24 |
|
25 |
+
**Qwen3-Coder-30B-A3B-Instruct** has the following features:
|
26 |
+
- Type: Causal Language Models
|
27 |
+
- Training Stage: Pretraining & Post-training
|
28 |
+
- Number of Parameters: 30.5B in total and 3.3B activated
|
29 |
+
- Number of Layers: 48
|
30 |
+
- Number of Attention Heads (GQA): 32 for Q and 4 for KV
|
31 |
+
- Number of Experts: 128
|
32 |
+
- Number of Activated Experts: 8
|
33 |
+
- Context Length: **262,144 natively**.
|
34 |
|
35 |
+
**NOTE: This model supports only non-thinking mode and does not generate ``<think></think>`` blocks in its output. Meanwhile, specifying `enable_thinking=False` is no longer required.**
|
36 |
|
37 |
+
For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3-coder/), [GitHub](https://github.com/QwenLM/Qwen3-Coder), and [Documentation](https://qwen.readthedocs.io/en/latest/).
|
38 |
|
|
|
39 |
|
40 |
+
## Quickstart
|
|
|
|
|
41 |
|
42 |
+
We advise you to use the latest version of `transformers`.
|
43 |
|
44 |
+
With `transformers<4.51.0`, you will encounter the following error:
|
45 |
+
```
|
46 |
+
KeyError: 'qwen3_moe'
|
47 |
+
```
|
48 |
+
|
49 |
+
The following contains a code snippet illustrating how to use the model generate content based on given inputs.
|
50 |
+
```python
|
51 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
52 |
|
53 |
+
model_name = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
|
54 |
|
55 |
# load the tokenizer and the model
|
56 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
82 |
content = tokenizer.decode(output_ids, skip_special_tokens=True)
|
83 |
|
84 |
print("content:", content)
|
85 |
+
```
|
86 |
+
|
87 |
+
**Note: If you encounter out-of-memory (OOM) issues, consider reducing the context length to a shorter value, such as `32,768`.**
|
88 |
+
|
89 |
+
For local use, applications such as Ollama, LMStudio, MLX-LM, llama.cpp, and KTransformers have also supported Qwen3.
|
90 |
+
|
91 |
+
## Agentic Coding
|
92 |
+
|
93 |
+
Qwen3-Coder excels in tool calling capabilities.
|
94 |
+
|
95 |
+
You can simply define or use any tools as following example.
|
96 |
+
```python
|
97 |
+
# Your tool implementation
|
98 |
+
def square_the_number(num: float) -> dict:
|
99 |
+
return num ** 2
|
100 |
+
|
101 |
+
# Define Tools
|
102 |
+
tools=[
|
103 |
+
{
|
104 |
+
"type":"function",
|
105 |
+
"function":{
|
106 |
+
"name": "square_the_number",
|
107 |
+
"description": "output the square of the number.",
|
108 |
+
"parameters": {
|
109 |
+
"type": "object",
|
110 |
+
"required": ["input_num"],
|
111 |
+
"properties": {
|
112 |
+
'input_num': {
|
113 |
+
'type': 'number',
|
114 |
+
'description': 'input_num is a number that will be squared'
|
115 |
+
}
|
116 |
+
},
|
117 |
+
}
|
118 |
+
}
|
119 |
+
}
|
120 |
+
]
|
121 |
|
122 |
+
import OpenAI
|
123 |
+
# Define LLM
|
124 |
+
client = OpenAI(
|
125 |
+
# Use a custom endpoint compatible with OpenAI API
|
126 |
+
base_url='http://localhost:8000/v1', # api_base
|
127 |
+
api_key="EMPTY"
|
128 |
+
)
|
129 |
+
|
130 |
+
messages = [{'role': 'user', 'content': 'square the number 1024'}]
|
131 |
+
|
132 |
+
completion = client.chat.completions.create(
|
133 |
+
messages=messages,
|
134 |
+
model="Qwen3-Coder-30B-A3B-Instruct",
|
135 |
+
max_tokens=65536,
|
136 |
+
tools=tools,
|
137 |
+
)
|
138 |
|
139 |
+
print(completion.choice[0])
|
140 |
+
```
|
141 |
|
142 |
+
## Best Practices
|
143 |
|
144 |
+
To achieve optimal performance, we recommend the following settings:
|
145 |
|
146 |
+
1. **Sampling Parameters**:
|
147 |
+
- We suggest using `temperature=0.7`, `top_p=0.8`, `top_k=20`, `repetition_penalty=1.05`.
|
148 |
|
149 |
+
2. **Adequate Output Length**: We recommend using an output length of 65,536 tokens for most queries, which is adequate for instruct models.
|
150 |
|
|
|
151 |
|
152 |
+
### Citation
|
153 |
|
154 |
+
If you find our work helpful, feel free to give us a cite.
|
155 |
|
156 |
+
```
|
157 |
+
@misc{qwen3technicalreport,
|
158 |
+
title={Qwen3 Technical Report},
|
159 |
+
author={Qwen Team},
|
160 |
+
year={2025},
|
161 |
+
eprint={2505.09388},
|
162 |
+
archivePrefix={arXiv},
|
163 |
+
primaryClass={cs.CL},
|
164 |
+
url={https://arxiv.org/abs/2505.09388},
|
165 |
+
}
|
166 |
+
```
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen3MoeForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"decoder_sparse_step": 1,
|
7 |
+
"eos_token_id": 151645,
|
8 |
+
"head_dim": 128,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 2048,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 5472,
|
13 |
+
"max_position_embeddings": 262144,
|
14 |
+
"max_window_layers": 28,
|
15 |
+
"mlp_only_layers": [],
|
16 |
+
"model_type": "qwen3_moe",
|
17 |
+
"moe_intermediate_size": 768,
|
18 |
+
"norm_topk_prob": true,
|
19 |
+
"num_attention_heads": 32,
|
20 |
+
"num_experts": 128,
|
21 |
+
"num_experts_per_tok": 8,
|
22 |
+
"num_hidden_layers": 48,
|
23 |
+
"num_key_value_heads": 4,
|
24 |
+
"output_router_logits": false,
|
25 |
+
"qkv_bias": false,
|
26 |
+
"rms_norm_eps": 1e-06,
|
27 |
+
"rope_scaling": null,
|
28 |
+
"rope_theta": 10000000,
|
29 |
+
"router_aux_loss_coef": 0.0,
|
30 |
+
"shared_expert_intermediate_size": 0,
|
31 |
+
"sliding_window": null,
|
32 |
+
"tie_word_embeddings": false,
|
33 |
+
"torch_dtype": "bfloat16",
|
34 |
+
"transformers_version": "4.52.3",
|
35 |
+
"use_cache": true,
|
36 |
+
"use_qk_norm": true,
|
37 |
+
"use_sliding_window": false,
|
38 |
+
"vocab_size": 151936
|
39 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"pad_token_id": 151643,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": [
|
5 |
+
151645,
|
6 |
+
151643
|
7 |
+
],
|
8 |
+
"repetition_penalty": 1.05,
|
9 |
+
"temperature": 0.7,
|
10 |
+
"top_p": 0.8,
|
11 |
+
"top_k": 20
|
12 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-00001-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8f4ee157c2bdb00521d0f422c08ef16f0f3f5f7e234786d829401d627d2e167
|
3 |
+
size 3998893112
|
model-00002-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cf85f0e5a0ea8d8cd3fd1f411175d811ca735571210873fd42bb5eec0b35406
|
3 |
+
size 3999974192
|
model-00003-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fa5cc233e965c8671d32a335045697fc4317c8d2939d8ca5fb9ef62d0233207
|
3 |
+
size 3997360832
|
model-00004-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c10ff15f08abea0ff60b2fc345dcc34191bc8049d05d3bece6b081a99cbfc4c8
|
3 |
+
size 3999975056
|
model-00005-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65d3701e2354a8cf6e5afa3f84520bf81effcdc29b0c481dbbeb089f80349100
|
3 |
+
size 3999975400
|
model-00006-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c832bf9e527b7b6f3184b561de05f30981bdcfdffe9c69e69064cdaa97071b5
|
3 |
+
size 3999975400
|
model-00007-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71a710c3ff52e85d22ae3f5b10b9a4dd49821c9533460fb1d4bb21593f71b585
|
3 |
+
size 3999975472
|
model-00008-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd2861625e1bd8a212151445072b9d826c9754fd4618f8b83c2857fc0854b778
|
3 |
+
size 3997362064
|
model-00009-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8335225fd61c0fe283a83463a8d9c5c19e211ea8048925b031a088f274eb9c4e
|
3 |
+
size 3999975408
|
model-00010-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:421481c42e8d85891c9d88492d7530d6b6d8b15c23a6db6a9b7438e0b6161330
|
3 |
+
size 3999975400
|
model-00011-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7dd447928ddec67a3ec06d5bc4fd99dde9ac5bdf0b6ab500faca5dfdfb963b4
|
3 |
+
size 3999975408
|
model-00012-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:349a0b0247d244ffef34bc23b403ce756f515242323f024a5f8028c109a798af
|
3 |
+
size 3987924896
|
model-00013-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52e7f570649b27bc35b8cd27ac643d2158c02a5c76078c80a8afc449377b508e
|
3 |
+
size 3999975088
|
model-00014-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b30aed1b55b3f25fbf2c5e81784438d0a08461c9518e26e11987eb10d80ebd7
|
3 |
+
size 3999975400
|
model-00015-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb8b3b13c0519874ed0cbe6832ab57e51476e3821c83f0ffe4ec83a8d23fc5f4
|
3 |
+
size 3999975400
|
model-00016-of-00016.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:778479fa2b8b766ba1c7bfc56a71ed9b060081e0fbc5eb4edafd19a1d6b4c116
|
3 |
+
size 1085307128
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen3coder_tool_parser.py
ADDED
@@ -0,0 +1,675 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SPDX-License-Identifier: Apache-2.0
|
2 |
+
|
3 |
+
import json
|
4 |
+
import re
|
5 |
+
import uuid
|
6 |
+
from collections.abc import Sequence
|
7 |
+
from typing import Union, Optional, Any, List, Dict
|
8 |
+
from enum import Enum
|
9 |
+
|
10 |
+
from vllm.entrypoints.openai.protocol import (
|
11 |
+
ChatCompletionRequest,
|
12 |
+
ChatCompletionToolsParam,
|
13 |
+
DeltaMessage,
|
14 |
+
DeltaToolCall,
|
15 |
+
DeltaFunctionCall,
|
16 |
+
ExtractedToolCallInformation,
|
17 |
+
FunctionCall,
|
18 |
+
ToolCall,
|
19 |
+
)
|
20 |
+
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
|
21 |
+
ToolParser,
|
22 |
+
ToolParserManager,
|
23 |
+
)
|
24 |
+
from vllm.logger import init_logger
|
25 |
+
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
26 |
+
|
27 |
+
logger = init_logger(__name__)
|
28 |
+
|
29 |
+
|
30 |
+
@ToolParserManager.register_module("qwen3_xml")
|
31 |
+
class Qwen3XMLToolParser(ToolParser):
|
32 |
+
def __init__(self, tokenizer: AnyTokenizer):
|
33 |
+
super().__init__(tokenizer)
|
34 |
+
|
35 |
+
self.current_tool_name_sent: bool = False
|
36 |
+
self.prev_tool_call_arr: list[dict] = []
|
37 |
+
self.current_tool_id: int = -1
|
38 |
+
self.streamed_args_for_tool: list[str] = []
|
39 |
+
|
40 |
+
# Sentinel tokens for streaming mode
|
41 |
+
self.tool_call_start_token: str = "<tool_call>"
|
42 |
+
self.tool_call_end_token: str = "</tool_call>"
|
43 |
+
self.tool_call_prefix: str = "<function="
|
44 |
+
self.function_end_token: str = "</function>"
|
45 |
+
self.parameter_prefix: str = "<parameter="
|
46 |
+
self.parameter_end_token: str = "</parameter>"
|
47 |
+
self.is_tool_call_started: bool = False
|
48 |
+
self.failed_count: int = 0
|
49 |
+
|
50 |
+
# Enhanced streaming state - reset for each new message
|
51 |
+
self._reset_streaming_state()
|
52 |
+
|
53 |
+
# Regex patterns
|
54 |
+
self.tool_call_complete_regex = re.compile(
|
55 |
+
r"<tool_call>(.*?)</tool_call>", re.DOTALL
|
56 |
+
)
|
57 |
+
self.tool_call_regex = re.compile(
|
58 |
+
r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
|
59 |
+
)
|
60 |
+
self.tool_call_function_regex = re.compile(
|
61 |
+
r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL
|
62 |
+
)
|
63 |
+
self.tool_call_parameter_regex = re.compile(
|
64 |
+
r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL
|
65 |
+
)
|
66 |
+
|
67 |
+
if not self.model_tokenizer:
|
68 |
+
raise ValueError(
|
69 |
+
"The model tokenizer must be passed to the ToolParser "
|
70 |
+
"constructor during construction."
|
71 |
+
)
|
72 |
+
|
73 |
+
self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
|
74 |
+
self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
|
75 |
+
|
76 |
+
if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
|
77 |
+
raise RuntimeError(
|
78 |
+
"Qwen3 XML Tool parser could not locate tool call start/end "
|
79 |
+
"tokens in the tokenizer!"
|
80 |
+
)
|
81 |
+
|
82 |
+
logger.info(f"vLLM Successfully import tool parser {self.__class__.__name__} !")
|
83 |
+
|
84 |
+
def _generate_tool_call_id(self) -> str:
|
85 |
+
"""Generate a unique tool call ID."""
|
86 |
+
return f"call_{uuid.uuid4().hex[:24]}"
|
87 |
+
|
88 |
+
def _reset_streaming_state(self):
|
89 |
+
"""Reset all streaming state."""
|
90 |
+
self.current_tool_index = 0
|
91 |
+
self.is_tool_call_started = False
|
92 |
+
self.header_sent = False
|
93 |
+
self.current_tool_id = None
|
94 |
+
self.current_function_name = None
|
95 |
+
self.current_param_name = None
|
96 |
+
self.current_param_value = ""
|
97 |
+
self.param_count = 0
|
98 |
+
self.in_param = False
|
99 |
+
self.in_function = False
|
100 |
+
self.accumulated_text = ""
|
101 |
+
self.json_started = False
|
102 |
+
self.json_closed = False
|
103 |
+
|
104 |
+
def _parse_xml_function_call(
    self, function_call_str: str, tools: Optional[list[ChatCompletionToolsParam]]
) -> Optional[ToolCall]:
    """Parse one ``<function=NAME>...`` body into a ToolCall.

    Args:
        function_call_str: Text that followed ``<function=`` (i.e. it
            starts with the function name, then ``>``, then the
            ``<parameter=...>`` blocks).
        tools: The request's tool definitions; used to coerce parameter
            strings to the types declared in each tool's JSON schema.

    Returns:
        A ``ToolCall`` whose arguments are JSON-encoded, or raises if the
        header is malformed (callers wrap this in try/except).
    """
    import ast  # Local import: used for safe literal parsing below.

    def get_arguments_config(func_name: str) -> dict:
        """Return the JSON-schema ``properties`` dict for ``func_name``."""
        if tools is None:
            return {}
        for config in tools:
            if not hasattr(config, "type") or not (
                hasattr(config, "function") and hasattr(config.function, "name")
            ):
                continue
            if config.type == "function" and config.function.name == func_name:
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.warning(f"Tool '{func_name}' is not defined in the tools list.")
        return {}

    def convert_param_value(
        param_value: str, param_name: str, param_config: dict, func_name: str
    ) -> Any:
        """Coerce a raw string value to the schema-declared type."""
        # Handle null value for any type.
        if param_value.lower() == "null":
            return None

        if param_name not in param_config:
            if param_config != {}:
                logger.warning(
                    f"Parsed parameter '{param_name}' is not defined in the tool "
                    f"parameters for tool '{func_name}', directly returning the string value."
                )
            return param_value

        if (
            isinstance(param_config[param_name], dict)
            and "type" in param_config[param_name]
        ):
            param_type = str(param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"

        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
            return param_value
        elif (
            param_type.startswith("int")
            or param_type.startswith("uint")
            or param_type.startswith("long")
            or param_type.startswith("short")
            or param_type.startswith("unsigned")
        ):
            try:
                param_value = int(param_value)
            except Exception:  # was a bare except; never mask SystemExit etc.
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not an integer in tool "
                    f"'{func_name}', degenerating to string."
                )
            return param_value
        elif param_type.startswith("num") or param_type.startswith("float"):
            try:
                float_param_value = float(param_value)
                # Collapse whole-valued floats (e.g. "3.0") to int.
                param_value = (
                    float_param_value
                    if float_param_value - int(float_param_value) != 0
                    else int(float_param_value)
                )
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a float in tool "
                    f"'{func_name}', degenerating to string."
                )
            return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' is not a boolean (`true` or `false`) in tool '{func_name}', degenerating to false."
                )
            return param_value == "true"
        else:
            if param_type == "object" or param_type.startswith("dict"):
                try:
                    param_value = json.loads(param_value)
                    return param_value
                except Exception:
                    logger.warning(
                        f"Parsed value '{param_value}' of parameter '{param_name}' is not a valid JSON object in tool "
                        f"'{func_name}', will try other methods to parse it."
                    )
            # SECURITY: the value comes from model output (untrusted).
            # The original code used `eval()` here, which executes
            # arbitrary expressions; `ast.literal_eval` only accepts
            # Python literals (dict/list/tuple/str/num/bool/None).
            try:
                param_value = ast.literal_eval(param_value)
            except Exception:
                logger.warning(
                    f"Parsed value '{param_value}' of parameter '{param_name}' cannot be parsed as a Python literal in tool '{func_name}', degenerating to string."
                )
            return param_value

    # Extract function name (text up to the first '>').
    end_index = function_call_str.index(">")
    function_name = function_call_str[:end_index]
    param_config = get_arguments_config(function_name)
    parameters = function_call_str[end_index + 1 :]
    param_dict = {}
    for match in self.tool_call_parameter_regex.findall(parameters):
        match_text = match[0] if match[0] else match[1]
        idx = match_text.index(">")
        param_name = match_text[:idx]
        param_value = str(match_text[idx + 1 :])
        # Strip the single leading/trailing newline added by the template.
        if param_value.startswith("\n"):
            param_value = param_value[1:]
        if param_value.endswith("\n"):
            param_value = param_value[:-1]

        param_dict[param_name] = convert_param_value(
            param_value, param_name, param_config, function_name
        )
    return ToolCall(
        type="function",
        function=FunctionCall(
            name=function_name, arguments=json.dumps(param_dict, ensure_ascii=False)
        ),
    )
|
228 |
+
|
229 |
+
def _get_function_calls(self, model_output: str) -> List[str]:
|
230 |
+
# Find all tool calls
|
231 |
+
matched_ranges = self.tool_call_regex.findall(model_output)
|
232 |
+
raw_tool_calls = [
|
233 |
+
match[0] if match[0] else match[1] for match in matched_ranges
|
234 |
+
]
|
235 |
+
|
236 |
+
# Back-off strategy if no tool_call tags found
|
237 |
+
if len(raw_tool_calls) == 0:
|
238 |
+
raw_tool_calls = [model_output]
|
239 |
+
|
240 |
+
raw_function_calls = []
|
241 |
+
for tool_call in raw_tool_calls:
|
242 |
+
raw_function_calls.extend(self.tool_call_function_regex.findall(tool_call))
|
243 |
+
|
244 |
+
function_calls = [
|
245 |
+
match[0] if match[0] else match[1] for match in raw_function_calls
|
246 |
+
]
|
247 |
+
return function_calls
|
248 |
+
|
249 |
+
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Parse a complete (non-streaming) response for XML-style tool calls.

    Returns the parsed calls plus any plain-text content that preceded
    the first tool-call marker; on any parse failure the whole output is
    returned as content with no calls.
    """
    # Fast path: nothing resembling a function call in the output.
    if self.tool_call_prefix not in model_output:
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )

    try:
        function_calls = self._get_function_calls(model_output)
        if not function_calls:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        tool_calls = [
            self._parse_xml_function_call(fc, request.tools)
            for fc in function_calls
        ]

        # Record the parsed calls so the serving layer can emit
        # finish_reason="tool_calls".
        self.prev_tool_call_arr.clear()
        for call in tool_calls:
            if call:
                self.prev_tool_call_arr.append(
                    {
                        "name": call.function.name,
                        "arguments": call.function.arguments,
                    }
                )

        # Everything before the first marker is ordinary content; fall
        # back to the <function= prefix when no <tool_call> tag exists.
        prefix_idx = model_output.find(self.tool_call_start_token)
        if prefix_idx < 0:
            prefix_idx = model_output.find(self.tool_call_prefix)
        content = model_output[:prefix_idx]

        return ExtractedToolCallInformation(
            tools_called=(len(tool_calls) > 0),
            tool_calls=tool_calls,
            content=content if content else None,
        )

    except Exception:
        logger.exception("Error in extracting tool call from response.")
        return ExtractedToolCallInformation(
            tools_called=False, tool_calls=[], content=model_output
        )
|
303 |
+
|
304 |
+
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Incrementally convert streamed XML tool-call text into delta messages.

    Driven once per generation step. Maintains a state machine across
    calls (see ``_reset_streaming_state``): plain content is passed
    through, then for each ``<tool_call>`` block it emits a function
    header, an opening ``{``, per-parameter JSON fragments, and a
    closing ``}``. Returns ``None`` when there is nothing to emit for
    this step.
    """
    # If no delta text, return None unless it's an EOS token after tool calls
    if not delta_text:
        # Check if this is an EOS token after all tool calls are complete
        # We check for tool calls in the text even if is_tool_call_started is False
        # because it might have been reset after processing all tools
        if delta_token_ids and self.tool_call_end_token_id not in delta_token_ids:
            # Count complete tool calls
            complete_calls = len(
                self.tool_call_complete_regex.findall(current_text)
            )

            # If we have completed tool calls and populated prev_tool_call_arr
            if complete_calls > 0 and len(self.prev_tool_call_arr) > 0:
                # Check if all tool calls are closed (starts == ends)
                open_calls = current_text.count(
                    self.tool_call_start_token
                ) - current_text.count(self.tool_call_end_token)
                if open_calls == 0:
                    # Return empty delta message to allow finish_reason processing
                    return DeltaMessage(content="")
            elif not self.is_tool_call_started and current_text:
                # This is a regular content response that's now complete
                return DeltaMessage(content="")
        return None

    # Check if this is the first call (reset state if needed)
    if not previous_text:
        self._reset_streaming_state()

    # Update accumulated text
    self.accumulated_text = current_text

    # Check if we need to advance to next tool
    if self.json_closed and not self.in_function:
        # Check if this tool call has ended
        tool_ends = current_text.count(self.tool_call_end_token)
        if tool_ends > self.current_tool_index:
            # This tool has ended, advance to next and clear per-tool flags
            self.current_tool_index += 1
            self.header_sent = False
            self.param_count = 0
            self.json_started = False
            self.json_closed = False

            # Check if there are more tool calls
            tool_starts = current_text.count(self.tool_call_start_token)
            if self.current_tool_index >= tool_starts:
                # No more tool calls
                self.is_tool_call_started = False
            # Continue processing next tool
            return None

    # Handle normal content before tool calls
    if not self.is_tool_call_started:
        # Check if tool call is starting
        if (
            self.tool_call_start_token_id in delta_token_ids
            or self.tool_call_start_token in delta_text
        ):
            self.is_tool_call_started = True
            # Return any content before the tool call
            if self.tool_call_start_token in delta_text:
                content_before = delta_text[
                    : delta_text.index(self.tool_call_start_token)
                ]
                if content_before:
                    return DeltaMessage(content=content_before)
            return None
        else:
            # Check if we're between tool calls - skip whitespace
            if current_text.rstrip().endswith(self.tool_call_end_token):
                # We just ended a tool call, skip whitespace
                if delta_text.strip() == "":
                    return None
            # Normal content, no tool call
            return DeltaMessage(content=delta_text)

    # Check if we're between tool calls (waiting for next one)
    # Count tool calls we've seen vs processed
    tool_starts_count = current_text.count(self.tool_call_start_token)
    if self.current_tool_index >= tool_starts_count:
        # We're past all tool calls, shouldn't be here
        return None

    # We're in a tool call, find the current tool call portion
    # Need to find the correct tool call based on current_tool_index
    tool_starts = []
    idx = 0
    while True:
        idx = current_text.find(self.tool_call_start_token, idx)
        if idx == -1:
            break
        tool_starts.append(idx)
        idx += len(self.tool_call_start_token)

    if self.current_tool_index >= len(tool_starts):
        # No more tool calls to process yet
        return None

    tool_start_idx = tool_starts[self.current_tool_index]
    # Find where this tool call ends (or current position if not ended yet)
    tool_end_idx = current_text.find(self.tool_call_end_token, tool_start_idx)
    if tool_end_idx == -1:
        tool_text = current_text[tool_start_idx:]
    else:
        tool_text = current_text[
            tool_start_idx : tool_end_idx + len(self.tool_call_end_token)
        ]

    # Looking for function header ("<function=NAME>")
    if not self.header_sent:
        if self.tool_call_prefix in tool_text:
            func_start = tool_text.find(self.tool_call_prefix) + len(
                self.tool_call_prefix
            )
            func_end = tool_text.find(">", func_start)

            if func_end != -1:
                # Found complete function name
                self.current_function_name = tool_text[func_start:func_end]
                self.current_tool_id = self._generate_tool_call_id()
                self.header_sent = True
                self.in_function = True

                # IMPORTANT: Add to prev_tool_call_arr immediately when we detect a tool call
                # This ensures finish_reason="tool_calls" even if parsing isn't complete
                already_added = any(
                    tool.get("name") == self.current_function_name
                    for tool in self.prev_tool_call_arr
                )
                if not already_added:
                    self.prev_tool_call_arr.append(
                        {
                            "name": self.current_function_name,
                            "arguments": "{}",  # Placeholder, will be updated later
                        }
                    )

                # Send header with function info
                return DeltaMessage(
                    tool_calls=[
                        DeltaToolCall(
                            index=self.current_tool_index,
                            id=self.current_tool_id,
                            function=DeltaFunctionCall(
                                name=self.current_function_name, arguments=""
                            ),
                            type="function",
                        )
                    ]
                )
        return None

    # We've sent header, now handle function body
    if self.in_function:
        # Send opening brace if not sent yet
        if not self.json_started and not self.parameter_prefix in delta_text:
            self.json_started = True
            return DeltaMessage(
                tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="{"),
                    )
                ]
            )

        # Make sure json_started is set if we're processing parameters
        if not self.json_started:
            self.json_started = True

        # Check for function end in accumulated text
        if not self.json_closed and self.function_end_token in tool_text:
            # Close JSON
            self.json_closed = True

            # Extract the complete tool call to update prev_tool_call_arr with final arguments
            # Find the function content
            func_start = tool_text.find(self.tool_call_prefix) + len(
                self.tool_call_prefix
            )
            func_content_end = tool_text.find(self.function_end_token, func_start)
            if func_content_end != -1:
                func_content = tool_text[func_start:func_content_end]
                # Parse to get the complete arguments
                try:
                    parsed_tool = self._parse_xml_function_call(
                        func_content, request.tools if request else None
                    )
                    if parsed_tool:
                        # Update existing entry in prev_tool_call_arr with complete arguments
                        for i, tool in enumerate(self.prev_tool_call_arr):
                            if tool.get("name") == parsed_tool.function.name:
                                self.prev_tool_call_arr[i]["arguments"] = (
                                    parsed_tool.function.arguments
                                )
                                break
                except Exception:
                    pass  # Ignore parsing errors during streaming

            result = DeltaMessage(
                tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(arguments="}"),
                    )
                ]
            )

            # Reset state for next tool
            self.in_function = False
            self.json_closed = True

            return result

        # Look for parameters
        # Count how many complete parameters we have processed
        complete_params = tool_text.count(self.parameter_end_token)

        # Check if we should start a new parameter
        if not self.in_param and self.param_count < complete_params:
            # Find the unprocessed parameter
            # Count parameter starts
            param_starts = []
            idx = 0
            while True:
                idx = tool_text.find(self.parameter_prefix, idx)
                if idx == -1:
                    break
                param_starts.append(idx)
                idx += len(self.parameter_prefix)

            if len(param_starts) > self.param_count:
                # Process the next parameter
                param_idx = param_starts[self.param_count]
                param_start = param_idx + len(self.parameter_prefix)
                remaining = tool_text[param_start:]

                if ">" in remaining:
                    # We have the complete parameter name
                    name_end = remaining.find(">")
                    self.current_param_name = remaining[:name_end]

                    # Find the parameter value
                    value_start = param_start + name_end + 1
                    value_text = tool_text[value_start:]
                    if value_text.startswith("\n"):
                        value_text = value_text[1:]

                    # Find where this parameter ends
                    param_end_idx = value_text.find(self.parameter_end_token)
                    if param_end_idx != -1:
                        # Complete parameter found
                        param_value = value_text[:param_end_idx]
                        if param_value.endswith("\n"):
                            param_value = param_value[:-1]

                        # Build complete JSON fragment for this parameter;
                        # first parameter has no leading comma.
                        if self.param_count == 0:
                            json_fragment = (
                                '"'
                                + self.current_param_name
                                + '": "'
                                + json.dumps(param_value)[1:-1]
                                + '"'
                            )
                        else:
                            json_fragment = (
                                ', "'
                                + self.current_param_name
                                + '": "'
                                + json.dumps(param_value)[1:-1]
                                + '"'
                            )

                        self.param_count += 1

                        return DeltaMessage(
                            tool_calls=[
                                DeltaToolCall(
                                    index=self.current_tool_index,
                                    function=DeltaFunctionCall(
                                        arguments=json_fragment
                                    ),
                                )
                            ]
                        )

    # Continue parameter value
    if self.in_param:
        if self.parameter_end_token in delta_text:
            # End of parameter
            end_idx = delta_text.find(self.parameter_end_token)
            value_chunk = delta_text[:end_idx]

            # Skip past > if at start
            if not self.current_param_value and ">" in value_chunk:
                gt_idx = value_chunk.find(">")
                value_chunk = value_chunk[gt_idx + 1 :]

            if not self.current_param_value and value_chunk.startswith("\n"):
                value_chunk = value_chunk[1:]

            # Calculate incremental JSON: escape the full value, then emit
            # only the suffix that was not already streamed.
            full_value = self.current_param_value + value_chunk
            prev_escaped = (
                json.dumps(self.current_param_value)[1:-1]
                if self.current_param_value
                else ""
            )
            full_escaped = json.dumps(full_value)[1:-1]
            delta_escaped = full_escaped[len(prev_escaped) :]

            self.in_param = False
            self.current_param_value = ""

            return DeltaMessage(
                tool_calls=[
                    DeltaToolCall(
                        index=self.current_tool_index,
                        function=DeltaFunctionCall(
                            arguments=delta_escaped + '"'
                        ),
                    )
                ]
            )
        else:
            # Continue accumulating value
            value_chunk = delta_text

            # Handle first chunk after param name
            if not self.current_param_value and ">" in value_chunk:
                gt_idx = value_chunk.find(">")
                value_chunk = value_chunk[gt_idx + 1 :]

            if not self.current_param_value and value_chunk.startswith("\n"):
                value_chunk = value_chunk[1:]

            if value_chunk:
                # Stream the escaped delta
                prev_escaped = (
                    json.dumps(self.current_param_value)[1:-1]
                    if self.current_param_value
                    else ""
                )
                self.current_param_value += value_chunk
                full_escaped = json.dumps(self.current_param_value)[1:-1]
                delta_escaped = full_escaped[len(prev_escaped) :]

                if delta_escaped:
                    return DeltaMessage(
                        tool_calls=[
                            DeltaToolCall(
                                index=self.current_tool_index,
                                function=DeltaFunctionCall(
                                    arguments=delta_escaped
                                ),
                            )
                        ]
                    )

    return None
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"151643": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"151644": {
|
13 |
+
"content": "<|im_start|>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"151645": {
|
21 |
+
"content": "<|im_end|>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"151646": {
|
29 |
+
"content": "<|object_ref_start|>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"151647": {
|
37 |
+
"content": "<|object_ref_end|>",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
},
|
44 |
+
"151648": {
|
45 |
+
"content": "<|box_start|>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": true
|
51 |
+
},
|
52 |
+
"151649": {
|
53 |
+
"content": "<|box_end|>",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": true
|
59 |
+
},
|
60 |
+
"151650": {
|
61 |
+
"content": "<|quad_start|>",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": false,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": true
|
67 |
+
},
|
68 |
+
"151651": {
|
69 |
+
"content": "<|quad_end|>",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": false,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": true
|
75 |
+
},
|
76 |
+
"151652": {
|
77 |
+
"content": "<|vision_start|>",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": false,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": true
|
83 |
+
},
|
84 |
+
"151653": {
|
85 |
+
"content": "<|vision_end|>",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": false,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": true
|
91 |
+
},
|
92 |
+
"151654": {
|
93 |
+
"content": "<|vision_pad|>",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": false,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": true
|
99 |
+
},
|
100 |
+
"151655": {
|
101 |
+
"content": "<|image_pad|>",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": false,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": true
|
107 |
+
},
|
108 |
+
"151656": {
|
109 |
+
"content": "<|video_pad|>",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": false,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": true
|
115 |
+
},
|
116 |
+
"151657": {
|
117 |
+
"content": "<tool_call>",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": false,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": false
|
123 |
+
},
|
124 |
+
"151658": {
|
125 |
+
"content": "</tool_call>",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": false,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": false
|
131 |
+
},
|
132 |
+
"151659": {
|
133 |
+
"content": "<|fim_prefix|>",
|
134 |
+
"lstrip": false,
|
135 |
+
"normalized": false,
|
136 |
+
"rstrip": false,
|
137 |
+
"single_word": false,
|
138 |
+
"special": false
|
139 |
+
},
|
140 |
+
"151660": {
|
141 |
+
"content": "<|fim_middle|>",
|
142 |
+
"lstrip": false,
|
143 |
+
"normalized": false,
|
144 |
+
"rstrip": false,
|
145 |
+
"single_word": false,
|
146 |
+
"special": false
|
147 |
+
},
|
148 |
+
"151661": {
|
149 |
+
"content": "<|fim_suffix|>",
|
150 |
+
"lstrip": false,
|
151 |
+
"normalized": false,
|
152 |
+
"rstrip": false,
|
153 |
+
"single_word": false,
|
154 |
+
"special": false
|
155 |
+
},
|
156 |
+
"151662": {
|
157 |
+
"content": "<|fim_pad|>",
|
158 |
+
"lstrip": false,
|
159 |
+
"normalized": false,
|
160 |
+
"rstrip": false,
|
161 |
+
"single_word": false,
|
162 |
+
"special": false
|
163 |
+
},
|
164 |
+
"151663": {
|
165 |
+
"content": "<|repo_name|>",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": false,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false,
|
170 |
+
"special": false
|
171 |
+
},
|
172 |
+
"151664": {
|
173 |
+
"content": "<|file_sep|>",
|
174 |
+
"lstrip": false,
|
175 |
+
"normalized": false,
|
176 |
+
"rstrip": false,
|
177 |
+
"single_word": false,
|
178 |
+
"special": false
|
179 |
+
},
|
180 |
+
"151665": {
|
181 |
+
"content": "<tool_response>",
|
182 |
+
"lstrip": false,
|
183 |
+
"normalized": false,
|
184 |
+
"rstrip": false,
|
185 |
+
"single_word": false,
|
186 |
+
"special": false
|
187 |
+
},
|
188 |
+
"151666": {
|
189 |
+
"content": "</tool_response>",
|
190 |
+
"lstrip": false,
|
191 |
+
"normalized": false,
|
192 |
+
"rstrip": false,
|
193 |
+
"single_word": false,
|
194 |
+
"special": false
|
195 |
+
},
|
196 |
+
"151667": {
|
197 |
+
"content": "<think>",
|
198 |
+
"lstrip": false,
|
199 |
+
"normalized": false,
|
200 |
+
"rstrip": false,
|
201 |
+
"single_word": false,
|
202 |
+
"special": false
|
203 |
+
},
|
204 |
+
"151668": {
|
205 |
+
"content": "</think>",
|
206 |
+
"lstrip": false,
|
207 |
+
"normalized": false,
|
208 |
+
"rstrip": false,
|
209 |
+
"single_word": false,
|
210 |
+
"special": false
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"additional_special_tokens": [
|
214 |
+
"<|im_start|>",
|
215 |
+
"<|im_end|>",
|
216 |
+
"<|object_ref_start|>",
|
217 |
+
"<|object_ref_end|>",
|
218 |
+
"<|box_start|>",
|
219 |
+
"<|box_end|>",
|
220 |
+
"<|quad_start|>",
|
221 |
+
"<|quad_end|>",
|
222 |
+
"<|vision_start|>",
|
223 |
+
"<|vision_end|>",
|
224 |
+
"<|vision_pad|>",
|
225 |
+
"<|image_pad|>",
|
226 |
+
"<|video_pad|>"
|
227 |
+
],
|
228 |
+
"bos_token": null,
|
229 |
+
"chat_template": "{% macro render_item_list(item_list, tag_name='required') %}\n {%- if item_list is defined and item_list is iterable and item_list | length > 0 %}\n {%- if tag_name %}{{- '\\n<' ~ tag_name ~ '>' -}}{% endif %}\n {{- '[' }}\n {%- for item in item_list -%}\n {%- if loop.index > 1 %}{{- \", \"}}{% endif -%}\n {%- if item is string -%}\n {{ \"`\" ~ item ~ \"`\" }}\n {%- else -%}\n {{ item }}\n {%- endif -%}\n {%- endfor -%}\n {{- ']' }}\n {%- if tag_name %}{{- '</' ~ tag_name ~ '>' -}}{% endif %}\n {%- endif %}\n{% endmacro %}\n\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{%- if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n\n{%- if system_message is defined %}\n {{- \"<|im_start|>system\\n\" + system_message }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- \"<|im_start|>system\\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks.\" }}\n {%- endif %}\n{%- endif %}\n{%- if tools is iterable and tools | length > 0 %}\n {{- \"\\n\\nYou have access to the following functions:\\n\\n\" }}\n {{- \"<tools>\" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- \"\\n<function>\\n<name>\" ~ tool.name ~ \"</name>\" }}\n {{- '\\n<description>' ~ (tool.description | trim) ~ '</description>' }}\n {{- '\\n<parameters>' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- '\\n<parameter>' }}\n {{- '\\n<name>' ~ param_name ~ '</name>' }}\n {%- if param_fields.type is defined %}\n {{- '\\n<type>' ~ (param_fields.type | string) ~ '</type>' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- '\\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}\n {%- endif %}\n {{- 
render_item_list(param_fields.enum, 'enum') }}\n {%- set handled_keys = ['type', 'description', 'enum', 'required'] %}\n {%- for json_key in param_fields.keys() | reject(\"in\", handled_keys) %}\n {%- set normed_json_key = json_key | replace(\"-\", \"_\") | replace(\" \", \"_\") | replace(\"$\", \"\") %}\n {%- if param_fields[json_key] is mapping %}\n {{- '\\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | tojson | safe) ~ '</' ~ normed_json_key ~ '>' }}\n {%- else %}\n {{-'\\n<' ~ normed_json_key ~ '>' ~ (param_fields[json_key] | string) ~ '</' ~ normed_json_key ~ '>' }}\n {%- endif %}\n {%- endfor %}\n {{- render_item_list(param_fields.required, 'required') }}\n {{- '\\n</parameter>' }}\n {%- endfor %}\n {{- render_item_list(tool.parameters.required, 'required') }}\n {{- '\\n</parameters>' }}\n {%- if tool.return is defined %}\n {%- if tool.return is mapping %}\n {{- '\\n<return>' ~ (tool.return | tojson | safe) ~ '</return>' }}\n {%- else %}\n {{- '\\n<return>' ~ (tool.return | string) ~ '</return>' }}\n {%- endif %}\n {%- endif %}\n {{- '\\n</function>' }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>\\nvalue_1\\n</parameter>\\n<parameter=example_parameter_2>\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\nReminder:\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- Required parameters MUST be specified\\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n</IMPORTANT>' 
}}\n{%- endif %}\n{%- if system_message is defined %}\n {{- '<|im_end|>\\n' }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in loop_messages %}\n {%- if message.role == \"assistant\" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}\n {{- '\\n' + message.content | trim + '\\n' }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- '<parameter=' + args_name + '>\\n' }}\n {%- set args_value = args_value if args_value is string else args_value | string %}\n {{- args_value }}\n {{- '\\n</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"user\" or message.role == \"system\" or message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>user\\n' }}\n {%- endif %}\n {{- '<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>\\n' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>\\n' }}\n {%- elif loop.last %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
230 |
+
"clean_up_tokenization_spaces": false,
|
231 |
+
"eos_token": "<|im_end|>",
|
232 |
+
"errors": "replace",
|
233 |
+
"model_max_length": 1048576,
|
234 |
+
"pad_token": "<|endoftext|>",
|
235 |
+
"split_special_tokens": false,
|
236 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
237 |
+
"unk_token": null,
|
238 |
+
"add_bos_token": false
|
239 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|