zRzRzRzRzRzRzR committed on
Commit 325c020 · 1 Parent(s): 9ec8fec
Files changed (1): app.py +36 -74
app.py CHANGED
@@ -1,5 +1,4 @@
 import copy
-import re
 import time
 import html
 from openai import OpenAI
@@ -11,7 +10,6 @@ stop_generation = False
 def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
     global stop_generation
     client = OpenAI()
-
     response = client.chat.completions.create(
         model="GLM-4.5",
         messages=messages,
@@ -19,72 +17,43 @@ def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
         stream=True,
         max_tokens=65536,
         extra_body={
-            "thinking":
-            {
-                "type": "enabled" if thinking_enabled else "disabled",
-            }
+            "thinking": {
+                "type": "enabled" if thinking_enabled else "disabled",
+            }
         }
     )
     for chunk in response:
         if stop_generation:
             break
-
         if chunk.choices and chunk.choices[0].delta:
-            delta = chunk.choices[0].delta
-            yield delta
+            yield chunk.choices[0].delta


 class GLM45Model:
     def __init__(self):
-        self.reset_state()
+        self.accumulated_content = ""
+        self.accumulated_reasoning = ""

     def reset_state(self):
-        self.accumulated_text = ""
-
-    def _strip_html(self, text: str) -> str:
-        return re.sub(r"<[^>]+>", "", text).strip()
-
-    def _wrap_text(self, text: str):
-        return [{"type": "text", "text": text}]
-
-    def _parse_thinking_content(self, text: str):
-        thinking_content = ""
-        regular_content = ""
-
-        if "<think>" in text:
-            think_pattern = r'<think>(.*?)</think>'
-            think_match = re.search(think_pattern, text, re.DOTALL)
-
-            if think_match:
-                thinking_content = think_match.group(1).strip()
-                regular_content = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
-            else:
-                think_start = text.find("<think>")
-                if think_start != -1:
-                    thinking_content = text[think_start + 7:]
-                    regular_content = text[:think_start].strip()
-        else:
-            regular_content = text
-
-        return thinking_content, regular_content
-
-    def _render_response(self, thinking_content: str, regular_content: str, skip_think: bool = False):
+        self.accumulated_content = ""
+        self.accumulated_reasoning = ""
+
+    def _render_response(self, reasoning_content, regular_content, skip_think=False):
         html_parts = []

-        if thinking_content and not skip_think:
-            thinking_escaped = html.escape(thinking_content).replace("\n", "<br>")
+        if reasoning_content and not skip_think:
+            reasoning_escaped = html.escape(reasoning_content).replace("\n", "<br>")
             think_html = (
-                "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
+                "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>🤔 Thinking</summary>"
                 "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
-                + thinking_escaped +
+                + reasoning_escaped +
                 "</div></details>"
             )
             html_parts.append(think_html)

         if regular_content:
-            content_escaped = html.escape(regular_content)
-            content_formatted = content_escaped.replace("\n", "<br>")
-            content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
+            content_escaped = html.escape(regular_content).replace("\n", "<br>")
+            content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_escaped}</div>"
             html_parts.append(content_html)

         return "".join(html_parts)
@@ -93,21 +62,20 @@ class GLM45Model:
         msgs = []
         if sys_prompt.strip():
             msgs.append({"role": "system", "content": [{"type": "text", "text": sys_prompt.strip()}]})
+
         for h in raw_hist:
             if h["role"] == "user":
-                msgs.append({"role": "user", "content": self._wrap_text(h["content"])})
+                msgs.append({"role": "user", "content": [{"type": "text", "text": h["content"]}]})
             else:
-                raw = re.sub(r"<details.*?</details>", "", h["content"], flags=re.DOTALL)
-                clean_content = self._strip_html(raw).strip()
+                clean_content = html.escape(h["content"]).replace("<br>", "\n")
                 if clean_content:
-                    msgs.append({"role": "assistant", "content": self._wrap_text(clean_content)})
+                    msgs.append({"role": "assistant", "content": [{"type": "text", "text": clean_content}]})
         return msgs

-    def stream_generate(self, raw_hist, sys_prompt: str, thinking_enabled: bool = True, temperature: float = 1.0):
+    def stream_generate(self, raw_hist, sys_prompt, thinking_enabled=True, temperature=1.0):
         global stop_generation
         stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-
         self.reset_state()

         try:
@@ -115,20 +83,16 @@ class GLM45Model:
                 if stop_generation:
                     break

-                delta_content = ""
                 if hasattr(delta, 'content') and delta.content:
-                    delta_content = delta.content
-                elif isinstance(delta, dict) and 'content' in delta and delta['content']:
-                    delta_content = delta['content']
+                    self.accumulated_content += delta.content
+
+                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
+                    self.accumulated_reasoning += delta.reasoning_content

-                if delta_content:
-                    self.accumulated_text += delta_content
-                    thinking_content, regular_content = self._parse_thinking_content(self.accumulated_text)
-                    yield self._render_response(thinking_content, regular_content, not thinking_enabled)
+                yield self._render_response(self.accumulated_reasoning, self.accumulated_content, not thinking_enabled)

         except Exception as e:
-            error_msg = f"Error during streaming: {str(e)}"
-            yield self._render_response("", error_msg)
+            yield self._render_response("", f"Error: {str(e)}")


 glm45 = GLM45Model()
@@ -141,10 +105,10 @@ def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
     if not msg.strip():
        return raw_hist, copy.deepcopy(raw_hist), ""

-    user_rec = {"role": "user", "content": msg.strip()}
     if raw_hist is None:
         raw_hist = []
-    raw_hist.append(user_rec)
+
+    raw_hist.append({"role": "user", "content": msg.strip()})
     place = {"role": "assistant", "content": ""}
     raw_hist.append(place)

@@ -157,12 +121,9 @@ def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
             place["content"] = chunk
             yield raw_hist, copy.deepcopy(raw_hist), ""
     except Exception as e:
-        error_content = f"<div style='color: red;'>Error: {html.escape(str(e))}</div>"
-        place["content"] = error_content
+        place["content"] = f"<div style='color: red;'>Error: {html.escape(str(e))}</div>"
         yield raw_hist, copy.deepcopy(raw_hist), ""

-    yield raw_hist, copy.deepcopy(raw_hist), ""
-

 def reset():
     global stop_generation
@@ -177,13 +138,14 @@ with demo:
     gr.HTML(
         "<div style='text-align:center;font-size:32px;font-weight:bold;margin-bottom:10px;'>GLM-4.5 API Demo</div>"
         "<div style='text-align:center;color:red;font-size:16px;margin-bottom:20px;'>"
-        "This demo uses the API version of the service for faster response.<br>"
-        "Chat only. For tool use, MCP support, and web search, please refer to the API.</div>"
-        "<div style='text-align:center;'><a href='https://huggingface.co/zai-org/GLM-4.5'>Model Hub</a> | "
+        "This demo uses the API version of the service for faster response speeds.<br>"
+        "Only chat functionality is supported. For tool usage, MCP support, and web search, please refer to the API documentation.</div>"
+        "<div style='text-align:center;'><a href='https://huggingface.co/collections/zai-org/glm-45-687c621d34bda8c9e4bf503b'>Model</a> | "
         "<a href='https://github.com/zai-org/GLM-4.5'>Github</a> | "
         "<a href='http://z.ai/blog/glm-4.5'>Blog</a> | "
-        "<a href='https://docs.bigmodel.cn/cn/guide/models/text/glm-4.5'>API</a></div>"
+        "<a href='https://docs.bigmodel.cn/cn/guide/models/text/glm-4.5'>API Docs</a></div>"
     )
+
     raw_history = gr.State([])

     with gr.Row():
@@ -204,8 +166,8 @@ with demo:
            thinking_toggle = gr.Checkbox(label="Enable Thinking", value=True)
            gr.HTML(
                "<div style='color:red;font-size:12px;margin-top:5px;margin-bottom:15px;'>"
-                "ON: Enable model thinking.<br>"
-                "OFF: Not enable model thinking, the model will directly answer the question without reasoning."
+                "Enabled: Activates the model's thinking capability. The model will decide whether to think based on the situation and may return empty thinking content.<br>"
+                "Disabled: Disables the model's thinking capability. The model will answer questions directly without reasoning."
                "</div>"
            )
            temperature_slider = gr.Slider(
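
Note on the request side: the old client-side parsing of <think> tags (_parse_thinking_content, _strip_html, and the re import) is removed; reasoning is now requested per call through extra_body and streamed back in a separate field. A minimal sketch of the new request shape, assuming an OpenAI-compatible endpoint serving GLM-4.5 (the base_url and api_key here are illustrative placeholders, not part of the commit, which calls OpenAI() with no arguments and relies on the environment):

    from openai import OpenAI

    # Placeholder connection details for illustration only.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    response = client.chat.completions.create(
        model="GLM-4.5",
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        stream=True,
        max_tokens=65536,
        # extra_body forwards provider-specific fields through the SDK unchanged;
        # the commit uses it to enable or disable reasoning per request.
        extra_body={"thinking": {"type": "enabled"}},
    )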
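Rendering is structurally unchanged: the reasoning trace goes into a collapsible <details> block (now headed 🤔 rather than 💭) and the answer into an HTML-escaped <div>, with newlines converted to <br>. A quick way to preview the markup, using the GLM45Model class from the file above:

    glm = GLM45Model()
    preview = glm._render_response(
        reasoning_content="Rayleigh scattering favors short wavelengths...",
        regular_content="The sky looks blue because shorter wavelengths scatter more.",
    )
    print(preview)  # <details open>...🤔 Thinking...</details><div ...>The sky looks blue...</div>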