Update main.py
main.py
CHANGED
@@ -2,6 +2,7 @@ import os
 import json
 import time
 import uuid
+import sys
 from typing import List, Dict, Optional, Union, Generator, Any
 
 # --- Core Dependencies ---
@@ -13,84 +14,98 @@ from curl_cffi.requests import Session
 from curl_cffi import CurlError
 
 # --- Environment Configuration ---
-QODO_API_KEY = os.getenv("QODO_API_KEY", …)
-QODO_URL = os.getenv("QODO_URL", "https…
-QODO_INFO_URL = os.getenv("QODO_INFO_URL", "…
+QODO_API_KEY = os.getenv("QODO_API_KEY")  # No default key to encourage setting it explicitly
+QODO_URL = os.getenv("QODO_URL", "https://*")
+QODO_INFO_URL = os.getenv("QODO_INFO_URL", "*")
 
-# --- Recreated/Mocked webscout Dependencies ---
+# --- Recreated/Mocked webscout & OpenAI Dependencies ---
 # This section recreates the necessary classes and functions
 # to make the QodoAI provider self-contained.
 
-# webscout.exceptions
 class exceptions:
     class FailedToGenerateResponseError(Exception):
         pass
 
-# webscout.AIutel.sanitize_stream
 def sanitize_stream(data: Generator[bytes, None, None], content_extractor: callable, **kwargs: Any) -> Generator[str, None, None]:
-    """
-    Parses a stream of byte chunks, extracts complete JSON objects,
-    and yields content processed by the content_extractor.
-    """
     buffer = ""
     for byte_chunk in data:
         buffer += byte_chunk.decode('utf-8', errors='ignore')
+        obj_start_indices = [i for i, char in enumerate(buffer) if char == '{']
+        if not obj_start_indices:
+            continue
 
         start_index = 0
-        while start_index < len(buffer):
-            obj_start = buffer.find('{', start_index)
-            if obj_start == -1:
-                break
-
-            # Find the corresponding end brace
-            brace_count = 1
-            i = obj_start + 1
-            while i < len(buffer) and brace_count > 0:
+        for obj_start in obj_start_indices:
+            if obj_start < start_index:
+                continue
+
+            brace_count = 0
+            obj_end = -1
+            for i in range(obj_start, len(buffer)):
                 if buffer[i] == '{':
                     brace_count += 1
                 elif buffer[i] == '}':
                     brace_count -= 1
-                i += 1
+                if brace_count == 0:
+                    obj_end = i
+                    break
 
-            if brace_count == 0:
-                json_str = buffer[obj_start:i]
+            if obj_end != -1:
+                json_str = buffer[obj_start:obj_end + 1]
                 try:
                     json_obj = json.loads(json_str)
                     content = content_extractor(json_obj)
                     if content:
                         yield content
+                    start_index = obj_end + 1
                 except json.JSONDecodeError:
-                    pass
-            start_index = i
+                    continue
+        buffer = buffer[start_index:]
+
+# --- OpenAI-Compatible Pydantic Models ---
+
+# Request Models
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+    name: Optional[str] = None
+    tool_calls: Optional[List[Dict]] = None
+    tool_call_id: Optional[str] = None
+
+class Function(BaseModel):
+    name: str
+    description: Optional[str] = None
+    parameters: Dict[str, Any]
 
-# webscout.Provider.OPENAI.utils (Pydantic Models)
 class Tool(BaseModel):
     type: str = "function"
-    function: …
+    function: Function
 
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    max_tokens: Optional[int] = 2049
+    stream: bool = False
+    temperature: Optional[float] = 1.0
+    top_p: Optional[float] = 1.0
+    tools: Optional[List[Tool]] = None
+    tool_choice: Optional[Union[str, Dict]] = None
+
+# Response Models
 class ChatCompletionMessage(BaseModel):
     role: str
     content: Optional[str] = None
     tool_calls: Optional[List[Dict]] = None
 
-class Choice(BaseModel):
-    index: int
-    message: Optional[ChatCompletionMessage] = None
-    finish_reason: Optional[str] = None
-    delta: Optional[Dict] = Field(default_factory=dict)
-
 class ChoiceDelta(BaseModel):
     content: Optional[str] = None
     role: Optional[str] = None
 
+class Choice(BaseModel):
+    index: int
+    message: ChatCompletionMessage
+    finish_reason: Optional[str] = "stop"
+
 class ChoiceStreaming(BaseModel):
     index: int
     delta: ChoiceDelta
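The rewritten sanitize_stream keeps a persistent buffer and does brace-balanced scanning, so a JSON object split across two network chunks is only parsed once it is complete. A minimal sketch of that behavior; the chunk bytes and the extractor lambda below are made-up test inputs, not part of the commit:

    # Two chunks that split the first JSON object mid-string, followed by a second object.
    chunks = iter([b'{"data": {"content": "Hel', b'lo"}}{"data": {"content": " world"}}'])
    extractor = lambda obj: obj.get("data", {}).get("content")
    print("".join(sanitize_stream(chunks, extractor)))  # prints: Hello world

Nothing is yielded for the first chunk (the braces never balance), and both objects are emitted once the second chunk completes the buffer.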
@@ -117,7 +132,8 @@ class ChatCompletionChunk(BaseModel):
     object: str = "chat.completion.chunk"
     usage: Optional[CompletionUsage] = None
 
+# --- Base Provider Structure ---
 class BaseCompletions:
     def __init__(self, client: Any):
         self._client = client
@@ -130,13 +146,7 @@ class OpenAICompatibleProvider:
     def __init__(self, **kwargs: Any):
         pass
 
-# …
-try:
-    from webscout.litagent import LitAgent
-except ImportError:
-    LitAgent = None
-
-# --- QodoAI Provider Code (from the prompt) ---
+# --- QodoAI Provider Logic ---
 
 class Completions(BaseCompletions):
     def create(
@@ -147,22 +157,23 @@ class Completions(BaseCompletions):
         stream: bool = False,
         **kwargs: Any
     ) -> Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]]:
+
+        # Warn about unsupported parameters
+        unsupported_params = ['temperature', 'top_p', 'tools', 'tool_choice', 'max_tokens']
+        for param in unsupported_params:
+            if param in kwargs:
+                print(f"Warning: Parameter '{param}' is not supported by the QodoAI provider and will be ignored.", file=sys.stderr)
+
         user_prompt = ""
         for message in reversed(messages):
             if message.get("role") == "user":
                 user_prompt = message.get("content", "")
                 break
-
         if not user_prompt:
-            raise ValueError("No user message found in messages")
+            raise ValueError("No user message with 'role': 'user' found in messages.")
 
         payload = self._client._build_payload(user_prompt, model)
         payload["stream"] = stream
-        payload["custom_model"] = model
 
         request_id = f"chatcmpl-{uuid.uuid4()}"
         created_time = int(time.time())
@@ -172,102 +183,45 @@ class Completions(BaseCompletions):
         else:
             return self._create_non_stream(request_id, created_time, model, payload, user_prompt)
 
-    def _create_stream(
-        self, request_id: str, created_time: int, model: str, payload: Dict[str, Any], user_prompt: str
-    ) -> Generator[ChatCompletionChunk, None, None]:
+    def _create_stream(self, request_id, created_time, model, payload, user_prompt) -> Generator[ChatCompletionChunk, None, None]:
         try:
-            response = self._client.session.post(
-                self._client.url,
-                json=payload,
-                stream=True,
-                timeout=self._client.timeout,
-                impersonate="chrome110"
-            )
-
-            completion_tokens = 0
-
-            processed_stream = sanitize_stream(
-                data=response.iter_content(chunk_size=None),
-                content_extractor=QodoAI._qodo_extractor
-            )
-
-            for content_chunk in processed_stream:
-                if content_chunk:
-                    completion_tokens += len(content_chunk.split())
-
-                    delta = ChoiceDelta(content=content_chunk, role="assistant")
-                    choice = ChoiceStreaming(index=0, delta=delta, finish_reason=None)
-                    chunk = ChatCompletionChunk(id=request_id, choices=[choice], created=created_time, model=model)
-                    yield chunk
-
-            final_choice = ChoiceStreaming(index=0, delta=ChoiceDelta(), finish_reason="stop")
-            yield ChatCompletionChunk(id=request_id, choices=[final_choice], created=created_time, model=model)
-
-        except CurlError as e:
-            raise exceptions.FailedToGenerateResponseError(f"Request failed (CurlError): {e}")
+            with self._client.session.post(self._client.url, json=payload, stream=True, timeout=self._client.timeout, impersonate="chrome110") as response:
+                if response.status_code == 401:
+                    raise exceptions.FailedToGenerateResponseError("Invalid Qodo API key provided.")
+                response.raise_for_status()
+
+                for content_chunk in sanitize_stream(response.iter_content(chunk_size=8192), QodoAI._qodo_extractor):
+                    if content_chunk:
+                        delta = ChoiceDelta(content=content_chunk, role="assistant")
+                        choice = ChoiceStreaming(index=0, delta=delta, finish_reason=None)
+                        yield ChatCompletionChunk(id=request_id, choices=[choice], created=created_time, model=model)
+
+            final_delta = ChoiceDelta()
+            final_choice = ChoiceStreaming(index=0, delta=final_delta, finish_reason="stop")
+            yield ChatCompletionChunk(id=request_id, choices=[final_choice], created=created_time, model=model)
         except Exception as e:
-            raise exceptions.FailedToGenerateResponseError(f"…
+            raise exceptions.FailedToGenerateResponseError(f"Stream generation failed: {e}")
 
-    def _create_non_stream(
-        self, request_id: str, created_time: int, model: str, payload: Dict[str, Any], user_prompt: str
-    ) -> ChatCompletion:
+    def _create_non_stream(self, request_id, created_time, model, payload, user_prompt) -> ChatCompletion:
         try:
             payload["stream"] = False
-            response = self._client.session.post(
-                self._client.url,
-                json=payload,
-                timeout=self._client.timeout,
-                impersonate="chrome110"
-            )
-
+            response = self._client.session.post(self._client.url, json=payload, timeout=self._client.timeout, impersonate="chrome110")
+
             if response.status_code == 401:
                 raise exceptions.FailedToGenerateResponseError("Invalid Qodo API key provided.")
-
-                raise IOError(f"Qodo request failed with status code {response.status_code}: {response.text}")
+            response.raise_for_status()
 
-            full_response = ""
-
-            # This logic parses concatenated JSON objects from the response body.
-            current_json = ""
-            brace_count = 0
-            json_objects = []
-            lines = response_text.strip().split('\n')
-            for line in lines:
-                current_json += line
-                brace_count += line.count('{') - line.count('}')
-                if brace_count == 0 and current_json:
-                    json_objects.append(current_json)
-                    current_json = ""
-
-            for json_str in json_objects:
-                try:
-                    json_obj = json.loads(json_str)
-                    content = QodoAI._qodo_extractor(json_obj)
-                    if content:
-                        full_response += content
-                except json.JSONDecodeError:
-                    pass
+            full_response = "".join(list(sanitize_stream(iter([response.content]), QodoAI._qodo_extractor)))
 
             prompt_tokens = len(user_prompt.split())
             completion_tokens = len(full_response.split())
-            total_tokens = prompt_tokens + completion_tokens
 
             message = ChatCompletionMessage(role="assistant", content=full_response)
             choice = Choice(index=0, message=message, finish_reason="stop")
-            usage = CompletionUsage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens)
+            usage = CompletionUsage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=prompt_tokens + completion_tokens)
             return ChatCompletion(id=request_id, choices=[choice], created=created_time, model=model, usage=usage)
-
-        except CurlError as e:
-            raise exceptions.FailedToGenerateResponseError(f"Request failed (CurlError): {e}")
         except Exception as e:
-            raise exceptions.FailedToGenerateResponseError(f"…
+            raise exceptions.FailedToGenerateResponseError(f"Non-stream generation failed: {e}")
 
 class Chat(BaseChat):
     def __init__(self, client: 'QodoAI'):
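The non-stream path now reuses the streaming parser by wrapping the already-complete response body in a single-element iterator, replacing the old line-based concatenated-JSON logic (which also referenced an undefined response_text). A sketch of the equivalence, with a made-up body:

    body = b'{"data": {"content": "Hi"}}{"data": {"content": " there"}}'
    full_response = "".join(sanitize_stream(iter([body]), QodoAI._qodo_extractor))  # "Hi there"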
@@ -278,26 +232,14 @@ class QodoAI(OpenAICompatibleProvider):
 
     def __init__(self, api_key: str, **kwargs: Any):
         super().__init__(api_key=api_key, **kwargs)
-
-        self.url = QODO_URL
-        self.info_url = QODO_INFO_URL
-        self.timeout = 600
-        self.api_key = api_key
-
+        self.url, self.info_url, self.timeout, self.api_key = QODO_URL, QODO_INFO_URL, 600, api_key
         self.user_agent = "axios/1.10.0"
         self.session_id = self._get_session_id()
-        self.request_id = str(uuid.uuid4())
-
         self.headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Connection": "close", "Content-Type": "application/json",
-            "host": "api.cli.qodo.ai", "Request-id": self.request_id,
-            "Session-id": self.session_id, "User-Agent": self.user_agent,
+            "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json",
+            "User-Agent": self.user_agent, "Session-id": self.session_id
         }
-
-        self.session = Session()
-        self.session.headers.update(self.headers)
+        self.session = Session(headers=self.headers)
         self.chat = Chat(self)
 
     @staticmethod
@@ -305,24 +247,13 @@ class QodoAI(OpenAICompatibleProvider):
         if isinstance(chunk, dict):
             data = chunk.get("data", {})
             if isinstance(data, dict):
-                tool_args = data.get("tool_args", {})
-                if isinstance(tool_args, dict) and "content" in tool_args:
-                    return tool_args.get("content")
-                if "content" in data:
-                    return data["content"]
+                content = data.get("content") or (data.get("tool_args", {}) or {}).get("content")
+                if content: return content
         return None
 
     def _get_session_id(self) -> str:
         try:
-            temp_session = Session()
-            temp_headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "User-Agent": self.user_agent,
-            }
-            temp_session.headers.update(temp_headers)
-
-            response = temp_session.get(self.info_url, timeout=self.timeout, impersonate="chrome110")
-
+            response = Session(headers={"Authorization": f"Bearer {self.api_key}"}).get(self.info_url, timeout=self.timeout)
             if response.status_code == 200:
                 return response.json().get("session-id", f"fallback-{uuid.uuid4()}")
             elif response.status_code == 401:
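The condensed _qodo_extractor prefers data.content and falls back to data.tool_args.content in a single expression. Expected behavior on sketched chunk shapes (illustrative inputs, not captured API traffic):

    QodoAI._qodo_extractor({"data": {"content": "plain text"}})                  # -> "plain text"
    QodoAI._qodo_extractor({"data": {"tool_args": {"content": "tool output"}}})  # -> "tool output"
    QodoAI._qodo_extractor({"data": {}})                                         # -> None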
@@ -333,13 +264,8 @@ class QodoAI(OpenAICompatibleProvider):
             raise exceptions.FailedToGenerateResponseError(f"Failed to connect to Qodo API to get session_id: {e}")
 
     def _build_payload(self, prompt: str, model: str) -> Dict[str, Any]:
-        return {
-            "agent_type": "cli", "session_id": self.session_id,
-            "user_data": {"extension_version": "0.7.2", "os_platform": "win32"},
-            "tools": {"web_search": []}, "user_request": prompt,
-            "execution_strategy": "act", "custom_model": model, "stream": True
-        }
-
+        return {"agent_type": "cli", "session_id": self.session_id, "user_request": prompt, "custom_model": model, "stream": True}
+
 # --- FastAPI Application ---
@@ -348,57 +274,34 @@ app = FastAPI(
     version="1.0.0"
 )
 
-try:
-    client = QodoAI(api_key=QODO_API_KEY)
-except exceptions.FailedToGenerateResponseError as e:
-    print(f"FATAL: Could not initialize QodoAI client: {e}")
-    print("Please ensure the QODO_API_KEY environment variable is set correctly.")
-    client = None
-
-# --- API Models ---
-
-class Model(BaseModel):
-    id: str
-    object: str = "model"
-    created: int = Field(default_factory=lambda: int(time.time()))
-    owned_by: str = "qodoai"
-
-class ModelList(BaseModel):
-    object: str = "list"
-    data: List[Model]
-
-class ChatCompletionRequest(BaseModel):
-    model: str
-    messages: List[Dict[str, Any]]
-    max_tokens: Optional[int] = 2049
-    stream: bool = False
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    tools: Optional[List[Dict[str, Any]]] = None
-    tool_choice: Optional[str] = None
-
-# --- API Endpoints ---
+client: Optional[QodoAI] = None
 
 @app.on_event("startup")
-async def startup_event():
-    if client is None:
-        raise RuntimeError("…
-
+def startup_event():
+    global client
+    if not QODO_API_KEY:
+        raise RuntimeError("QODO_API_KEY environment variable not set. The server cannot start without an API key.")
+    try:
+        client = QodoAI(api_key=QODO_API_KEY)
+        print("QodoAI client initialized successfully.")
+    except exceptions.FailedToGenerateResponseError as e:
+        raise RuntimeError(f"FATAL: Could not initialize QodoAI client: {e}")
 
-@app.get("/v1/models", response_model=ModelList)
+@app.get("/v1/models", response_model_exclude_none=True)
 async def list_models():
     """Lists the available models from the QodoAI provider."""
-    models = [Model(id=m) for m in QodoAI.AVAILABLE_MODELS]
-    return ModelList(data=models)
+    models_data = [
+        {"id": model_id, "object": "model", "created": int(time.time()), "owned_by": "qodoai"}
+        for model_id in QodoAI.AVAILABLE_MODELS
+    ]
+    return {"object": "list", "data": models_data}
 
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest):
     """Creates a chat completion, supporting both streaming and non-streaming modes."""
     if client is None:
-        raise HTTPException(status_code=…
-
+        raise HTTPException(status_code=503, detail="QodoAI client is not available or failed to initialize.")
+
     params = request.model_dump(exclude_none=True)
 
     try:
@@ -408,12 +311,11 @@ async def create_chat_completion(request: ChatCompletionRequest):
                     generator = client.chat.completions.create(**params)
                     for chunk in generator:
                         yield f"data: {chunk.model_dump_json()}\n\n"
-                    yield "data: [DONE]\n\n"
                 except exceptions.FailedToGenerateResponseError as e:
-                    error_payload = {"error": {"message": str(e), "type": "api_error"}}
+                    error_payload = {"error": {"message": str(e), "type": "api_error", "code": 500}}
                     yield f"data: {json.dumps(error_payload)}\n\n"
+                finally:
                     yield "data: [DONE]\n\n"
-
             return StreamingResponse(stream_generator(), media_type="text/event-stream")
         else:
             response = client.chat.completions.create(**params)
@@ -425,4 +327,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
         raise HTTPException(status_code=400, detail=str(e))
 
 if __name__ == "__main__":
+    if not QODO_API_KEY:
+        print("Error: The QODO_API_KEY environment variable must be set.", file=sys.stderr)
+        sys.exit(1)
     uvicorn.run(app, host="0.0.0.0", port=8000)
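With the startup and __main__ guards in place, a quick way to exercise the proxy end to end is to consume the SSE stream from the chat endpoint. A minimal smoke-test sketch; the model id is a placeholder (list real ids via GET /v1/models first), and localhost:8000 assumes the default uvicorn settings above:

    import requests, json

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",
        json={"model": "<model-id>", "messages": [{"role": "user", "content": "Hello"}], "stream": True},
        stream=True,
    )
    for line in resp.iter_lines():
        if line and line != b"data: [DONE]":
            chunk = json.loads(line.decode().removeprefix("data: "))
            # Error payloads carry an "error" key instead of "choices", so iterate defensively.
            for choice in chunk.get("choices", []):
                print(choice.get("delta", {}).get("content") or "", end="", flush=True)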