Spaces:

fastrtc
/

talk-to-oai-gpt-oss-20b

Running

App Files Files Community

freddyaboulton HF Staff committed 15 days ago

Commit

22d4d93

verified ·

1 Parent(s): be8c5f4

Create app.py

Browse files

Files changed (1) hide show

app.py +89 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import numpy as np
+import gradio as gr
+from dotenv import load_dotenv
+from fastrtc import (
+    ReplyOnPause,
+    Stream,
+    AdditionalOutputs,
+    get_current_context,
+    get_hf_turn_credentials,
+    get_hf_turn_credentials_async,
+    get_stt_model,
+    get_tts_model,
+    WebRTCError,
+)
+import gradio as gr
+from huggingface_hub import InferenceClient
+# Load settings from a local .env file, if one exists (python-dotenv), before
+# the models below are created.
+load_dotenv()
+# Speech-to-text and text-to-speech models, created once at import time and
+# shared by every call to response().
+stt_model = get_stt_model()
+tts_model = get_tts_model()
+# Chat history per connection: maps a context's webrtc_id to its list of
+# {"role": ..., "content": ...} messages. Nothing in the code shown here ever
+# removes an entry.
+conversations: dict[str, list[dict[str, str]]] = {}
+def response(
+    audio: tuple[int, np.ndarray],
+    hf_token: str | None,
+):
+    if hf_token is None:
+        raise WebRTCError("HF Token is required")
+    llm_client = InferenceClient(
+        provider="groq",
+        api_key=hf_token,
+    )
+    context = get_current_context()
+    print("context", context)
+    if context.webrtc_id not in conversations:
+        conversations[context.webrtc_id] = [
+            {
+                "role": "system",
+                "content": (
+                    "You are a helpful assistant that can have engaging conversations."
+                    "Your responses must be very short and concise. No more than two sentences. "
+                ),
+            }
+        ]
+    messages = conversations[context.webrtc_id]
+    transcription = stt_model.stt(audio)
+    messages.append({"role": "user", "content": transcription})
+    output = llm_client.chat.completions.create(  # type: ignore
+        model="openai/gpt-oss-20b",
+        messages=messages,  # type: ignore
+        max_tokens=1024,
+        stream=True,
+    )
+    output_text = ""
+    for chunk in output:
+        output_text += chunk.choices[0].delta.content or ""
+    messages.append({"role": "assistant", "content": output_text})
+    conversations[context.webrtc_id] = messages
+    yield from tts_model.stream_tts_sync(output_text)
+    yield AdditionalOutputs(messages)
+# Chat display; listed in additional_outputs below, so it is the target of
+# the message history that response() emits via AdditionalOutputs.
+chatbot = gr.Chatbot(label="Chatbot", type="messages")
+# Password-style textbox for the user's token. It starts empty and is listed
+# in additional_inputs below -- presumably delivered to response() as its
+# hf_token argument; confirm against fastrtc.
+token = gr.Textbox(
+    label="HF Token",
+    value="",
+    type="password",
+)
+# Audio send-receive stream whose handler is response() wrapped in
+# ReplyOnPause.
+stream = Stream(
+    modality="audio",
+    mode="send-receive",
+    handler=ReplyOnPause(response),
+    # Called once, right here at import time.
+    server_rtc_configuration=get_hf_turn_credentials(),
+    # Passed as the function itself (not called here).
+    rtc_configuration=get_hf_turn_credentials_async,
+    additional_inputs=[token],
+    additional_outputs=[chatbot],
+    # Keep only the new value and discard the old one.
+    additional_outputs_handler=lambda old, new: new,
+    ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
+)
+# Launch the UI as soon as this module is executed (there is no __main__ guard).
+stream.ui.launch()