freddyaboulton HF Staff committed on
Commit
22d4d93
·
verified ·
1 Parent(s): be8c5f4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""FastRTC voice-chat demo: talk to GPT-OSS-20B through Hugging Face Inference."""

import numpy as np
import gradio as gr
from dotenv import load_dotenv
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    WebRTCError,
    get_current_context,
    get_hf_turn_credentials,
    get_hf_turn_credentials_async,
    get_stt_model,
    get_tts_model,
)
from huggingface_hub import InferenceClient

# Load environment variables (e.g. HF_TOKEN) from a local .env file, if any.
load_dotenv()

# Speech-to-text and text-to-speech models are loaded once at import time so
# every connection shares them.
stt_model = get_stt_model()
tts_model = get_tts_model()

# Per-connection chat history, keyed by the WebRTC connection id.
conversations: dict[str, list[dict[str, str]]] = {}
def response(
    audio: tuple[int, np.ndarray],
    hf_token: str | None,
):
    """Handle one user utterance: transcribe it, query the LLM, speak the reply.

    Args:
        audio: ``(sample_rate, samples)`` tuple captured from the client mic.
        hf_token: Hugging Face API token used to authenticate the LLM call.

    Yields:
        TTS audio chunks for the spoken reply, followed by an
        ``AdditionalOutputs`` carrying the updated message history so the
        chatbot component can render it.

    Raises:
        WebRTCError: if no (or an empty) HF token was provided.
    """
    # Reject missing AND empty tokens up front with a clear error instead of
    # letting the inference call fail later with an opaque auth message.
    if not hf_token:
        raise WebRTCError("HF Token is required")
    llm_client = InferenceClient(
        provider="groq",
        api_key=hf_token,
    )
    # One history list per WebRTC connection, created lazily on the first turn.
    context = get_current_context()
    if context.webrtc_id not in conversations:
        conversations[context.webrtc_id] = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that can have engaging conversations. "
                    "Your responses must be very short and concise. No more than two sentences. "
                ),
            }
        ]

    # `messages` aliases the stored list, so appends below persist in
    # `conversations` without re-assignment.
    messages = conversations[context.webrtc_id]

    transcription = stt_model.stt(audio)
    messages.append({"role": "user", "content": transcription})

    output = llm_client.chat.completions.create(  # type: ignore
        model="openai/gpt-oss-20b",
        messages=messages,  # type: ignore
        max_tokens=1024,
        stream=True,
    )

    # Accumulate the streamed completion into one string; deltas may be None.
    output_text = ""
    for chunk in output:
        output_text += chunk.choices[0].delta.content or ""

    messages.append({"role": "assistant", "content": output_text})
    yield from tts_model.stream_tts_sync(output_text)
    yield AdditionalOutputs(messages)
# Chat transcript display; refreshed from the handler's AdditionalOutputs.
chatbot = gr.Chatbot(label="Chatbot", type="messages")

# Password-style field so users can paste their Hugging Face token privately.
token = gr.Textbox(
    type="password",
    label="HF Token",
    value="",
)


def _replace_history(old, new):
    # Always show the freshest message list, discarding the previous state.
    return new


# Wire up the audio stream: pause-detection handler, TURN credentials for
# NAT traversal, and the extra Gradio inputs/outputs that ride alongside
# the audio channel.
stream = Stream(
    handler=ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
    server_rtc_configuration=get_hf_turn_credentials(),
    rtc_configuration=get_hf_turn_credentials_async,
    additional_inputs=[token],
    additional_outputs=[chatbot],
    additional_outputs_handler=_replace_history,
    ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
)

stream.ui.launch()