freddyaboulton (HF Staff) committed
Commit ae054df · verified · 1 Parent(s): a67cb3f

Update app.py

Files changed (1): app.py (+36 −6)
app.py CHANGED
@@ -1,19 +1,43 @@
-import numpy as np
 import gradio as gr
+import numpy as np
+import torch
 from dotenv import load_dotenv
 from fastrtc import (
+    AdditionalOutputs,
     ReplyOnPause,
     Stream,
-    AdditionalOutputs,
+    WebRTCError,
+    audio_to_float32,
     get_current_context,
     get_hf_turn_credentials,
     get_hf_turn_credentials_async,
     get_stt_model,
     get_tts_model,
-    WebRTCError,
 )
-import gradio as gr
 from huggingface_hub import InferenceClient
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+model_id = "openai/whisper-large-v3-turbo"
+
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+
+processor = AutoProcessor.from_pretrained(model_id)
+
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+
 
 load_dotenv()
 
@@ -30,7 +54,12 @@ def response(
     if hf_token is None or hf_token == "":
         raise WebRTCError("HF Token is required")
 
-    llm_client = InferenceClient(provider="groq", token=hf_token)
+    llm_client = InferenceClient(provider="auto", token=hf_token)
+
+    result = pipe(
+        {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]}
+    )
+    transcription = result["text"]
 
     context = get_current_context()
     if context.webrtc_id not in conversations:
@@ -46,7 +75,6 @@ def response(
 
     messages = conversations[context.webrtc_id]
 
-    transcription = stt_model.stt(audio)
     messages.append({"role": "user", "content": transcription})
 
     output = llm_client.chat.completions.create(  # type: ignore
@@ -82,6 +110,8 @@ stream = Stream(
     additional_outputs=[chatbot],
     additional_outputs_handler=lambda old, new: new,
     ui_args={"title": "Talk To OpenAI GPT-OSS 20B (Powered by FastRTC ⚡️)"},
+    time_limit=90,
+    concurrency_limit=5,
 )
 
 stream.ui.launch()
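
The substantive change is the speech-to-text path: transcription now runs through a local Whisper large-v3-turbo pipeline instead of fastrtc's bundled STT model, and the InferenceClient provider is loosened from "groq" to "auto" so huggingface_hub picks any available inference provider. Below is a minimal sketch of the new transcription step in isolation; it reuses the `pipe` object defined in the diff and substitutes a silent dummy frame for the real `(sample_rate, array)` tuple that `ReplyOnPause` hands the handler, so the frame itself is an assumption for illustration:

import numpy as np
from fastrtc import audio_to_float32

# Stand-in for the (sample_rate, int16 array) tuple that ReplyOnPause
# passes to the handler; here, one second of silence at 16 kHz.
audio = (16_000, np.zeros((1, 16_000), dtype=np.int16))

# Same conversion the handler performs: scale int16 PCM to float32 in
# [-1.0, 1.0], then squeeze to the 1-D mono array the transformers ASR
# pipeline expects alongside its sampling rate.
result = pipe(
    {"array": audio_to_float32(audio[1]).squeeze(), "sampling_rate": audio[0]}
)
print(result["text"])

The two new `Stream` arguments are operational guards: `time_limit=90` bounds each session's handler time at 90 seconds and `concurrency_limit=5` caps simultaneous connections at five, which helps keep a shared GPU Space responsive under load.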