# -*- coding: utf-8 -*-
"""Demo.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/14CzvGJOpNIGSyyH_fI5Q54ovm-TZCEHX
"""
import os

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Model loading (CPU): leftover from an earlier adapter-based setup.
# Unused below, since transcribe_choice() resolves its own model_id
# from MODEL_OPTIONS.
model_id = "SAadettin-BERber/whisper_small_model_atc_10"
adapter_id = "SAadettin-BERber/whisper-small_atc_10"
# do the merge
# model.load_adapter(adapter_id)
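# A sketch of what the intended adapter merge might look like, assuming the
# adapter repo is a PEFT/LoRA adapter (an assumption; not verified here):
#
#     from peft import PeftModel
#     from transformers import WhisperForConditionalGeneration
#     base = WhisperForConditionalGeneration.from_pretrained(model_id)
#     merged = PeftModel.from_pretrained(base, adapter_id).merge_and_unload()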
MODEL_OPTIONS = {
    "Fine-Tuned Whisper Small": "SAadettin-BERber/normalize_whisper-small_atc_shuffle_3",
    "Fine-Tuned Whisper Large Turbo": "SAadettin-BERber/normalize_whisper-large-v3-turbo_atc_shuffle_1",
    "Original Whisper Small": "unsloth/whisper-small",
    "Original Whisper Turbo": "unsloth/whisper-large-v3-turbo",
}
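# Note: transcribe_choice() below builds a fresh pipeline on every request,
# which re-initializes the model each time. A minimal caching sketch (an
# optional addition, not part of the original flow):
#
#     _PIPELINES = {}
#
#     def get_pipeline(model_id):
#         if model_id not in _PIPELINES:
#             _PIPELINES[model_id] = pipeline(
#                 "automatic-speech-recognition", model=model_id, device=-1
#             )
#         return _PIPELINES[model_id]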
def transcribe_choice(audio_file, selected_model_name):
    model_id = MODEL_OPTIONS[selected_model_name]
    # Status message
    # status_text = f"📦 Loading model '{selected_model_name}'..."
    # device=-1 forces CPU inference
    pipe = pipeline("automatic-speech-recognition", model=model_id, device=-1)
    # Split the recording into 10-second chunks so each fits comfortably
    # within Whisper's 30-second input window.
    audio = AudioSegment.from_file(audio_file)
    segment_length = 10_000  # 10 seconds (pydub works in milliseconds)
    results = []
    for i in range(0, len(audio), segment_length):
        segment = audio[i:i + segment_length]
        # Export each chunk to a temporary WAV file for the pipeline.
        # Note: these fixed names can collide if two requests run at once.
        temp_path = f"temp_{i // 1000}.wav"
        segment.export(temp_path, format="wav")
        output = pipe(temp_path)
        results.append(output["text"])
        os.remove(temp_path)
    transcription = " ".join(results)
    return transcription
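# A minimal sketch of calling the function outside Gradio (assumes a local
# file "sample.wav" exists; the second argument must be a MODEL_OPTIONS key):
#
#     text = transcribe_choice("sample.wav", "Original Whisper Small")
#     print(text)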
with gr.Blocks() as demo:
    gr.Markdown("## ✨ Whisper Model Transcriber with Multiple Model Support")
    audio_input = gr.Audio(type="filepath", label="Audio File")
    model_choice = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Select a Model")
    # status_box = gr.Textbox(label="Status", interactive=False)
    output_box = gr.Textbox(label="Transcription")
    transcribe_btn = gr.Button("Transcribe")
    transcribe_btn.click(
        fn=transcribe_choice,
        inputs=[audio_input, model_choice],
        outputs=[output_box],
    )

demo.launch()
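# When running inside Colab (as the notebook header above suggests), a public
# link is typically needed: demo.launch(share=True).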