mfidabel committed
Commit 9d6f79c
1 Parent(s): d63587c

Create app.py

Files changed (1)
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
+ import gradio as gr
+ import numpy as np
+ import torch
+ from peft import PeftModel, PeftConfig
+ from transformers import WhisperForConditionalGeneration, WhisperTokenizer, WhisperProcessor, AutomaticSpeechRecognitionPipeline
+
+ peft_model_id = "mfidabel/Modelo_1_Whisper_Large_V3"
+ language = "guarani"
+ task = "transcribe"
+ peft_config = PeftConfig.from_pretrained(peft_model_id)
+ model = WhisperForConditionalGeneration.from_pretrained(
+     peft_config.base_model_name_or_path, load_in_8bit=True, device_map="auto"
+ )
+ model = PeftModel.from_pretrained(model, peft_model_id)
+ tokenizer = WhisperTokenizer.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)
+ processor = WhisperProcessor.from_pretrained(peft_config.base_model_name_or_path, language=language, task=task)
+ feature_extractor = processor.feature_extractor
+ forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task=task)
+
+ pipeline = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
+
+ def transcribe(audio):
+     if audio is None:
+         return "Wait for the recording to finish uploading to the server! Try again in a few seconds."
+
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     with torch.cuda.amp.autocast():
+         return pipeline({"sampling_rate": sr, "raw": y}, generate_kwargs={"forced_decoder_ids": forced_decoder_ids}, max_new_tokens=255)["text"]
+
+ gr.Interface(fn=transcribe, inputs="microphone", outputs="text").launch(share=True)
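
A minimal local smoke test, not part of the commit above, is sketched below. It assumes app.py has already been run so that the model, pipeline, and transcribe() are loaded; it relies only on the fact that Gradio's "microphone" input delivers audio to transcribe() as a (sample_rate, numpy_array) tuple, and that passing None returns the fallback message. The 440 Hz tone is a placeholder signal chosen for illustration; any real recording array works the same way.

# Illustrative smoke test (assumes app.py above has been executed in this session).
import numpy as np

sr = 16000                                                      # Whisper models expect 16 kHz audio
t = np.linspace(0.0, 1.0, sr, endpoint=False)
y = (0.1 * np.sin(2.0 * np.pi * 440.0 * t)).astype(np.float32)  # 1 s synthetic 440 Hz tone

print(transcribe((sr, y)))   # decoded text for the synthetic clip
print(transcribe(None))      # the "recording still uploading" fallback message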