KingNish committed
Commit: ed9ac19
Parent: fb74874

Update app.py
Files changed (1): app.py (+20 -39)
app.py CHANGED
@@ -20,7 +20,7 @@ import urllib
 import PIL.Image
 import io
 import datasets
-
+from streaming_stt_nemo import Model as nemo
 import gradio as gr
 from transformers import TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration
@@ -70,39 +70,31 @@ theme = gr.themes.Soft(
     background_fill_secondary_dark="#111111",
     color_accent_soft_dark="transparent")
 
-MODEL_NAME = "openai/whisper-medium"
-BATCH_SIZE = 10
+default_lang = "en"
 
-device = 0 if torch.cuda.is_available() else "cpu"
+engines = { default_lang: nemo(default_lang) }
 
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-
-@spaces.GPU(queue=False)
-def transcribe(inputs):
-    if inputs is None:
-        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"})["text"]
-    return text
+def transcribe(audio):
+    lang = "en"
+    model = engines[lang]
+    text = model.stt_file(audio)[0]
+    return text
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 def client_fn(model):
-    if "Mixtral" in model:
-        return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-    elif "Llama" in model:
-        return InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+    if "Nous" in model:
+        return InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+    elif "Star" in model:
+        return InferenceClient("HuggingFaceH4/starchat2-15b-v0.1")
     elif "Mistral" in model:
         return InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
     elif "Phi" in model:
         return InferenceClient("microsoft/Phi-3-mini-4k-instruct")
+    elif "Zephyr" in model:
+        return InferenceClient("HuggingFaceH4/zephyr-7b-beta")
     else:
-        return InferenceClient("microsoft/Phi-3-mini-4k-instruct")
+        return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 def randomize_seed_fn(seed: int) -> int:
     seed = random.randint(0, 999999)
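Note: this hunk swaps the Whisper ASR pipeline (and its @spaces.GPU decorator) for a local streaming_stt_nemo engine, and changes client_fn's fallback from Phi-3-mini to Mixtral-8x7B-Instruct. A minimal sketch of the new STT path, assuming, as the diff itself does, that Model(lang).stt_file(path) returns a list whose first element is the transcript; "sample.wav" is a placeholder path:

from streaming_stt_nemo import Model as nemo  # same import as the hunk above

default_lang = "en"
engines = {default_lang: nemo(default_lang)}  # build one engine per language, once

def transcribe(audio):
    lang = "en"
    model = engines[lang]
    text = model.stt_file(audio)[0]  # first hypothesis is taken as the transcript
    return text

print(transcribe("sample.wav"))  # hypothetical audio file for illustration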
@@ -117,16 +109,12 @@ def models(text, model="Mixtral 8x7B", seed=42):
 
     client = client_fn(model)
     generate_kwargs = dict(
-        temperature=0.7,
         max_new_tokens=512,
-        top_p=0.95,
-        repetition_penalty=1,
-        do_sample=True,
         seed=seed,
     )
 
     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-    stream = client1.text_generation(
+    stream = client.text_generation(
         formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
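This hunk also fixes a bug: the function builds `client`, but the old code streamed from an undefined `client1`. A hedged sketch of the streaming call in isolation, assuming huggingface_hub's InferenceClient, where stream=True with details=True yields per-token responses exposing response.token.text:

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
generate_kwargs = dict(max_new_tokens=512, seed=42)

stream = client.text_generation(
    "Hello", **generate_kwargs,
    stream=True, details=True, return_full_text=False)

output = ""
for response in stream:
    output += response.token.text  # accumulate streamed tokens into the reply
print(output)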
@@ -135,9 +123,9 @@ def models(text, model="Mixtral 8x7B", seed=42):
 
     return output
 
-async def respond(audio):
+async def respond(audio, model, seed):
     user = transcribe(audio)
-    reply = model(user)
+    reply = models(user, model, seed)
     communicate = edge_tts.Communicate(reply)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
         tmp_path = tmp_file.name
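Another bug fix: the old body called the undefined `model(user)`; the new signature threads the dropdown's model name and the seed through to models(). A sketch of the full voice round-trip (STT -> LLM -> TTS) this enables, reusing transcribe() and models() from above; the save call is an assumption from edge_tts's Communicate API, since the hunk is truncated before it:

import tempfile
import edge_tts

async def respond(audio, model, seed):
    user = transcribe(audio)           # speech to text
    reply = models(user, model, seed)  # LLM reply
    communicate = edge_tts.Communicate(reply)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
        await communicate.save(tmp_path)  # assumed: Communicate.save() writes the audio
    return tmp_path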
@@ -548,14 +536,7 @@ with gr.Blocks(
 
 with gr.Blocks() as voice:
     with gr.Row():
-        select = gr.Dropdown([ 'Mixtral 8x7B',
-                               'Llama 3 8B',
-                               'Mistral 7B v0.3',
-                               'Phi 3 mini',
-                             ],
-                             value="Mixtral 8x7B",
-                             label="Model"
-                             )
+        select = gr.Dropdown([ 'Nous Hermes Mixtral 8x7B DPO', 'Mixtral 8x7B','StarChat2 15b','Mistral 7B v0.3','Phi 3 mini', 'Zephyr 7b' ], value="Mistral 7B v0.3", label="Select Model")
     seed = gr.Slider(
         label="Seed",
         minimum=0,
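The dropdown labels only matter insofar as client_fn substring-matches them ("Nous", "Star", "Mistral", "Phi", "Zephyr"); anything else, including 'Mixtral 8x7B', falls through to the Mixtral default. A quick check of the mapping, assuming InferenceClient's .model attribute holds the repo id it was built with:

for choice in ['Nous Hermes Mixtral 8x7B DPO', 'Mixtral 8x7B', 'StarChat2 15b',
               'Mistral 7B v0.3', 'Phi 3 mini', 'Zephyr 7b']:
    print(f"{choice} -> {client_fn(choice).model}")  # label -> backing model repo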
@@ -571,7 +552,7 @@ with gr.Blocks() as voice:
         elem_classes="audio")
     gr.Interface(
         fn=respond,
-        inputs=[input],
+        inputs=[input, select,seed],
         outputs=[output], api_name="translate", live=True)
 
 with gr.Blocks() as livechat:
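Since respond() now takes (audio, model, seed), the Interface must pass all three components. A hedged sketch of the resulting wiring; the gr.Audio settings are assumptions, as only elem_classes="audio" is visible in the diff:

import gradio as gr

with gr.Blocks() as voice:
    with gr.Row():
        select = gr.Dropdown(['Mistral 7B v0.3', 'Phi 3 mini'],
                             value="Mistral 7B v0.3", label="Select Model")
        seed = gr.Slider(label="Seed", minimum=0, maximum=999999, step=1, value=0)
    input = gr.Audio(label="User", type="filepath", elem_classes="audio")   # assumed settings
    output = gr.Audio(label="AI", type="filepath", elem_classes="audio")    # assumed settings
    gr.Interface(
        fn=respond,
        inputs=[input, select, seed],  # one input component per respond() parameter
        outputs=[output], api_name="translate", live=True)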
 