Steveeeeeeen committed on
Commit
66cbb93
1 Parent(s): f0f7172

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -31,7 +31,7 @@ def compute_wer_table(audio, text):
31
  for model in model_name:
32
  pipe = pipeline("automatic-speech-recognition", model=model_name[model])
33
  transcription = pipe(audio_input)['text']
34
- transcription = "".join([char for char in transcription if char.isalpha() or char.isspace()])
35
  trans.append(transcription)
36
  wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
37
  wer_scores.append(wer)
@@ -62,9 +62,9 @@ with gr.Blocks() as demo:
62
  "Lower WER scores indicate better performance."
63
  "\n\n| Model | WER |\n"
64
  "|--------------------------|--------------------------|\n"
65
- "| [whisper-tiny](https://huggingface.co/openai/whisper-tiny.en) | 0.06052 |\n"
66
- "| [wav2vec2-large-960h](https://huggingface.co/facebook/wav2vec2-large-960h) | 0.02201 |\n"
67
- "| [distill-whisper-small](https://huggingface.co/distil-whisper/distil-small.en)| 0.03959 |\n"
68
  "\n\n### Data Source\n"
69
  "The data used in this demo is a subset of the [LibriSpeech](https://huggingface.co/datasets/openslr/librispeech_asr) dataset which contains the first 100 audio samples and their corresponding reference texts in the validation set."
70
  ),
 
31
  for model in model_name:
32
  pipe = pipeline("automatic-speech-recognition", model=model_name[model])
33
  transcription = pipe(audio_input)['text']
34
+ transcription = transcription.replace(",", "").replace(".", "").replace("!", "").replace("?", "")
35
  trans.append(transcription)
36
  wer = wer_metric.compute(predictions=[transcription.upper()], references=[text.upper()])
37
  wer_scores.append(wer)
 
62
  "Lower WER scores indicate better performance."
63
  "\n\n| Model | WER |\n"
64
  "|--------------------------|--------------------------|\n"
65
+ "| [whisper-tiny](https://huggingface.co/openai/whisper-tiny.en) | 0.05511 |\n"
66
+ "| [wav2vec2-large-960h](https://huggingface.co/facebook/wav2vec2-large-960h) | 0.01617 |\n"
67
+ "| [distill-whisper-small](https://huggingface.co/distil-whisper/distil-small.en)| 0.03686 |\n"
68
  "\n\n### Data Source\n"
69
  "The data used in this demo is a subset of the [LibriSpeech](https://huggingface.co/datasets/openslr/librispeech_asr) dataset which contains the first 100 audio samples and their corresponding reference texts in the validation set."
70
  ),