"""Restore punctuation in a LibriSpeech transcript with speechbox.

Streams one sample from the "clean" validation split of ``librispeech_asr``,
prints the sample's transcript, then runs ``PunctuationRestorer`` on the
sample's audio and text and prints the restored text.
"""
import torch
from datasets import load_dataset
from speechbox import PunctuationRestorer

# streaming=True is requested so the split can be iterated without fetching
# it in full first; only the first sample is pulled below.
streamed_dataset = load_dataset(
    "librispeech_asr", "clean", split="validation", streaming=True
)

# Take the first sample of the stream.
sample = next(iter(streamed_dataset))

# Show the transcript as stored in the dataset, before restoration.
print(sample["text"])

# Load the restorer from the Whisper checkpoint.
restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")

# Prefer the GPU, but fall back to the CPU so the script still runs on
# machines without CUDA instead of crashing on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"
restorer.to(device)

# The restorer receives the raw waveform, the transcript, and the waveform's
# sampling rate. The second return value (log_probs) is not used further.
audio = sample["audio"]
restored_text, log_probs = restorer(
    audio["array"],
    sample["text"],
    sampling_rate=audio["sampling_rate"],
    num_beams=1,
)

print("Restored text:\n", restored_text)