---
base_model:
- openai/whisper-large-v3-turbo
datasets:
- mozilla-foundation/common_voice_17_0
- google/fleurs
language:
- th
library_name: transformers
pipeline_tag: automatic-speech-recognition
---

Usage with the 🤗 Transformers `pipeline` for automatic speech recognition:

```python
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch

MODEL_NAME = "FILM6912/whisper-large-v3-turbo-thai"

# Use a GPU with fp16 when available, otherwise fall back to CPU with fp32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch_dtype,
    # low_cpu_mem_usage=True,
    # use_safetensors=True,
)
model.to(device)

processor = AutoProcessor.from_pretrained(MODEL_NAME)

whisper = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)

# Transcribe a local audio file in 30-second chunks with 5-second strides;
# decoding MP3 requires ffmpeg to be installed.
whisper(
    "c.mp3",
    chunk_length_s=30,
    stride_length_s=5,
    batch_size=16,
    return_timestamps=True,
    generate_kwargs={"language": "th"},
)
```
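
For a quick sanity check without a local audio file, you can transcribe a sample pulled straight from one of the fine-tuning datasets. The following is a minimal sketch, assuming the `datasets` library is installed and that your Hugging Face account has accepted the Common Voice 17.0 terms (the dataset is gated, so `huggingface-cli login` may be required); `whisper` is the pipeline built above.

```python
from datasets import Audio, load_dataset

# Stream the Thai test split so the full dataset is not downloaded.
cv = load_dataset(
    "mozilla-foundation/common_voice_17_0", "th", split="test", streaming=True
)

# Whisper's feature extractor expects 16 kHz input, so resample on the fly.
cv = cv.cast_column("audio", Audio(sampling_rate=16_000))

# Take one decoded sample: a dict with "array" and "sampling_rate" keys.
sample = next(iter(cv))["audio"]

result = whisper(
    {"array": sample["array"], "sampling_rate": sample["sampling_rate"]},
    generate_kwargs={"language": "th"},
)
print(result["text"])
```

Casting the audio column to 16 kHz matters because the feature extractor assumes 16 kHz waveforms; passing the raw array and sampling rate as a dict skips any ffmpeg dependency for decoding.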