Spaces:
Running
on
A10G
Running
on
A10G
File size: 3,804 Bytes
8fb0be5 ee531be 59f6126 024f740 e8a4c9c 024f740 4b331f0 e8a4c9c ee531be 4b331f0 7dc42bb 04cecab 4b331f0 29a10e5 4b331f0 04cecab 791adc1 4b331f0 ee531be 4b331f0 ee531be 4b331f0 ee531be 4b331f0 ee531be 4b331f0 4380489 4b331f0 4380489 4b331f0 ee531be 4b331f0 791adc1 4b331f0 a8c8823 4b331f0 e8a4c9c cdf8637 97cd0ed e8a4c9c 4b331f0 791adc1 ee531be 791adc1 4b331f0 ee531be 4b331f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import os
import tempfile

import streamlit as st
import torch
import whisperx

from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
from utils import convert_segments_object_to_text, check_password
# Streamlit page script. When check_password() returns a truthy value it
# renders the settings sidebar, transcribes an uploaded audio file with
# whisperx (transcribe -> align -> assign speakers), optionally post-processes
# the transcript through GigaChat, and finally asks GigaChat for a summary.


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a misconfigured
            deployment fails with a message that names the missing setting.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name!r} is not set")
    return value


def _load_uploaded_audio(uploaded_file):
    """Decode a Streamlit upload with whisperx.load_audio and return the result.

    The upload is spooled to a uniquely named temporary file (keeping the
    original extension) because load_audio is given a path here. The file is
    always removed afterwards, so parallel sessions cannot overwrite each
    other's audio and nothing is left behind on disk.
    """
    suffix = os.path.splitext(uploaded_file.name)[1]
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        # Close the handle before decoding so the data is flushed to disk.
        with tmp:
            tmp.write(uploaded_file.getbuffer())
        return whisperx.load_audio(tmp.name)
    finally:
        os.remove(tmp.name)


if check_password():
    st.title('Audio Transcription App')
    st.sidebar.title("Settings")

    # Runtime configuration comes from the environment.
    device = _require_env('DEVICE')
    batch_size = int(_require_env('BATCH_SIZE'))
    compute_type = _require_env('COMPUTE_TYPE')

    # Prompts are optional: fall back to an empty string so the text areas and
    # the later "prompt + transcript" concatenation always work with str.
    initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT', '')
    # NOTE: the variable name is spelled 'PROCCESS' on purpose - it has to
    # match whatever the deployment already defines.
    initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT', '')

    giga_base_prompt = st.sidebar.text_area(
        "Промпт для резюмирования",
        value=initial_giga_base_prompt,
    )
    giga_max_tokens = st.sidebar.number_input(
        "Максимальное количество токенов при резюмировании",
        min_value=1,
        value=1024,
    )
    enable_summarization = st.sidebar.checkbox(
        "Добавить обработку транскрибации",
        value=False,
    )
    giga_processing_prompt = st.sidebar.text_area(
        "Промпт для обработки транскрибации",
        value=initial_giga_processing_prompt,
    )

    # Hugging Face token, used below for the diarization pipeline. Not to be
    # confused with `access_token`, which is obtained from get_access_token().
    ACCESS_TOKEN = st.secrets["HF_TOKEN"]

    uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])

    if uploaded_file is not None:
        st.audio(uploaded_file)

        with st.spinner('Транскрибируем...'):
            # Load model
            model = whisperx.load_model(
                _require_env('WHISPER_MODEL_SIZE'),
                device,
                compute_type=compute_type,
            )

            # Load and transcribe audio
            audio = _load_uploaded_audio(uploaded_file)
            result = model.transcribe(audio, batch_size=batch_size, language="ru")
            print('Transcribed, now aligning')

            model_a, metadata = whisperx.load_align_model(
                language_code=result["language"],
                device=device,
            )
            result = whisperx.align(
                result["segments"],
                model_a,
                metadata,
                audio,
                device,
                return_char_alignments=False,
            )
            print('Aligned, now diarizing')

            diarize_model = whisperx.DiarizationPipeline(
                use_auth_token=ACCESS_TOKEN,
                device=device,
            )
            diarize_segments = diarize_model(audio)
            result_diar = whisperx.assign_word_speakers(diarize_segments, result)

        st.write("Результат транскрибации:")
        transcript = convert_segments_object_to_text(result_diar)
        st.text(transcript)

        access_token = get_access_token()

        if enable_summarization:
            with st.spinner('Обрабатываем транскрибацию...'):
                number_of_tokens = get_number_of_tokens(transcript, access_token)
                print(f'Количество токенов в транскрибации: {number_of_tokens}')
                # The completion budget is the transcript's token count plus
                # 500 extra tokens; the processed text replaces the transcript
                # that is summarized below.
                transcript = get_completion_from_gigachat(
                    giga_processing_prompt + transcript,
                    number_of_tokens + 500,
                    access_token,
                )

            st.write("Результат обработки:")
            st.text(transcript)

        with st.spinner('Резюмируем...'):
            summary_answer = get_completion_from_gigachat(
                giga_base_prompt + transcript,
                giga_max_tokens,
                access_token,
            )

        st.write("Результат резюмирования:")
        st.text(summary_answer)