Spaces:

Sabbah13
/

text_transcribation_diarization_and_summarization

Running on A10G

text_transcribation_diarization_and_summarization

File size: 3,804 Bytes

8fb0be5
ee531be
59f6126
 
024f740
e8a4c9c
024f740
4b331f0
 
 
e8a4c9c
 
 
 
ee531be
4b331f0
 
7dc42bb
04cecab
4b331f0
29a10e5
4b331f0
04cecab
791adc1
4b331f0
ee531be
4b331f0
ee531be
4b331f0
 
 
 
ee531be
4b331f0
 
ee531be
4b331f0
 
 
 
 
 
 
4380489
4b331f0
 
 
4380489
4b331f0
 
 
ee531be
4b331f0
 
 
791adc1
4b331f0
a8c8823
4b331f0
 
e8a4c9c
cdf8637
97cd0ed
e8a4c9c
4b331f0
 
791adc1
ee531be
791adc1
4b331f0
 
ee531be
4b331f0

import os
import tempfile

import streamlit as st
import torch
import whisperx

from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
from utils import convert_segments_object_to_text, check_password

def _transcribe_with_speakers(uploaded_file, model_size, device, compute_type, batch_size, hf_token):
    """Transcribe an uploaded audio file and label each segment with its speaker.

    The upload is written to a uniquely named temporary file (whisperx loads
    audio from a path), then passed through transcription, alignment and
    diarization. The temporary file is removed even if one of the stages fails.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.
        model_size: Model name passed to ``whisperx.load_model``.
        device: Device string passed to every whisperx call.
        compute_type: Compute type passed to ``whisperx.load_model``.
        batch_size: Batch size passed to ``model.transcribe``.
        hf_token: Hugging Face token passed to ``whisperx.DiarizationPipeline``.

    Returns:
        The value produced by ``convert_segments_object_to_text`` for the
        diarized result (the caller displays it as text).
    """
    # Keep the original extension on the temp file; splitext yields '' for a
    # name without a dot instead of treating the whole name as the extension.
    suffix = os.path.splitext(uploaded_file.name)[1]

    # A unique name per call prevents concurrent sessions from overwriting each
    # other's audio. delete=False because the file is reopened by path below.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_file_path = tmp.name

    try:
        # Load model
        model = whisperx.load_model(model_size, device, compute_type=compute_type)
        # Load and transcribe audio
        audio = whisperx.load_audio(temp_file_path)
        result = model.transcribe(audio, batch_size=batch_size, language="ru")
        print('Transcribed, now aligning')

        model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
        result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
        print('Aligned, now diarizing')

        diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_token, device=device)
        diarize_segments = diarize_model(audio)
        result_diar = whisperx.assign_word_speakers(diarize_segments, result)
    finally:
        # Never leave uploaded audio behind on disk.
        os.remove(temp_file_path)

    return convert_segments_object_to_text(result_diar)


if check_password():
    # Everything below runs only when check_password() returns a truthy value.
    st.title('Audio Transcription App')
    st.sidebar.title("Settings")

    # Runtime configuration comes from environment variables. Fallbacks are
    # used when a variable is unset or empty so the app does not crash with a
    # TypeError; the fallback values follow the whisperX README example
    # (NOTE(review): confirm they suit this deployment).
    device = os.getenv('DEVICE') or ('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = int(os.getenv('BATCH_SIZE') or 16)
    compute_type = os.getenv('COMPUTE_TYPE') or ('float16' if device.startswith('cuda') else 'int8')

    # The model choice is a deployment decision, so it is not guessed.
    whisper_model_size = os.getenv('WHISPER_MODEL_SIZE')
    if not whisper_model_size:
        st.error('Environment variable WHISPER_MODEL_SIZE is not set.')
        st.stop()

    # Default prompts; fall back to '' so the later `prompt + transcript`
    # concatenation always operates on strings.
    initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT') or ''
    initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT') or ''

    giga_base_prompt = st.sidebar.text_area("Промпт для резюмирования", value=initial_giga_base_prompt)
    giga_max_tokens = st.sidebar.number_input("Максимальное количество токенов при резюмировании", min_value=1, value=1024)

    # Despite its name, this flag toggles the optional transcript *processing*
    # step; the summary below is always produced.
    enable_summarization = st.sidebar.checkbox("Добавить обработку транскрибации", value=False)
    giga_processing_prompt = st.sidebar.text_area("Промпт для обработки транскрибации", value=initial_giga_processing_prompt)

    # Hugging Face token for the diarization pipeline (distinct from the
    # GigaChat `access_token` obtained further down).
    ACCESS_TOKEN = st.secrets["HF_TOKEN"]

    uploaded_file = st.file_uploader("Загрузите аудиофайл", type=["mp4", "wav", "m4a"])

    if uploaded_file is not None:
        st.audio(uploaded_file)

        # Streamlit re-executes this script on every widget interaction. Cache
        # the transcript in the session so that editing a prompt does not
        # repeat the whole transcription pipeline for the same upload.
        # The upload is identified by name and size — presumably sufficient
        # within a single session.
        cache_key = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get('transcript_cache_key') != cache_key:
            with st.spinner('Транскрибируем...'):
                st.session_state['transcript_cache'] = _transcribe_with_speakers(
                    uploaded_file,
                    whisper_model_size,
                    device,
                    compute_type,
                    batch_size,
                    ACCESS_TOKEN,
                )
            # Store the key only after success so a failed run is retried.
            st.session_state['transcript_cache_key'] = cache_key
        transcript = st.session_state['transcript_cache']

        st.write("Результат транскрибации:")
        st.text(transcript)

        access_token = get_access_token()

        if enable_summarization:
            with st.spinner('Обрабатываем транскрибацию...'):
                number_of_tokens = get_number_of_tokens(transcript, access_token)
                print('Количество токенов в транскрибации: ' + str(number_of_tokens))
                # The completion budget is the transcript's token count plus
                # 500; the processed text replaces the raw transcript for the
                # summary step below.
                transcript = get_completion_from_gigachat(giga_processing_prompt + transcript, number_of_tokens + 500, access_token)

                st.write("Результат обработки:")
                st.text(transcript)

        with st.spinner('Резюмируем...'):
            summary_answer = get_completion_from_gigachat(giga_base_prompt + transcript, giga_max_tokens, access_token)

            st.write("Результат резюмирования:")
            st.text(summary_answer)