"""Streamlit app: transcribe an uploaded WAV file, summarize it, and categorize it.

Pipeline: upload -> speech-to-text (Google Web Speech via ``speech_recognition``)
-> summarization -> zero-shot classification -> text normalization.
"""

import re

import speech_recognition as sr
import streamlit as st
from transformers import pipeline

# Load NLP models
# NOTE(review): these pipelines are built at module level, so they are
# constructed every time this script is executed. If the installed Streamlit
# version offers a resource cache, wrapping the loads in it would avoid
# repeated construction -- confirm against the deployed version.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


def extract_info(text):
    """Classify *text* against the fixed set of meeting-topic labels.

    Args:
        text: The text to categorize.

    Returns:
        Whatever the zero-shot ``classifier`` pipeline returns for *text*
        and the candidate labels.
    """
    candidate_labels = ["project status", "risks", "questions", "administration"]
    return classifier(text, candidate_labels)


def normalize_text(text: str) -> str:
    """Return *text* lower-cased with every whitespace run collapsed to one space."""
    return re.sub(r'\s+', ' ', text.lower())


def _transcribe(uploaded_file):
    """Transcribe *uploaded_file* to text, reporting failures in the UI.

    Args:
        uploaded_file: Seekable file-like object holding the uploaded audio.

    Returns:
        The transcribed text, or ``None`` when the audio could not be read,
        the speech could not be recognized, the recognition service failed,
        or the transcript came back empty. In each of those cases a message
        is shown to the user instead of raising.
    """
    recognizer = sr.Recognizer()

    # Rewind in case an earlier reader left the stream position past the start.
    uploaded_file.seek(0)

    try:
        with sr.AudioFile(uploaded_file) as source:
            audio_data = recognizer.record(source)
    except ValueError as err:
        st.error(f"Could not read the uploaded audio file: {err}")
        return None

    try:
        transcript = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        st.error("Speech could not be understood in the uploaded audio.")
        return None
    except sr.RequestError as err:
        st.error(f"Speech recognition service request failed: {err}")
        return None

    if not transcript or not transcript.strip():
        st.warning("No speech was detected in the uploaded audio.")
        return None

    return transcript


st.title("Audio to Text Processing and Categorization")

audio_file = st.file_uploader("Upload an audio file", type=["wav"])

if audio_file is not None:
    st.audio(audio_file, format='audio/wav')

    # Convert audio to text
    text = _transcribe(audio_file)

    if text is not None:
        st.write("Transcribed Text:")
        st.write(text)

        # NLP processing
        summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
        summary_text = summary[0]['summary_text']
        st.write("Summarized Text:")
        st.write(summary_text)

        # Information extraction
        extracted_info = extract_info(summary_text)
        st.write("Extracted Information:")
        st.write(extracted_info)

        # Text normalization (applied to the string form of the classifier output)
        normalized_text = normalize_text(str(extracted_info))
        st.write("Normalized Text:")
        st.write(normalized_text)