adriszmar committed on
Commit
f194a66
1 Parent(s): 88c92b0
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import speech_recognition as sr
3
+ from transformers import pipeline
4
+ import re
5
+
6
# Load NLP models.
#
# Streamlit re-executes this script from top to bottom on every user
# interaction. Building the pipelines directly at module level would therefore
# reload both models on each rerun, so they are created inside
# st.cache_resource loaders, which keep a single shared instance alive.
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline (facebook/bart-large-cnn)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def _load_classifier():
    """Build the zero-shot classification pipeline (facebook/bart-large-mnli)."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Module-level names are kept so the rest of the script can use them unchanged.
summarizer = _load_summarizer()
classifier = _load_classifier()
9
+
10
def extract_info(text):
    """Classify *text* against a fixed set of meeting-related topics.

    The module-level zero-shot ``classifier`` is asked to score the text
    against the labels "project status", "risks", "questions" and
    "administration". Whatever the classifier returns is handed back to the
    caller unchanged.
    """
    topics = ["project status", "risks", "questions", "administration"]
    return classifier(text, topics)
14
+
15
def normalize_text(text):
    """Return *text* lower-cased with each whitespace run collapsed to one space.

    Leading and trailing whitespace is collapsed like any other run, not
    removed, so a padded input keeps a single space at each end.
    """
    return re.sub(r'\s+', ' ', text.lower())
19
+
20
st.title("Audio to Text Processing and Categorization")

audio_file = st.file_uploader("Upload an audio file", type=["wav"])

if audio_file is not None:
    st.audio(audio_file, format='audio/wav')

    # Convert audio to text
    # Rewind first in case the audio player above advanced the read position.
    audio_file.seek(0)
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
    except ValueError:
        # sr.AudioFile raises ValueError when the upload is not a readable
        # audio file (for example corrupted, or mislabelled as .wav).
        st.error("The uploaded file could not be read as a WAV audio file.")
        st.stop()
    except sr.UnknownValueError:
        # The recognizer found no intelligible speech in the recording.
        st.error("Could not understand the audio; no speech was recognized.")
        st.stop()
    except sr.RequestError as exc:
        # The speech recognition service was unreachable or rejected the request.
        st.error(f"Speech recognition request failed: {exc}")
        st.stop()

    st.write("Transcribed Text:")
    st.write(text)

    # NLP processing
    # truncation=True clips transcripts longer than the model's maximum input
    # length instead of letting the pipeline fail; only the leading part of a
    # very long transcript is then summarized.
    summary = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    summary_text = summary[0]['summary_text']

    st.write("Summarized Text:")
    st.write(summary_text)

    # Information extraction
    extracted_info = extract_info(summary_text)

    st.write("Extracted Information:")
    st.write(extracted_info)

    # Text normalization
    # The classifier result is stringified as a whole before normalizing, so
    # the output is the lower-cased, whitespace-collapsed repr of that result.
    normalized_text = normalize_text(str(extracted_info))

    st.write("Normalized Text:")
    st.write(normalized_text)