Spaces:
Runtime error
Runtime error
akashkumarbtc
committed on
Commit
•
4519e61
1
Parent(s):
7f0685b
app.py file
Browse files
app.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import wave
|
3 |
+
import nltk
|
4 |
+
import torch
|
5 |
+
import torch
|
6 |
+
import openai
|
7 |
+
import whisper
|
8 |
+
import datetime
|
9 |
+
import requests
|
10 |
+
import subprocess
|
11 |
+
import contextlib
|
12 |
+
import numpy as np
|
13 |
+
import gradio as gr
|
14 |
+
from pyannote.audio import Audio
|
15 |
+
from pyannote.core import Segment
|
16 |
+
from sklearn.cluster import AgglomerativeClustering
|
17 |
+
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
18 |
+
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
|
19 |
+
|
20 |
+
|
21 |
+
# Speaker-embedding network used for diarization; pinned to the CPU.
embedding_model = PretrainedSpeakerEmbedding(
    "speechbrain/spkrec-ecapa-voxceleb",
    device=torch.device("cpu"),
)

# The VADER lexicon is fetched before the analyzer that uses it is built.
nltk.download("vader_lexicon")
sid = SentimentIntensityAnalyzer()

# Whisper checkpoint loaded from a path relative to the working directory.
model = whisper.load_model("models/medium.pt")

# pyannote audio reader used to crop clips out of the recording.
audio = Audio()

# Raises KeyError at import time when OPEN_AI_API_KEY is not set.
openai.api_key = os.environ["OPEN_AI_API_KEY"]

# Remote sample recordings offered as examples in the UI.
example_files = [
    "https://pdf.bluetickconsultants.com/customer_support.mp3",
    "https://pdf.bluetickconsultants.com/product_refund.mp3",
]

# Filled below with [local_file_name, number_of_speakers] rows.
file_names = []
|
38 |
+
|
39 |
+
|
40 |
+
def download_file(url, save_name):
    """Download *url* to *save_name* unless that file already exists.

    Args:
        url: Address of the file to fetch.
        save_name: Local path the response body is written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Nothing is written in that case, so a failed download is not
            mistaken for a cached file on the next run.
    """
    if os.path.exists(save_name):
        return

    # A timeout keeps an unresponsive server from blocking start-up forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # The context manager closes the handle even if the write fails.
    with open(save_name, "wb") as out_file:
        out_file.write(response.content)
|
45 |
+
|
46 |
+
|
47 |
+
# Fetch each example recording and register it, paired with a speaker
# count of 2, as an example row for the interface.
for url in example_files:
    save_name = str(url).rsplit("/", 1)[-1]
    download_file(url, save_name)
    file_names.append([save_name, 2])
|
51 |
+
|
52 |
+
|
53 |
+
def segment_embedding(segment, duration, audio_file):
    """Return the speaker embedding for one transcript segment.

    Args:
        segment: Mapping with "start" and "end" timestamps.
        duration: Length of the recording; the clip end is capped at it.
        audio_file: Path of the audio file the clip is cropped from.

    Returns:
        Whatever ``embedding_model`` produces for the cropped clip.
    """
    excerpt = Segment(
        segment["start"],
        # Whisper overshoots the end timestamp in the last segment.
        min(duration, segment["end"]),
    )
    waveform, _sample_rate = audio.crop(audio_file, excerpt)
    # Average over the first axis (presumably channels, i.e. downmix to mono).
    mono = waveform.mean(dim=0, keepdim=True)
    return embedding_model(mono.unsqueeze(0))
|
61 |
+
|
62 |
+
|
63 |
+
# System prompt sent with every summary request.
_SUMMARY_SYSTEM_PROMPT = (
    "You will be provided with a conversation. Your task is to give a summary and mention all the main details in bullet points.\n"
    "Replace speaker 1 and speaker 2 with sales excutive or comapny name and customer name if available.\n"
)


def _ensure_wav(audio_file):
    """Return the path of a WAV version of *audio_file*, converting if needed."""
    if audio_file.lower().endswith(".wav"):
        return audio_file
    # splitext keeps dots inside the stem ("my.call.mp3" -> "my.call.wav").
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    wav_path = stem + ".wav"
    # "-y" goes before the output so it is not a trailing option, and
    # check=True surfaces a failed conversion immediately.
    subprocess.run(["ffmpeg", "-y", "-i", audio_file, wav_path], check=True)
    return wav_path


def _wav_duration(wav_path):
    """Return the length of the WAV file at *wav_path* in seconds."""
    with wave.open(wav_path, "rb") as wav_file:
        return wav_file.getnframes() / float(wav_file.getframerate())


def _format_timestamp(secs):
    """Return *secs*, rounded to whole seconds, as a timedelta."""
    return datetime.timedelta(seconds=round(secs))


def _assign_speakers(segments, duration, audio_file, number_of_speakers):
    """Add a "speaker" label to every segment by clustering its embedding."""
    embeddings = np.zeros(shape=(len(segments), 192))
    for row, segment in enumerate(segments):
        embeddings[row] = segment_embedding(segment, duration, audio_file)
    embeddings = np.nan_to_num(embeddings)

    # An empty speaker field arrives as None; fall back to the default.
    requested = 2 if number_of_speakers is None else int(number_of_speakers)
    # There cannot be more clusters than segments, nor fewer than one.
    cluster_count = max(1, min(requested, len(segments)))

    if len(segments) >= 2:
        labels = AgglomerativeClustering(cluster_count).fit(embeddings).labels_
    else:
        # Clustering needs at least two samples; one segment is one speaker.
        labels = [0] * len(segments)

    for segment, label in zip(segments, labels):
        segment["speaker"] = 'SPEAKER ' + str(label + 1)


def _build_transcript(segments):
    """Render the segments as text with a header at each speaker change."""
    parts = []
    previous_speaker = None
    for position, segment in enumerate(segments):
        speaker = segment["speaker"]
        if position == 0 or previous_speaker != speaker:
            parts.append(
                "\n" + speaker + ' '
                + str(_format_timestamp(segment["start"])) + '\n'
            )
        text = segment["text"]
        # Drop only the single leading space, never a real first character.
        if text.startswith(" "):
            text = text[1:]
        parts.append(text + ' ')
        previous_speaker = speaker
    return "".join(parts)


def speech_to_text_and_sentiment(audio_file, number_of_speakers=2):
    """Transcribe a call, label its speakers, and summarise it.

    The audio is converted to WAV when necessary, transcribed with the
    module-level Whisper ``model``, and split between speakers by clustering
    per-segment embeddings. The transcript is scored with the VADER analyzer
    ``sid`` and summarised through the OpenAI chat completion API.

    Args:
        audio_file: Path of the recording to analyse.
        number_of_speakers: Number of speaker clusters to form. ``None`` is
            treated as 2, and the value is limited to the range from 1 to
            the number of transcribed segments.

    Returns:
        A ``(transcript, analysis)`` tuple of strings. When the transcription
        has no segments the transcript is empty and the analysis says so.

    Raises:
        subprocess.CalledProcessError: If ffmpeg fails to convert the input.
    """
    audio_file = _ensure_wav(audio_file)

    segments = model.transcribe(audio_file)["segments"]
    if not segments:
        # Nothing to cluster or summarise, so skip the API call.
        return "", "No speech was detected in the audio file."

    duration = _wav_duration(audio_file)
    _assign_speakers(segments, duration, audio_file, number_of_speakers)
    conv = _build_transcript(segments)

    sentiment_scores = sid.polarity_scores(conv)

    messages = [
        {"role": "system", "content": _SUMMARY_SYSTEM_PROMPT},
        {"role": "user", "content": conv},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    call_summary = (
        f"Sentiment Analysis:\nPositive: {sentiment_scores['pos']} | Negative: {sentiment_scores['neg']} | Neutral: {sentiment_scores['neu']}\n\n"
        + response["choices"][0]["message"]["content"]
    )
    return conv, call_summary
|
131 |
+
|
132 |
+
|
133 |
+
# Web UI: an audio upload plus a speaker count go in, and the transcript
# and the analysis text come out.
demo = gr.Interface(
    title="Bluetick Sales Call Evaluator",
    description="Upload a sales call audio file and get a transcription of the call along with sentiment analysis",
    fn=speech_to_text_and_sentiment,
    inputs=[
        gr.Audio(label="Select audio file", type="filepath"),
        # gr.Number takes value/minimum/maximum; the former default/min/max
        # keywords are not its parameters, so the field had no default.
        # precision=0 makes the component yield an integer.
        gr.Number(
            label="Select number of speakers (1-5)",
            value=2,
            precision=0,
            minimum=1,
            maximum=5,
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Analysis"),
    ],
    examples=file_names,
    theme=gr.themes.Default(
        primary_hue=gr.themes.colors.red,
        secondary_hue=gr.themes.colors.pink,
    ),
    css=" .gradio-title, .gradio-description {color: black;}",
)

demo.launch(debug=True)
|