Sabbah13 committed on
Commit
bd03d45
1 Parent(s): 50e81bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -74
app.py CHANGED
@@ -5,80 +5,7 @@ import json
5
  import streamlit as st
6
  import whisperx
7
  import torch
8
-
9
def _format_run(speaker, start, end, words):
    """Format one run of consecutive words spoken by the same speaker."""
    text = " ".join(words)
    if start is not None and end is not None:
        return f'{speaker} ({start} : {end}) : {text}'
    return f'{speaker} : {text}'


def convert_segments_object_to_text(data):
    """Render a word-level transcription result as speaker-labelled text.

    Each segment's words are grouped into runs of consecutive words that
    share the same speaker; every run becomes one output line of the form
    ``"<speaker> (<start> : <end>) : <words>"`` (the timing part is dropped
    when either bound is unknown). Runs never span two segments.

    Args:
        data: Mapping with a ``'segments'`` list. Each segment must have a
            ``'words'`` list of dicts and may have ``'speaker'``, ``'start'``
            and ``'end'``. Each word dict may have ``'word'``, ``'speaker'``,
            ``'start'`` and ``'end'``.

    Returns:
        The formatted runs joined with newlines (empty string if there are
        no words at all).

    Raises:
        KeyError: If ``data`` has no ``'segments'`` key or a segment has no
            ``'words'`` key.

    Note:
        Missing ``'speaker'``/``'start'``/``'end'`` keys are filled in on the
        word dicts themselves, so ``data`` is modified in place.
    """
    result = []

    for segment in data['segments']:
        words = segment['words']
        segment_speaker = segment.get('speaker', None)
        segment_start = segment.get('start', None)
        segment_end = segment.get('end', None)
        last_index = len(words) - 1

        # Fill in missing speaker, start and end from neighbouring words,
        # falling back to the segment-level values.
        for i, word_info in enumerate(words):
            if 'speaker' not in word_info:
                if i > 0 and 'speaker' in words[i - 1]:
                    word_info['speaker'] = words[i - 1]['speaker']
                elif i < last_index and 'speaker' in words[i + 1]:
                    word_info['speaker'] = words[i + 1]['speaker']
                else:
                    word_info['speaker'] = segment_speaker

            if 'start' not in word_info:
                if i > 0 and 'end' in words[i - 1]:
                    word_info['start'] = words[i - 1]['end']
                else:
                    word_info['start'] = segment_start

            if 'end' not in word_info:
                if i < last_index and 'start' in words[i + 1]:
                    word_info['end'] = words[i + 1]['start']
                elif i == last_index:
                    word_info['end'] = segment_end
                else:
                    word_info['end'] = word_info['start']

        current_speaker = None
        current_start = None
        current_end = None
        current_text = []

        for word_info in words:
            word = word_info.get('word', '')
            start = word_info.get('start', None)
            end = word_info.get('end', None)
            speaker = word_info.get('speaker', None)

            # The first word of the segment opens the first run. Testing the
            # (empty) text buffer instead of `current_speaker is None` keeps
            # the run start stable when the speaker itself is None.
            if not current_text:
                current_speaker = speaker
                current_start = start

            if speaker == current_speaker:
                current_text.append(word)
                current_end = end
            else:
                # Speaker changed: flush the finished run and open a new one.
                result.append(
                    _format_run(current_speaker, current_start, current_end, current_text)
                )
                current_speaker = speaker
                current_start = start
                current_end = end
                current_text = [word]

        # Flush the last run of this segment.
        if current_text:
            result.append(
                _format_run(current_speaker, current_start, current_end, current_text)
            )

    return '\n'.join(result)
82
 
83
  st.title('Audio Transcription App')
84
  st.sidebar.title("Settings")
 
5
  import streamlit as st
6
  import whisperx
7
  import torch
8
+ from utils import convert_segments_object_to_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  st.title('Audio Transcription App')
11
  st.sidebar.title("Settings")