arxivgpt kim
committed on
Commit
•
27907af
1
Parent(s):
2918353
Update app.py
Browse files
app.py
CHANGED
@@ -235,26 +235,14 @@ diff_out = gr.HighlightedText(label="Cuts Diffs", combine_adjacent=True)
|
|
235 |
examples = gr.Dataset(components=[video_in], samples=VIDEOS, type="index")
|
236 |
|
237 |
css = """
|
238 |
-
|
239 |
-
|
240 |
-
.output-markdown {max-width: 65ch !important;}
|
241 |
-
#video-container{
|
242 |
-
max-width: 40rem;
|
243 |
}
|
244 |
"""
|
|
|
245 |
with gr.Blocks(css=css) as demo:
|
246 |
transcription_var = gr.State()
|
247 |
timestamps_var = gr.State()
|
248 |
-
with gr.Row():
|
249 |
-
with gr.Column():
|
250 |
-
gr.Markdown("""
|
251 |
-
# Edit Video By Editing Text
|
252 |
-
This project is a quick proof of concept of a simple video editor where the edits
|
253 |
-
are made by editing the audio transcription.
|
254 |
-
Using the [Huggingface Automatic Speech Recognition Pipeline](https://huggingface.co/tasks/automatic-speech-recognition)
|
255 |
-
with a fine tuned [Wav2Vec2 model using Connectionist Temporal Classification (CTC)](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self)
|
256 |
-
you can predict not only the text transcription but also the [character or word base timestamps](https://huggingface.co/docs/transformers/v4.19.2/en/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__.return_timestamps)
|
257 |
-
""")
|
258 |
|
259 |
with gr.Row():
|
260 |
|
@@ -300,14 +288,8 @@ with gr.Blocks(css=css) as demo:
|
|
300 |
with gr.Column():
|
301 |
video_out.render()
|
302 |
diff_out.render()
|
303 |
-
with gr.Row():
|
304 |
-
gr.Markdown("""
|
305 |
-
#### Video Credits
|
306 |
|
307 |
-
1. [Cooking](https://vimeo.com/573792389)
|
308 |
-
1. [Shia LaBeouf "Just Do It"](https://www.youtube.com/watch?v=n2lTxIk_Dr0)
|
309 |
-
1. [Mark Zuckerberg & Yuval Noah Harari in Conversation](https://www.youtube.com/watch?v=Boj9eD0Wug8)
|
310 |
-
""")
|
311 |
demo.queue()
|
|
|
312 |
if __name__ == "__main__":
|
313 |
demo.launch(debug=True)
|
|
|
235 |
examples = gr.Dataset(components=[video_in], samples=VIDEOS, type="index")
|
236 |
|
237 |
css = """
|
238 |
+
footer {
|
239 |
+
visibility: hidden;
|
|
|
|
|
|
|
240 |
}
|
241 |
"""
|
242 |
+
|
243 |
with gr.Blocks(css=css) as demo:
|
244 |
transcription_var = gr.State()
|
245 |
timestamps_var = gr.State()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
with gr.Row():
|
248 |
|
|
|
288 |
with gr.Column():
|
289 |
video_out.render()
|
290 |
diff_out.render()
|
|
|
|
|
|
|
291 |
|
|
|
|
|
|
|
|
|
292 |
demo.queue()
|
293 |
+
|
294 |
if __name__ == "__main__":
|
295 |
demo.launch(debug=True)
|