imCuteCat committed on
Commit
b0aacf7
1 Parent(s): 3f3b350

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitignore +6 -0
  2. LICENSE +21 -0
  3. README.md +2 -10
  4. cog.yaml +12 -0
  5. predict.py +53 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .cog
2
+ __pycache__
3
+ .DS_Store
4
+ *.wav
5
+ *.mp4
6
+ *.png
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 fofrAI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1,2 @@
1
- ---
2
- title: Cog
3
- emoji: 👀
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # audio-to-waveform
2
+ Convert an audio file to a waveform video
 
 
 
 
 
 
 
 
cog.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration for Cog ⚙️
2
+ # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
3
+
4
+ build:
5
+ gpu: false
6
+ python_version: "3.9"
7
+ system_packages:
8
+ - "ffmpeg"
9
+ - "imagemagick"
10
+ python_packages:
11
+ - "gradio==3.50.2"
12
+ predict: "predict.py:Predictor"
predict.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ from cog import BasePredictor, Input, Path
3
+ import gradio as gr
4
+ import tempfile
5
+
6
class Predictor(BasePredictor):
    """Cog predictor that renders an audio file as a waveform video."""

    def predict(self,
        audio: Path = Input(description="Audio file to create waveform from"),
        bg_color: str = Input(description="Background color of waveform", default="#000000"),
        fg_alpha: float = Input(description="Opacity of foreground waveform", default=0.75),
        bars_color: str = Input(description="Color of waveform bars", default="#ffffff"),
        bar_count: int = Input(description="Number of bars in waveform", default=100),
        bar_width: float = Input(description="Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.", default=0.4),
        caption_text: str = Input(description="Caption text for the video", default=""),
    ) -> Path:
        """Make waveform video from audio file.

        The waveform itself is produced by ``gr.make_waveform``. When
        ``caption_text`` is non-empty, the video is padded to 1000x667 with
        the waveform pushed down by 467 pixels, and a caption image rendered
        by ImageMagick is overlaid at the top-left corner.

        Args:
            audio: Audio file to visualise.
            bg_color: Background colour, used for the waveform, the padding
                and the caption image.
            fg_alpha: Opacity of the waveform bars.
            bars_color: Colour of the waveform bars and of the caption text.
            bar_count: Number of bars in the waveform.
            bar_width: Relative bar width (1 is full width).
            caption_text: Optional caption; empty or ``None`` disables it.

        Returns:
            Path to the generated video file.

        Raises:
            subprocess.CalledProcessError: If ffmpeg or ImageMagick exits
                with a non-zero status.
        """
        waveform_video = gr.make_waveform(
            str(audio),
            bg_color=bg_color,
            fg_alpha=fg_alpha,
            bars_color=bars_color,
            bar_count=bar_count,
            bar_width=bar_width,
        )

        # No caption requested: the raw waveform video is the result.
        if not caption_text:
            return Path(waveform_video)

        # The pad filter is given the colour without a leading '#'. Only
        # strip the character when it is actually there, so that named
        # colours (e.g. "black") are passed through intact.
        pad_color = bg_color[1:] if bg_color.startswith("#") else bg_color

        # The final video must outlive this call, so it gets its own
        # securely created file (ffmpeg overwrites it because of '-y').
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as final_file:
            final_video_path = final_file.name

        # Intermediate artefacts live in a private directory that is removed
        # as soon as the final video has been written.
        with tempfile.TemporaryDirectory() as work_dir:
            padded_waveform_path = str(Path(work_dir) / "padded_waveform.mp4")
            background_image_path = str(Path(work_dir) / "caption.png")

            # Add padding to the top of the waveform video
            subprocess.run([
                'ffmpeg', '-y', '-i', waveform_video, '-vf',
                f'pad=width=1000:height=667:x=0:y=467:color={pad_color}',
                padded_waveform_path
            ], check=True)

            # Create an image using ImageMagick: a 900x367 caption plus a
            # 40px border on every side.
            # NOTE(review): caption_text is user input handed to the
            # `caption:` coder; ImageMagick may treat a leading '@' as
            # "read text from file" and expand '%' escapes - confirm and
            # restrict via policy/escaping if this runs on untrusted input.
            # NOTE(review): the font path is relative to the working
            # directory - confirm font/Roboto-Black.ttf ships with the image.
            subprocess.run([
                'convert', '-background', bg_color, '-fill', bars_color, '-font', 'font/Roboto-Black.ttf',
                '-pointsize', '48', '-size', '900x367', '-gravity', 'center', f'caption:{caption_text}',
                '-bordercolor', bg_color, '-border', '40', background_image_path
            ], check=True)

            # Overlay the image on the padded waveform video
            subprocess.run([
                'ffmpeg', '-y', '-i', padded_waveform_path, '-i', background_image_path,
                '-filter_complex', 'overlay=0:0', final_video_path
            ], check=True)

        return Path(final_video_path)