File size: 2,663 Bytes
b0aacf7 10c7829 c2982d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import subprocess
from cog import BasePredictor, Input, Path
import gradio as gr
import tempfile
class Predictor(BasePredictor):
    """Cog predictor that renders an audio file as a waveform video,
    optionally overlaying a caption banner above the waveform."""

    def predict(self,
        audio: Path = Input(description="Audio file to create waveform from"),
        bg_color: str = Input(description="Background color of waveform", default="#000000"),
        fg_alpha: float = Input(description="Opacity of foreground waveform", default=0.75),
        bars_color: str = Input(description="Color of waveform bars", default="#ffffff"),
        bar_count: int = Input(description="Number of bars in waveform", default=100),
        bar_width: float = Input(description="Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.", default=0.4),
        caption_text: str = Input(description="Caption text for the video", default=""),
    ) -> Path:
        """Make waveform video from audio file.

        Returns the path to the rendered MP4: the raw gradio waveform when
        no caption is given, otherwise a padded video with a caption image
        overlaid on top.

        Raises:
            subprocess.CalledProcessError: if ffmpeg or ImageMagick fails
                (check=True on every subprocess call).
        """
        waveform_video = gr.make_waveform(
            str(audio),
            bg_color=bg_color,
            fg_alpha=fg_alpha,
            bars_color=bars_color,
            bar_count=bar_count,
            bar_width=bar_width,
        )
        # Empty or missing caption: nothing to composite, return as-is.
        if not caption_text:
            return Path(waveform_video)

        # NamedTemporaryFile(delete=False) creates the files securely;
        # tempfile.mktemp (the old approach) is deprecated and race-prone.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
            padded_waveform_path = f.name
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            background_image_path = f.name
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
            final_video_path = f.name

        # Pad the waveform video to 1000x667, pushing the waveform to the
        # bottom (y=467) so the caption banner fits above it. ffmpeg's pad
        # filter wants a bare hex color, so strip any leading '#'.
        subprocess.run([
            'ffmpeg', '-y', '-i', waveform_video, '-vf',
            f'pad=width=1000:height=667:x=0:y=467:color={bg_color.lstrip("#")}',
            padded_waveform_path
        ], check=True)

        # Render the caption text to an image with ImageMagick, using the
        # same background/bar colors as the waveform for visual consistency.
        subprocess.run([
            'convert', '-background', bg_color, '-fill', bars_color, '-font', 'font/Roboto-Black.ttf',
            '-pointsize', '48', '-size', '900x367', '-gravity', 'center', f'caption:{caption_text}',
            '-bordercolor', bg_color, '-border', '40', background_image_path
        ], check=True)

        # Composite the caption image over the padded video (top-left corner).
        subprocess.run([
            'ffmpeg', '-y', '-i', padded_waveform_path, '-i', background_image_path,
            '-filter_complex', 'overlay=0:0', final_video_path
        ], check=True)
        return Path(final_video_path)
# Gradio user interface
def gradio_predict(audio, bg_color, fg_alpha, bars_color, bar_count, bar_width, caption_text):
predictor = Predictor()
result = predictor.predict(
audio=audio,
bg_color=bg |