{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-title",
   "metadata": {},
   "source": [
    "# `stf_alternative` inference: dump predicted frames for one audio clip\n",
    "\n",
    "This notebook builds an `stf_alternative` model and template, runs inference for a single audio file, and writes every predicted frame to disk as a numbered JPEG.\n",
    "\n",
    "Run it top to bottom on a fresh kernel (**Restart Kernel → Run All**)."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-imports",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80b68f52",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from concurrent.futures import ThreadPoolExecutor\n",
    "\n",
    "# Third-party imports keep their original relative order on purpose.\n",
    "# torch / numpy are not referenced directly below; they are kept from the\n",
    "# original import set.\n",
    "import stf_alternative\n",
    "import torch\n",
    "import numpy as np\n",
    "from pydub import AudioSegment\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader may need to change lives in this cell. Set the `STF_PATH` environment variable to point at a different model folder without editing the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89e49422",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that holds the model config, the checkpoint and the work directory.\n",
    "stf_path = os.environ.get(\n",
    "    \"STF_PATH\", \"/workspace/docker/jw93/services/poc/ai-presenter/stf/\"\n",
    ")\n",
    "config_path = os.path.join(stf_path, \"front_config.json\")\n",
    "checkpoint_path = os.path.join(stf_path, \"089.pth\")\n",
    "work_root_path = os.path.join(stf_path, \"works\")\n",
    "device = \"cuda:0\"\n",
    "\n",
    "template_video_path = \"templates/front_one_piece_dress_nodded_cut.webm\"\n",
    "audio = \"../LivePortrait/tmp/0245373b-452d-4341-b2e8-7e37a80de116_1720768774.mp3\"\n",
    "\n",
    "# Folder the per-frame JPEGs are written to (created on demand).\n",
    "frames_dir = \"temp\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-model",
   "metadata": {},
   "source": [
    "## Build the model and the template"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1de6e2e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = stf_alternative.create_model(\n",
    "    config_path=config_path,\n",
    "    checkpoint_path=checkpoint_path,\n",
    "    work_root_path=work_root_path,\n",
    "    device=device,\n",
    "    wavlm_path=\"microsoft/wavlm-large\",\n",
    ")\n",
    "template = stf_alternative.Template(\n",
    "    model=model,\n",
    "    config_path=config_path,\n",
    "    template_video_path=template_video_path,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-audio",
   "metadata": {},
   "source": [
    "## Preview the input audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67fc9bf9",
   "metadata": {},
   "outputs": [],
   "source": [
    "AudioSegment.from_file(audio)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-render",
   "metadata": {},
   "source": [
    "## Run inference and write the frames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc2065d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_silent_video(template, audio, output_dir=\"temp\", max_workers=4):\n",
    "    \"\"\"Run inference for ``audio`` and save every predicted frame as a JPEG.\n",
    "\n",
    "    Frames are written to ``<output_dir>/<index zero-padded to 5 digits>.jpg``.\n",
    "    Processing stops as soon as either the inference generator or the\n",
    "    template frame reader is exhausted, whichever happens first.\n",
    "\n",
    "    Args:\n",
    "        template: object providing ``_get_reader``, ``gen_infer_concurrent``\n",
    "            and ``compose`` (in this notebook an ``stf_alternative.Template``).\n",
    "        audio: passed straight to ``AudioSegment.from_file``.\n",
    "        output_dir: folder the JPEG frames are written to; created if missing.\n",
    "        max_workers: size of the thread pool handed to ``gen_infer_concurrent``.\n",
    "\n",
    "    Returns:\n",
    "        list[str]: paths of the frames that were written, in frame order.\n",
    "\n",
    "    Raises:\n",
    "        OSError: if OpenCV reports that a frame could not be written.\n",
    "    \"\"\"\n",
    "    # cv2.imwrite does not create missing folders; it just returns False.\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    reader = iter(template._get_reader(num_skip_frames=0))\n",
    "    audio_segment = AudioSegment.from_file(audio)\n",
    "    first_index = 0\n",
    "    results = []\n",
    "\n",
    "    with ThreadPoolExecutor(max_workers) as pool:\n",
    "        gen_infer = template.gen_infer_concurrent(pool, audio_segment, first_index)\n",
    "        # zip() pulls from gen_infer first and then from reader, and ends the\n",
    "        # loop when either one runs out -- the same stop condition the former\n",
    "        # next(reader) / except StopIteration pair implemented.\n",
    "        paired = zip(gen_infer, reader)\n",
    "        for idx, ((it, _chunk), frame) in enumerate(paired, first_index):\n",
    "            # NOTE(review): the composed frame is computed but not saved; the\n",
    "            # raw prediction below is what lands on disk, exactly as before.\n",
    "            # TODO confirm which of the two is actually wanted.\n",
    "            template.compose(idx, frame, it)\n",
    "\n",
    "            frame_path = os.path.join(output_dir, f\"{idx:05d}.jpg\")\n",
    "            # Last axis is reversed before the write (presumably RGB -> BGR\n",
    "            # for OpenCV -- TODO confirm).\n",
    "            if not cv2.imwrite(frame_path, it[\"pred\"][:, :, ::-1]):\n",
    "                raise OSError(f\"cv2.imwrite failed for {frame_path!r}\")\n",
    "            results.append(frame_path)\n",
    "\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ceedf46",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = create_silent_video(template, audio, output_dir=frames_dir)\n",
    "len(results)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}