{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "80b68f52",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.10/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import stf_alternative\n",
"import torch\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "8c8a5d19",
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "89e49422",
"metadata": {},
"outputs": [],
"source": [
"stf_path = \"/workspace/docker/jw93/services/poc/ai-presenter/stf/\""
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "65c8905f",
"metadata": {},
"outputs": [],
"source": [
"# Resolve the three model assets under stf_path in one pass; the file names are\n",
"# listed in the same order as the variables they are unpacked into.\n",
"config_path, checkpoint_path, work_root_path = (\n",
"    os.path.join(stf_path, asset_name)\n",
"    for asset_name in (\"front_config.json\", \"089.pth\", \"works\")\n",
")\n",
"# Device string handed to stf_alternative.create_model.\n",
"device = \"cuda:0\""
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "1de6e2e9",
"metadata": {},
"outputs": [],
"source": [
"model = stf_alternative.create_model(\n",
" config_path=config_path,\n",
" checkpoint_path=checkpoint_path,\n",
" work_root_path=work_root_path,\n",
" device=device,\n",
" wavlm_path=\"microsoft/wavlm-large\",\n",
")\n",
"template = stf_alternative.Template(\n",
" model=model,\n",
" config_path=config_path,\n",
" template_video_path=\"templates/front_one_piece_dress_nodded_cut.webm\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "8aeae390",
"metadata": {},
"outputs": [],
"source": [
"from concurrent.futures import ThreadPoolExecutor\n",
"\n",
"from pydub import AudioSegment"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "50aac876",
"metadata": {},
"outputs": [],
"source": [
"audio = \"../LivePortrait/tmp/0245373b-452d-4341-b2e8-7e37a80de116_1720768774.mp3\""
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "67fc9bf9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"AudioSegment.from_file(audio)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "ca742175",
"metadata": {},
"outputs": [],
"source": [
"import cv2"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "fc2065d4",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"\n",
"def create_silent_video(template, audio, output_dir=\"temp\"):\n",
"    \"\"\"Run the template's inference over ``audio`` and save each predicted frame.\n",
"\n",
"    Despite the name, no video file is assembled here: every prediction is\n",
"    written as ``<output_dir>/<frame index zero-padded to 5 digits>.jpg``.\n",
"\n",
"    Args:\n",
"        template: Object providing ``_get_reader``, ``gen_infer_concurrent`` and\n",
"            ``compose`` (the ``stf_alternative.Template`` built in this notebook).\n",
"        audio: Path of an audio file readable by ``AudioSegment.from_file``.\n",
"        output_dir: Directory that receives the JPEG frames. It is created when\n",
"            missing, because ``cv2.imwrite`` returns ``False`` instead of raising\n",
"            when it cannot write to the target location.\n",
"\n",
"    Returns:\n",
"        list[str]: Paths of the frames that were written, in frame order.\n",
"\n",
"    Raises:\n",
"        OSError: If ``cv2.imwrite`` reports that a frame could not be written.\n",
"    \"\"\"\n",
"    start_idx = 0\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    reader = iter(template._get_reader(num_skip_frames=0))\n",
"    audio_segment = AudioSegment.from_file(audio)\n",
"    results = []\n",
"    with ThreadPoolExecutor(4) as pool:\n",
"        gen_infer = template.gen_infer_concurrent(pool, audio_segment, start_idx)\n",
"        # zip() stops as soon as either the inference stream or the template\n",
"        # frame reader is exhausted, so no StopIteration handling is needed.\n",
"        for idx, ((it, _chunk), frame) in enumerate(zip(gen_infer, reader), start_idx):\n",
"            # NOTE(review): the composed frame is not used; the call is kept in case\n",
"            # compose() has side effects -- confirm, and drop it if it has none.\n",
"            template.compose(idx, frame, it)\n",
"            frame_path = os.path.join(output_dir, f\"{idx:05d}.jpg\")\n",
"            # The last axis is reversed before writing (presumably RGB -> BGR for\n",
"            # OpenCV -- confirm against the model's output format).\n",
"            if not cv2.imwrite(frame_path, it[\"pred\"][:, :, ::-1]):\n",
"                raise OSError(f\"cv2.imwrite failed for {frame_path}\")\n",
"            results.append(frame_path)\n",
"\n",
"    return results"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "6ceedf46",
"metadata": {},
"outputs": [],
"source": [
"results = create_silent_video(template, audio)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cabfa23",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}