H-Liu1997 committed
Commit 4b12aec
1 Parent(s): 3e5e0c1

Update app.py

Files changed (1):
  1. app.py +34 -25
app.py CHANGED
@@ -266,6 +266,14 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
         node["motion_low"] = motion_low_all[i]
 
     graph = graph_pruning(graph)
+    # for gradio, use a subgraph
+    if len(graph.vs) > 1800:
+        gap = len(graph.vs) - 1800
+        start_d = random.randint(0, 1800)
+        graph.delete_vertices(range(start_d, start_d + gap))
+    ascc_2 = graph.clusters(mode="STRONG")
+    graph = ascc_2.giant()
+
     # drop the id of gt
     idx = 0
     audio_waveform, sr = librosa.load(audio_path)
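The added block caps the motion graph at 1800 vertices for the zero-GPU demo: a random contiguous block of surplus vertices is deleted, then only the giant strongly connected component is kept so every surviving node remains mutually reachable during graph search. Below is a minimal sketch of the same step with python-igraph; the directed ring is an illustrative stand-in for the real motion graph, whose construction is not shown in this diff.

# Minimal sketch of the pruning step above (python-igraph); the ring graph
# is a placeholder for the real motion graph, not from app.py.
import random
import igraph

graph = igraph.Graph.Ring(2000, directed=True, circular=True)

if len(graph.vs) > 1800:
    gap = len(graph.vs) - 1800          # surplus vertices to drop
    start_d = random.randint(0, 1800)   # random start of the deleted block
    graph.delete_vertices(range(start_d, start_d + gap))

# Deleting a contiguous block can break reachability (here the ring becomes
# a path), so keep only the giant strongly connected component, where every
# remaining node can still reach every other one.
ascc_2 = graph.clusters(mode="STRONG")
graph = ascc_2.giant()
print(len(graph.vs))  # size of the mutually reachable core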
@@ -438,7 +446,7 @@ def prepare_all(yaml_name):
     return config
 
 
-def save_first_20_seconds(video_path, output_path="./save_video.mp4"):
+def save_first_10_seconds(video_path, output_path="./save_video.mp4"):
     import cv2
     cap = cv2.VideoCapture(video_path)
 
@@ -452,7 +460,7 @@ def save_first_20_seconds(video_path, output_path="./save_video.mp4"):
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
 
-    frames_to_save = fps * 20
+    frames_to_save = fps * 10
     frame_count = 0
 
     while cap.isOpened() and frame_count < frames_to_save:
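These two hunks rename the helper and halve the saved clip from 20 to 10 seconds. The lines between them (probing fps and frame size) and the tail of the read/write loop fall outside the diff context; here is a sketch of the full helper assuming the standard OpenCV pattern, with the unshown lines reconstructed as assumptions.

# Sketch of the complete helper; the property-probing lines and the loop
# tail are not visible in the diff and are assumed here.
import cv2

def save_first_10_seconds(video_path, output_path="./save_video.mp4"):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return

    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frames_to_save = fps * 10  # 10 seconds' worth of frames
    frame_count = 0

    while cap.isOpened() and frame_count < frames_to_save:
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)
        frame_count += 1

    cap.release()
    out.release()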
@@ -475,7 +483,6 @@ character_name_to_yaml = {
 }
 
 cfg = prepare_all("./configs/gradio.yaml")
-seed_everything(cfg.seed)
 
 smplx_model = smplx.create(
     "./emage/smplx_models/",
@@ -499,9 +506,10 @@ state_dict = checkpoint['model_state_dict']
 # new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
 model.load_state_dict(state_dict, strict=False)
 
-@spaces.GPU(duration=1000)
-def tango(audio_path, character_name, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
-
+@spaces.GPU(duration=299)
+def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
+    cfg.seed = seed
+    seed_everything(cfg.seed)
     experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
     saved_audio_path = "./saved_audio.wav"
     sample_rate, audio_waveform = audio_path
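Together with the deletion at old line 478, this moves seeding from import time into each request, so the user-supplied seed takes effect on every call; @spaces.GPU(duration=299) also trims the requested ZeroGPU allocation from 1000 seconds to just under five minutes. seed_everything itself is defined elsewhere in the repo; a typical implementation, sketched here as an assumption, seeds every RNG the pipeline touches.

# Hedged sketch of seed_everything (not shown in this diff): a common
# implementation seeds Python, NumPy, and PyTorch in one place.
import os
import random

import numpy as np
import torch

def seed_everything(seed: int):
    random.seed(seed)                     # Python's built-in RNG
    np.random.seed(seed)                  # NumPy RNG
    torch.manual_seed(seed)               # PyTorch CPU RNG
    torch.cuda.manual_seed_all(seed)      # all CUDA devices (no-op on CPU)
    os.environ["PYTHONHASHSEED"] = str(seed)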
@@ -523,7 +531,7 @@ def tango(audio_path, character_name, create_graph=False, video_folder_path=None
         create_graph = True
         # load the video and save its first 10 seconds to "./outputs/tmpvideo/save_video.mp4"
         os.makedirs("./outputs/tmpvideo/", exist_ok=True)
-        save_first_20_seconds(character_name, "./outputs/tmpvideo/save_video.mp4")
+        save_first_10_seconds(character_name, "./outputs/tmpvideo/save_video.mp4")
 
     if create_graph:
         video_folder_path = "./outputs/tmpvideo/"
@@ -564,7 +572,7 @@ examples_video = [
 ]
 
 combined_examples = [
-    [audio[0], video[0]] for audio in examples_audio for video in examples_video
+    [audio[0], video[0], 2024] for audio in examples_audio for video in examples_video
 ]
 
 def make_demo():
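The comprehension now appends a fixed default seed of 2024 to every audio/video pair, so each cached example row matches tango's new three-input signature. With placeholder file names (the real lists hold example paths), it expands like this:

# Placeholder lists; the real examples_audio / examples_video hold file paths.
examples_audio = [["a1.wav"], ["a2.wav"]]
examples_video = [["v1.mp4"]]

combined_examples = [
    [audio[0], video[0], 2024] for audio in examples_audio for video in examples_video
]
print(combined_examples)
# [['a1.wav', 'v1.mp4', 2024], ['a2.wav', 'v1.mp4', 2024]]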
@@ -589,21 +597,20 @@ def make_demo():
         """
         )
 
-        with gr.Row():
-            gr.Markdown("""
-            <h4 style="text-align: left;">
-            This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
-
-            Details of the low-quality mode:
-            1. Lower resolution.
-            2. More discontinuous frames (causing noticeable "frame jumps").
-            3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
-            4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
-            5. You can provide a custom background video for your character, but it is limited to 20 seconds.
-
-            Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
-            </h4>
-            """)
+        gr.Markdown("""
+        <h4 style="text-align: left;">
+        This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
+
+        Details of the low-quality mode:
+        1. Lower resolution.
+        2. More discontinuous frames (causing noticeable "frame jumps").
+        3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
+        4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
+        5. You can provide a custom background video for your character, but it is limited to 20 seconds.
+
+        Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
+        </h4>
+        """)
 
         # Create a gallery with 5 videos
         with gr.Row():
@@ -652,6 +659,8 @@ def make_demo():
             label="Character Examples",
             cache_examples=False
         )
+        with gr.Row():
+            seed_input = gr.Number(label="Seed", value=2024, interactive=True)
 
         # Fourth row: Generate video button
         with gr.Row():
@@ -660,7 +669,7 @@
         # Define button click behavior
         run_button.click(
             fn=tango,
-            inputs=[audio_input, video_input],
+            inputs=[audio_input, video_input, seed_input],
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
         )
 
@@ -669,7 +678,7 @@
         print(combined_examples)
         gr.Examples(
             examples=combined_examples,
-            inputs=[audio_input, video_input], # Both audio and video as inputs
+            inputs=[audio_input, video_input, seed_input], # audio, video, and seed as inputs
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
             fn=tango, # Function that processes both audio and video inputs
             label="Select Combined Audio and Video Examples (Cached)",
 