Spaces:

innat
/

VideoMAE

Running

App Files Community

innat committed on Oct 9, 2023

Commit

bbc8456

•

1 Parent(s): 01acd5f

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -30

app.py CHANGED Viewed

@@ -10,6 +10,26 @@ from utils import read_video, frame_sampling, denormalize, reconstrunction
 from utils import IMAGENET_MEAN, IMAGENET_STD, num_frames, patch_size, input_size
 from labels import K400_label_map, SSv2_label_map, UCF_label_map
 def tube_mask_generator(mask_ratio):
     window_size = (
@@ -31,21 +51,27 @@ def tube_mask_generator(mask_ratio):
 def get_model(data_type):
     ft_model = keras.models.load_model(MODELS[data_type][0])
     pt_model = keras.models.load_model(MODELS[data_type][1])
-    label_map = {v: k for k, v in K400_label_map.items()}
     return ft_model, pt_model, label_map
-def inference(video_file, dataset_type, mask_ratio):
     print('---------------------------')
     print(video_file)
-    print(dataset_type)
     print(mask_ratio)
     print('---------------------------')
     container = read_video(video_file)
     frames = frame_sampling(container, num_frames=num_frames)
     bool_masked_pos_tf = tube_mask_generator(mask_ratio)
-    ft_model, pt_model, label_map = get_model(dataset_type)
     ft_model.trainable = False
     pt_model.trainable = False
@@ -78,25 +104,11 @@ def inference(video_file, dataset_type, mask_ratio):
 def main():
-    MODELS = {
-        'K400': [
-            './TFVideoMAE_S_K400_16x224_FT',
-            './TFVideoMAE_S_K400_16x224_PT'
-            ],
-        'SSv2': [
-            './TFVideoMAE_S_K400_16x224_FT',
-            './TFVideoMAE_S_K400_16x224_PT'
-            ],
-        'UCF' : [
-            './TFVideoMAE_S_K400_16x224_FT',
-            './TFVideoMAE_S_K400_16x224_PT'
-            ]
-    }
-    BENCHMARK_DATASETS = ['K400', 'SSv2', 'UCF']
-    SAMPLE_EXAMPLES = [
         ["examples/k400.mp4", 'Kintetics-400'],
-        ["examples/k400.mp4", 'SSv2'],
-        ["examples/k400.mp4", 'UCF']
     ]
     iface = gr.Interface(
@@ -104,16 +116,16 @@ def main():
         inputs=[
             gr.Video(type="file", label="Input Video"),
             gr.Radio(
-                BENCHMARK_DATASETS,
                 type='value',
-                # default=BENCHMARK_DATASETS[0],
                 label='Dataset',
             ),
             gr.Slider(
-                0,
-                1,
-                step=0.05,
-                # default=0.5,
                 label='Mask Ratio'
             )
         ],
@@ -121,7 +133,7 @@ def main():
             gr.Label(num_top_classes=3, label='scores'),
             gr.Image(type="filepath", label='reconstructed')
         ],
-        examples=SAMPLE_EXAMPLES,
         title="VideoMAE",
         description="Keras reimplementation of <a href='https://github.com/innat/VideoMAE'>VideoMAE</a> is presented here."
     )

 from utils import IMAGENET_MEAN, IMAGENET_STD, num_frames, patch_size, input_size
 from labels import K400_label_map, SSv2_label_map, UCF_label_map
+MODELS = {
+    'K400': [
+        './TFVideoMAE_S_K400_16x224_FT',
+        './TFVideoMAE_S_K400_16x224_PT'
+        ],
+    'SSv2': [
+        './TFVideoMAE_S_K400_16x224_FT',
+        './TFVideoMAE_S_K400_16x224_PT'
+        ],
+    'UCF' : [
+        './TFVideoMAE_S_K400_16x224_FT',
+        './TFVideoMAE_S_K400_16x224_PT'
+        ]
+}
+LABEL_MAPS = {
+    'K400': K400_label_map,
+    'SSv2': SSv2_label_map,
+    'UCF' : UCF_label_map
+}
 def tube_mask_generator(mask_ratio):
     window_size = (
 def get_model(data_type):
     ft_model = keras.models.load_model(MODELS[data_type][0])
     pt_model = keras.models.load_model(MODELS[data_type][1])
+    label_map = LABEL_MAPS.get(data_type)
+    label_map = {v: k for k, v in label_map.items()}
     return ft_model, pt_model, label_map
+def inference(video_file, data_type, mask_ratio):
     print('---------------------------')
     print(video_file)
+    print(data_type)
     print(mask_ratio)
     print('---------------------------')
+    # get sample data
     container = read_video(video_file)
     frames = frame_sampling(container, num_frames=num_frames)
+    # get models
     bool_masked_pos_tf = tube_mask_generator(mask_ratio)
+    ft_model, pt_model, label_map = get_model(data_type)
     ft_model.trainable = False
     pt_model.trainable = False
 def main():
+    datasets = ['K400', 'SSv2', 'UCF']
+    sample_example = [
         ["examples/k400.mp4", 'Kintetics-400'],
+        ["examples/k400.mp4", 'Something-Something-V2'],
+        ["examples/k400.mp4", 'UCF101']
     ]
     iface = gr.Interface(
         inputs=[
             gr.Video(type="file", label="Input Video"),
             gr.Radio(
+                datasets,
                 type='value',
+                default=datasets[0],
                 label='Dataset',
             ),
             gr.Slider(
+                0.5,
+                1.0,
+                step=0.1,
+                default=0.5,
                 label='Mask Ratio'
             )
         ],
             gr.Label(num_top_classes=3, label='scores'),
             gr.Image(type="filepath", label='reconstructed')
         ],
+        examples=sample_example,
         title="VideoMAE",
         description="Keras reimplementation of <a href='https://github.com/innat/VideoMAE'>VideoMAE</a> is presented here."
     )