jlonsako committed
Commit
394d8c1
1 Parent(s): 9437cb7

Added padding to batch_decode to handle differing audio sample sizes

Files changed (1)
app.py +2 -2
app.py CHANGED
@@ -108,7 +108,7 @@ def Transcribe(file, batch_size):
         # If the batch is full, process it
         if len(batch) == batch_size:
             # Concatenate all segments in the batch along the time axis
-            input_values = processor(batch, sampling_rate=16_000, return_tensors="pt")
+            input_values = processor(batch, sampling_rate=16_000, return_tensors="pt", padding=True)
             input_values = input_values.to(device)
             with torch.no_grad():
                 logits = model(**input_values).logits
@@ -141,7 +141,7 @@ def Transcribe(file, batch_size):
 
     if batch:
         # Concatenate all segments in the batch along the time axis
-        input_values = processor(batch, sampling_rate=16_000, return_tensors="pt")
+        input_values = processor(batch, sampling_rate=16_000, return_tensors="pt", padding=True)
         input_values = input_values.to(device)
         with torch.no_grad():
             logits = model(**input_values).logits
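For context, here is a minimal runnable sketch of the padded batching this change enables. The checkpoint name, device handling, and decode step are assumptions for illustration; the diff does not show how app.py actually loads its model and processor.

import torch
from transformers import AutoModelForCTC, AutoProcessor

# Hypothetical checkpoint; app.py's actual model is not shown in this diff.
model_id = "facebook/wav2vec2-base-960h"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForCTC.from_pretrained(model_id)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Two audio segments of unequal length (1.0 s and 1.5 s at 16 kHz).
batch = [torch.randn(16_000).numpy(), torch.randn(24_000).numpy()]

# Without padding=True the feature extractor raises an error when asked to
# stack arrays of different lengths into one tensor; with it, the shorter
# segment is zero-padded to the length of the longest segment in the batch.
input_values = processor(batch, sampling_rate=16_000, return_tensors="pt", padding=True)
input_values = input_values.to(device)
with torch.no_grad():
    logits = model(**input_values).logits

# Greedy CTC decode of both segments at once.
transcripts = processor.batch_decode(torch.argmax(logits, dim=-1))
print(transcripts)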