Spaces:

neuralspace
/

wer_calculator

Sleeping

File size: 2,588 Bytes

d9bd25a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5ddedb
d9bd25a
 
 
b5ddedb
 
 
 
d9bd25a
 
 
 
 
b5ddedb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9bd25a
 
 
 
 
 
b5ddedb
 
 
 
 
 
c0c2927
48267d2
b5ddedb
 
d9bd25a
 
 
 
b5ddedb
 
 
 
 
 
 
 
 
 
 
 
 
d9bd25a

import gradio as gr
import pandas as pd
from jiwer import wer
import re
import os

# Matches a YAML front-matter block: a `---` line, the (lazily captured)
# block content, and a closing `---` that must be followed by a line break.
# parse_readme() uses the end of the match to drop the block from the README.
REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")


def parse_readme(filepath: str) -> str:
    """Return the README text with its YAML front-matter block removed.

    Args:
        filepath: Path of the README file to read.

    Returns:
        The text that follows the first ``REGEX_YAML_BLOCK`` match, or the
        whole file content when nothing matches. When ``filepath`` does not
        exist, the placeholder string ``"No README.md found."`` is returned
        instead.
    """
    if not os.path.exists(filepath):
        return "No README.md found."

    # Decode explicitly as UTF-8 rather than the platform's locale encoding,
    # so non-ASCII characters in the README do not break or garble the read.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()

    # The file is already closed here; only the in-memory text is needed.
    match = REGEX_YAML_BLOCK.search(text)
    if match:
        return text[match.end():]
    return text


def get_wer(df: pd.DataFrame):
    """Compute the word error rate for a two-column frame.

    The first column is read as the predictions and the second column as the
    truths; column names are ignored because the lookup is positional.

    Args:
        df: Frame with predictions in column 0 and truths in column 1.

    Returns:
        Whatever ``wer`` returns when called with the truths as its first
        argument and the predictions as its second.
    """
    print(df.keys())
    # Positional access, in the same order as the table headers:
    # column 0 -> predictions, column 1 -> truths.
    hypotheses, references = (df.iloc[:, col].tolist() for col in (0, 1))
    print(references, hypotheses, type(references))
    return wer(references, hypotheses)


def compute(input_df: pd.DataFrame = None, input_file: str = None):
    """Compute the WER from either the typed-in table or an uploaded CSV.

    Exactly one input source has to be supplied.

    Args:
        input_df: Table with predictions in the first column and truths in
            the second. It counts as "not provided" when it is ``None``, has
            no rows, or contains any ``""`` cell (an untouched or partially
            filled table).
        input_file: The uploaded CSV, given either as a path string or as an
            object exposing the path through a ``.name`` attribute. Any falsy
            value counts as "not provided".

    Returns:
        The value returned by ``get_wer`` for the selected source.

    Raises:
        ValueError: If both sources are provided, or if neither is.
    """
    # Evaluate each source independently so a missing table (None) can never
    # be dereferenced while checking for a file.
    df_provided = (
        input_df is not None
        and not input_df.empty
        and not input_df.eq("").to_numpy().any()
    )
    file_provided = bool(input_file)

    if df_provided and file_provided:
        print("in error")
        raise ValueError("Please don't provide both DataFrame and file.")

    if file_provided:
        print("in file")
        # Accept both a plain path and a file-like wrapper carrying `.name`.
        csv_path = getattr(input_file, "name", input_file)
        file_df = pd.read_csv(csv_path)
        print(file_df)
        return get_wer(file_df)

    if df_provided:
        print("in df")
        return get_wer(input_df)

    # Nothing usable was supplied: fail loudly instead of returning None.
    print("in error")
    raise ValueError(
        "Please provide either a completely filled DataFrame or a CSV file."
    )


# Markdown usage instructions, passed to the Interface as its `description`.
description = """
To calculate WER:

* Type the `prediction` and the `truth` in the respective columns in the below calculator. 
* You can insert multiple predictions and truths by clicking on the `New row` button. 
* To calculate the WER after inserting all the texts, click on `Submit`.

OR

* Upload a CSV file with the columns being `prediction` and `truth`. 
* The first row of the file is supposed to have the column names.
* The sentences should be enclosed within `""` and the prediction and truth need to be separated by `,`.
* Find an example file [here](https://huggingface.co/spaces/neuralspace/wer_calculator/resolve/main/example.csv).
* To calculate the WER after uploading the CSV file, click on `Submit`.

NOTE: Please don't use both the methods at once.
"""

# Input widgets, created in the order they are handed to `compute`:
# the editable prediction/truth table first, then the CSV upload.
table_input = gr.components.Dataframe(
    headers=["prediction", "truth"],
    col_count=2,
    row_count=1,
    label="Input",
)
csv_input = gr.File(
    file_count="single",
    file_types=[".csv"],
    label="CSV File",
)

# Single text field that displays the value returned by `compute`.
wer_output = gr.components.Textbox(label="WER")

demo = gr.Interface(
    fn=compute,
    inputs=[table_input, csv_input],
    outputs=wer_output,
    description=description,
    title="WER Calculator",
    # README body (front matter stripped by parse_readme) is used as the article.
    article=parse_readme("README.md"),
)

# Start the app as soon as the module is executed.
demo.launch()