File size: 2,588 Bytes
d9bd25a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5ddedb
d9bd25a
 
 
b5ddedb
 
 
 
d9bd25a
 
 
 
 
b5ddedb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9bd25a
 
 
 
 
 
b5ddedb
 
 
 
 
 
c0c2927
48267d2
b5ddedb
 
d9bd25a
 
 
 
b5ddedb
 
 
 
 
 
 
 
 
 
 
 
 
d9bd25a
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import pandas as pd
from jiwer import wer
import re
import os

# Matches `---`, one or more line breaks, the shortest possible body, one or
# more line breaks, and a closing `---` followed by a single line break.
REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")


def parse_readme(filepath):
    """Return the README text with its `---`-delimited metadata block removed.

    Args:
        filepath: Path of the README file to read.

    Returns:
        The text that follows the first match of ``REGEX_YAML_BLOCK``, the
        full text when the pattern does not match, or the string
        ``"No README.md found."`` when ``filepath`` does not exist.
    """
    if not os.path.exists(filepath):
        return "No README.md found."

    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the app.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()

    # NOTE: search() is unanchored, so the first `---` ... `---` pair is
    # stripped wherever it occurs, together with everything preceding it.
    match = REGEX_YAML_BLOCK.search(text)
    return text[match.end():] if match else text


def get_wer(df: pd.DataFrame):
    """Compute the word error rate for the sentence pairs held in ``df``.

    When ``df`` has columns named ``prediction`` and ``truth`` they are used
    regardless of their order; otherwise the first column is taken as the
    predictions and the second as the truths.

    Args:
        df: Table with one prediction/truth pair per row.

    Returns:
        The value returned by ``wer(truths, preds)``.
    """
    # Prefer the header names so a file whose columns are swapped is not
    # scored with reference and hypothesis exchanged.
    if {"prediction", "truth"} <= set(df.columns):
        preds = df["prediction"].tolist()
        truths = df["truth"].tolist()
    else:
        preds = df.iloc[:, 0].tolist()
        truths = df.iloc[:, 1].tolist()
    return wer(truths, preds)


def _table_is_complete(table):
    """Return True when ``table`` has at least one row and no blank cell."""
    if table is None or table.empty:
        return False
    has_blank = table.eq("").values.any() or table.isna().values.any()
    return not has_blank


def compute(input_df: pd.DataFrame = None, input_file: str = None):
    """Compute the WER from either the typed table or an uploaded CSV file.

    Args:
        input_df: Table of prediction/truth pairs. It counts as provided only
            when it has at least one row and every cell is filled in.
        input_file: Uploaded CSV file, given either as a path or as an object
            whose ``name`` attribute is the path.

    Returns:
        The result of ``get_wer`` for the selected input.

    Raises:
        ValueError: If both a completely filled table and a file are given,
            or if neither usable input is given.
    """
    table_ready = _table_is_complete(input_df)
    has_file = bool(input_file)

    if table_ready and has_file:
        raise ValueError("Please don't provide both DataFrame and file.")

    if has_file:
        # The annotation says `str` but the path used to be read from
        # `.name`; accept both forms.
        csv_path = getattr(input_file, "name", input_file)
        return get_wer(pd.read_csv(csv_path))

    if table_ready:
        return get_wer(input_df)

    # Previously a partially filled table without a file returned None
    # silently; report the problem instead.
    raise ValueError(
        "Please fill in every cell of the table or upload a CSV file."
    )


# Markdown usage instructions for the calculator, covering both the typed
# table and the CSV upload.
description = """
To calculate WER:

* Type the `prediction` and the `truth` in the respective columns in the below calculator. 
* You can insert multiple predictions and truths by clicking on the `New row` button. 
* To calculate the WER after inserting all the texts, click on `Submit`.

OR

* Upload a CSV file with the columns being `prediction` and `truth`. 
* The first row of the file is supposed to have the column names.
* The sentences should be enclosed within `""` and the prediction and truth need to be separated by `,`.
* Find an example file [here](https://huggingface.co/spaces/neuralspace/wer_calculator/resolve/main/example.csv).
* To calculate the WER after uploading the CSV file, click on `Submit`.

NOTE: Please don't use both the methods at once.
"""

# Input widgets: a two-column table for typed sentence pairs and a
# single-file CSV upload.
_sentence_table = gr.components.Dataframe(
    headers=["prediction", "truth"],
    col_count=2,
    row_count=1,
    label="Input",
)
_csv_upload = gr.File(
    file_count="single",
    file_types=[".csv"],
    label="CSV File",
)

# Output widget that displays the computed score.
_wer_output = gr.components.Textbox(label="WER")

# Wire the widgets to `compute`; the article is the README text returned by
# `parse_readme`.
demo = gr.Interface(
    fn=compute,
    inputs=[_sentence_table, _csv_upload],
    outputs=_wer_output,
    description=description,
    title="WER Calculator",
    article=parse_readme("README.md"),
)

demo.launch()