glotlid / app.py
anzorq's picture
Update app.py
2196815 verified
raw
history blame contribute delete
No virus
1.27 kB
import gradio as gr
import fasttext
from huggingface_hub import hf_hub_download
import re
import string
import numpy as np
def load_GlotLID():
    """Fetch the GlotLID v3 weights from the Hugging Face Hub and load them.

    Returns:
        The fastText model object produced by ``fasttext.load_model``.
    """
    # hf_hub_download returns the local path of the downloaded file, which
    # is handed straight to fastText.
    return fasttext.load_model(
        hf_hub_download(repo_id="cis-lmu/glotlid", filename="model_v3.bin")
    )
# Load the model once at import time; compute() reads this module-level object.
model = load_GlotLID()
# Characters blanked out before prediction: a few punctuation/markup symbols
# and the ASCII digits. Built once at import time rather than on every call.
_BLANKED_CHARS_TABLE = str.maketrans(
    {ch: " " for ch in ":•#{|}" + string.digits}
)
# Any run of whitespace (including newlines and tabs) collapses to one space.
_WHITESPACE_RUN = re.compile(r"\s+")


def preprocess_text(text):
    """Normalize *text* into a single clean line for the language model.

    The characters ``:•#{|}`` and the ASCII digits are replaced by spaces,
    every run of whitespace (newlines included) is collapsed to a single
    space, and leading/trailing whitespace is removed.

    Args:
        text: The raw input string. ``None`` or an empty string is accepted.

    Returns:
        The cleaned single-line string; ``""`` when *text* is ``None`` or
        empty.
    """
    if not text:
        # Nothing to clean; also avoids an AttributeError on None.
        return ""
    blanked = text.translate(_BLANKED_CHARS_TABLE)
    # Newlines need no separate handling: \s+ already matches them.
    return _WHITESPACE_RUN.sub(" ", blanked).strip()
def compute(sentence):
    """Return the top 3 language predictions for *sentence* as display text.

    The input is cleaned with ``preprocess_text`` and passed to the
    module-level ``model``. Each prediction becomes one line of the form
    ``"<label>: <score>"``, where the label is the part after the last
    ``__`` in the model's label string and the score is formatted to four
    decimal places.

    Args:
        sentence: The raw text entered by the user; ``None`` is treated as
            empty.

    Returns:
        The prediction lines joined by newlines, or ``""`` when no text is
        left after cleaning.
    """
    sentence = preprocess_text(sentence or "")
    if not sentence:
        # There is nothing to identify, so skip the model call entirely.
        return ""

    # Get top 3 predictions
    labels, probs = model.predict(sentence, k=3)
    return "\n".join(
        f"{label.split('__')[-1]}: {score:.4f}"
        for label, score in zip(labels, probs)
    )
# Expose compute() through a one-textbox-in, one-textbox-out Gradio UI and
# start serving it.
iface = gr.Interface(
    title="GlotLID: Language Identification (v3)",
    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text.",
    fn=compute,
    inputs=gr.Textbox(label="Enter a sentence"),
    outputs=gr.Textbox(label="Top 3 Language Predictions"),
)
iface.launch()