"""Gradio demo that reports the top language predictions of GlotLID v3."""

import re
import string

import fasttext
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download

# Number of candidate languages requested from the model for each input.
TOP_K = 3

# Characters that are blanked out before prediction: a handful of
# punctuation/markup symbols plus every ASCII digit.  Built once at import
# time because the table never changes between calls.
_NOISE_TABLE = {ord(char): " " for char in ":•#{|}" + string.digits}

# Any run of whitespace (spaces, tabs, newlines, ...) is collapsed to one space.
_WHITESPACE_RE = re.compile(r"\s+")


def load_GlotLID():
    """Fetch the GlotLID v3 model file from the Hugging Face Hub and load it.

    Returns:
        The fastText model object produced by ``fasttext.load_model`` for
        ``model_v3.bin`` of the ``cis-lmu/glotlid`` repository.
    """
    model_path = hf_hub_download(repo_id="cis-lmu/glotlid", filename="model_v3.bin")
    model = fasttext.load_model(model_path)
    return model


# Loaded once at import time and shared by every request.
model = load_GlotLID()


def preprocess_text(text: str) -> str:
    """Normalise *text* into a single clean line for the model.

    The characters ``:•#{|}`` and all ASCII digits are replaced by spaces,
    every run of whitespace (newlines included) is collapsed to one space,
    and leading/trailing whitespace is removed.

    Args:
        text: Raw user input.

    Returns:
        The cleaned, single-line string; empty if nothing but noise
        characters and whitespace was supplied.
    """
    text = text.translate(_NOISE_TABLE)
    # Newlines are whitespace, so this one substitution also flattens the
    # input onto a single line; no separate newline replacement is needed.
    return _WHITESPACE_RE.sub(" ", text).strip()


def compute(sentence):
    """Return the model's top predictions for *sentence* as display text.

    Args:
        sentence: Text entered by the user. ``None`` is treated as empty.

    Returns:
        One ``"<label>: <score>"`` line per prediction (score with four
        decimals), joined by newlines.  An empty string is returned when the
        input contains no usable text after preprocessing, so the model is
        never queried with a blank line.
    """
    sentence = preprocess_text(sentence or "")
    if not sentence:
        # Nothing left to classify; a prediction on a blank line would not
        # say anything about the user's text.
        return ""

    # Get the top TOP_K predictions.
    labels, probs = model.predict(sentence, k=TOP_K)
    probs = np.asarray(probs)

    # Keep only the part of each label after its last "__" separator
    # (e.g. "__label__xyz" -> "xyz").
    return "\n".join(
        f"{label.rsplit('__', 1)[-1]}: {score:.4f}"
        for label, score in zip(labels, probs)
    )


iface = gr.Interface(
    fn=compute,
    inputs=gr.Textbox(label="Enter a sentence"),
    outputs=gr.Textbox(label="Top 3 Language Predictions"),
    title="GlotLID: Language Identification (v3)",
    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text.",
)

iface.launch()