import gradio as gr
import fasttext
from huggingface_hub import hf_hub_download
import re
import string
import numpy as np

def load_GlotLID():
    """Fetch the GlotLID v3 weights and return them as a fastText model.

    The file ``model_v3.bin`` is requested from the ``cis-lmu/glotlid``
    repository through ``hf_hub_download``; the path that call returns is
    handed straight to ``fasttext.load_model``.
    """
    return fasttext.load_model(
        hf_hub_download(repo_id="cis-lmu/glotlid", filename="model_v3.bin")
    )

# Load the model once at import time; compute() reads this module-level instance.
model = load_GlotLID()

def preprocess_text(text):
    """Normalise *text* into a single clean line.

    Newlines, ASCII digits and the characters ``: • # { | }`` are each
    turned into a space; every run of whitespace is then collapsed to one
    space and leading/trailing whitespace is removed.
    """
    noise_chars = ':•#{|}' + string.digits
    # One-to-one table: every noise character maps to a single space.
    blank_out = str.maketrans(noise_chars, ' ' * len(noise_chars))
    flattened = text.replace('\n', ' ').translate(blank_out)
    return re.sub(r'\s+', ' ', flattened).strip()

def compute(sentence):
    """Return the top three GlotLID predictions for *sentence* as text.

    The input is cleaned with ``preprocess_text`` and passed to the
    module-level ``model``. Each prediction becomes one ``label: score``
    line, where the label is whatever follows the last ``__`` in the
    model's label string and the score is printed with four decimals.
    Lines are joined with newlines.
    """
    cleaned = preprocess_text(sentence)

    # Ask the model for its three best guesses.
    raw_labels, raw_scores = model.predict(cleaned, k=3)
    scores = np.asarray(raw_scores)

    lines = [
        f"{raw.split('__')[-1]}: {score:.4f}"
        for raw, score in zip(raw_labels, scores)
    ]
    return "\n".join(lines)

# Gradio UI: one text box in, one text box out, with compute() as the handler.
iface = gr.Interface(
    fn=compute,
    inputs=gr.Textbox(label="Enter a sentence"),
    outputs=gr.Textbox(label="Top 3 Language Predictions"),
    title="GlotLID: Language Identification (v3)",
    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text."
)

# Called at module level (no __main__ guard), so it runs whenever this file is executed or imported.
iface.launch()