Translator / app.py
Lenylvt's picture
Update app.py
d06641e verified
raw
history blame contribute delete
No virus
2.72 kB
import streamlit as st
import pandas as pd
import requests
from transformers import MarianMTModel, MarianTokenizer
import io
def fetch_languages(url, timeout=10):
    """Download the language table at *url* and build select-box options.

    The resource is read as pipe-delimited text: its first two lines are
    skipped and every remaining line is expected to hold four cells
    (ISO 639-1 code, ISO 639-2 code, language name, native name).

    Args:
        url: Address of the language table to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        A list of ``(iso_639_1_code, "code - Language Name")`` tuples, or an
        empty list when the request fails or is not answered with HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer: the caller simply gets no languages.
        return []
    if response.status_code != 200:
        return []

    # Decode the raw bytes explicitly as UTF-8, then expose the text as a
    # file-like object for pandas.
    csv_content = response.content.decode('utf-8')
    df = pd.read_csv(
        io.StringIO(csv_content),
        delimiter="|",
        skiprows=2,
        header=None,
    ).dropna(axis=1, how='all')  # leading/trailing pipes yield all-NaN columns
    df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']

    # Cells may be padded with spaces around the pipes; strip them so the code
    # can be used in a model name and the label reads cleanly.
    codes = df['ISO 639-1'].str.strip()
    names = df['Language Name'].astype(str).str.strip()
    return [(code, f"{code} - {name}") for code, name in zip(codes, names)]
# Location of the language table; update this URL if the file has moved.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
language_options = fetch_languages(url)
if not language_options:
    # With no options the select boxes return None, and unpacking that
    # selection in the translate handler would raise. Stop this run instead.
    st.error("Could not load the list of languages. Please try again later.")
    st.stop()
# Streamlit UI components: page header, language pickers and the input box.
st.title("📜 Translator")
st.write(
    "We use model from [Language Technology Research Group at the University of Helsinki](https://huggingface.co/Helsinki-NLP). "
    "For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Translator-API). "
    "🔴 Not all languages are available"
)
# Each option is a (code, label) tuple; only the label is shown to the user.
source_language = st.selectbox(
    "1️⃣ Select Source Language",
    options=language_options,
    format_func=lambda option: option[1],
)
target_language = st.selectbox(
    "2️⃣ Select Target Language",
    options=language_options,
    format_func=lambda option: option[1],
)
text = st.text_area("✒️ Enter text to translate...", height=150)
def translate_text(text, source_language_code, target_language_code):
    """Translate *text* using the matching Helsinki-NLP OPUS-MT model.

    The model name is built as
    ``Helsinki-NLP/opus-mt-<source>-<target>`` from the two language codes.

    Args:
        text: Text to translate. The tokenizer call truncates the input to
            512 tokens, so anything beyond that is not translated.
        source_language_code: Code of the language *text* is written in.
        target_language_code: Code of the language to translate into.

    Returns:
        The translated text. Blank input yields an empty string. When both
        codes are equal, or when loading/running the model raises, a
        human-readable message is returned instead of raising.
    """
    if source_language_code == target_language_code:
        return "🔴 Translation between the same languages is not supported."
    if not text or not text.strip():
        # Nothing to translate: return before loading any model.
        return ""

    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        encoded = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        translated = model.generate(**encoded)
        # A single input string produces a single output sequence.
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        # Not every language pair has a published model; report the failure
        # as text so the UI can display it rather than crash.
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
# Run a translation only when the button is pressed during this script run.
if st.button("📝 Translate"):
    if not text.strip():
        # Avoid loading a model just to translate nothing.
        st.warning("Please enter some text to translate.")
    else:
        # Each selection is a (code, label) tuple; only the code is needed.
        source_language_code = source_language[0]
        target_language_code = target_language[0]
        translation = translate_text(text, source_language_code, target_language_code)
        st.text_area("⬇️ Translated Text", value=translation, height=150, key="translation_output")