"""Gradio demo that inserts spaces into unsegmented text.

The text is run through a ``transformers`` ``'ner'`` pipeline loaded from the
``carbon225/canine-s-wordseg-<lang>`` model repository, and the predicted
labels are used to decide where a space goes.
"""

from functools import lru_cache

from transformers import pipeline
import gradio as gr


@lru_cache(maxsize=None)
def load_pipeline(lang: str, rev: str):
    """Build the ``'ner'`` pipeline for one language / revision pair.

    Results are memoised on ``(lang, rev)`` with no eviction, so repeated
    requests for the same pair reuse the already-constructed pipeline.

    Args:
        lang: Suffix of the model repository name (``canine-s-wordseg-<lang>``).
        rev: Model revision forwarded to ``pipeline`` as ``revision``.

    Returns:
        Whatever ``transformers.pipeline`` returns for these arguments.
    """
    model_id = f'carbon225/canine-s-wordseg-{lang}'
    return pipeline('ner', model=model_id, revision=rev)


# Doubles as the textbox placeholder and as the input used when nothing is typed.
PH = 'enteryoursentencehereitcanevenbeverylongthemodelwilltryitsbestbutdonotexpectittoworkeverytime'


def wordseg(text: str, lang: str, rev: str):
    """Return ``text`` with spaces inserted according to the model's labels.

    Args:
        text: Input to segment; ``''`` or ``None`` is replaced by ``PH``.
        lang: Language code passed to ``load_pipeline``.
        rev: Model revision passed to ``load_pipeline``.

    Returns:
        The concatenated ``'word'`` fields of the pipeline output, with a
        space in front of every item whose ``'entity'`` label starts with
        ``'B-'``, and with leading/trailing whitespace stripped.
    """
    if text is None or text == '':
        text = PH

    tagger = load_pipeline(lang, rev)

    pieces = []
    for token in tagger(text):
        # A 'B-' label presumably marks the first piece of a new word, so a
        # separator goes in front of it.
        if token['entity'].startswith('B-'):
            pieces.append(' ')
        pieces.append(token['word'])

    # The very first 'B-' item yields a leading space; strip() removes it.
    return ''.join(pieces).strip()


# Input and output widgets, created in the order they appear in the UI.
text_input = gr.Textbox(
    label='Input text',
    lines=3,
    placeholder=PH,
)
language_choice = gr.Dropdown(
    label='Language',
    choices=['en', 'pl'],
    value='en',
)
revision_choice = gr.Dropdown(
    label='Model revision',
    choices=['main', 'latest'],
    value='main',
)
segmented_output = gr.Textbox(label='Segmented text')

app = gr.Interface(
    title='Word Segmentation',
    description=(
        'This app will divide text without spaces into individual words. '
        'You can try different models with the revision dropdown.'
    ),
    fn=wordseg,
    inputs=[text_input, language_choice, revision_choice],
    outputs=segmented_output,
)

# Starts the web UI as soon as the module is executed.
app.launch()