# salvadoran-news / app.py
# Juan Martínez
# remove model cache
# 0409dcd
# raw
# history blame contribute delete
# No virus
# 1.24 kB
import html

import streamlit as st
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
def search(text, model, ds, n, snippet_length=150):
    """Return the ``n`` nearest articles to ``text`` as display-ready tuples.

    The query is embedded with ``model.encode`` and looked up through
    ``ds.get_nearest_examples`` on the ``"embedding"`` column.

    Args:
        text: Query string to embed.
        model: Object exposing ``encode(text)``.
        ds: Object exposing ``get_nearest_examples(column, query, k=...)``
            that returns ``(scores, examples)``, where ``examples`` maps
            ``"title"``, ``"link"`` and ``"content"`` to parallel sequences.
        n: Number of neighbours to request.
        snippet_length: Maximum number of characters of each article's
            content to keep (defaults to 150, the previously hard-coded
            value).

    Returns:
        A list of ``(title, snippet, url, score)`` tuples, in the order the
        index returned them.
    """
    encoded_text = model.encode(text)
    scores, retrieved_examples = ds.get_nearest_examples(
        "embedding", encoded_text, k=n
    )
    # A row whose content is None would make the slice raise TypeError;
    # treat missing content as an empty snippet instead.
    snippets = [
        (content or "")[:snippet_length]
        for content in retrieved_examples["content"]
    ]
    return list(
        zip(
            retrieved_examples["title"],
            snippets,
            retrieved_examples["link"],
            scores,
        )
    )
# allow_output_mutation=True stops st.cache from hashing the returned object
# on every cache hit to detect mutation; that check is wasted work on a large
# dataset that carries a FAISS index.
# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour
# of st.cache_resource -- switch once the deployed Streamlit version is
# confirmed to provide it.
@st.cache(allow_output_mutation=True)
def get_dataset():
    """Load the embedded news dataset and attach a FAISS index to it.

    Returns:
        The ``"train"`` split of ``justinian336/salvadoran-news-embedded``
        with a FAISS index added over its ``"embedding"`` column.
    """
    ds = load_dataset("justinian336/salvadoran-news-embedded")["train"]
    ds.add_faiss_index(column="embedding")
    return ds
def get_model():
    """Ensure the sentence-embedding model is stored in the session state.

    On the first call in a session, instantiates
    ``SentenceTransformer("justinian336/chupeto")`` and stores it under the
    ``"model"`` key of ``st.session_state``. Later calls do nothing.
    """
    if "model" in st.session_state:
        return
    st.session_state["model"] = SentenceTransformer("justinian336/chupeto")
# Script body: load the shared resources, then render the search page.
ds = get_dataset()
get_model()

st.markdown("# Buscador de Noticias Salvadoreñas")
search_text = st.text_input(label="Búsqueda")

if search_text:
    search_results = search(search_text, st.session_state["model"], ds, 10)
    for title, content, url, _ in search_results:
        # The values come from the dataset and are rendered with
        # unsafe_allow_html=True, so escape them to keep stray markup in a
        # title, snippet or link from being injected into the page.
        safe_title = html.escape(title or "")
        safe_content = html.escape(content or "")
        safe_url = html.escape(url or "", quote=True)
        st.markdown(
            f"""<div><a href="{safe_url}">{safe_title}</a></div>""",
            unsafe_allow_html=True,
        )
        st.markdown(f"""<div>{safe_content}...</div>""", unsafe_allow_html=True)
        # NOTE(review): separator assumed to be emitted once per result.
        st.markdown("---")