abokbot committed on
Commit
c5ea378
1 Parent(s): 2d3c21e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
3
  import torch
4
  from huggingface_hub import hf_hub_download
@@ -33,6 +34,16 @@ def load_encoders():
33
  bi_encoder, cross_encoder = load_encoders()
34
  st.success('Encoders loaded!')
35
  st_model_load.text("")
 
 
 
 
 
 
 
 
 
 
36
 
37
  if 'text' not in st.session_state:
38
  st.session_state.text = ""
@@ -49,7 +60,7 @@ def search():
49
  ##### Semantic Search #####
50
  # Encode the query using the bi-encoder and find potentially relevant passages
51
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
52
- hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k)
53
  hits = hits[0] # Get the hits for the first query
54
 
55
  ##### Re-Ranking #####
 
1
  import streamlit as st
2
+ from dataset import load_dataset
3
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
4
  import torch
5
  from huggingface_hub import hf_hub_download
 
34
  bi_encoder, cross_encoder = load_encoders()
35
  st.success('Encoders loaded!')
36
  st_model_load.text("")
37
+
38
+ @st.cache_resource
39
+ def load_wikipedia_dataset():
40
+ print("Loading wikipedia dataset...")
41
+ dataset = load_dataset("abokbot/wikipedia-first-paragraph")
42
+ return dataset
43
+
44
+ dataset = load_wikipedia_dataset()
45
+ st.success('Datset loaded!')
46
+ st_model_load.text("")
47
 
48
  if 'text' not in st.session_state:
49
  st.session_state.text = ""
 
60
  ##### Semantic Search #####
61
  # Encode the query using the bi-encoder and find potentially relevant passages
62
  question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
63
+ hits = util.semantic_search(question_embedding, wikipedia_embedding, top_k=top_k)
64
  hits = hits[0] # Get the hits for the first query
65
 
66
  ##### Re-Ranking #####