"""Streamlit search page that re-ranks a pickled document collection.

Every document is scored against the user's query with a cross-encoder and
the results are shown page by page.
"""

import pickle

import streamlit as st
from sentence_transformers import CrossEncoder

# The document collection is loaded once at import time. The rest of this
# file reads the "title", "text" and "doc_url" keys from each entry.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only ship an 'eiga_docs.pkl' that comes from a trusted source.
with open('eiga_docs.pkl', 'rb') as file:
    documents = pickle.load(file)

# Legacy prototype (removed dead code): an earlier version of this script
# loaded a FAISS index from 'index.pkl', encoded the query with the
# 'all-mpnet-base-v2' bi-encoder, retrieved k = 30 candidates, and rendered
# the 10 best results inline. The current flow skips the retrieval stage and
# scores the whole collection with the cross-encoder instead.

# Cross-encoder used to score (query, document) pairs for re-ranking.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# the model is constructed again on each rerun; consider Streamlit's resource
# caching if the installed version provides it.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
def display_results(sorted_docs, page_number, results_per_page):
    """Render one page of ranked documents in the main Streamlit area.

    Args:
        sorted_docs: Ranked documents, best first. Each entry must provide
            the ``title``, ``text`` and ``doc_url`` keys.
        page_number: 1-based index of the page to show.
        results_per_page: Number of documents shown on a page.

    A page past the end of ``sorted_docs`` renders nothing.
    """
    start_index = (page_number - 1) * results_per_page
    # Slicing clamps at the end of the list, so the last page may be short.
    page = sorted_docs[start_index:start_index + results_per_page]
    for rank, doc in enumerate(page, start=start_index + 1):
        st.write(f"**Result {rank}.** Title: {doc['title']}")
        st.write(f"Text : {doc['text']}")
        st.write(f"Link : {doc['doc_url']}")
        st.markdown("""---""")


def perform_search(query):
    """Rank every document against ``query`` with the cross-encoder.

    Args:
        query: The user's question. An empty or missing query yields no
            results.

    Returns:
        The documents ordered from highest to lowest cross-encoder score, or
        an empty list when there is no query or no document to score.
    """
    if not query:
        return []

    cross_inp = [[query, doc["title"] + " " + doc["text"]] for doc in documents]
    if not cross_inp:
        # Nothing to score; skip the model call entirely.
        return []

    scores = cross_encoder.predict(cross_inp)
    # Highest score first; equal scores are ordered by descending index,
    # which is what sorting (score, index) pairs in reverse produces.
    order = sorted(
        range(len(cross_inp)),
        key=lambda idx: (scores[idx], idx),
        reverse=True,
    )
    return [documents[idx] for idx in order]


def main():
    """Build the page: pagination sidebar, query box and ranked results."""
    st.sidebar.title("Pagination")
    results_per_page = st.sidebar.number_input(
        "Results per page", min_value=1, max_value=10, value=5
    )
    # Ceiling division, floored at one page so that an empty collection does
    # not produce a page selector whose max_value is below its min_value.
    total_pages = max(
        1, (len(documents) + results_per_page - 1) // results_per_page
    )
    page_number = st.sidebar.number_input(
        "Page Number", min_value=1, max_value=total_pages, value=1
    )
    st.sidebar.write(f"Current Page Number: {page_number}")
    st.sidebar.write(f"Totale Pages :{total_pages}")

    st.title("Information Retrieval System")
    query = st.text_input("Enter your question:")
    search_clicked = st.button("Search")

    # Streamlit reruns the whole script on every widget change (including a
    # page change). Re-rank only when the button is pressed or the query text
    # actually changed; otherwise reuse the ranking kept in session state.
    if search_clicked or (query and st.session_state.get("last_query") != query):
        st.session_state.sorted_docs = perform_search(query)
        st.session_state.last_query = query
    sorted_docs = st.session_state.get("sorted_docs", [])

    if sorted_docs:
        display_results(sorted_docs, page_number, results_per_page)
        # NOTE(review): the original indentation was lost; this footer is
        # assumed to belong with the rendered results.
        st.write(f"Current Page Number: {page_number}")


# Run the app
if __name__ == "__main__":
    main()