jerpint committed on
Commit
2785052
1 Parent(s): 01b468b

drop duplicates when showing documents (#6)

Browse files
Files changed (1) hide show
  1. gradio_app.py +4 -0
gradio_app.py CHANGED
@@ -34,6 +34,10 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
34
  matched_documents.similarity_to_answer = (
35
  matched_documents.similarity_to_answer * 100
36
  )
 
 
 
 
37
  documents = "\n".join(
38
  [
39
  document_template.format(document=document)
 
34
  matched_documents.similarity_to_answer = (
35
  matched_documents.similarity_to_answer * 100
36
  )
37
+
38
+ # drop duplicates, keep highest ranking ones
39
+ matched_documents = matched_documents.sort_values("similarity_to_answer", ascending=False).drop_duplicates("title", keep="first")
40
+
41
  documents = "\n".join(
42
  [
43
  document_template.format(document=document)