Spaces:
Running
Running
drop duplicates when showing documents (#6)
Browse files
- gradio_app.py +4 -0
gradio_app.py
CHANGED
@@ -34,6 +34,10 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
|
|
34 |
matched_documents.similarity_to_answer = (
|
35 |
matched_documents.similarity_to_answer * 100
|
36 |
)
|
|
|
|
|
|
|
|
|
37 |
documents = "\n".join(
|
38 |
[
|
39 |
document_template.format(document=document)
|
|
|
34 |
matched_documents.similarity_to_answer = (
|
35 |
matched_documents.similarity_to_answer * 100
|
36 |
)
|
37 |
+
|
38 |
+
# drop duplicates, keep highest ranking ones
|
39 |
+
matched_documents = matched_documents.sort_values("similarity_to_answer", ascending=False).drop_duplicates("title", keep="first")
|
40 |
+
|
41 |
documents = "\n".join(
|
42 |
[
|
43 |
document_template.format(document=document)
|