Spaces:
Running
Running
Louis-François Bouchard
committed on
Commit
•
71cffeb
1
Parent(s):
30aecba
5 sources (#26)
Browse files
* changed back to 5 sources
* removed number of chunks
* ran black
app.py
CHANGED
@@ -75,20 +75,20 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
|
|
75 |
return ""
|
76 |
|
77 |
documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
|
78 |
-
document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), highest relevance: {document.similarity_to_answer:2.1f} % | # total chunks matched: {document.repetition:d}"
|
79 |
|
80 |
matched_documents.similarity_to_answer = (
|
81 |
matched_documents.similarity_to_answer * 100
|
82 |
)
|
83 |
|
84 |
-
matched_documents["repetition"] = matched_documents.groupby("title")[
|
85 |
-
|
86 |
-
].transform("size")
|
87 |
|
88 |
# drop duplicates, keep highest ranking ones
|
89 |
matched_documents = matched_documents.sort_values(
|
90 |
"similarity_to_answer", ascending=False
|
91 |
-
).drop_duplicates("title", keep="first")
|
92 |
|
93 |
# Revert back to correct display
|
94 |
display_source_to_ui = {
|
|
|
75 |
return ""
|
76 |
|
77 |
documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
|
78 |
+
document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), highest relevance: {document.similarity_to_answer:2.1f} %" # | # total chunks matched: {document.repetition:d}"
|
79 |
|
80 |
matched_documents.similarity_to_answer = (
|
81 |
matched_documents.similarity_to_answer * 100
|
82 |
)
|
83 |
|
84 |
+
# matched_documents["repetition"] = matched_documents.groupby("title")[
|
85 |
+
# "title"
|
86 |
+
# ].transform("size")
|
87 |
|
88 |
# drop duplicates, keep highest ranking ones
|
89 |
matched_documents = matched_documents.sort_values(
|
90 |
"similarity_to_answer", ascending=False
|
91 |
+
).drop_duplicates("title", keep="first")
|
92 |
|
93 |
# Revert back to correct display
|
94 |
display_source_to_ui = {
|
cfg.py
CHANGED
@@ -73,7 +73,7 @@ Q:
|
|
73 |
},
|
74 |
retriever_cfg={
|
75 |
"path": f"{DEEPLAKE_DATASET_PATH}",
|
76 |
-
"top_k":
|
77 |
"thresh": 0.55,
|
78 |
"max_tokens": 13000,
|
79 |
"embedding_model": "text-embedding-ada-002",
|
|
|
73 |
},
|
74 |
retriever_cfg={
|
75 |
"path": f"{DEEPLAKE_DATASET_PATH}",
|
76 |
+
"top_k": 5,
|
77 |
"thresh": 0.55,
|
78 |
"max_tokens": 13000,
|
79 |
"embedding_model": "text-embedding-ada-002",
|