Louis-François Bouchard committed on
Commit
71cffeb
1 Parent(s): 30aecba

5 sources (#26)

Browse files

* changed back to 5 sources

* removed number of chunks

* ran black

Files changed (2) hide show
  1. app.py +5 -5
  2. cfg.py +1 -1
app.py CHANGED
@@ -75,20 +75,20 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
75
  return ""
76
 
77
  documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
78
- document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), highest relevance: {document.similarity_to_answer:2.1f} % | # total chunks matched: {document.repetition:d}"
79
 
80
  matched_documents.similarity_to_answer = (
81
  matched_documents.similarity_to_answer * 100
82
  )
83
 
84
- matched_documents["repetition"] = matched_documents.groupby("title")[
85
- "title"
86
- ].transform("size")
87
 
88
  # drop duplicates, keep highest ranking ones
89
  matched_documents = matched_documents.sort_values(
90
  "similarity_to_answer", ascending=False
91
- ).drop_duplicates("title", keep="first")[:5]
92
 
93
  # Revert back to correct display
94
  display_source_to_ui = {
 
75
  return ""
76
 
77
  documents_answer_template: str = "📝 Here are the sources I used to answer your question:\n\n{documents}\n\n{footnote}"
78
+ document_template: str = "[🔗 {document.source}: {document.title}]({document.url}), highest relevance: {document.similarity_to_answer:2.1f} %" # | # total chunks matched: {document.repetition:d}"
79
 
80
  matched_documents.similarity_to_answer = (
81
  matched_documents.similarity_to_answer * 100
82
  )
83
 
84
+ # matched_documents["repetition"] = matched_documents.groupby("title")[
85
+ # "title"
86
+ # ].transform("size")
87
 
88
  # drop duplicates, keep highest ranking ones
89
  matched_documents = matched_documents.sort_values(
90
  "similarity_to_answer", ascending=False
91
+ ).drop_duplicates("title", keep="first")
92
 
93
  # Revert back to correct display
94
  display_source_to_ui = {
cfg.py CHANGED
@@ -73,7 +73,7 @@ Q:
73
  },
74
  retriever_cfg={
75
  "path": f"{DEEPLAKE_DATASET_PATH}",
76
- "top_k": 10,
77
  "thresh": 0.55,
78
  "max_tokens": 13000,
79
  "embedding_model": "text-embedding-ada-002",
 
73
  },
74
  retriever_cfg={
75
  "path": f"{DEEPLAKE_DATASET_PATH}",
76
+ "top_k": 5,
77
  "thresh": 0.55,
78
  "max_tokens": 13000,
79
  "embedding_model": "text-embedding-ada-002",