KingNish committed
Commit 0e92f07
1 Parent(s): bee5883

Update chatbot.py

Files changed (1)
  1. chatbot.py +36 -11
chatbot.py CHANGED
@@ -263,18 +263,43 @@ def fetch_and_extract(link, max_chars_per_page):
     except requests.exceptions.RequestException as e:
         return {"link": link, "text": None}
 
-def search(term, max_results=2, max_chars_per_page=8000, max_threads=10):
-    gr.Info("Searching...")
-    """Performs a DuckDuckGo search and extracts text from webpages."""
+# Perform a Google search and return the results
+def search(term, num_results=3, lang="en", timeout=5, safe="active", ssl_verify=None):
+    """Performs a Google search and returns the results."""
+    escaped_term = urllib.parse.quote_plus(term)
+    start = 0
     all_results = []
-    result_block = DDGS().text(term, max_results=max_results)
-    threads = []
-    for result in result_block:
-        if 'href' in result:
-            link = result["href"]
-            thread = Thread(target=lambda: all_results.append(fetch_and_extract(link, max_chars_per_page)))
-            threads.append(thread)
-            thread.start()
+    # Limit the number of characters from each webpage to stay under the token limit
+    max_chars_per_page = 8000  # Adjust this value based on your token limit and average webpage length
+
+    with requests.Session() as session:
+        while start < num_results:
+            resp = session.get(
+                url="https://www.google.com/search",
+                headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
+                params={
+                    "q": term,
+                    "num": num_results - start,
+                    "hl": lang,
+                    "start": start,
+                    "safe": safe,
+                },
+                timeout=timeout,
+                verify=ssl_verify,
+            )
+            resp.raise_for_status()
+            soup = BeautifulSoup(resp.text, "html.parser")
+            result_block = soup.find_all("div", attrs={"class": "g"})
+            if not result_block:
+                start += 1
+                continue
+            for result in result_block:
+                link = result.find("a", href=True)
+                if link:
+                    link = link["href"]
+                    thread = Thread(target=lambda: all_results.append(fetch_and_extract(link, max_chars_per_page)))
+                    threads.append(thread)
+                    thread.start()
     for thread in threads:
         thread.join()
     gr.Info("Extracting Important Info..")