Spaces:

Tsumugii
/

PoetryChat

Runtime error

App Files Files Community

PoetryChat / src /search_engine.py

Tsumugii24

initial commit

f7161fa 3 months ago

raw

history blame contribute delete

No virus

8.49 kB

	# -- coding: utf-8 --
	"""
	@author:XuMing(xuming624@qq.com)
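	@description: Search-engine helpers (Bing, Google, Serper, SearchApi.io, DuckDuckGo) that return a list of result contexts for a query.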
	"""
	import json
	from itertools import islice

	import requests
	from fastapi import HTTPException
	from loguru import logger

	# Search engine related. You don't really need to change this.
	# Each endpoint below is the URL requested by the matching search_with_*
	# function in this module.
	BING_SEARCH_V7_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
	# Sent to Bing as the "mkt" query parameter.
	BING_MKT = "en-US"
	GOOGLE_SEARCH_ENDPOINT = "https://customsearch.googleapis.com/customsearch/v1"
	SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search"
	SEARCHAPI_SEARCH_ENDPOINT = "https://www.searchapi.io/api/v1/search"
	# Specify the number of references from the search engine you want to use.
	# Every search function truncates its result list to this many entries.
	# 8 is usually a good number.
	REFERENCE_COUNT = 8

	# Specify the default timeout for the search engine, passed as `timeout=`
	# to the requests calls for Bing, Google and Serper (search_with_searchapi
	# passes its own hard-coded value of 30 instead).
	# NOTE(review): when the timeout elapses, requests raises an exception that
	# nothing in this module catches, so it propagates to the caller.
	DEFAULT_SEARCH_ENGINE_TIMEOUT = 5


	def search_with_bing(query: str, subscription_key: str):
	    """
	    Query the Bing endpoint and return its web-page results.

	    The query is sent together with the ``BING_MKT`` market, and the key is
	    passed in the ``Ocp-Apim-Subscription-Key`` header.

	    :param query: search query text.
	    :param subscription_key: Bing subscription key.
	    :return: up to ``REFERENCE_COUNT`` entries of ``webPages.value`` from the
	        response, or ``[]`` (after logging) when those keys are missing.
	    :raises HTTPException: with the upstream status code when the response
	        is not OK.
	    """
	    reply = requests.get(
	        BING_SEARCH_V7_ENDPOINT,
	        headers={"Ocp-Apim-Subscription-Key": subscription_key},
	        params={"q": query, "mkt": BING_MKT},
	        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
	    )
	    if reply.ok:
	        body = reply.json()
	        try:
	            return body["webPages"]["value"][:REFERENCE_COUNT]
	        except KeyError:
	            # Response parsed fine but does not carry web-page results.
	            logger.error(f"Error encountered: {body}")
	            return []

	    # Non-2xx/3xx answer: log the upstream details and surface the status.
	    logger.error(f"{reply.status_code} {reply.text}")
	    raise HTTPException(reply.status_code, "Search engine error.")


	def search_with_google(query: str, subscription_key: str, cx: str):
	    """
	    Query the Google custom search endpoint and return its result items.

	    :param query: search query text.
	    :param subscription_key: API key, sent as the ``key`` query parameter.
	    :param cx: value sent as the ``cx`` query parameter.
	    :return: up to ``REFERENCE_COUNT`` entries of ``items`` from the response,
	        or ``[]`` (after logging) when ``items`` is missing.
	    :raises HTTPException: with the upstream status code when the response
	        is not OK.
	    """
	    request_params = {
	        "key": subscription_key,
	        "cx": cx,
	        "q": query,
	        "num": REFERENCE_COUNT,
	    }
	    reply = requests.get(
	        GOOGLE_SEARCH_ENDPOINT,
	        params=request_params,
	        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
	    )
	    if reply.ok:
	        body = reply.json()
	        try:
	            return body["items"][:REFERENCE_COUNT]
	        except KeyError:
	            # Response parsed fine but contains no "items" list.
	            logger.error(f"Error encountered: {body}")
	            return []

	    # Non-OK answer: log the upstream details and surface the status.
	    logger.error(f"{reply.status_code} {reply.text}")
	    raise HTTPException(reply.status_code, "Search engine error.")


	def search_with_serper(query: str, subscription_key: str):
	    """
	    Search with serper and return the contexts.

	    Posts ``query`` to ``SERPER_SEARCH_ENDPOINT`` and converts the response
	    into ``{"name", "url", "snippet"}`` dicts (the same format as
	    bing/google, per the original author): the knowledge graph entry first,
	    then the answer box, then the organic results.

	    :param query: search query text.
	    :param subscription_key: Serper API key, sent as the ``X-API-KEY`` header.
	    :return: at most ``REFERENCE_COUNT`` context dicts; ``[]`` (after logging)
	        when the response has no ``organic`` key or an organic entry lacks
	        ``title``/``link``.
	    :raises HTTPException: with the upstream status code when the response
	        is not OK.
	    """
	    # Ask for REFERENCE_COUNT rounded up to the next multiple of 10.
	    if REFERENCE_COUNT % 10 == 0:
	        num_results = REFERENCE_COUNT
	    else:
	        num_results = (REFERENCE_COUNT // 10 + 1) * 10
	    payload = json.dumps({"q": query, "num": num_results})
	    headers = {"X-API-KEY": subscription_key, "Content-Type": "application/json"}

	    # The API key must never reach the logs, so log a redacted copy of the
	    # headers and do not log subscription_key at all.
	    redacted_headers = {**headers, "X-API-KEY": "***"}
	    logger.info(f"{payload} {redacted_headers} {query} {SERPER_SEARCH_ENDPOINT}")

	    response = requests.post(
	        SERPER_SEARCH_ENDPOINT,
	        headers=headers,
	        data=payload,
	        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
	    )
	    if not response.ok:
	        logger.error(f"{response.status_code} {response.text}")
	        raise HTTPException(response.status_code, "Search engine error.")
	    json_content = response.json()
	    try:
	        # convert to the same format as bing/google
	        contexts = []

	        knowledge_graph = json_content.get("knowledgeGraph")
	        if knowledge_graph:
	            url = knowledge_graph.get("descriptionUrl") or knowledge_graph.get("website")
	            snippet = knowledge_graph.get("description")
	            # Only keep the entry when it has both a link and some text.
	            if url and snippet:
	                contexts.append({
	                    "name": knowledge_graph.get("title", ""),
	                    "url": url,
	                    "snippet": snippet,
	                })

	        answer_box = json_content.get("answerBox")
	        if answer_box:
	            url = answer_box.get("url")
	            snippet = answer_box.get("snippet") or answer_box.get("answer")
	            if url and snippet:
	                contexts.append({
	                    "name": answer_box.get("title", ""),
	                    "url": url,
	                    "snippet": snippet,
	                })

	        # "organic", "title" and "link" are required; a missing one raises
	        # KeyError and discards the whole result below.
	        contexts += [
	            {"name": c["title"], "url": c["link"], "snippet": c.get("snippet", "")}
	            for c in json_content["organic"]
	        ]
	        return contexts[:REFERENCE_COUNT]
	    except KeyError:
	        logger.error(f"Error encountered: {json_content}")
	        return []


	def search_with_searchapi(query: str, subscription_key: str):
	    """
	    Search with SearchApi.io and return the contexts.

	    Sends ``query`` to ``SEARCHAPI_SEARCH_ENDPOINT`` with the ``google``
	    engine and converts the response into ``{"name", "url", "snippet"}``
	    dicts (the same format as bing/google, per the original author), in this
	    order: answer box, knowledge graph, organic results, related questions.

	    :param query: search query text.
	    :param subscription_key: SearchApi.io API key, sent as a Bearer token.
	    :return: at most ``REFERENCE_COUNT`` context dicts; ``[]`` (after logging)
	        when the response has no ``organic_results`` key or an organic entry
	        lacks ``title``/``link``.
	    :raises HTTPException: with the upstream status code when the response
	        is not OK.
	    """
	    # Ask for REFERENCE_COUNT rounded up to the next multiple of 10.
	    if REFERENCE_COUNT % 10 == 0:
	        num_results = REFERENCE_COUNT
	    else:
	        num_results = (REFERENCE_COUNT // 10 + 1) * 10
	    payload = {"q": query, "engine": "google", "num": num_results}
	    headers = {"Authorization": f"Bearer {subscription_key}", "Content-Type": "application/json"}

	    # The API key must never reach the logs, so log a redacted copy of the
	    # headers and do not log subscription_key at all.
	    redacted_headers = {**headers, "Authorization": "Bearer ***"}
	    logger.info(f"{payload} {redacted_headers} {query} {SEARCHAPI_SEARCH_ENDPOINT}")

	    # NOTE(review): this call uses a 30 s timeout rather than
	    # DEFAULT_SEARCH_ENGINE_TIMEOUT like the other engines; confirm that
	    # this is intentional.
	    response = requests.get(
	        SEARCHAPI_SEARCH_ENDPOINT,
	        headers=headers,
	        params=payload,
	        timeout=30,
	    )
	    if not response.ok:
	        logger.error(f"{response.status_code} {response.text}")
	        raise HTTPException(response.status_code, "Search engine error.")
	    json_content = response.json()
	    try:
	        # convert to the same format as bing/google
	        contexts = []

	        answer_box = json_content.get("answer_box")
	        if answer_box:
	            # Specialised sources for title/link. They used to be computed
	            # and then unconditionally overwritten; now they serve as
	            # fallbacks when the answer box has no top-level title/link.
	            fallback_title = ""
	            fallback_url = ""
	            organic_result = answer_box.get("organic_result")
	            if organic_result:
	                fallback_title = organic_result.get("title", "")
	                fallback_url = organic_result.get("link", "")
	            if answer_box.get("type") == "population_graph":
	                fallback_title = answer_box.get("place") or fallback_title
	                fallback_url = answer_box.get("explore_more_link") or fallback_url

	            # Top-level fields stay preferred, so responses that already
	            # produced a context keep producing the same one.
	            title = answer_box.get("title") or fallback_title
	            url = answer_box.get("link") or fallback_url
	            snippet = answer_box.get("answer") or answer_box.get("snippet")

	            if url and snippet:
	                contexts.append({
	                    "name": title,
	                    "url": url,
	                    "snippet": snippet,
	                })

	        knowledge_graph = json_content.get("knowledge_graph")
	        if knowledge_graph:
	            # Prefer "website"; fall back to the source link, which was
	            # previously read and then discarded.
	            source = knowledge_graph.get("source")
	            source_link = source.get("link", "") if source else ""
	            url = knowledge_graph.get("website") or source_link
	            snippet = knowledge_graph.get("description")

	            if url and snippet:
	                contexts.append({
	                    "name": knowledge_graph.get("title", ""),
	                    "url": url,
	                    "snippet": snippet,
	                })

	        # "organic_results", "title" and "link" are required; a missing one
	        # raises KeyError and discards the whole result below.
	        contexts += [
	            {"name": c["title"], "url": c["link"], "snippet": c.get("snippet", "")}
	            for c in json_content["organic_results"]
	        ]

	        for question in json_content.get("related_questions") or []:
	            source = question.get("source")
	            url = source.get("link", "") if source else ""
	            snippet = question.get("answer", "")

	            # Only keep questions that carry both a link and an answer.
	            if url and snippet:
	                contexts.append({
	                    "name": question.get("question", ""),
	                    "url": url,
	                    "snippet": snippet,
	                })

	        return contexts[:REFERENCE_COUNT]
	    except KeyError:
	        logger.error(f"Error encountered: {json_content}")
	        return []


	def search_with_duckduckgo(query: str):
	    """
	    Query DuckDuckGo through ``duckduckgo_search`` and return the contexts.

	    The package is imported lazily so it is only required when this engine
	    is actually used.

	    :param query: search query text.
	    :return: up to ``REFERENCE_COUNT`` dicts with ``name``, ``url`` and
	        ``snippet`` built from each result's ``title``, ``href`` and ``body``.
	    :raises ImportError: when ``duckduckgo_search`` is not installed.
	    """
	    try:
	        from duckduckgo_search import DDGS
	    except ImportError:
	        raise ImportError("Please install duckduckgo-search to use this search engine.")

	    contexts = []
	    with DDGS() as client:
	        hits = client.text(query, backend="lite")
	        # islice stops after REFERENCE_COUNT results without consuming more.
	        contexts.extend(
	            {"name": hit['title'], "url": hit['href'], "snippet": hit['body']}
	            for hit in islice(hits, REFERENCE_COUNT)
	        )
	    return contexts