xtrade_bot_gradio / download_data_for_RAG.py
Josh-Ola's picture
Upload folder using huggingface_hub
8ab1018 verified
raw
history blame contribute delete
No virus
2 kB
"""
Module for downloading reports and other relevant files from
africaexchange.
"""
import os
import requests
from fastapi import APIRouter
from _endpoints import all_queries, urls
# Router that carries the report-download endpoint registered below.
reports_etl = APIRouter()
# Seconds to wait on each HTTP call; without a timeout ``requests`` can block
# indefinitely on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 30


@reports_etl.get("/fetch-reports", summary="For fetching all reports")
def download_reports(folder: str = "data_") -> dict:
    """Download every report document returned by the GraphQL queries.

    Each query from ``all_queries()`` is posted to the GraphQL URL given by
    ``urls()``. The document URLs found in the response are fetched relative
    to the base URL and written to ``<folder>/<query name>/<file name>``,
    where the query name has every ``query_`` occurrence removed and the file
    name is the last path segment of the document URL.

    Processing stops at the first failure; files saved before that point are
    left on disk.

    Args:
        folder: Root directory that receives one sub-directory per query.

    Returns:
        A dict with ``status_code`` and ``message`` keys: 200 on success,
        the upstream HTTP status when a request does not answer 200, or 502
        when the GraphQL response lacks the expected structure.

    Raises:
        requests.RequestException: If a request times out or fails at the
            network level.
        OSError: If a directory or file cannot be written.
    """
    # NOTE(review): FastAPI exposes ``folder`` as a query parameter, so the
    # HTTP caller chooses where files are written -- confirm this is intended.
    queries = all_queries()
    base_url, graph_url = urls()

    for name, query in queries.items():
        report_type = name.replace("query_", "")
        response = requests.post(
            url=graph_url,
            json={"query": query},
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return {
                "status_code": response.status_code,
                "message": "An error occured when calling graphql",
            }

        # A 200 reply may still not be JSON or may lack the expected keys;
        # report that through the return value instead of raising.
        try:
            reports = response.json()["data"]["reports"]["data"]
            file_urls = [
                report["attributes"]["document"]["data"]["attributes"]["url"]
                for report in reports
            ]
        except (KeyError, TypeError, ValueError):
            return {
                "status_code": 502,
                "message": "Unexpected response structure from graphql",
            }

        subdir = os.path.join(folder, report_type)
        for file_url in file_urls:
            file_response = requests.get(
                f"{base_url}{file_url}",
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            if file_response.status_code != 200:
                return {
                    "status_code": file_response.status_code,
                    "message": "An error occured when downloading the file",
                }

            # Create the directory only once a file is ready to be stored, so
            # queries without documents leave no empty folders behind.
            os.makedirs(name=subdir, exist_ok=True)
            destination = os.path.join(subdir, file_url.split("/")[-1])
            with open(destination, "wb") as file:
                file.write(file_response.content)
            print(f"Report downloaded successfully to: {destination}")

    return {
        "status_code": 200,
        "message": f"file downloaded and saved to {folder}",
    }
# Run the download directly when this module is executed as a script.
if __name__ == "__main__":
    download_reports()