KingNish committed
Commit: 9a9538f
1 Parent(s): e6162d3

Update app.py

Files changed (1)
  1. app.py +72 -2
app.py CHANGED
@@ -1,9 +1,12 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from webscout import WEBS, transcriber
-
 from typing import Optional
 from fastapi.encoders import jsonable_encoder
+from bs4 import BeautifulSoup
+import requests
+from functools import lru_cache
+import urllib.parse
 
 app = FastAPI()
 
@@ -122,6 +125,73 @@ async def chat(
         raise HTTPException(status_code=500, detail=f"Error getting chat results: {e}")
 
 
+@lru_cache(maxsize=128)
+def extract_text_from_webpage(html_content):
+    """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content, "html.parser")
+    # Remove unwanted tags
+    for tag in soup(["script", "style", "header", "footer", "nav"]):
+        tag.extract()
+    # Get the remaining visible text
+    visible_text = soup.get_text(strip=True)
+    return visible_text
+
+@app.get("/api/web_extract")
+async def web_extract(
+    url: str,
+    max_chars: int = 12000,  # Adjust based on token limit
+):
+    """Extracts text from a given URL."""
+    try:
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+        response.raise_for_status()
+        visible_text = extract_text_from_webpage(response.text)
+        if len(visible_text) > max_chars:
+            visible_text = visible_text[:max_chars] + "..."
+        return {"url": url, "text": visible_text}
+    except requests.exceptions.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching or processing URL: {e}")
+
+@app.get("/api/search-and-extract")
+async def web_search_and_extract(
+    q: str,
+    max_results: int = 3,
+    timelimit: Optional[str] = None,
+    safesearch: str = "moderate",
+    region: str = "wt-wt",
+    backend: str = "api",
+    max_chars: int = 6000  # Adjust based on token limit
+):
+    """
+    Searches using WEBS, extracts text from the top results, and returns both.
+    """
+    try:
+        with WEBS() as webs:
+            # Perform WEBS search
+            search_results = webs.text(keywords=q, region=region, safesearch=safesearch,
+                                       timelimit=timelimit, backend=backend, max_results=max_results)
+
+            # Extract text from each result's link
+            extracted_results = []
+            for result in search_results:
+                if 'href' in result:
+                    link = result['href']
+                    try:
+                        response = requests.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+                        response.raise_for_status()
+                        visible_text = extract_text_from_webpage(response.text)
+                        if len(visible_text) > max_chars:
+                            visible_text = visible_text[:max_chars] + "..."
+                        extracted_results.append({"link": link, "text": visible_text})
+                    except requests.exceptions.RequestException as e:
+                        print(f"Error fetching or processing {link}: {e}")
+                        extracted_results.append({"link": link, "text": None})
+                else:
+                    extracted_results.append({"link": None, "text": None})
+
+            return JSONResponse(content=jsonable_encoder({"search_results": search_results, "extracted_results": extracted_results}))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error during search and extraction: {e}")
 
 @app.get("/api/maps")
 async def maps(
@@ -195,4 +265,4 @@ def get_ascii_weather(location: str):
 # Run the API server if this script is executed
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
+    uvicorn.run(app, host="0.0.0.0", port=8080)
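
For reference, a minimal sketch of how the two new endpoints might be exercised once this revision of app.py is running (assuming the uvicorn defaults from the __main__ block, i.e. http://localhost:8080; the example URL and query values below are illustrative only, not part of the commit):

import requests

BASE = "http://localhost:8080"  # assumed host/port from the __main__ block above

# Single-page extraction via the new /api/web_extract endpoint
resp = requests.get(f"{BASE}/api/web_extract",
                    params={"url": "https://example.com", "max_chars": 2000})
print(resp.json()["text"][:200])  # response body is {"url": ..., "text": ...}

# Search plus extraction via the new /api/search-and-extract endpoint
resp = requests.get(f"{BASE}/api/search-and-extract",
                    params={"q": "fastapi tutorial", "max_results": 2})
for item in resp.json()["extracted_results"]:  # alongside "search_results"
    print(item["link"])

Note that extract_text_from_webpage is wrapped in lru_cache keyed on the raw HTML string, so repeated requests for byte-identical pages skip re-parsing, while distinct pages each occupy one of the 128 cache slots.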