KingNish committed
Commit: 9a9538f
1 Parent(s): e6162d3

Update app.py

Files changed (1)
  1. app.py +72 -2
app.py CHANGED
@@ -1,9 +1,12 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from webscout import WEBS, transcriber
-
 from typing import Optional
 from fastapi.encoders import jsonable_encoder
+from bs4 import BeautifulSoup
+import requests
+from functools import lru_cache
+import urllib.parse
 
 app = FastAPI()
 
@@ -122,6 +125,73 @@ async def chat(
         raise HTTPException(status_code=500, detail=f"Error getting chat results: {e}")
 
 
+@lru_cache(maxsize=128)
+def extract_text_from_webpage(html_content):
+    """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content, "html.parser")
+    # Remove unwanted tags
+    for tag in soup(["script", "style", "header", "footer", "nav"]):
+        tag.extract()
+    # Get the remaining visible text
+    visible_text = soup.get_text(strip=True)
+    return visible_text
+
+@app.get("/api/web_extract")
+async def web_extract(
+    url: str,
+    max_chars: int = 12000,  # Adjust based on token limit
+):
+    """Extracts text from a given URL."""
+    try:
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+        response.raise_for_status()
+        visible_text = extract_text_from_webpage(response.text)
+        if len(visible_text) > max_chars:
+            visible_text = visible_text[:max_chars] + "..."
+        return {"url": url, "text": visible_text}
+    except requests.exceptions.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching or processing URL: {e}")
+
+@app.get("/api/search-and-extract")
+async def web_search_and_extract(
+    q: str,
+    max_results: int = 3,
+    timelimit: Optional[str] = None,
+    safesearch: str = "moderate",
+    region: str = "wt-wt",
+    backend: str = "api",
+    max_chars: int = 6000  # Adjust based on token limit
+):
+    """
+    Searches using WEBS, extracts text from the top results, and returns both.
+    """
+    try:
+        with WEBS() as webs:
+            # Perform WEBS search
+            search_results = webs.text(keywords=q, region=region, safesearch=safesearch,
+                                       timelimit=timelimit, backend=backend, max_results=max_results)
+
+            # Extract text from each result's link
+            extracted_results = []
+            for result in search_results:
+                if 'href' in result:
+                    link = result['href']
+                    try:
+                        response = requests.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+                        response.raise_for_status()
+                        visible_text = extract_text_from_webpage(response.text)
+                        if len(visible_text) > max_chars:
+                            visible_text = visible_text[:max_chars] + "..."
+                        extracted_results.append({"link": link, "text": visible_text})
+                    except requests.exceptions.RequestException as e:
+                        print(f"Error fetching or processing {link}: {e}")
+                        extracted_results.append({"link": link, "text": None})
+                else:
+                    extracted_results.append({"link": None, "text": None})
+
+            return JSONResponse(content=jsonable_encoder({"search_results": search_results, "extracted_results": extracted_results}))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error during search and extraction: {e}")
 
 @app.get("/api/maps")
 async def maps(
@@ -195,4 +265,4 @@ def get_ascii_weather(location: str):
 # Run the API server if this script is executed
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
+    uvicorn.run(app, host="0.0.0.0", port=8080)
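
For reference, a minimal sketch of how the two new endpoints might be exercised once this revision of app.py is running (assuming the uvicorn defaults from the __main__ block, i.e. http://localhost:8080; the example URL and query values below are illustrative only, not part of the commit):

import requests

BASE = "http://localhost:8080"  # assumed host/port from the __main__ block above

# Single-page extraction via the new /api/web_extract endpoint
resp = requests.get(f"{BASE}/api/web_extract",
                    params={"url": "https://example.com", "max_chars": 2000})
print(resp.json()["text"][:200])  # response body is {"url": ..., "text": ...}

# Search plus extraction via the new /api/search-and-extract endpoint
resp = requests.get(f"{BASE}/api/search-and-extract",
                    params={"q": "fastapi tutorial", "max_results": 2})
for item in resp.json()["extracted_results"]:  # alongside "search_results"
    print(item["link"])

Note that extract_text_from_webpage is wrapped in lru_cache keyed on the raw HTML string, so repeated requests for byte-identical pages skip re-parsing, while distinct pages each occupy one of the 128 cache slots.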