usmanyousaf's picture
Update app.py
f560e5e verified
import streamlit as st # type: ignore
from scrape import scrape_website, extract_body_content, clean_body_content, split_dom_content
from parse import parse_with_groq
# Streamlit UI with sidebar
st.set_page_config(page_title="AI Web Scraping App", page_icon="🌐")
st.sidebar.title("πŸš€ Model Selection")
selected_model = st.sidebar.selectbox(
"Choose a Model for Parsing:",
[
"llama3-8b-8192",
"llama3-groq-70b-8192-tool-use-preview",
"llama-3.1-8b-instant",
"llava-v1.5-7b-4096-preview",
"mixtral-8x7b-32768",
]
)
# Application title
st.title("AI Web Scraper App 🌐")
st.write("Easily scrape and analyze web content using advanced AI models. 🌟")
# Input for website URL
url = st.text_input("Enter Website URL πŸ”—")
# Step 1: Scrape the Website
if st.button("Scrape Website"):
if url:
st.write("πŸ•΅οΈβ€β™‚οΈ Scraping the website...")
# Scrape the website
dom_content = scrape_website(url)
body_content = extract_body_content(dom_content)
cleaned_content = clean_body_content(body_content)
# Store the DOM content in Streamlit session state
st.session_state.dom_content = cleaned_content
# Display the DOM content in an expandable text box
with st.expander("View DOM Content"):
st.text_area("DOM Content", cleaned_content, height=300)
# Step 2: Parse the Content
if "dom_content" in st.session_state:
parse_description = st.text_area("Describe what you want to parse πŸ“")
if st.button("Parse Content"):
if parse_description:
st.write(f"πŸ€– Parsing the content with {selected_model}...")
# Parse content using Groq
dom_chunks = split_dom_content(st.session_state.dom_content)
parsed_result = parse_with_groq(dom_chunks, parse_description, model=selected_model)
st.write(parsed_result)
# CSS for footer at the bottom of the sidebar
st.markdown(
"""
<style>
.footer {
position: fixed;
bottom: 0;
left: 0;
width: 100%;
background-color: #272432; /* Dark background for visibility */
color: white;
text-align: center;
padding: 10px;
font-size: 14px;
}
.sidebar .footer {
position: fixed;
bottom: 0;
}
</style>
<div class="footer">
Made with ❀️ by Usman Yousaf πŸš€<br>
Feel free to improve and expand this app for more powerful insights! πŸ”₯
</div>
""",
unsafe_allow_html=True
)