File size: 5,642 Bytes
47f67ac
 
 
aa933fe
 
 
 
07f4bca
aa933fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47f67ac
 
 
 
 
 
 
 
 
 
e807dea
 
 
a7bf877
47f67ac
 
 
 
dcb7035
7da96c4
 
 
47f67ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import gradio as gr
from huggingface_hub import InferenceClient

import requests
from bs4 import BeautifulSoup
import urllib
import random

# Pool of browser User-Agent header values. get_useragent() picks one at
# random for each outgoing HTTP request made by this module.
_useragent_list = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
]

def get_useragent():
    """Pick one User-Agent string at random from ``_useragent_list``."""
    chosen_agent = random.choice(_useragent_list)
    return chosen_agent

def extract_text_from_webpage(html_content):
    """Extract the visible text of an HTML document.

    Args:
        html_content: HTML markup to parse with BeautifulSoup's
            ``html.parser`` backend.

    Returns:
        The document text with ``script``, ``style``, ``header``,
        ``footer`` and ``nav`` elements removed. Each text fragment is
        stripped and the fragments are joined with single spaces.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    # Drop elements that hold code or page chrome rather than content.
    for tag in soup(["script", "style", "header", "footer", "nav"]):
        tag.extract()
    # Without a separator, text from adjacent elements is glued together
    # ("<p>Hello</p><p>World</p>" -> "HelloWorld"); join with a space so
    # word boundaries survive.
    return soup.get_text(separator=" ", strip=True)

def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
    """Run a Google search and download the visible text of each result page.

    Args:
        term: Search query; sent as the ``q`` parameter.
        num_results: Result offset at which paging stops. The returned list
            can contain more entries than this, because every result block
            found on a fetched page is processed.
        lang: Value of the ``hl`` query parameter.
        advanced: Accepted for interface compatibility; not used.
        sleep_interval: Accepted for interface compatibility; not used.
        timeout: Timeout in seconds applied to the search request and to
            every result-page download.
        safe: Value of the ``safe`` query parameter.
        ssl_verify: Passed as ``verify`` to the search request.

    Returns:
        A list of dicts with keys ``"link"`` and ``"text"``. ``"text"`` is
        ``None`` when the page could not be fetched; both values are
        ``None`` when a result block contained no anchor with an ``href``.

    Raises:
        requests.exceptions.RequestException: If the search request itself
            fails (including a non-2xx status via ``raise_for_status``).
            Failures while fetching individual result pages are caught and
            reported in the result list instead.
    """
    start = 0
    all_results = []

    # Fetch results in batches until the requested offset is reached.
    while start < num_results:
        resp = requests.get(
            url="https://www.google.com/search",
            headers={"User-Agent": get_useragent()},  # Random UA per request
            # The raw term is passed here; requests URL-encodes query
            # parameters itself, so no manual escaping is needed.
            params={
                "q": term,
                "num": num_results - start,  # Results still wanted
                "hl": lang,
                "start": start,
                "safe": safe,
            },
            timeout=timeout,
            verify=ssl_verify,
        )
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")
        # Each <div class="g"> is treated as one search result.
        result_block = soup.find_all("div", attrs={"class": "g"})

        # Nothing parsed on this page: advance by one so the loop is
        # guaranteed to terminate, then retry from the next offset.
        if not result_block:
            start += 1
            continue

        for result in result_block:
            anchor = result.find("a", href=True)
            if not anchor:
                all_results.append({"link": None, "text": None})
                continue

            link = anchor["href"]
            try:
                # Bound the download time: without a timeout a single
                # unresponsive site would block the whole search forever.
                webpage = requests.get(
                    link,
                    headers={"User-Agent": get_useragent()},
                    timeout=timeout,
                )
                webpage.raise_for_status()
                visible_text = extract_text_from_webpage(webpage.text)
                all_results.append({"link": link, "text": visible_text})
            except requests.exceptions.RequestException as e:
                # Best effort: keep the link but record that no text is
                # available (covers timeouts, bad URLs, HTTP errors).
                print(f"Error fetching or processing {link}: {e}")
                all_results.append({"link": link, "text": None})

        start += len(result_block)  # Offset for the next batch

    return all_results


# Module-level Hugging Face inference client for the Mixtral-8x7B-Instruct
# model; respond() uses it to generate the chat reply.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a streamed chat reply augmented with web-search results.

    The prompt is built from a fixed system preamble, ``system_message``,
    the user's ``message`` and the text of the pages returned by
    ``search(message)``.

    Args:
        message: The user's latest chat message; also used as the search
            query.
        history: Previous (user, assistant) turns supplied by the chat UI.
            Not used when building the prompt.
        system_message: Extra instruction text appended to the preamble.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Yields:
        The reply accumulated so far, once per generated token, so the UI
        can render it incrementally. Yields an empty string once if the
        model produced no tokens.
    """
    messages = "<s>[SYSTEM] Your name is Chatchat.Answer as Real OpenGPT 4o, Made by 'peterpeter8585', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"+system_message

    # The search is an enhancement, not a requirement: if it fails (network
    # error, HTTP error status, timeout) answer without web context rather
    # than aborting the whole reply.
    try:
        web_results = search(message)
    except requests.exceptions.RequestException as e:
        print(f"Web search failed: {e}")
        web_results = []
    web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
    formatted_prompt = messages + message + "[WEB]" + str(web2) + "[OpenGPT 4o]"

    # NOTE(review): some text-generation backends reject top_p values
    # outside the open interval (0, 1) - confirm for this endpoint. Out of
    # range values are sent as "unset" instead.
    top_p = float(top_p)
    nucleus = top_p if 0.0 < top_p < 1.0 else None

    stream = client.text_generation(
        formatted_prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=nucleus,
        stream=True,
        details=True,
        return_full_text=False,
    )

    response = ""
    for chunk in stream:
        piece = chunk.token.text
        # Skip the end-of-sequence marker so it never reaches the user.
        if piece == "</s>":
            continue
        response += piece
        yield response

    # Always emit at least one value so the caller gets a (possibly empty)
    # reply even when nothing was generated.
    if not response:
        yield response

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to respond(). The additional inputs below correspond, in
# order, to respond()'s system_message, max_tokens, temperature and top_p
# parameters (they follow the message and history arguments).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Free-text system instruction.
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        # Generation length limit.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # Sampling temperature.
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        # Nucleus-sampling threshold.
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


# Start the Gradio app only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()