import os
import threading
import time
import subprocess

print("Expanding user path for Ollama")
OLLAMA = os.path.expanduser("~/ollama")

print("Checking if Ollama exists at the path")
if not os.path.exists(OLLAMA):
    print("Ollama not found, downloading it")
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)

def ollama_service_thread():
    print("Starting Ollama service thread")
    # `serve` blocks for the lifetime of the server, hence the background thread
    subprocess.run(f"{OLLAMA} serve", shell=True)

print("Creating and starting Ollama service thread")
# daemon=True so the server thread does not keep the process alive on exit
OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread, daemon=True)
OLLAMA_SERVICE_THREAD.start()

print("Giving Ollama serve a moment to start")
time.sleep(10)
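# A fixed sleep can race a slow server start. As an optional extra guard
# (a sketch using only the stdlib; assumes Ollama's default port 11434),
# poll the HTTP endpoint until it responds:
import urllib.request
for _ in range(30):
    try:
        urllib.request.urlopen("http://localhost:11434", timeout=1)
        print("Ollama server is responding")
        break
    except OSError:
        time.sleep(1)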

print("Setting model to 'gemma2'")
model = "gemma2"

print(f"Pulling model {model}")
subprocess.run(f"~/ollama pull {model}", shell=True)

################################################
################################################
import gradio as gr
from ollama import Client

print("Initializing Ollama client")
client = Client(host='http://localhost:11434', timeout=120)

print("Getting Hugging Face token and model ID from environment variables")
HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
MODEL_NAME = MODEL_ID.split("/")[-1]

print("Setting up title and description for Gradio interface")
TITLE = "<h1><center>ollama-Chat</center></h1>"
DESCRIPTION = f"""
<h3>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
<p>Running on Ollama backend.</p>
"""


CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""
from llama_index.llms.ollama import Ollama

# Initialize the llama-index Ollama wrapper with the model pulled above
llm = Ollama(model=model, request_timeout=120.0)

# Define the function to get the response from Ollama
def get_response(question):
    resp = llm.complete(question)
    # llm.complete returns a CompletionResponse; hand Gradio its text
    return resp.text

# Create the Gradio interface, reusing the description and CSS defined above
iface = gr.Interface(
    fn=get_response,
    inputs="text",
    outputs="text",
    title="ollama-Chat",
    description=DESCRIPTION,
    css=CSS,
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
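    # Note: inside a container or hosted Space one usually needs to bind to
    # all interfaces, e.g. (the port is an assumption, adjust as needed):
    #     iface.launch(server_name="0.0.0.0", server_port=7860)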