Entz committed
Commit bc4f1c2
1 Parent(s): a9b61a6

Upload app.py

Files changed (1)
app.py +125 -63
app.py CHANGED
@@ -1,63 +1,125 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ import os
+ import threading
+ import time
+ import subprocess
+ import socket
+
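+ # Port helpers: ollama binds a single TCP port (11434 by default), so the
+ # script checks whether a port is free, kills any stale process holding it,
+ # and scans upward from 11435 for a fallback.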
+ def is_port_available(port):
+     # connect_ex returns a non-zero error code when nothing is listening
+     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+         return s.connect_ex(('localhost', port)) != 0
+
+ def kill_process_on_port(port):
+     try:
+         # lsof -t prints one PID per line; join them so a single kill
+         # command handles every process on the port
+         pids = subprocess.check_output(f"lsof -t -i:{port}", shell=True)
+         pids = pids.decode().strip().replace("\n", " ")
+         if pids:
+             print(f"Killing process(es) {pids} using port {port}")
+             subprocess.run(f"kill -9 {pids}", shell=True)
+     except subprocess.CalledProcessError:
+         print(f"No process found using port {port}")
+
+ def find_available_port(start_port=11435):
+     port = start_port
+     while True:
+         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+             if s.connect_ex(('localhost', port)) != 0:
+                 print(f"Found available port: {port}")
+                 return port
+         port += 1
+
+ print("Starting script...")
+
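+ # First run only: fetch the standalone Linux amd64 ollama binary into the
+ # home directory and make it executable.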
+ OLLAMA = os.path.expanduser("~/ollama")
+
+ if not os.path.exists(OLLAMA):
+     print("ollama binary not found, downloading...")
+     subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
+     os.chmod(OLLAMA, 0o755)
+     print("Made the binary executable (chmod 0o755)")
+ else:
+     print("ollama binary already present.")
+
+ # Default port is 11434
+ default_port = 11434
+ print(f"Default port is set to {default_port}")
+
+ # Kill any process that might be using the default port
+ print(f"Attempting to kill any process using port {default_port}")
+ kill_process_on_port(default_port)
+ print(f"Any process on port {default_port} has been killed")
+
+ # Verify the default port is now available
+ print(f"Checking if port {default_port} is available...")
+ if is_port_available(default_port):
+     available_port = default_port
+     print(f"Port {default_port} is available. Using it for the new instance.")
+ else:
+     # Find another available port starting from 11435
+     print(f"Port {default_port} is not available. Finding another port...")
+     available_port = find_available_port(start_port=11435)
+     print(f"Using available port {available_port} for the new instance.")
+
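+ # Run the server on a background thread so the rest of the script can
+ # continue; os.environ is process-wide, so the CLI subprocesses below
+ # inherit OLLAMA_HOST and talk to this instance.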
+ def ollama_service_thread():
+     print(f"Starting ollama service on port {available_port}...")
+     os.environ['OLLAMA_HOST'] = f'127.0.0.1:{available_port}'
+     # subprocess.run blocks for the lifetime of the server, so this line
+     # only returns once `ollama serve` exits
+     subprocess.run("~/ollama serve", shell=True)
+     print("ollama serve exited.")
+
+ # Daemon thread so the script can exit without waiting on the server
+ OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread, daemon=True)
+ OLLAMA_SERVICE_THREAD.start()
+
+ print("Giving ollama serve a moment")
+ time.sleep(10)
+ print("Proceeding after sleep.")
+
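+ # The pull below runs through the ollama CLI; since OLLAMA_HOST is already
+ # set, it is served by the instance started above.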
+ # Pull the model
+ model = "gemma2"
+ print(f"Pulling model {model}...")
+ subprocess.run(f"~/ollama pull {model}", shell=True)
+ print(f"Model {model} pulled.")
+
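+ # The Python client talks to the local server over HTTP; the 120 s timeout
+ # leaves room for slow first-token latency on CPU-only hardware.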
+ # Import the client
+ print("Importing ollama Client...")
+ from ollama import Client
+ print("Imported ollama Client.")
+ client = Client(host=f'http://localhost:{available_port}', timeout=120)
+ print(f"Client created with host: http://localhost:{available_port}")
+
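+ # stream_chat rebuilds the (user, assistant) tuple history into the
+ # role/content message list the ollama chat API expects, then yields the
+ # accumulating reply as chunks stream back.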
+ def stream_chat(message: str, history: list):
+     print("Starting stream_chat function...")
+     conversation = []
+     for prompt, answer in history:
+         conversation.extend([
+             {"role": "user", "content": prompt},
+             {"role": "assistant", "content": answer},
+         ])
+     conversation.append({"role": "user", "content": message})
+     print(f"Sending message to model: {message}")
+
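+     # Stream the reply. repeat_penalty 1.0 effectively disables the repeat
+     # penalty; low_vram asks the backend to reduce GPU memory use.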
+     response = client.chat(
+         model=model,
+         messages=conversation,
+         stream=True,
+         options={
+             'num_predict': 1024,
+             'temperature': 0.8,
+             'top_p': 0.8,
+             'top_k': 20,
+             'repeat_penalty': 1.0,
+             'low_vram': True,
+         },
+     )
+
+     buffer = ""
+     for chunk in response:
+         buffer += chunk["message"]["content"]
+         print(f"Received chunk: {chunk['message']['content']}")
+         yield buffer
+
+ # Example usage
+ if __name__ == "__main__":
+     print("Starting main function...")
+     history = [("Hello", "Hi there! How can I help you today?")]
+
+     # Send a new message to the model and drain the stream; stream_chat
+     # already prints each chunk as it arrives (the prompt is illustrative)
+     final = ""
+     for final in stream_chat("What can you do?", history):
+         pass
+     print(f"Final response: {final}")