Cran-May mikeee committed on
Commit
a519dfe
0 Parent(s):

Duplicate from mikeee/30B-Lazarus-GGML

Browse files

Co-authored-by: mikeee <mikeee@users.noreply.huggingface.co>

Files changed (7) hide show
  1. .gitattributes +35 -0
  2. .gitignore +11 -0
  3. .ruff.toml +19 -0
  4. .stignore +102 -0
  5. README.md +13 -0
  6. app.py +274 -0
  7. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ call-activate.bat
2
+ okteto.yml
3
+ okteto-up.bat
4
+ install-sw.sh
5
+ install-sw1.sh
6
+ start-sshd.sh
7
+ pyproject.toml
8
+ models
9
+ .ruff_cache
10
+ run-nodemon.sh
11
+ app-.py
.ruff.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Assume Python 3.10.
2
+ target-version = "py310"
3
+ # Increase the maximum line length to 300 characters.
4
+ line-length = 300
5
+
6
+ # pyflakes, pycodestyle, isort
7
+ # flake8 YTT, pydocstyle D, pylint PLC
8
+ select = ["F", "E", "W", "I001", "YTT", "D", "PLC"]
9
+ # select = ["ALL"]
10
+
11
+ # D100 Missing docstring in public module
12
+ # E501 Line too long
13
+ # D103 Missing docstring in public function
14
+ # D101 Missing docstring in public class
15
+ # `multi-line-summary-first-line` (D212)
16
+ # `one-blank-line-before-class` (D203)
17
+ extend-ignore = ["E501", "D100", "D103", "D101", "D212", "D203"]
18
+
19
+ exclude = [".venv"]
.stignore ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ models
2
+ .git
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build
14
+ develop-eggs
15
+ dist
16
+ downloads
17
+ eggs
18
+ .eggs
19
+ lib
20
+ lib64
21
+ parts
22
+ sdist
23
+ var
24
+ wheels
25
+ pip-wheel-metadata
26
+ share/python-wheels
27
+ *.egg-info
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Translations
43
+ *.mo
44
+ *.pot
45
+
46
+ # Django stuff:
47
+ *.log
48
+ local_settings.py
49
+ db.sqlite3
50
+
51
+ # Flask stuff:
52
+ instance
53
+ .webassets-cache
54
+
55
+ # Scrapy stuff:
56
+ .scrapy
57
+
58
+ # Sphinx documentation
59
+ docs/_build
60
+
61
+ # PyBuilder
62
+ target
63
+
64
+ # Jupyter Notebook
65
+ .ipynb_checkpoints
66
+
67
+ # IPython
68
+ profile_default
69
+ ipython_config.py
70
+
71
+ # pyenv
72
+ .python-version
73
+
74
+ # celery beat schedule file
75
+ celerybeat-schedule
76
+
77
+ # SageMath parsed files
78
+ *.sage.py
79
+
80
+ # Environments
81
+ .env
82
+ .venv
83
+ env
84
+ venv
85
+ ENV
86
+ env.bak
87
+ venv.bak
88
+
89
+ # Spyder project settings
90
+ .spyderproject
91
+ .spyproject
92
+
93
+ # Rope project settings
94
+ .ropeproject
95
+
96
+ # mypy
97
+ .mypy_cache
98
+ .dmypy.json
99
+ dmypy.json
100
+
101
+ # Pyre type checker
102
+ .pyre
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: 30B-Lazarus-GGML
3
+ emoji: 🚀
4
+ colorFrom: green
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: mikeee/30B-Lazarus-GGML
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio chat Space serving TheBloke/30B-Lazarus-GGML via ctransformers (CPU)."""
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download

# Single source of truth for the model location: the blob URL on the Hub.
# (Earlier falcon-7b experiments and dead `_ = """…"""` blocks removed.)
URL = "https://huggingface.co/TheBloke/30B-Lazarus-GGML/blob/main/30b-Lazarus.ggmlv3.q2_K.bin"  # 13.6 G
# First two path segments of the URL -> "TheBloke/30B-Lazarus-GGML".
repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2])
# Last path segment -> "30b-Lazarus.ggmlv3.q2_K.bin".
model_file = Path(URL).name

# Download the GGML weights (or reuse the cached copy); returns the local path.
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=model_file,
    local_dir="models",
)

# Load the model on CPU with one thread per physical core.
llm = AutoModelForCausalLM.from_pretrained(
    model_path,
    model_type="llama",
    threads=psutil.cpu_count(logical=False),
)

# Fixed typo (GGM -> GGML) and closed the previously unterminated <h2> tag.
TITLE = f"""<h2 align="center">🦅 30B-Lazarus-GGML ({model_file}) 🦅</h2>"""
USER_NAME = "User"
BOT_NAME = "Assistant"
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will answer User's questions. Assistant will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
"""
# Typing this as the message re-runs the last user turn.
RETRY_COMMAND = "/retry"
# Generation stops when the model starts a new "User:" turn.
STOP_STR = f"\n{USER_NAME}:"
# Tokens that may begin a stop sequence: buffered instead of streamed immediately.
STOP_SUSPECT_LIST = [":", "\n", "User"]
80
+
81
+
82
def chat_accordion():
    """Build the collapsible "Parameters" panel; return the (temperature, top_p) sliders."""
    slider_specs = [
        # (minimum, maximum, initial value, step, label)
        (0.1, 2.0, 0.8, 0.1, "Temperature"),
        (0.1, 0.99, 0.9, 0.01, "p (nucleus sampling)"),
    ]
    with gr.Accordion("Parameters", open=False):
        temperature, top_p = (
            gr.Slider(
                minimum=lo,
                maximum=hi,
                value=default,
                step=step,
                interactive=True,
                label=label,
            )
            for lo, hi, default, step, label in slider_specs
        )
    return temperature, top_p
101
+
102
+
103
+ # TODO: fix prompt
104
def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    """Render instructions, prior turns, and the new message into one prompt string."""
    # Trim surrounding spaces, then surrounding newlines (original order preserved).
    instructions = instructions.strip(" ").strip("\n")
    lines = [instructions]
    for user_message, bot_message in chat_history:
        lines.append(f"{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}")
    # Open a fresh assistant turn for the model to complete.
    lines.append(f"{USER_NAME}: {message}\n{BOT_NAME}:")
    return "\n".join(lines)
112
+
113
+
114
def chat():
    """Build the chat UI (chatbot, input, buttons, parameters) and wire its events.

    NOTE(review): the source lost its indentation; the container nesting below is
    reconstructed from the elem_id/css structure (#chat_container,
    #button_container, #param_container as parallel containers) — confirm layout.
    """
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder="Ask me anything...",
                label="Type an input and press Enter",
                max_lines=3,
            )

    with gr.Row(elem_id="button_container"):
        # (Dead commented-out submit_button removed; Enter in the textbox submits.)
        with gr.Column(min_width=32):
            retry_button = gr.Button("♻️ Retry last turn")
        with gr.Column(min_width=32):
            delete_turn_button = gr.Button("🧽 Delete last turn")
        with gr.Column(min_width=32):
            clear_chat_button = gr.Button("✨ Delete all history")

    gr.Examples(
        [
            ["Hey! Any recommendations for my holidays in Abu Dhabi?"],
            ["What's the Everett interpretation of quantum mechanics?"],
            [
                "Give me a list of the top 10 dive sites you would recommend around the world."
            ],
            ["Can you tell me more about deep-water soloing?"],
            # Fixed typo: GGM -> GGML.
            ["Can you write a short tweet about 30B-Lazarus-GGML?"],
        ],
        inputs=inputs,
        label="Click on any example and press Enter in the input textbox!",
    )

    with gr.Row(elem_id="param_container"):
        with gr.Column():
            temperature, top_p = chat_accordion()
        with gr.Column():
            with gr.Accordion("Instructions", open=False):
                instructions = gr.Textbox(
                    placeholder="LLM instructions",
                    value=DEFAULT_INSTRUCTIONS,
                    lines=10,
                    interactive=True,
                    label="Instructions",
                    max_lines=16,
                    show_label=False,
                )

    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Stream one assistant turn into chat_history; RETRY_COMMAND re-runs the last turn."""
        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
            yield chat_history
            return

        if message == RETRY_COMMAND and chat_history:
            # Drop the previous turn and regenerate from its user message.
            prev_turn = chat_history.pop(-1)
            user_message, _ = prev_turn
            message = user_message

        prompt = format_chat_prompt(message, chat_history, instructions)
        chat_history = chat_history + [[message, ""]]
        stream = llm(
            prompt,
            max_new_tokens=1024,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        acc_text = ""
        for idx, text_token in enumerate(stream):
            if text_token in STOP_SUSPECT_LIST:
                # Might be the start of a stop sequence: buffer, don't display yet.
                acc_text += text_token
                continue

            # Strip the single leading space the model tends to emit first.
            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]

            acc_text += text_token
            last_turn = list(chat_history.pop(-1))
            last_turn[-1] += acc_text
            chat_history = chat_history + [last_turn]
            yield chat_history
            acc_text = ""

    def delete_last_turn(chat_history):
        """Remove the most recent (user, bot) turn from the history."""
        if chat_history:
            chat_history.pop(-1)
        return {chatbot: gr.update(value=chat_history)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Regenerate the last assistant reply."""
        yield from run_chat(
            RETRY_COMMAND, chat_history, instructions, temperature, top_p
        )

    def clear_chat():
        """Reset the conversation."""
        return []

    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    # Clear the textbox after each submission.
    inputs.submit(lambda: "", inputs=None, outputs=inputs)
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)
242
+
243
+
244
def get_demo():
    """Assemble the Blocks app: title, limitations note, then the chat UI."""
    chatbot_css = """#chatbot {
    font-size: 14px;
    min-height: 300px;
    }"""
    with gr.Blocks(css=chatbot_css) as demo:
        gr.HTML(TITLE)

        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """
                    ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so.
                    """
                )

        chat()

    return demo
268
+
269
+
270
if __name__ == "__main__":
    # Local entry point: build the app, enable queuing (needed for generator
    # streaming), and serve on Gradio's default host/port.
    app = get_demo()
    app.queue(max_size=64, concurrency_count=8)
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ctransformers
2
+ transformers
3
+ huggingface_hub
4
+ # gradio
5
+ loguru
6
+ about-time
7
+ psutil