gokaygokay committed
Commit 9f18ec6
1 Parent(s): eede49b

Update app.py

Files changed (1)
  1. app.py +12 -19
app.py CHANGED
@@ -18,22 +18,7 @@ hf_hub_download(
     local_dir = "./models"
 )
 
-def initialize_model(model):
-    global llm, llm_model
-    if llm is None or llm_model != model:
-        llm = Llama(
-            model_path=f"models/{model}",
-            flash_attn=True,
-            n_gpu_layers=81,
-            n_batch=1024,
-            n_ctx=8192,
-        )
-        llm_model = model
-    return llm
 
-# Initialize the model with the default model
-default_model = "Reflection-Llama-3.1-70B-Q3_K_M.gguf"
-initialize_model(default_model)
 
 def get_messages_formatter_type(model_name):
     if "Llama" in model_name:
@@ -42,7 +27,7 @@ def get_messages_formatter_type(model_name):
         raise ValueError(f"Unsupported model: {model_name}")
 
 
-@spaces.GPU(duration=60)
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -54,12 +39,20 @@ def respond(
     top_k,
     repeat_penalty,
 ):
-    global llm, llm_model
+    global llm
+    global llm_model
 
     chat_template = get_messages_formatter_type(model)
 
-    if llm_model != model:
-        llm = initialize_model(model)
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        llm_model = model
 
     provider = LlamaCppPythonProvider(llm)
 
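Taken together, the change removes the module-level initialize_model() helper and the eager load of the default Reflection-Llama-3.1-70B-Q3_K_M.gguf, folds the same lazy-load logic directly into the @spaces.GPU-decorated respond() handler, and raises the GPU allocation from 60 to 120 seconds. Below is a minimal sketch of the pattern app.py lands on; the file is only partially visible in this diff, so the imports, the globals, and the elided respond() parameters are inferred rather than confirmed:

import spaces
from llama_cpp import Llama
from llama_cpp_agent.providers import LlamaCppPythonProvider

llm = None        # cached Llama instance, reused across calls
llm_model = None  # filename of the GGUF currently loaded

@spaces.GPU(duration=120)  # GPU is attached only while this function runs
def respond(message, history: list[tuple[str, str]], model, top_k, repeat_penalty):
    # (the diff elides several sampling parameters between `history`
    # and `top_k`; they are omitted here as well)
    global llm, llm_model
    # Lazy (re)load: build the model on the first call, or whenever the
    # caller selects a different GGUF file than the one resident.
    if llm is None or llm_model != model:
        llm = Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_gpu_layers=81,   # offload all layers to the GPU
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = model
    provider = LlamaCppPythonProvider(llm)
    # ... formatter selection and generation continue as in app.py

On a ZeroGPU Space the GPU is attached only for the duration of a @spaces.GPU call, so moving the Llama(...) construction out of module scope and into respond() keeps the model load inside the allocated window; the bump from 60 to 120 seconds plausibly budgets for the first-request load of the 70B Q3_K_M file.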