Nekochu committed on
Commit 2bc6f48
1 Parent(s): b0302a5

Update app.py

Files changed (1)
  1. app.py +14 -15
app.py CHANGED
@@ -1,6 +1,7 @@
 import os
 from threading import Thread
 from typing import Iterator
+
 import gradio as gr
 import spaces
 import torch
@@ -20,35 +21,33 @@ LICENSE = """
 ---.
 """
 
-if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-
-models_cache = {}
-
-def load_model(model_id: str):
+def load_model(model_id):
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
     return model, tokenizer
 
+if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+
+if torch.cuda.is_available():
+    model_id = "Nekochu/Luminia-13B-v3"
+    model, tokenizer = load_model(model_id)
+
+
 @spaces.GPU(duration=120)
 def generate(
-    model_id: str,
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
+    model_id: str = "Nekochu/Luminia-13B-v3",
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    # Load the model if it's not already loaded
-    if model_id not in models_cache:
-        model, tokenizer = load_model(model_id)
-        models_cache[model_id] = (model, tokenizer)
-    else:
-        model, tokenizer = models_cache[model_id]
+    model, tokenizer = load_model(model_id)
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
@@ -86,8 +85,8 @@ def generate(
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Textbox(label="Model ID", placeholder="Nekochu/Luminia-13B-v3"),
         gr.Textbox(label="System prompt", lines=6),
+        gr.Textbox(label="Model ID", default="Nekochu/Luminia-13B-v3"),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -138,4 +137,4 @@ with gr.Blocks(css="style.css") as demo:
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=20).launch()
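
Note on the change: the removed models_cache dict kept each checkpoint in memory after its first use, whereas the updated code calls load_model(model_id) on every generate() request and therefore reloads the weights each time a request comes in. If that turns out to be too slow, one possible cached variant is sketched below; it reuses the same load_model body as in the diff, and the maxsize value is an illustrative assumption, not something from this commit.

from functools import lru_cache

from transformers import AutoModelForCausalLM, AutoTokenizer


@lru_cache(maxsize=2)  # illustrative: keep at most two models resident; tune to GPU memory
def load_model(model_id: str):
    # Same body as the committed load_model; lru_cache keys on model_id, so repeated
    # generate() calls with the same Model ID reuse the already-loaded model and tokenizer.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False
    return model, tokenizer

Separately, recent Gradio releases pre-fill a Textbox through value= (or show a hint through placeholder=) rather than default=, so the added Model ID textbox line may need value="Nekochu/Luminia-13B-v3" instead of default= to work as intended.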