Spaces:

jordigonzm
/

Phi-3-mini-128k-instruct

Runtime error

App Files Files Community

jordigonzm committed on Apr 25

Commit

0067ed6

•

1 Parent(s): 976d049

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -85

app.py CHANGED Viewed

@@ -1,86 +1,59 @@
-import os
 import gradio as gr
-from http import HTTPStatus
-from transformers import pipeline
-from typing import List, Optional, Tuple, Dict
-from urllib.error import HTTPError
-# System prompt used when the user supplies none or an empty one.
-default_system = 'You are a helpful assistant.'
-# Chat history: a list of (user message, assistant reply) pairs.
-History = List[Tuple[str, str]]
-# Role-tagged conversation: a list of dicts with 'role' and 'content' keys.
-Messages = List[Dict[str, str]]
-def clear_session() -> Tuple[str, History]:
-    """Reset the chat UI.
-
-    Returns:
-        An empty string (for the input textbox) and an empty history
-        (for the chatbot), in that order.
-    """
-    return '', []
-def modify_system_session(system: Optional[str]) -> Tuple[str, str, History]:
-    """Apply a new system prompt and clear the conversation.
-
-    Args:
-        system: Prompt entered by the user; ``None`` or an empty string
-            falls back to ``default_system``.
-
-    Returns:
-        The effective prompt twice, followed by an empty history.
-    """
-    if not system:
-        system = default_system
-    return system, system, []
-def history_to_messages(history: History, system: str) -> Messages:
-    """Flatten (user, assistant) pairs into a role-tagged message list.
-
-    Args:
-        history: Earlier turns as (user message, assistant reply) pairs.
-        system: System prompt placed first in the returned list.
-
-    Returns:
-        The system message followed by alternating user/assistant messages.
-    """
-    # Plain role strings are used because no `Role` name is defined or
-    # imported anywhere in this file.
-    messages = [{'role': 'system', 'content': system}]
-    for user_text, assistant_text in history:
-        messages.append({'role': 'user', 'content': user_text})
-        messages.append({'role': 'assistant', 'content': assistant_text})
-    return messages
-def messages_to_history(messages: Messages) -> Tuple[str, History]:
-    """Split a role-tagged message list back into system prompt and history.
-
-    Args:
-        messages: A system message followed by alternating user/assistant
-            messages.
-
-    Returns:
-        The system prompt text and a list of [user, assistant] pairs.
-
-    Raises:
-        AssertionError: If the first message is not the system message.
-    """
-    # Compared against a plain string because no `Role` name is defined or
-    # imported anywhere in this file.
-    assert messages[0]['role'] == 'system'
-    system = messages[0]['content']
-    # Pair every odd-positioned message (user) with the one after it (assistant).
-    history = [
-        [question['content'], reply['content']]
-        for question, reply in zip(messages[1::2], messages[2::2])
-    ]
-    return system, history
-# Cached pipeline instance; building it is expensive, so it is created once.
-_generator = None
-def _get_generator():
-    """Return the shared text-generation pipeline, creating it on first use."""
-    global _generator
-    if _generator is None:
-        _generator = pipeline('text-generation', model='microsoft/Phi-3-mini-128k-instruct')
-    return _generator
-def model_chat(query: Optional[str], history: Optional[History], system: str
-) -> Tuple[str, History, str]:
-    """Generate the assistant's reply to ``query`` and update the history.
-
-    Args:
-        query: Latest user message; ``None`` is treated as an empty string.
-        history: Earlier (user, assistant) pairs; ``None`` is treated as empty.
-        system: System prompt for the conversation.
-
-    Returns:
-        An empty string, the history including the new exchange, and the
-        system prompt, in that order.
-    """
-    if query is None:
-        query = ''
-    if history is None:
-        history = []
-    messages = history_to_messages(history, system)
-    messages.append({'role': 'user', 'content': query})
-    generator = _get_generator()
-    # Render the whole conversation (system prompt and earlier turns included)
-    # with the model's chat template instead of sending only the raw query.
-    prompt = generator.tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True)
-    # max_new_tokens limits only the reply, so a long prompt cannot use up the
-    # budget; return_full_text=False keeps the prompt out of the reply.
-    response = generator(prompt, max_new_tokens=150, return_full_text=False)  # Adjust the length limit as needed
-    response_content = response[0]['generated_text']
-    system, history = messages_to_history(messages + [{'role': 'assistant', 'content': response_content}])
-    return '', history, system
-# Two-tab UI: a static model-info tab and the chat tab.
-with gr.Blocks() as demo:
-    # gr.Tabs is Gradio's tab container; there is no gr.TabBar component.
-    with gr.Tabs():
-        with gr.Tab("Model Info"):
-            gr.Markdown("""Modelo actual: `microsoft/Phi-3-mini-128k-instruct`""")
-        with gr.Tab("Chat"):
-            gr.Markdown("""<center><font size=8>Chat Bot Preview👾</center>""")
-            with gr.Row():
-                with gr.Column(scale=3):
-                    system_input = gr.Textbox(value=default_system, lines=1, label='System')
-                with gr.Column(scale=1):
-                    modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
-                # Hidden textbox holding the system prompt that is actually in effect.
-                system_state = gr.Textbox(value=default_system, visible=False)
-            chatbot = gr.Chatbot(label='Chat with AI')
-            textbox = gr.Textbox(lines=2, label='Input')
-            with gr.Row():
-                clear_history = gr.Button("🧹 Clear history")
-                submit = gr.Button("🚀 Send")
-            # Send: run the model, then clear the input box and refresh the chat.
-            submit.click(model_chat,
-                         inputs=[textbox, chatbot, system_state],
-                         outputs=[textbox, chatbot, system_input],
-                         concurrency_limit=100)
-            # Clear: empty the input box and the chat history.
-            clear_history.click(fn=clear_session,
-                                inputs=[],
-                                outputs=[textbox, chatbot])
-            # Set system prompt: store the new prompt and start a fresh conversation.
-            modify_system.click(fn=modify_system_session,
-                                inputs=[system_input],
-                                outputs=[system_state, system_input, chatbot])
-demo.queue(api_open=False)
-demo.launch(max_threads=30)

 import gradio as gr
+import spaces
+import torch
+import transformers
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Checkpoint served by this Space.
+model_name = "microsoft/Phi-3-mini-128k-instruct"
+# Build the generation pipeline once at import time so every chat request
+# reuses it; bfloat16 is requested for the weights and the device is CUDA.
+pipeline = transformers.pipeline(
+    task="text-generation",
+    model=model_name,
+    device="cuda",
+    model_kwargs=dict(torch_dtype=torch.bfloat16),
+)
+@spaces.GPU
+def chat_function(message, history, system_prompt,max_new_tokens,temperature):
+    """Generate one assistant reply for the Gradio chat interface.
+
+    Args:
+        message: Latest user message.
+        history: Earlier turns supplied by the chat UI, either
+            ``[user, assistant]`` pairs or ``{"role", "content"}`` dicts.
+        system_prompt: System instruction placed first in the conversation.
+        max_new_tokens: Upper bound on the number of generated tokens.
+        temperature: Sampling temperature from the UI slider (offset below).
+
+    Returns:
+        The generated text with the prompt prefix removed.
+    """
+    messages = [{"role": "system", "content": system_prompt}]
+    # Replay earlier turns so the model keeps the multi-turn context.
+    for turn in history or []:
+        if isinstance(turn, dict):
+            messages.append({"role": turn["role"], "content": turn["content"]})
+            continue
+        user_text, assistant_text = turn
+        if user_text:
+            messages.append({"role": "user", "content": user_text})
+        if assistant_text:
+            messages.append({"role": "assistant", "content": assistant_text})
+    messages.append({"role": "user", "content": message})
+    prompt = pipeline.tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # Phi-3 ends each turn with "<|end|>"; "<|eot_id|>" is the Llama 3
+    # end-of-turn token and is not this model's stop marker.
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|end|>")
+    ]
+    # Sampling is always enabled, so the slider's minimum of 0 is shifted
+    # upward to keep the temperature strictly positive.
+    temp = temperature + 0.1
+    outputs = pipeline(
+        prompt,
+        # Sliders may deliver a float; the token budget must be an integer.
+        max_new_tokens=int(max_new_tokens),
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temp,
+        top_p=0.9,
+    )
+    # The pipeline output starts with the prompt; return only the new text.
+    return outputs[0]["generated_text"][len(prompt):]
+# Build the chat UI around chat_function and start serving it.
+gr.ChatInterface(
+    chat_function,
+    chatbot=gr.Chatbot(height=400),
+    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
+    title="microsoft/Phi-3-mini-128k-instruct",
+    description="""
+    This space is dedicated to chatting with Microsoft's Phi-3-mini-128k-instruct. Find this model here: https://huggingface.co/microsoft/Phi-3-mini-128k-instruct
+    Feel free to play with customization in the "Additional Inputs".
+    """,
+    theme="soft",
+    # Order matches chat_function's extra parameters:
+    # system_prompt, max_new_tokens, temperature.
+    additional_inputs=[
+        gr.Textbox("You are helpful AI.", label="System Prompt"),
+        gr.Slider(512, 4096, label="Max New Tokens"),
+        gr.Slider(0, 1, label="Temperature")
+    ]
+).launch()