jordigonzm committed on
Commit
0067ed6
1 Parent(s): 976d049

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -85
app.py CHANGED
@@ -1,86 +1,59 @@
1
- import os
2
  import gradio as gr
3
- from http import HTTPStatus
4
- from transformers import pipeline
5
- from typing import List, Optional, Tuple, Dict
6
- from urllib.error import HTTPError
7
-
8
- default_system = 'You are a helpful assistant.'
9
-
10
- History = List[Tuple[str, str]]
11
- Messages = List[Dict[str, str]]
12
-
13
- def clear_session() -> History:
14
- return '', []
15
-
16
- def modify_system_session(system: str) -> str:
17
- if system is None or len(system) == 0:
18
- system = default_system
19
- return system, system, []
20
-
21
- def history_to_messages(history: History, system: str) -> Messages:
22
- messages = [{'role': Role.SYSTEM, 'content': system}]
23
- for h in history:
24
- messages.append({'role': Role.USER, 'content': h[0]})
25
- messages.append({'role': Role.ASSISTANT, 'content': h[1]})
26
- return messages
27
-
28
- def messages_to_history(messages: Messages) -> Tuple[str, History]:
29
- assert messages[0]['role'] == Role.SYSTEM
30
- system = messages[0]['content']
31
- history = []
32
- for q, r in zip(messages[1::2], messages[2::2]):
33
- history.append([q['content'], r['content']])
34
- return system, history
35
-
36
- def model_chat(query: Optional[str], history: Optional[History], system: str
37
- ) -> Tuple[str, str, History]:
38
- if query is None:
39
- query = ''
40
- if history is None:
41
- history = []
42
- messages = history_to_messages(history, system)
43
- messages.append({'role': Role.USER, 'content': query})
44
-
45
- generator = pipeline('text-generation', model='microsoft/Phi-3-mini-128k-instruct')
46
- response = generator(query, max_length=150) # Ajusta la longitud máxima según necesidad
47
-
48
- role = Role.ASSISTANT
49
- response_content = response[0]['generated_text']
50
- system, history = messages_to_history(messages + [{'role': role, 'content': response_content}])
51
- return '', history, system
52
-
53
- with gr.Blocks() as demo:
54
- with gr.TabBar():
55
- with gr.Tab("Model Info"):
56
- gr.Markdown("""Modelo actual: `microsoft/Phi-3-mini-128k-instruct`""")
57
-
58
- with gr.Tab("Chat"):
59
- gr.Markdown("""<center><font size=8>Chat Bot Preview👾</center>""")
60
-
61
- with gr.Row():
62
- with gr.Column(scale=3):
63
- system_input = gr.Textbox(value=default_system, lines=1, label='System')
64
- with gr.Column(scale=1):
65
- modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
66
- system_state = gr.Textbox(value=default_system, visible=False)
67
- chatbot = gr.Chatbot(label='Chat with AI')
68
- textbox = gr.Textbox(lines=2, label='Input')
69
-
70
- with gr.Row():
71
- clear_history = gr.Button("🧹 Clear history")
72
- submit = gr.Button("🚀 Send")
73
-
74
- submit.click(model_chat,
75
- inputs=[textbox, chatbot, system_state],
76
- outputs=[textbox, chatbot, system_input],
77
- concurrency_limit=100)
78
- clear_history.click(fn=clear_session,
79
- inputs=[],
80
- outputs=[textbox, chatbot])
81
- modify_system.click(fn=modify_system_session,
82
- inputs=[system_input],
83
- outputs=[system_state, system_input, chatbot])
84
-
85
- demo.queue(api_open=False)
86
- demo.launch(max_threads=30)
 
 
1
  import gradio as gr
2
+ import spaces
3
+ import torch
4
+
5
+ import transformers
6
+ import torch
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer
8
+
9
+ model_name = "microsoft/Phi-3-mini-128k-instruct"
10
+
11
+ pipeline = transformers.pipeline(
12
+ "text-generation",
13
+ model=model_name,
14
+ model_kwargs={"torch_dtype": torch.bfloat16},
15
+ device="cuda",
16
+ )
17
+
18
+ @spaces.GPU
19
+ def chat_function(message, history, system_prompt,max_new_tokens,temperature):
20
+ messages = [
21
+ {"role": "system", "content": system_prompt},
22
+ {"role": "user", "content": message},
23
+ ]
24
+ prompt = pipeline.tokenizer.apply_chat_template(
25
+ messages,
26
+ tokenize=False,
27
+ add_generation_prompt=True
28
+ )
29
+ terminators = [
30
+ pipeline.tokenizer.eos_token_id,
31
+ pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
32
+ ]
33
+ temp = temperature + 0.1
34
+ outputs = pipeline(
35
+ prompt,
36
+ max_new_tokens=max_new_tokens,
37
+ eos_token_id=terminators,
38
+ do_sample=True,
39
+ temperature=temp,
40
+ top_p=0.9,
41
+ )
42
+ return outputs[0]["generated_text"][len(prompt):]
43
+
44
+ gr.ChatInterface(
45
+ chat_function,
46
+ chatbot=gr.Chatbot(height=400),
47
+ textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
48
+ title="microsoft/Phi-3-mini-128k-instruct",
49
+ description="""
50
+ This space is dedicated for chatting with Meta's Latest LLM - Llama 8b Instruct. Find this model here: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
51
+ Feel free to play with customization in the "Additional Inputs".
52
+ """,
53
+ theme="soft",
54
+ additional_inputs=[
55
+ gr.Textbox("You are helpful AI.", label="System Prompt"),
56
+ gr.Slider(512, 4096, label="Max New Tokens"),
57
+ gr.Slider(0, 1, label="Temperature")
58
+ ]
59
+ ).launch()