anakin87 committed on
Commit
4d3de5e
1 Parent(s): 698d75d
Files changed (1)
  1. app.py +22 -12
app.py CHANGED
@@ -6,6 +6,14 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import subprocess
+
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+
 
 DESCRIPTION = """\
 # Gemma 2 9B IT
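The new startup block installs flash-attn inside the Space, with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE so pip uses a prebuilt wheel instead of compiling CUDA kernels. transformers only uses FlashAttention when the model is loaded with it enabled; this diff does not set that flag, so the sketch below is an assumption about how the opt-in would typically look, not part of the commit.

# Hypothetical opt-in, not part of this commit: load the model with FlashAttention-2
# (requires a CUDA GPU and the flash-attn package installed above).
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "anakin87/Phi-3.5-mini-ITA",
    torch_dtype=torch.bfloat16,                # fp16/bf16 is required by the flash-attn kernels
    device_map="auto",
    trust_remote_code=True,
    attn_implementation="flash_attention_2",   # assumed flag; app.py does not pass it explicitly
)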
@@ -24,11 +32,12 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_id = "anakin87/Phi-3.5-mini-ITA"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
 )
 model.config.sliding_window = 4096
 model.eval()
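Both the tokenizer and the model are now loaded with trust_remote_code=True, which lets transformers run the custom modeling and tokenization code shipped with the Phi-3.5 checkpoint. Outside this hunk, the app presumably turns the chat history into model inputs through the tokenizer's chat template; a minimal sketch of that pattern, with an illustrative conversation, is:

# Illustrative use of the loaded tokenizer/model pair; the conversation content is made up.
conversation = [{"role": "user", "content": "Ciao! Come stai?"}]
input_ids = tokenizer.apply_chat_template(
    conversation, add_generation_prompt=True, return_tensors="pt"
).to(model.device)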
@@ -39,10 +48,10 @@ def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
-    top_p: float = 0.9,
+    temperature: float = 0.001,
+    top_p: float = 1.0,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    repetition_penalty: float = 1.0,
 ) -> Iterator[str]:
     conversation = []
     for user, assistant in chat_history:
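The new defaults make decoding effectively greedy: temperature 0.001 flattens sampling to near-argmax, top_p 1.0 disables nucleus truncation, and repetition_penalty 1.0 disables the penalty. The body of generate() is not touched by this hunk; as a sketch of the streaming call these parameters usually feed (the common TextIteratorStreamer pattern in HF chat Spaces, with input_ids, tokenizer, and model assumed from earlier in app.py):

# Sketch only, assuming the usual streaming-generation pattern; exact kwargs in the real file may differ.
from threading import Thread

streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=1024,
    do_sample=True,                  # sampling stays on; temperature near 0 makes it near-greedy
    temperature=0.001,
    top_p=1.0,
    top_k=50,
    repetition_penalty=1.0,
)
Thread(target=model.generate, kwargs=generate_kwargs).start()
partial_text = ""
for new_text in streamer:
    partial_text += new_text         # the real generate() yields this back to the Gradio chat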
@@ -93,17 +102,17 @@ chat_interface = gr.ChatInterface(
         ),
         gr.Slider(
             label="Temperature",
-            minimum=0.1,
+            minimum=0,
             maximum=4.0,
             step=0.1,
-            value=0.6,
+            value=0.001,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=0.9,
+            value=1.0,
         ),
         gr.Slider(
             label="Top-k",
@@ -117,19 +126,20 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.0,
         ),
     ],
     stop_btn=None,
     examples=[
         ["Ciao! Come stai?"],
-        ["Puoi spiegarmi brevemente cos'è il linguaggio di programmazione Python?"],
-        ["Spiega la trama di Cenerentola in una frase."],
-        ["Quante ore ci vogliono a un uomo per mangiare un elicottero?"],
+        ["Pro e contro di una relazione a lungo termine. Elenco puntato con max 3 pro e 3 contro sintetici."],
+        ["Quante ore impiega un uomo per mangiare un elicottero?"],
+        ["Come si apre un file JSON in Python?"],
+        ["Fammi un elenco puntato dei pro e contro di vivere in Italia. Massimo 2 pro e 2 contro."],
+        ["Inventa una breve storia con animali sul valore dell'amicizia."],
         ["Scrivi un articolo di 100 parole sui 'Benefici dell'open-source nella ricerca sull'intelligenza artificiale'"],
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
         ["How many hours does it take a man to eat a Helicopter?"],
         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
     ],
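The slider values mirror the new generate() defaults because gr.ChatInterface passes each component in additional_inputs to the callback as an extra positional argument after the message and chat history; parameters without a matching component fall back to their function defaults. A reduced sketch of that wiring, with the component list abbreviated and the slider bounds chosen for illustration:

# Abbreviated sketch of the ChatInterface wiring assumed by this diff, not the full app.
import gradio as gr

chat_interface = gr.ChatInterface(
    fn=generate,  # with these two sliders, invoked as generate(message, chat_history, 1024, 0.001)
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=1024),
        gr.Slider(label="Temperature", minimum=0, maximum=4.0, step=0.1, value=0.001),
    ],
    examples=[["Ciao! Come stai?"]],
)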
 