rovi27 committed
Commit 70888c4
Parent: 2b64f6f

Update app.py

Files changed (1)
  1. app.py +11 -41
app.py CHANGED
@@ -2,43 +2,21 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
-# !python -c "import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'"
 import json
-import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer, StoppingCriteria, StoppingCriteriaList, GenerationConfig
 import os
 
-#sft_model = "somosnlp/gemma-FULL-RAC-Colombia_v2"
 #sft_model = "somosnlp/RecetasDeLaAbuela_mistral-7b-instruct-v0.2-bnb-4bit"
 #base_model_name = "unsloth/Mistral-7B-Instruct-v0.2"
-sft_model1 = "somosnlp/RecetasDeLaAbuela_gemma-2b-it-bnb-4bit"
-sft_model2 = "somosnlp/RecetasDeLaAbuela_mistral-7b-instruct-v0.2-bnb-4bit"
+sft_model = "somosnlp/RecetasDeLaAbuela_gemma-2b-it-bnb-4bit"
 base_model_name = "unsloth/gemma-2b-it-bnb-4bit"
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
 max_seq_length=400
-
-# if torch.cuda.get_device_capability()[0] >= 8:
-#     # print("Flash Attention")
-#     attn_implementation="flash_attention_2"
-# else:
-#     attn_implementation=None
-attn_implementation=None
-
-#base_model = AutoModelForCausalLM.from_pretrained(model_name,return_dict=True,torch_dtype=torch.float16,)
 base_model = AutoModelForCausalLM.from_pretrained(base_model_name,return_dict=True,device_map="auto", torch_dtype=torch.float16,)
-#base_model = AutoModelForCausalLM.from_pretrained(base_model_name, return_dict=True, device_map = {"":0}, attn_implementation = attn_implementation,).eval()
-
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, max_length = max_seq_length)
-sft_model = sft_model1
 ft_model = PeftModel.from_pretrained(base_model, sft_model)
 model = ft_model.merge_and_unload()
 model.save_pretrained(".")
-#model.to('cuda')
 tokenizer.save_pretrained(".")
 
 class ListOfTokensStoppingCriteria(StoppingCriteria):
@@ -68,15 +46,9 @@ stopping_criteria = ListOfTokensStoppingCriteria(tokenizer, stop_tokens)
 # Añade tu criterio de parada a una StoppingCriteriaList
 stopping_criteria_list = StoppingCriteriaList([stopping_criteria])
 
-def generate_text(modelin, prompt, context, max_length=2100):
-    print('Modelo es: '+modelin)
-    #sft_model = modelin
-    #ft_model = PeftModel.from_pretrained(base_model, sft_model)
-    #model = ft_model.merge_and_unload()
-
+def generate_text(prompt, context, max_length=2100):
     prompt=prompt.replace("\n", "").replace("¿","").replace("?","")
-    #input_text = f'''<bos><start_of_turn>system ¿{context}?<end_of_turn><start_of_turn>user ¿{prompt}?<end_of_turn><start_of_turn>model'''
-    input_text = str(context)+str(prompt)
+    input_text = f'''<bos><start_of_turn>system ¿{context}?<end_of_turn><start_of_turn>user ¿{prompt}?<end_of_turn><start_of_turn>model'''
     inputs = tokenizer.encode(input_text, return_tensors="pt", add_special_tokens=False).to("cuda:0")
     max_new_tokens=max_length
     generation_config = GenerationConfig(
@@ -90,25 +62,23 @@ def generate_text(modelin, prompt, context, max_length=2100):
     outputs = model.generate(generation_config=generation_config, input_ids=inputs, stopping_criteria=stopping_criteria_list,)
     return tokenizer.decode(outputs[0], skip_special_tokens=False) #True
 
-def mostrar_respuesta(modelo, pregunta, contexto):
+def mostrar_respuesta(pregunta, contexto):
     try:
-        print('Modelo: '+str(modelo))
-        print('Pregunta: '+str(pregunta))
-        print('Contexto: '+str(contexto))
-        res= generate_text(modelo, pregunta, contexto, max_length=500)
-        print('Respuesta: '+str(contexto))
+        res= generate_text(pregunta, contexto, max_length=500)
         return str(res)
     except Exception as e:
         return str(e)
 
 # Ejemplos de preguntas
-mis_ejemplos = [[sft_model1,"¿Dime la receta de la tortilla de patatatas?","Cocinero español"],[sft_model1,"¿Dime la receta del ceviche?","Cocinero peruano"], [sft_model1, "¿Como se cocinan unos autenticos frijoles?","Cocinero de México"],]
-lista_modelos = [sft_model1, sft_model2]
+mis_ejemplos = [
+    ["¿Dime la receta de la tortilla de patatatas?"],
+    ["¿Dime la receta del ceviche?"],
+    ["¿Como se cocinan unos autenticos frijoles?"],
+]
 
 iface = gr.Interface(
     fn=mostrar_respuesta,
-    inputs=[gr.Dropdown(choices=lista_modelos, value = sft_model1, label="Modelo", type="value"), gr.Textbox(label="Pregunta"),
-            gr.Textbox(label="Contexto", value="You are a helpful AI assistant. Eres un experto cocinero hispanoamericano."),],
+    inputs=[gr.Textbox(label="Pregunta"), gr.Textbox(label="Contexto", value="You are a helpful AI assistant. Eres un experto cocinero hispanoamericano."),],
     outputs=[gr.Textbox(label="Respuesta", lines=2),],
     title="Recetas de la Abuel@",
    description="Introduce tu pregunta sobre recetas de cocina.",