tangzhy committed on
Commit
fecb2b3
1 Parent(s): 5c20eaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -37
app.py CHANGED
@@ -8,30 +8,26 @@ import torch
8
  from transformers import (
9
  AutoModelForCausalLM,
10
  BitsAndBytesConfig,
11
- AutoTokenizer,
12
  TextIteratorStreamer,
13
  )
14
 
15
  DESCRIPTION = """\
16
- # ORLM LLaMA-3-8B
17
- Hello! I'm ORLM-LLaMA-3-8B, here to automate your optimization modeling tasks! Check our [repo](https://github.com/Cardinal-Operations/ORLM) and [paper](https://arxiv.org/abs/2405.17743)!
 
 
 
18
  """
19
 
20
- PROMPT_TEMPLATE = """
21
- Below is an operations research question. Build a mathematical model and corresponding python code using `coptpy` that appropriately addresses the question.
 
22
 
23
- # Question:
24
- {Question}
25
 
26
- # Response:
27
- """
28
-
29
- MAX_MAX_NEW_TOKENS = 4096
30
- DEFAULT_MAX_NEW_TOKENS = 4096
31
- MAX_INPUT_TOKEN_LENGTH = 2048
32
-
33
- model_id = "CardinalOperations/ORLM-LLaMA-3-8B"
34
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
35
  model = AutoModelForCausalLM.from_pretrained(
36
  model_id,
37
  device_map="auto",
@@ -41,7 +37,7 @@ model.config.sliding_window = 4096
41
  model.eval()
42
 
43
 
44
- @spaces.GPU(duration=60)
45
  def generate(
46
  message: str,
47
  chat_history: list[tuple[str, str]],
@@ -51,12 +47,20 @@ def generate(
51
  top_k: int = 50,
52
  repetition_penalty: float = 1.2,
53
  ) -> Iterator[str]:
54
- if chat_history != []:
55
- return "Sorry, I am an instruction-tuned model and currently do not support chatting. Please try clearing the chat history or refreshing the page to ask a new question."
56
-
57
- prompt = PROMPT_TEMPLATE.replace("{Question}", message).strip()
58
- tokenized_example = tokenizer(prompt, return_tensors='pt', max_length=MAX_INPUT_TOKEN_LENGTH, truncation=True)
59
- input_ids = tokenized_example.input_ids
 
 
 
 
 
 
 
 
60
  input_ids = input_ids.to(model.device)
61
 
62
  streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
@@ -64,13 +68,12 @@ def generate(
64
  {"input_ids": input_ids},
65
  streamer=streamer,
66
  max_new_tokens=max_new_tokens,
67
- do_sample=False if temperature == 0.0 else True,
68
  top_p=top_p,
69
  top_k=top_k,
70
  temperature=temperature,
71
  num_beams=1,
72
  repetition_penalty=repetition_penalty,
73
- eos_token_id=[tok.eos_token_id],
74
  )
75
  t = Thread(target=model.generate, kwargs=generate_kwargs)
76
  t.start()
@@ -79,9 +82,6 @@ def generate(
79
  for text in streamer:
80
  outputs.append(text)
81
  yield "".join(outputs)
82
-
83
- # outputs.append("\n\nI have now attempted to solve the optimization modeling task! Please try executing the code in your environment, making sure it is equipped with `coptpy`.")
84
- # yield "".join(outputs)
85
 
86
 
87
  chat_interface = gr.ChatInterface(
@@ -96,44 +96,46 @@ chat_interface = gr.ChatInterface(
96
  ),
97
  gr.Slider(
98
  label="Temperature",
99
- minimum=0.0,
100
  maximum=4.0,
101
  step=0.1,
102
- value=0.0,
103
  ),
104
  gr.Slider(
105
  label="Top-p (nucleus sampling)",
106
  minimum=0.05,
107
  maximum=1.0,
108
  step=0.05,
109
- value=1.0,
110
  ),
111
  gr.Slider(
112
  label="Top-k",
113
  minimum=1,
114
  maximum=1000,
115
  step=1,
116
- value=20,
117
  ),
118
  gr.Slider(
119
  label="Repetition penalty",
120
  minimum=1.0,
121
  maximum=2.0,
122
  step=0.05,
123
- value=1.0,
124
  ),
125
  ],
126
  stop_btn=None,
127
  examples=[
128
- ["A lab has 1000 units of medicinal ingredients to make two pills, a large pill and a small pill. A large pill requires 3 units of medicinal ingredients and 2 units of filler. A small pill requires 2 units of medicinal ingredients and 1 unit of filler. The lab has to make at least 100 large pills. However, since small pills are more popular at least 60% of the total number of pills must be small. How many of each should be made to minimize the total number of filler material needed?"],
129
- ["Let's say you're on a mission to create the perfect meal plan for yourself. You're focused on getting the right balance of nutrients without digging too deep into your pockets. You have six different food options to consider: Chicken, Oats, Tofu, Rice, Beef, and Salmon. Each of these foods provides a certain amount of protein, carbohydrates, and calories, and they all come with their own distinct prices.\n\nHere's a detailed breakdown of the nutritional content and cost of each food item:\n\n- Chicken: Delivers 14 grams of protein, a single gram of carbohydrates, and 164 calories at a cost of $6.\n- Oats: Gives you 11 grams of protein, 6 grams of carbohydrates, and 210 calories for just $2.\n- Tofu: Offers 8 grams of protein, 12 grams of carbohydrates, and 98 calories at a cost of $9.\n- Rice: Provides 1 gram of protein, a generous 17 grams of carbohydrates, and 92 calories for $3.\n- Beef: Comes packed with 16 grams of protein, 11 grams of carbohydrates, and 211 calories, priced at $7.\n- Salmon: Brings a hefty 19 grams of protein, 13 grams of carbohydrates, and 211 calories but costs $9.\n\nYou want to make sure your meal plan meets the following nutritional targets: at least 70 grams of protein, 117 grams of carbohydrates, and 1837 calories. Considering these six foods, what is the least amount of money you need to spend to meet these dietary needs?\nRemember, your response should only contain the optimal value of the cost to meet the requirements."],
130
- ["Haus Toys can manufacture and sell toy trucks, toy planes, toy boats, and toy trains. The profit from selling one truck is $5, from one plane is $10, from one boat is $8, and from one train is $7. How many types of toys should Haus Toys manufacture to maximize profit?\n\nThere are 890 units of wood available. Manufacturing one truck requires 12 units of wood, one plane requires 20 units of wood, one boat requires 15 units of wood, and one train requires 10 units of wood.\n\nThere are 500 units of steel available. Manufacturing one plane requires 3 units of steel, one boat requires 5 units of steel, one train requires 4 units of steel, and one truck requires 6 units of steel.\n\nIf Haus Toys manufactures trucks, then they will not manufacture trains.\n\nHowever, if they manufacture boats, they will also manufacture planes.\n\nThe number of toy boats manufactured cannot exceed the number of toy trains manufactured."],
 
 
131
  ],
132
  )
133
 
134
  with gr.Blocks(css="style.css", fill_height=True) as demo:
135
  gr.Markdown(DESCRIPTION)
136
- # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
137
  chat_interface.render()
138
 
139
  if __name__ == "__main__":
 
8
  from transformers import (
9
  AutoModelForCausalLM,
10
  BitsAndBytesConfig,
11
+ GemmaTokenizerFast,
12
  TextIteratorStreamer,
13
  )
14
 
15
  DESCRIPTION = """\
16
+ # Gemma 2 9B IT
17
+ Gemma 2 is Google's latest iteration of open LLMs.
18
+ This is a demo of [`google/gemma-2-9b-it`](https://huggingface.co/google/gemma-2-9b-it), fine-tuned for instruction following.
19
+ For more details, please check [our post](https://huggingface.co/blog/gemma2).
20
+ 👉 Looking for a larger and more powerful version? Try the 27B version in [HuggingChat](https://huggingface.co/chat/models/google/gemma-2-27b-it).
21
  """
22
 
23
+ MAX_MAX_NEW_TOKENS = 2048
24
+ DEFAULT_MAX_NEW_TOKENS = 1024
25
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
26
 
27
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
28
 
29
+ model_id = "google/gemma-2-9b-it"
30
+ tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
 
 
 
 
 
 
 
31
  model = AutoModelForCausalLM.from_pretrained(
32
  model_id,
33
  device_map="auto",
 
37
  model.eval()
38
 
39
 
40
+ @spaces.GPU(duration=90)
41
  def generate(
42
  message: str,
43
  chat_history: list[tuple[str, str]],
 
47
  top_k: int = 50,
48
  repetition_penalty: float = 1.2,
49
  ) -> Iterator[str]:
50
+ conversation = []
51
+ for user, assistant in chat_history:
52
+ conversation.extend(
53
+ [
54
+ {"role": "user", "content": user},
55
+ {"role": "assistant", "content": assistant},
56
+ ]
57
+ )
58
+ conversation.append({"role": "user", "content": message})
59
+
60
+ input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
61
+ if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
62
+ input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
63
+ gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
64
  input_ids = input_ids.to(model.device)
65
 
66
  streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
 
68
  {"input_ids": input_ids},
69
  streamer=streamer,
70
  max_new_tokens=max_new_tokens,
71
+ do_sample=True,
72
  top_p=top_p,
73
  top_k=top_k,
74
  temperature=temperature,
75
  num_beams=1,
76
  repetition_penalty=repetition_penalty,
 
77
  )
78
  t = Thread(target=model.generate, kwargs=generate_kwargs)
79
  t.start()
 
82
  for text in streamer:
83
  outputs.append(text)
84
  yield "".join(outputs)
 
 
 
85
 
86
 
87
  chat_interface = gr.ChatInterface(
 
96
  ),
97
  gr.Slider(
98
  label="Temperature",
99
+ minimum=0.1,
100
  maximum=4.0,
101
  step=0.1,
102
+ value=0.6,
103
  ),
104
  gr.Slider(
105
  label="Top-p (nucleus sampling)",
106
  minimum=0.05,
107
  maximum=1.0,
108
  step=0.05,
109
+ value=0.9,
110
  ),
111
  gr.Slider(
112
  label="Top-k",
113
  minimum=1,
114
  maximum=1000,
115
  step=1,
116
+ value=50,
117
  ),
118
  gr.Slider(
119
  label="Repetition penalty",
120
  minimum=1.0,
121
  maximum=2.0,
122
  step=0.05,
123
+ value=1.2,
124
  ),
125
  ],
126
  stop_btn=None,
127
  examples=[
128
+ ["Hello there! How are you doing?"],
129
+ ["Can you explain briefly to me what is the Python programming language?"],
130
+ ["Explain the plot of Cinderella in a sentence."],
131
+ ["How many hours does it take a man to eat a Helicopter?"],
132
+ ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
133
  ],
134
  )
135
 
136
  with gr.Blocks(css="style.css", fill_height=True) as demo:
137
  gr.Markdown(DESCRIPTION)
138
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
139
  chat_interface.render()
140
 
141
  if __name__ == "__main__":