jordigonzm committed
Commit c86f71e
Parent: 9932057

Update app.py

Files changed (1): app.py (+13 -17)
app.py CHANGED
@@ -8,39 +8,35 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_name = "microsoft/Phi-3-mini-128k-instruct"
 
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_name,
-    model_kwargs={"torch_dtype": torch.bfloat16},
+    model=model,
+    tokenizer=tokenizer,
     device="cpu",
     trust_remote_code=True
 )
 
 @spaces.GPU
 def chat_function(message, history, system_prompt, max_new_tokens, temperature):
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": message},
-    ]
-    prompt = pipeline.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    terminators = [
-        pipeline.tokenizer.eos_token_id,
-        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
+    prompt = system_prompt
+    for msg in history:
+        prompt += f'{msg["role"]}: {msg["content"]}\n'
+    prompt += f'user: {message}\n'
+
     temp = temperature + 0.1
     outputs = pipeline(
         prompt,
         max_new_tokens=max_new_tokens,
-        eos_token_id=terminators,
         do_sample=True,
         temperature=temp,
         top_p=0.9,
     )
-    return outputs[0]["generated_text"][len(prompt):]
+    generated_text = outputs[0]['generated_text']
+    new_text = generated_text[len(prompt):]
+    return new_text
 
 gr.ChatInterface(
     chat_function,
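
Net effect of the commit: the pipeline now receives an explicitly loaded model and tokenizer instead of a model name string, and the prompt is built by hand from the chat history rather than via tokenizer.apply_chat_template, dropping the <|eot_id|> terminator (a Llama-3 template token, not a Phi-3 one). A minimal standalone sketch of the new prompt-building loop, assuming Gradio passes history as a list of {"role": ..., "content": ...} dicts (the "messages" format); the sample values below are hypothetical:

# Standalone sketch of the prompt construction introduced by this commit.
# Assumes `history` arrives in Gradio's "messages" format; sample data is made up.
system_prompt = "You are a helpful assistant."
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]
message = "What does Phi-3 do?"

prompt = system_prompt
for msg in history:
    prompt += f'{msg["role"]}: {msg["content"]}\n'
prompt += f'user: {message}\n'

print(prompt)
# You are a helpful assistant.user: Hi
# assistant: Hello! How can I help?
# user: What does Phi-3 do?

Note that nothing separates the system prompt from the first turn, so the first printed line runs them together. Trimming the reply with generated_text[len(prompt):] works because the text-generation pipeline echoes the prompt verbatim by default.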