jordigonzm committed
Commit c86f71e
Parent: 9932057

Update app.py

Files changed (1): app.py (+13 -17)
app.py CHANGED
@@ -8,39 +8,35 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_name = "microsoft/Phi-3-mini-128k-instruct"
 
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+
 pipeline = transformers.pipeline(
     "text-generation",
-    model=model_name,
-    model_kwargs={"torch_dtype": torch.bfloat16},
+    model=model,
+    tokenizer=tokenizer,
     device="cpu",
     trust_remote_code=True
 )
 
 @spaces.GPU
 def chat_function(message, history, system_prompt, max_new_tokens, temperature):
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": message},
-    ]
-    prompt = pipeline.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    terminators = [
-        pipeline.tokenizer.eos_token_id,
-        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
+    prompt = system_prompt
+    for msg in history:
+        prompt += f'{msg["role"]}: {msg["content"]}\n'
+    prompt += f'user: {message}\n'
+
     temp = temperature + 0.1
     outputs = pipeline(
         prompt,
         max_new_tokens=max_new_tokens,
-        eos_token_id=terminators,
         do_sample=True,
         temperature=temp,
         top_p=0.9,
     )
-    return outputs[0]["generated_text"][len(prompt):]
+    generated_text = outputs[0]['generated_text']
+    new_text = generated_text[len(prompt):]
+    return new_text
 
 gr.ChatInterface(
     chat_function,
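
Net effect of the commit: the pipeline now receives an explicitly loaded model and tokenizer instead of a model name string, and the prompt is built by hand from the chat history rather than via tokenizer.apply_chat_template, dropping the <|eot_id|> terminator (a Llama-3 template token, not a Phi-3 one). A minimal standalone sketch of the new prompt-building loop, assuming Gradio passes history as a list of {"role": ..., "content": ...} dicts (the "messages" format); the sample values below are hypothetical:

# Standalone sketch of the prompt construction introduced by this commit.
# Assumes `history` arrives in Gradio's "messages" format; sample data is made up.
system_prompt = "You are a helpful assistant."
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]
message = "What does Phi-3 do?"

prompt = system_prompt
for msg in history:
    prompt += f'{msg["role"]}: {msg["content"]}\n'
prompt += f'user: {message}\n'

print(prompt)
# You are a helpful assistant.user: Hi
# assistant: Hello! How can I help?
# user: What does Phi-3 do?

Note that nothing separates the system prompt from the first turn, so the first printed line runs them together. Trimming the reply with generated_text[len(prompt):] works because the text-generation pipeline echoes the prompt verbatim by default.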