WeeRobots
/

phi-2-chat-v05

Text Generation

Inference Endpoints

Model card Files Files and versions Community

PeterZentai committed on Jan 27

Commit

266406b

•

1 Parent(s): b0ae2fb

Update README.md

Files changed (1) hide show

README.md +4 -1

README.md CHANGED Viewed

@@ -43,6 +43,7 @@ Cost for inference.
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM
 model_id = "WeeRobots/phi-2-chat-v05"
@@ -50,6 +51,8 @@ model_id = "WeeRobots/phi-2-chat-v05"
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"": 0}, trust_remote_code=True)
 tokenizer = tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, trust_remote_code=True)
 payload = tokenizer.apply_chat_template([
     { 'role': 'system', 'content': '''You are a state machine. The user will add state slot values and I'll keep track of them.''' },
     { 'role': 'user', 'content': '''Place 15 into slot apple''' },
@@ -58,7 +61,7 @@ payload = tokenizer.apply_chat_template([
     { 'role': 'assistant', 'content': '''Certainly''' },
     { 'role': 'user', 'content': '''What is value of  Apples + Bananas?''' },
 ], tokenize=False, add_generation_prompt=True,)
 model_input = tokenizer(payload, return_tensors="pt").to(device)
 with torch.no_grad():
   # IMPORTANT: always set the eos_token_id in this call. The model is trained to emit the eos_token at the right time

 ```python
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 model_id = "WeeRobots/phi-2-chat-v05"
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"": 0}, trust_remote_code=True)
 tokenizer = tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, trust_remote_code=True)
 payload = tokenizer.apply_chat_template([
     { 'role': 'system', 'content': '''You are a state machine. The user will add state slot values and I'll keep track of them.''' },
     { 'role': 'user', 'content': '''Place 15 into slot apple''' },
     { 'role': 'assistant', 'content': '''Certainly''' },
     { 'role': 'user', 'content': '''What is value of  Apples + Bananas?''' },
 ], tokenize=False, add_generation_prompt=True,)
+device = "cuda"
 model_input = tokenizer(payload, return_tensors="pt").to(device)
 with torch.no_grad():
   # IMPORTANT: always set the eos_token_id in this call. The model is trained to emit the eos_token at the right time