# Hugging Face Spaces status at time of capture: Runtime error
"""Stream an answer from the fine-tuned "lawscribe" model for one fixed prompt.

Loads a locally saved Unsloth checkpoint (4-bit quantized), builds an
Alpaca-style prompt with an empty response slot, and streams up to 128
generated tokens to stdout, running on CPU.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer  # unused below; kept — rest of file not visible

# Alpaca-style template: the two {} slots are (input, response); the response
# slot is left blank so the model fills it in.
# NOTE(review): the preamble mentions "an instruction" but the template has no
# "### Instruction:" section — confirm this matches the format used at training time.
user_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Input:
{}
### Response:
{}"""

load_in_4bit = True  # quantize weights to 4-bit to reduce memory footprint

# Import deferred to here (as in the original) because unsloth patches
# transformers on import and is only needed for model loading.
from unsloth import FastLanguageModel

# Load the checkpoint produced during fine-tuning.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="lawscribe_model",  # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length=2048,
    dtype=None,  # let Unsloth auto-select the dtype for this hardware
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its fast-inference mode

# Tokenize a single formatted prompt as a one-element batch.
inputs = tokenizer(
    [
        user_prompt.format(
            "What is a loan agreement?",  # input
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors="pt",
)
# .to("cuda")

# Run on CPU; the CUDA move above was deliberately disabled.
device = torch.device("cpu")
model.to(device)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Stream tokens to stdout as they are produced instead of waiting for the
# full sequence.
from transformers import TextStreamer

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)