Spaces:
Runtime error
Runtime error
update app.py
Browse files
- app.py +27 -104
- requirements.txt +3 -7
app.py
CHANGED
@@ -1,40 +1,7 @@
|
|
1 |
-
# Fine Tuning with Unsloth and SFTTrainer (Supervised Finetuning Trainer)
|
2 |
-
|
3 |
-
# Commented out IPython magic to ensure Python compatibility.
|
4 |
-
# %%capture
|
5 |
-
# # Installs Unsloth, Xformers (Flash Attention) and all other packages!
|
6 |
-
# !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
|
7 |
-
# !pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
|
8 |
-
|
9 |
-
from unsloth import FastLanguageModel
|
10 |
import torch
|
11 |
-
|
12 |
-
dtype = None # None for auto detection.
|
13 |
-
load_in_4bit = True # Use 4bit quantization to reduce memory usage.
|
14 |
-
|
15 |
-
model, tokenizer = FastLanguageModel.from_pretrained(
|
16 |
-
model_name = "unsloth/llama-3-8b-bnb-4bit",
|
17 |
-
max_seq_length = max_seq_length,
|
18 |
-
dtype = dtype,
|
19 |
-
load_in_4bit = load_in_4bit,
|
20 |
-
# token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
|
21 |
-
)
|
22 |
|
23 |
-
|
24 |
-
model,
|
25 |
-
r = 16,
|
26 |
-
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
|
27 |
-
"gate_proj", "up_proj", "down_proj",],
|
28 |
-
lora_alpha = 16,
|
29 |
-
lora_dropout = 0, # Optimized
|
30 |
-
bias = "none", # Optimized
|
31 |
-
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
|
32 |
-
random_state = 3407,
|
33 |
-
use_rslora = False, # We support rank stabilized LoRA
|
34 |
-
loftq_config = None, # And LoftQ
|
35 |
-
)
|
36 |
-
|
37 |
-
user_prompt="""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
38 |
|
39 |
### Input:
|
40 |
{}
|
@@ -42,77 +9,33 @@ user_prompt="""Below is an instruction that describes a task, paired with an inp
|
|
42 |
### Response:
|
43 |
{}"""
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
pass
|
56 |
-
|
57 |
-
from datasets import load_dataset
|
58 |
-
dataset = load_dataset("saruo06/train-llama3-lawscribe", split = "train")
|
59 |
-
dataset = dataset.map(formatting_prompts_func, batched = True,)
|
60 |
-
|
61 |
-
from trl import SFTTrainer
|
62 |
-
from transformers import TrainingArguments
|
63 |
-
from unsloth import is_bfloat16_supported
|
64 |
-
|
65 |
-
trainer = SFTTrainer(
|
66 |
-
model = model,
|
67 |
-
tokenizer = tokenizer,
|
68 |
-
train_dataset = dataset,
|
69 |
-
dataset_text_field = "text",
|
70 |
-
max_seq_length = max_seq_length,
|
71 |
-
dataset_num_proc = 2,
|
72 |
-
packing = False, # Can make training 5x faster for short sequences.
|
73 |
-
args = TrainingArguments(
|
74 |
-
per_device_train_batch_size = 2,
|
75 |
-
gradient_accumulation_steps = 4,
|
76 |
-
warmup_steps = 5,
|
77 |
-
max_steps = 60,
|
78 |
-
learning_rate = 2e-4,
|
79 |
-
fp16 = not is_bfloat16_supported(),
|
80 |
-
bf16 = is_bfloat16_supported(),
|
81 |
-
logging_steps = 1,
|
82 |
-
optim = "adamw_8bit",
|
83 |
-
weight_decay = 0.01,
|
84 |
-
lr_scheduler_type = "linear",
|
85 |
-
seed = 3407,
|
86 |
-
output_dir = "outputs",
|
87 |
-
),
|
88 |
-
)
|
89 |
-
|
90 |
-
trainer_stats = trainer.train()
|
91 |
-
|
92 |
-
FastLanguageModel.for_inference(model)
|
93 |
|
94 |
inputs = tokenizer(
|
95 |
-
[
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
tokenizer.batch_decode(outputs)
|
105 |
|
106 |
-
|
|
|
107 |
|
108 |
-
inputs =
|
109 |
-
[
|
110 |
-
user_prompt.format(
|
111 |
-
# "Paraphrase sentence", # instruction
|
112 |
-
"Paraphrase the sentence - country india, city bangalore. the rules are ask me before painting, pay 50000 fine for any minor damages anf 2 lak for major damages, no renting out to other people, the mentioned party should definitely be stayimg in that house. provide me these rules in proper legal terms to include in lease agreement", # input
|
113 |
-
"", # output
|
114 |
-
)
|
115 |
-
], return_tensors = "pt").to("cuda")
|
116 |
|
117 |
-
|
118 |
-
tokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import torch
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
user_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
### Input:
|
7 |
{}
|
|
|
9 |
### Response:
|
10 |
{}"""
|
11 |
|
12 |
+
load_in_4bit=True
|
13 |
+
if True:
|
14 |
+
from unsloth import FastLanguageModel
|
15 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
16 |
+
model_name="lawscribe_model", # YOUR MODEL YOU USED FOR TRAINING
|
17 |
+
max_seq_length=2048,
|
18 |
+
dtype=None,
|
19 |
+
load_in_4bit=load_in_4bit,
|
20 |
+
)
|
21 |
+
FastLanguageModel.for_inference(model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
inputs = tokenizer(
|
24 |
+
[
|
25 |
+
user_prompt.format(
|
26 |
+
"What is a loan agreement?", # input
|
27 |
+
"", # output - leave this blank for generation!
|
28 |
+
)
|
29 |
+
],
|
30 |
+
return_tensors="pt"
|
31 |
+
)
|
32 |
+
# .to("cuda")
|
|
|
33 |
|
34 |
+
device = torch.device("cpu")
|
35 |
+
model.to(device)
|
36 |
|
37 |
+
inputs = {key: value.to(device) for key, value in inputs.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
+
from transformers import TextStreamer
|
40 |
+
text_streamer = TextStreamer(tokenizer)
|
41 |
+
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
|
requirements.txt
CHANGED
@@ -1,8 +1,4 @@
|
|
1 |
-
|
|
|
2 |
xformers<0.0.27
|
3 |
-
|
4 |
-
peft
|
5 |
-
accelerate
|
6 |
-
bitsandbytes
|
7 |
-
uvicorn[standard]
|
8 |
-
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
|
|
|
1 |
+
torch
|
2 |
+
transformers
|
3 |
xformers<0.0.27
|
4 |
+
unsloth
|
|
|
|
|
|
|
|
|
|