saruo06 committed
Commit 4556ecf
1 Parent(s): a7e7860

update app.py

Files changed (2):
  1. app.py +27 -104
  2. requirements.txt +3 -7
app.py CHANGED
@@ -1,40 +1,7 @@
-# Fine Tuning with Unsloth and SFTTrainer (Supervised Finetuning Trainer)
-
-# Commented out IPython magic to ensure Python compatibility.
-# %%capture
-# # Installs Unsloth, Xformers (Flash Attention) and all other packages!
-# !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
-# !pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
-
-from unsloth import FastLanguageModel
 import torch
-max_seq_length = 2048
-dtype = None # None for auto detection.
-load_in_4bit = True # Use 4bit quantization to reduce memory usage.
-
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "unsloth/llama-3-8b-bnb-4bit",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
-)
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model = FastLanguageModel.get_peft_model(
-    model,
-    r = 16,
-    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
-                      "gate_proj", "up_proj", "down_proj",],
-    lora_alpha = 16,
-    lora_dropout = 0, # Optimized
-    bias = "none", # Optimized
-    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
-    random_state = 3407,
-    use_rslora = False, # We support rank stabilized LoRA
-    loftq_config = None, # And LoftQ
-)
-
-user_prompt="""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+user_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Input:
 {}
@@ -42,77 +9,33 @@ user_prompt="""Below is an instruction that describes a task, paired with an inp
 ### Response:
 {}"""
 
-EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
-def formatting_prompts_func(examples):
-    # instructions = examples["instructions"]
-    inputs = examples["input"]
-    outputs = examples["output"]
-    texts = []
-    for input, output in zip(inputs, outputs):
-        text = user_prompt.format(input, output) + EOS_TOKEN
-        texts.append(text)
-    return { "text" : texts, }
-pass
-
-from datasets import load_dataset
-dataset = load_dataset("saruo06/train-llama3-lawscribe", split = "train")
-dataset = dataset.map(formatting_prompts_func, batched = True,)
-
-from trl import SFTTrainer
-from transformers import TrainingArguments
-from unsloth import is_bfloat16_supported
-
-trainer = SFTTrainer(
-    model = model,
-    tokenizer = tokenizer,
-    train_dataset = dataset,
-    dataset_text_field = "text",
-    max_seq_length = max_seq_length,
-    dataset_num_proc = 2,
-    packing = False, # Can make training 5x faster for short sequences.
-    args = TrainingArguments(
-        per_device_train_batch_size = 2,
-        gradient_accumulation_steps = 4,
-        warmup_steps = 5,
-        max_steps = 60,
-        learning_rate = 2e-4,
-        fp16 = not is_bfloat16_supported(),
-        bf16 = is_bfloat16_supported(),
-        logging_steps = 1,
-        optim = "adamw_8bit",
-        weight_decay = 0.01,
-        lr_scheduler_type = "linear",
-        seed = 3407,
-        output_dir = "outputs",
-    ),
-)
-
-trainer_stats = trainer.train()
-
-FastLanguageModel.for_inference(model)
+load_in_4bit=True
+if True:
+    from unsloth import FastLanguageModel
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name="lawscribe_model", # YOUR MODEL YOU USED FOR TRAINING
+        max_seq_length=2048,
+        dtype=None,
+        load_in_4bit=load_in_4bit,
+    )
+    FastLanguageModel.for_inference(model)
 
 inputs = tokenizer(
-[
-    user_prompt.format(
-        # "Paraphrase sentence", # instruction
-        "What is a loan agreement?", # input
-        "", # output
-    )
-], return_tensors = "pt").to("cuda")
-
-outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
-tokenizer.batch_decode(outputs)
-
-"""# NLP Tasks"""
-
-inputs = tokenizer(
-[
-    user_prompt.format(
-        # "Paraphrase sentence", # instruction
-        "Paraphrase the sentence - country india, city bangalore. the rules are ask me before painting, pay 50000 fine for any minor damages anf 2 lak for major damages, no renting out to other people, the mentioned party should definitely be stayimg in that house. provide me these rules in proper legal terms to include in lease agreement", # input
-        "", # output
-    )
-], return_tensors = "pt").to("cuda")
-
-outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
-tokenizer.batch_decode(outputs)
+[
+    user_prompt.format(
+        "What is a loan agreement?", # input
+        "", # output - leave this blank for generation!
+    )
+],
+return_tensors="pt"
+)
+# .to("cuda")
+
+device = torch.device("cpu")
+model.to(device)
+
+inputs = {key: value.to(device) for key, value in inputs.items()}
+
+from transformers import TextStreamer
+text_streamer = TextStreamer(tokenizer)
+_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
requirements.txt CHANGED
@@ -1,8 +1,4 @@
-fastapi
+torch
+transformers
 xformers<0.0.27
-trl<0.9.0
-peft
-accelerate
-bitsandbytes
-uvicorn[standard]
-unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
+unsloth
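
Note: the new app.py streams tokens to stdout via TextStreamer. For reference, a minimal sketch (not part of this commit) of capturing the generated text as a string instead, assuming the model, tokenizer, and inputs objects defined in the new app.py above:

    outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)
    # Drop the prompt tokens so only the newly generated response is decoded.
    generated = outputs[:, inputs["input_ids"].shape[1]:]
    print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])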