Spaces:

saruo06
/

llama3-lawscribe

Runtime error

App Files Community

saruo06 committed about 1 month ago

Commit

4556ecf

•

1 Parent(s): a7e7860

update app.py

Browse files

Files changed (2) hide show

app.py +27 -104
requirements.txt +3 -7

app.py CHANGED Viewed

@@ -1,40 +1,7 @@
-# Fine Tuning with Unsloth and SFTTrainer (Supervised Finetuning Trainer)
-# Commented out IPython magic to ensure Python compatibility.
-# %%capture
-# # Installs Unsloth, Xformers (Flash Attention) and all other packages!
-# !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
-# !pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
-from unsloth import FastLanguageModel
 import torch
-max_seq_length = 2048
-dtype = None # None for auto detection.
-load_in_4bit = True # Use 4bit quantization to reduce memory usage.
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "unsloth/llama-3-8b-bnb-4bit",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
-)
-model = FastLanguageModel.get_peft_model(
-    model,
-    r = 16,
-    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
-                      "gate_proj", "up_proj", "down_proj",],
-    lora_alpha = 16,
-    lora_dropout = 0, # Optimized
-    bias = "none",    # Optimized
-    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
-    random_state = 3407,
-    use_rslora = False,  # We support rank stabilized LoRA
-    loftq_config = None, # And LoftQ
-)
-user_prompt="""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Input:
 {}
@@ -42,77 +9,33 @@ user_prompt="""Below is an instruction that describes a task, paired with an inp
 ### Response:
 {}"""
-EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
-def formatting_prompts_func(examples):
-    # instructions = examples["instructions"]
-    inputs       = examples["input"]
-    outputs      = examples["output"]
-    texts = []
-    for input, output in zip(inputs, outputs):
-        text = user_prompt.format(input, output) + EOS_TOKEN
-        texts.append(text)
-    return { "text" : texts, }
-pass
-from datasets import load_dataset
-dataset = load_dataset("saruo06/train-llama3-lawscribe", split = "train")
-dataset = dataset.map(formatting_prompts_func, batched = True,)
-from trl import SFTTrainer
-from transformers import TrainingArguments
-from unsloth import is_bfloat16_supported
-trainer = SFTTrainer(
-    model = model,
-    tokenizer = tokenizer,
-    train_dataset = dataset,
-    dataset_text_field = "text",
-    max_seq_length = max_seq_length,
-    dataset_num_proc = 2,
-    packing = False, # Can make training 5x faster for short sequences.
-    args = TrainingArguments(
-        per_device_train_batch_size = 2,
-        gradient_accumulation_steps = 4,
-        warmup_steps = 5,
-        max_steps = 60,
-        learning_rate = 2e-4,
-        fp16 = not is_bfloat16_supported(),
-        bf16 = is_bfloat16_supported(),
-        logging_steps = 1,
-        optim = "adamw_8bit",
-        weight_decay = 0.01,
-        lr_scheduler_type = "linear",
-        seed = 3407,
-        output_dir = "outputs",
-    ),
-)
-trainer_stats = trainer.train()
-FastLanguageModel.for_inference(model)
 inputs = tokenizer(
-[
-    user_prompt.format(
-        # "Paraphrase sentence", # instruction
-        "What is a loan agreement?", # input
-        "", # output
-    )
-], return_tensors = "pt").to("cuda")
-outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
-tokenizer.batch_decode(outputs)
-"""# NLP Tasks"""
-inputs = tokenizer(
-[
-    user_prompt.format(
-        # "Paraphrase sentence", # instruction
-        "Paraphrase the sentence - country india, city bangalore. the rules are ask me before painting, pay 50000 fine for any minor damages anf 2 lak for major damages, no renting out to other people, the mentioned party should definitely be stayimg in that house. provide me these rules in proper legal terms to include in lease agreement", # input
-        "", # output
-    )
-], return_tensors = "pt").to("cuda")
-outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
-tokenizer.batch_decode(outputs)

 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+user_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Input:
 {}
 ### Response:
 {}"""
+load_in_4bit=True
+if True:
+    from unsloth import FastLanguageModel
+    model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name="lawscribe_model",  # YOUR MODEL YOU USED FOR TRAINING
+            max_seq_length=2048,
+            dtype=None,
+            load_in_4bit=load_in_4bit,
+        )
+    FastLanguageModel.for_inference(model)
 inputs = tokenizer(
+    [
+        user_prompt.format(
+            "What is a loan agreement?",  # input
+            "",  # output - leave this blank for generation!
+        )
+    ],
+    return_tensors="pt"
+)
+# .to("cuda")
+device = torch.device("cpu")
+model.to(device)
+inputs = {key: value.to(device) for key, value in inputs.items()}
+from transformers import TextStreamer
+text_streamer = TextStreamer(tokenizer)
+_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

requirements.txt CHANGED Viewed

@@ -1,8 +1,4 @@
-fastapi
 xformers<0.0.27
-trl<0.9.0
-peft
-accelerate
-bitsandbytes
-uvicorn[standard]
-unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git

+torch
+transformers
 xformers<0.0.27
+unsloth