Further documentation for the libraries used can be found at https://unsloth.ai/

For information on Hugging Face access tokens, see https://huggingface.co/docs/hub/en/security-tokens

In [None]:
# Authenticate with the Hugging Face Hub so models can be downloaded from and
# uploaded to your account.
# The access token is read from the Colab secret named 'TOKEN'; add it under
# the notebook's "Secrets" panel before running this cell.
from google.colab import userdata
from huggingface_hub import login

login(token=userdata.get('TOKEN'))

In [None]:
%%capture
# Install Unsloth with its Colab extras (Xformers / Flash Attention and the
# other packages it needs), then upgrade transformers to the latest release.
# The %%capture cell magic above suppresses the pip output of this cell.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --upgrade transformers

# Choose the Xformers build from the installed Torch version: Torch older than
# 2.4.0 gets the pinned xformers==0.0.27, anything newer gets the latest release.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# --no-deps installs only the listed packages, without their dependencies.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton

In [None]:
from unsloth import FastLanguageModel
import torch

# Default sequence length shared by model loading and the trainer.
max_seq_length = 2048


def load_model(model_name, max_seq_length=max_seq_length, dtype=None, load_in_4bit=True):
    """Load a model and its tokenizer through Unsloth's FastLanguageModel.

    Args:
        model_name: Model identifier handed to ``FastLanguageModel.from_pretrained``.
        max_seq_length: Sequence length forwarded to the loader; defaults to
            the notebook-level ``max_seq_length``.
        dtype: Forwarded unchanged to the loader (``None`` by default).
        load_in_4bit: Forwarded unchanged to the loader (``True`` by default).

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loader_options = {
        "model_name": model_name,
        "max_seq_length": max_seq_length,
        "dtype": dtype,
        "load_in_4bit": load_in_4bit,
    }
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(**loader_options)
    return loaded_model, loaded_tokenizer

# Load the base model and attach LoRA adapters


In [None]:
# Supported base models (4-bit Unsloth builds). `models` and `chat_templates`
# are parallel lists: entry i of one belongs to entry i of the other.
models = [
    # Min training GPU: T4 | Max model size: 14.748 GB
    "Phi-3.5-mini-instruct-bnb-4bit",
    # Min training GPU: A100 | Min testing GPU: T4 | Max model size: 39.564 GB
    "gemma-2-27b-it-bnb-4bit",
    # Min training GPU: T4 | Min testing GPU: T4 | Max model size: 14.748 GB
    "Meta-Llama-3.1-8B-Instruct-bnb-4bit",
]

# NOTE(review): gemma-2 is paired with the 'alpaca' template here; confirm that
# this is intended rather than a gemma-specific template.
chat_templates = ["phi-3", "alpaca", "llama-3.1"]

# Index of the model (and its matching chat template) to fine-tune.
selection = 2
model_name, chat_template = models[selection], chat_templates[selection]

# Hugging Face account that owns the adapter repository.
HfUsername = "CooperW"

# Base weights come from the `unsloth` organisation on the Hub.
base_model = "unsloth/" + model_name

# Adapter repo name: the model name with '-' and '.' turned into '_',
# suffixed with '_128prompt', under the user's account.
adapter_repo_name = model_name.replace("-", "_").replace(".", "_") + "_128prompt"
LoRa_Adapters = f"{HfUsername}/{adapter_repo_name}"

import os

from huggingface_hub import snapshot_download
from peft import PeftModel

# Load the base model.
model, tokenizer = load_model(base_model)

try:
    # Continued training: fetch the previously pushed training state first.
    # Weight files (*.safetensors) and markdown files are skipped here.
    # Doing the download before touching `model` means a missing repository
    # or a network failure cannot leave the model half-wrapped before the
    # fallback below runs.
    download_path = snapshot_download(
        repo_id=LoRa_Adapters,
        ignore_patterns=["*.md", "*.safetensors"],
    )

    # is_trainable=True is required for continued training: PEFT's default
    # (False) loads the adapters frozen, for inference only.
    model = PeftModel.from_pretrained(model, LoRa_Adapters, is_trainable=True)

    last_checkpoint_path = os.path.join(download_path, "last-checkpoint")
except Exception as err:
    # No usable adapters on the Hub (e.g. first run, repository not created
    # yet): start from freshly initialised LoRA adapters. `Exception` rather
    # than a bare `except` so that Ctrl-C / kernel interrupts still propagate.
    print(f"Could not load existing adapters from '{LoRa_Adapters}' ({err!r}); "
          "initialising new LoRA adapters.")
    last_checkpoint_path = None

    model = FastLanguageModel.get_peft_model(
        model,
        r=128,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha=8,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=True,
        loftq_config=None,
    )

# Data Mapping
This function takes the given dataset and renders each conversation into the ["text"] field using the chat template. This is the format the model expects as input.

Example (using the `phi-3` chat template):
```
<|user|>
Hi!<|end|>
<|assistant|>
Hello! How are you?<|end|>
<|user|>
I'm doing great! And you?<|end|>

```

In [None]:
from unsloth.chat_templates import get_chat_template

# Tell the chat template which dataset keys/values hold the role and the
# message text: each turn stores the speaker under "from" (as "human" or
# "gpt") and the message under "value".
conversation_field_mapping = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}

# Attach the selected chat template to the tokenizer.
tokenizer = get_chat_template(
    tokenizer,
    chat_template=chat_template,
    mapping=conversation_field_mapping,
)

def formatting_prompts_func(examples):
    """Render each conversation of a batch into a single training string.

    Args:
        examples: A batch with a "conversations" entry holding one
            conversation per example.

    Returns:
        A dict with a single "text" key: the list of conversations rendered
        through the tokenizer's chat template (untokenized, without a
        trailing generation prompt).
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

In [None]:
# This will load datasets from your hugging face repository, alternatively load locally stored datasets.
from datasets import load_dataset

# Dataset repository under the user's Hugging Face account, and the file
# inside it that holds the training conversations.
dataset_repo = f"{HfUsername}/jadidi"
dataset_name = "train_network.jsonl"

# Load the file as the 'train' split, then add the "text" column by rendering
# every conversation with the chat template (processed in batches).
raw_train_dataset = load_dataset(dataset_repo, data_files=dataset_name, split="train")
train_dataset = raw_train_dataset.map(formatting_prompts_func, batched=True)

In [None]:
import math
import os

from transformers import Trainer, TrainingArguments
from trl import SFTTrainer

per_device_train_batch_size = 8

# Number of batches per epoch, used to size the cosine-restart schedule.
# This must be derived from the dataset: the trainer does not exist yet at
# this point, so asking it for its dataloader raises NameError on a fresh run.
# Assumes a single device and no dropped last batch (the Colab default) --
# under those conditions this equals len(trainer.get_train_dataloader()).
batches_per_epoch = math.ceil(len(train_dataset) / per_device_train_batch_size)

training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=16,
    lr_scheduler_type="cosine_with_restarts",
    # One restart cycle per 100 batches of an epoch.
    lr_scheduler_kwargs={"num_cycles": batches_per_epoch / 100},
    optim="adamw_8bit",
    weight_decay=0.01,
    seed=3407,
    warmup_steps=10,
    # max_steps=100,
    num_train_epochs=3,
    learning_rate=2e-4,
    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=1,
)

# Push to the Hub repository named by LoRa_Adapters (private), using the
# "checkpoint" hub strategy.
training_args = training_args.set_push_to_hub(
    model_id=LoRa_Adapters,
    strategy="checkpoint",
    private_repo=True,
    always_push=False,
)

# Save a checkpoint every 50 steps and keep at most the 2 most recent ones.
# Only the LoRA adapters are saved.
training_args = training_args.set_save(
    strategy="steps",
    steps=50,
    total_limit=2,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=os.cpu_count(),
    packing=False,
    args=training_args,
)


In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Resume only when a checkpoint actually exists in the trainer's output
# directory. Wrapping trainer.train() in a bare `except:` would also catch
# failures that happen in the middle of training (out-of-memory, Ctrl-C, ...)
# and silently restart from scratch, discarding progress and hiding the error.
checkpoint_root = trainer.args.output_dir
local_checkpoint = (
    get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
)

# NOTE(review): only the local output directory is consulted here; the
# "last-checkpoint" folder downloaded from the Hub while loading the adapters
# is not used -- confirm whether it should be passed in instead.
if local_checkpoint is not None:
    trainer_stats = trainer.train(resume_from_checkpoint=local_checkpoint)
else:
    trainer_stats = trainer.train()

In [None]:
# Training has finished: release this Colab runtime.
# NOTE(review): presumably anything not pushed to the Hub or saved elsewhere
# is lost once the runtime is unassigned -- confirm before running.
from google.colab import runtime
runtime.unassign()