# Run as a module using: python3 -m scripts.finetune
# Based on: https://huggingface.co/blog/mlabonne/sft-llama3

import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from data.fine_tune_dataset import load_data


def finetune(model="unsloth/Meta-Llama-3.1-8B-bnb-4bit", dataset="mlabonne/FineTome-100k"):

    hf_token = ""

    # Load the model and restrict the context window.
    max_seq_length = 2048
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model,
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        dtype=None,
    )

    # Load the prepared dataset.
    dataset = load_data(dataset, tokenizer)

    # Wrap the model with LoRA adapters for fine-tuning - only ~42 million of the 8 billion parameters are trained.
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,             # LoRA rank (Low-Rank Adaptation freezes most of the model); a higher rank increases memory and compute cost
        lora_alpha=16,    # scaling factor for the LoRA updates
        lora_dropout=0,   # disabled for a speed-up
        target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],  # layers LoRA targets
        use_rslora=True,  # rank-stabilised LoRA
        use_gradient_checkpointing="unsloth",
    )

    # Save the untrained model; save_method can be "lora" to save only the adapters, or merged (16 or 4 bit).
    model.save_pretrained_merged("models/PreFineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save locally to the models directory
    model.push_to_hub_merged("thebigoed/PreFineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=True,
        args=TrainingArguments(
            learning_rate=3e-4,          # too low = slow training and local minima, too high = unstable training
            lr_scheduler_type="linear",  # adjusts the learning rate over training (linear and cosine are the most popular)
            per_device_train_batch_size=8,
            gradient_accumulation_steps=2,
            num_train_epochs=1,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            warmup_steps=10,
            output_dir="output",
            seed=0,
        ),
    )

    trainer.train()

    # Save the fine-tuned model; save_method can be "lora" to save only the adapters, or merged (16 or 4 bit).
    model.save_pretrained_merged("models/FineLlama-3.1-8B", tokenizer, save_method="merged_16bit")  # save locally to the models directory
    model.push_to_hub_merged("thebigoed/FineLlama-3.1-8B", tokenizer, token=hf_token, save_method="merged_16bit")

    # Use to save in GGUF quantised format:
    # quant_methods = ["q2_k", "q3_k_m", "q4_k_m", "q5_k_m", "q6_k", "q8_0"]
    # for quant in quant_methods:
    #     model.push_to_hub_gguf("", tokenizer, quant)

    return


if __name__ == "__main__":
    finetune()
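

# Optional: quick inference check. A minimal sketch (not part of the original
# training flow) showing how the fine-tuned model could be sampled with the
# TextStreamer imported above; the helper name, prompt, and generation settings
# are illustrative assumptions.
def test_inference(model, tokenizer, prompt="Explain what LoRA fine-tuning is."):
    FastLanguageModel.for_inference(model)  # switch the unsloth model into inference mode
    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens to stdout as they are generated
    _ = model.generate(input_ids=inputs, streamer=streamer, max_new_tokens=128, use_cache=True)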