
My first run: 8192 ctx QLoRA, trained on the AEZAKMI-3_6 dataset. The base model doesn't seem too slopped, but this finetune is not great - lots of slop and GPT-isms ("It's important to remember", etc.). It does seem uncensored though, so if Llama-3-8B-Instruct doesn't work for you, this might be an option until better finetunes come out. ChatML prompt format. Training script below. Training took around 8 hours on a 3090 Ti via unsloth. Benchmark prompt results can be found in my misc repo.

from unsloth import FastLanguageModel
from datasets import Dataset, load_dataset
from dataclasses import dataclass, field
from typing import Dict, Optional
import torch
max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "model-path-llama-3-8b", # Choose ANY! eg mistralai/Mistral-7B-Instruct-v0.2
    max_seq_length = max_seq_length,
    attn_implementation="flash_attention_2",
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)



# Override the tokenizer's chat template with ChatML
# (<|im_start|>role\n...<|im_end|>\n), the prompt format this finetune uses.
tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
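
# Optional sanity check (a minimal sketch; the example messages are arbitrary):
# render the ChatML template on a dummy conversation with transformers'
# tokenizer.apply_chat_template to confirm the formatting looks right.
example_messages = [
    {"role": "system", "content": "A chat."},
    {"role": "user", "content": "Hello!"},
]
print(tokenizer.apply_chat_template(example_messages, tokenize = False, add_generation_prompt = True))
# Expected output:
# <|im_start|>system
# A chat.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant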

from datasets import load_dataset

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

dataset = load_dataset("adamo1139/AEZAKMI_v3-6", split = "train")
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = []
    mapper = {"system" : "<|im_start|>system\n", "human" : "<|im_start|>user\n", "gpt" : "<|im_start|>assistant\n"}
    end_mapper = {"system" : "<|im_end|>\n", "human" : "<|im_end|>\n", "gpt" : "<|im_end|>\n"}
    for convo in convos:
        text = "".join(f"{mapper[(turn := x['from'])]} {x['value']}{end_mapper[turn]}" for x in convo)
        texts.append(f"{text}{EOS_TOKEN}") # Since there are multi-turn
        # conversations, I append the EOS_TOKEN at the end of the whole
        # conversation. These conversations always end with a gpt message.
    return { "text" : texts, }

import pprint

# A few raw samples before formatting
pprint.pprint("""NOT a formatted dataset""")
for i in (250, 260, 270, 280, 290):
    pprint.pprint(dataset[i])

dataset = dataset.map(formatting_prompts_func, batched = True,)

# The same samples after formatting (now with a "text" field)
pprint.pprint("""formatted dataset""")
for i in (250, 260, 270, 280, 290):
    pprint.pprint(dataset[i])


model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
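
# Note: with lora_alpha equal to r (32/32) the LoRA scaling factor alpha/r is 1,
# so adapter updates are applied at full strength; targeting all attention and
# MLP projections is the usual "train everything with LoRA" setup.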

model.print_trainable_parameters()

from transformers import TrainingArguments
from trl import SFTTrainer

sft_trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 8192,
    packing=True,
    args = TrainingArguments(
        evaluation_strategy = "no",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 1.5,
        warmup_steps = 10,
        learning_rate = 0.000095,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "1904-llama-3-8b-aezakmi-intermediate",
        optim = "adamw_8bit",
        weight_decay = 0.0,
        lr_scheduler_type = "cosine",
        seed = 42,
        save_strategy = "steps",
        save_steps = 150,
        save_total_limit = 5,
    ),
)
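
# Note: the effective batch size is per_device_train_batch_size *
# gradient_accumulation_steps = 2 * 4 = 8 packed sequences of up to 8192 tokens
# per optimizer step (single GPU), with the cosine schedule run over 1.5 epochs.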


sft_trainer.train()
model.save_pretrained("1904-llama-3-8b-aezakmi-final") # Local saving
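
A minimal inference sketch using the same ChatML format, assuming the adapter saved above can be loaded back with unsloth and that the tokenizer still carries the ChatML chat_template (re-set it as in the training script if not); the generation settings here are arbitrary.

from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "1904-llama-3-8b-aezakmi-final", # adapter directory saved above (assumption)
    max_seq_length = 8192,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model) # switch unsloth into inference mode

messages = [
    {"role": "system", "content": "A chat."},
    {"role": "user", "content": "Hello, who are you?"},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt = True, return_tensors = "pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens = 256, do_sample = True, temperature = 0.7)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens = True))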