Further documentation for the libraries used can be found at https://unsloth.ai/

For information on Hugging Face access tokens, see https://huggingface.co/docs/hub/en/security-tokens

In [None]:
# Authenticate with the Hugging Face Hub; the token is used to download or upload models on your account.
# The token is read from the Colab secret named 'TOKEN' (add it under Colab's "Secrets" panel).
# Outside Colab `google.colab.userdata` is not available: supply the token another way,
# e.g. from an environment variable.
from google.colab import userdata
from huggingface_hub import login
login(userdata.get('TOKEN'))

In [2]:
%%capture
# `%%capture` (which must remain the first line of the cell) hides the verbose pip output.
# Install the Hugging Face Hub client and Unsloth (Colab build) straight from GitHub.
!pip install huggingface_hub
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Pick an xformers requirement that matches the installed Torch:
# Torch older than 2.4.0 gets the pinned xformers 0.0.27, anything newer gets the latest release.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# --no-deps installs only the listed packages without pulling their dependencies
# (presumably so pip does not replace the preinstalled Torch build - confirm before changing).
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton

In [None]:
from unsloth import FastLanguageModel
import torch

# Default context length (in tokens) requested when loading a model.
max_seq_length = 2048


def load_model(model_name, max_seq_length=max_seq_length, dtype=None, load_in_4bit=True):
    """Load a model and its tokenizer through Unsloth.

    Thin wrapper that forwards every argument unchanged to
    ``FastLanguageModel.from_pretrained``.

    Args:
        model_name: Identifier of the model to load.
        max_seq_length: Sequence length forwarded to the loader; defaults to
            the module-level ``max_seq_length``.
        dtype: Forwarded as-is (``None`` by default).
        load_in_4bit: Forwarded as-is (``True`` by default).

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loader_kwargs = {
        "model_name": model_name,
        "max_seq_length": max_seq_length,
        "dtype": dtype,
        "load_in_4bit": load_in_4bit,
    }
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(**loader_kwargs)
    return loaded_model, loaded_tokenizer

# Load the base model and attach the trained LoRA adapters

In [None]:
from peft import PeftModel

# Supported 4-bit base models. `chat_templates` below is index-aligned with this list,
# so keep both lists in the same order when adding or removing entries.
models = [
    'Phi-3.5-mini-instruct-bnb-4bit',  # |Min Training Gpu : T4, Min Testing GPU: T4, Max Model size : 14.748 GB|
    'gemma-2-27b-it-bnb-4bit',  # |Min Training Gpu: A100, Min Testing GPU: T4, Max Model size: 39.564GB|
    'Meta-Llama-3.1-8B-Instruct-bnb-4bit'  # |Min Training Gpu: T4, Min Testing GPU: T4, Max Model size : 22.168GB|
]

chat_templates = [
    'phi-3',
    'alpaca',  # Or gemma.
    'llama-3.1'
]

# Select model and corresponding template (same index in both lists).
selection = 0
model_name = models[selection]
chat_template = chat_templates[selection]
HfUsername = "CooperW"

# Name of the adapter repository on Hugging Face: the model name with '-' and '.'
# replaced by '_', suffixed with "_128Batch".
# Building this string cannot fail, so no try/except is needed here; a missing or
# inaccessible repository surfaces as an error from PeftModel.from_pretrained below.
LoRa_Adapters = f"{HfUsername}/{model_name.replace('-', '_').replace('.', '_')}_128Batch"

# Load base model and fit Adapters.
# The repository id gets its own name so `model` only ever refers to a model object.
base_model_id = f"unsloth/{model_name}"
model, tokenizer = load_model(base_model_id)
model = PeftModel.from_pretrained(model, LoRa_Adapters)

#Data Mapping
This function takes the given dataset and maps its contents to the ["text"] field; this is the format the model expects as input.

Example:
```
<|user|>
Hi!<|end|>
<|assistant|>
Hello! How are you?<|end|>
<|user|>
I'm doing great! And you?<|end|>

```

In [6]:
from unsloth.chat_templates import get_chat_template

# The dataset stores each turn under the keys "from"/"value" with speakers named
# "human"/"gpt"; this mapping tells the chat template which of those correspond to
# its role/content fields and its user/assistant roles.
role_mapping = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}
tokenizer = get_chat_template(tokenizer, chat_template=chat_template, mapping=role_mapping)

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-formatted string.

    Args:
        examples: Batch with a ``"conversations"`` entry holding one
            conversation per row.

    Returns:
        dict: ``{"text": [...]}`` with one rendered string per conversation,
        produced by the module-level ``tokenizer`` without tokenizing and
        without a trailing generation prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

In [None]:
# This will load datasets from your hugging face repository, alternatively load locally stored datasets.
from datasets import load_dataset

# Repository and file that hold the evaluation data.
dataset_repo = f"{HfUsername}/jadidi"
dataset_name = "test_network.jsonl"

# Load the file (requested as the 'train' split) and add the chat-formatted
# "text" column to every row in batches.
test_dataset = load_dataset(
    dataset_repo,
    data_files=dataset_name,
    split='train',
).map(formatting_prompts_func, batched=True)

#Evaluation Loop
This function sends each row of the testing dataset to the model as an input message, receives the output, completes the output if necessary, and provides a live accuracy update.

In [8]:
import time
import sys

def evaluate_model_on_dataset(model, tokenizer, test_dataset, num_samples=None):
    """Classify rows of ``test_dataset`` with ``model`` and print live accuracy.

    For each evaluated row the first conversation turn is sent to the model,
    one new token is generated, and the last whitespace-separated piece of the
    decoded output is expanded to a full class name (e.g. ``"att"`` ->
    ``"attack"``) before being compared with the second conversation turn.

    Args:
        model: Model to evaluate; it is switched to Unsloth inference mode.
        tokenizer: Tokenizer whose chat template accepts ``from``/``value`` turns.
        test_dataset: Dataset whose rows contain a ``"conversations"`` list with
            the input at index 0 and the expected label at index 1.
        num_samples: Number of leading rows to evaluate. ``None`` (default)
            evaluates the whole dataset; values larger than the dataset are
            clamped to its length.

    Returns:
        tuple[list[str], list[str]]: ``(predictions, ground_truths)`` in
        dataset order.
    """
    # Resolve the sample count at call time from the dataset actually passed in.
    # A signature default of len(test_dataset) would be evaluated once, at
    # definition time, against whatever global of that name existed then.
    total_rows = len(test_dataset)
    if num_samples is None:
        num_samples = total_rows
    else:
        # Clamp so that select() below never receives out-of-range indices.
        num_samples = min(num_samples, total_rows)

    # Enable native 2x faster inference
    FastLanguageModel.for_inference(model)

    # Predictions and expected labels, in dataset order
    predictions = []
    ground_truths = []
    start_time = time.time()

    # Running counters for the live accuracy display
    correct = 0
    incorrect = 0

    # Text prepended to every input (empty by default).
    prompt = ""

    # Known classes in the ToN-IoT dataset keyed by their first letter
    # ("dos" and "ddos" are handled separately via their first two letters).
    # Built once here instead of on every call of complete_prediction.
    known_classes = {
        "n": "normal",      # Normal traffic
        "m": "mitm",        # Man in the Middle attack
        "p": "password",    # Password attacks (brute force, etc.)
        "b": "backdoor",    # Backdoor attacks
        "i": "injection",   # SQL injection, Command injection, etc.
        "x": "xss",         # Cross-Site Scripting (XSS) attacks
        "r": "ransomware",  # Ransomware
        "s": "scanning",    # Port scanning, vulnerability scanning
        "a": "attack",      # For Binary Classification
    }

    def complete_prediction(pred):
        """Map an incomplete answer (e.g. "attac" or "norm") to its full class name."""
        pred_lower = pred.lower()
        if not pred_lower:
            # Nothing to match on; return the empty prediction unchanged.
            return pred
        # For "dos" and "ddos", use the first two letters; for others, the first letter.
        if pred_lower.startswith("dd"):
            return "ddos"
        if pred_lower.startswith("do"):
            return "dos"
        # Return the original prediction if no class matches.
        return known_classes.get(pred_lower[0], pred)

    # Loop over each selected item in the test dataset
    for item in test_dataset.select(range(num_samples)):

        # Extract the human input from the item
        human_input = item['conversations'][0]['value']

        # Prepare messages with the current input
        message = [{"from": "human", "value": f"{prompt}{human_input}"}]

        # Tokenize the input
        inputs = tokenizer.apply_chat_template(
            message,
            tokenize=True,
            add_generation_prompt=True,  # Must add for generation
            return_tensors="pt",
        ).to("cuda")

        # Generate a single new token; only the start of the label is needed
        # because complete_prediction expands it to the full class name.
        outputs = model.generate(input_ids=inputs, max_new_tokens=1, use_cache=True)
        answer = tokenizer.batch_decode(outputs)

        # The prediction is the last whitespace-separated piece of the decoded
        # text; guard against output that contains no such piece at all.
        pieces = answer[0].split()
        pred = complete_prediction(pieces[-1] if pieces else "")
        truth = item['conversations'][1]['value'].strip()

        # Append predictions and truths
        predictions.append(pred)
        ground_truths.append(truth)

        # Update metrics and choose the colour for the status line
        if pred == truth:
            correct += 1
            color_code = "\033[32m"  # Green
        else:
            incorrect += 1
            color_code = "\033[31m"  # Red

        # Calculate accuracy (at least one sample has been counted at this point)
        elapsed_time = time.time() - start_time
        accuracy = (correct / (correct + incorrect)) * 100

        # Overwrite the same console line with the current status
        sys.stdout.write(f"\rElapsed Time: {elapsed_time:.2f} seconds, Accuracy: {accuracy:.2f}%, Completed: {correct + incorrect}/{num_samples} Prediction: {color_code}{pred}\033[0m, Ground Truth: {color_code}{truth}\033[0m")
        sys.stdout.flush()

    print("\n\n\nInference Complete\n\n\n")

    return predictions, ground_truths

#For Binary Classification Metrics

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import numpy as np
import pandas as pd

def metrics(predictions, ground_truths):
    """Print and plot binary (attack vs. normal) classification metrics.

    Prints the overall accuracy and a classification report, then shows a
    figure with per-class precision/recall/F1 bars next to a confusion matrix.

    Labels are compared as strings, so a prediction that is neither
    ``'attack'`` nor ``'normal'`` does not abort the report; it simply counts
    as a miss in the accuracy figure.

    Args:
        predictions: Predicted label strings.
        ground_truths: Expected label strings, aligned with ``predictions``.

    Returns:
        None. Output is printed and plotted.
    """
    class_labels = ['attack', 'normal']

    # Convert to numpy arrays
    y_true = np.array(ground_truths)
    y_pred = np.array(predictions)

    accuracy = accuracy_score(y_true, y_pred) * 100
    print(f"Accuracy: {accuracy:.2f}%\n")

    # Print classification report.
    # zero_division=0 keeps a class without predictions from emitting warnings,
    # matching the multi-class version of this function.
    class_report = classification_report(y_true, y_pred, labels=class_labels, zero_division=0)
    print("Classification Report:")
    print(class_report, '\n')

    # Compute per-class metrics (average=None returns one value per label)
    precision = precision_score(y_true, y_pred, average=None, labels=class_labels, zero_division=0)
    recall = recall_score(y_true, y_pred, average=None, labels=class_labels, zero_division=0)
    f1 = f1_score(y_true, y_pred, average=None, labels=class_labels, zero_division=0)

    metrics_df = pd.DataFrame({
        'Class': class_labels,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    })

    # Create subplots: two plots side by side
    fig, axes = plt.subplots(1, 2, figsize=(18, 6))

    # Plot precision, recall, and F1 score
    metrics_df.plot(x='Class', kind='bar', ax=axes[0])
    axes[0].set_title('Precision, Recall, and F1 Score by Class')
    axes[0].set_xlabel('Class')
    axes[0].set_ylabel('Score')
    axes[0].set_ylim(0, 1)

    # Plot confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels, ax=axes[1])
    axes[1].set_xlabel('Predicted')
    axes[1].set_ylabel('True')
    axes[1].set_title('Confusion Matrix')

    # Adjust layout
    plt.subplots_adjust(wspace=0.3)
    plt.show()

In [None]:
# Call the evaluation function on at most the first 1000 test rows.
# Capping at the dataset size avoids an out-of-range selection on smaller datasets.
pred, truth = evaluate_model_on_dataset(
    model,
    tokenizer,
    test_dataset,
    num_samples=min(1000, len(test_dataset)),
)

In [None]:
# Report accuracy, the classification report and the plots for the evaluation run.
# This calls whichever `metrics` definition was executed most recently
# (in notebook order, the binary version).
metrics(pred, truth)

#For Multi-Class Classification Metrics

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import numpy as np
import pandas as pd

def metrics(predictions, ground_truths):
    """Print and plot multi-class classification metrics for ToN-IoT labels.

    Prints the overall accuracy and a classification report, then shows a
    figure with per-class precision/recall/F1 bars next to a confusion matrix.

    Args:
        predictions: Predicted label strings.
        ground_truths: Expected label strings, aligned with ``predictions``.

    Returns:
        None. Output is printed and plotted.
    """
    # All known ToN-IoT classes; fixes the row/column order of every report.
    class_labels = [
        "normal", "ddos", "dos", "mitm", "password",
        "backdoor", "injection", "xss", "ransomware", "scanning",
    ]

    y_true = np.array(ground_truths)
    y_pred = np.array(predictions)

    # Overall accuracy as a percentage
    accuracy = accuracy_score(y_true, y_pred) * 100
    print(f"Accuracy: {accuracy:.2f}%\n")

    # Text report restricted to the known classes
    class_report = classification_report(y_true, y_pred, labels=class_labels, zero_division=0)
    print("Classification Report:")
    print(class_report, '\n')

    # Per-class scores: average=None yields one value per entry of class_labels
    per_class_kwargs = dict(average=None, labels=class_labels, zero_division=0)
    metrics_df = pd.DataFrame({
        'Class': class_labels,
        'Precision': precision_score(y_true, y_pred, **per_class_kwargs),
        'Recall': recall_score(y_true, y_pred, **per_class_kwargs),
        'F1 Score': f1_score(y_true, y_pred, **per_class_kwargs),
    })

    # One figure, two panels: score bars on the left, confusion matrix on the right
    fig, (scores_ax, matrix_ax) = plt.subplots(1, 2, figsize=(18, 6))

    metrics_df.plot(x='Class', kind='bar', ax=scores_ax, color=['blue', 'orange', 'green'])
    scores_ax.set_title('Precision, Recall, and F1 Score by Class')
    scores_ax.set_xlabel('Class')
    scores_ax.set_ylabel('Score')
    scores_ax.set_ylim(0, 1)
    scores_ax.legend(["Precision", "Recall", "F1 Score"], loc='upper left')

    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=matrix_ax,
    )
    matrix_ax.set_xlabel('Predicted')
    matrix_ax.set_ylabel('True')
    matrix_ax.set_title('Confusion Matrix')

    # Leave some horizontal room between the two panels
    plt.subplots_adjust(wspace=0.3)
    plt.show()


In [None]:
# Report accuracy, the classification report and the plots for the evaluation run,
# using the multi-class `metrics` defined in the cell above (run that cell first).
metrics(pred, truth)

In [None]:
# from google.colab import runtime
# runtime.unassign()