In [None]:
!unzip AI.zip
!unzip Photo.zip

In [None]:
!pip install umap-learn
!pip install PyWavelets

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
import pywt

In [None]:
# prompt: Create a function to load all the files in a folder as images.

import os
from PIL import Image
def load_images_from_folder(folder):
    """Load every supported image in ``folder`` as a 512x512 PIL image.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(images, labels)``. ``images`` is a list of PIL images resized
        to 512x512; ``labels`` has one entry per image and is 1 when the last
        component of ``folder`` is exactly ``"AI"`` and 0 otherwise.

    Raises:
        FileNotFoundError: If ``folder`` does not exist (raised by ``os.listdir``).
    """
    supported_extensions = ('.jpg', '.jpeg', '.png', '.webp')

    # Decide the label from the final path component so that nested folders such as
    # 'Validation/AI' (or 'AI/') are labelled like a top-level 'AI' folder.
    label = 1 if os.path.basename(os.path.normpath(folder)) == "AI" else 0

    images = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting keeps runs reproducible,
    # which matters because callers truncate the lists to balance the classes.
    for filename in sorted(os.listdir(folder)):
        # Case-insensitive match so that e.g. 'IMG_001.JPG' is not silently skipped.
        if not filename.lower().endswith(supported_extensions):
            continue
        # The context manager releases the file handle; resize() returns a new image
        # that remains usable after the source file is closed.
        with Image.open(os.path.join(folder, filename)) as img:
            images.append(img.resize((512, 512)))
        labels.append(label)
    return images, labels

In [None]:
# prompt: Can you write a function that can implement the discrete wavelet transform and display the wavelets given in an array for the image? The function should take in an image_path and a list of wavelets and perform the dwt and display the wavelets.

import matplotlib.pyplot as plt
import numpy as np
def apply_wavelet_transform_and_display_multiple(image_path, wavelets):
    """Show an image next to the DWT detail coefficients of several wavelets.

    The image is converted to grayscale, ``pywt.dwt`` is applied once per wavelet
    (PyWavelets' single-level 1-D transform; on a 2-D array it runs along the last
    axis by default) and the detail band of each transform is drawn in its own panel.

    Args:
        image_path: Path of the image file to open.
        wavelets: Sequence of wavelet names accepted by ``pywt.dwt`` (e.g. ``'db4'``).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Decode as 8-bit grayscale; the context manager releases the file handle.
    with Image.open(image_path) as img:
        img_array = np.array(img.convert('L'))

    num_wavelets = len(wavelets)
    fig, axes = plt.subplots(1, num_wavelets + 1, figsize=(5 * (num_wavelets + 1), 5))
    # With a single panel (empty `wavelets`) subplots() returns a bare Axes, not an array.
    axes = np.atleast_1d(axes)

    # Display the original image
    axes[0].imshow(img_array, cmap='gray')
    axes[0].set_title('Original Image')

    # Apply DWT and display the detail band of each wavelet
    for i, wavelet in enumerate(wavelets):
        # pywt.dwt returns (approximation, detail); only the detail band is plotted,
        # so the panel title names it accordingly.
        _, cD = pywt.dwt(img_array, wavelet)
        axes[i + 1].imshow(cD, cmap='gray')
        axes[i + 1].set_title(f'Detail Coefficients ({wavelet})')

    plt.tight_layout()
    plt.show()


In [None]:
# Preview the transform on one sample image for the db1, db6, db10, db12 and db16 wavelets.
# The hard-coded file must be present in the working directory.
apply_wavelet_transform_and_display_multiple('kiri-in-high-resolution-love-her-3-v0-ezejx6try3va1.webp', ['db1', 'db6', 'db10', 'db12', 'db16'])

In [None]:
# prompt: Can you write a function that given a list of images from PIL can convert them to grayscale and apply a set of wavelets using dwt and then combined them into one feature vector?

import numpy as np
def extract_wavelet_features(images, wavelets):
    """Build one wavelet feature vector per image.

    Each image is converted to grayscale, ``pywt.dwt`` is applied once per wavelet
    and the flattened detail coefficients of all wavelets are concatenated in the
    order given by ``wavelets``.

    Args:
        images: Iterable of PIL images. All images must have the same size so that
            every feature vector has the same length.
        wavelets: Sequence of wavelet names accepted by ``pywt.dwt``.

    Returns:
        ``np.ndarray`` with one row per image.

    Raises:
        ValueError: If the images yield feature vectors of different lengths.
    """
    all_features = []
    for img in images:
        img_array = np.array(img.convert('L'))
        # Keep every detail band as a flat NumPy array. Extending a Python list one
        # coefficient at a time creates hundreds of thousands of float objects per image.
        bands = [np.ravel(pywt.dwt(img_array, wavelet)[1]) for wavelet in wavelets]
        all_features.append(np.concatenate(bands) if bands else np.empty(0))

    # Fail with a clear message instead of an opaque ragged-array error.
    if len({row.shape[0] for row in all_features}) > 1:
        raise ValueError(
            "Images produced feature vectors of different lengths; "
            "resize all images to a common size before extracting features."
        )
    return np.array(all_features)


In [None]:
# prompt: Apply the Fourier transform to the images from the load_images_from_folder function.

import numpy as np


# Load both classes from the 'AI' and 'Photo' folders. Despite the cell's prompt, no
# Fourier transform is applied here: the features are wavelet detail coefficients.
ai_images, ai_labels = load_images_from_folder('AI')
photo_images, photo_labels = load_images_from_folder('Photo')

# Balance the classes by truncating both to the size of the smaller one.
min_length = min(len(ai_images), len(photo_images))
ai_images, ai_labels = ai_images[:min_length], ai_labels[:min_length]
photo_images, photo_labels = photo_images[:min_length], photo_labels[:min_length]

print(f"Number of AI images: {len(ai_images)}")
print(f"Number of Photo images: {len(photo_images)}")
images = ai_images + photo_images
labels = ai_labels + photo_labels
# extract_wavelet_features already returns an ndarray; wrapping it in np.array() again
# would only make a second copy of the (large) feature matrix.
features = extract_wavelet_features(images, ["db4", "db10"])

In [None]:
# Reduce the wavelet features to a 32-component UMAP embedding (fixed seed).
# NOTE(review): the reducer is fitted on every sample, including the ones that are held
# out later by train_test_split / KFold, so downstream test and CV scores may be optimistic.
reducer = umap.UMAP(n_neighbors=16, n_components=32, random_state=42)
embeddings = reducer.fit_transform(features)

In [None]:
# Inspect the dtype of the embedding stored on the fitted reducer.
reducer.embedding_.dtype

In [None]:
# Hold out 20% of the embedded samples for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    embeddings,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
from xgboost import XGBClassifier

In [None]:
# Gradient-boosted trees on the UMAP embeddings. The held-out split is passed as
# eval_set, so its log-loss is printed for every boosting round (verbose=True).
xgb_clf = XGBClassifier(
    n_estimators=200,
    learning_rate=0.01,
    max_depth=5,
    min_child_weight=10,
    gamma=1.0,
    reg_lambda=0.8,
    subsample=0.5,
    colsample_bytree=0.5,
    eval_metric="logloss",
)
xgb_clf.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    verbose=True,
)

# Report accuracy and F1 on the held-out split
xgb_clf_pred = xgb_clf.predict(X_test)
score = xgb_clf.score(X_test, y_test)
print(f"Accuracy: {score}")
print(f"F1 score: {f1_score(y_test, xgb_clf_pred)}")

In [None]:
# prompt: Calculate the training accuracy

# Compare accuracy on the training split with accuracy on the held-out split.
# The training predictions are stored but not used further in this cell.
xgb_clf_pred_train = xgb_clf.predict(X_train)
score = xgb_clf.score(X_train, y_train)
print(f"Training Accuracy: {score}")

# `score` is re-bound here to the held-out accuracy.
score = xgb_clf.score(X_test, y_test)
print(f"Test Accuracy: {score}")

In [None]:
# prompt: Can you perform four fold cross validation on the xgboost model?

from sklearn.model_selection import cross_val_score, KFold
# Perform four-fold cross-validation (shuffled, fixed seed) of the XGBoost model
# over all embedded samples.
# NOTE(review): `embeddings` come from a UMAP reducer fitted on all samples, so every
# fold's held-out part already influenced the features; scores may be optimistic.
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
scores = cross_val_score(xgb_clf, embeddings, labels, cv=kfold, scoring='accuracy')

# Print the per-fold accuracies and their mean
print("Cross-validation scores:", scores)
print("Average cross-validation score:", scores.mean())

In [None]:
# Confusion matrix of the XGBoost model on the held-out split.
ConfusionMatrixDisplay.from_estimator(xgb_clf, X_test, y_test)

In [None]:
# Save the fitted XGBoost model to xgb_flux_detection_model.json in the working directory.
xgb_clf.save_model("xgb_flux_detection_model.json")

In [None]:
# prompt: A random classifier

from sklearn.dummy import DummyClassifier

# Chance-level baseline: predicts one of the classes uniformly at random.
# random_state is fixed because predict(), score() and the confusion matrix below each
# draw predictions independently; without a seed the three results would come from three
# different random draws and would change on every run.
dummy_clf = DummyClassifier(strategy='uniform', random_state=42)

# Fitting only records the classes seen in y_train; the features are not used to predict
dummy_clf.fit(X_train, y_train)

# Make predictions
dummy_pred = dummy_clf.predict(X_test)

# Evaluate the performance
score = dummy_clf.score(X_test, y_test)
print(f"Accuracy: {score}")
print(f"F1 score: {f1_score(y_test, dummy_pred)}")

ConfusionMatrixDisplay.from_estimator(dummy_clf, X_test, y_test)

In [None]:
# prompt: random forests with pruning

from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 shallow trees: depth is capped at 5 and a node needs at least
# 5 samples to be split, which limits how far each tree can grow. Seeded for repeatability.
rf_clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    min_samples_split=5,
    random_state=42,
).fit(X_train, y_train)

# Held-out predictions and metrics
rf_pred = rf_clf.predict(X_test)
score = rf_clf.score(X_test, y_test)
print(f"Accuracy: {score}")
print(f"F1 score: {f1_score(y_test, rf_pred)}")

ConfusionMatrixDisplay.from_estimator(rf_clf, X_test, y_test)

In [None]:
# prompt: Can you perform four fold cross validation on the rf model?

from sklearn.model_selection import cross_val_score, KFold
# Perform four-fold cross-validation (shuffled, fixed seed) of the random forest
# over all embedded samples.
# NOTE(review): `embeddings` come from a UMAP reducer fitted on all samples, so every
# fold's held-out part already influenced the features; scores may be optimistic.
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
scores = cross_val_score(rf_clf, embeddings, labels, cv=kfold, scoring='accuracy')

# Print the per-fold accuracies and their mean
print("Cross-validation scores:", scores)
print("Average cross-validation score:", scores.mean())

In [None]:
# prompt: SVC classifier

from sklearn.svm import SVC

# Support-vector classifier with scikit-learn's default settings, trained on the
# training split of the embeddings.
svc_clf = SVC().fit(X_train, y_train)

# Held-out predictions and metrics
svc_pred = svc_clf.predict(X_test)
score = svc_clf.score(X_test, y_test)
print(f"Accuracy: {score}")
print(f"F1 score: {f1_score(y_test, svc_pred)}")

ConfusionMatrixDisplay.from_estimator(svc_clf, X_test, y_test)


In [None]:
# prompt: classify with KNN and K=7

from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier (k = 7) trained on the training split of the embeddings.
knn_clf = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# Held-out predictions and metrics
knn_pred = knn_clf.predict(X_test)
score = knn_clf.score(X_test, y_test)
print(f"Accuracy: {score}")
print(f"F1 score: {f1_score(y_test, knn_pred)}")

ConfusionMatrixDisplay.from_estimator(knn_clf, X_test, y_test)


In [None]:
# prompt: Can you perform four fold cross validation on the KNN model?

from sklearn.model_selection import cross_val_score, KFold
# Perform four-fold cross-validation (shuffled, fixed seed) of the KNN model
# over all embedded samples.
# NOTE(review): `embeddings` come from a UMAP reducer fitted on all samples, so every
# fold's held-out part already influenced the features; scores may be optimistic.
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
scores = cross_val_score(knn_clf, embeddings, labels, cv=kfold, scoring='accuracy')

# Print the per-fold accuracies and their mean
print("Cross-validation scores:", scores)
print("Average cross-validation score:", scores.mean())

In [None]:
import plotly.express as px
import pandas as pd

# Use a dedicated reducer for the 2-D plot. Re-binding `reducer` here would replace the
# 32-component reducer that produced the classifier features, and the later
# reducer.transform(...) calls on validation data would then return 2-D embeddings that
# the classifiers trained on 32 components cannot consume.
viz_reducer = umap.UMAP(n_components=2, random_state=42)

# Reduce the dimensionality of the features array to two components for plotting
embedding = viz_reducer.fit_transform(features)

# Create a DataFrame for Plotly
embedding_df = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
embedding_df['label'] = labels

# Create a scatter plot. The keys of `labels` must be column names, so the display
# name is attached to the 'label' column (a 'color' key would match nothing).
fig = px.scatter(
    embedding_df,
    x='UMAP1',
    y='UMAP2',
    color='label',
    title='UMAP Dimensionality Reduction',
    labels={'label': 'Label'}
)

# Show the plot
fig.show()

In [None]:
# prompt: Save the knn classifier as a file

import joblib

# Serialise the fitted KNN classifier to knn_model.pkl in the working directory.
# Only the classifier is stored; the UMAP reducer that produces its input features
# is not part of this file.
filename = 'knn_model.pkl'
joblib.dump(knn_clf, filename)


In [None]:
# prompt: load the knn model

# Load the knn classifier back from the file written by the previous cell.
# joblib.load unpickles the file, which can execute arbitrary code - only load
# files from a trusted source.
filename = 'knn_model.pkl'
loaded_knn_clf = joblib.load(filename)

In [None]:
# prompt: load the validation images and apply the wavelet transforms

# 'validation_folder' is a placeholder path. Run this step only when the folder actually
# exists, so that a fresh "Run All" does not stop here with FileNotFoundError before the
# real validation data is unpacked and loaded in the cells below.
validation_folder = 'validation_folder'
if os.path.isdir(validation_folder):
    validation_images, validation_labels = load_images_from_folder(validation_folder)

    # Extract wavelet features from validation images
    validation_features = extract_wavelet_features(validation_images, ["db4", "db10"])

    # Reduce dimensionality of validation features using the fitted UMAP reducer
    validation_embeddings = reducer.transform(validation_features)
else:
    print(f"Skipping: folder '{validation_folder}' not found.")


### Validation

In [None]:
!unzip Validation.zip

In [None]:
# prompt: load the validation images

# Load the two validation classes from the sub-folders of the 'Validation' folder
ai_validation_images, ai_validation_labels = load_images_from_folder('Validation/AI')
photo_validation_images, photo_validation_labels = load_images_from_folder('Validation/Photo')

# Report how many images were found for each class (the Photo line previously
# printed the AI count a second time)
print(f"Number of AI Validation images: {len(ai_validation_images)}")
print(f"Number of Photo Validation images: {len(photo_validation_images)}")

In [None]:
# prompt: Combine both validation datasets and extract the wavelet features.

# Concatenate both validation classes, AI images first, keeping labels aligned with images
validation_images = [*ai_validation_images, *photo_validation_images]
validation_labels = [*ai_validation_labels, *photo_validation_labels]

# Wavelet features with the same two wavelets that were used for the training features
validation_features = extract_wavelet_features(
    validation_images,
    ["db4", "db10"],
)

In [None]:
# prompt: apply the reducer to find the validation embeddings

# Project the validation features with the already-fitted UMAP reducer (transform only,
# no re-fitting). NOTE(review): make sure `reducer` is still the 32-component reducer
# the classifiers were trained with when this cell runs.
validation_embeddings = reducer.transform(validation_features)

In [None]:
# prompt: find the accuracy and f1 score on the knn classifier for validation features

# Predict the validation set with the KNN classifier trained earlier
knn_pred_validation = knn_clf.predict(validation_embeddings)

# Accuracy on the validation data (score() predicts again internally)
score_validation = knn_clf.score(validation_embeddings, validation_labels)
print(f"Validation Accuracy: {score_validation}")

# F1 is computed from the predictions stored above
print(f"Validation F1 score: {f1_score(validation_labels, knn_pred_validation)}")


In [None]:
# prompt: Can you combine the entire pipeline into one class?

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
import pywt
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import plotly.express as px
import pandas as pd
import joblib
from tqdm import tqdm
import lzma

class FluxClassifier:
    """Wavelet -> UMAP -> classifier pipeline for telling AI images from photos.

    Images are resized to ``IMAGE_SIZE``, converted to grayscale and described by the
    flattened DWT detail coefficients of each configured wavelet. The features are
    reduced with UMAP and classified with a 7-nearest-neighbours classifier.

    Args:
        wavelets: Wavelet names passed to ``pywt.dwt``.
        umap_n_neighbors: ``n_neighbors`` of the UMAP reducer.
        umap_n_components: Number of UMAP output components.
        random_state: Seed for UMAP and for the train/test split in ``fit``.
    """

    # Size every image is brought to before feature extraction. Kept as class attributes
    # (not set in __init__) so instances pickled before they existed still resolve them.
    IMAGE_SIZE = (512, 512)
    SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp')

    def __init__(self, wavelets=("db4", "db10"), umap_n_neighbors=16, umap_n_components=32, random_state=42):
        # The default is an immutable tuple and is copied, so instances never share
        # (or mutate) one default list.
        self.wavelets = list(wavelets)
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = umap.UMAP(n_neighbors=self.umap_n_neighbors,
                                 n_components=self.umap_n_components,
                                 random_state=self.random_state)
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    def load_images_from_folder(self, folder):
        """Load all supported images in ``folder``, resized to ``IMAGE_SIZE``.

        Returns:
            ``(images, labels)`` where every label is 1 if the last component of
            ``folder`` contains ``"AI"`` and 0 otherwise.
        """
        # Look only at the final path component: a substring test on the whole path
        # would label e.g. 'TRAINING/Photo' as AI because of the parent directory name.
        label = 1 if "AI" in os.path.basename(os.path.normpath(folder)) else 0

        images = []
        labels = []
        print(f"Loading images from {folder}")
        # Sorted for reproducibility; os.listdir() order is arbitrary.
        for filename in tqdm(sorted(os.listdir(folder))):
            if not filename.lower().endswith(self.SUPPORTED_EXTENSIONS):
                continue
            # resize() returns a new image that stays valid after the file is closed.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.resize(self.IMAGE_SIZE))
            labels.append(label)
        return images, labels

    def extract_wavelet_features(self, images):
        """Return an array with one wavelet feature row per PIL image.

        Images that are not already ``IMAGE_SIZE`` are resized first, so images passed
        directly to ``predict``/``predict_proba`` yield vectors of the same length as
        the training images.
        """
        all_features = []
        for img in images:
            if tuple(img.size) != tuple(self.IMAGE_SIZE):
                img = img.resize(self.IMAGE_SIZE)
            img_array = np.array(img.convert('L'))
            # pywt.dwt returns (approximation, detail); only the detail band is used.
            # Bands stay NumPy arrays instead of being expanded into Python floats.
            bands = [np.ravel(pywt.dwt(img_array, wavelet)[1]) for wavelet in self.wavelets]
            all_features.append(np.concatenate(bands) if bands else np.empty(0))
        return np.array(all_features)

    def _embed(self, images):
        """Wavelet features of ``images`` projected with the fitted UMAP reducer."""
        return self.reducer.transform(self.extract_wavelet_features(images))

    def fit(self, train_folder1, train_folder2):
        """Fit the reducer and classifier on two folders and print hold-out metrics.

        Both folders are truncated to the size of the smaller one to balance the
        classes. The classifier is trained on 80% of the embedded samples; accuracy,
        F1 and a classification report for the remaining 20% are printed.

        Raises:
            ValueError: If either folder contains no supported images.
        """
        # Load images and extract features
        images1, labels1 = self.load_images_from_folder(train_folder1)
        images2, labels2 = self.load_images_from_folder(train_folder2)

        min_length = min(len(images1), len(images2))
        if min_length == 0:
            raise ValueError("Both training folders must contain at least one supported image.")

        images = images1[:min_length] + images2[:min_length]
        labels = labels1[:min_length] + labels2[:min_length]
        features = self.extract_wavelet_features(images)

        # Apply UMAP dimensionality reduction.
        # NOTE(review): the reducer sees the hold-out samples too, so the metrics printed
        # below may be optimistic.
        embeddings = self.reducer.fit_transform(features)
        # Use the configured seed rather than a hard-coded one (default is still 42).
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=self.random_state)

        # Train the classifier
        self.classifier.fit(X_train, y_train)

        acc = self.classifier.score(X_test, y_test)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy = {acc}")

        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))

    def predict(self, images):
        """Predict a class label (1 = AI, 0 = other) for each PIL image."""
        return self.classifier.predict(self._embed(images))

    def predict_proba(self, images):
        """Return the classifier's class probabilities for each PIL image."""
        return self.classifier.predict_proba(self._embed(images))

    def score(self, test_folder):
        """Return the classifier's accuracy on the images in ``test_folder``.

        All images receive the label derived from the folder name.
        """
        images, labels = self.load_images_from_folder(test_folder)
        return self.classifier.score(self._embed(images), labels)

    def save_model(self, filename):
        """Serialise the whole pipeline to ``filename`` with joblib (zlib level 9)."""
        joblib.dump(self, filename, compress=('zlib', 9))

    @staticmethod
    def load_model(filename):
        """Load a pipeline written by ``save_model``.

        joblib.load unpickles the file, which can execute arbitrary code - only load
        files from a trusted source.
        """
        return joblib.load(filename)

In [None]:
# Train the full pipeline with its default settings on the 'AI' and 'Photo' folders;
# fit() prints accuracy, F1 and a classification report for its internal hold-out split.
classifier = FluxClassifier()
classifier.fit("AI", "Photo")

In [None]:
# Serialise the whole FluxClassifier object (reducer and classifier) with joblib.
classifier.save_model("flux_classifier.pkl")

In [None]:
# prompt: save the model to my google drive.

# Colab-only: mount Google Drive at /content/drive, then copy the saved pipeline
# into the MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')
!cp flux_classifier.pkl /content/drive/MyDrive

In [None]:
# Classify two sample images. They are resized to 512x512 first because that is the size
# load_images_from_folder produces for the training images; images of another size would
# yield feature vectors of a different length than the reducer was fitted on.
sample_paths = ["pDGQUK1BYaJYhrFB5ouQU.jpeg", "jenta2.jpeg"]
images = []
for path in sample_paths:
    # The context manager releases the file handle; resize() returns an independent image.
    with Image.open(path) as img:
        images.append(img.resize((512, 512)))

predictions = classifier.predict_proba(images)
print(predictions)