# -*- coding: utf-8 -*-
"""Flux Classification App.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ckzOtXUiFW_NqlIandwoH07lnsLGKTLB
"""

import gradio as gr
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    classification_report,
)
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
import pywt
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBClassifier
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import plotly.express as px
import pandas as pd
import joblib
from tqdm import tqdm
import lzma


class FluxClassifier:
    def __init__(
        self,
        wavelets=["db4", "db10"],
        umap_n_neighbors=16,
        umap_n_components=32,
        random_state=42,
    ):
        self.wavelets = wavelets
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = umap.UMAP(
            n_neighbors=self.umap_n_neighbors,
            n_components=self.umap_n_components,
            random_state=self.random_state,
        )
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    def load_images_from_folder(self, folder):
        images = []
        labels = []
        print(f"Loading images from {folder}")
        for filename in tqdm(os.listdir(folder)):
            if not filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
                continue
            img = Image.open(os.path.join(folder, filename)).resize((512, 512))
            images.append(img)
            # Label 1 for AI-generated images, 0 for photos; assumes the folder
            # name contains "AI" for the generated set.
            labels.append(1 if "AI" in folder else 0)
        return images, labels

    def extract_wavelet_features(self, images):
        # Single-level DWT of the grayscale image for each wavelet; the
        # flattened detail coefficients are concatenated into one feature vector.
        all_features = []
        for img in images:
            img_gray = img.convert("L")
            img_array = np.array(img_gray)
            features = []
            for wavelet in self.wavelets:
                cA, cD = pywt.dwt(img_array, wavelet)
                features.extend(cD.flatten())
            all_features.append(features)
        return np.array(all_features)

    def fit(self, train_folder1, train_folder2):
        # Load images and extract features
        images1, labels1 = self.load_images_from_folder(train_folder1)
        images2, labels2 = self.load_images_from_folder(train_folder2)

        # Balance the two classes by truncating to the smaller set
        min_length = min(len(images1), len(images2))
        images1 = images1[:min_length]
        images2 = images2[:min_length]
        labels1 = labels1[:min_length]
        labels2 = labels2[:min_length]

        images = images1 + images2
        labels = labels1 + labels2

        features = self.extract_wavelet_features(images)

        # Apply UMAP dimensionality reduction
        embeddings = self.reducer.fit_transform(features)

        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=42
        )

        # Train the classifier and report hold-out metrics
        self.classifier.fit(X_train, y_train)
        acc = self.classifier.score(X_test, y_test)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy = {acc}")
        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))

    def predict(self, images):
        # Extract features, project with the fitted UMAP reducer, then predict
        features = self.extract_wavelet_features(images)
        embeddings = self.reducer.transform(features)
        return self.classifier.predict(embeddings)
    def predict_proba(self, images):
        # Extract features, project with the fitted UMAP reducer, then return
        # class probabilities
        features = self.extract_wavelet_features(images)
        embeddings = self.reducer.transform(features)
        return self.classifier.predict_proba(embeddings)

    def score(self, test_folder):
        # Load images and extract features
        images, labels = self.load_images_from_folder(test_folder)
        features = self.extract_wavelet_features(images)

        # Apply UMAP dimensionality reduction
        embeddings = self.reducer.transform(features)

        # Evaluate the classifier
        return self.classifier.score(embeddings, labels)

    def cross_val_score(self, folder1, folder2, n_splits=5):
        # Load images and extract features
        images1, labels1 = self.load_images_from_folder(folder1)
        images2, labels2 = self.load_images_from_folder(folder2)

        # Balance the two classes by truncating to the smaller set
        min_length = min(len(images1), len(images2))
        images1 = images1[:min_length]
        images2 = images2[:min_length]
        labels1 = labels1[:min_length]
        labels2 = labels2[:min_length]

        images = images1 + images2
        labels = labels1 + labels2

        features = self.extract_wavelet_features(images)

        # Apply UMAP dimensionality reduction
        embeddings = self.reducer.fit_transform(features)

        # Perform k-fold cross-validation
        kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores = cross_val_score(
            self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
        )

        # Print the cross-validation scores
        print("Cross-validation scores:", scores)
        print("Average cross-validation score:", scores.mean())

    def save_model(self, filename):
        joblib.dump(self, filename, compress=("zlib", 9))

    @staticmethod
    def load_model(filename):
        return joblib.load(filename)


# Load the trained classifier (KNN on UMAP-reduced wavelet features) from disk
filename = "flux_classifier.pkl"
classifier = joblib.load(filename)


def classify_image(image):
    # Gradio passes the upload as a numpy array; wrap it in a PIL image and let
    # the classifier handle wavelet feature extraction and UMAP projection.
    img = Image.fromarray(image).resize((512, 512))
    probabilities = classifier.predict_proba([img])[0]
    labels = ["Photo", "FLUX"]  # class 0 = photo, class 1 = AI-generated
    return {label: float(prob) for label, prob in zip(labels, probabilities)}


interface = gr.Interface(
    fn=classify_image, inputs=["image"], outputs=gr.Label(num_top_classes=2)
)
interface.launch(share=True)
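
# A minimal training sketch (commented out, not run by the app): the interface
# above assumes "flux_classifier.pkl" already exists on disk. Assuming one
# folder of real photos and one folder of FLUX outputs whose path contains
# "AI" (the folder names below are hypothetical placeholders), the pickle
# could be produced with:
#
#   clf = FluxClassifier()
#   clf.fit("data/photos", "data/AI_flux")
#   clf.cross_val_score("data/photos", "data/AI_flux", n_splits=5)
#   clf.save_model("flux_classifier.pkl")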