Flux_Image_Detector / flux_classifier_app.py
nhradek's picture
Upload folder using huggingface_hub
92055e5 verified
raw
history blame
No virus
6.9 kB
# -*- coding: utf-8 -*-
"""Flux Classification App.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ckzOtXUiFW_NqlIandwoH07lnsLGKTLB
"""
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
accuracy_score,
f1_score,
confusion_matrix,
ConfusionMatrixDisplay,
)
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
import pywt
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import plotly.express as px
import pandas as pd
import joblib
from tqdm import tqdm
import lzma
class FluxClassifier:
    """Wavelet + UMAP + k-NN pipeline that separates photos from AI images.

    Every image is converted to grayscale, the detail coefficients returned by
    ``pywt.dwt`` are concatenated for each configured wavelet, the resulting
    feature vectors are reduced with UMAP, and the embeddings are classified
    with a k-nearest-neighbours model.

    Labels are derived from the folder path: ``1`` when the path contains the
    substring ``"AI"``, ``0`` otherwise.

    Only the attributes set in ``__init__`` are stored on the instance; the
    tuning constants below are class attributes so that models pickled with an
    earlier version of this class keep working after being loaded.
    """

    # Size every training image is resized to before feature extraction.
    _IMAGE_SIZE = (512, 512)
    # Accepted file-name endings (compared case-insensitively). "jpeg" and
    # "webp" intentionally carry no leading dot, as in the original filter.
    _IMAGE_SUFFIXES = (".jpg", ".png", "jpeg", "webp")

    def __init__(
        self,
        wavelets=("db4", "db10"),
        umap_n_neighbors=16,
        umap_n_components=32,
        random_state=42,
    ):
        """Configure the feature extractor, the reducer and the classifier.

        Args:
            wavelets: Iterable of PyWavelets wavelet names used for features.
            umap_n_neighbors: ``n_neighbors`` passed to ``umap.UMAP``.
            umap_n_components: ``n_components`` passed to ``umap.UMAP``.
            random_state: Seed passed to ``umap.UMAP``.
        """
        # Copy into a fresh list: the default is immutable and a caller's list
        # is never shared with (or mutated through) this instance.
        self.wavelets = list(wavelets)
        self.umap_n_neighbors = umap_n_neighbors
        self.umap_n_components = umap_n_components
        self.random_state = random_state
        self.reducer = self._make_reducer()
        self.classifier = KNeighborsClassifier(n_neighbors=7)  # Default classifier

    def _make_reducer(self):
        """Return a new, unfitted UMAP reducer using this instance's settings."""
        return umap.UMAP(
            n_neighbors=self.umap_n_neighbors,
            n_components=self.umap_n_components,
            random_state=self.random_state,
        )

    def load_images_from_folder(self, folder):
        """Load and resize every supported image found directly in ``folder``.

        Args:
            folder: Directory path. Its name decides the label of all images.

        Returns:
            A ``(images, labels)`` tuple of equally long lists: resized PIL
            images and their integer labels (1 if ``"AI"`` is in the path).
        """
        images = []
        labels = []
        label = 1 if "AI" in os.fspath(folder) else 0
        print(f"Loading images from {folder}")
        # Sorted so that the subset kept when classes are balanced by
        # truncation does not depend on the file system's listing order.
        for filename in tqdm(sorted(os.listdir(folder))):
            if not filename.lower().endswith(self._IMAGE_SUFFIXES):
                continue
            # resize() returns an independent, fully loaded image, so the
            # underlying file can be closed right away.
            with Image.open(os.path.join(folder, filename)) as img:
                images.append(img.resize(self._IMAGE_SIZE))
            labels.append(label)
        return images, labels

    def extract_wavelet_features(self, images):
        """Turn PIL images into wavelet detail-coefficient feature vectors.

        Args:
            images: Iterable of PIL images. They must share one size for the
                rows to have equal length.

        Returns:
            ``np.ndarray`` with one row per image; an empty 1-D array when
            ``images`` is empty.
        """
        all_features = []
        for img in images:
            pixels = np.array(img.convert("L"))
            # Keep only the detail coefficients (second element of the pair).
            details = [pywt.dwt(pixels, wavelet)[1].ravel() for wavelet in self.wavelets]
            # np.concatenate rejects an empty sequence, so handle "no wavelets".
            all_features.append(np.concatenate(details) if details else np.empty(0))
        return np.array(all_features)

    def _load_balanced_features(self, folder1, folder2):
        """Load two folders, truncate both to the smaller size, build features.

        Returns:
            ``(features, labels)`` for the balanced, concatenated data set.

        Raises:
            ValueError: If either folder contains no supported image.
        """
        images1, labels1 = self.load_images_from_folder(folder1)
        images2, labels2 = self.load_images_from_folder(folder2)
        min_length = min(len(images1), len(images2))
        if min_length == 0:
            raise ValueError(
                f"No usable images found in {folder1!r} and/or {folder2!r}"
            )
        images = images1[:min_length] + images2[:min_length]
        labels = labels1[:min_length] + labels2[:min_length]
        return self.extract_wavelet_features(images), labels

    def _embed(self, images):
        """Map images to the embedding space of the already fitted reducer."""
        return self.reducer.transform(self.extract_wavelet_features(images))

    def fit(self, train_folder1, train_folder2):
        """Fit the reducer and the classifier and print hold-out metrics.

        Args:
            train_folder1: Folder with images of the first class.
            train_folder2: Folder with images of the second class.

        Raises:
            ValueError: If either folder contains no supported image.
        """
        features, labels = self._load_balanced_features(train_folder1, train_folder2)
        # NOTE(review): the reducer is fitted on all samples before the split,
        # so the 20% "test" embeddings were seen by UMAP; the printed metrics
        # are therefore likely optimistic.
        embeddings = self.reducer.fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            embeddings, labels, test_size=0.2, random_state=42
        )
        self.classifier.fit(X_train, y_train)
        acc = self.classifier.score(X_test, y_test)
        y_pred = self.classifier.predict(X_test)
        print(f"Classifier accuracy = {acc}")
        f1 = f1_score(y_test, y_pred)
        print(f"Classifier F1 = {f1}")
        print(classification_report(y_test, y_pred))

    def predict(self, images):
        """Return the predicted label for each PIL image in ``images``."""
        return self.classifier.predict(self._embed(images))

    def predict_proba(self, images):
        """Return per-class probabilities for each PIL image in ``images``."""
        return self.classifier.predict_proba(self._embed(images))

    def score(self, test_folder):
        """Return the classifier's accuracy on the images in ``test_folder``.

        All images get the single label derived from the folder path.
        """
        images, labels = self.load_images_from_folder(test_folder)
        return self.classifier.score(self._embed(images), labels)

    def cross_val_score(self, folder1, folder2, n_splits=5):
        """Print and return k-fold cross-validation accuracy of the classifier.

        A throw-away reducer is fitted for this evaluation so that a model
        previously trained with ``fit`` stays consistent and usable.

        Args:
            folder1: Folder with images of the first class.
            folder2: Folder with images of the second class.
            n_splits: Number of folds.

        Returns:
            Array with one accuracy score per fold.

        Raises:
            ValueError: If either folder contains no supported image.
        """
        features, labels = self._load_balanced_features(folder1, folder2)
        embeddings = self._make_reducer().fit_transform(features)
        kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        # Inside a method the bare name resolves to the module-level
        # scikit-learn function, not to this method of the same name.
        scores = cross_val_score(
            self.classifier, embeddings, labels, cv=kfold, scoring="accuracy"
        )
        print("Cross-validation scores:", scores)
        print("Average cross-validation score:", scores.mean())
        return scores

    def save_model(self, filename):
        """Serialize this whole object to ``filename`` with zlib level 9."""
        joblib.dump(self, filename, compress=("zlib", 9))

    @staticmethod
    def load_model(filename):
        """Load an object previously written by ``save_model``.

        SECURITY: ``joblib.load`` unpickles the file and can execute arbitrary
        code while doing so; only load files from a trusted source.
        """
        return joblib.load(filename)
# Restore the trained model from disk once, at start-up. Judging by how it is
# used below (predict_proba on PIL images), the file presumably holds a whole
# pickled FluxClassifier rather than a bare k-NN estimator.
# SECURITY: loading goes through joblib/pickle - only use a trusted file.
filename = "flux_classifier.pkl"
classifier = FluxClassifier.load_model(filename)
def classify_image(image):
    """Classify one image and return a label -> probability mapping.

    Args:
        image: Array accepted by ``Image.fromarray`` (as supplied by the
            Gradio image input).

    Returns:
        Dict keyed by "Photo" / "FLUX" holding the probability the loaded
        model reports for the corresponding class index.
    """
    class_names = ["Photo", "FLUX"]
    # The model expects 512x512 PIL images, so convert and resize first.
    resized = Image.fromarray(image).resize((512, 512))
    rows = list(classifier.predict_proba([resized]))
    scores = {}
    # Only one image was submitted, hence only the first row is relevant.
    for idx, prob in enumerate(rows[0]):
        scores[class_names[idx]] = prob
    return scores
# Web UI: a single image input wired to classify_image, with the result shown
# as a label widget limited to the two top classes.
interface = gr.Interface(
    outputs=gr.Label(num_top_classes=2),
    inputs=["image"],
    fn=classify_image,
)
# share=True asks Gradio to additionally create a public share link.
interface.launch(share=True)