tool updates

Jan Kowalczyk
2025-08-13 14:15:15 +02:00
parent 44da3c2bd9
commit 8a5adc6360
3 changed files with 195 additions and 68 deletions


@@ -1,11 +1,15 @@
+from pathlib import Path
+
 import torch
 import torch.onnx
-from networks.mnist_LeNet import MNIST_LeNet_Autoencoder
+
+from networks.subter_LeNet import SubTer_LeNet_Autoencoder
+from networks.subter_LeNet_rf import SubTer_Efficient_AE
 
 
-def export_model_to_onnx(model, filepath, input_shape=(1, 1, 28, 28)):
+def export_model_to_onnx(model, filepath):
     model.eval()  # Set the model to evaluation mode
-    dummy_input = torch.randn(input_shape)  # Create a dummy input tensor
+    dummy_input = torch.randn(model.input_dim)  # Create a dummy input tensor
     torch.onnx.export(
         model,  # model being run
         dummy_input,  # model input (or a tuple for multiple inputs)
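The hunk elides the remaining keyword arguments of the torch.onnx.export call. For orientation only, a typical invocation looks like the sketch below; the opset number and tensor names are illustrative assumptions, not values taken from this file.

torch.onnx.export(
    model,                    # model being run
    dummy_input,              # model input (or a tuple for multiple inputs)
    filepath,                 # where to write the ONNX file
    export_params=True,       # store the trained weights in the exported file
    opset_version=17,         # assumed opset; not shown in this diff
    input_names=["input"],    # hypothetical tensor names
    output_names=["output"],
)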
@@ -23,13 +27,17 @@ def export_model_to_onnx(model, filepath, input_shape=(1, 1, 28, 28)):
 if __name__ == "__main__":
-    # Initialize the autoencoder model
-    autoencoder = MNIST_LeNet_Autoencoder(rep_dim=32)
-
-    # Define the file path where the ONNX model will be saved
-    onnx_file_path = "mnist_lenet_autoencoder.onnx"
-
-    # Export the model
-    export_model_to_onnx(autoencoder, onnx_file_path)
-
-    print(f"Model has been exported to {onnx_file_path}")
+    output_folder_path = Path("./onnx_models")
+    output_folder_path.mkdir(parents=True, exist_ok=True)
+
+    models_to_visualize = [
+        (
+            SubTer_LeNet_Autoencoder(rep_dim=32),
+            output_folder_path / "subter_lenet_ae.onnx",
+        ),
+        (SubTer_Efficient_AE(rep_dim=32), output_folder_path / "subter_ef_ae.onnx"),
+    ]
+
+    for model, output_path in models_to_visualize:
+        export_model_to_onnx(model, output_path)
+        print(f"Model has been exported to {output_path}")

tools/.gitignore (vendored)

@@ -7,4 +7,7 @@ tmp
 .envrc
 .vscode
 test
+*.jpg
+*.jpeg
+*.png


@@ -1,13 +1,18 @@
+import json
 import pickle
 from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
+from rich.progress import track
 from scipy.stats import sem, t
-from sklearn.metrics import auc
 
-# Confidence interval function
+models = ["deepsad", "isoforest", "ocsvm"]
+evaluation_types = ["exp_based", "manual_based"]
+parent_results_path = Path("/home/fedex/mt/results/done")
+base_output_path = Path("/home/fedex/mt/results/tmp_plots")
+
+
 def confidence_interval(data, confidence=0.95):
     n = len(data)
     mean = np.mean(data)
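The diff shows only the head and tail of confidence_interval. Given the sem and t imports, the half-width h is presumably the standard Student-t interval; a minimal sketch of the full function under that assumption:

import numpy as np
from scipy.stats import sem, t

def confidence_interval(data, confidence=0.95):
    # Mean and t-based half-width; the middle lines are assumed, not shown in the diff.
    n = len(data)
    mean = np.mean(data)
    h = sem(data) * t.ppf((1 + confidence) / 2, n - 1)
    return mean, h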
@@ -16,67 +21,178 @@ def confidence_interval(data, confidence=0.95):
     return mean, h
 
 
-# Load ROC and AUC values from pickle files
-roc_data = []
-auc_scores = []
-isoforest_roc_data = []
-isoforest_auc_scores = []
-
-results_path = Path(
-    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_0_0"
-)
-
-for i in range(5):
-    with (results_path / f"results_{i}.pkl").open("rb") as f:
-        data = pickle.load(f)
-        roc_data.append(data["test_roc"])
-        auc_scores.append(data["test_auc"])
-    with (results_path / f"results.isoforest_{i}.pkl").open("rb") as f:
-        data = pickle.load(f)
-        isoforest_roc_data.append(data["test_roc"])
-        isoforest_auc_scores.append(data["test_auc"])
-
-# Calculate mean and confidence interval for AUC scores
-mean_auc, auc_ci = confidence_interval(auc_scores)
-
-# Combine ROC curves
-mean_fpr = np.linspace(0, 1, 100)
-tprs = []
-
-for fpr, tpr, _ in roc_data:
-    interp_tpr = np.interp(mean_fpr, fpr, tpr)
-    interp_tpr[0] = 0.0
-    tprs.append(interp_tpr)
-
-mean_tpr = np.mean(tprs, axis=0)
-mean_tpr[-1] = 1.0
-std_tpr = np.std(tprs, axis=0)
-
-# Plot ROC curves with confidence margins
-plt.figure()
-plt.plot(
-    mean_fpr,
-    mean_tpr,
-    color="b",
-    label=f"Mean ROC (AUC = {mean_auc:.2f} ± {auc_ci:.2f})",
-)
-plt.fill_between(
-    mean_fpr,
-    mean_tpr - std_tpr,
-    mean_tpr + std_tpr,
-    color="b",
-    alpha=0.2,
-    label="± 1 std. dev.",
-)
-
-# Plot each fold's ROC curve (optional)
-for i, (fpr, tpr, _) in enumerate(roc_data):
-    plt.plot(fpr, tpr, lw=1, alpha=0.3, label=f"Fold {i + 1} ROC")
-
-# Labels and legend
-plt.plot([0, 1], [0, 1], "k--", label="Chance")
-plt.xlabel("False Positive Rate")
-plt.ylabel("True Positive Rate")
-plt.title("ROC Curve with 5-Fold Cross-Validation")
-plt.legend(loc="lower right")
-plt.savefig("roc_curve_0_0.png")
+def load_results_data(folder):
+    experiment_data = {}
+
+    json_config_path = folder / "config.json"
+    with json_config_path.open("r") as f:
+        config = json.load(f)
+
+    try:
+        net = config["net_name"]
+        num_known_normal, num_known_anomalous = (
+            config["num_known_normal"],
+            config["num_known_outlier"],
+        )
+        semi_known_nums = (num_known_normal, num_known_anomalous)
+        latent_dim = config["latent_space_dim"]
+
+        exp_title = f"{net} - {num_known_normal} normal, {num_known_anomalous} anomalous, latent dim {latent_dim}"
+
+        if not config["k_fold"]:
+            raise ValueError(f"{folder.name} was not trained as k-fold. Exiting...")
+        k_fold_num = config["k_fold_num"]
+    except KeyError as e:
+        print(f"Missing key in config.json for experiment folder {folder.name}: {e}")
+        raise
+
+    experiment_data["exp_title"] = exp_title
+    experiment_data["k_fold_num"] = k_fold_num
+    experiment_data["semi_known_nums"] = semi_known_nums
+    experiment_data["folder"] = folder
+    experiment_data["net"] = net
+    experiment_data["latent_dim"] = latent_dim
+
+    roc_data = {}
+    roc_auc_data = {}
+    prc_data = {}
+    for model in models:
+        for fold_idx in range(k_fold_num):
+            results_file = folder / f"results_{model}_{fold_idx}.pkl"
+            if not results_file.exists():
+                print(
+                    f"Expected results file {results_file.name} does not exist. Skipping..."
+                )
+                continue  # skip missing folds instead of failing on open()
+            with results_file.open("rb") as f:
+                data = pickle.load(f)
+            try:
+                if model == "deepsad":
+                    test_results = data["test"]
+                    for evaluation_type in evaluation_types:
+                        eval_type_results = test_results[evaluation_type]
+                        roc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = eval_type_results["roc"]
+                        roc_auc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = eval_type_results["auc"]
+                        prc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = eval_type_results["prc"]
+                elif model in ["isoforest", "ocsvm"]:
+                    for evaluation_type in evaluation_types:
+                        roc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = data[f"test_roc_{evaluation_type}"]
+                        roc_auc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = data[f"test_auc_{evaluation_type}"]
+                        prc_data.setdefault(model, {}).setdefault(
+                            evaluation_type, {}
+                        )[fold_idx] = data[f"test_prc_{evaluation_type}"]
+            except KeyError as e:
+                print(f"Missing key in results file {results_file.name}: {e}")
+                raise
+
+    experiment_data["roc_data"] = roc_data
+    experiment_data["roc_auc_data"] = roc_auc_data
+    experiment_data["prc_data"] = prc_data
+
+    return experiment_data
+
+
+def plot_roc_curve(experiment_data, output_path):
+    try:
+        k_fold_num = experiment_data["k_fold_num"]
+        roc_data = experiment_data["roc_data"]
+        roc_auc_data = experiment_data["roc_auc_data"]
+        folder = experiment_data["folder"]
+        exp_title = experiment_data["exp_title"]
+    except KeyError as e:
+        print(f"Missing key in experiment data: {e}")
+        raise
+
+    for evaluation_type in evaluation_types:
+        plt.figure(figsize=(8, 6))
+        for model in models:
+            # Gather all folds' ROC data for this model and evaluation_type
+            fold_rocs = []
+            auc_scores = []
+            for fold_idx in range(k_fold_num):
+                try:
+                    fpr, tpr, thresholds = roc_data[model][evaluation_type][fold_idx]
+                    fold_rocs.append((fpr, tpr))
+                    auc_scores.append(roc_auc_data[model][evaluation_type][fold_idx])
+                except KeyError:
+                    continue
+            if not fold_rocs:
+                print(
+                    f"No ROC data for model {model}, evaluation {evaluation_type} in {folder.name}"
+                )
+                continue
+
+            # Interpolate TPRs to a common FPR grid
+            mean_fpr = np.linspace(0, 1, 100)
+            interp_tprs = []
+            for fpr, tpr in fold_rocs:
+                interp_tpr = np.interp(mean_fpr, fpr, tpr)
+                interp_tpr[0] = 0.0
+                interp_tprs.append(interp_tpr)
+            mean_tpr = np.mean(interp_tprs, axis=0)
+            std_tpr = np.std(interp_tprs, axis=0)
+            mean_tpr[-1] = 1.0
+
+            # Mean and CI for AUC
+            mean_auc, auc_ci = confidence_interval(auc_scores)
+
+            # Plot mean ROC and std band
+            plt.plot(
+                mean_fpr,
+                mean_tpr,
+                label=f"{model} (AUC={mean_auc:.2f}±{auc_ci:.2f})",
+            )
+            plt.fill_between(
+                mean_fpr,
+                mean_tpr - std_tpr,
+                mean_tpr + std_tpr,
+                alpha=0.15,
+            )
+
+        plt.plot([0, 1], [0, 1], "k--", label="Chance")
+        plt.xlabel("False Positive Rate")
+        plt.ylabel("True Positive Rate")
+        plt.title(f"ROC Curve ({exp_title} - {evaluation_type})")
+        plt.legend(loc="lower right")
+        plt.tight_layout()
+        plt.savefig(
+            (output_path / f"roc_curve_{folder.name}_{evaluation_type}.png").as_posix()
+        )
+        plt.close()
+
+
+def main():
+    base_output_path.mkdir(exist_ok=True, parents=True)
+
+    # Find all subfolders (skip files)
+    subfolders = [f for f in parent_results_path.iterdir() if f.is_dir()]
+    print(f"Found {len(subfolders)} subfolders in {parent_results_path}")
+
+    all_experiments_data = []
+    for folder in track(
+        subfolders, description="[cyan]Loading data...", total=len(subfolders)
+    ):
+        all_experiments_data.append(load_results_data(folder))
+
+    print("Data loading complete. Plotting ROC curves...")
+
+    roc_curves_output_path = base_output_path / "roc_curves"
+    roc_curves_output_path.mkdir(exist_ok=True, parents=True)
+    for experiment_data in track(
+        all_experiments_data,
+        description="[green]Plotting ROC curves...",
+        total=len(all_experiments_data),
+    ):
+        plot_roc_curve(experiment_data, roc_curves_output_path)
+
+
+if __name__ == "__main__":
+    main()
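For reference, a minimal sketch of the per-experiment directory layout load_results_data expects, inferred from the keys read above. The folder name, metric values, and array shapes are made up for illustration; only the file names and dictionary keys come from the script.

import json
import pickle
from pathlib import Path

import numpy as np

exp = Path("./example_experiment")  # hypothetical experiment folder
exp.mkdir(parents=True, exist_ok=True)

# config.json with the keys the loader requires
config = {
    "net_name": "subter_LeNet",
    "num_known_normal": 10,
    "num_known_outlier": 10,
    "latent_space_dim": 32,
    "k_fold": True,
    "k_fold_num": 5,
}
(exp / "config.json").write_text(json.dumps(config))

fpr = np.linspace(0, 1, 50)
roc = (fpr, np.sqrt(fpr), fpr[::-1])  # (fpr, tpr, thresholds), illustrative
prc = (fpr[::-1], fpr, fpr)           # precision/recall/thresholds, illustrative

# DeepSAD results: nested under data["test"][evaluation_type]
deepsad = {
    "test": {
        et: {"roc": roc, "auc": 0.9, "prc": prc}
        for et in ["exp_based", "manual_based"]
    }
}
with (exp / "results_deepsad_0.pkl").open("wb") as f:
    pickle.dump(deepsad, f)

# Baseline results: flat keys suffixed with the evaluation type
baseline = {}
for et in ["exp_based", "manual_based"]:
    baseline[f"test_roc_{et}"] = roc
    baseline[f"test_auc_{et}"] = 0.8
    baseline[f"test_prc_{et}"] = prc
for model in ["isoforest", "ocsvm"]:
    with (exp / f"results_{model}_0.pkl").open("wb") as f:
        pickle.dump(baseline, f)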