This commit is contained in:
Jan Kowalczyk
2025-06-10 09:31:28 +02:00
parent 3538b15073
commit 156b6d2ac1
8 changed files with 794 additions and 580 deletions

View File

@@ -24,6 +24,7 @@ from utils.visualization.plot_images_grid import plot_images_grid
[
"train",
"infer",
"ae_elbow_test", # Add new action
]
),
)
@@ -76,8 +77,8 @@ from utils.visualization.plot_images_grid import plot_images_grid
@click.option(
"--k_fold_num",
type=int,
default=5,
help="Number of folds for k-fold cross-validation (default: 5).",
default=None,
help="Number of folds for k-fold cross-validation (default: None).",
)
@click.option(
"--num_known_normal",
@@ -277,6 +278,13 @@ from utils.visualization.plot_images_grid import plot_images_grid
default=-1,
help="Number of jobs for model training.",
)
@click.option(
"--ae_elbow_dims",
type=int,
multiple=True,
default=[128, 256, 384, 512, 768, 1024],
help="List of latent space dimensions to test for autoencoder elbow analysis.",
)
def main(
action,
dataset_name,
@@ -319,6 +327,7 @@ def main(
isoforest_max_samples,
isoforest_contamination,
isoforest_n_jobs_model,
ae_elbow_dims,
):
"""
Deep SAD, a method for deep semi-supervised anomaly detection.
@@ -402,7 +411,7 @@ def main(
ratio_known_outlier,
ratio_pollution,
random_state=np.random.RandomState(cfg.settings["seed"]),
k_fold=k_fold,
k_fold_num=k_fold_num,
num_known_normal=num_known_normal,
num_known_outlier=num_known_outlier,
)
@@ -593,18 +602,35 @@ def main(
# Plot most anomalous and most normal test samples
if train_deepsad:
indices, labels, scores = zip(*deepSAD.results["test_scores"])
# Use experiment-based scores for plotting
indices, labels, scores = zip(
*deepSAD.results["test"]["exp_based"]["scores"]
)
indices, labels, scores = (
np.array(indices),
np.array(labels),
np.array(scores),
)
# Filter out samples with unknown labels (0)
valid_mask = labels != 0
indices = indices[valid_mask]
labels = labels[valid_mask]
scores = scores[valid_mask]
# Convert labels from -1/1 to 0/1 for plotting
labels = (labels == -1).astype(int) # -1 (anomaly) → 1, 1 (normal) → 0
idx_all_sorted = indices[
np.argsort(scores)
] # from lowest to highest score
idx_normal_sorted = indices[labels == 0][
np.argsort(scores[labels == 0])
] # from lowest to highest score
]
# Optionally plot manual-based results:
# indices_m, labels_m, scores_m = zip(*deepSAD.results["test"]["manual_based"]["scores"])
# ...same processing as above...
if dataset_name in (
"mnist",
@@ -745,6 +771,71 @@ def main(
logger.info(
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
)
elif action == "ae_elbow_test":
# Load data once
dataset = load_dataset(
dataset_name,
data_path,
normal_class,
known_outlier_class,
n_known_outlier_classes,
ratio_known_normal,
ratio_known_outlier,
ratio_pollution,
random_state=np.random.RandomState(cfg.settings["seed"]),
)
# Dictionary to store results for each dimension
elbow_results = {"dimensions": list(ae_elbow_dims), "ae_results": {}}
# Test each dimension
for rep_dim in ae_elbow_dims:
logger.info(f"Testing autoencoder with latent dimension: {rep_dim}")
# Initialize DeepSAD model with current dimension
deepSAD = DeepSAD(cfg.settings["eta"])
deepSAD.set_network(
net_name, rep_dim=rep_dim
) # Pass rep_dim to network builder
# Pretrain autoencoder with current dimension
deepSAD.pretrain(
dataset,
optimizer_name=cfg.settings["ae_optimizer_name"],
lr=cfg.settings["ae_lr"],
n_epochs=cfg.settings["ae_n_epochs"],
lr_milestones=cfg.settings["ae_lr_milestone"],
batch_size=cfg.settings["ae_batch_size"],
weight_decay=cfg.settings["ae_weight_decay"],
device=device,
n_jobs_dataloader=n_jobs_dataloader,
)
# Store the autoencoder pretraining results for this dimension, keyed by rep_dim
elbow_results["ae_results"][rep_dim] = {
"train_time": deepSAD.ae.train_time,
"train_loss": deepSAD.ae.train_loss,
"test_auc": deepSAD.ae.test_auc, # NOTE(review): read unconditionally - confirm the AE always sets test_auc
"test_loss": deepSAD.ae.test_loss,
"scores": deepSAD.ae.test_scores,
}
logger.info(f"Finished testing dimension {rep_dim}")
logger.info(f"Train time: {deepSAD.ae.train_time:.3f}s")
logger.info(f"Final train loss: {deepSAD.ae.train_loss[-1]:.6f}")
logger.info(f"Final test loss: {deepSAD.ae.test_loss:.6f}")
# Drop this dimension's model and release cached CUDA memory before the next run
del deepSAD
torch.cuda.empty_cache()
# Save all results
results_path = Path(xp_path) / "ae_elbow_results.pkl"
with open(results_path, "wb") as f:
pickle.dump(elbow_results, f)
logger.info(f"Saved elbow test results to {results_path}")
else:
logger.error(f"Unknown action: {action}")