wip
This commit is contained in:
@@ -24,6 +24,7 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
[
|
||||
"train",
|
||||
"infer",
|
||||
"ae_elbow_test", # Add new action
|
||||
]
|
||||
),
|
||||
)
|
||||
@@ -76,8 +77,8 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
@click.option(
|
||||
"--k_fold_num",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Number of folds for k-fold cross-validation (default: 5).",
|
||||
default=None,
|
||||
help="Number of folds for k-fold cross-validation (default: None).",
|
||||
)
|
||||
@click.option(
|
||||
"--num_known_normal",
|
||||
@@ -277,6 +278,13 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
default=-1,
|
||||
help="Number of jobs for model training.",
|
||||
)
|
||||
@click.option(
|
||||
"--ae_elbow_dims",
|
||||
type=int,
|
||||
multiple=True,
|
||||
default=[128, 256, 384, 512, 768, 1024],
|
||||
help="List of latent space dimensions to test for autoencoder elbow analysis.",
|
||||
)
|
||||
def main(
|
||||
action,
|
||||
dataset_name,
|
||||
@@ -319,6 +327,7 @@ def main(
|
||||
isoforest_max_samples,
|
||||
isoforest_contamination,
|
||||
isoforest_n_jobs_model,
|
||||
ae_elbow_dims,
|
||||
):
|
||||
"""
|
||||
Deep SAD, a method for deep semi-supervised anomaly detection.
|
||||
@@ -402,7 +411,7 @@ def main(
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
random_state=np.random.RandomState(cfg.settings["seed"]),
|
||||
k_fold=k_fold,
|
||||
k_fold_num=k_fold_num,
|
||||
num_known_normal=num_known_normal,
|
||||
num_known_outlier=num_known_outlier,
|
||||
)
|
||||
@@ -593,18 +602,35 @@ def main(
|
||||
|
||||
# Plot most anomalous and most normal test samples
|
||||
if train_deepsad:
|
||||
indices, labels, scores = zip(*deepSAD.results["test_scores"])
|
||||
# Use experiment-based scores for plotting
|
||||
indices, labels, scores = zip(
|
||||
*deepSAD.results["test"]["exp_based"]["scores"]
|
||||
)
|
||||
indices, labels, scores = (
|
||||
np.array(indices),
|
||||
np.array(labels),
|
||||
np.array(scores),
|
||||
)
|
||||
|
||||
# Filter out samples with unknown labels (0)
|
||||
valid_mask = labels != 0
|
||||
indices = indices[valid_mask]
|
||||
labels = labels[valid_mask]
|
||||
scores = scores[valid_mask]
|
||||
|
||||
# Convert labels from -1/1 to 0/1 for plotting
|
||||
labels = (labels == -1).astype(int) # -1 (anomaly) → 1, 1 (normal) → 0
|
||||
|
||||
idx_all_sorted = indices[
|
||||
np.argsort(scores)
|
||||
] # from lowest to highest score
|
||||
idx_normal_sorted = indices[labels == 0][
|
||||
np.argsort(scores[labels == 0])
|
||||
] # from lowest to highest score
|
||||
]
|
||||
|
||||
# Optionally plot manual-based results:
|
||||
# indices_m, labels_m, scores_m = zip(*deepSAD.results["test"]["manual_based"]["scores"])
|
||||
# ...same processing as above...
|
||||
|
||||
if dataset_name in (
|
||||
"mnist",
|
||||
@@ -745,6 +771,71 @@ def main(
|
||||
logger.info(
|
||||
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
|
||||
)
|
||||
elif action == "ae_elbow_test":
|
||||
# Load data once
|
||||
dataset = load_dataset(
|
||||
dataset_name,
|
||||
data_path,
|
||||
normal_class,
|
||||
known_outlier_class,
|
||||
n_known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
random_state=np.random.RandomState(cfg.settings["seed"]),
|
||||
)
|
||||
|
||||
# Dictionary to store results for each dimension
|
||||
elbow_results = {"dimensions": list(ae_elbow_dims), "ae_results": {}}
|
||||
|
||||
# Test each dimension
|
||||
for rep_dim in ae_elbow_dims:
|
||||
logger.info(f"Testing autoencoder with latent dimension: {rep_dim}")
|
||||
|
||||
# Initialize DeepSAD model with current dimension
|
||||
deepSAD = DeepSAD(cfg.settings["eta"])
|
||||
deepSAD.set_network(
|
||||
net_name, rep_dim=rep_dim
|
||||
) # Pass rep_dim to network builder
|
||||
|
||||
# Pretrain autoencoder with current dimension
|
||||
deepSAD.pretrain(
|
||||
dataset,
|
||||
optimizer_name=cfg.settings["ae_optimizer_name"],
|
||||
lr=cfg.settings["ae_lr"],
|
||||
n_epochs=cfg.settings["ae_n_epochs"],
|
||||
lr_milestones=cfg.settings["ae_lr_milestone"],
|
||||
batch_size=cfg.settings["ae_batch_size"],
|
||||
weight_decay=cfg.settings["ae_weight_decay"],
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
)
|
||||
|
||||
# Store results for this dimension
|
||||
elbow_results["ae_results"][rep_dim] = {
|
||||
"train_time": deepSAD.ae.train_time,
|
||||
"train_loss": deepSAD.ae.train_loss,
|
||||
"test_auc": deepSAD.ae.test_auc, # if available
|
||||
"test_loss": deepSAD.ae.test_loss,
|
||||
"scores": deepSAD.ae.test_scores,
|
||||
}
|
||||
|
||||
logger.info(f"Finished testing dimension {rep_dim}")
|
||||
logger.info(f"Train time: {deepSAD.ae.train_time:.3f}s")
|
||||
logger.info(f"Final train loss: {deepSAD.ae.train_loss[-1]:.6f}")
|
||||
logger.info(f"Final test loss: {deepSAD.ae.test_loss:.6f}")
|
||||
|
||||
# Clear some memory
|
||||
del deepSAD
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
# Save all results
|
||||
results_path = Path(xp_path) / "ae_elbow_results.pkl"
|
||||
with open(results_path, "wb") as f:
|
||||
pickle.dump(elbow_results, f)
|
||||
|
||||
logger.info(f"Saved elbow test results to {results_path}")
|
||||
|
||||
else:
|
||||
logger.error(f"Unknown action: {action}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user