ae elbow work
@@ -89,10 +89,10 @@ class DeepSAD(object):
 
         self.ae_results = {"train_time": None, "test_auc": None, "test_time": None}
 
-    def set_network(self, net_name):
+    def set_network(self, net_name, rep_dim=1024):
         """Builds the neural network phi."""
         self.net_name = net_name
-        self.net = build_network(net_name)
+        self.net = build_network(net_name, rep_dim=rep_dim)
 
     def train(
         self,
@@ -256,15 +256,42 @@ class DeepSAD(object):
         )
         self.ae_net = self.ae_trainer.train(dataset, self.ae_net, k_fold_idx=k_fold_idx)
 
-        # Get train results
-        self.ae_results["train_time"] = self.ae_trainer.train_time
-
         # Test
         self.ae_trainer.test(dataset, self.ae_net, k_fold_idx=k_fold_idx)
 
-        # Get test results
-        self.ae_results["test_auc"] = self.ae_trainer.test_auc
-        self.ae_results["test_time"] = self.ae_trainer.test_time
+        # Get train results
+        self.ae_results = {
+            "train": {
+                "time": self.ae_trainer.train_time,
+                "indices": self.ae_trainer.train_indices,
+                "labels_exp_based": self.ae_trainer.train_labels_exp_based,
+                "labels_manual_based": self.ae_trainer.train_labels_manual_based,
+                "semi_targets": self.ae_trainer.train_semi_targets,
+                "file_ids": self.ae_trainer.train_file_ids,
+                "frame_ids": self.ae_trainer.train_frame_ids,
+                "scores": self.ae_trainer.train_scores,
+                "loss": self.ae_trainer.train_loss,
+                "file_names": {
+                    file_id: dataset.get_file_name_from_idx(file_id)
+                    for file_id in np.unique(self.ae_trainer.train_file_ids)
+                },
+            },
+            "test": {
+                "time": self.ae_trainer.test_time,
+                "indices": self.ae_trainer.test_indices,
+                "labels_exp_based": self.ae_trainer.test_labels_exp_based,
+                "labels_manual_based": self.ae_trainer.test_labels_manual_based,
+                "semi_targets": self.ae_trainer.test_semi_targets,
+                "file_ids": self.ae_trainer.test_file_ids,
+                "frame_ids": self.ae_trainer.test_frame_ids,
+                "scores": self.ae_trainer.test_scores,
+                "loss": self.ae_trainer.test_loss,
+                "file_names": {
+                    file_id: dataset.get_file_name_from_idx(file_id)
+                    for file_id in np.unique(self.ae_trainer.test_file_ids)
+                },
+            },
+        }
 
         # Initialize Deep SAD network weights from pre-trained encoder
         self.init_network_weights_from_pretraining()
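Note: the nested ae_results above keeps everything the AETrainer records per split. A minimal sketch of reading it back after pretraining (the per-file aggregation is illustrative, not part of this commit):

    import numpy as np

    # assumes a DeepSAD instance `deepSAD` with ae_results populated as above
    test = deepSAD.ae_results["test"]
    for file_id, file_name in test["file_names"].items():
        mask = test["file_ids"] == file_id  # frames belonging to this file
        print(file_name, float(np.mean(test["scores"][mask])))  # mean AE score per file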
@@ -1,4 +1,5 @@
 import logging
+import pickle
 import random
 from pathlib import Path
 
@@ -278,13 +279,6 @@ from utils.visualization.plot_images_grid import plot_images_grid
     default=-1,
     help="Number of jobs for model training.",
 )
-@click.option(
-    "--ae_elbow_dims",
-    type=int,
-    multiple=True,
-    default=[128, 256, 384, 512, 768, 1024],
-    help="List of latent space dimensions to test for autoencoder elbow analysis.",
-)
 def main(
     action,
     dataset_name,
@@ -327,7 +321,6 @@ def main(
     isoforest_max_samples,
     isoforest_contamination,
     isoforest_n_jobs_model,
-    ae_elbow_dims,
 ):
     """
     Deep SAD, a method for deep semi-supervised anomaly detection.
@@ -786,6 +779,8 @@ def main(
         )
 
         # Dictionary to store results for each dimension
+        # ae_elbow_dims = [32, 64, 128, 256, 384, 512, 768, 1024]
+        ae_elbow_dims = [32, 64]
         elbow_results = {"dimensions": list(ae_elbow_dims), "ae_results": {}}
 
         # Test each dimension
@@ -812,25 +807,16 @@ def main(
             )
 
             # Store results for this dimension
-            elbow_results["ae_results"][rep_dim] = {
-                "train_time": deepSAD.ae.train_time,
-                "train_loss": deepSAD.ae.train_loss,
-                "test_auc": deepSAD.ae.test_auc,  # if available
-                "test_loss": deepSAD.ae.test_loss,
-                "scores": deepSAD.ae.test_scores,
-            }
+            elbow_results["ae_results"][rep_dim] = deepSAD.ae_results
 
             logger.info(f"Finished testing dimension {rep_dim}")
-            logger.info(f"Train time: {deepSAD.ae.train_time:.3f}s")
-            logger.info(f"Final train loss: {deepSAD.ae.train_loss[-1]:.6f}")
-            logger.info(f"Final test loss: {deepSAD.ae.test_loss:.6f}")
 
             # Clear some memory
             del deepSAD
             torch.cuda.empty_cache()
 
         # Save all results
-        results_path = Path(xp_path) / "ae_elbow_results.pkl"
+        results_path = Path(xp_path) / f"ae_elbow_results_{net_name}.pkl"
         with open(results_path, "wb") as f:
             pickle.dump(elbow_results, f)
 
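Note: the pickled elbow results make the dimension sweep easy to inspect offline. A minimal sketch of plotting final test loss per latent dimension (matplotlib usage and variable names are illustrative, not part of this commit):

    import pickle
    from pathlib import Path

    import matplotlib.pyplot as plt

    with open(Path(xp_path) / f"ae_elbow_results_{net_name}.pkl", "rb") as f:
        elbow = pickle.load(f)

    dims = sorted(elbow["ae_results"])
    losses = [elbow["ae_results"][d]["test"]["loss"] for d in dims]
    plt.plot(dims, losses, marker="o")  # look for the bend ("elbow")
    plt.xlabel("rep_dim")
    plt.ylabel("AE test loss")
    plt.savefig("ae_elbow.png")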
@@ -9,7 +9,7 @@ from .subter_LeNet_Split import SubTer_LeNet_Split, SubTer_LeNet_Split_Autoencod
 from .vae import VariationalAutoencoder
 
 
-def build_network(net_name, ae_net=None):
+def build_network(net_name, ae_net=None, rep_dim=1024):
     """Builds the neural network."""
 
     implemented_networks = (
@@ -46,7 +46,7 @@ def build_network(net_name, ae_net=None):
         net = MNIST_LeNet()
 
     if net_name == "subter_LeNet":
-        net = SubTer_LeNet()
+        net = SubTer_LeNet(rep_dim=rep_dim)
 
     if net_name == "subter_LeNet_Split":
         net = SubTer_LeNet_Split()
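Note: rep_dim now threads from DeepSAD.set_network through build_network into the network constructor. A minimal usage sketch (the dimension value is illustrative):

    net = build_network("subter_LeNet", rep_dim=512)  # encoder with a 512-d latent space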
@@ -72,34 +72,45 @@ class AETrainer(BaseTrainer):
             optimizer, milestones=self.lr_milestones, gamma=0.1
         )
 
+        # Prepare containers for results (similar to test)
+        all_indices = []
+        all_labels_exp_based = []
+        all_labels_manual_based = []
+        all_semi_targets = []
+        all_file_ids = []
+        all_frame_ids = []
+        all_scores = []
+
         # Training
         logger.info("Starting pretraining...")
         start_time = time.time()
         ae_net.train()
 
-        all_training_data = []
         for epoch in range(self.n_epochs):
             epoch_loss = 0.0
             n_batches = 0
             epoch_start_time = time.time()
             for data in train_loader:
-                inputs, _, _, _, _, file_frame_ids = data
-                inputs = inputs.to(self.device)
-                all_training_data.append(
-                    np.dstack(
-                        (
-                            file_frame_ids[0].detach().cpu().numpy(),
-                            file_frame_ids[1].detach().cpu().numpy(),
-                        )
-                    )
-                )
+                (
+                    inputs,
+                    labels_exp_based,
+                    labels_manual_based,
+                    semi_targets,
+                    idx,
+                    (file_id, frame_id),
+                ) = data
+                inputs, idx = (
+                    inputs.to(self.device),
+                    idx.to(self.device),
+                )
 
                 # Zero the network parameter gradients
                 optimizer.zero_grad()
 
-                # Update network parameters via backpropagation: forward + backward + optimize
+                # Forward + backward + optimize
                 rec = ae_net(inputs)
                 rec_loss = criterion(rec, inputs)
+                scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))
                 loss = torch.mean(rec_loss)
                 loss.backward()
                 optimizer.step()
@@ -107,6 +118,17 @@ class AETrainer(BaseTrainer):
                 epoch_loss += loss.item()
                 n_batches += 1
 
+                # Save all relevant information from dataloader
+                all_indices.extend(idx.detach().cpu().numpy())
+                all_labels_exp_based.extend(labels_exp_based.detach().cpu().numpy())
+                all_labels_manual_based.extend(
+                    labels_manual_based.detach().cpu().numpy()
+                )
+                all_semi_targets.extend(semi_targets.detach().cpu().numpy())
+                all_file_ids.extend(file_id.detach().cpu().numpy())
+                all_frame_ids.extend(frame_id.detach().cpu().numpy())
+                all_scores.extend(scores.detach().cpu().numpy())
+
             scheduler.step()
             if epoch in self.lr_milestones:
                 logger.info(
@@ -122,14 +144,18 @@ class AETrainer(BaseTrainer):
                 )
 
         self.train_time = time.time() - start_time
 
+        # Save all results as member variables (like in test)
+        self.train_indices = np.array(all_indices)
+        self.train_labels_exp_based = np.array(all_labels_exp_based)
+        self.train_labels_manual_based = np.array(all_labels_manual_based)
+        self.train_semi_targets = np.array(all_semi_targets)
+        self.train_file_ids = np.array(all_file_ids)
+        self.train_frame_ids = np.array(all_frame_ids)
+        self.train_scores = np.array(all_scores)
+        self.train_loss = epoch_loss / n_batches if n_batches > 0 else float("nan")
+
         logger.info("Pretraining Time: {:.3f}s".format(self.train_time))
 
-        all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
-
-        sorted_training_data = all_training_data[
-            np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
-        ]
-
         logger.info("Finished pretraining.")
 
         return ae_net
@@ -156,65 +182,70 @@ class AETrainer(BaseTrainer):
         ae_net = ae_net.to(self.device)
         criterion = criterion.to(self.device)
 
+        # Prepare containers for results
+        all_indices = []
+        all_labels_exp_based = []
+        all_labels_manual_based = []
+        all_semi_targets = []
+        all_file_ids = []
+        all_frame_ids = []
+        all_scores = []
+
         # Testing
         logger.info("Testing autoencoder...")
         epoch_loss = 0.0
         n_batches = 0
         start_time = time.time()
-        idx_label_score = []
         ae_net.eval()
-        all_training_data = []
         with torch.no_grad():
             for data in test_loader:
-                inputs, labels, _, _, idx, file_frame_ids = data
-                inputs, labels, idx = (
-                    inputs.to(self.device),
-                    labels.to(self.device),
-                    idx.to(self.device),
-                )
-
-                all_training_data.append(
-                    np.dstack(
-                        (
-                            file_frame_ids[0].detach().cpu().numpy(),
-                            file_frame_ids[1].detach().cpu().numpy(),
-                        )
-                    )
-                )
+                (
+                    inputs,
+                    labels_exp_based,
+                    labels_manual_based,
+                    semi_targets,
+                    idx,
+                    (file_id, frame_id),
+                ) = data
+                inputs, idx = (
+                    inputs.to(self.device),
+                    idx.to(self.device),
+                )
 
                 rec = ae_net(inputs)
                 rec_loss = criterion(rec, inputs)
                 scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))
 
-                # Save triple of (idx, label, score) in a list
-                idx_label_score += list(
-                    zip(
-                        idx.cpu().data.numpy().tolist(),
-                        labels.cpu().data.numpy().tolist(),
-                        scores.cpu().data.numpy().tolist(),
-                    )
-                )
-
                 loss = torch.mean(rec_loss)
                 epoch_loss += loss.item()
                 n_batches += 1
 
+                # Save all relevant information from dataloader
+                all_indices.extend(idx.detach().cpu().numpy())
+                all_labels_exp_based.extend(labels_exp_based.detach().cpu().numpy())
+                all_labels_manual_based.extend(
+                    labels_manual_based.detach().cpu().numpy()
+                )
+                all_semi_targets.extend(semi_targets.detach().cpu().numpy())
+                all_file_ids.extend(file_id.detach().cpu().numpy())
+                all_frame_ids.extend(frame_id.detach().cpu().numpy())
+                all_scores.extend(scores.detach().cpu().numpy())
+
         self.test_time = time.time() - start_time
 
-        all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
-
-        sorted_training_data = all_training_data[
-            np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
-        ]
-
-        # Compute AUC
-        _, labels, scores = zip(*idx_label_score)
-        labels = np.array(labels)
-        scores = np.array(scores)
-        self.test_auc = roc_auc_score(labels, scores)
+        # Save all results as member variables
+        self.test_indices = np.array(all_indices)
+        self.test_labels_exp_based = np.array(all_labels_exp_based)
+        self.test_labels_manual_based = np.array(all_labels_manual_based)
+        self.test_semi_targets = np.array(all_semi_targets)
+        self.test_file_ids = np.array(all_file_ids)
+        self.test_frame_ids = np.array(all_frame_ids)
+        self.test_scores = np.array(all_scores)
+
+        # No performance metric is calculated using labels, as this is pre-training
+        self.test_loss = epoch_loss / n_batches if n_batches > 0 else float("nan")
 
         # Log results
-        logger.info("Test Loss: {:.6f}".format(epoch_loss / n_batches))
-        logger.info("Test AUC: {:.2f}%".format(100.0 * self.test_auc))
+        logger.info("Test Loss: {:.6f}".format(self.test_loss))
         logger.info("Test Time: {:.3f}s".format(self.test_time))
         logger.info("Finished testing autoencoder.")
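Note: test() no longer computes AUC during pretraining, but it can still be derived offline from the stored scores and labels. A minimal sketch (treating the experiment-based labels as ground truth is an assumption):

    from sklearn.metrics import roc_auc_score

    test = deepSAD.ae_results["test"]
    auc = roc_auc_score(test["labels_exp_based"], test["scores"])
    print("AE test AUC: {:.2f}%".format(100.0 * auc))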