Full upload so as not to lose anything important
Deep-SAD-PyTorch/.gitignore (vendored, 1 line changed)
@@ -4,3 +4,4 @@
 **/__pycache__/
 data
 log
+infer
@@ -26,6 +26,17 @@
     config.allowUnfree = true;
     config.cudaSupport = true;
   };
+  thundersvm = import ./nix/thundersvm.nix {
+    inherit pkgs;
+    inherit (pkgs) fetchFromGitHub cmake gcc12Stdenv;
+    cudaPackages = pkgs.cudaPackages;
+  };
+
+  thundersvm-python = import ./nix/thundersvm-python.nix {
+    inherit pkgs;
+    pythonPackages = pkgs.python311Packages;
+    thundersvm = thundersvm;
+  };
   inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryApplication;
 in
 {
@@ -43,6 +54,7 @@
     buildInputs = with pkgs.python311Packages; [
       torch-bin
       torchvision-bin
+      thundersvm-python
     ];
     #LD_LIBRARY_PATH = with pkgs; lib.makeLibraryPath [
    #pkgs.stdenv.cc.cc
Deep-SAD-PyTorch/nix/thundersvm-python.nix (new file, 48 lines)
@@ -0,0 +1,48 @@
+{ pkgs
+, pythonPackages
+, thundersvm
+, ...
+}:
+
+pythonPackages.buildPythonPackage rec {
+  pname = "thundersvm-python";
+  version = "0.3.4";
+
+  src = pkgs.fetchFromGitHub {
+    owner = "Xtra-Computing";
+    repo = "thundersvm";
+    rev = "5c6a056ac7f474b085d5415c81c5d48a1419642a";
+    sha256 = "sha256-58YtOC8FK1d3ATETmwrEBc+/SQx0Pzlx5L1MgxXPQDE=";
+  };
+
+  #sourceRoot = "${src.name}/python";
+
+  buildInputs = [
+    pkgs.cudaPackages.cudatoolkit
+    thundersvm
+  ];
+
+  dependencies = [
+    pythonPackages.scikit-learn
+  ];
+
+  preBuild = ''
+    ln -s ${thundersvm} build
+    cd python
+  '';
+
+  doCheck = false;
+
+  nativeBuildInputs = [
+    pkgs.autoPatchelfHook
+  ];
+
+  meta = with pkgs.lib; {
+    description = "Python bindings for ThunderSVM, with CUDA support";
+    homepage = "https://github.com/Xtra-Computing/thundersvm";
+    license = licenses.asl20;
+    maintainers = [];
+    platforms = platforms.linux;
+  };
+}
+
Deep-SAD-PyTorch/nix/thundersvm.nix (new file, 35 lines)
@@ -0,0 +1,35 @@
+{ pkgs
+, fetchFromGitHub
+, cmake
+, gcc12Stdenv ? pkgs.gcc12Stdenv
+, cudaPackages ? pkgs.cudaPackages
+}:
+
+gcc12Stdenv.mkDerivation rec {
+  pname = "thundersvm";
+  version = "git";
+
+  src = fetchFromGitHub {
+    owner = "Xtra-Computing";
+    repo = "thundersvm";
+    rev = "5c6a056ac7f474b085d5415c81c5d48a1419642a";
+    sha256 = "sha256-58YtOC8FK1d3ATETmwrEBc+/SQx0Pzlx5L1MgxXPQDE=";
+  };
+
+  nativeBuildInputs = [
+    cmake
+  ];
+
+  buildInputs = [
+    cudaPackages.cudatoolkit
+  ];
+
+  meta = with pkgs.lib; {
+    description = "ThunderSVM CLI and library (compiled with CUDA)";
+    homepage = "https://github.com/Xtra-Computing/thundersvm";
+    license = licenses.asl20;
+    maintainers = [];
+    platforms = platforms.linux;
+  };
+}
+
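With these two derivations in the dev shell, the ThunderSVM Python bindings become importable next to torch. A minimal smoke test, not part of the commit (the data is random and only checks that the GPU-backed solver loads and fits; exact solver behaviour depends on the ThunderSVM build):

```python
import numpy as np
from thundersvm import OneClassSVM  # provided by the thundersvm-python package above

# Fit a one-class SVM on random "normal" data and score a few new points.
rng = np.random.default_rng(0)
X_train = rng.normal(size=(1000, 16)).astype(np.float32)

model = OneClassSVM(kernel="rbf", nu=0.1)  # ThunderSVM mirrors sklearn's constructor
model.fit(X_train)

scores = model.decision_function(rng.normal(size=(5, 16)).astype(np.float32))
print(scores.ravel())
```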
@@ -1,10 +1,12 @@
 import json
+import pickle
+
 import torch

 from base.base_dataset import BaseADDataset
-from networks.main import build_network, build_autoencoder
-from optim.DeepSAD_trainer import DeepSADTrainer
+from networks.main import build_autoencoder, build_network
 from optim.ae_trainer import AETrainer
+from optim.DeepSAD_trainer import DeepSADTrainer


 class DeepSAD(object):
@@ -65,6 +67,7 @@ class DeepSAD(object):
         weight_decay: float = 1e-6,
         device: str = "cuda",
         n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
     ):
         """Trains the Deep SAD model on the training data."""

@@ -82,7 +85,7 @@ class DeepSAD(object):
             n_jobs_dataloader=n_jobs_dataloader,
         )
         # Get the model
-        self.net = self.trainer.train(dataset, self.net)
+        self.net = self.trainer.train(dataset, self.net, k_fold_idx=k_fold_idx)
         self.results["train_time"] = self.trainer.train_time
         self.c = self.trainer.c.cpu().data.numpy().tolist()  # get as list

@@ -99,7 +102,11 @@ class DeepSAD(object):
         return self.trainer.infer(dataset, self.net)

     def test(
-        self, dataset: BaseADDataset, device: str = "cuda", n_jobs_dataloader: int = 0
+        self,
+        dataset: BaseADDataset,
+        device: str = "cuda",
+        n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
     ):
         """Tests the Deep SAD model on the test data."""

@@ -108,10 +115,13 @@ class DeepSAD(object):
             self.c, self.eta, device=device, n_jobs_dataloader=n_jobs_dataloader
         )

-        self.trainer.test(dataset, self.net)
+        self.trainer.test(dataset, self.net, k_fold_idx=k_fold_idx)

         # Get results
         self.results["test_auc"] = self.trainer.test_auc
+        self.results["test_roc"] = self.trainer.test_roc
+        self.results["test_prc"] = self.trainer.test_prc
+        self.results["test_ap"] = self.trainer.test_ap
         self.results["test_time"] = self.trainer.test_time
         self.results["test_scores"] = self.trainer.test_scores

@@ -126,6 +136,7 @@ class DeepSAD(object):
         weight_decay: float = 1e-6,
         device: str = "cuda",
         n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
     ):
         """Pretrains the weights for the Deep SAD network phi via autoencoder."""

@@ -144,13 +155,13 @@ class DeepSAD(object):
             device=device,
             n_jobs_dataloader=n_jobs_dataloader,
         )
-        self.ae_net = self.ae_trainer.train(dataset, self.ae_net)
+        self.ae_net = self.ae_trainer.train(dataset, self.ae_net, k_fold_idx=k_fold_idx)

         # Get train results
         self.ae_results["train_time"] = self.ae_trainer.train_time

         # Test
-        self.ae_trainer.test(dataset, self.ae_net)
+        self.ae_trainer.test(dataset, self.ae_net, k_fold_idx=k_fold_idx)

         # Get test results
         self.ae_results["test_auc"] = self.ae_trainer.test_auc
@@ -197,10 +208,11 @@ class DeepSAD(object):
         self.ae_net = build_autoencoder(self.net_name)
         self.ae_net.load_state_dict(model_dict["ae_net_dict"])

-    def save_results(self, export_json):
+    def save_results(self, export_pkl):
         """Save results dict to a JSON-file."""
-        with open(export_json, "w") as fp:
-            json.dump(self.results, fp)
+        with open(export_pkl, "wb") as fp:
+            # json.dump(self.results, fp)
+            pickle.dump(self.results, fp)

     def save_ae_results(self, export_json):
         """Save autoencoder results dict to a JSON-file."""
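The new k_fold_idx parameter is threaded from the DeepSAD wrapper down into the trainers, so one wrapper instance can be trained and evaluated once per fold. A hedged sketch of the intended call pattern, not part of the commit (the dataset construction and the network name are assumptions):

```python
# Assumed setup: `dataset` was built with k_fold=True so it exposes loaders_k_fold(),
# and DeepSAD is the wrapper class shown in this diff.
aucs = []
for fold_idx in range(5):  # 5 folds is the TorchvisionDataset default added below
    deepSAD = DeepSAD(eta=1.0)
    deepSAD.set_network("subter_LeNet")  # hypothetical network name
    deepSAD.train(dataset, device="cuda", k_fold_idx=fold_idx)
    deepSAD.test(dataset, device="cuda", k_fold_idx=fold_idx)
    aucs.append(deepSAD.results["test_auc"])

print("mean AUC over folds:", sum(aucs) / len(aucs))
```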
@@ -1,12 +1,16 @@
+from sklearn.model_selection import KFold
+from torch.utils.data import ConcatDataset, DataLoader, Subset
+
 from .base_dataset import BaseADDataset
-from torch.utils.data import DataLoader


 class TorchvisionDataset(BaseADDataset):
     """TorchvisionDataset class for datasets already implemented in torchvision.datasets."""

-    def __init__(self, root: str):
+    def __init__(self, root: str, k_fold_number: int = 5):
         super().__init__(root)
+        self.k_fold_number = k_fold_number
+        self.fold_indices = None

     def loaders(
         self,
@@ -50,3 +54,43 @@ class TorchvisionDataset(BaseADDataset):
             else None
         )
         return train_loader, test_loader, inference_loader
+
+    def loaders_k_fold(
+        self,
+        fold_idx: int,
+        batch_size: int,
+        shuffle_train=True,
+        shuffle_test=False,
+        num_workers: int = 0,
+    ) -> (DataLoader, DataLoader):
+        if self.fold_indices is None:
+            # Define the K-fold Cross Validator
+            kfold = KFold(n_splits=self.k_fold_number, shuffle=False)
+            self.fold_indices = []
+            # Generate indices for each fold and store them in a list
+            for train_indices, val_indices in kfold.split(self.data_set):
+                self.fold_indices.append((train_indices, val_indices))
+
+        train_loader = (
+            DataLoader(
+                dataset=Subset(self.data_set, self.fold_indices[fold_idx][0]),
+                batch_size=batch_size,
+                shuffle=shuffle_train,
+                num_workers=num_workers,
+                drop_last=True,
+            )
+            if self.data_set is not None
+            else None
+        )
+        test_loader = (
+            DataLoader(
+                dataset=Subset(self.data_set, self.fold_indices[fold_idx][1]),
+                batch_size=batch_size,
+                shuffle=shuffle_test,
+                num_workers=num_workers,
+                drop_last=False,
+            )
+            if self.data_set is not None
+            else None
+        )
+        return train_loader, test_loader
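loaders_k_fold computes the fold indices once (KFold without shuffling, cached on the dataset) and then hands out a train/validation loader pair per fold. A self-contained sketch of the same recipe with a toy dataset, to show how the split behaves (the toy data is an assumption, not from the repository):

```python
import torch
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset, TensorDataset

# Stand-in for self.data_set: 100 samples with 8 features each.
data_set = TensorDataset(torch.randn(100, 8), torch.zeros(100))

# Same recipe as loaders_k_fold above: deterministic KFold, indices cached once.
fold_indices = list(KFold(n_splits=5, shuffle=False).split(data_set))

for fold_idx, (train_idx, val_idx) in enumerate(fold_indices):
    train_loader = DataLoader(Subset(data_set, train_idx), batch_size=16, shuffle=True, drop_last=True)
    val_loader = DataLoader(Subset(data_set, val_idx), batch_size=16, shuffle=False)
    print(fold_idx, len(train_loader.dataset), len(val_loader.dataset))  # 80 train / 20 val per fold
```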
@@ -1,12 +1,17 @@
-import json
 import logging
+import pickle
 import time
-import torch
-import numpy as np

-from torch.utils.data import DataLoader
+import numpy as np
+import torch
 from sklearn.ensemble import IsolationForest
-from sklearn.metrics import roc_auc_score
+from sklearn.metrics import (
+    average_precision_score,
+    precision_recall_curve,
+    roc_auc_score,
+    roc_curve,
+)

 from base.base_dataset import BaseADDataset
 from networks.main import build_autoencoder
+
@@ -22,7 +27,7 @@ class IsoForest(object):
         contamination=0.1,
         n_jobs=-1,
         seed=None,
-        **kwargs
+        **kwargs,
     ):
         """Init Isolation Forest instance."""
         self.n_estimators = n_estimators
@@ -37,7 +42,7 @@ class IsoForest(object):
             contamination=contamination,
             n_jobs=n_jobs,
             random_state=seed,
-            **kwargs
+            **kwargs,
         )

         self.hybrid = hybrid
@@ -47,28 +52,44 @@ class IsoForest(object):
             "train_time": None,
             "test_time": None,
             "test_auc": None,
+            "test_roc": None,
             "test_scores": None,
         }

     def train(
-        self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
     ):
         """Trains the Isolation Forest model on the training data."""
         logger = logging.getLogger()

         # do not drop last batch for non-SGD optimization shallow_ssad
-        train_loader = DataLoader(
-            dataset=dataset.train_set,
-            batch_size=128,
-            shuffle=True,
-            num_workers=n_jobs_dataloader,
-            drop_last=False,
-        )
+        # drop_last necessary?
+        # train_loader = DataLoader(
+        # dataset=dataset.train_set,
+        # batch_size=128,
+        # shuffle=True,
+        # num_workers=n_jobs_dataloader,
+        # drop_last=False,
+        # )

+        if k_fold_idx is not None:
+            train_loader, _ = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=128,
+                num_workers=n_jobs_dataloader,
+            )
+        else:
+            train_loader, _, _ = dataset.loaders(
+                batch_size=128, num_workers=n_jobs_dataloader
+            )
         # Get data from loader
         X = ()
         for data in train_loader:
-            inputs, _, _, _ = data
+            inputs, _, _, _, _ = data
             inputs = inputs.to(device)
             if self.hybrid:
                 inputs = self.ae_net.encoder(
@@ -91,14 +112,25 @@ class IsoForest(object):
         logger.info("Finished training.")

     def test(
-        self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
     ):
         """Tests the Isolation Forest model on the test data."""
         logger = logging.getLogger()

-        _, test_loader, _ = dataset.loaders(
-            batch_size=128, num_workers=n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            _, test_loader = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=128,
+                num_workers=n_jobs_dataloader,
+            )
+        else:
+            _, test_loader, _ = dataset.loaders(
+                batch_size=128, num_workers=n_jobs_dataloader
+            )

         # Get data from loader
         idx_label_score = []
@@ -106,7 +138,7 @@ class IsoForest(object):
         idxs = []
         labels = []
         for data in test_loader:
-            inputs, label_batch, _, idx = data
+            inputs, label_batch, _, idx, _ = data
             inputs, label_batch, idx = (
                 inputs.to(device),
                 label_batch.to(device),
@@ -140,6 +172,9 @@ class IsoForest(object):
         labels = np.array(labels)
         scores = np.array(scores)
         self.results["test_auc"] = roc_auc_score(labels, scores)
+        self.results["test_roc"] = roc_curve(labels, scores)
+        self.results["test_prc"] = precision_recall_curve(labels, scores)
+        self.results["test_ap"] = average_precision_score(labels, scores)

         # Log results
         logger.info("Test AUC: {:.2f}%".format(100.0 * self.results["test_auc"]))
@@ -178,7 +213,8 @@ class IsoForest(object):
         """Load Isolation Forest model from import_path."""
         pass

-    def save_results(self, export_json):
+    def save_results(self, export_pkl):
         """Save results dict to a JSON-file."""
-        with open(export_json, "w") as fp:
-            json.dump(self.results, fp)
+        with open(export_pkl, "wb") as fp:
+            # json.dump(self.results, fp)
+            pickle.dump(self.results, fp)
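The results dict now stores the full ROC and precision-recall curves next to the scalar AUC. A small, self-contained illustration of what those sklearn calls return (synthetic labels and scores, not data from the repository):

```python
import numpy as np
from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

labels = np.array([0, 0, 1, 1, 0, 1])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])

fpr, tpr, roc_thresholds = roc_curve(labels, scores)              # one point per threshold
precision, recall, pr_thresholds = precision_recall_curve(labels, scores)
print(roc_auc_score(labels, scores), average_precision_score(labels, scores))
```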
@@ -1,12 +1,18 @@
 import json
 import logging
+import pickle
 import time
-import torch
-import numpy as np

-from torch.utils.data import DataLoader
-from sklearn.svm import OneClassSVM
-from sklearn.metrics import roc_auc_score
+import numpy as np
+import torch
+from sklearn.metrics import (
+    average_precision_score,
+    precision_recall_curve,
+    roc_auc_score,
+    roc_curve,
+)
+from thundersvm import OneClassSVM

 from base.base_dataset import BaseADDataset
 from networks.main import build_autoencoder
+
@@ -21,7 +27,7 @@ class OCSVM(object):
         self.rho = None
         self.gamma = None

-        self.model = OneClassSVM(kernel=kernel, nu=nu)
+        self.model = OneClassSVM(kernel=kernel, nu=nu, verbose=True, max_mem_size=4048)

         self.hybrid = hybrid
         self.ae_net = None  # autoencoder network for the case of a hybrid model
@@ -40,24 +46,31 @@ class OCSVM(object):
         }

     def train(
-        self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
+        batch_size: int = 32,
     ):
         """Trains the OC-SVM model on the training data."""
         logger = logging.getLogger()

-        # do not drop last batch for non-SGD optimization shallow_ssad
-        train_loader = DataLoader(
-            dataset=dataset.train_set,
-            batch_size=128,
-            shuffle=True,
-            num_workers=n_jobs_dataloader,
-            drop_last=False,
-        )
+        if k_fold_idx is not None:
+            train_loader, _ = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=batch_size,
+                num_workers=n_jobs_dataloader,
+            )
+        else:
+            train_loader, _, _ = dataset.loaders(
+                batch_size=batch_size, num_workers=n_jobs_dataloader
+            )

         # Get data from loader
         X = ()
         for data in train_loader:
-            inputs, _, _, _ = data
+            inputs, _, _, _, _ = data
             inputs = inputs.to(device)
             if self.hybrid:
                 inputs = self.ae_net.encoder(
@@ -77,14 +90,21 @@ class OCSVM(object):
         best_auc = 0.0

         # Sample hold-out set from test set
-        _, test_loader, _ = dataset.loaders(
-            batch_size=128, num_workers=n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            _, test_loader = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=batch_size,
+                num_workers=n_jobs_dataloader,
+            )
+        else:
+            _, test_loader, _ = dataset.loaders(
+                batch_size=batch_size, num_workers=n_jobs_dataloader
+            )

         X_test = ()
         labels = []
         for data in test_loader:
-            inputs, label_batch, _, _ = data
+            inputs, label_batch, _, _, _ = data
             inputs, label_batch = inputs.to(device), label_batch.to(device)
             if self.hybrid:
                 inputs = self.ae_net.encoder(
@@ -102,8 +122,9 @@ class OCSVM(object):
             np.sum(labels == 1),
         )
         n_val = int(0.1 * n_test)
-        n_val_normal, n_val_outlier = int(n_val * (n_normal / n_test)), int(
-            n_val * (n_outlier / n_test)
+        n_val_normal, n_val_outlier = (
+            int(n_val * (n_normal / n_test)),
+            int(n_val * (n_outlier / n_test)),
         )
         perm = np.random.permutation(n_test)
         X_val = np.concatenate(
@@ -116,9 +137,14 @@ class OCSVM(object):

         i = 1
         for gamma in gammas:

             # Model candidate
-            model = OneClassSVM(kernel=self.kernel, nu=self.nu, gamma=gamma)
+            model = OneClassSVM(
+                kernel=self.kernel,
+                nu=self.nu,
+                gamma=gamma,
+                verbose=True,
+                max_mem_size=4048,
+            )

             # Train
             start_time = time.time()
@@ -147,7 +173,9 @@ class OCSVM(object):

         # If hybrid, also train a model with linear kernel
         if self.hybrid:
-            self.linear_model = OneClassSVM(kernel="linear", nu=self.nu)
+            self.linear_model = OneClassSVM(
+                kernel="linear", nu=self.nu, max_mem_size=4048
+            )
             start_time = time.time()
             self.linear_model.fit(X)
             train_time = time.time() - start_time
@@ -160,14 +188,26 @@ class OCSVM(object):
         logger.info("Finished training.")

     def test(
-        self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        k_fold_idx: int = None,
+        batch_size: int = 32,
     ):
         """Tests the OC-SVM model on the test data."""
         logger = logging.getLogger()

-        _, test_loader, _ = dataset.loaders(
-            batch_size=128, num_workers=n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            _, test_loader = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=batch_size,
+                num_workers=n_jobs_dataloader,
+            )
+        else:
+            _, test_loader, _ = dataset.loaders(
+                batch_size=batch_size, num_workers=n_jobs_dataloader
+            )

         # Get data from loader
         idx_label_score = []
@@ -175,7 +215,7 @@ class OCSVM(object):
         idxs = []
         labels = []
         for data in test_loader:
-            inputs, label_batch, _, idx = data
+            inputs, label_batch, _, idx, _ = data
             inputs, label_batch, idx = (
                 inputs.to(device),
                 label_batch.to(device),
@@ -212,6 +252,9 @@ class OCSVM(object):
         labels = np.array(labels)
         scores = np.array(scores)
         self.results["test_auc"] = roc_auc_score(labels, scores)
+        self.results["test_roc"] = roc_curve(labels, scores)
+        self.results["test_prc"] = precision_recall_curve(labels, scores)
+        self.results["test_ap"] = average_precision_score(labels, scores)

         # If hybrid, also test model with linear kernel
         if self.hybrid:
@@ -268,7 +311,7 @@ class OCSVM(object):
         """Load OC-SVM model from import_path."""
         pass

-    def save_results(self, export_json):
-        """Save results dict to a JSON-file."""
-        with open(export_json, "w") as fp:
-            json.dump(self.results, fp)
+    def save_results(self, export_pkl):
+        with open(export_pkl, "wb") as fp:
+            # json.dump(self.results, fp)
+            pickle.dump(self.results, fp)
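save_results in the baselines and in DeepSAD now writes a pickle instead of JSON, which is what makes storing the ROC/PR curve arrays practical (NumPy arrays are not JSON-serialisable). Reading the file back is a one-liner; a hedged sketch, assuming a results file produced by one of the save_results methods above (the path is an assumption):

```python
import pickle

with open("results.pkl", "rb") as fp:  # whatever path was passed as export_pkl
    results = pickle.load(fp)

print(results["test_auc"])
fpr, tpr, thresholds = results["test_roc"]  # tuple exactly as returned by sklearn's roc_curve
```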
Deep-SAD-PyTorch/src/datasets/esmerasplit.py (new file, 264 lines)
@@ -0,0 +1,264 @@
+import logging
+from pathlib import Path
+from typing import Callable, Optional
+
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+from PIL import Image
+from torch.utils.data import Subset
+from torchvision.datasets import VisionDataset
+
+from base.torchvision_dataset import TorchvisionDataset
+
+from .preprocessing import create_semisupervised_setting
+
+
+class EsmeraSplit_Dataset(TorchvisionDataset):
+    def __init__(
+        self,
+        root: str,
+        ratio_known_normal: float = 0.0,
+        ratio_known_outlier: float = 0.0,
+        ratio_pollution: float = 0.0,
+        inference: bool = False,
+    ):
+        super().__init__(root)
+
+        # Define normal and outlier classes
+        self.n_classes = 2  # 0: normal, 1: outlier
+        self.normal_classes = tuple([0])
+        self.outlier_classes = tuple([1])
+        self.inference_set = None
+
+        # MNIST preprocessing: feature scaling to [0, 1]
+        # FIXME understand mnist feature scaling and check if it or other preprocessing is necessary for elpv
+        transform = transforms.ToTensor()
+        target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))
+
+        if inference:
+            self.inference_set = EsmeraSplitInference(
+                root=self.root,
+                transform=transform,
+            )
+        else:
+            # Get train set
+            train_set = EsmeraSplitTraining(
+                root=self.root,
+                transform=transform,
+                target_transform=target_transform,
+                train=True,
+            )
+
+            # Create semi-supervised setting
+            idx, _, semi_targets = create_semisupervised_setting(
+                train_set.targets.cpu().data.numpy(),
+                self.normal_classes,
+                self.outlier_classes,
+                self.outlier_classes,
+                ratio_known_normal,
+                ratio_known_outlier,
+                ratio_pollution,
+            )
+            train_set.semi_targets[idx] = torch.tensor(
+                np.array(semi_targets, dtype=np.int8)
+            )  # set respective semi-supervised labels
+
+            # Subset train_set to semi-supervised setup
+            self.train_set = Subset(train_set, idx)
+
+            # Get test set
+            self.test_set = EsmeraSplitTraining(
+                root=self.root,
+                train=False,
+                transform=transform,
+                target_transform=target_transform,
+            )
+
+
+def split_array_into_subarrays(array, split_height, split_width):
+    original_shape = array.shape
+    height, width = original_shape[-2], original_shape[-1]
+    assert height % split_height == 0, "The height is not divisible by the split_height"
+    assert width % split_width == 0, "The width is not divisible by the split_width"
+    num_splits_height = height // split_height
+    num_splits_width = width // split_width
+    reshaped_array = array.reshape(
+        -1, num_splits_height, split_height, num_splits_width, split_width
+    )
+    transposed_array = reshaped_array.transpose(0, 1, 3, 2, 4)
+    final_array = transposed_array.reshape(-1, split_height, split_width)
+    return final_array
+
+
+class EsmeraSplitTraining(VisionDataset):
+    def __init__(
+        self,
+        root: str,
+        transforms: Optional[Callable] = None,
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
+        train=False,
+        split=0.7,
+        seed=0,
+        height=16,
+        width=256,
+    ):
+        super(EsmeraSplitTraining, self).__init__(
+            root, transforms, transform, target_transform
+        )
+
+        experiments_data = []
+        experiments_targets = []
+        validation_files = []
+        experiment_files = []
+
+        logger = logging.getLogger()
+
+        for experiment_file in Path(root).iterdir():
+            if experiment_file.is_dir() and experiment_file.name == "validation":
+                for validation_file in experiment_file.iterdir():
+                    if validation_file.suffix != ".npy":
+                        continue
+                    validation_files.append(experiment_file)
+            if experiment_file.suffix != ".npy":
+                continue
+            experiment_files.append(experiment_file)
+            experiment_data = np.load(experiment_file)
+
+            if (
+                experiment_data.shape[1] % height != 0
+                or experiment_data.shape[2] % width != 0
+            ):
+                logger.error(
+                    f"Experiment {experiment_file.name} has shape {experiment_data.shape} which is not divisible by {height}x{width}"
+                )
+            experiment_data = split_array_into_subarrays(experiment_data, height, width)
+
+            # experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
+            experiment_targets = (
+                np.ones(experiment_data.shape[0], dtype=np.int8)
+                if "smoke" in experiment_file.name
+                else np.zeros(experiment_data.shape[0], dtype=np.int8)
+            )
+            experiments_data.append(experiment_data)
+            experiments_targets.append(experiment_targets)
+
+        filtered_validation_files = []
+        for validation_file in validation_files:
+            validation_file_name = validation_file.name
+            file_exists_in_experiments = any(
+                experiment_file.name == validation_file_name
+                for experiment_file in experiment_files
+            )
+            if not file_exists_in_experiments:
+                filtered_validation_files.append(validation_file)
+        validation_files = filtered_validation_files
+
+        logger.info(
+            f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
+        )
+        logger.info(
+            f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
+        )
+
+        lidar_projections = np.concatenate(experiments_data)
+        smoke_presence = np.concatenate(experiments_targets)
+
+        np.random.seed(seed)
+
+        shuffled_indices = np.random.permutation(lidar_projections.shape[0])
+        shuffled_lidar_projections = lidar_projections[shuffled_indices]
+        shuffled_smoke_presence = smoke_presence[shuffled_indices]
+
+        split_idx = int(split * shuffled_lidar_projections.shape[0])
+
+        if train:
+            self.data = shuffled_lidar_projections[:split_idx]
+            self.targets = shuffled_smoke_presence[:split_idx]
+
+        else:
+            self.data = shuffled_lidar_projections[split_idx:]
+            self.targets = shuffled_smoke_presence[split_idx:]
+
+        self.data = np.nan_to_num(self.data)
+
+        self.data = torch.tensor(self.data)
+        self.targets = torch.tensor(self.targets, dtype=torch.int8)
+
+        self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        """Override the original method of the MNIST class.
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target, semi_target, index)
+        """
+        img, target, semi_target = (
+            self.data[index],
+            int(self.targets[index]),
+            int(self.semi_targets[index]),
+        )
+
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        img = Image.fromarray(img.numpy(), mode="F")
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return img, target, semi_target, index
+
+
+class EsmeraSplitInference(VisionDataset):
+    def __init__(
+        self,
+        root: str,
+        transforms: Optional[Callable] = None,
+        transform: Optional[Callable] = None,
+    ):
+        super(EsmeraSplitInference, self).__init__(root, transforms, transform)
+        logger = logging.getLogger()
+
+        self.experiment_file_path = Path(root)
+
+        if not self.experiment_file_path.is_file():
+            logger.error(
+                "For inference the data path has to be a single experiment file!"
+            )
+            raise Exception("Inference data is not a loadable file!")
+
+        self.data = np.load(self.experiment_file_path)
+        self.data = split_array_into_subarrays(self.data, 16, 256)
+        self.data = np.nan_to_num(self.data)
+        self.data = torch.tensor(self.data)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        """Override the original method of the MNIST class.
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, index)
+        """
+        img = self.data[index]
+
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        img = Image.fromarray(img.numpy(), mode="F")
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        return img, index
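split_array_into_subarrays tiles each (H, W) projection into non-overlapping split_height x split_width patches purely by reshaping and transposing; nothing is interpolated or dropped as long as the divisibility asserts hold. A quick shape check with synthetic frames (not data from the dataset):

```python
import numpy as np

def split_array_into_subarrays(array, split_height, split_width):
    # Same reshape/transpose recipe as in esmerasplit.py above.
    height, width = array.shape[-2], array.shape[-1]
    reshaped = array.reshape(
        -1, height // split_height, split_height, width // split_width, split_width
    )
    return reshaped.transpose(0, 1, 3, 2, 4).reshape(-1, split_height, split_width)

frames = np.arange(2 * 32 * 512).reshape(2, 32, 512)  # 2 frames of 32x512
tiles = split_array_into_subarrays(frames, 16, 256)
print(tiles.shape)  # (8, 16, 256): each frame yields 2x2 = 4 tiles
```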
@@ -18,6 +18,9 @@ def load_dataset(
     ratio_pollution: float = 0.0,
     random_state=None,
     inference: bool = False,
+    k_fold: bool = False,
+    num_known_normal: int = 0,
+    num_known_outlier: int = 0,
 ):
     """Loads the dataset."""

@@ -46,6 +49,9 @@ def load_dataset(
             ratio_known_outlier=ratio_known_outlier,
             ratio_pollution=ratio_pollution,
             inference=inference,
+            k_fold=k_fold,
+            num_known_normal=num_known_normal,
+            num_known_outlier=num_known_outlier,
         )

     if dataset_name == "subtersplit":
@@ -1,3 +1,4 @@
+import json
 import logging
 import random
 from pathlib import Path
@@ -6,12 +7,13 @@ from typing import Callable, Optional
 import numpy as np
 import torch
 import torchvision.transforms as transforms
-from base.torchvision_dataset import TorchvisionDataset
 from PIL import Image
 from torch.utils.data import Subset
 from torch.utils.data.dataset import ConcatDataset
 from torchvision.datasets import VisionDataset

+from base.torchvision_dataset import TorchvisionDataset
+
 from .preprocessing import create_semisupervised_setting


@@ -23,8 +25,22 @@ class SubTer_Dataset(TorchvisionDataset):
         ratio_known_outlier: float = 0.0,
         ratio_pollution: float = 0.0,
         inference: bool = False,
+        k_fold: bool = False,
+        num_known_normal: int = 0,
+        num_known_outlier: int = 0,
+        only_use_given_semi_targets_for_evaluation: bool = True,
     ):
         super().__init__(root)
+        if Path(root).is_dir():
+            with open(Path(root) / "semi_targets.json", "r") as f:
+                data = json.load(f)
+            semi_targets_given = {
+                item["filename"]: (
+                    item["semi_target_begin_frame"],
+                    item["semi_target_end_frame"],
+                )
+                for item in data["files"]
+            }

         # Define normal and outlier classes
         self.n_classes = 2  # 0: normal, 1: outlier
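The constructor above now expects a semi_targets.json next to the .npy experiment files, with one entry per file naming the frame range that is known to be anomalous. A hedged example of the structure implied by the keys read above (the filename and frame numbers are made up):

```python
import json

semi_targets = {
    "files": [
        {
            "filename": "experiment_01_smoke.npy",  # made-up name
            "semi_target_begin_frame": 120,
            "semi_target_end_frame": 480,
        },
    ]
}
with open("semi_targets.json", "w") as f:
    json.dump(semi_targets, f, indent=2)

# Reading it back mirrors the constructor above:
with open("semi_targets.json") as f:
    data = json.load(f)
given = {
    item["filename"]: (item["semi_target_begin_frame"], item["semi_target_end_frame"])
    for item in data["files"]
}
print(given)
```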
@@ -43,38 +59,146 @@ class SubTer_Dataset(TorchvisionDataset):
                 transform=transform,
             )
         else:
-            # Get train set
-            train_set = SubTerTraining(
-                root=self.root,
-                transform=transform,
-                target_transform=target_transform,
-                train=True,
-            )
-
-            # Create semi-supervised setting
-            idx, _, semi_targets = create_semisupervised_setting(
-                train_set.targets.cpu().data.numpy(),
-                self.normal_classes,
-                self.outlier_classes,
-                self.outlier_classes,
-                ratio_known_normal,
-                ratio_known_outlier,
-                ratio_pollution,
-            )
-            train_set.semi_targets[idx] = torch.tensor(
-                np.array(semi_targets, dtype=np.int8)
-            )  # set respective semi-supervised labels
-
-            # Subset train_set to semi-supervised setup
-            self.train_set = Subset(train_set, idx)
-
-            # Get test set
-            self.test_set = SubTerTraining(
-                root=self.root,
-                train=False,
-                transform=transform,
-                target_transform=target_transform,
-            )
+            if k_fold:
+                # Get train set
+                data_set = SubTerTraining(
+                    root=self.root,
+                    transform=transform,
+                    target_transform=target_transform,
+                    train=True,
+                    split=1,
+                    semi_targets_given=semi_targets_given,
+                )
+
+                np.random.seed(0)
+                semi_targets = data_set.semi_targets.numpy()
+
+                # Find indices where semi_targets is -1 (abnormal) or 1 (normal)
+                normal_indices = np.where(semi_targets == 1)[0]
+                abnormal_indices = np.where(semi_targets == -1)[0]
+
+                # Randomly select the specified number of indices to keep for each category
+                if len(normal_indices) > num_known_normal:
+                    keep_normal_indices = np.random.choice(
+                        normal_indices, size=num_known_normal, replace=False
+                    )
+                else:
+                    keep_normal_indices = (
+                        normal_indices  # Keep all if there are fewer than required
+                    )
+
+                if len(abnormal_indices) > num_known_outlier:
+                    keep_abnormal_indices = np.random.choice(
+                        abnormal_indices, size=num_known_outlier, replace=False
+                    )
+                else:
+                    keep_abnormal_indices = (
+                        abnormal_indices  # Keep all if there are fewer than required
+                    )
+
+                # Set all values to 0, then restore only the selected -1 and 1 values
+                semi_targets[(semi_targets == 1) | (semi_targets == -1)] = 0
+                semi_targets[keep_normal_indices] = 1
+                semi_targets[keep_abnormal_indices] = -1
+                data_set.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
+
+                self.data_set = data_set
+
+                # # Create semi-supervised setting
+                # idx, _, semi_targets = create_semisupervised_setting(
+                # data_set.targets.cpu().data.numpy(),
+                # self.normal_classes,
+                # self.outlier_classes,
+                # self.outlier_classes,
+                # ratio_known_normal,
+                # ratio_known_outlier,
+                # ratio_pollution,
+                # )
+                # data_set.semi_targets[idx] = torch.tensor(
+                # np.array(semi_targets, dtype=np.int8)
+                # ) # set respective semi-supervised labels
+
+                # # Subset data_set to semi-supervised setup
+                # self.data_set = Subset(data_set, idx)
+            else:
+                # Get train set
+                if only_use_given_semi_targets_for_evaluation:
+                    pass
+                    train_set = SubTerTrainingSelective(
+                        root=self.root,
+                        transform=transform,
+                        target_transform=target_transform,
+                        train=True,
+                        num_known_outlier=num_known_outlier,
+                        semi_targets_given=semi_targets_given,
+                    )
+
+                    np.random.seed(0)
+                    semi_targets = train_set.semi_targets.numpy()
+
+                    # Find indices where semi_targets is -1 (abnormal) or 1 (normal)
+                    normal_indices = np.where(semi_targets == 1)[0]
+
+                    # Randomly select the specified number of indices to keep for each category
+                    if len(normal_indices) > num_known_normal:
+                        keep_normal_indices = np.random.choice(
+                            normal_indices, size=num_known_normal, replace=False
+                        )
+                    else:
+                        keep_normal_indices = (
+                            normal_indices  # Keep all if there are fewer than required
+                        )
+
+                    # Set all values to 0, then restore only the selected -1 and 1 values
+                    semi_targets[semi_targets == 1] = 0
+                    semi_targets[keep_normal_indices] = 1
+                    train_set.semi_targets = torch.tensor(
+                        semi_targets, dtype=torch.int8
+                    )
+
+                    self.train_set = train_set
+                    self.test_set = SubTerTrainingSelective(
+                        root=self.root,
+                        transform=transform,
+                        target_transform=target_transform,
+                        num_known_outlier=num_known_outlier,
+                        train=False,
+                        semi_targets_given=semi_targets_given,
+                    )
+                else:
+                    train_set = SubTerTraining(
+                        root=self.root,
+                        transform=transform,
+                        target_transform=target_transform,
+                        train=True,
+                        semi_targets_given=semi_targets_given,
+                    )
+
+                    # Create semi-supervised setting
+                    idx, _, semi_targets = create_semisupervised_setting(
+                        train_set.targets.cpu().data.numpy(),
+                        self.normal_classes,
+                        self.outlier_classes,
+                        self.outlier_classes,
+                        ratio_known_normal,
+                        ratio_known_outlier,
+                        ratio_pollution,
+                    )
+                    train_set.semi_targets[idx] = torch.tensor(
+                        np.array(semi_targets, dtype=np.int8)
+                    )  # set respective semi-supervised labels
+
+                    # Subset train_set to semi-supervised setup
+                    self.train_set = Subset(train_set, idx)
+
+                    # Get test set
+                    self.test_set = SubTerTraining(
+                        root=self.root,
+                        train=False,
+                        transform=transform,
+                        target_transform=target_transform,
+                        semi_targets_given=semi_targets_given,
+                    )


 class SubTerTraining(VisionDataset):
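The k_fold branch above reduces the pool of labelled frames to at most num_known_normal / num_known_outlier examples and zeroes out the rest, so the amount of semi-supervised signal stays fixed while the folds rotate. The core of that selection in isolation, with synthetic labels and a fixed seed (not repository data):

```python
import numpy as np

np.random.seed(0)
semi_targets = np.array([1, 1, 1, 0, -1, -1, 0, 1, -1, 0], dtype=np.int8)

normal_indices = np.where(semi_targets == 1)[0]
abnormal_indices = np.where(semi_targets == -1)[0]

keep_normal = np.random.choice(normal_indices, size=2, replace=False)      # num_known_normal = 2
keep_abnormal = np.random.choice(abnormal_indices, size=1, replace=False)  # num_known_outlier = 1

# Drop every label, then restore only the sampled ones.
semi_targets[(semi_targets == 1) | (semi_targets == -1)] = 0
semi_targets[keep_normal] = 1
semi_targets[keep_abnormal] = -1
print(semi_targets)  # exactly two 1s and one -1 remain labelled
```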
@@ -87,6 +211,8 @@ class SubTerTraining(VisionDataset):
         train=False,
         split=0.7,
         seed=0,
+        semi_targets_given=None,
+        only_use_given_semi_targets_for_evaluation=False,
     ):
         super(SubTerTraining, self).__init__(
             root, transforms, transform, target_transform
@@ -94,73 +220,120 @@ class SubTerTraining(VisionDataset):

         experiments_data = []
         experiments_targets = []
-        validation_files = []
+        experiments_semi_targets = []
+        # validation_files = []
         experiment_files = []
+        experiment_frame_ids = []
+        experiment_file_ids = []
+        file_names = {}

-        for experiment_file in Path(root).iterdir():
-            if experiment_file.is_dir() and experiment_file.name == "validation":
-                for validation_file in experiment_file.iterdir():
-                    if validation_file.suffix != ".npy":
-                        continue
-                    validation_files.append(experiment_file)
+        for file_idx, experiment_file in enumerate(sorted(Path(root).iterdir())):
+            # if experiment_file.is_dir() and experiment_file.name == "validation":
+            # for validation_file in experiment_file.iterdir():
+            # if validation_file.suffix != ".npy":
+            # continue
+            # validation_files.append(experiment_file)
             if experiment_file.suffix != ".npy":
                 continue
+            file_names[file_idx] = experiment_file.name
             experiment_files.append(experiment_file)
             experiment_data = np.load(experiment_file)
-            # experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
             experiment_targets = (
                 np.ones(experiment_data.shape[0], dtype=np.int8)
                 if "smoke" in experiment_file.name
                 else np.zeros(experiment_data.shape[0], dtype=np.int8)
             )
+            # experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
+            experiment_semi_targets = np.zeros(experiment_data.shape[0], dtype=np.int8)
+            if "smoke" not in experiment_file.name:
+                experiment_semi_targets = np.ones(
+                    experiment_data.shape[0], dtype=np.int8
+                )
+            else:
+                if semi_targets_given:
+                    if experiment_file.name in semi_targets_given:
+                        semi_target_begin_frame, semi_target_end_frame = (
+                            semi_targets_given[experiment_file.name]
+                        )
+                        experiment_semi_targets[
+                            semi_target_begin_frame:semi_target_end_frame
+                        ] = -1
+                else:
+                    experiment_semi_targets = (
+                        np.ones(experiment_data.shape[0], dtype=np.int8) * -1
+                    )
+
+            experiment_file_ids.append(
+                np.full(experiment_data.shape[0], file_idx, dtype=np.int8)
+            )
+            experiment_frame_ids.append(
+                np.arange(experiment_data.shape[0], dtype=np.int32)
+            )
             experiments_data.append(experiment_data)
             experiments_targets.append(experiment_targets)
+            experiments_semi_targets.append(experiment_semi_targets)

-        filtered_validation_files = []
-        for validation_file in validation_files:
-            validation_file_name = validation_file.name
-            file_exists_in_experiments = any(
-                experiment_file.name == validation_file_name
-                for experiment_file in experiment_files
-            )
-            if not file_exists_in_experiments:
-                filtered_validation_files.append(validation_file)
-        validation_files = filtered_validation_files
+        # filtered_validation_files = []
+        # for validation_file in validation_files:
+        # validation_file_name = validation_file.name
+        # file_exists_in_experiments = any(
+        # experiment_file.name == validation_file_name
+        # for experiment_file in experiment_files
+        # )
+        # if not file_exists_in_experiments:
+        # filtered_validation_files.append(validation_file)
+        # validation_files = filtered_validation_files

         logger = logging.getLogger()

         logger.info(
             f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
         )
-        logger.info(
-            f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
-        )
+        # logger.info(
+        # f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
+        # )

         lidar_projections = np.concatenate(experiments_data)
         smoke_presence = np.concatenate(experiments_targets)
+        semi_targets = np.concatenate(experiments_semi_targets)
+        file_ids = np.concatenate(experiment_file_ids)
+        frame_ids = np.concatenate(experiment_frame_ids)
+        self.file_names = file_names

         np.random.seed(seed)

         shuffled_indices = np.random.permutation(lidar_projections.shape[0])
         shuffled_lidar_projections = lidar_projections[shuffled_indices]
         shuffled_smoke_presence = smoke_presence[shuffled_indices]
+        shuffled_file_ids = file_ids[shuffled_indices]
+        shuffled_frame_ids = frame_ids[shuffled_indices]
+        shuffled_semis = semi_targets[shuffled_indices]

         split_idx = int(split * shuffled_lidar_projections.shape[0])

         if train:
             self.data = shuffled_lidar_projections[:split_idx]
             self.targets = shuffled_smoke_presence[:split_idx]
+            semi_targets = shuffled_semis[:split_idx]
+            self.shuffled_file_ids = shuffled_file_ids[:split_idx]
+            self.shuffled_frame_ids = shuffled_frame_ids[:split_idx]

         else:
             self.data = shuffled_lidar_projections[split_idx:]
             self.targets = shuffled_smoke_presence[split_idx:]
+            semi_targets = shuffled_semis[split_idx:]
+            self.shuffled_file_ids = shuffled_file_ids[split_idx:]
+            self.shuffled_frame_ids = shuffled_frame_ids[split_idx:]

         self.data = np.nan_to_num(self.data)

         self.data = torch.tensor(self.data)
         self.targets = torch.tensor(self.targets, dtype=torch.int8)

-        self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)
+        if semi_targets_given is not None:
+            self.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
+        else:
+            self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)

     def __len__(self):
         return len(self.data)
@@ -173,10 +346,12 @@ class SubTerTraining(VisionDataset):
         Returns:
             tuple: (image, target, semi_target, index)
         """
-        img, target, semi_target = (
+        img, target, semi_target, file_id, frame_id = (
             self.data[index],
             int(self.targets[index]),
             int(self.semi_targets[index]),
+            int(self.shuffled_file_ids[index]),
+            int(self.shuffled_frame_ids[index]),
         )

         # doing this so that it is consistent with all other datasets
@@ -189,7 +364,10 @@ class SubTerTraining(VisionDataset):
         if self.target_transform is not None:
             target = self.target_transform(target)

-        return img, target, semi_target, index
+        return img, target, semi_target, index, (file_id, frame_id)
+
+    def get_file_name_from_idx(self, idx: int):
+        return self.file_names[idx]


 class SubTerInference(VisionDataset):
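Because __getitem__ now returns a fifth element, every consumer has to unpack five values per batch (the baselines above were updated accordingly), and the (file_id, frame_id) pair lets a score be traced back to its source recording via get_file_name_from_idx. A runnable sketch with a stand-in dataset that mimics the new return signature (the fake dataset is an assumption; the real one needs the .npy files):

```python
import torch
from torch.utils.data import DataLoader, Dataset

class FakeSubTer(Dataset):
    """Minimal stand-in mimicking SubTerTraining's new 5-tuple __getitem__."""
    def __len__(self):
        return 8
    def __getitem__(self, index):
        img = torch.zeros(1, 16, 256)
        # (image, target, semi_target, index, (file_id, frame_id))
        return img, 0, 0, index, (0, index)

loader = DataLoader(FakeSubTer(), batch_size=4)
for inputs, targets, semi_targets, indices, (file_ids, frame_ids) in loader:
    print(inputs.shape, file_ids.tolist(), frame_ids.tolist())
```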
@@ -235,3 +413,191 @@ class SubTerInference(VisionDataset):
|
|||||||
img = self.transform(img)
|
img = self.transform(img)
|
||||||
|
|
||||||
return img, index
|
return img, index
|
||||||
|
|
||||||
|
|
||||||
+class SubTerTrainingSelective(VisionDataset):
+    def __init__(
+        self,
+        root: str,
+        transforms: Optional[Callable] = None,
+        transform: Optional[Callable] = None,
+        target_transform: Optional[Callable] = None,
+        train=False,
+        num_known_outlier=0,
+        seed=0,
+        semi_targets_given=None,
+        ratio_test_normal_to_anomalous=3,
+    ):
+        super(SubTerTrainingSelective, self).__init__(
+            root, transforms, transform, target_transform
+        )
+
+        logger = logging.getLogger()
+
+        if semi_targets_given is None:
+            raise ValueError(
+                "semi_targets_given must be provided for selective training"
+            )
+
+        experiments_data = []
+        experiments_targets = []
+        experiments_semi_targets = []
+        # validation_files = []
+        experiment_files = []
+        experiment_frame_ids = []
+        experiment_file_ids = []
+        file_names = {}
+
+        for file_idx, experiment_file in enumerate(sorted(Path(root).iterdir())):
+            if experiment_file.suffix != ".npy":
+                continue
+
+            file_names[file_idx] = experiment_file.name
+            experiment_files.append(experiment_file)
+            experiment_data = np.load(experiment_file)
+
+            experiment_targets = (
+                np.ones(experiment_data.shape[0], dtype=np.int8)
+                if "smoke" in experiment_file.name
+                else np.zeros(experiment_data.shape[0], dtype=np.int8)
+            )
+
+            experiment_semi_targets = np.zeros(experiment_data.shape[0], dtype=np.int8)
+            if "smoke" not in experiment_file.name:
+                experiment_semi_targets = np.ones(
+                    experiment_data.shape[0], dtype=np.int8
+                )
+            elif experiment_file.name in semi_targets_given:
+                semi_target_begin_frame, semi_target_end_frame = semi_targets_given[
+                    experiment_file.name
+                ]
+                experiment_semi_targets[
+                    semi_target_begin_frame:semi_target_end_frame
+                ] = -1
+            else:
+                raise ValueError(
+                    "smoke experiment not in given semi_targets. required for selective training"
+                )
+
+            experiment_file_ids.append(
+                np.full(experiment_data.shape[0], file_idx, dtype=np.int8)
+            )
+            experiment_frame_ids.append(
+                np.arange(experiment_data.shape[0], dtype=np.int32)
+            )
+            experiments_data.append(experiment_data)
+            experiments_targets.append(experiment_targets)
+            experiments_semi_targets.append(experiment_semi_targets)
+
+        logger.info(
+            f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
+        )
+
+        lidar_projections = np.concatenate(experiments_data)
+        smoke_presence = np.concatenate(experiments_targets)
+        semi_targets = np.concatenate(experiments_semi_targets)
+        file_ids = np.concatenate(experiment_file_ids)
+        frame_ids = np.concatenate(experiment_frame_ids)
+        self.file_names = file_names
+
+        np.random.seed(seed)
+
+        shuffled_indices = np.random.permutation(lidar_projections.shape[0])
+        shuffled_lidar_projections = lidar_projections[shuffled_indices]
+        shuffled_smoke_presence = smoke_presence[shuffled_indices]
+        shuffled_file_ids = file_ids[shuffled_indices]
+        shuffled_frame_ids = frame_ids[shuffled_indices]
+        shuffled_semis = semi_targets[shuffled_indices]
+
+        # check if there are enough known normal and known outlier samples
+        outlier_indices = np.where(shuffled_semis == -1)[0]
+        normal_indices = np.where(shuffled_semis == 1)[0]
+
+        if len(outlier_indices) < num_known_outlier:
+            raise ValueError(
+                f"Not enough known outliers in dataset. Required: {num_known_outlier}, Found: {len(outlier_indices)}"
+            )
+
+        # randomly select known normal and outlier samples
+        keep_outlier_indices = np.random.choice(
+            outlier_indices, size=num_known_outlier, replace=False
+        )
+
+        # put outliers that are not kept into test set and the same number of normal samples aside for testing
+        test_outlier_indices = np.setdiff1d(outlier_indices, keep_outlier_indices)
+        num_test_outliers = len(test_outlier_indices)
+        test_normal_indices = np.random.choice(
+            normal_indices,
+            size=num_test_outliers * ratio_test_normal_to_anomalous,
+            replace=False,
+        )
+
+        # combine test indices
+        test_indices = np.concatenate([test_outlier_indices, test_normal_indices])
+
+        # training indices are the rest
+        train_indices = np.setdiff1d(np.arange(len(shuffled_semis)), test_indices)
+
+        if train:
+            self.data = shuffled_lidar_projections[train_indices]
+            self.targets = shuffled_smoke_presence[train_indices]
+            semi_targets = shuffled_semis[train_indices]
+            self.shuffled_file_ids = shuffled_file_ids[train_indices]
+            self.shuffled_frame_ids = shuffled_frame_ids[train_indices]
+
+        else:
+            self.data = shuffled_lidar_projections[test_indices]
+            self.targets = shuffled_smoke_presence[test_indices]
+            semi_targets = shuffled_semis[test_indices]
+            self.shuffled_file_ids = shuffled_file_ids[test_indices]
+            self.shuffled_frame_ids = shuffled_frame_ids[test_indices]
+
+        self.data = np.nan_to_num(self.data)
+
+        self.data = torch.tensor(self.data)
+        self.targets = torch.tensor(self.targets, dtype=torch.int8)
+        self.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
+
+        # log some stats to ensure the data is loaded correctly
+        if train:
+            logger.info(
+                f"Training set: {len(self.data)} samples, {sum(self.semi_targets == -1)} semi-supervised samples"
+            )
+        else:
+            logger.info(
+                f"Test set: {len(self.data)} samples, {sum(self.semi_targets == -1)} semi-supervised samples"
+            )
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        """Override the original method of the MNIST class.
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target, semi_target, index)
+        """
+        img, target, semi_target, file_id, frame_id = (
+            self.data[index],
+            int(self.targets[index]),
+            int(self.semi_targets[index]),
+            int(self.shuffled_file_ids[index]),
+            int(self.shuffled_frame_ids[index]),
+        )
+
+        # doing this so that it is consistent with all other datasets
+        # to return a PIL Image
+        img = Image.fromarray(img.numpy(), mode="F")
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return img, target, semi_target, index, (file_id, frame_id)
+
+    def get_file_name_from_idx(self, idx: int):
+        return self.file_names[idx]

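Note: a minimal, hypothetical usage sketch of the new SubTerTrainingSelective dataset (the root path and frame range below are illustrative assumptions, not values from the commit). semi_targets_given maps a smoke-experiment file name to the (begin_frame, end_frame) slice that is marked as known-anomalous (-1):

    # Hypothetical example values; only the call signature and dict format come from the class above.
    train_set = SubTerTrainingSelective(
        root="data/subter",  # assumed directory of .npy LiDAR projection files
        train=True,
        num_known_outlier=50,
        seed=0,
        semi_targets_given={
            "3_smoke_human_walking_2023-01-23.npy": (200, 400),  # hypothetical frame range
        },
    )
    img, target, semi_target, index, (file_id, frame_id) = train_set[0]
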
@@ -5,6 +5,9 @@ from pathlib import Path
 import click
 import numpy as np
 import torch

+from baselines.isoforest import IsoForest
+from baselines.ocsvm import OCSVM
 from datasets.main import load_dataset
 from DeepSAD import DeepSAD
 from utils.config import Config
@@ -64,6 +67,30 @@ from utils.visualization.plot_images_grid import plot_images_grid
 )
 @click.argument("xp_path", type=click.Path(exists=True))
 @click.argument("data_path", type=click.Path(exists=True))
+@click.option(
+    "--k_fold",
+    type=bool,
+    default=False,
+    help="Use k-fold cross-validation for training (default: False).",
+)
+@click.option(
+    "--k_fold_num",
+    type=int,
+    default=5,
+    help="Number of folds for k-fold cross-validation (default: 5).",
+)
+@click.option(
+    "--num_known_normal",
+    type=int,
+    default=0,
+    help="Number of max known normal samples (semi-supervised-setting) (default: 0).",
+)
+@click.option(
+    "--num_known_outlier",
+    type=int,
+    default=0,
+    help="Number of max known outlier samples (semi-supervised-setting) (default: 0).",
+)
 @click.option(
     "--load_config",
     type=click.Path(exists=True),
@@ -214,12 +241,52 @@ from utils.visualization.plot_images_grid import plot_images_grid
     "If 1, outlier class as specified in --known_outlier_class option."
     "If > 1, the specified number of outlier classes will be sampled at random.",
 )
+@click.option(
+    "--ocsvm_kernel",
+    type=click.Choice(["rbf", "linear", "poly"]),
+    default="rbf",
+    help="Kernel for the OC-SVM",
+)
+@click.option(
+    "--ocsvm_nu",
+    type=float,
+    default=0.1,
+    help="OC-SVM hyperparameter nu (must be 0 < nu <= 1).",
+)
+@click.option(
+    "--isoforest_n_estimators",
+    type=int,
+    default=100,
+    help="Set the number of base estimators in the ensemble (default: 100).",
+)
+@click.option(
+    "--isoforest_max_samples",
+    type=int,
+    default=256,
+    help="Set the number of samples drawn to train each base estimator (default: 256).",
+)
+@click.option(
+    "--isoforest_contamination",
+    type=float,
+    default=0.1,
+    help="Expected fraction of anomalies in the training set. (default: 0.1).",
+)
+@click.option(
+    "--isoforest_n_jobs_model",
+    type=int,
+    default=-1,
+    help="Number of jobs for model training.",
+)
 def main(
     action,
     dataset_name,
     net_name,
     xp_path,
     data_path,
+    k_fold,
+    k_fold_num,
+    num_known_normal,
+    num_known_outlier,
     load_config,
     load_model,
     eta,
@@ -246,6 +313,12 @@ def main(
     normal_class,
     known_outlier_class,
     n_known_outlier_classes,
+    ocsvm_kernel,
+    ocsvm_nu,
+    isoforest_n_estimators,
+    isoforest_max_samples,
+    isoforest_contamination,
+    isoforest_n_jobs_model,
 ):
     """
     Deep SAD, a method for deep semi-supervised anomaly detection.
@@ -318,6 +391,7 @@ def main(

     if action == "train":
         # Load data
+        # TODO: pass num of folds
         dataset = load_dataset(
             dataset_name,
             data_path,
@@ -328,135 +402,297 @@ def main(
             ratio_known_outlier,
             ratio_pollution,
             random_state=np.random.RandomState(cfg.settings["seed"]),
+            k_fold=k_fold,
+            num_known_normal=num_known_normal,
+            num_known_outlier=num_known_outlier,
         )
         # Log random sample of known anomaly classes if more than 1 class
         if n_known_outlier_classes > 1:
             logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))

-        # Initialize DeepSAD model and set neural network phi
-        deepSAD = DeepSAD(cfg.settings["eta"])
-        deepSAD.set_network(net_name)
+        train_passes = range(k_fold_num) if k_fold else [None]

-        # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
-        if load_model:
-            deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
-            logger.info("Loading model from %s." % load_model)
+        train_isoforest = True
+        train_ocsvm = False
+        train_deepsad = True

-        logger.info("Pretraining: %s" % pretrain)
-        if pretrain:
-            # Log pretraining details
-            logger.info("Pretraining optimizer: %s" % cfg.settings["ae_optimizer_name"])
-            logger.info("Pretraining learning rate: %g" % cfg.settings["ae_lr"])
-            logger.info("Pretraining epochs: %d" % cfg.settings["ae_n_epochs"])
+        for fold_idx in train_passes:
+            if fold_idx is None:
+                logger.info("Single training without k-fold")
+            else:
+                logger.info(f"Fold {fold_idx + 1}/{k_fold_num}")
+
+            # Initialize OC-SVM model
+            if train_ocsvm:
+                ocsvm = OCSVM(kernel=ocsvm_kernel, nu=ocsvm_nu, hybrid=False)
+
+            # Initialize Isolation Forest model
+            if train_isoforest:
+                Isoforest = IsoForest(
+                    hybrid=False,
+                    n_estimators=isoforest_n_estimators,
+                    max_samples=isoforest_max_samples,
+                    contamination=isoforest_contamination,
+                    n_jobs=isoforest_n_jobs_model,
+                    seed=seed,
+                )
+
+            # Initialize DeepSAD model and set neural network phi
+            if train_deepsad:
+                deepSAD = DeepSAD(cfg.settings["eta"])
+                deepSAD.set_network(net_name)
+
+            # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
+            if train_deepsad and load_model:
+                deepSAD.load_model(
+                    model_path=load_model, load_ae=True, map_location=device
+                )
+                logger.info("Loading model from %s." % load_model)
+
+            logger.info("Pretraining: %s" % pretrain)
+            if train_deepsad and pretrain:
+                # Log pretraining details
+                logger.info(
+                    "Pretraining optimizer: %s" % cfg.settings["ae_optimizer_name"]
+                )
+                logger.info("Pretraining learning rate: %g" % cfg.settings["ae_lr"])
+                logger.info("Pretraining epochs: %d" % cfg.settings["ae_n_epochs"])
+                logger.info(
+                    "Pretraining learning rate scheduler milestones: %s"
+                    % (cfg.settings["ae_lr_milestone"],)
+                )
+                logger.info(
+                    "Pretraining batch size: %d" % cfg.settings["ae_batch_size"]
+                )
+                logger.info(
+                    "Pretraining weight decay: %g" % cfg.settings["ae_weight_decay"]
+                )
+
+                # Pretrain model on dataset (via autoencoder)
+                deepSAD.pretrain(
+                    dataset,
+                    optimizer_name=cfg.settings["ae_optimizer_name"],
+                    lr=cfg.settings["ae_lr"],
+                    n_epochs=cfg.settings["ae_n_epochs"],
+                    lr_milestones=cfg.settings["ae_lr_milestone"],
+                    batch_size=cfg.settings["ae_batch_size"],
+                    weight_decay=cfg.settings["ae_weight_decay"],
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                )
+
+                # Save pretraining results
+                if fold_idx is None:
+                    deepSAD.save_ae_results(export_json=xp_path + "/ae_results.json")
+                else:
+                    deepSAD.save_ae_results(
+                        export_json=xp_path + f"/ae_results_{fold_idx}.json"
+                    )
+
+            # Log training details
+            logger.info("Training optimizer: %s" % cfg.settings["optimizer_name"])
+            logger.info("Training learning rate: %g" % cfg.settings["lr"])
+            logger.info("Training epochs: %d" % cfg.settings["n_epochs"])
             logger.info(
-                "Pretraining learning rate scheduler milestones: %s"
-                % (cfg.settings["ae_lr_milestone"],)
-            )
-            logger.info("Pretraining batch size: %d" % cfg.settings["ae_batch_size"])
-            logger.info(
-                "Pretraining weight decay: %g" % cfg.settings["ae_weight_decay"]
+                "Training learning rate scheduler milestones: %s"
+                % (cfg.settings["lr_milestone"],)
             )
+            logger.info("Training batch size: %d" % cfg.settings["batch_size"])
+            logger.info("Training weight decay: %g" % cfg.settings["weight_decay"])

-            # Pretrain model on dataset (via autoencoder)
-            deepSAD.pretrain(
-                dataset,
-                optimizer_name=cfg.settings["ae_optimizer_name"],
-                lr=cfg.settings["ae_lr"],
-                n_epochs=cfg.settings["ae_n_epochs"],
-                lr_milestones=cfg.settings["ae_lr_milestone"],
-                batch_size=cfg.settings["ae_batch_size"],
-                weight_decay=cfg.settings["ae_weight_decay"],
-                device=device,
-                n_jobs_dataloader=n_jobs_dataloader,
-            )
-
-            # Save pretraining results
-            deepSAD.save_ae_results(export_json=xp_path + "/ae_results.json")
-
-        # Log training details
-        logger.info("Training optimizer: %s" % cfg.settings["optimizer_name"])
-        logger.info("Training learning rate: %g" % cfg.settings["lr"])
-        logger.info("Training epochs: %d" % cfg.settings["n_epochs"])
-        logger.info(
-            "Training learning rate scheduler milestones: %s"
-            % (cfg.settings["lr_milestone"],)
-        )
-        logger.info("Training batch size: %d" % cfg.settings["batch_size"])
-        logger.info("Training weight decay: %g" % cfg.settings["weight_decay"])
-
-        # Train model on dataset
-        deepSAD.train(
-            dataset,
-            optimizer_name=cfg.settings["optimizer_name"],
-            lr=cfg.settings["lr"],
-            n_epochs=cfg.settings["n_epochs"],
-            lr_milestones=cfg.settings["lr_milestone"],
-            batch_size=cfg.settings["batch_size"],
-            weight_decay=cfg.settings["weight_decay"],
-            device=device,
-            n_jobs_dataloader=n_jobs_dataloader,
-        )
-
-        # Test model
-        deepSAD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)
-
-        # Save results, model, and configuration
-        deepSAD.save_results(export_json=xp_path + "/results.json")
-        deepSAD.save_model(export_model=xp_path + "/model.tar")
-        cfg.save_config(export_json=xp_path + "/config.json")
-
-        # Plot most anomalous and most normal test samples
-        indices, labels, scores = zip(*deepSAD.results["test_scores"])
-        indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
-        idx_all_sorted = indices[np.argsort(scores)]  # from lowest to highest score
-        idx_normal_sorted = indices[labels == 0][
-            np.argsort(scores[labels == 0])
-        ]  # from lowest to highest score
-
-        if dataset_name in ("mnist", "fmnist", "cifar10", "elpv"):
-            if dataset_name in ("mnist", "fmnist", "elpv"):
-                X_all_low = dataset.test_set.data[idx_all_sorted[:32], ...].unsqueeze(1)
-                X_all_high = dataset.test_set.data[idx_all_sorted[-32:], ...].unsqueeze(
-                    1
+            # Train model on dataset
+            if train_deepsad:
+                deepSAD.train(
+                    dataset,
+                    optimizer_name=cfg.settings["optimizer_name"],
+                    lr=cfg.settings["lr"],
+                    n_epochs=cfg.settings["n_epochs"],
+                    lr_milestones=cfg.settings["lr_milestone"],
+                    batch_size=cfg.settings["batch_size"],
+                    weight_decay=cfg.settings["weight_decay"],
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
                 )
-                X_normal_low = dataset.test_set.data[
-                    idx_normal_sorted[:32], ...
-                ].unsqueeze(1)
-                X_normal_high = dataset.test_set.data[
-                    idx_normal_sorted[-32:], ...
-                ].unsqueeze(1)

-            if dataset_name == "cifar10":
-                X_all_low = torch.tensor(
-                    np.transpose(
-                        dataset.test_set.data[idx_all_sorted[:32], ...], (0, 3, 1, 2)
+            # Train model on dataset
+            if train_ocsvm:
+                ocsvm.train(
+                    dataset,
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                    batch_size=8,
+                )
+
+            # Train model on dataset
+            if train_isoforest:
+                Isoforest.train(
+                    dataset,
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                )
+
+            # Test model
+            if train_deepsad:
+                deepSAD.test(
+                    dataset,
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                )
+
+            # Test model
+            if train_ocsvm:
+                ocsvm.test(
+                    dataset,
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                    batch_size=8,
+                )
+
+            # Test model
+            if train_isoforest:
+                Isoforest.test(
+                    dataset,
+                    device=device,
+                    n_jobs_dataloader=n_jobs_dataloader,
+                    k_fold_idx=fold_idx,
+                )
+
+            # Save results, model, and configuration
+            if fold_idx is None:
+                if train_deepsad:
+                    deepSAD.save_results(export_pkl=xp_path + "/results.pkl")
+                    deepSAD.save_model(export_model=xp_path + "/model.tar")
+                if train_ocsvm:
+                    ocsvm.save_results(export_pkl=xp_path + "/results_ocsvm.pkl")
+                if train_isoforest:
+                    Isoforest.save_results(
+                        export_pkl=xp_path + "/results_isoforest.pkl"
                     )
-                )
-                X_all_high = torch.tensor(
-                    np.transpose(
-                        dataset.test_set.data[idx_all_sorted[-32:], ...], (0, 3, 1, 2)
+            else:
+                if train_deepsad:
+                    deepSAD.save_results(
+                        export_pkl=xp_path + f"/results_{fold_idx}.pkl"
                     )
-                )
-                X_normal_low = torch.tensor(
-                    np.transpose(
-                        dataset.test_set.data[idx_normal_sorted[:32], ...], (0, 3, 1, 2)
+                    deepSAD.save_model(export_model=xp_path + f"/model_{fold_idx}.tar")
+                if train_ocsvm:
+                    ocsvm.save_results(
+                        export_pkl=xp_path + f"/results_ocsvm_{fold_idx}.pkl"
                     )
-                )
-                X_normal_high = torch.tensor(
-                    np.transpose(
-                        dataset.test_set.data[idx_normal_sorted[-32:], ...],
-                        (0, 3, 1, 2),
+                if train_isoforest:
+                    Isoforest.save_results(
+                        export_pkl=xp_path + f"/results_isoforest_{fold_idx}.pkl"
                     )
-                )

-            plot_images_grid(X_all_low, export_img=xp_path + "/all_low", padding=2)
-            plot_images_grid(X_all_high, export_img=xp_path + "/all_high", padding=2)
-            plot_images_grid(
-                X_normal_low, export_img=xp_path + "/normals_low", padding=2
-            )
-            plot_images_grid(
-                X_normal_high, export_img=xp_path + "/normals_high", padding=2
-            )
+            cfg.save_config(export_json=xp_path + "/config.json")
+
+            # Plot most anomalous and most normal test samples
+            if train_deepsad:
+                indices, labels, scores = zip(*deepSAD.results["test_scores"])
+                indices, labels, scores = (
+                    np.array(indices),
+                    np.array(labels),
+                    np.array(scores),
+                )
+                idx_all_sorted = indices[
+                    np.argsort(scores)
+                ]  # from lowest to highest score
+                idx_normal_sorted = indices[labels == 0][
+                    np.argsort(scores[labels == 0])
+                ]  # from lowest to highest score
+
+                if dataset_name in (
+                    "mnist",
+                    "fmnist",
+                    "cifar10",
+                    "elpv",
+                ):
+                    if dataset_name in (
+                        "mnist",
+                        "fmnist",
+                        "elpv",
+                    ):
+                        X_all_low = dataset.test_set.data[
+                            idx_all_sorted[:32], ...
+                        ].unsqueeze(1)
+                        X_all_high = dataset.test_set.data[
+                            idx_all_sorted[-32:], ...
+                        ].unsqueeze(1)
+                        X_normal_low = dataset.test_set.data[
+                            idx_normal_sorted[:32], ...
+                        ].unsqueeze(1)
+                        X_normal_high = dataset.test_set.data[
+                            idx_normal_sorted[-32:], ...
+                        ].unsqueeze(1)
+
+                    if dataset_name == "cifar10":
+                        X_all_low = torch.tensor(
+                            np.transpose(
+                                dataset.test_set.data[idx_all_sorted[:32], ...],
+                                (0, 3, 1, 2),
+                            )
+                        )
+                        X_all_high = torch.tensor(
+                            np.transpose(
+                                dataset.test_set.data[idx_all_sorted[-32:], ...],
+                                (0, 3, 1, 2),
+                            )
+                        )
+                        X_normal_low = torch.tensor(
+                            np.transpose(
+                                dataset.test_set.data[idx_normal_sorted[:32], ...],
+                                (0, 3, 1, 2),
+                            )
+                        )
+                        X_normal_high = torch.tensor(
+                            np.transpose(
+                                dataset.test_set.data[idx_normal_sorted[-32:], ...],
+                                (0, 3, 1, 2),
+                            )
+                        )
+
+                    if fold_idx is None:
+                        plot_images_grid(
+                            X_all_low, export_img=xp_path + "/all_low", padding=2
+                        )
+                        plot_images_grid(
+                            X_all_high, export_img=xp_path + "/all_high", padding=2
+                        )
+                        plot_images_grid(
+                            X_normal_low, export_img=xp_path + "/normals_low", padding=2
+                        )
+                        plot_images_grid(
+                            X_normal_high,
+                            export_img=xp_path + "/normals_high",
+                            padding=2,
+                        )
+                    else:
+                        plot_images_grid(
+                            X_all_low,
+                            export_img=xp_path + f"/all_low_{fold_idx}",
+                            padding=2,
+                        )
+                        plot_images_grid(
+                            X_all_high,
+                            export_img=xp_path + f"/all_high_{fold_idx}",
+                            padding=2,
+                        )
+                        plot_images_grid(
+                            X_normal_low,
+                            export_img=xp_path + f"/normals_low_{fold_idx}",
+                            padding=2,
+                        )
+                        plot_images_grid(
+                            X_normal_high,
+                            export_img=xp_path + f"/normals_high_{fold_idx}",
+                            padding=2,
+                        )

     elif action == "infer":
         dataset = load_dataset(
             dataset_name,
@@ -488,14 +724,23 @@ def main(
         deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
         logger.info("Loading model from %s." % load_model)

-        inference_results = deepSAD.inference(
+        inference_results, all_outputs = deepSAD.inference(
             dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
         )
         inference_results_path = (
-            Path(xp_path) / "inference" / Path(dataset.root).with_suffix(".npy").stem
+            Path(xp_path)
+            / "inference"
+            / Path(Path(dataset.root).stem).with_suffix(".npy")
         )
+        inference_outputs_path = (
+            Path(xp_path)
+            / "inference"
+            / Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
+        )

         inference_results_path.parent.mkdir(parents=True, exist_ok=True)
         np.save(inference_results_path, inference_results, fix_imports=False)
+        np.save(inference_outputs_path, all_outputs, fix_imports=False)

         logger.info(
             f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
@@ -1,18 +1,23 @@
-from base.base_trainer import BaseTrainer
-from base.base_dataset import BaseADDataset
-from base.base_net import BaseNet
-from torch.utils.data.dataloader import DataLoader
-from sklearn.metrics import roc_auc_score
-
 import logging
 import time

+import numpy as np
 import torch
 import torch.optim as optim
-import numpy as np
+from sklearn.metrics import (
+    average_precision_score,
+    precision_recall_curve,
+    roc_auc_score,
+    roc_curve,
+)
+from torch.utils.data.dataloader import DataLoader
+
+from base.base_dataset import BaseADDataset
+from base.base_net import BaseNet
+from base.base_trainer import BaseTrainer


 class DeepSADTrainer(BaseTrainer):

     def __init__(
         self,
         c,
@@ -50,13 +55,22 @@ class DeepSADTrainer(BaseTrainer):
         self.test_time = None
         self.test_scores = None

-    def train(self, dataset: BaseADDataset, net: BaseNet):
+    def train(
+        self, dataset: BaseADDataset, net: BaseNet, k_fold_idx: int = None
+    ) -> BaseNet:
         logger = logging.getLogger()

         # Get train data loader
-        train_loader, _, _ = dataset.loaders(
-            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            train_loader, _ = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=self.batch_size,
+                num_workers=self.n_jobs_dataloader,
+            )
+        else:
+            train_loader, _, _ = dataset.loaders(
+                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
+            )

         # Set device for network
         net = net.to(self.device)
@@ -82,14 +96,14 @@ class DeepSADTrainer(BaseTrainer):
         start_time = time.time()
         net.train()
         for epoch in range(self.n_epochs):

             epoch_loss = 0.0
             n_batches = 0
             epoch_start_time = time.time()
             for data in train_loader:
-                inputs, _, semi_targets, _ = data
-                inputs, semi_targets = inputs.to(self.device), semi_targets.to(
-                    self.device
+                inputs, _, semi_targets, _, _ = data
+                inputs, semi_targets = (
+                    inputs.to(self.device),
+                    semi_targets.to(self.device),
                 )

                 # Zero the network parameter gradients
@@ -145,6 +159,7 @@ class DeepSADTrainer(BaseTrainer):
         logger.info("Starting inference...")
         n_batches = 0
         start_time = time.time()
+        all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
         scores = []
         net.eval()
         with torch.no_grad():
@@ -155,6 +170,10 @@ class DeepSADTrainer(BaseTrainer):
                 idx = idx.to(self.device)

                 outputs = net(inputs)
+                all_idx = n_batches * self.batch_size
+                all_outputs[all_idx : all_idx + len(inputs)] = (
+                    outputs.cpu().data.numpy()
+                )
                 dist = torch.sum((outputs - self.c) ** 2, dim=1)
                 scores += dist.cpu().data.numpy().tolist()

@@ -166,15 +185,22 @@ class DeepSADTrainer(BaseTrainer):
         logger.info("Inference Time: {:.3f}s".format(self.inference_time))
         logger.info("Finished inference.")

-        return np.array(scores)
+        return np.array(scores), all_outputs

-    def test(self, dataset: BaseADDataset, net: BaseNet):
+    def test(self, dataset: BaseADDataset, net: BaseNet, k_fold_idx: int = None):
         logger = logging.getLogger()

         # Get test data loader
-        _, test_loader, _ = dataset.loaders(
-            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            _, test_loader = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=self.batch_size,
+                num_workers=self.n_jobs_dataloader,
+            )
+        else:
+            _, test_loader, _ = dataset.loaders(
+                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
+            )

         # Set device for network
         net = net.to(self.device)
@@ -188,7 +214,7 @@ class DeepSADTrainer(BaseTrainer):
         net.eval()
         with torch.no_grad():
             for data in test_loader:
-                inputs, labels, semi_targets, idx = data
+                inputs, labels, semi_targets, idx, _ = data

                 inputs = inputs.to(self.device)
                 labels = labels.to(self.device)
@@ -225,6 +251,9 @@ class DeepSADTrainer(BaseTrainer):
         labels = np.array(labels)
         scores = np.array(scores)
         self.test_auc = roc_auc_score(labels, scores)
+        self.test_roc = roc_curve(labels, scores)
+        self.test_prc = precision_recall_curve(labels, scores)
+        self.test_ap = average_precision_score(labels, scores)

         # Log results
         logger.info("Test Loss: {:.6f}".format(epoch_loss / n_batches))
@@ -241,7 +270,7 @@ class DeepSADTrainer(BaseTrainer):
         with torch.no_grad():
             for data in train_loader:
                 # get the inputs of the batch
-                inputs, _, _, _ = data
+                inputs, _, _, _, _ = data
                 inputs = inputs.to(self.device)
                 outputs = net(inputs)
                 n_samples += outputs.shape[0]
@@ -1,18 +1,18 @@
-from base.base_trainer import BaseTrainer
-from base.base_dataset import BaseADDataset
-from base.base_net import BaseNet
-from sklearn.metrics import roc_auc_score
-
 import logging
 import time

+import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
-import numpy as np
+from sklearn.metrics import roc_auc_score
+
+from base.base_dataset import BaseADDataset
+from base.base_net import BaseNet
+from base.base_trainer import BaseTrainer


 class AETrainer(BaseTrainer):

     def __init__(
         self,
         optimizer_name: str = "adam",
@@ -40,13 +40,20 @@ class AETrainer(BaseTrainer):
         self.test_auc = None
         self.test_time = None

-    def train(self, dataset: BaseADDataset, ae_net: BaseNet):
+    def train(self, dataset: BaseADDataset, ae_net: BaseNet, k_fold_idx: int = None):
         logger = logging.getLogger()

         # Get train data loader
-        train_loader, _, _ = dataset.loaders(
-            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            train_loader, _ = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=self.batch_size,
+                num_workers=self.n_jobs_dataloader,
+            )
+        else:
+            train_loader, _, _ = dataset.loaders(
+                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
+            )

         # Set loss
         criterion = nn.MSELoss(reduction="none")
@@ -69,14 +76,23 @@ class AETrainer(BaseTrainer):
         logger.info("Starting pretraining...")
         start_time = time.time()
         ae_net.train()
-        for epoch in range(self.n_epochs):

+        all_training_data = []
+        for epoch in range(self.n_epochs):
             epoch_loss = 0.0
             n_batches = 0
             epoch_start_time = time.time()
             for data in train_loader:
-                inputs, _, _, _ = data
+                inputs, _, _, _, file_frame_ids = data
                 inputs = inputs.to(self.device)
+                all_training_data.append(
+                    np.dstack(
+                        (
+                            file_frame_ids[0].detach().cpu().numpy(),
+                            file_frame_ids[1].detach().cpu().numpy(),
+                        )
+                    )
+                )

                 # Zero the network parameter gradients
                 optimizer.zero_grad()
@@ -107,17 +123,31 @@ class AETrainer(BaseTrainer):

         self.train_time = time.time() - start_time
         logger.info("Pretraining Time: {:.3f}s".format(self.train_time))

+        all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
+
+        sorted_training_data = all_training_data[
+            np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
+        ]
+
         logger.info("Finished pretraining.")

         return ae_net

-    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
+    def test(self, dataset: BaseADDataset, ae_net: BaseNet, k_fold_idx: int = None):
         logger = logging.getLogger()

         # Get test data loader
-        _, test_loader, _ = dataset.loaders(
-            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
-        )
+        if k_fold_idx is not None:
+            _, test_loader = dataset.loaders_k_fold(
+                fold_idx=k_fold_idx,
+                batch_size=self.batch_size,
+                num_workers=self.n_jobs_dataloader,
+            )
+        else:
+            _, test_loader, _ = dataset.loaders(
+                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
+            )

         # Set loss
         criterion = nn.MSELoss(reduction="none")
@@ -133,15 +163,25 @@ class AETrainer(BaseTrainer):
         start_time = time.time()
         idx_label_score = []
         ae_net.eval()
+        all_training_data = []
         with torch.no_grad():
             for data in test_loader:
-                inputs, labels, _, idx = data
+                inputs, labels, _, idx, file_frame_ids = data
                 inputs, labels, idx = (
                     inputs.to(self.device),
                     labels.to(self.device),
                     idx.to(self.device),
                 )

+                all_training_data.append(
+                    np.dstack(
+                        (
+                            file_frame_ids[0].detach().cpu().numpy(),
+                            file_frame_ids[1].detach().cpu().numpy(),
+                        )
+                    )
+                )
+
                 rec = ae_net(inputs)
                 rec_loss = criterion(rec, inputs)
                 scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))
@@ -161,6 +201,12 @@ class AETrainer(BaseTrainer):

         self.test_time = time.time() - start_time

+        all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
+
+        sorted_training_data = all_training_data[
+            np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
+        ]
+
         # Compute AUC
         _, labels, scores = zip(*idx_label_score)
         labels = np.array(labels)
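Aside: the (file_id, frame_id) bookkeeping above is ordered with np.lexsort, whose last key is the primary sort key. A small standalone sketch with illustrative values only:

    import numpy as np

    pairs = np.array([[1, 5], [0, 7], [1, 2], [0, 3]])  # columns: (file_id, frame_id)
    order = np.lexsort((pairs[:, 1], pairs[:, 0]))      # sort by file_id, then frame_id
    print(pairs[order])  # [[0 3] [0 7] [1 2] [1 5]]
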
151 tools/animate_score.py Normal file
@@ -0,0 +1,151 @@
from functools import partial
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import animation

# from util import calculate_average_frame_rate, load_dataset
from rich.progress import Progress, track

scores_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_selective_test/inference/3_smoke_human_walking_2023-01-23.npy"
)

# scores_path = Path(
#     "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_split_test/inference/3_smoke_human_walking_2023-01-23.npy"
# )
# scores_path = Path(
#     "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_split_test/inference/1_loop_closure_illuminated_two_LiDARs_2023-01-23.npy"
# )
# scores_path = Path(
#     "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/esmera_split_test/inference/experiment_3.npy"
# )

# dataset_path = Path(
#     "/home/fedex/mt/data/subter/1_loop_closure_illuminated_two_LiDARs_2023-01-23.bag"
# )
# dataset_path = Path("/home/fedex/mt/data/subter/3_smoke_human_walking_2023-01-23.bag")

# dataset = load_dataset(dataset_path)
fps = 10

all_scores = np.load(scores_path)
y_limit = 1.1 * np.max(all_scores[np.isfinite(all_scores)])
# y_limit = 10
# all_scores = np.where(all_scores > 10, 10, all_scores)
# all_scores = all_scores.reshape(-1, 16).T  # SUBTER
# all_scores = all_scores.reshape(-1, 8).T  # ESMERA
all_scores = all_scores.reshape(-1, 1).T  # ESMERA

print(all_scores.shape, y_limit)

fig, axes = plt.subplots(
    1,
    1,
    figsize=(7.68, 7.2),
    # 1, 1, figsize=(7.68, 7.2), gridspec_kw={"wspace": 0.10, "hspace": 0.10}
)

axes = [axes]
# fig, axes = plt.subplots(
#     1, 8, figsize=(20.48, 7.2), gridspec_kw={"wspace": 0.05, "hspace": 0.05}
# )

# Flatten the axes for easier indexing
# axes = axes.flatten()

last_running_avg = [None] * len(all_scores)


# Function to calculate the running average with a dynamic window size
def running_average_dynamic(data, current_frame, max_window=20):
    """Calculate the running average with dynamic window size up to max_window."""
    window = min(
        current_frame, max_window
    )  # Use the minimum of current frame or the max window size
    return np.convolve(data, np.ones(window) / window, mode="valid")


# Function to animate each subplot
def animate(i, progress_bar, progress_task):
    for score_id, ax in enumerate(axes):
        ax.cla()  # Clear the axes for the current frame

        ax.plot(
            all_scores[score_id][:i], label="Anomaly Score"
        )  # Plot the scores up to frame `i`
        if i > 0:
            avg_data = running_average_dynamic(
                all_scores[score_id][:i], current_frame=i, max_window=20
            )
            ax.plot(range(len(avg_data)), avg_data, color="orange", label="Running Avg")

        ax.set_xlim([0, all_scores[score_id].shape[0]])  # Set x limits
        ax.set_ylim([0, y_limit])  # Set y limits
        # ax.set_title(f"Score {score_id + 1}")  # Add a title for each subplot

        ax.set_ylabel("Score", fontsize=10)  # Add y-axis label
        ax.set_xlabel("Frame", fontsize=10)  # Add y-axis label
        ax.legend(loc="upper right", fontsize=10)  # Add a legend
        # Only show y-axis tick labels for the leftmost subplots
        # if score_id % 8 == 0:  # First column of subplots
        #     ax.set_ylabel("Score", fontsize=8)  # Add y-axis label
        # else:
        #     ax.set_yticklabels([])  # Remove y-axis tick labels
        # if score_id < 8:
        #     ax.set_xticklabels([])  # Remove x-axis tick labels

        # ax.tick_params(labelsize=6)

        # Update the running average text every 10 frames
        if i % fps == 0 and i > 0:
            # Calculate the current running average value (up to last 20 frames)
            current_window = min(i, 20)
            last_running_avg[score_id] = np.mean(
                all_scores[score_id][i - current_window : i]
            )

        # Display the last updated running average value (if available)
        if last_running_avg[score_id] is not None:
            ax.text(
                0.05,
                0.95,
                f"Current Avg: {last_running_avg[score_id]:>2.1f}",
                transform=ax.transAxes,
                fontsize=10,
                verticalalignment="top",
                horizontalalignment="left",
                color="black",
                fontfamily="monospace",
            )
    progress_bar.update(progress_task, completed=i)


# plt.subplots_adjust(
#     left=0.02, right=0.98, top=0.95, bottom=0.05, wspace=0.05, hspace=0.05
# )


with Progress() as progress:
    total = all_scores[0].shape[0] + 1
    progress_task = progress.add_task("[cyan]Animating...", total=total)
    progress.update(progress_task, completed=0)

    animate_partial = partial(
        animate, progress_bar=progress, progress_task=progress_task
    )

    # anim = animation.FuncAnimation(fig, animate_partial, frames=50, interval=1, blit=False)
    anim = animation.FuncAnimation(
        fig, animate_partial, frames=total, interval=1, blit=False
    )

    # Save the animation as a single video
    animated_score_filename = f"{scores_path.stem}_selective.mp4"
    anim.save(animated_score_filename, writer=animation.FFMpegWriter(fps=fps))

    progress.update(progress_task, completed=all_scores[0].shape[0] + 1)

    # Clear the figure after saving
    fig.clear()
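Aside: the dynamic running average used in the animation can be exercised in isolation; the sample scores below are illustrative:

    import numpy as np

    def running_average_dynamic(data, current_frame, max_window=20):
        window = min(current_frame, max_window)
        return np.convolve(data, np.ones(window) / window, mode="valid")

    scores = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    print(running_average_dynamic(scores, current_frame=3))  # [2. 3. 4.]
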
93 tools/anomaly_scatter_plot.py Normal file
@@ -0,0 +1,93 @@
import matplotlib.pyplot as plt
import numpy as np

# Fix the random seed for reproducibility
np.random.seed(0)

# 1. Generate NORMAL DATA (e.g. points roughly around the origin)
# We'll keep them relatively close together so that a circle can enclose them easily.
normal_data = np.random.randn(50, 2) * 0.75

# 2. Generate ANOMALOUS DATA
# - Cluster 1: 3 points close together
anomaly_cluster_1 = np.array([[3.0, 3.0], [3.2, 3.1], [2.8, 2.9], [0.4, 4.0]])

# - Cluster 2: A single point

# 3. Compute the center and radius for a boundary circle around normal data
center = normal_data.mean(axis=0)
distances = np.linalg.norm(normal_data - center, axis=1)
radius = (
    np.max(distances) + 0.2
)  # Add a small margin to ensure all normal points are inside

# Create coordinates for plotting the circular boundary
theta = np.linspace(0, 2 * np.pi, 200)
circle_x = center[0] + radius * np.cos(theta)
circle_y = center[1] + radius * np.sin(theta)

# 4. Plot the data
plt.figure(figsize=(7, 7))

# Scatter normal points with 'o'
plt.scatter(
    normal_data[:, 0], normal_data[:, 1], marker="o", color="blue", label="Normal Data"
)

# Scatter anomalous points with 'x', but separate them by cluster for clarity
plt.scatter(
    anomaly_cluster_1[:, 0],
    anomaly_cluster_1[:, 1],
    marker="x",
    color="red",
    label="Anomalies",
)

# Plot the boundary (circle) around the normal data
plt.plot(circle_x, circle_y, linestyle="--", color="black", label="Boundary")

# 5. Annotate/label the clusters
# Label the normal cluster near its center
# plt.text(center[0], center[1],
#          'Normal Cluster',
#          horizontalalignment='center',
#          verticalalignment='center',
#          fontsize=9,
#          bbox=dict(facecolor='white', alpha=0.7))
#
# # Label anomaly cluster 1 near its centroid
# ac1_center = anomaly_cluster_1.mean(axis=0)
# plt.text(ac1_center[0], ac1_center[1],
#          'Anomaly Cluster 1',
#          horizontalalignment='center',
#          verticalalignment='center',
#          fontsize=9,
#          bbox=dict(facecolor='white', alpha=0.7))
#
# # Label anomaly cluster 2
# ac2_point = anomaly_cluster_2[0]
# plt.text(ac2_point[0]+0.2, ac2_point[1],
#          'Anomaly Cluster 2',
#          horizontalalignment='left',
#          verticalalignment='center',
#          fontsize=9,
#          bbox=dict(facecolor='white', alpha=0.7))

# Add legend and make plot look nice
plt.legend(loc="upper left")
# plt.title('2D Scatter Plot Showing Normal and Anomalous Clusters')
plt.xlabel("x")
plt.ylabel("y")
plt.tick_params(
    axis="both",  # changes apply to the x-axis
    which="both",  # both major and minor ticks are affected
    bottom=False,  # ticks along the bottom edge are off
    top=False,  # ticks along the top edge are off
    left=False,  # ticks along the top edge are off
    right=False,  # ticks along the top edge are off
    labelbottom=False,
    labelleft=False,
)  #
# plt.grid(True)
plt.axis("equal")  # Makes circles look circular rather than elliptical
plt.savefig("scatter_plot.png")
@@ -22,21 +22,6 @@ from rosbags.typesys.stores.ros1_noetic import (
|
 from util import existing_path


-def get_o3d_pointcloud(points: np.ndarray) -> o3d.geometry.PointCloud:
-    xyz_array = np.stack(
-        [
-            points["x"].astype(np.float64),
-            points["y"].astype(np.float64),
-            points["z"].astype(np.float64),
-        ],
-        axis=-1,
-    )
-    filtered_xyz = xyz_array[~np.all(xyz_array == 0, axis=1)]
-    o3d_vector = o3d.utility.Vector3dVector(filtered_xyz)
-    return o3d.geometry.PointCloud(o3d_vector)


 # Mapping of PointField datatypes to NumPy dtypes
 POINTFIELD_DATATYPES = {
     1: np.int8,  # INT8
@@ -178,9 +163,9 @@ def main() -> int:
     with AnyReader([args.input_experiment_path]) as reader:
         connections = reader.connections
         topics = dict(sorted({conn.topic: conn for conn in connections}.items()))
-        assert (
-            args.pointcloud_topic in topics
-        ), f"Topic {args.pointcloud_topic} not found"
+        assert args.pointcloud_topic in topics, (
+            f"Topic {args.pointcloud_topic} not found"
+        )

         original_types = {}
         typestore = get_typestore(Stores.ROS1_NOETIC)
@@ -207,16 +192,78 @@ def main() -> int:
             "Processing all messages", total=reader.message_count
         )

+        i = 0
         for connection, timestamp, rawdata in reader.messages():
             if connection.topic == args.pointcloud_topic:
+                i += 1
                 # For the pointcloud topic, we need to modify the data
                 msg = reader.deserialize(rawdata, connection.msgtype)
                 original_pointcloud = read_pointcloud(msg)
                 cleaned_pointcloud = clean_pointcloud(original_pointcloud)

-                o3d_pcd = get_o3d_pointcloud(cleaned_pointcloud)
+                # o3d_pointcloud = o3d.geometry.PointCloud()

+                # xyz_points = np.zeros((cleaned_pointcloud.shape[0], 3))
+                # xyz_points[:, 0] = cleaned_pointcloud["x"]
+                # xyz_points[:, 1] = cleaned_pointcloud["y"]
+                # xyz_points[:, 2] = cleaned_pointcloud["z"]

+                # o3d_pointcloud.points = o3d.utility.Vector3dVector(xyz_points)

+                range_dtypes = {
+                    500: ("range_smaller_500", "u1"),
+                    800: ("range_smaller_800", "u1"),
+                    1000: ("range_smaller_1000", "u1"),
+                    1200: ("range_smaller_1200", "u1"),
+                }

+                # radius_outlier_dtypes = {
+                #     # 10: ("radius_outlier_10", "u1"),
+                #     50: ("radius_outlier_50", "u1"),
+                #     # 100: ("radius_outlier_100", "u1"),
+                # }

+                # statistical_outlier_dtypes = {
+                #     # (20, 1.0): ("statisstical_outlier_20_1", "u1"),
+                #     # (20, 2.0): ("statisstical_outlier_20_2", "u1"),
+                #     (20, 4.0): ("statisstical_outlier_20_4", "u1"),
+                # }

+                edited_dtype = np.dtype(
+                    cleaned_pointcloud.dtype.descr + list(range_dtypes.values())
+                    # + list(radius_outlier_dtypes.values())
+                    # + list(statistical_outlier_dtypes.values())
+                )

+                edited_pointcloud = np.zeros(
+                    cleaned_pointcloud.shape, dtype=edited_dtype
+                )
+                for name in cleaned_pointcloud.dtype.names:
+                    edited_pointcloud[name] = cleaned_pointcloud[name]

+                for key, val in range_dtypes.items():
+                    edited_pointcloud[val[0]][
+                        edited_pointcloud["range"] < key
+                    ] = 255

+                # for key, val in radius_outlier_dtypes.items():
+                #     _, mask_indices = o3d_pointcloud.remove_radius_outlier(
+                #         nb_points=2, radius=key
+                #     )
+                #     mask = np.zeros(edited_pointcloud.shape[0], dtype=bool)
+                #     mask[mask_indices] = True
+                #     edited_pointcloud[val[0]][mask] = 255

+                # for key, val in statistical_outlier_dtypes.items():
+                #     _, mask_indices = o3d_pointcloud.remove_statistical_outlier(
+                #         nb_neighbors=key[0], std_ratio=key[1]
+                #     )
+                #     mask = np.zeros(edited_pointcloud.shape[0], dtype=bool)
+                #     mask[mask_indices] = True
+                #     edited_pointcloud[val[0]][mask] = 255

+                msg = create_pointcloud2_msg(msg, edited_pointcloud)

-                msg = create_pointcloud2_msg(msg, cleaned_pointcloud)
                 writer.write(
                     connection=new_connections[connection.id],
                     timestamp=timestamp,
@@ -224,6 +271,16 @@ def main() -> int:
                         message=msg, typename=msg.__msgtype__
                     ),
                 )

+                # Create a boolean mask where the condition is met
+                # mask = cleaned_pointcloud["range"] > 1

+                # Use the mask to set fields to zero
+                # cleaned_pointcloud["x"][mask] = 0
+                # cleaned_pointcloud["y"][mask] = 0
+                # cleaned_pointcloud["z"][mask] = 0
+                # cleaned_pointcloud["range"][mask] = 0

             else:
                 # For all other topics, we can write rawdata directly, no need to deserialize
                 writer.write(new_connections[connection.id], timestamp, rawdata)
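Editor's sketch (not part of the commit): the range_smaller_* entries added above are u1 flag fields set to 255 wherever a point's range falls below the threshold key, assuming the range field uses the same unit as the thresholds. A minimal, self-contained illustration of that flag logic on a toy structured array:

import numpy as np

# Toy structured cloud with one flag field, mirroring the range_dtypes logic above.
cloud = np.zeros(4, dtype=[("range", "u4"), ("range_smaller_500", "u1")])
cloud["range"] = [120, 480, 900, 2500]
cloud["range_smaller_500"][cloud["range"] < 500] = 255
print(cloud["range_smaller_500"])  # [255 255   0   0]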
127 tools/evaluate_prc.py Normal file
@@ -0,0 +1,127 @@
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem, t
from sklearn.metrics import PrecisionRecallDisplay, auc


def confidence_interval(data, confidence=0.95):
    """Compute mean and margin of error for a given list of scores."""
    n = len(data)
    mean = np.mean(data)
    # Standard error of the mean:
    std_err = sem(data)
    # Confidence interval radius
    h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h


# 1) LOAD PRECISION-RECALL DATA
prc_data = []  # Stores (precision, recall) for each DeepSAD fold
ap_scores = []  # Average Precision for each DeepSAD fold

isoforest_prc_data = []  # Stores (precision, recall) for each IsoForest fold
isoforest_ap_scores = []  # Average Precision for each IsoForest fold

results_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_800_3000_new"
)

# We assume we have 5 folds (adjust if you have a different number)
for i in range(5):
    with (results_path / f"results_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        precision, recall, _ = data["test_prc"]  # (precision, recall, thresholds)
        prc_data.append((precision, recall))
        # Compute Average Precision (AP) via AUC of the (recall, precision) curve
        ap_scores.append(auc(recall, precision))

    with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        precision, recall, _ = data["test_prc"]
        isoforest_prc_data.append((precision, recall))
        isoforest_ap_scores.append(auc(recall, precision))

# 2) CALCULATE PER-FOLD STATISTICS
mean_ap, ap_ci = confidence_interval(ap_scores)
isoforest_mean_ap, isoforest_ap_ci = confidence_interval(isoforest_ap_scores)

# 3) INTERPOLATE EACH FOLD'S PRC ON A COMMON RECALL GRID FOR MEAN CURVE
mean_recall = np.linspace(0, 1, 100)

# -- DeepSAD
deep_sad_precisions_interp = []
for precision, recall in prc_data:
    # Interpolate precision values at mean_recall
    # (np.interp needs ascending x, so flip the decreasing-recall arrays)
    interp_precision = np.interp(mean_recall, np.flip(recall), np.flip(precision))
    deep_sad_precisions_interp.append(interp_precision)

mean_precision = np.mean(deep_sad_precisions_interp, axis=0)
std_precision = np.std(deep_sad_precisions_interp, axis=0)

# -- IsoForest
isoforest_precisions_interp = []
for precision, recall in isoforest_prc_data:
    interp_precision = np.interp(mean_recall, np.flip(recall), np.flip(precision))
    isoforest_precisions_interp.append(interp_precision)

isoforest_mean_precision = np.mean(isoforest_precisions_interp, axis=0)
isoforest_std_precision = np.std(isoforest_precisions_interp, axis=0)

# 4) CREATE PLOT USING PrecisionRecallDisplay
fig, ax = plt.subplots(figsize=(8, 6))

# (A) Plot each fold (optional) for DeepSAD
for i, (precision, recall) in enumerate(prc_data):
    disp = PrecisionRecallDisplay(precision=precision, recall=recall)
    # Label only the first fold to avoid legend clutter
    disp.plot(
        ax=ax, color="b", alpha=0.3, label=f"DeepSAD Fold {i+1}" if i == 0 else None
    )

# (B) Plot each fold (optional) for IsoForest
for i, (precision, recall) in enumerate(isoforest_prc_data):
    disp = PrecisionRecallDisplay(precision=precision, recall=recall)
    disp.plot(
        ax=ax, color="r", alpha=0.3, label=f"IsoForest Fold {i+1}" if i == 0 else None
    )

# (C) Plot mean curve for DeepSAD
mean_disp_deepsad = PrecisionRecallDisplay(precision=mean_precision, recall=mean_recall)
mean_disp_deepsad.plot(
    ax=ax, color="b", label=f"DeepSAD Mean PR (AP={mean_ap:.2f} ± {ap_ci:.2f})"
)
ax.fill_between(
    mean_recall,
    mean_precision - std_precision,
    mean_precision + std_precision,
    color="b",
    alpha=0.2,
)

# (D) Plot mean curve for IsoForest
mean_disp_isoforest = PrecisionRecallDisplay(
    precision=isoforest_mean_precision, recall=mean_recall
)
mean_disp_isoforest.plot(
    ax=ax,
    color="r",
    label=f"IsoForest Mean PR (AP={isoforest_mean_ap:.2f} ± {isoforest_ap_ci:.2f})",
)
ax.fill_between(
    mean_recall,
    isoforest_mean_precision - isoforest_std_precision,
    isoforest_mean_precision + isoforest_std_precision,
    color="r",
    alpha=0.2,
)

# 5) FINAL PLOT ADJUSTMENTS
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curve with 5-Fold Cross-Validation")
ax.legend(loc="upper right")

plt.savefig("pr_curve_800_3000_2.png")
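The interpolation onto mean_recall relies on np.interp, which expects its x-coordinates in ascending order; scikit-learn's precision-recall output is ordered by decreasing recall, hence the np.flip calls (the same pattern as in evaluate_prc_2.py below). A small illustration with made-up numbers:

import numpy as np

recall = np.array([1.0, 0.6, 0.3, 0.0])     # decreasing, as returned by sklearn
precision = np.array([0.5, 0.7, 0.9, 1.0])
grid = np.linspace(0, 1, 5)
# Flip both arrays so the x-coordinates (recall) are ascending.
print(np.interp(grid, np.flip(recall), np.flip(precision)))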
135 tools/evaluate_prc_2.py Normal file
@@ -0,0 +1,135 @@
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem, t
from sklearn.metrics import auc


# Confidence interval function
def confidence_interval(data, confidence=0.95):
    n = len(data)
    mean = np.mean(data)
    std_err = sem(data)
    h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h


# Load PRC (precision-recall) data and compute AP (average precision)
prc_data = []
ap_scores = []
isoforest_prc_data = []
isoforest_ap_scores = []

results_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_3000_800_2"
)

for i in range(5):
    with (results_path / f"results_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        # data["test_prc"] should be (precision, recall, thresholds)
        precision, recall, _ = data["test_prc"]
        prc_data.append((precision, recall))

        # Compute AP using area under the precision-recall curve
        ap_scores.append(auc(recall, precision))

    with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        precision, recall, _ = data["test_prc"]
        isoforest_prc_data.append((precision, recall))

        isoforest_ap_scores.append(auc(recall, precision))

# Calculate mean and confidence interval for DeepSAD AP scores
mean_ap, ap_ci = confidence_interval(ap_scores)

# Interpolate precision over a common recall range for DeepSAD
mean_recall = np.linspace(0, 1, 100)
precisions = []

for precision, recall in prc_data:
    # Make sure recall is sorted (usually is from sklearn)
    # Interpolate precision at the points in mean_recall
    interp_prec = np.interp(mean_recall, np.flip(recall), np.flip(precision))
    precisions.append(interp_prec)

mean_precision = np.mean(precisions, axis=0)
std_precision = np.std(precisions, axis=0)

# Calculate mean and confidence interval for IsoForest AP scores
isoforest_mean_ap, isoforest_ap_ci = confidence_interval(isoforest_ap_scores)

# Interpolate precision over a common recall range for IsoForest
isoforest_precisions = []
for precision, recall in isoforest_prc_data:
    interp_prec = np.interp(mean_recall, np.flip(recall), np.flip(precision))
    isoforest_precisions.append(interp_prec)

isoforest_mean_precision = np.mean(isoforest_precisions, axis=0)
isoforest_std_precision = np.std(isoforest_precisions, axis=0)

# Plot Precision-Recall curves with confidence margins
plt.figure(figsize=(8, 6))

# DeepSAD curve
plt.plot(
    mean_recall,
    mean_precision,
    color="b",
    label=f"DeepSAD Mean PR (AP = {mean_ap:.2f} ± {ap_ci:.2f})",
)
plt.fill_between(
    mean_recall,
    mean_precision - std_precision,
    mean_precision + std_precision,
    color="b",
    alpha=0.2,
    label="DeepSAD ± 1 std. dev.",
)

# IsoForest curve
plt.plot(
    mean_recall,
    isoforest_mean_precision,
    color="r",
    label=f"IsoForest Mean PR (AP = {isoforest_mean_ap:.2f} ± {isoforest_ap_ci:.2f})",
)
plt.fill_between(
    mean_recall,
    isoforest_mean_precision - isoforest_std_precision,
    isoforest_mean_precision + isoforest_std_precision,
    color="r",
    alpha=0.2,
    label="IsoForest ± 1 std. dev.",
)

# Optional: plot each fold's curve for DeepSAD
for i, (precision, recall) in enumerate(prc_data):
    plt.plot(
        recall,
        precision,
        lw=1,
        alpha=0.3,
        color="b",
        label=f"DeepSAD Fold {i + 1} PR" if i == 0 else "",
    )

# Optional: plot each fold's curve for IsoForest
for i, (precision, recall) in enumerate(isoforest_prc_data):
    plt.plot(
        recall,
        precision,
        lw=1,
        alpha=0.3,
        color="r",
        label=f"IsoForest Fold {i + 1} PR" if i == 0 else "",
    )

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve with 5-Fold Cross-Validation")
plt.legend(loc="upper right")
plt.savefig("pr_curve_800_3000_4.png")
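A short usage sketch of the confidence_interval helper on hypothetical fold scores (the numbers are invented for illustration):

ap_example = [0.81, 0.84, 0.79, 0.86, 0.83]
m, h = confidence_interval(ap_example)
print(f"AP = {m:.3f} ± {h:.3f} (95% CI over {len(ap_example)} folds)")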
50 tools/evaluate_prc_singlerun.py Normal file
@@ -0,0 +1,50 @@
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
from sklearn.metrics import auc

results_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_selective"
)

# Paths to your result files
deepsad_results_file = results_path / "results.pkl"
isoforest_results_file = results_path / "results_isoforest.pkl"

# Load DeepSAD precision-recall data
with deepsad_results_file.open("rb") as f:
    data = pickle.load(f)
    deep_precision, deep_recall, _ = data["test_prc"]

# Compute AP for DeepSAD
deep_ap = auc(deep_recall, deep_precision)

# Load IsoForest precision-recall data
with isoforest_results_file.open("rb") as f:
    data = pickle.load(f)
    iso_precision, iso_recall, _ = data["test_prc"]

# Compute AP for IsoForest
iso_ap = auc(iso_recall, iso_precision)

# Create plot
plt.figure(figsize=(8, 6))

# Plot DeepSAD PR curve
plt.plot(deep_recall, deep_precision, color="b", label=f"DeepSAD (AP = {deep_ap:.2f})")

# Plot IsoForest PR curve
plt.plot(iso_recall, iso_precision, color="r", label=f"IsoForest (AP = {iso_ap:.2f})")

# Labels
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve (Single Run)")

# Add legend
plt.legend(loc="upper right")

# Save and/or show plot
plt.savefig("pr_curve_single_run.png")
plt.show()
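An equivalent display-based variant for the single run (sketch only, reusing the deep_precision, deep_recall and deep_ap values loaded above):

from sklearn.metrics import PrecisionRecallDisplay

disp = PrecisionRecallDisplay(precision=deep_precision, recall=deep_recall)
disp.plot(color="b", label=f"DeepSAD (AP = {deep_ap:.2f})")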
82 tools/evaluate_roc.py Normal file
@@ -0,0 +1,82 @@
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem, t
from sklearn.metrics import auc


# Confidence interval function
def confidence_interval(data, confidence=0.95):
    n = len(data)
    mean = np.mean(data)
    std_err = sem(data)
    h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h


# Load ROC and AUC values from pickle files
roc_data = []
auc_scores = []
isoforest_roc_data = []
isoforest_auc_scores = []

results_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_0_0"
)

for i in range(5):
    with (results_path / f"results_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        roc_data.append(data["test_roc"])
        auc_scores.append(data["test_auc"])
    with (results_path / f"results.isoforest_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        isoforest_roc_data.append(data["test_roc"])
        isoforest_auc_scores.append(data["test_auc"])

# Calculate mean and confidence interval for AUC scores
mean_auc, auc_ci = confidence_interval(auc_scores)

# Combine ROC curves
mean_fpr = np.linspace(0, 1, 100)
tprs = []

for fpr, tpr, _ in roc_data:
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
std_tpr = np.std(tprs, axis=0)

# Plot ROC curves with confidence margins
plt.figure()
plt.plot(
    mean_fpr,
    mean_tpr,
    color="b",
    label=f"Mean ROC (AUC = {mean_auc:.2f} ± {auc_ci:.2f})",
)
plt.fill_between(
    mean_fpr,
    mean_tpr - std_tpr,
    mean_tpr + std_tpr,
    color="b",
    alpha=0.2,
    label="± 1 std. dev.",
)

# Plot each fold's ROC curve (optional)
for i, (fpr, tpr, _) in enumerate(roc_data):
    plt.plot(fpr, tpr, lw=1, alpha=0.3, label=f"Fold {i + 1} ROC")

# Labels and legend
plt.plot([0, 1], [0, 1], "k--", label="Chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve with 5-Fold Cross-Validation")
plt.legend(loc="lower right")
plt.savefig("roc_curve_0_0.png")
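For reference, a tiny synthetic example of the per-fold interpolation onto a common FPR grid used above (illustration only, values invented):

import numpy as np

mean_fpr = np.linspace(0, 1, 5)
fpr = np.array([0.0, 0.2, 0.6, 1.0])
tpr = np.array([0.0, 0.5, 0.8, 1.0])
interp_tpr = np.interp(mean_fpr, fpr, tpr)
interp_tpr[0] = 0.0  # force the curve through the origin, as in the script
print(interp_tpr)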
133 tools/evaluate_roc_all.py Normal file
@@ -0,0 +1,133 @@
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import sem, t
from sklearn.metrics import auc


# Confidence interval function
def confidence_interval(data, confidence=0.95):
    n = len(data)
    mean = np.mean(data)
    std_err = sem(data)
    h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h


# Load ROC and AUC values from pickle files
roc_data = []
auc_scores = []
isoforest_roc_data = []
isoforest_auc_scores = []

results_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_800_3000_new"
)

for i in range(5):
    with (results_path / f"results_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        roc_data.append(data["test_roc"])
        auc_scores.append(data["test_auc"])
    with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
        data = pickle.load(f)
        isoforest_roc_data.append(data["test_roc"])
        isoforest_auc_scores.append(data["test_auc"])

# Calculate mean and confidence interval for DeepSAD AUC scores
mean_auc, auc_ci = confidence_interval(auc_scores)

# Combine ROC curves for DeepSAD
mean_fpr = np.linspace(0, 1, 100)
tprs = []

for fpr, tpr, _ in roc_data:
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
std_tpr = np.std(tprs, axis=0)

# -- ADDED: Calculate mean and confidence interval for IsoForest AUC scores
isoforest_mean_auc, isoforest_auc_ci = confidence_interval(isoforest_auc_scores)

# -- ADDED: Combine ROC curves for IsoForest
isoforest_mean_fpr = np.linspace(0, 1, 100)
isoforest_tprs = []

for fpr, tpr, _ in isoforest_roc_data:
    interp_tpr = np.interp(isoforest_mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    isoforest_tprs.append(interp_tpr)

isoforest_mean_tpr = np.mean(isoforest_tprs, axis=0)
isoforest_mean_tpr[-1] = 1.0
isoforest_std_tpr = np.std(isoforest_tprs, axis=0)

# Plot ROC curves with confidence margins for DeepSAD
plt.figure(figsize=(8, 6))
plt.plot(
    mean_fpr,
    mean_tpr,
    color="b",
    label=f"DeepSAD Mean ROC (AUC = {mean_auc:.2f} ± {auc_ci:.2f})",
)
plt.fill_between(
    mean_fpr,
    mean_tpr - std_tpr,
    mean_tpr + std_tpr,
    color="b",
    alpha=0.2,
    label="DeepSAD ± 1 std. dev.",
)

# -- ADDED: Plot ROC curves with confidence margins for IsoForest
plt.plot(
    isoforest_mean_fpr,
    isoforest_mean_tpr,
    color="r",
    label=f"IsoForest Mean ROC (AUC = {isoforest_mean_auc:.2f} ± {isoforest_auc_ci:.2f})",
)
plt.fill_between(
    isoforest_mean_fpr,
    isoforest_mean_tpr - isoforest_std_tpr,
    isoforest_mean_tpr + isoforest_std_tpr,
    color="r",
    alpha=0.2,
    label="IsoForest ± 1 std. dev.",
)

# Plot each fold's ROC curve (optional) for DeepSAD
for i, (fpr, tpr, _) in enumerate(roc_data):
    plt.plot(
        fpr,
        tpr,
        lw=1,
        alpha=0.3,
        color="b",
        label=f"DeepSAD Fold {i+1} ROC" if i == 0 else "",
    )

# -- ADDED: Plot each fold's ROC curve (optional) for IsoForest
for i, (fpr, tpr, _) in enumerate(isoforest_roc_data):
    plt.plot(
        fpr,
        tpr,
        lw=1,
        alpha=0.3,
        color="r",
        label=f"IsoForest Fold {i+1} ROC" if i == 0 else "",
    )

# Labels and legend
plt.plot([0, 1], [0, 1], "k--", label="Chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve with 5-Fold Cross-Validation")
plt.legend(loc="lower right")
plt.savefig("roc_curve_800_3000_isoforest.png")
plt.show()
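The DeepSAD and IsoForest branches above repeat the same interpolate-and-average pattern; a shared helper could remove the duplication. Editor's sketch, assuming numpy is imported as np as in the script:

def mean_roc(roc_folds, grid_points=100):
    # Average per-fold ROC curves on a common FPR grid.
    mean_fpr = np.linspace(0, 1, grid_points)
    tprs = []
    for fpr, tpr, _ in roc_folds:
        interp_tpr = np.interp(mean_fpr, fpr, tpr)
        interp_tpr[0] = 0.0
        tprs.append(interp_tpr)
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    return mean_fpr, mean_tpr, np.std(tprs, axis=0)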
37 tools/list_topics.py Normal file
@@ -0,0 +1,37 @@
from pathlib import Path

import numpy as np
from PIL import Image
from rosbags.highlevel import AnyReader


def list_topics(bag_file):
    frame_count = 0
    output_folder = Path("./test") / bag_file.stem
    output_folder.mkdir(exist_ok=True, parents=True)

    with AnyReader([bag_file]) as reader:
        connections = reader.connections
        topics = dict(sorted({conn.topic: conn for conn in connections}.items()))

        topic = topics["/stereo_inertial_publisher/left/image_rect"]

        for connection, timestamp, rawdata in reader.messages(connections=[topic]):
            img_msg = reader.deserialize(rawdata, connection.msgtype)

            img_data = np.frombuffer(img_msg.data, dtype=np.uint8).reshape(
                (img_msg.height, img_msg.width)
            )
            img = Image.fromarray(img_data, mode="L")
            frame_count += 1
            img.save(output_folder / f"frame_{frame_count:04d}.png")

    print(f"Saved {frame_count} frames to {output_folder}")


if __name__ == "__main__":
    bag_path = Path(
        "/home/fedex/mt/data/subter/3_smoke_robot_stationary_static_excess_smoke_2023-01-23.bag"
    )
    print(f"{bag_path.exists()=}")
    list_topics(bag_path)
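The helper above extracts camera frames rather than listing topics; a minimal topic listing with rosbags (sketch, using only the reader's connections) would be:

from pathlib import Path

from rosbags.highlevel import AnyReader


def print_topics(bag_file: Path) -> None:
    # Print every topic and its message type found in the bag.
    with AnyReader([bag_file]) as reader:
        for conn in sorted(reader.connections, key=lambda c: c.topic):
            print(conn.topic, conn.msgtype)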
100 tools/plot_score.py Normal file
@@ -0,0 +1,100 @@
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

if __name__ == "__main__":
    # Path to your .npy file with anomaly scores
    scores_path = Path(
        "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/"
        "DeepSAD/subter_test/inference/3_smoke_human_walking_2023-01-23.npy"
    )

    # Frames per second
    fps = 10

    # Load all scores
    all_scores = np.load(scores_path)

    # (Optional) If your scores are a 1D array, remove or adjust this reshape
    # The final shape is expected to be (1, total_frames) or (N,) if you want only one line.
    all_scores = all_scores.reshape(1, -1)  # shape becomes (1, total_frames)

    # We only want frames [0..299] => 300 frames
    max_frames = 300
    scores = all_scores[0, :max_frames]  # Take the first row's first 300 frames

    # Convert frames to time in seconds: time[i] = i / fps
    time_values = np.arange(len(scores)) / fps  # shape is (max_frames,)

    # Recalculate y-limit based on the selected slice
    y_limit = 1.1 * np.max(scores[np.isfinite(scores)])
    # If you want to ensure y_limit >= 10, uncomment below:
    # y_limit = max(y_limit, 10)

    print("Selected Scores Shape:", scores.shape)
    print("Time Range:", time_values[0], "->", time_values[-1])
    print("Y-limit:", y_limit)

    # --------------------------
    # Improve default font sizes
    # --------------------------
    plt.rcParams.update(
        {
            "figure.dpi": 150,
            "font.size": 14,
            "axes.labelsize": 16,
            "axes.titlesize": 16,
            "xtick.labelsize": 14,
            "ytick.labelsize": 14,
            "legend.fontsize": 14,
        }
    )

    # --------------------------------------------------
    # Create the figure (half the previous width = 3.84)
    # --------------------------------------------------
    fig, ax = plt.subplots(figsize=(3.84, 7))

    # Plot the scores vs. time
    ax.plot(time_values, scores, label="Anomaly Score", color="blue")

    # Set axis labels
    ax.set_xlabel("Time [s]", fontsize=16)
    ax.set_ylabel("Score", fontsize=16)

    # Restrict X-axis from 0 to last time value (or 30 s if exactly 300 frames)
    ax.set_xlim([0, time_values[-1]])
    ax.set_ylim([0, y_limit])

    # -------------------------
    # Customize the ticks
    # -------------------------
    # Example: only 4 ticks on the x-axis (0, 10, 20, 30) if 300 frames => 30 s
    ax.set_xticks([0, 10, 20, 30])

    # Guarantee at least one tick at y=10.
    # Adjust other ticks as you like; here we use 0 and the y_limit.
    # If y_limit <= 10, you might want to override it.
    if y_limit > 10:
        ax.set_yticks([0, 10, round(y_limit, 1)])  # round for cleaner display
    else:
        # If your data doesn't go that high, just keep 0 and y_limit
        ax.set_yticks([0, round(y_limit, 1)])

    # Optionally add a legend
    ax.legend(loc="upper right")

    plt.axvline(x=22, color="r", label="shown image")

    # Tight layout for small figure
    plt.tight_layout()

    # ----------------------
    # Save to disk, no show
    # ----------------------
    output_filename = f"{scores_path.stem}_static_time_plot.png"
    plt.savefig(output_filename, dpi=150)

    # Clear the figure if you continue using matplotlib
    plt.clf()
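The red line at x=22 marks the frame shown in the accompanying image; the corresponding index in the score array follows directly from the frame rate (sketch, reusing fps and scores from above):

marked_frame = int(22 * fps)  # 220 for fps = 10
print("score at 22 s:", scores[marked_frame])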
179 tools/plot_scripts/data_count_lidar_frames.py Normal file
@@ -0,0 +1,179 @@
# this script loads the numpy array files containing the lidar frames and counts the number of frames in each file
# the number of frames is then printed to the console per file as well as an aggregated sum of all frames in files
# containing the word smoke and ones that do not contain that word, as well as an overall sum of all frames
# We also plot a pie chart of the distribution of data points in normal and anomalous experiments

import shutil
from datetime import datetime
from pathlib import Path

import numpy as np
from tabulate import tabulate

# define data path containing the numpy array files and output path for the plots
data_path = Path("/home/fedex/mt/data/subter")

output_path = Path("/home/fedex/mt/plots/data_count_lidar_frames")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# if output does not exist, create it
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)


# find all numpy array files and sort them correctly by name
normal_experiment_paths, anomaly_experiment_paths = [], []
for npy_file_path in data_path.iterdir():
    if npy_file_path.suffix != ".npy":
        continue
    if "smoke" in npy_file_path.name:
        anomaly_experiment_paths.append(npy_file_path)
    else:
        normal_experiment_paths.append(npy_file_path)


# function that counts the number of frames in one experiment
def count_frames(npy_file_path):
    frames = np.load(npy_file_path).shape[0]
    return frames


# we want to print the numbers of frames in a table so we first gather all the data in two maps
normal_experiment_frames = {
    npy_file_path.stem: count_frames(npy_file_path)
    for npy_file_path in normal_experiment_paths
}
anomaly_experiment_frames = {
    npy_file_path.stem: count_frames(npy_file_path)
    for npy_file_path in anomaly_experiment_paths
}

# prepare data for tabulate
normal_experiment_table = [
    (experiment, frames) for experiment, frames in normal_experiment_frames.items()
]
anomaly_experiment_table = [
    (experiment, frames) for experiment, frames in anomaly_experiment_frames.items()
]

# sort the tables by experiment name
normal_experiment_table.sort(key=lambda x: x[0])
anomaly_experiment_table.sort(key=lambda x: x[0])

# add the sum of all frames to the tables
normal_experiment_table.append(("Sum", sum(normal_experiment_frames.values())))
anomaly_experiment_table.append(("Sum", sum(anomaly_experiment_frames.values())))

# print the number of frames in each file using tabulate
print("Normal experiments:")
print(
    tabulate(normal_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid")
)

# print the smallest, largest, mean and median time of the normal experiments assuming 10 frames per second
normal_experiment_frames_values = list(normal_experiment_frames.values())
print(
    f"Smallest time: {min(normal_experiment_frames_values) / 10} seconds, Largest time: {max(normal_experiment_frames_values) / 10} seconds, Mean time: {np.mean(normal_experiment_frames_values) / 10} seconds, Median time: {np.median(normal_experiment_frames_values) / 10} seconds"
)

print("Anomaly experiments:")
print(
    tabulate(
        anomaly_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid"
    )
)

# print the smallest, largest, mean and median time of the anomalous experiments assuming 10 frames per second
anomaly_experiment_frames_values = list(anomaly_experiment_frames.values())
print(
    f"Smallest time: {min(anomaly_experiment_frames_values) / 10} seconds, Largest time: {max(anomaly_experiment_frames_values) / 10} seconds, Mean time: {np.mean(anomaly_experiment_frames_values) / 10} seconds, Median time: {np.median(anomaly_experiment_frames_values) / 10} seconds"
)

# print the sum of all frames in all experiments
total_frames = sum(normal_experiment_frames.values()) + sum(
    anomaly_experiment_frames.values()
)
print(f"Total frames in all (normal and anomaly) experiments: {total_frames} frames")

# print the sum of normal and anomalous experiments as percentage of the total frames
print(
    f"Percentage of normal experiments: {sum(normal_experiment_frames.values()) / total_frames * 100}%"
)
print(
    f"Percentage of anomaly experiments: {sum(anomaly_experiment_frames.values()) / total_frames * 100}%"
)
sum(normal_experiment_frames.values()) + sum(anomaly_experiment_frames.values())


# define function to plot pie chart of the distribution of data points in normal and anomalous experiments
def plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames):
    import matplotlib.pyplot as plt

    # we want to plot the sum of all frames in normal and anomaly experiments as total values and also percentages
    total_normal_frames = sum(normal_experiment_frames.values())
    total_anomaly_frames = sum(anomaly_experiment_frames.values())
    total_frames = total_normal_frames + total_anomaly_frames

    # prepare data for pie chart
    labels = [
        "Normal Lidar Frames\nNon-Degraded Pointclouds",
        "Anomalous Lidar Frames\nDegraded Pointclouds",
    ]
    sizes = [total_normal_frames, total_anomaly_frames]
    explode = (0.1, 0)  # explode the normal slice

    # define an autopct function that shows percentage and total number of frames per slice
    def make_autopct(pct):
        return f"{pct:.1f}%\n({int(pct * total_frames / 100)} frames)"

    # plot pie chart without shadow and with custom autopct
    fig1, ax1 = plt.subplots()
    # set a figure size of 10x5
    fig1.set_size_inches(10, 5)
    ax1.pie(sizes, explode=explode, labels=labels, autopct=make_autopct, shadow=False)
    # for labels use center alignment

    ax1.axis("equal")  # Equal aspect ratio ensures that pie is drawn as a circle.
    # display the total number of frames in the center of the pie chart (adjusted vertically)
    plt.text(
        0,
        0.2,
        f"Total:\n{total_frames} frames",
        fontsize=12,
        ha="center",
        va="center",
        color="black",
    )
    plt.title(
        "Distribution of Normal and Anomalous\nPointclouds in all Experiments (Lidar Frames)"
    )
    plt.tight_layout()

    # save the plot
    plt.savefig(output_datetime_path / "data_points_pie.png")


plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames)

# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)

# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
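Durations are derived from frame counts at an assumed 10 Hz in several print statements; a small helper keeps that conversion in one place (sketch, reusing the dicts built above):

FPS = 10  # assumed lidar frame rate

def duration_seconds(frame_counts):
    # Map experiment name -> duration in seconds at the assumed frame rate.
    return {name: frames / FPS for name, frames in frame_counts.items()}

print(duration_seconds(normal_experiment_frames))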
243 tools/plot_scripts/data_missing_points.py Normal file
@@ -0,0 +1,243 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from pointcloudset import Dataset

# define data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")

output_path = Path("/home/fedex/mt/plots/data_missing_points")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# if output does not exist, create it
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

data_resolution = 32 * 2048

normal_experiment_paths, anomaly_experiment_paths = [], []

# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix != ".bag":
        continue
    if "smoke" in bag_file_path.name:
        anomaly_experiment_paths.append(bag_file_path)
    else:
        normal_experiment_paths.append(bag_file_path)

# sort anomaly and normal experiments by filesize, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)


# function that plots histogram of how many points are missing in pointclouds for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
    # function that finds the number of missing points in list of experiments (used for both normal and anomalous)
    def find_missing_points(experiment_paths):
        missing_points = []
        for dataset in (
            Dataset.from_file(experiment_path, topic="/ouster/points")
            for experiment_path in experiment_paths
        ):
            missing_points_per_pc = []
            for pc in dataset:
                missing_points_per_pc.append(data_resolution - pc.data.shape[0])
            missing_points.append(missing_points_per_pc)
            # FIXME temporary break to test code on only one experiment
            # break
        return missing_points

    # check if the data has already been calculated and saved to a pickle file
    if (output_path / "missing_points.pkl").exists():
        with open(output_path / "missing_points.pkl", "rb") as file:
            missing_points_normal, missing_points_anomaly = pickle.load(file)
    else:
        missing_points_normal = find_missing_points(normal_experiment_paths)
        missing_points_anomaly = find_missing_points(anomaly_experiment_paths)

        # for faster subsequent runs, save the data to a pickle file
        with open(output_path / "missing_points.pkl", "wb") as file:
            pickle.dump(
                (missing_points_normal, missing_points_anomaly),
                file,
                protocol=pickle.HIGHEST_PROTOCOL,
            )

    # combine all missing points into one list for each type of experiment
    missing_points_normal = np.concatenate(missing_points_normal)
    missing_points_anomaly = np.concatenate(missing_points_anomaly)

    # create histogram of missing points for normal and anomaly experiments
    plt.figure(figsize=(10, 5))
    plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
    plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Number of Pointclouds")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points.png")

    plt.clf()

    # alternatively plot curves representing the data

    # create alternative version with missing points on y axis and number of pointclouds on x axis
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal,
        bins=100,
        alpha=0.5,
        label="Normal Experiments",
        orientation="horizontal",
    )
    plt.hist(
        missing_points_anomaly,
        bins=100,
        alpha=0.5,
        label="Anomaly Experiments",
        orientation="horizontal",
    )
    plt.title(title)
    plt.xlabel("Number of Pointclouds")
    plt.ylabel("Number of Missing Points")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_alternative.png")

    # find min and max of both categories so we can set the same limits for both plots
    min = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
    max = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])

    # create bins array with min and max values
    bins = np.linspace(min, max, 100)

    # since the two histograms (normal and anomalous) have different scales, normalize their amplitude and plot a density version as well
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal,
        bins=bins,
        alpha=0.5,
        label="Normal Experiments",
        color="blue",
        density=True,
    )
    plt.hist(
        missing_points_anomaly,
        bins=bins,
        alpha=0.5,
        color="red",
        label="Anomaly Experiments",
        density=True,
    )
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Density")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_density.png")

    # create another density version which does not plot number of missing points but percentage of measurements that are missing (total number of points is 32*2048)
    bins = np.linspace(0, 1, 100)
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal / data_resolution,
        bins=bins,
        alpha=0.5,
        label="Normal Experiments (No Artificial Smoke)",
        color="blue",
        density=True,
    )
    plt.hist(
        missing_points_anomaly / data_resolution,
        bins=bins,
        alpha=0.5,
        color="red",
        label="Anomaly Experiments (With Artificial Smoke)",
        density=True,
    )
    plt.title(title)
    plt.xlabel("Percentage of Missing Lidar Measurements")
    plt.ylabel("Density")
    # display the x axis as percentages
    plt.gca().set_xticklabels(
        ["{:.0f}%".format(x * 100) for x in plt.gca().get_xticks()]
    )
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_density_percentage.png")

    # matplotlib does not support normalizing the histograms to the same scale, so we do it manually using numpy
    num_bins = 100
    bin_lims = np.linspace(0, 40000, num_bins + 1)
    bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
    bin_widths = bin_lims[1:] - bin_lims[:-1]
    # calculate the histogram for normal and anomaly experiments
    normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
    anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
    # normalize the histograms to the same scale
    normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
    anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
    # plot the normalized histograms
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.bar(
        bin_centers,
        normal_hist_normalized,
        width=bin_widths,
        align="center",
        alpha=0.5,
        label="Normal Experiments",
    )
    plt.bar(
        bin_centers,
        anomaly_hist_normalized,
        width=bin_widths,
        align="center",
        alpha=0.5,
        label="Anomaly Experiments",
    )
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Normalized Density")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_normalized.png")


# plot histogram of missing points for normal and anomaly experiments
plot_data_points(
    normal_experiment_paths,
    anomaly_experiment_paths,
    "Missing Lidar Measurements per Scan",
)

# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)

# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
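The pickle cache above is keyed only on the file's existence, so results stay stale after new bags are added; deleting the cache forces a recount (sketch, assuming output_path as defined in the script):

cache_file = output_path / "missing_points.pkl"
if cache_file.exists():
    cache_file.unlink()  # next run recomputes find_missing_points()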
220
tools/plot_scripts/data_particles_near_sensor.py
Normal file
220
tools/plot_scripts/data_particles_near_sensor.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
import pickle
|
||||||
|
import shutil
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
from pointcloudset import Dataset
|
||||||
|
|
||||||
|
# define data path containing the bag files
|
||||||
|
all_data_path = Path("/home/fedex/mt/data/subter")
|
||||||
|
|
||||||
|
output_path = Path("/home/fedex/mt/plots/data_particles_near_sensor")
|
||||||
|
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||||
|
|
||||||
|
latest_folder_path = output_path / "latest"
|
||||||
|
archive_folder_path = output_path / "archive"
|
||||||
|
output_datetime_path = output_path / datetime_folder_name
|
||||||
|
|
||||||
|
# if output does not exist, create it
|
||||||
|
output_path.mkdir(exist_ok=True, parents=True)
|
||||||
|
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||||
|
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||||
|
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
|
normal_experiment_paths, anomaly_experiment_paths = [], []
|
||||||
|
|
||||||
|
# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
|
||||||
|
for bag_file_path in all_data_path.iterdir():
|
||||||
|
if bag_file_path.suffix != ".bag":
|
||||||
|
continue
|
||||||
|
if "smoke" in bag_file_path.name:
|
||||||
|
anomaly_experiment_paths.append(bag_file_path)
|
||||||
|
else:
|
||||||
|
normal_experiment_paths.append(bag_file_path)
|
||||||
|
|
||||||
|
# print out the names of the normal and anomaly experiments that we found
|
||||||
|
print("Normal experiments:")
|
||||||
|
for path in normal_experiment_paths:
|
||||||
|
print(path.name)
|
||||||
|
print("\nAnomaly experiments:")
|
||||||
|
for path in anomaly_experiment_paths:
|
||||||
|
print(path.name)
|
||||||
|
|
||||||
|
# sort anomaly and normal experiments by filesize, ascending
|
||||||
|
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||||
|
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||||
|
|
||||||
|
|
||||||
|
# function that plots histogram of how many measurements have a range smaller than 0.5m for both normal and anomaly experiments
|
||||||
|
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||||
|
# function that finds the number of measurements with a range smaller than 0.5m in list of experiments (used for both normal and anomalous)
|
||||||
|
def find_particles_near_sensor(experiment_paths, range_threshold):
|
||||||
|
particles_near_sensor = []
|
||||||
|
for dataset in (
|
||||||
|
Dataset.from_file(experiment_path, topic="/ouster/points")
|
||||||
|
for experiment_path in experiment_paths
|
||||||
|
):
|
||||||
|
particles_near_sensor_per_pc = []
|
||||||
|
for pc in dataset:
|
||||||
|
particles_near_sensor_per_pc.append(
|
||||||
|
pc.data[pc.data["range"] < range_threshold].shape[0]
|
||||||
|
)
|
||||||
|
particles_near_sensor.append(particles_near_sensor_per_pc)
|
||||||
|
return particles_near_sensor
|
||||||
|
|
||||||
|
range_thresholds = [500, 750, 1000, 1250, 1500]
|
||||||
|
|
||||||
|
for rt in range_thresholds:
|
||||||
|
print(f"Processing range threshold {rt}...")
|
||||||
|
|
||||||
|
# check if the data has already been calculated and saved to a pickle file
|
||||||
|
if (output_path / f"particles_near_sensor_counts_{rt}.pkl").exists():
|
||||||
|
with open(
|
||||||
|
output_path / f"particles_near_sensor_counts_{rt}.pkl", "rb"
|
||||||
|
) as file:
|
||||||
|
particles_near_sensor_normal, particles_near_sensor_anomaly = (
|
||||||
|
pickle.load(file)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
particles_near_sensor_normal = find_particles_near_sensor(
|
||||||
|
normal_experiment_paths,
|
||||||
|
rt,
|
||||||
|
)
|
||||||
|
particles_near_sensor_anomaly = find_particles_near_sensor(
|
||||||
|
anomaly_experiment_paths,
|
||||||
|
rt,
|
||||||
|
)
|
||||||
|
|
||||||
|
# for faster subsequent runs, save the data to a pickle file
|
||||||
|
with open(output_path / f"particles_near_sensor_counts_{rt}.pkl", "wb") as file:
|
||||||
|
pickle.dump(
|
||||||
|
(particles_near_sensor_normal, particles_near_sensor_anomaly),
|
||||||
|
file,
|
||||||
|
protocol=pickle.HIGHEST_PROTOCOL,
|
||||||
|
)
|
||||||
|
|
||||||
|
# combine all counts of how many particles are close to the sensor into one list for each type of experiment
|
||||||
|
particles_near_sensor_normal = np.concatenate(particles_near_sensor_normal)
|
||||||
|
particles_near_sensor_anomaly = np.concatenate(particles_near_sensor_anomaly)
|
||||||
# find min and max of both categories so we can set the same limits for both plots
min = np.min(
    [
        np.min(particles_near_sensor_normal),
        np.min(particles_near_sensor_anomaly),
    ]
)
max = np.max(
    [
        np.max(particles_near_sensor_normal),
        np.max(particles_near_sensor_anomaly),
    ]
)

# create bins array with min and max values
bins = np.linspace(min, max, 100)

# since the two histograms (normal and anomalous) have different scales, normalize their
# amplitude and plot a density version as well
# commented out since the boxplot is more informative
# plt.clf()
# plt.figure(figsize=(10, 5))
# plt.hist(
#     particles_near_sensor_normal,
#     bins=bins,
#     alpha=0.5,
#     label="Normal Experiments (No Artificial Smoke)",
#     color="blue",
#     density=True,
# )
# plt.hist(
#     particles_near_sensor_anomaly,
#     bins=bins,
#     alpha=0.5,
#     color="red",
#     label="Anomaly Experiments (With Artificial Smoke)",
#     density=True,
# )
# plt.title(title)
# plt.xlabel("Number of Particles Near Sensor")
# plt.ylabel("Density")
# plt.legend()
# plt.tight_layout()
# plt.savefig(output_datetime_path / f"particles_near_sensor_density_{rt}.png")

# alternatively, create a box plot to show the distribution of the data
# instead of plotting the frequency of particles near the sensor, we plot the percentage of
# points (compared to the total number of points in the point cloud)

data_resolution = 32 * 2048

plt.clf()
plt.figure(figsize=(10, 5))
plt.boxplot(
    [
        particles_near_sensor_normal / data_resolution,
        particles_near_sensor_anomaly / data_resolution,
    ],
    tick_labels=[
        "Normal Experiments (No Artificial Smoke)",
        "Anomaly Experiments (With Artificial Smoke)",
    ],
)
# format the y-axis labels as percentages
plt.gca().set_yticklabels(
    ["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
)
plt.title("Particles Closer than 0.5m to the Sensor")
plt.ylabel("Percentage of measurements closer than 0.5m")
plt.tight_layout()
plt.savefig(output_datetime_path / f"particles_near_sensor_boxplot_{rt}.png")

# we create the same boxplot but limit the y-axis to 5% to better see the distribution of the data
plt.clf()
plt.figure(figsize=(10, 5))
plt.boxplot(
    [
        particles_near_sensor_normal / data_resolution,
        particles_near_sensor_anomaly / data_resolution,
    ],
    tick_labels=[
        "Normal Experiments (No Artificial Smoke)",
        "Anomaly Experiments (With Artificial Smoke)",
    ],
)
# format the y-axis labels as percentages
plt.gca().set_yticklabels(
    ["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
)
plt.title("Particles Closer than 0.5m to the Sensor")
plt.ylabel("Percentage of measurements closer than 0.5m")
plt.ylim(0, 0.05)
plt.tight_layout()
plt.savefig(
    output_datetime_path / f"particles_near_sensor_boxplot_zoomed_{rt}.png"
)


# plot how many measurements have a range smaller than 0.5m for both normal and anomaly experiments
plot_data_points(
    normal_experiment_paths,
    anomaly_experiment_paths,
    "Density of Number of Particles Near Sensor",
)

# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)

# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to the output datetime folder to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
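A note on the percentage conversion used above: dividing the near-sensor count by the fixed scan resolution of 32 x 2048 points (rather than by the number of returned points) keeps frames with many dropped returns comparable to complete frames. A minimal sketch of that computation for a single frame is shown below; the helper name, the (N, 3) input layout, and the 0.5 m default are illustrative assumptions, not part of the script above.

import numpy as np

SCAN_RESOLUTION = 32 * 2048  # vertical beams x horizontal steps, as assumed by the script


def near_sensor_fraction(points_xyz: np.ndarray, threshold_m: float = 0.5) -> float:
    # Fraction of the full scan pattern that returned a point closer than threshold_m.
    ranges = np.linalg.norm(points_xyz, axis=1)
    return np.count_nonzero(ranges < threshold_m) / SCAN_RESOLUTION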
188	tools/plot_scripts/data_spherical_projection.py	Normal file
@@ -0,0 +1,188 @@
import argparse
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colormaps
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Colormap, ListedColormap
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from PIL import Image  # new import


def get_colormap_with_special_missing_color(
    colormap_name: str, missing_data_color: str = "black", reverse: bool = False
) -> Colormap:
    """Return a colormap that draws NaNs in `missing_data_color` and values above vmax in white."""
    colormap = (
        colormaps[colormap_name] if not reverse else colormaps[f"{colormap_name}_r"]
    )
    colormap.set_bad(missing_data_color)
    colormap.set_over("white")
    return colormap
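For readers unfamiliar with the two out-of-range colours configured here: set_bad controls how NaN pixels (missing returns) are drawn, while set_over controls pixels above vmax. A small self-contained sketch of that behaviour follows; the example array, the output file name, and the use of Colormap.copy() (to avoid mutating the registered colormap on newer matplotlib versions) are assumptions for illustration only.

import matplotlib.pyplot as plt
import numpy as np

cmap = plt.get_cmap("viridis").copy()  # copy so the globally registered colormap stays untouched
cmap.set_bad("black")   # colour used for NaN pixels
cmap.set_over("white")  # colour used for pixels above vmax

demo = np.array([[0.1, 0.4, np.nan], [0.7, 0.9, 2.0]])
plt.imshow(demo, cmap=cmap, vmin=0.0, vmax=0.8)  # NaN -> black, 2.0 -> white
plt.colorbar(extend="max")
plt.savefig("colormap_demo.png")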
# --- Setup output folders (similar to data_missing_points.py) ---
# Change the output path as needed
output_path = Path("/home/fedex/mt/plots/data_2d_projections")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_datetime_path = output_path / datetime_folder_name
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"

for folder in (
    output_path,
    output_datetime_path,
    latest_folder_path,
    archive_folder_path,
):
    folder.mkdir(exist_ok=True, parents=True)

# --- Parse command-line arguments ---
parser = argparse.ArgumentParser(
    description="Plot two 2D projections (from .npy files) in vertical subplots"
)
parser.add_argument(
    "--input1",
    type=Path,
    default=Path(
        "/home/fedex/mt/data/subter/new_projection/1_loop_closure_illuminated_2023-01-23.npy"
    ),
    help="Path to first .npy file containing 2D projection data",
)
parser.add_argument(
    "--input2",
    type=Path,
    default=Path(
        "/home/fedex/mt/data/subter/new_projection/3_smoke_human_walking_2023-01-23.npy"
    ),
    help="Path to second .npy file containing 2D projection data",
)
parser.add_argument(
    "--frame1",
    type=int,
    default=955,
    help="Frame index to plot from first file (0-indexed)",
)
parser.add_argument(
    "--frame2",
    type=int,
    default=242,
    help="Frame index to plot from second file (0-indexed)",
)
parser.add_argument(
    "--colormap",
    default="viridis",
    type=str,
    help="Name of matplotlib colormap to use",
)
parser.add_argument(
    "--missing-data-color",
    default="black",
    type=str,
    help="Color to use for missing data in projection",
)
parser.add_argument(
    "--reverse-colormap",
    action="store_true",
    help="Reverse the colormap if specified",
)
args = parser.parse_args()

# --- Load the numpy projection data from the provided files ---
# Each file is assumed to be a 3D array: (num_frames, height, width)
proj_data1 = np.load(args.input1)
proj_data2 = np.load(args.input2)

# Choose the desired frames
try:
    frame1 = proj_data1[args.frame1]
except IndexError:
    raise ValueError(f"Frame index {args.frame1} out of range for file: {args.input1}")
try:
    frame2 = proj_data2[args.frame2]
except IndexError:
    raise ValueError(f"Frame index {args.frame2} out of range for file: {args.input2}")

# Determine shared range across both frames (ignoring NaNs)
global_vmin = 0  # min(np.nanmin(frame1), np.nanmin(frame2))
global_vmax = 0.8  # max(np.nanmax(frame1), np.nanmax(frame2))

# Create colormap using the utility (to mimic create_2d_projection)
cmap = get_colormap_with_special_missing_color(
    args.colormap, args.missing_data_color, args.reverse_colormap
)

# --- Create a figure with 2 vertical subplots ---
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
for ax, frame, title in zip(
    (ax1, ax2),
    (frame1, frame2),
    (
        "Projection of Lidar Frame without Degradation",
        "Projection of Lidar Frame with Degradation (Artificial Smoke)",
    ),
):
    im = ax.imshow(frame, cmap=cmap, aspect="auto", vmin=global_vmin, vmax=global_vmax)
    ax.set_title(title)
    ax.axis("off")

# Adjust layout to fit margins for a paper
plt.tight_layout(rect=[0, 0.05, 1, 1])
# Add a shared colorbar for the colormap next to the subplots
cbar = fig.colorbar(im, ax=[ax1, ax2], orientation="vertical", fraction=0.05)
cbar.set_label("Normalized Range")

# Add a separate colorbar for NaN and over-range values
sm = ScalarMappable(cmap=ListedColormap([cmap.get_bad(), cmap.get_over()]))
divider = make_axes_locatable(cbar.ax)
nan_ax = divider.append_axes(
    "bottom", size="15%", pad="3%", aspect=3, anchor=cbar.ax.get_anchor()
)
nan_ax.grid(visible=False, which="both", axis="both")
nan_cbar = fig.colorbar(sm, cax=nan_ax, orientation="vertical")
nan_cbar.set_ticks([0.3, 0.7])
nan_cbar.set_ticklabels(["NaN", f"> {global_vmax}"])
nan_cbar.ax.tick_params(length=0)

# Save the combined plot
output_file = output_datetime_path / "data_2d_projections.png"
plt.savefig(output_file, dpi=300, bbox_inches="tight", pad_inches=0.1)
plt.close()

print(f"Plot saved to: {output_file}")

# --- Create grayscale images (high precision) from the numpy frames using Pillow ---
# Convert NaN values to 0 and ensure the array is in float32 for 32-bit precision.
for degradation_status, frame_number, frame in (
    ("normal", args.frame1, frame1),
    ("smoke", args.frame2, frame2),
):
    frame_gray = np.nan_to_num(frame, nan=0).astype(np.float32)
    gray_image = Image.fromarray(frame_gray, mode="F")
    gray_output_file = (
        output_datetime_path
        / f"frame_{frame_number}_grayscale_{degradation_status}.tiff"
    )
    gray_image.save(gray_output_file)
    print(f"Grayscale image saved to: {gray_output_file}")

# --- Handle folder structure: update latest folder and archive the output folder ---

# Delete current latest folder and recreate it
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)

# Copy contents of the current output datetime folder to latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# Copy this python script to both output datetime folder and latest folder for preservation
script_path = Path(__file__)
shutil.copy2(script_path, output_datetime_path)
shutil.copy2(script_path, latest_folder_path)

# Move the output datetime folder to the archive folder
shutil.move(output_datetime_path, archive_folder_path)
print(f"Output archived to: {archive_folder_path}")
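Because the frames are written through Pillow in mode "F", the TIFF files store the normalized ranges as 32-bit floats rather than 8-bit integers, so no precision is lost for later processing. A quick round-trip sanity check is sketched below; the random stand-in frame and the file name are made up, and this assumes Pillow's TIFF plugin supports float32 data (recent versions do).

import numpy as np
from PIL import Image

frame = np.random.rand(32, 2048).astype(np.float32)  # stand-in for one projection frame
Image.fromarray(frame, mode="F").save("roundtrip_check.tiff")

restored = np.array(Image.open("roundtrip_check.tiff"), dtype=np.float32)
assert restored.shape == frame.shape
assert np.allclose(restored, frame)  # the float values survive the save/load cycle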
160	tools/plot_scripts/deepsad_tsne_latent_space.py	Normal file
@@ -0,0 +1,160 @@
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# define data path containing the npy output files from your method
all_data_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/all_infer/inference"
)

output_path = Path("/home/fedex/mt/plots/deepsad_reduced_latent_space")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# create required output directories if they do not exist
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

normal_experiment_paths = []
anomaly_experiment_paths = []

# locate and sort the npy files (experiment outputs) based on file size
for file_path in all_data_path.iterdir():
    if file_path.suffix != ".npy":
        continue
    # check if the file name contains "output" to ensure it's an experiment output file
    if "output" not in file_path.stem:
        continue
    if "smoke" in file_path.name:
        anomaly_experiment_paths.append(file_path)
    else:
        normal_experiment_paths.append(file_path)

print("Normal experiments:")
for path in normal_experiment_paths:
    print(path.name)

print("\nAnomaly experiments:")
for path in anomaly_experiment_paths:
    print(path.name)

normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)


def load_latent_space_data(experiment_paths):
    """
    Load latent space data from npy files and return a single numpy array.
    Modify this function if your file structure is different.
    """
    data_list = []
    for path in experiment_paths:
        latent_data = np.load(path)
        data_list.append(latent_data)
    return np.vstack(data_list)


def reduce_dimensionality(data, n_components=50):
    """
    Reduce the dimensionality of the data using PCA.
    This function can be re-used by TSNE or other methods for an initial reduction.
    """
    pca = PCA(n_components=n_components, random_state=42)
    return pca.fit_transform(data)


def plot_tsne_latent_space(normal_data, anomaly_data, title="TSNE of Latent Space"):
    """
    Plot the TSNE representation of the latent space.
    This function first applies a PCA-based dimensionality reduction for efficiency.
    """
    # Combine normal and anomaly data
    combined_data = np.vstack((normal_data, anomaly_data))

    # Initial dimensionality reduction with PCA
    reduced_data = reduce_dimensionality(combined_data, n_components=50)

    # Apply TSNE transformation on the PCA-reduced data
    tsne = TSNE(n_components=2, random_state=42)
    tsne_results = tsne.fit_transform(reduced_data)

    # Split the TSNE results back into normal and anomaly arrays
    tsne_normal = tsne_results[: len(normal_data)]
    tsne_anomaly = tsne_results[len(normal_data) :]

    # Plotting TSNE results
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.scatter(
        tsne_anomaly[:, 0], tsne_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
    )
    plt.scatter(
        tsne_normal[:, 0], tsne_normal[:, 1], label="Normal", alpha=0.6, marker="o"
    )
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "tsne_latent_space_plot.png")


def plot_pca_scatter(normal_data, anomaly_data, title="PCA Scatter Plot"):
    """
    Plot a 2-dimensional scatterplot of the latent space using PCA.
    This is useful for visualization and can be easily extended.
    """
    # Combine normal and anomaly data
    combined_data = np.vstack((normal_data, anomaly_data))
    pca = PCA(n_components=2, random_state=42)
    pca_results = pca.fit_transform(combined_data)

    # Split the PCA results back into normal and anomaly arrays
    pca_normal = pca_results[: len(normal_data)]
    pca_anomaly = pca_results[len(normal_data) :]

    # Plotting PCA scatter results
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.scatter(
        pca_anomaly[:, 0], pca_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
    )
    plt.scatter(
        pca_normal[:, 0], pca_normal[:, 1], label="Normal", alpha=0.6, marker="o"
    )
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "pca_latent_space_plot.png")


# load latent space data for both normal and anomalous experiments
normal_data = load_latent_space_data(normal_experiment_paths)
anomaly_data = load_latent_space_data(anomaly_experiment_paths)

# call your plotting functions
plot_tsne_latent_space(normal_data, anomaly_data)
plot_pca_scatter(normal_data, anomaly_data)

# update the 'latest' results folder: delete previous and copy current outputs
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)

for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this script to the output folder and to the latest folder to preserve the used code
script_path = Path(__file__)
shutil.copy2(script_path, output_datetime_path)
shutil.copy2(script_path, latest_folder_path)

# move the output date folder to the archive folder for record keeping
shutil.move(output_datetime_path, archive_folder_path)
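The two projections produced by this script are purely visual. If a single number for how well the smoke and non-smoke latent codes separate is also useful, one option (an illustrative addition, not part of the committed script) is the silhouette score over the stacked embeddings:

import numpy as np
from sklearn.metrics import silhouette_score

# labels: 0 = normal, 1 = anomaly, in the same order the latent vectors were stacked
labels = np.concatenate(
    [np.zeros(len(normal_data), dtype=int), np.ones(len(anomaly_data), dtype=int)]
)
embedding = np.vstack((normal_data, anomaly_data))  # or the 2-D t-SNE / PCA results
print("silhouette score:", silhouette_score(embedding, labels))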
@@ -20,6 +20,7 @@ dask = "^2024.4.2"
 dask-expr = "^1.1.3"
 pandas = "^2.2.2"
 pathvalidate = "^3.2.0"
+tabulate = "^0.9.0"
 
 [build-system]
 requires = ["poetry-core"]
@@ -137,47 +137,47 @@ def process_frame(args) -> Tuple[int, np.ndarray, Optional[Path]]:
     lidar_data["horizontal_position"] = (
         lidar_data["original_id"] % horizontal_resolution
     )
-    lidar_data["horizontal_position_yaw_f"] = (
-        0.5
-        * horizontal_resolution
-        * (np.arctan2(lidar_data["y"], lidar_data["x"]) / pi + 1.0)
-    )
-    lidar_data["horizontal_position_yaw"] = np.floor(
-        lidar_data["horizontal_position_yaw_f"]
-    )
+    # lidar_data["horizontal_position_yaw_f"] = (
+    #     0.5
+    #     * horizontal_resolution
+    #     * (np.arctan2(lidar_data["y"], lidar_data["x"]) / pi + 1.0)
+    # )
+    # lidar_data["horizontal_position_yaw"] = np.floor(
+    #     lidar_data["horizontal_position_yaw_f"]
+    # )
     lidar_data["vertical_position"] = np.floor(
         lidar_data["original_id"] / horizontal_resolution
     )
     # fov = 32 * pi / 180
     # fov_down = 17 * pi / 180
-    fov = 31.76 * pi / 180
-    fov_down = 17.3 * pi / 180
-    lidar_data["vertical_angle"] = np.arcsin(
-        lidar_data["z"]
-        / np.sqrt(lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2)
-    )
-    lidar_data["vertical_angle_degree"] = lidar_data["vertical_angle"] * 180 / pi
+    # fov = 31.76 * pi / 180
+    # fov_down = 17.3 * pi / 180
+    # lidar_data["vertical_angle"] = np.arcsin(
+    #     lidar_data["z"]
+    #     / np.sqrt(lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2)
+    # )
+    # lidar_data["vertical_angle_degree"] = lidar_data["vertical_angle"] * 180 / pi
 
-    lidar_data["vertical_position_pitch_f"] = vertical_resolution * (
-        1 - ((lidar_data["vertical_angle"] + fov_down) / fov)
-    )
-    lidar_data["vertical_position_pitch"] = np.floor(
-        lidar_data["vertical_position_pitch_f"]
-    )
+    # lidar_data["vertical_position_pitch_f"] = vertical_resolution * (
+    #     1 - ((lidar_data["vertical_angle"] + fov_down) / fov)
+    # )
+    # lidar_data["vertical_position_pitch"] = np.floor(
+    #     lidar_data["vertical_position_pitch_f"]
+    # )
 
-    duplicates = lidar_data[
-        lidar_data.duplicated(
-            subset=["vertical_position_pitch", "horizontal_position_yaw"],
-            keep=False,
-        )
-    ].sort_values(by=["vertical_position_pitch", "horizontal_position_yaw"])
+    # duplicates = lidar_data[
+    #     lidar_data.duplicated(
+    #         subset=["vertical_position_pitch", "horizontal_position_yaw"],
+    #         keep=False,
+    #     )
+    # ].sort_values(by=["vertical_position_pitch", "horizontal_position_yaw"])
 
     lidar_data["normalized_range"] = 1 / np.sqrt(
         lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2
     )
     projection_data = lidar_data.pivot(
-        index="vertical_position_pitch",
-        columns="horizontal_position_yaw",
+        index="vertical_position",
+        columns="horizontal_position",
         values="normalized_range",
     )
     projection_data = projection_data.reindex(
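For reference, the yaw/pitch mapping that this hunk disables (in favour of the sensor-provided original_id for pixel placement) is the usual spherical projection of a LiDAR point onto an H x W range image. A compact standalone version of those formulas is sketched below; the resolutions and field-of-view values are taken from the commented-out code, while the function name and the final clamping step are illustrative additions.

import numpy as np


def spherical_pixel_coords(
    xyz: np.ndarray,  # (N, 3) Cartesian points
    horizontal_resolution: int = 2048,
    vertical_resolution: int = 32,
    fov: float = np.deg2rad(31.76),      # total vertical field of view
    fov_down: float = np.deg2rad(17.3),  # portion of the FoV below the horizon
) -> tuple[np.ndarray, np.ndarray]:
    # Map each point to (row, col) indices of a range image, mirroring the disabled code path.
    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    yaw = np.arctan2(y, x)                             # in [-pi, pi]
    pitch = np.arcsin(z / np.linalg.norm(xyz, axis=1))
    col = np.floor(0.5 * horizontal_resolution * (yaw / np.pi + 1.0)).astype(int)
    row = np.floor(vertical_resolution * (1.0 - (pitch + fov_down) / fov)).astype(int)
    # clamp to the image bounds (not in the original); several points may still share a pixel
    col = np.clip(col, 0, horizontal_resolution - 1)
    row = np.clip(row, 0, vertical_resolution - 1)
    return row, col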
@@ -208,8 +208,8 @@ def process_frame(args) -> Tuple[int, np.ndarray, Optional[Path]]:
         i,
         projection_data.to_numpy(),
         image_path,
-        lidar_data["vertical_position_pitch_f"].min(),
-        lidar_data["vertical_position_pitch_f"].max(),
+        # lidar_data["vertical_position_pitch_f"].min(),
+        # lidar_data["vertical_position_pitch_f"].max(),
     )
 
 
@@ -304,7 +304,7 @@ def create_projection_data(
     if render_images:
         return projection_data, rendered_images
     else:
-        return (projection_data,)
+        return projection_data, None
 
 
 def main() -> int:
@@ -1,21 +1,24 @@
-from configargparse import ArgParser, YAMLConfigFileParser, ArgumentDefaultsRawHelpFormatter
+from pathlib import Path
 from sys import exit
-from open3d.visualization.rendering import OffscreenRenderer, MaterialRecord
+
+import matplotlib.pyplot as plt
+import numpy as np
+from configargparse import (
+    ArgParser,
+    ArgumentDefaultsRawHelpFormatter,
+    YAMLConfigFileParser,
+)
 from open3d.io import read_pinhole_camera_parameters, write_image
 from open3d.utility import Vector3dVector
-from pathlib import Path
+from open3d.visualization.rendering import MaterialRecord, OffscreenRenderer
 from pointcloudset import Dataset
 from rich.progress import track
 
-import matplotlib.pyplot as plt
-
-import numpy as np
-
 from util import (
-    load_dataset,
-    existing_file,
-    create_video_from_images,
     calculate_average_frame_rate,
+    create_video_from_images,
+    existing_file,
+    load_dataset,
 )
 
 
@@ -42,14 +45,18 @@ def render_3d_images(
         distances = np.linalg.norm(points, axis=1)
         max_distance = distances.max()
         min_distance = distances.min()
-        normalized_distances = (distances - min_distance) / (max_distance - min_distance)
+        normalized_distances = (distances - min_distance) / (
+            max_distance - min_distance
+        )
         colors = plt.get_cmap("jet")(normalized_distances)[:, :3]
         pcd.colors = Vector3dVector(colors)
         return pcd
 
     rendered_images = []
 
-    for i, pc in track(enumerate(dataset, 1), description="Rendering images...", total=len(dataset)):
+    for i, pc in track(
+        enumerate(dataset, 1), description="Rendering images...", total=len(dataset)
+    ):
         o3d_pc = pc.to_instance("open3d")
         o3d_pc = color_points_by_range(o3d_pc)
         renderer.scene.add_geometry("point_cloud", o3d_pc, MaterialRecord())
@@ -68,19 +75,35 @@ def main() -> int:
         formatter_class=ArgumentDefaultsRawHelpFormatter,
         description="Render a 3d representation of a point cloud",
     )
-    parser.add_argument("--render-config-file", is_config_file=True, help="yaml config file path")
-    parser.add_argument("--input-bag-path", required=True, type=existing_file, help="path to bag file")
     parser.add_argument(
-        "--tmp-files-path", default=Path("./tmp"), type=Path, help="path temporary files will be written to"
+        "--render-config-file", is_config_file=True, help="yaml config file path"
     )
     parser.add_argument(
-        "--output-images", type=bool, default=True, help="if rendered frames should be outputted as images"
+        "--input-bag-path", required=True, type=existing_file, help="path to bag file"
     )
     parser.add_argument(
-        "--output-images-path", default=Path("./output"), type=Path, help="path rendered frames should be written to"
+        "--tmp-files-path",
+        default=Path("./tmp"),
+        type=Path,
+        help="path temporary files will be written to",
     )
     parser.add_argument(
-        "--output-video", type=bool, default=True, help="if rendered frames should be outputted as a video"
+        "--output-images",
+        type=bool,
+        default=True,
+        help="if rendered frames should be outputted as images",
+    )
+    parser.add_argument(
+        "--output-images-path",
+        default=Path("./output"),
+        type=Path,
+        help="path rendered frames should be written to",
+    )
+    parser.add_argument(
+        "--output-video",
+        type=bool,
+        default=True,
+        help="if rendered frames should be outputted as a video",
     )
     parser.add_argument(
         "--output-video-path",
@@ -88,7 +111,12 @@ def main() -> int:
         type=Path,
         help="path rendered video should be written to",
     )
-    parser.add_argument("--output-images-prefix", default="2d_render", type=str, help="filename prefix for output")
+    parser.add_argument(
+        "--output-images-prefix",
+        default="2d_render",
+        type=str,
+        help="filename prefix for output",
+    )
     parser.add_argument(
         "--camera-config-input-json-path",
         default="./saved_camera_settings.json",
|||||||
dataset = load_dataset(args.input_bag_path)
|
dataset = load_dataset(args.input_bag_path)
|
||||||
|
|
||||||
images = render_3d_images(
|
images = render_3d_images(
|
||||||
dataset, args.camera_config_input_json_path, args.tmp_files_path, args.output_images_prefix
|
dataset,
|
||||||
|
args.camera_config_input_json_path,
|
||||||
|
args.tmp_files_path,
|
||||||
|
args.output_images_prefix,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.output_video:
|
if args.output_video:
|
||||||
input_images_pattern = f"{args.tmp_files_path / args.output_images_prefix}_%04d.png"
|
input_images_pattern = (
|
||||||
create_video_from_images(input_images_pattern, args.output_video_path, calculate_average_frame_rate(dataset))
|
f"{args.tmp_files_path / args.output_images_prefix}_%04d.png"
|
||||||
|
)
|
||||||
|
create_video_from_images(
|
||||||
|
input_images_pattern,
|
||||||
|
args.output_video_path,
|
||||||
|
calculate_average_frame_rate(dataset),
|
||||||
|
)
|
||||||
|
|
||||||
if not args.output_images:
|
if not args.output_images:
|
||||||
for image in images:
|
for image in images:
|
||||||
|
|||||||
@@ -14,6 +14,12 @@ def load_dataset(
     return Dataset.from_file(bag_file_path, topic=pointcloud_topic)
 
 
+def save_dataset(dataset: Dataset, output_file_path: Path):
+    if not output_file_path.is_dir():
+        raise ArgumentTypeError(f"{output_file_path} has to be a valid folder!")
+    dataset.to_file(output_file_path)
+
+
 def calculate_average_frame_rate(dataset: Dataset):
     timestamps = dataset.timestamps
     time_deltas = [