Compare commits

...

2 Commits

Author | SHA1 | Message | Date
Jan Kowalczyk | e20c2235ed | wip | 2025-09-15 11:21:40 +02:00
Jan Kowalczyk | e7624d2786 | wip inference | 2025-09-15 11:21:30 +02:00
10 changed files with 1029 additions and 37 deletions

View File

@@ -261,6 +261,80 @@ class IsoForest(object):
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
logger.info("Finished testing.")
def inference(
self,
dataset: BaseADDataset,
device: str = "cpu",
n_jobs_dataloader: int = 0,
batch_size: int = 32,
):
"""Perform inference on the dataset using the trained Isolation Forest model."""
logger = logging.getLogger()
# Get inference data loader
_, _, inference_loader = dataset.loaders(
batch_size=batch_size, num_workers=n_jobs_dataloader
)
# Get data from loader
X = ()
idxs = []
file_ids = []
frame_ids = []
logger.info("Starting inference...")
start_time = time.time()
for data in inference_loader:
inputs, idx, (file_id, frame_id) = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs)
X_batch = inputs.view(inputs.size(0), -1)
X += (X_batch.cpu().data.numpy(),)
# Store indices and metadata
idxs.extend(idx.cpu().data.numpy().tolist())
file_ids.extend(file_id.cpu().data.numpy().tolist())
frame_ids.extend(frame_id.cpu().data.numpy().tolist())
X = np.concatenate(X)
# Get anomaly scores
scores = (-1.0) * self.model.decision_function(X)
scores = scores.flatten()
# Store inference results
self.inference_time = time.time() - start_time
self.inference_indices = np.array(idxs)
self.inference_file_ids = np.array(file_ids)
self.inference_frame_ids = np.array(frame_ids)
# Create index mapping similar to DeepSAD trainer
self.inference_index_mapping = {
"indices": self.inference_indices,
"file_ids": self.inference_file_ids,
"frame_ids": self.inference_frame_ids,
}
# Log inference statistics
logger.info(f"Number of inference samples: {len(self.inference_indices)}")
logger.info(
f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
)
logger.info("Inference Time: {:.3f}s".format(self.inference_time))
logger.info(
"Score statistics: "
f"min={scores.min():.3f}, "
f"max={scores.max():.3f}, "
f"mean={scores.mean():.3f}, "
f"std={scores.std():.3f}"
)
logger.info("Finished inference.")
return scores
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model."""

View File

@@ -453,6 +453,80 @@ class OCSVM(object):
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
logger.info("Finished testing.")
def inference(
self,
dataset: BaseADDataset,
device: str = "cpu",
n_jobs_dataloader: int = 0,
batch_size: int = 32,
):
"""Perform inference on the dataset using the trained OC-SVM model."""
logger = logging.getLogger()
# Get inference data loader
_, _, inference_loader = dataset.loaders(
batch_size=batch_size, num_workers=n_jobs_dataloader
)
# Get data from loader
X = ()
idxs = []
file_ids = []
frame_ids = []
logger.info("Starting inference...")
start_time = time.time()
for data in inference_loader:
inputs, idx, (file_id, frame_id) = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs)
X_batch = inputs.view(inputs.size(0), -1)
X += (X_batch.cpu().data.numpy(),)
# Store indices and metadata
idxs.extend(idx.cpu().data.numpy().tolist())
file_ids.extend(file_id.cpu().data.numpy().tolist())
frame_ids.extend(frame_id.cpu().data.numpy().tolist())
X = np.concatenate(X)
# Get anomaly scores
scores = (-1.0) * self.model.decision_function(X)
scores = scores.flatten()
# Store inference results
self.inference_time = time.time() - start_time
self.inference_indices = np.array(idxs)
self.inference_file_ids = np.array(file_ids)
self.inference_frame_ids = np.array(frame_ids)
# Create index mapping similar to DeepSAD trainer
self.inference_index_mapping = {
"indices": self.inference_indices,
"file_ids": self.inference_file_ids,
"frame_ids": self.inference_frame_ids,
}
# Log inference statistics
logger.info(f"Number of inference samples: {len(self.inference_indices)}")
logger.info(
f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
)
logger.info("Inference Time: {:.3f}s".format(self.inference_time))
logger.info(
"Score statistics: "
f"min={scores.min():.3f}, "
f"max={scores.max():.3f}, "
f"mean={scores.mean():.3f}, "
f"std={scores.std():.3f}"
)
logger.info("Finished inference.")
return scores
def load_ae(self, model_path, net_name, device="cpu"):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model."""

View File

@@ -338,6 +338,8 @@ class SubTerInference(VisionDataset):
self.frame_ids = np.arange(self.data.shape[0], dtype=np.int32)
self.file_names = {0: experiment_file.name}
self.transform = transform if transform else transforms.ToTensor()
def __len__(self):
return len(self.data)

View File

@@ -638,57 +638,185 @@ def main(
cfg.save_config(export_json=xp_path + "/config.json")
elif action == "infer":
# Inference uses a deterministic, non-shuffled loader to preserve temporal order
dataset = load_dataset(
dataset_name,
cfg.settings["dataset_name"],
data_path,
normal_class,
known_outlier_class,
n_known_outlier_classes,
ratio_known_normal,
ratio_known_outlier,
ratio_pollution,
cfg.settings["normal_class"],
cfg.settings["known_outlier_class"],
cfg.settings["n_known_outlier_classes"],
cfg.settings["ratio_known_normal"],
cfg.settings["ratio_known_outlier"],
cfg.settings["ratio_pollution"],
random_state=np.random.RandomState(cfg.settings["seed"]),
k_fold_num=False,
inference=True,
)
# Log random sample of known anomaly classes if more than 1 class
if n_known_outlier_classes > 1:
logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))
# Initialize DeepSAD model and set neural network phi
deepSAD = DeepSAD(latent_space_dim, cfg.settings["eta"])
deepSAD.set_network(net_name)
# If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
if not load_model:
# --- Expect a model DIRECTORY (aligned with 'retest') ---
if (
(not load_model)
or (not Path(load_model).exists())
or (not Path(load_model).is_dir())
):
logger.error(
"For inference mode a model has to be loaded! Pass the --load_model option with the model path!"
"For inference mode a model directory has to be loaded! "
"Pass the --load_model option with the model directory path!"
)
return
load_model = Path(load_model)
# Resolve expected model artifacts (single-model / no k-fold suffixes)
deepsad_model_path = load_model / "model_deepsad.tar"
ae_model_path = load_model / "model_ae.tar"
ocsvm_model_path = load_model / "model_ocsvm.pkl"
isoforest_model_path = load_model / "model_isoforest.pkl"
# Sanity check model files exist
model_paths = [
deepsad_model_path,
ae_model_path,
ocsvm_model_path,
isoforest_model_path,
]
missing = [p.name for p in model_paths if not p.exists() or not p.is_file()]
if missing:
logger.error(
"The following model files do not exist in the provided model directory: "
+ ", ".join(missing)
)
return
deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
logger.info("Loading model from %s." % load_model)
# Prepare output paths
inf_dir = Path(xp_path) / "inference"
inf_dir.mkdir(parents=True, exist_ok=True)
base_stem = Path(Path(dataset.root).stem)  # keep the previous naming convention
# DeepSAD outputs (keep legacy filenames for backward compatibility)
deepsad_scores_path = inf_dir / Path(
base_stem.stem + "_deepsad_scores"
).with_suffix(".npy")
deepsad_outputs_path = inf_dir / Path(base_stem.stem + "_outputs").with_suffix(
".npy"
)
# Baselines
ocsvm_scores_path = inf_dir / Path(
base_stem.stem + "_ocsvm_scores"
).with_suffix(".npy")
isoforest_scores_path = inf_dir / Path(
base_stem.stem + "_isoforest_scores"
).with_suffix(".npy")
inference_results, all_outputs = deepSAD.inference(
dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
)
inference_results_path = (
Path(xp_path)
/ "inference"
/ Path(Path(dataset.root).stem).with_suffix(".npy")
)
inference_outputs_path = (
Path(xp_path)
/ "inference"
/ Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
# Common loader settings
_n_jobs = (
n_jobs_dataloader
if "n_jobs_dataloader" in locals()
else cfg.settings.get("n_jobs_dataloader", 0)
)
inference_results_path.parent.mkdir(parents=True, exist_ok=True)
np.save(inference_results_path, inference_results, fix_imports=False)
np.save(inference_outputs_path, all_outputs, fix_imports=False)
# ----------------- DeepSAD -----------------
deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
deepSAD.set_network(cfg.settings["net_name"])
deepSAD.load_model(
model_path=deepsad_model_path, load_ae=True, map_location=device
)
logger.info("Loaded DeepSAD model from %s.", deepsad_model_path)
deepsad_scores, deepsad_all_outputs = deepSAD.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs
)
np.save(deepsad_scores_path, deepsad_scores)
# np.save(deepsad_outputs_path, deepsad_all_outputs)
logger.info(
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
"DeepSAD inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(deepsad_scores)),
float(np.mean(deepsad_scores)),
float(np.min(deepsad_scores)),
float(np.max(deepsad_scores)),
)
# ----------------- OCSVM (hybrid) -----------------
ocsvm_scores = None
ocsvm = OCSVM(
kernel=cfg.settings["ocsvm_kernel"],
nu=cfg.settings["ocsvm_nu"],
hybrid=True,
latent_space_dim=cfg.settings["latent_space_dim"],
)
# load AE to build the feature extractor for hybrid OCSVM
ocsvm.load_ae(
net_name=cfg.settings["net_name"],
model_path=ae_model_path,
device=device,
)
ocsvm.load_model(import_path=ocsvm_model_path)
ocsvm_scores = ocsvm.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs, batch_size=32
)
if ocsvm_scores is not None:
np.save(ocsvm_scores_path, ocsvm_scores)
logger.info(
"OCSVM inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(ocsvm_scores)),
float(np.mean(ocsvm_scores)),
float(np.min(ocsvm_scores)),
float(np.max(ocsvm_scores)),
)
else:
logger.warning("OCSVM scores could not be determined; no array saved.")
# ----------------- Isolation Forest -----------------
isoforest_scores = None
Isoforest = IsoForest(
hybrid=False,
n_estimators=cfg.settings["isoforest_n_estimators"],
max_samples=cfg.settings["isoforest_max_samples"],
contamination=cfg.settings["isoforest_contamination"],
n_jobs=cfg.settings["isoforest_n_jobs_model"],
seed=cfg.settings["seed"],
)
Isoforest.load_model(import_path=isoforest_model_path, device=device)
isoforest_scores = Isoforest.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs
)
if isoforest_scores is not None:
np.save(isoforest_scores_path, isoforest_scores)
logger.info(
"IsolationForest inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(isoforest_scores)),
float(np.mean(isoforest_scores)),
float(np.min(isoforest_scores)),
float(np.max(isoforest_scores)),
)
else:
logger.warning(
"Isolation Forest scores could not be determined; no array saved."
)
# Final summary (DeepSAD always runs; baselines are best-effort)
logger.info(
"Inference complete. Saved arrays to %s:\n"
" DeepSAD scores: %s\n"
" DeepSAD outputs: %s\n"
" OCSVM scores: %s\n"
" IsoForest scores: %s",
inf_dir,
deepsad_scores_path.name,
deepsad_outputs_path.name,
ocsvm_scores_path.name if ocsvm_scores is not None else "(not saved)",
isoforest_scores_path.name
if isoforest_scores is not None
else "(not saved)",
)
elif action == "ae_elbow_test":
# Load data once
dataset = load_dataset(

View File

@@ -177,6 +177,8 @@ class DeepSADTrainer(BaseTrainer):
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
)
latent_dim = net.rep_dim
# Set device for network
net = net.to(self.device)
@@ -184,7 +186,9 @@ class DeepSADTrainer(BaseTrainer):
logger.info("Starting inference...")
n_batches = 0
start_time = time.time()
all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
all_outputs = np.zeros(
(len(inference_loader.dataset), latent_dim), dtype=np.float32
)
scores = []
net.eval()

Binary file not shown.

View File

@@ -1166,7 +1166,7 @@ To compare the computational efficiency of the two architectures we show the num
%\todo[inline]{rework table and calculate with actual scripts and network archs in deepsad codebase}
\todo[inline]{next paragraph does not work anymore?}
As can be seen, the efficient encoder requires an order of magnitude fewer parameters and significantly fewer operations while maintaining a comparable representational capacity. The key reason is the use of depthwise separable convolutions, aggressive pooling along the densely sampled horizontal axis, and a channel squeezing strategy before the fully connected layer. Interestingly, the Efficient network also processes more intermediate channels (up to 32 compared to only 8 in the LeNet variant), which increases its ability to capture a richer set of patterns despite the reduced computational cost. This combination of efficiency and representational power makes the Efficient encoder a more suitable backbone for our anomaly detection task.
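For illustration, a minimal PyTorch sketch of the depthwise separable convolution pattern described above, with pooling restricted to the densely sampled horizontal axis. The 8-to-32 channel step and the 32 x 2048 frame size are taken from the surrounding discussion; the kernel sizes, pooling factor, and layer layout are assumptions for the example, not the thesis network definition.

import torch
import torch.nn as nn

class DepthwiseSeparableConv(nn.Module):
    """Depthwise (per-channel) 3x3 convolution followed by a 1x1 pointwise
    convolution; parameters drop from in_ch*out_ch*3*3 to in_ch*3*3 + in_ch*out_ch."""

    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1,
                                   groups=in_ch, bias=False)
        self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

# Hypothetical stage: 8 -> 32 channels on a 32 x 2048 range image, then pooling
# only along the horizontal axis, which carries most of the redundant samples.
stage = nn.Sequential(DepthwiseSeparableConv(8, 32), nn.ReLU(),
                      nn.MaxPool2d(kernel_size=(1, 4)))
out = stage(torch.randn(1, 8, 32, 2048))  # -> torch.Size([1, 32, 32, 512])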
@@ -1378,7 +1378,7 @@ Pretraining runtimes for the autoencoders are reported in Table~\ref{tab:ae_pret
\end{tabularx}
\end{table}
The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15--20 minutes of GPU training per configuration, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretraining encoder as a preprocessing step, although other dimensionality reduction methods may also be used which may require less computational resources for this step.
The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15--20 minutes of GPU training per configuration and fold, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretrained encoder as a preprocessing step, although other dimensionality reduction methods, which could require fewer computational resources, may also be used for this step.
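To make the preprocessing claim concrete, below is a rough, self-contained sketch of the hybrid setup, assuming a placeholder encoder and random frames: a frozen encoder compresses each 32 x 2048 frame to a small latent vector, and the classical OC-SVM is fit on those vectors, which is why its training stays in the seconds range. The layer sizes, nu, and kernel here are illustrative only and do not reflect the trained models.

import torch
import torch.nn as nn
from sklearn.svm import OneClassSVM

# Placeholder encoder: flattens a 32 x 2048 frame and maps it to 32 latent features.
encoder = nn.Sequential(
    nn.Flatten(),
    nn.Linear(32 * 2048, 512),
    nn.ReLU(),
    nn.Linear(512, 32),
)

frames = torch.rand(256, 1, 32, 2048)  # a small batch of synthetic frames

with torch.no_grad():
    latent = encoder(frames).numpy()   # shape (256, 32)

# Fitting on 32-dimensional latent features instead of 65,536 raw values is
# what keeps the OC-SVM training time in the seconds range.
ocsvm = OneClassSVM(kernel="rbf", nu=0.1)
ocsvm.fit(latent)
scores = -ocsvm.decision_function(latent)  # higher score = more anomalous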
\begin{table}
\centering

View File

@@ -96,6 +96,21 @@ PRETRAIN_SCHEMA = {
"config_json": pl.Utf8, # full config.json as string (for reference)
}
SCHEMA_INFERENCE = {
# identifiers / dims
"experiment": pl.Utf8, # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001"
"network": pl.Utf8, # e.g. "LeNet", "efficient"
"latent_dim": pl.Int32,
"semi_normals": pl.Int32,
"semi_anomalous": pl.Int32,
"model": pl.Utf8, # "deepsad" | "isoforest" | "ocsvm"
# metrics
"scores": pl.List(pl.Float64),
# timings / housekeeping
"folder": pl.Utf8,
"config_json": pl.Utf8, # full config.json as string (for reference)
}
# ------------------------------------------------------------
# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
@@ -233,11 +248,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]:
# ------------------------------------------------------------
# Low-level: read one experiment folder
# ------------------------------------------------------------
def read_config(exp_dir: Path) -> dict:
def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict:
cfg = exp_dir / "config.json"
with cfg.open("r") as f:
c = json.load(f)
if not c.get("k_fold"):
if k_fold_required and not c.get("k_fold"):
raise ValueError(f"{exp_dir.name}: not trained as k-fold")
return c
@@ -589,7 +604,129 @@ def load_pretraining_results_dataframe(
return df
def load_inference_results_dataframe(
root: Path,
allow_cache: bool = True,
models: List[str] = MODELS,
) -> pl.DataFrame:
"""Load inference results from experiment folders.
Args:
root: Path to root directory containing experiment folders
allow_cache: Whether to use/create cache file
models: List of models to look for scores
Returns:
pl.DataFrame: DataFrame containing inference results
"""
if allow_cache:
cache = root / "inference_results_cache.parquet"
if cache.exists():
try:
df = pl.read_parquet(cache)
print(f"[info] loaded cached inference frame from {cache}")
return df
except Exception as e:
print(f"[warn] failed to load inference cache {cache}: {e}")
rows: List[dict] = []
exp_dirs = [p for p in root.iterdir() if p.is_dir()]
for exp_dir in sorted(exp_dirs):
try:
# Load and validate config
cfg = read_config(exp_dir, k_fold_required=False)
cfg_json = json.dumps(cfg, sort_keys=True)
# Extract config values
network = cfg.get("net_name")
latent_dim = int(cfg.get("latent_space_dim"))
semi_normals = int(cfg.get("num_known_normal"))
semi_anomalous = int(cfg.get("num_known_outlier"))
# Process each model's scores
inference_dir = exp_dir / "inference"
if not inference_dir.exists():
print(f"[warn] no inference directory for {exp_dir.name}")
continue
# Find all unique experiments in this folder's inference files
score_files = list(inference_dir.glob("*_scores.npy"))
if not score_files:
print(f"[warn] no score files in {inference_dir}")
continue
# Extract unique experiment names from score files
# Format: {experiment}_{model}_scores.npy
experiments = set()
for score_file in score_files:
exp_name = score_file.stem.rsplit("_", 2)[0]
experiments.add(exp_name)
# Load scores for each experiment and model
for experiment in sorted(experiments):
for model in models:
score_file = inference_dir / f"{experiment}_{model}_scores.npy"
if not score_file.exists():
print(f"[warn] missing score file for {experiment}, {model}")
continue
try:
scores = np.load(score_file)
rows.append(
{
"experiment": experiment,
"network": network,
"latent_dim": latent_dim,
"semi_normals": semi_normals,
"semi_anomalous": semi_anomalous,
"model": model,
"scores": scores.tolist(),
"folder": str(exp_dir),
"config_json": cfg_json,
}
)
except Exception as e:
print(
f"[warn] failed to load scores for {experiment}, {model}: {e}"
)
continue
except Exception as e:
print(f"[warn] skipping {exp_dir.name}: {e}")
continue
# If empty, return a typed empty frame
if not rows:
return pl.DataFrame(schema=SCHEMA_INFERENCE)
df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE)
# Optimize datatypes
df = df.with_columns(
[
pl.col("experiment", "network", "model").cast(pl.Categorical),
pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32),
]
)
# Cache if enabled
if allow_cache:
try:
df.write_parquet(cache)
print(f"[info] cached inference frame to {cache}")
except Exception as e:
print(f"[warn] failed to write cache {cache}: {e}")
return df
def main():
inference_root = Path("/home/fedex/mt/results/inference/copy")
df_inference = load_inference_results_dataframe(inference_root, allow_cache=True)
exit(0)
root = Path("/home/fedex/mt/results/copy")
df1 = load_results_dataframe(root, allow_cache=True)
exit(0)

View File

@@ -0,0 +1,269 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"
# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
methods_scores_path = Path(
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)
# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")
# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")
# Cache (stats + labels) directory, same as the original statistics script
cache_path = output_path
# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048
# Frames per second for x-axis time
FPS = 10.0
# Whether to try to align score sign so that higher = more degraded.
ALIGN_SCORE_DIRECTION = True
# =========================
# Smoothing configuration
# =========================
# Options: "none", "moving_average", "gaussian", "ema"
SMOOTHING_METHOD = "ema"
# Moving average window size (in frames). Use odd number for symmetry; <=1 disables.
MA_WINDOW = 11
# Gaussian sigma (in frames). ~2-3 frames is mild smoothing.
GAUSSIAN_SIGMA = 2.0
# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start.
EMA_ALPHA = 0.1
# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
# =========================
# Discover experiments
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
normal_experiment_paths = sorted(
normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
anomaly_experiment_paths, key=lambda p: p.stat().st_size
)
# Find experiment
exp_path, exp_is_anomaly = None, None
for p in anomaly_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path, exp_is_anomaly = p, True
break
if exp_path is None:
for p in normal_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path, exp_is_anomaly = p, False
break
if exp_path is None:
raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found")
exp_index = (
anomaly_experiment_paths.index(exp_path)
if exp_is_anomaly
else normal_experiment_paths.index(exp_path)
)
# =========================
# Load cached statistical data
# =========================
with open(cache_path / "missing_points.pkl", "rb") as f:
missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f:
near_sensor_normal, near_sensor_anomaly = pickle.load(f)
if exp_is_anomaly:
missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0
# =========================
# Load manual anomaly frame borders
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
with open(labels_json_path, "r") as f:
labeled_json = json.load(f)
for file in labeled_json.get("files", []):
manually_labeled_anomaly_frames[file["filename"]] = (
file.get("semi_target_begin_frame"),
file.get("semi_target_end_frame"),
)
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))
# =========================
# Load method scores and normalize
# =========================
def zscore_1d(x, eps=1e-12):
mu, sigma = np.mean(x), np.std(x, ddof=0)
return np.zeros_like(x) if sigma < eps else (x - mu) / sigma
def maybe_align_direction(z, window):
start, end = window
if start is None or end is None:
return z
inside_mean = np.mean(z[start:end]) if end > start else 0
outside = np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else []
outside_mean = np.mean(outside) if len(outside) else inside_mean
return z if inside_mean >= outside_mean else -z
methods = ["deepsad", "ocsvm", "isoforest"]
method_zscores = {}
for m in methods:
s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy")
s = np.asarray(s, dtype=float).ravel()
n = min(len(s), len(missing_points_pct))
s, missing_points_pct, near_sensor_pct = (
s[:n],
missing_points_pct[:n],
near_sensor_pct[:n],
)
z = zscore_1d(s)
if ALIGN_SCORE_DIRECTION:
z = maybe_align_direction(z, anomaly_window)
method_zscores[m] = z
# =========================
# Smoothing
# =========================
def moving_average(x, window):
if window <= 1:
return x
if window % 2 == 0:
window += 1
return np.convolve(x, np.ones(window) / window, mode="same")
def gaussian_smooth(x, sigma):
from scipy.ndimage import gaussian_filter1d
return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x
def ema(x, alpha):
y = np.empty_like(x)
y[0] = x[0]
for i in range(1, len(x)):
y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
return y
def apply_smoothing(x):
m = SMOOTHING_METHOD.lower()
if m == "none":
return x
if m == "moving_average":
return moving_average(x, MA_WINDOW)
if m == "gaussian":
return gaussian_smooth(x, GAUSSIAN_SIGMA)
if m == "ema":
return ema(x, EMA_ALPHA)
raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}")
smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()}
smoothed_missing = apply_smoothing(missing_points_pct)
smoothed_near = apply_smoothing(near_sensor_pct)
# =========================
# Plot
# =========================
t = np.arange(len(missing_points_pct)) / FPS
def plot_series(y2, ylabel, fname, title_suffix):
fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy = axz.twinx()
for m in methods:
axz.plot(t, smoothed_z[m], label=f"{m} (z)")
axy.plot(t, y2, linestyle="--", label=ylabel)
start, end = anomaly_window
if start is not None and end is not None:
axz.axvline(start / FPS, linestyle=":", alpha=0.6)
axz.axvline(end / FPS, linestyle=":", alpha=0.6)
axz.set_xlabel("Time (s)")
axz.set_ylabel("Anomaly score (z)")
axy.set_ylabel(ylabel)
axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}")
lines1, labels1 = axz.get_legend_handles_labels()
lines2, labels2 = axy.get_legend_handles_labels()
axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz.grid(True, alpha=0.3)
fig.savefig(output_datetime_path / fname, dpi=150)
plt.close(fig)
plot_series(
smoothed_missing,
"Missing points (%)",
f"{EXPERIMENT_NAME}_zscores_vs_missing.png",
"Degradation vs Missing Points",
)
plot_series(
smoothed_near,
"Near-sensor points (%)",
f"{EXPERIMENT_NAME}_zscores_vs_near.png",
"Degradation vs Near-Sensor Points (<0.5m)",
)
# =========================
# Save & archive
# =========================
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for f in output_datetime_path.iterdir():
shutil.copy2(f, latest_folder_path)
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")

View File

@@ -0,0 +1,304 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"
# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
# Adjust this to the directory where the per-method score files are saved.
methods_scores_path = Path(
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)
# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")
# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline")
# Cache (stats + labels) directory, same as the original statistics script
cache_path = output_path
# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048
# Frames per second for x-axis time
FPS = 10.0
# Whether to try to align score sign so that higher = more degraded.
# If manual labels exist for this experiment, alignment uses anomaly window mean vs. outside.
ALIGN_SCORE_DIRECTION = True
# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
# =========================
# Discover experiments to reconstruct indices consistent with caches
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
if not all_data_path.exists():
raise FileNotFoundError(f"all_data_path does not exist: {all_data_path}")
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# Sort by filesize to match original ordering used when caches were generated
normal_experiment_paths = sorted(
normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
anomaly_experiment_paths, key=lambda p: p.stat().st_size
)
# Find the path for the requested experiment
exp_path = None
exp_is_anomaly = None
for p in anomaly_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path = p
exp_is_anomaly = True
break
if exp_path is None:
for p in normal_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path = p
exp_is_anomaly = False
break
if exp_path is None:
raise FileNotFoundError(
f"Experiment '{EXPERIMENT_NAME}' not found as a .bag in {all_data_path}"
)
# Get the index within the appropriate list
if exp_is_anomaly:
exp_index = anomaly_experiment_paths.index(exp_path)
else:
exp_index = normal_experiment_paths.index(exp_path)
# =========================
# Load cached statistical data
# =========================
missing_points_cache = Path(cache_path / "missing_points.pkl")
near_sensor_cache = Path(cache_path / "particles_near_sensor_counts_500.pkl")
if not missing_points_cache.exists():
raise FileNotFoundError(f"Missing points cache not found: {missing_points_cache}")
if not near_sensor_cache.exists():
raise FileNotFoundError(f"Near-sensor cache not found: {near_sensor_cache}")
with open(missing_points_cache, "rb") as f:
missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(near_sensor_cache, "rb") as f:
near_sensor_normal, near_sensor_anomaly = pickle.load(f)
if exp_is_anomaly:
missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)
# Convert counts to percentages of total points
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0
# =========================
# Load manual anomaly frame borders (optional; used for sign alignment + vertical markers)
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
with open(labels_json_path, "r") as frame_borders_file:
manually_labeled_anomaly_frames_json = json.load(frame_borders_file)
for file in manually_labeled_anomaly_frames_json.get("files", []):
manually_labeled_anomaly_frames[file["filename"]] = (
file.get("semi_target_begin_frame", None),
file.get("semi_target_end_frame", None),
)
# The JSON uses .npy filenames (as in the original script). Create this experiment's key.
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))
# =========================
# Load method scores and z-score normalize per method
# =========================
def zscore_1d(x: np.ndarray, eps=1e-12):
x = np.asarray(x, dtype=float)
mu = np.mean(x)
sigma = np.std(x, ddof=0)
if sigma < eps:
return np.zeros_like(x)
return (x - mu) / sigma
def maybe_align_direction(z: np.ndarray, window):
"""Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist."""
start, end = window
if start is None or end is None:
return z # no labels → leave as-is
start = int(max(0, start))
end = int(min(len(z), end))
if end <= start or end > len(z):
return z
inside_mean = float(np.mean(z[start:end]))
# outside: everything except [start:end]; handle edge cases
if start == 0 and end == len(z):
return z
outside_parts = []
if start > 0:
outside_parts.append(z[:start])
if end < len(z):
outside_parts.append(z[end:])
if not outside_parts:
return z
outside_mean = float(np.mean(np.concatenate(outside_parts)))
return z if inside_mean >= outside_mean else -z
methods = ["deepsad", "ocsvm", "isoforest"]
method_scores = {}
method_zscores = {}
if not methods_scores_path.exists():
raise FileNotFoundError(
f"Methods scores path does not exist: {methods_scores_path}"
)
for m in methods:
file_path = methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy"
if not file_path.exists():
raise FileNotFoundError(f"Missing scores file for method '{m}': {file_path}")
s = np.load(file_path)
s = np.asarray(s, dtype=float).reshape(-1)
# If needed, truncate to the common length (lengths should match if generated consistently)
n = min(len(s), len(missing_points_pct))
if len(s) != len(missing_points_pct):
# Align by truncation to the shortest length
s = s[:n]
# Also truncate stats to match
missing_points_pct = missing_points_pct[:n]
near_sensor_pct = near_sensor_pct[:n]
z = zscore_1d(s)
if ALIGN_SCORE_DIRECTION:
z = maybe_align_direction(z, anomaly_window)
method_scores[m] = s
method_zscores[m] = z
# Common time axis in seconds
num_frames = len(missing_points_pct)
t = np.arange(num_frames) / FPS
# =========================
# Plot 1: Missing points (%) vs. method z-scores
# =========================
fig1, axz1 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy1 = axz1.twinx()
# plot z-scores
for m in methods:
axz1.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)
# plot missing points (%)
axy1.plot(t, missing_points_pct, linestyle="--", alpha=0.7, label="Missing points (%)")
# vertical markers for anomaly window if available
start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
axz1.axvline(x=start / FPS, linestyle=":", alpha=0.6)
axz1.axvline(x=end / FPS, linestyle=":", alpha=0.6)
axz1.set_xlabel("Time (s)")
axz1.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy1.set_ylabel("Missing points (%)")
axz1.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Missing Points")
# Build a combined legend
lines1, labels1 = axz1.get_legend_handles_labels()
lines2, labels2 = axy1.get_legend_handles_labels()
axz1.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz1.grid(True, alpha=0.3)
fig1.savefig(
output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_missing_points.png", dpi=150
)
plt.close(fig1)
# =========================
# Plot 2: Near-sensor (%) vs. method z-scores
# =========================
fig2, axz2 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy2 = axz2.twinx()
for m in methods:
axz2.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)
axy2.plot(t, near_sensor_pct, linestyle="--", alpha=0.7, label="Near-sensor <0.5m (%)")
start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
axz2.axvline(x=start / FPS, linestyle=":", alpha=0.6)
axz2.axvline(x=end / FPS, linestyle=":", alpha=0.6)
axz2.set_xlabel("Time (s)")
axz2.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy2.set_ylabel("Near-sensor points (%)")
axz2.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Near-Sensor Points (<0.5 m)")
lines1, labels1 = axz2.get_legend_handles_labels()
lines2, labels2 = axy2.get_legend_handles_labels()
axz2.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz2.grid(True, alpha=0.3)
fig2.savefig(
output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_near_sensor.png", dpi=150
)
plt.close(fig2)
# =========================
# Preserve latest/, archive/, copy script
# =========================
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to preserve the code used
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")