Compare commits

...

2 Commits

Author | SHA1 | Message | Date
Jan Kowalczyk | e20c2235ed | wip | 2025-09-15 11:21:40 +02:00
Jan Kowalczyk | e7624d2786 | wip inference | 2025-09-15 11:21:30 +02:00
10 changed files with 1029 additions and 37 deletions

View File

@@ -261,6 +261,80 @@ class IsoForest(object):
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
logger.info("Finished testing.")
def inference(
self,
dataset: BaseADDataset,
device: str = "cpu",
n_jobs_dataloader: int = 0,
batch_size: int = 32,
):
"""Perform inference on the dataset using the trained Isolation Forest model."""
logger = logging.getLogger()
# Get inference data loader
_, _, inference_loader = dataset.loaders(
batch_size=batch_size, num_workers=n_jobs_dataloader
)
# Get data from loader
X = ()
idxs = []
file_ids = []
frame_ids = []
logger.info("Starting inference...")
start_time = time.time()
for data in inference_loader:
inputs, idx, (file_id, frame_id) = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs)
X_batch = inputs.view(inputs.size(0), -1)
X += (X_batch.cpu().data.numpy(),)
# Store indices and metadata
idxs.extend(idx.cpu().data.numpy().tolist())
file_ids.extend(file_id.cpu().data.numpy().tolist())
frame_ids.extend(frame_id.cpu().data.numpy().tolist())
X = np.concatenate(X)
# Get anomaly scores
scores = (-1.0) * self.model.decision_function(X)
scores = scores.flatten()
# Store inference results
self.inference_time = time.time() - start_time
self.inference_indices = np.array(idxs)
self.inference_file_ids = np.array(file_ids)
self.inference_frame_ids = np.array(frame_ids)
# Create index mapping similar to DeepSAD trainer
self.inference_index_mapping = {
"indices": self.inference_indices,
"file_ids": self.inference_file_ids,
"frame_ids": self.inference_frame_ids,
}
# Log inference statistics
logger.info(f"Number of inference samples: {len(self.inference_indices)}")
logger.info(
f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
)
logger.info("Inference Time: {:.3f}s".format(self.inference_time))
logger.info(
"Score statistics: "
f"min={scores.min():.3f}, "
f"max={scores.max():.3f}, "
f"mean={scores.mean():.3f}, "
f"std={scores.std():.3f}"
)
logger.info("Finished inference.")
return scores
def load_ae(self, dataset_name, model_path):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model."""

View File

@@ -453,6 +453,80 @@ class OCSVM(object):
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
logger.info("Finished testing.")
def inference(
self,
dataset: BaseADDataset,
device: str = "cpu",
n_jobs_dataloader: int = 0,
batch_size: int = 32,
):
"""Perform inference on the dataset using the trained OC-SVM model."""
logger = logging.getLogger()
# Get inference data loader
_, _, inference_loader = dataset.loaders(
batch_size=batch_size, num_workers=n_jobs_dataloader
)
# Get data from loader
X = ()
idxs = []
file_ids = []
frame_ids = []
logger.info("Starting inference...")
start_time = time.time()
for data in inference_loader:
inputs, idx, (file_id, frame_id) = data
inputs = inputs.to(device)
if self.hybrid:
inputs = self.ae_net.encoder(inputs)
X_batch = inputs.view(inputs.size(0), -1)
X += (X_batch.cpu().data.numpy(),)
# Store indices and metadata
idxs.extend(idx.cpu().data.numpy().tolist())
file_ids.extend(file_id.cpu().data.numpy().tolist())
frame_ids.extend(frame_id.cpu().data.numpy().tolist())
X = np.concatenate(X)
# Get anomaly scores
scores = (-1.0) * self.model.decision_function(X)
scores = scores.flatten()
# Store inference results
self.inference_time = time.time() - start_time
self.inference_indices = np.array(idxs)
self.inference_file_ids = np.array(file_ids)
self.inference_frame_ids = np.array(frame_ids)
# Create index mapping similar to DeepSAD trainer
self.inference_index_mapping = {
"indices": self.inference_indices,
"file_ids": self.inference_file_ids,
"frame_ids": self.inference_frame_ids,
}
# Log inference statistics
logger.info(f"Number of inference samples: {len(self.inference_indices)}")
logger.info(
f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
)
logger.info("Inference Time: {:.3f}s".format(self.inference_time))
logger.info(
"Score statistics: "
f"min={scores.min():.3f}, "
f"max={scores.max():.3f}, "
f"mean={scores.mean():.3f}, "
f"std={scores.std():.3f}"
)
logger.info("Finished inference.")
return scores
def load_ae(self, model_path, net_name, device="cpu"):
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model."""

View File

@@ -338,6 +338,8 @@ class SubTerInference(VisionDataset):
self.frame_ids = np.arange(self.data.shape[0], dtype=np.int32)
self.file_names = {0: experiment_file.name}
self.transform = transform if transform else transforms.ToTensor()
def __len__(self):
return len(self.data)

View File

@@ -638,57 +638,185 @@ def main(
cfg.save_config(export_json=xp_path + "/config.json")
elif action == "infer":
# Inference uses a deterministic, non-shuffled loader to preserve temporal order
dataset = load_dataset(
dataset_name,
cfg.settings["dataset_name"],
data_path,
normal_class,
known_outlier_class,
n_known_outlier_classes,
ratio_known_normal,
ratio_known_outlier,
ratio_pollution,
cfg.settings["normal_class"],
cfg.settings["known_outlier_class"],
cfg.settings["n_known_outlier_classes"],
cfg.settings["ratio_known_normal"],
cfg.settings["ratio_known_outlier"],
cfg.settings["ratio_pollution"],
random_state=np.random.RandomState(cfg.settings["seed"]),
k_fold_num=False,
inference=True,
)
# Log random sample of known anomaly classes if more than 1 class
if n_known_outlier_classes > 1:
logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))
# Initialize DeepSAD model and set neural network phi
deepSAD = DeepSAD(latent_space_dim, cfg.settings["eta"])
deepSAD.set_network(net_name)
# If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
if not load_model:
# --- Expect a model DIRECTORY (aligned with 'retest') ---
if (
(not load_model)
or (not Path(load_model).exists())
or (not Path(load_model).is_dir())
):
logger.error(
"For inference mode a model has to be loaded! Pass the --load_model option with the model path!"
"For inference mode a model directory has to be loaded! "
"Pass the --load_model option with the model directory path!"
)
return
load_model = Path(load_model)
# Resolve expected model artifacts (single-model / no k-fold suffixes)
deepsad_model_path = load_model / "model_deepsad.tar"
ae_model_path = load_model / "model_ae.tar"
ocsvm_model_path = load_model / "model_ocsvm.pkl"
isoforest_model_path = load_model / "model_isoforest.pkl"
# Sanity check model files exist
model_paths = [
deepsad_model_path,
ae_model_path,
ocsvm_model_path,
isoforest_model_path,
]
missing = [p.name for p in model_paths if not p.exists() or not p.is_file()]
if missing:
logger.error(
"The following model files do not exist in the provided model directory: "
+ ", ".join(missing)
)
return
deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
logger.info("Loading model from %s." % load_model)
# Prepare output paths
inf_dir = Path(xp_path) / "inference"
inf_dir.mkdir(parents=True, exist_ok=True)
base_stem = Path(Path(dataset.root).stem)  # keep the previous naming convention
# DeepSAD outputs (keep legacy filenames for backward compatibility)
deepsad_scores_path = inf_dir / Path(
base_stem.stem + "_deepsad_scores"
).with_suffix(".npy")
deepsad_outputs_path = inf_dir / Path(base_stem.stem + "_outputs").with_suffix(
".npy"
)
# Baselines
ocsvm_scores_path = inf_dir / Path(
base_stem.stem + "_ocsvm_scores"
).with_suffix(".npy")
isoforest_scores_path = inf_dir / Path(
base_stem.stem + "_isoforest_scores"
).with_suffix(".npy")
inference_results, all_outputs = deepSAD.inference(
dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
)
inference_results_path = (
Path(xp_path)
/ "inference"
/ Path(Path(dataset.root).stem).with_suffix(".npy")
)
inference_outputs_path = (
Path(xp_path)
/ "inference"
/ Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
# Common loader settings
_n_jobs = (
n_jobs_dataloader
if "n_jobs_dataloader" in locals()
else cfg.settings.get("n_jobs_dataloader", 0)
)
inference_results_path.parent.mkdir(parents=True, exist_ok=True)
np.save(inference_results_path, inference_results, fix_imports=False)
np.save(inference_outputs_path, all_outputs, fix_imports=False)
# ----------------- DeepSAD -----------------
deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
deepSAD.set_network(cfg.settings["net_name"])
deepSAD.load_model(
model_path=deepsad_model_path, load_ae=True, map_location=device
)
logger.info("Loaded DeepSAD model from %s.", deepsad_model_path)
deepsad_scores, deepsad_all_outputs = deepSAD.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs
)
np.save(deepsad_scores_path, deepsad_scores)
# np.save(deepsad_outputs_path, deepsad_all_outputs)
logger.info(
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
"DeepSAD inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(deepsad_scores)),
float(np.mean(deepsad_scores)),
float(np.min(deepsad_scores)),
float(np.max(deepsad_scores)),
)
# ----------------- OCSVM (hybrid) -----------------
ocsvm_scores = None
ocsvm = OCSVM(
kernel=cfg.settings["ocsvm_kernel"],
nu=cfg.settings["ocsvm_nu"],
hybrid=True,
latent_space_dim=cfg.settings["latent_space_dim"],
)
# load AE to build the feature extractor for hybrid OCSVM
ocsvm.load_ae(
net_name=cfg.settings["net_name"],
model_path=ae_model_path,
device=device,
)
ocsvm.load_model(import_path=ocsvm_model_path)
ocsvm_scores = ocsvm.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs, batch_size=32
)
if ocsvm_scores is not None:
np.save(ocsvm_scores_path, ocsvm_scores)
logger.info(
"OCSVM inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(ocsvm_scores)),
float(np.mean(ocsvm_scores)),
float(np.min(ocsvm_scores)),
float(np.max(ocsvm_scores)),
)
else:
logger.warning("OCSVM scores could not be determined; no array saved.")
# ----------------- Isolation Forest -----------------
isoforest_scores = None
Isoforest = IsoForest(
hybrid=False,
n_estimators=cfg.settings["isoforest_n_estimators"],
max_samples=cfg.settings["isoforest_max_samples"],
contamination=cfg.settings["isoforest_contamination"],
n_jobs=cfg.settings["isoforest_n_jobs_model"],
seed=cfg.settings["seed"],
)
Isoforest.load_model(import_path=isoforest_model_path, device=device)
isoforest_scores = Isoforest.inference(
dataset, device=device, n_jobs_dataloader=_n_jobs
)
if isoforest_scores is not None:
np.save(isoforest_scores_path, isoforest_scores)
logger.info(
"IsolationForest inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
float(np.median(isoforest_scores)),
float(np.mean(isoforest_scores)),
float(np.min(isoforest_scores)),
float(np.max(isoforest_scores)),
)
else:
logger.warning(
"Isolation Forest scores could not be determined; no array saved."
)
# Final summary (DeepSAD always runs; baselines are best-effort)
logger.info(
"Inference complete. Saved arrays to %s:\n"
" DeepSAD scores: %s\n"
" DeepSAD outputs: %s\n"
" OCSVM scores: %s\n"
" IsoForest scores: %s",
inf_dir,
deepsad_scores_path.name,
deepsad_outputs_path.name,
ocsvm_scores_path.name if ocsvm_scores is not None else "(not saved)",
isoforest_scores_path.name
if isoforest_scores is not None
else "(not saved)",
)
elif action == "ae_elbow_test":
# Load data once
dataset = load_dataset(

View File

@@ -177,6 +177,8 @@ class DeepSADTrainer(BaseTrainer):
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
)
latent_dim = net.rep_dim
# Set device for network
net = net.to(self.device)
@@ -184,7 +186,9 @@ class DeepSADTrainer(BaseTrainer):
logger.info("Starting inference...")
n_batches = 0
start_time = time.time()
all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
all_outputs = np.zeros(
(len(inference_loader.dataset), latent_dim), dtype=np.float32
)
scores = []
net.eval()

Binary file not shown.

View File

@@ -1166,7 +1166,7 @@ To compare the computational efficiency of the two architectures we show the num
%\todo[inline]{rework table and calculate with actual scripts and network archs in deepsad codebase}
\todo[inline]{next paragraph does not work anymore?}
As can be seen, the efficient encoder requires an order of magnitude fewer parameters and significantly fewer operations while maintaining a comparable representational capacity. The key reason is the use of depthwise separable convolutions, aggressive pooling along the densely sampled horizontal axis, and a channel squeezing strategy before the fully connected layer. Interestingly, the Efficient network also processes more intermediate channels (up to 32 compared to only 8 in the LeNet variant), which increases its ability to capture a richer set of patterns despite the reduced computational cost. This combination of efficiency and representational power makes the Efficient encoder a more suitable backbone for our anomaly detection task.
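For illustration, a minimal PyTorch sketch of the depthwise separable convolution pattern described above, with pooling restricted to the densely sampled horizontal axis. The 8-to-32 channel step and the 32 x 2048 frame size are taken from the surrounding discussion; the kernel sizes, pooling factor, and layer layout are assumptions for the example, not the thesis network definition.

import torch
import torch.nn as nn

class DepthwiseSeparableConv(nn.Module):
    """Depthwise (per-channel) 3x3 convolution followed by a 1x1 pointwise
    convolution; parameters drop from in_ch*out_ch*3*3 to in_ch*3*3 + in_ch*out_ch."""

    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1,
                                   groups=in_ch, bias=False)
        self.pointwise = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

# Hypothetical stage: 8 -> 32 channels on a 32 x 2048 range image, then pooling
# only along the horizontal axis, which carries most of the redundant samples.
stage = nn.Sequential(DepthwiseSeparableConv(8, 32), nn.ReLU(),
                      nn.MaxPool2d(kernel_size=(1, 4)))
out = stage(torch.randn(1, 8, 32, 2048))  # -> torch.Size([1, 32, 32, 512])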
@@ -1378,7 +1378,7 @@ Pretraining runtimes for the autoencoders are reported in Table~\ref{tab:ae_pret
\end{tabularx}
\end{table}
The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15--20 minutes of GPU training per configuration, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretraining encoder as a preprocessing step, although other dimensionality reduction methods may also be used which may require less computational resources for this step.
The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15--20 minutes of GPU training per configuration and fold, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretrained encoder as a preprocessing step, although other dimensionality reduction methods, which could require fewer computational resources, may also be used for this step.
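To make the preprocessing claim concrete, below is a rough, self-contained sketch of the hybrid setup, assuming a placeholder encoder and random frames: a frozen encoder compresses each 32 x 2048 frame to a small latent vector, and the classical OC-SVM is fit on those vectors, which is why its training stays in the seconds range. The layer sizes, nu, and kernel here are illustrative only and do not reflect the trained models.

import torch
import torch.nn as nn
from sklearn.svm import OneClassSVM

# Placeholder encoder: flattens a 32 x 2048 frame and maps it to 32 latent features.
encoder = nn.Sequential(
    nn.Flatten(),
    nn.Linear(32 * 2048, 512),
    nn.ReLU(),
    nn.Linear(512, 32),
)

frames = torch.rand(256, 1, 32, 2048)  # a small batch of synthetic frames

with torch.no_grad():
    latent = encoder(frames).numpy()   # shape (256, 32)

# Fitting on 32-dimensional latent features instead of 65,536 raw values is
# what keeps the OC-SVM training time in the seconds range.
ocsvm = OneClassSVM(kernel="rbf", nu=0.1)
ocsvm.fit(latent)
scores = -ocsvm.decision_function(latent)  # higher score = more anomalous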
\begin{table}
\centering

View File

@@ -96,6 +96,21 @@ PRETRAIN_SCHEMA = {
"config_json": pl.Utf8, # full config.json as string (for reference)
}
SCHEMA_INFERENCE = {
# identifiers / dims
"experiment": pl.Utf8, # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001"
"network": pl.Utf8, # e.g. "LeNet", "efficient"
"latent_dim": pl.Int32,
"semi_normals": pl.Int32,
"semi_anomalous": pl.Int32,
"model": pl.Utf8, # "deepsad" | "isoforest" | "ocsvm"
# metrics
"scores": pl.List(pl.Float64),
# timings / housekeeping
"folder": pl.Utf8,
"config_json": pl.Utf8, # full config.json as string (for reference)
}
# ------------------------------------------------------------
# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
@@ -233,11 +248,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]:
# ------------------------------------------------------------
# Low-level: read one experiment folder
# ------------------------------------------------------------
def read_config(exp_dir: Path) -> dict:
def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict:
cfg = exp_dir / "config.json"
with cfg.open("r") as f:
c = json.load(f)
if not c.get("k_fold"):
if k_fold_required and not c.get("k_fold"):
raise ValueError(f"{exp_dir.name}: not trained as k-fold")
return c
@@ -589,7 +604,129 @@ def load_pretraining_results_dataframe(
return df
def load_inference_results_dataframe(
root: Path,
allow_cache: bool = True,
models: List[str] = MODELS,
) -> pl.DataFrame:
"""Load inference results from experiment folders.
Args:
root: Path to root directory containing experiment folders
allow_cache: Whether to use/create cache file
models: List of models to look for scores
Returns:
pl.DataFrame: DataFrame containing inference results
"""
if allow_cache:
cache = root / "inference_results_cache.parquet"
if cache.exists():
try:
df = pl.read_parquet(cache)
print(f"[info] loaded cached inference frame from {cache}")
return df
except Exception as e:
print(f"[warn] failed to load inference cache {cache}: {e}")
rows: List[dict] = []
exp_dirs = [p for p in root.iterdir() if p.is_dir()]
for exp_dir in sorted(exp_dirs):
try:
# Load and validate config
cfg = read_config(exp_dir, k_fold_required=False)
cfg_json = json.dumps(cfg, sort_keys=True)
# Extract config values
network = cfg.get("net_name")
latent_dim = int(cfg.get("latent_space_dim"))
semi_normals = int(cfg.get("num_known_normal"))
semi_anomalous = int(cfg.get("num_known_outlier"))
# Process each model's scores
inference_dir = exp_dir / "inference"
if not inference_dir.exists():
print(f"[warn] no inference directory for {exp_dir.name}")
continue
# Find all unique experiments in this folder's inference files
score_files = list(inference_dir.glob("*_scores.npy"))
if not score_files:
print(f"[warn] no score files in {inference_dir}")
continue
# Extract unique experiment names from score files
# Format: {experiment}_{model}_scores.npy
experiments = set()
for score_file in score_files:
exp_name = score_file.stem.rsplit("_", 2)[0]
experiments.add(exp_name)
# Load scores for each experiment and model
for experiment in sorted(experiments):
for model in models:
score_file = inference_dir / f"{experiment}_{model}_scores.npy"
if not score_file.exists():
print(f"[warn] missing score file for {experiment}, {model}")
continue
try:
scores = np.load(score_file)
rows.append(
{
"experiment": experiment,
"network": network,
"latent_dim": latent_dim,
"semi_normals": semi_normals,
"semi_anomalous": semi_anomalous,
"model": model,
"scores": scores.tolist(),
"folder": str(exp_dir),
"config_json": cfg_json,
}
)
except Exception as e:
print(
f"[warn] failed to load scores for {experiment}, {model}: {e}"
)
continue
except Exception as e:
print(f"[warn] skipping {exp_dir.name}: {e}")
continue
# If empty, return a typed empty frame
if not rows:
return pl.DataFrame(schema=SCHEMA_INFERENCE)
df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE)
# Optimize datatypes
df = df.with_columns(
[
pl.col("experiment", "network", "model").cast(pl.Categorical),
pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32),
]
)
# Cache if enabled
if allow_cache:
try:
df.write_parquet(cache)
print(f"[info] cached inference frame to {cache}")
except Exception as e:
print(f"[warn] failed to write cache {cache}: {e}")
return df
def main():
inference_root = Path("/home/fedex/mt/results/inference/copy")
df_inference = load_inference_results_dataframe(inference_root, allow_cache=True)
exit(0)
root = Path("/home/fedex/mt/results/copy")
df1 = load_results_dataframe(root, allow_cache=True)
exit(0)

View File

@@ -0,0 +1,269 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"
# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
methods_scores_path = Path(
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)
# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")
# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")
# Cache (stats + labels) directory, same as the original statistics script
cache_path = output_path
# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048
# Frames per second for x-axis time
FPS = 10.0
# Whether to try to align score sign so that higher = more degraded.
ALIGN_SCORE_DIRECTION = True
# =========================
# Smoothing configuration
# =========================
# Options: "none", "moving_average", "gaussian", "ema"
SMOOTHING_METHOD = "ema"
# Moving average window size (in frames). Use odd number for symmetry; <=1 disables.
MA_WINDOW = 11
# Gaussian sigma (in frames). ~2-3 frames is mild smoothing.
GAUSSIAN_SIGMA = 2.0
# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start.
EMA_ALPHA = 0.1
# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
# =========================
# Discover experiments
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
normal_experiment_paths = sorted(
normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
anomaly_experiment_paths, key=lambda p: p.stat().st_size
)
# Find experiment
exp_path, exp_is_anomaly = None, None
for p in anomaly_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path, exp_is_anomaly = p, True
break
if exp_path is None:
for p in normal_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path, exp_is_anomaly = p, False
break
if exp_path is None:
raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found")
exp_index = (
anomaly_experiment_paths.index(exp_path)
if exp_is_anomaly
else normal_experiment_paths.index(exp_path)
)
# =========================
# Load cached statistical data
# =========================
with open(cache_path / "missing_points.pkl", "rb") as f:
missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f:
near_sensor_normal, near_sensor_anomaly = pickle.load(f)
if exp_is_anomaly:
missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0
# =========================
# Load manual anomaly frame borders
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
with open(labels_json_path, "r") as f:
labeled_json = json.load(f)
for file in labeled_json.get("files", []):
manually_labeled_anomaly_frames[file["filename"]] = (
file.get("semi_target_begin_frame"),
file.get("semi_target_end_frame"),
)
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))
# =========================
# Load method scores and normalize
# =========================
def zscore_1d(x, eps=1e-12):
mu, sigma = np.mean(x), np.std(x, ddof=0)
return np.zeros_like(x) if sigma < eps else (x - mu) / sigma
def maybe_align_direction(z, window):
start, end = window
if start is None or end is None:
return z
inside_mean = np.mean(z[start:end]) if end > start else 0
outside = np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else []
outside_mean = np.mean(outside) if len(outside) else inside_mean
return z if inside_mean >= outside_mean else -z
methods = ["deepsad", "ocsvm", "isoforest"]
method_zscores = {}
for m in methods:
s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy")
s = np.asarray(s, dtype=float).ravel()
n = min(len(s), len(missing_points_pct))
s, missing_points_pct, near_sensor_pct = (
s[:n],
missing_points_pct[:n],
near_sensor_pct[:n],
)
z = zscore_1d(s)
if ALIGN_SCORE_DIRECTION:
z = maybe_align_direction(z, anomaly_window)
method_zscores[m] = z
# =========================
# Smoothing
# =========================
def moving_average(x, window):
if window <= 1:
return x
if window % 2 == 0:
window += 1
return np.convolve(x, np.ones(window) / window, mode="same")
def gaussian_smooth(x, sigma):
from scipy.ndimage import gaussian_filter1d
return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x
def ema(x, alpha):
y = np.empty_like(x)
y[0] = x[0]
for i in range(1, len(x)):
y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
return y
def apply_smoothing(x):
m = SMOOTHING_METHOD.lower()
if m == "none":
return x
if m == "moving_average":
return moving_average(x, MA_WINDOW)
if m == "gaussian":
return gaussian_smooth(x, GAUSSIAN_SIGMA)
if m == "ema":
return ema(x, EMA_ALPHA)
raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}")
smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()}
smoothed_missing = apply_smoothing(missing_points_pct)
smoothed_near = apply_smoothing(near_sensor_pct)
# =========================
# Plot
# =========================
t = np.arange(len(missing_points_pct)) / FPS
def plot_series(y2, ylabel, fname, title_suffix):
fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy = axz.twinx()
for m in methods:
axz.plot(t, smoothed_z[m], label=f"{m} (z)")
axy.plot(t, y2, linestyle="--", label=ylabel)
start, end = anomaly_window
if start is not None and end is not None:
axz.axvline(start / FPS, linestyle=":", alpha=0.6)
axz.axvline(end / FPS, linestyle=":", alpha=0.6)
axz.set_xlabel("Time (s)")
axz.set_ylabel("Anomaly score (z)")
axy.set_ylabel(ylabel)
axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}")
lines1, labels1 = axz.get_legend_handles_labels()
lines2, labels2 = axy.get_legend_handles_labels()
axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz.grid(True, alpha=0.3)
fig.savefig(output_datetime_path / fname, dpi=150)
plt.close(fig)
plot_series(
smoothed_missing,
"Missing points (%)",
f"{EXPERIMENT_NAME}_zscores_vs_missing.png",
"Degradation vs Missing Points",
)
plot_series(
smoothed_near,
"Near-sensor points (%)",
f"{EXPERIMENT_NAME}_zscores_vs_near.png",
"Degradation vs Near-Sensor Points (<0.5m)",
)
# =========================
# Save & archive
# =========================
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for f in output_datetime_path.iterdir():
shutil.copy2(f, latest_folder_path)
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")

View File

@@ -0,0 +1,304 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"
# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
# Adjust this to the directory where the per-method score files are saved.
methods_scores_path = Path(
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)
# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")
# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline")
# Cache (stats + labels) directory, same as the original statistics script
cache_path = output_path
# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048
# Frames per second for x-axis time
FPS = 10.0
# Whether to try to align score sign so that higher = more degraded.
# If manual labels exist for this experiment, alignment uses anomaly window mean vs. outside.
ALIGN_SCORE_DIRECTION = True
# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
# =========================
# Discover experiments to reconstruct indices consistent with caches
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
if not all_data_path.exists():
raise FileNotFoundError(f"all_data_path does not exist: {all_data_path}")
for bag_file_path in all_data_path.iterdir():
if bag_file_path.suffix != ".bag":
continue
if "smoke" in bag_file_path.name:
anomaly_experiment_paths.append(bag_file_path)
else:
normal_experiment_paths.append(bag_file_path)
# Sort by filesize to match original ordering used when caches were generated
normal_experiment_paths = sorted(
normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
anomaly_experiment_paths, key=lambda p: p.stat().st_size
)
# Find the path for the requested experiment
exp_path = None
exp_is_anomaly = None
for p in anomaly_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path = p
exp_is_anomaly = True
break
if exp_path is None:
for p in normal_experiment_paths:
if p.stem == EXPERIMENT_NAME:
exp_path = p
exp_is_anomaly = False
break
if exp_path is None:
raise FileNotFoundError(
f"Experiment '{EXPERIMENT_NAME}' not found as a .bag in {all_data_path}"
)
# Get the index within the appropriate list
if exp_is_anomaly:
exp_index = anomaly_experiment_paths.index(exp_path)
else:
exp_index = normal_experiment_paths.index(exp_path)
# =========================
# Load cached statistical data
# =========================
missing_points_cache = Path(cache_path / "missing_points.pkl")
near_sensor_cache = Path(cache_path / "particles_near_sensor_counts_500.pkl")
if not missing_points_cache.exists():
raise FileNotFoundError(f"Missing points cache not found: {missing_points_cache}")
if not near_sensor_cache.exists():
raise FileNotFoundError(f"Near-sensor cache not found: {near_sensor_cache}")
with open(missing_points_cache, "rb") as f:
missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(near_sensor_cache, "rb") as f:
near_sensor_normal, near_sensor_anomaly = pickle.load(f)
if exp_is_anomaly:
missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)
# Convert counts to percentages of total points
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0
# =========================
# Load manual anomaly frame borders (optional; used for sign alignment + vertical markers)
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
with open(labels_json_path, "r") as frame_borders_file:
manually_labeled_anomaly_frames_json = json.load(frame_borders_file)
for file in manually_labeled_anomaly_frames_json.get("files", []):
manually_labeled_anomaly_frames[file["filename"]] = (
file.get("semi_target_begin_frame", None),
file.get("semi_target_end_frame", None),
)
# The JSON uses .npy filenames (as in the original script). Create this experiment's key.
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))
# =========================
# Load method scores and z-score normalize per method
# =========================
def zscore_1d(x: np.ndarray, eps=1e-12):
x = np.asarray(x, dtype=float)
mu = np.mean(x)
sigma = np.std(x, ddof=0)
if sigma < eps:
return np.zeros_like(x)
return (x - mu) / sigma
def maybe_align_direction(z: np.ndarray, window):
"""Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist."""
start, end = window
if start is None or end is None:
return z # no labels → leave as-is
start = int(max(0, start))
end = int(min(len(z), end))
if end <= start or end > len(z):
return z
inside_mean = float(np.mean(z[start:end]))
# outside: everything except [start:end]; handle edge cases
if start == 0 and end == len(z):
return z
outside_parts = []
if start > 0:
outside_parts.append(z[:start])
if end < len(z):
outside_parts.append(z[end:])
if not outside_parts:
return z
outside_mean = float(np.mean(np.concatenate(outside_parts)))
return z if inside_mean >= outside_mean else -z
methods = ["deepsad", "ocsvm", "isoforest"]
method_scores = {}
method_zscores = {}
if not methods_scores_path.exists():
raise FileNotFoundError(
f"Methods scores path does not exist: {methods_scores_path}"
)
for m in methods:
file_path = methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy"
if not file_path.exists():
raise FileNotFoundError(f"Missing scores file for method '{m}': {file_path}")
s = np.load(file_path)
s = np.asarray(s, dtype=float).reshape(-1)
# If needed, truncate to the common length (lengths should match if generated consistently)
n = min(len(s), len(missing_points_pct))
if len(s) != len(missing_points_pct):
# Align by truncation to the shortest length
s = s[:n]
# Also truncate stats to match
missing_points_pct = missing_points_pct[:n]
near_sensor_pct = near_sensor_pct[:n]
z = zscore_1d(s)
if ALIGN_SCORE_DIRECTION:
z = maybe_align_direction(z, anomaly_window)
method_scores[m] = s
method_zscores[m] = z
# Common time axis in seconds
num_frames = len(missing_points_pct)
t = np.arange(num_frames) / FPS
# =========================
# Plot 1: Missing points (%) vs. method z-scores
# =========================
fig1, axz1 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy1 = axz1.twinx()
# plot z-scores
for m in methods:
axz1.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)
# plot missing points (%)
axy1.plot(t, missing_points_pct, linestyle="--", alpha=0.7, label="Missing points (%)")
# vertical markers for anomaly window if available
start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
axz1.axvline(x=start / FPS, linestyle=":", alpha=0.6)
axz1.axvline(x=end / FPS, linestyle=":", alpha=0.6)
axz1.set_xlabel("Time (s)")
axz1.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy1.set_ylabel("Missing points (%)")
axz1.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Missing Points")
# Build a combined legend
lines1, labels1 = axz1.get_legend_handles_labels()
lines2, labels2 = axy1.get_legend_handles_labels()
axz1.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz1.grid(True, alpha=0.3)
fig1.savefig(
output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_missing_points.png", dpi=150
)
plt.close(fig1)
# =========================
# Plot 2: Near-sensor (%) vs. method z-scores
# =========================
fig2, axz2 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy2 = axz2.twinx()
for m in methods:
axz2.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)
axy2.plot(t, near_sensor_pct, linestyle="--", alpha=0.7, label="Near-sensor <0.5m (%)")
start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
axz2.axvline(x=start / FPS, linestyle=":", alpha=0.6)
axz2.axvline(x=end / FPS, linestyle=":", alpha=0.6)
axz2.set_xlabel("Time (s)")
axz2.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy2.set_ylabel("Near-sensor points (%)")
axz2.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Near-Sensor Points (<0.5 m)")
lines1, labels1 = axz2.get_legend_handles_labels()
lines2, labels2 = axy2.get_legend_handles_labels()
axz2.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
axz2.grid(True, alpha=0.3)
fig2.savefig(
output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_near_sensor.png", dpi=150
)
plt.close(fig2)
# =========================
# Preserve latest/, archive/, copy script
# =========================
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)
# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
shutil.copy2(file, latest_folder_path)
# copy this python script to preserve the code used
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")