Compare commits: e4b298cf06 ... e20c2235ed (2 commits)

Commits in this range: e20c2235ed, e7624d2786
@@ -261,6 +261,80 @@ class IsoForest(object):
         logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
         logger.info("Finished testing.")
 
+    def inference(
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        batch_size: int = 32,
+    ):
+        """Perform inference on the dataset using the trained Isolation Forest model."""
+        logger = logging.getLogger()
+
+        # Get inference data loader
+        _, _, inference_loader = dataset.loaders(
+            batch_size=batch_size, num_workers=n_jobs_dataloader
+        )
+
+        # Get data from loader
+        X = ()
+        idxs = []
+        file_ids = []
+        frame_ids = []
+
+        logger.info("Starting inference...")
+        start_time = time.time()
+
+        for data in inference_loader:
+            inputs, idx, (file_id, frame_id) = data
+            inputs = inputs.to(device)
+
+            if self.hybrid:
+                inputs = self.ae_net.encoder(inputs)
+            X_batch = inputs.view(inputs.size(0), -1)
+            X += (X_batch.cpu().data.numpy(),)
+
+            # Store indices and metadata
+            idxs.extend(idx.cpu().data.numpy().tolist())
+            file_ids.extend(file_id.cpu().data.numpy().tolist())
+            frame_ids.extend(frame_id.cpu().data.numpy().tolist())
+
+        X = np.concatenate(X)
+
+        # Get anomaly scores
+        scores = (-1.0) * self.model.decision_function(X)
+        scores = scores.flatten()
+
+        # Store inference results
+        self.inference_time = time.time() - start_time
+        self.inference_indices = np.array(idxs)
+        self.inference_file_ids = np.array(file_ids)
+        self.inference_frame_ids = np.array(frame_ids)
+
+        # Create index mapping similar to DeepSAD trainer
+        self.inference_index_mapping = {
+            "indices": self.inference_indices,
+            "file_ids": self.inference_file_ids,
+            "frame_ids": self.inference_frame_ids,
+        }
+
+        # Log inference statistics
+        logger.info(f"Number of inference samples: {len(self.inference_indices)}")
+        logger.info(
+            f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
+        )
+        logger.info("Inference Time: {:.3f}s".format(self.inference_time))
+        logger.info(
+            "Score statistics: "
+            f"min={scores.min():.3f}, "
+            f"max={scores.max():.3f}, "
+            f"mean={scores.mean():.3f}, "
+            f"std={scores.std():.3f}"
+        )
+        logger.info("Finished inference.")
+
+        return scores
+
     def load_ae(self, dataset_name, model_path):
         """Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model."""
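A minimal usage sketch for the new `inference` method, assuming an already trained `IsoForest` instance and a dataset object implementing `BaseADDataset.loaders()`; the driver function and file name below are illustrative, not part of the diff:

```python
import numpy as np

def run_isoforest_inference(model, dataset, out_path="isoforest_scores.npy"):
    # `model` is assumed to be a fitted IsoForest, `dataset` a BaseADDataset.
    scores = model.inference(dataset, device="cpu", n_jobs_dataloader=0, batch_size=32)
    # Scores are negated decision_function values, so larger means more anomalous.
    np.save(out_path, scores)
    # The index mapping lets scores be re-associated with source files/frames.
    return scores, model.inference_index_mapping
```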
@@ -453,6 +453,80 @@ class OCSVM(object):
         logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
         logger.info("Finished testing.")
 
+    def inference(
+        self,
+        dataset: BaseADDataset,
+        device: str = "cpu",
+        n_jobs_dataloader: int = 0,
+        batch_size: int = 32,
+    ):
+        """Perform inference on the dataset using the trained OC-SVM model."""
+        logger = logging.getLogger()
+
+        # Get inference data loader
+        _, _, inference_loader = dataset.loaders(
+            batch_size=batch_size, num_workers=n_jobs_dataloader
+        )
+
+        # Get data from loader
+        X = ()
+        idxs = []
+        file_ids = []
+        frame_ids = []
+
+        logger.info("Starting inference...")
+        start_time = time.time()
+
+        for data in inference_loader:
+            inputs, idx, (file_id, frame_id) = data
+            inputs = inputs.to(device)
+
+            if self.hybrid:
+                inputs = self.ae_net.encoder(inputs)
+            X_batch = inputs.view(inputs.size(0), -1)
+            X += (X_batch.cpu().data.numpy(),)
+
+            # Store indices and metadata
+            idxs.extend(idx.cpu().data.numpy().tolist())
+            file_ids.extend(file_id.cpu().data.numpy().tolist())
+            frame_ids.extend(frame_id.cpu().data.numpy().tolist())
+
+        X = np.concatenate(X)
+
+        # Get anomaly scores
+        scores = (-1.0) * self.model.decision_function(X)
+        scores = scores.flatten()
+
+        # Store inference results
+        self.inference_time = time.time() - start_time
+        self.inference_indices = np.array(idxs)
+        self.inference_file_ids = np.array(file_ids)
+        self.inference_frame_ids = np.array(frame_ids)
+
+        # Create index mapping similar to DeepSAD trainer
+        self.inference_index_mapping = {
+            "indices": self.inference_indices,
+            "file_ids": self.inference_file_ids,
+            "frame_ids": self.inference_frame_ids,
+        }
+
+        # Log inference statistics
+        logger.info(f"Number of inference samples: {len(self.inference_indices)}")
+        logger.info(
+            f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
+        )
+        logger.info("Inference Time: {:.3f}s".format(self.inference_time))
+        logger.info(
+            "Score statistics: "
+            f"min={scores.min():.3f}, "
+            f"max={scores.max():.3f}, "
+            f"mean={scores.mean():.3f}, "
+            f"std={scores.std():.3f}"
+        )
+        logger.info("Finished inference.")
+
+        return scores
+
     def load_ae(self, model_path, net_name, device="cpu"):
         """Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model."""
@@ -338,6 +338,8 @@ class SubTerInference(VisionDataset):
         self.frame_ids = np.arange(self.data.shape[0], dtype=np.int32)
         self.file_names = {0: experiment_file.name}
 
+        self.transform = transform if transform else transforms.ToTensor()
+
     def __len__(self):
         return len(self.data)
@@ -638,57 +638,185 @@ def main(
         cfg.save_config(export_json=xp_path + "/config.json")
 
     elif action == "infer":
+        # Inference uses a deterministic, non-shuffled loader to preserve temporal order
         dataset = load_dataset(
-            dataset_name,
+            cfg.settings["dataset_name"],
             data_path,
-            normal_class,
-            known_outlier_class,
-            n_known_outlier_classes,
-            ratio_known_normal,
-            ratio_known_outlier,
-            ratio_pollution,
+            cfg.settings["normal_class"],
+            cfg.settings["known_outlier_class"],
+            cfg.settings["n_known_outlier_classes"],
+            cfg.settings["ratio_known_normal"],
+            cfg.settings["ratio_known_outlier"],
+            cfg.settings["ratio_pollution"],
             random_state=np.random.RandomState(cfg.settings["seed"]),
+            k_fold_num=False,
             inference=True,
         )
 
         # Log random sample of known anomaly classes if more than 1 class
         if n_known_outlier_classes > 1:
             logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))
 
-        # Initialize DeepSAD model and set neural network phi
-        deepSAD = DeepSAD(latent_space_dim, cfg.settings["eta"])
-        deepSAD.set_network(net_name)
-
-        # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
-        if not load_model:
+        # --- Expect a model DIRECTORY (aligned with 'retest') ---
+        if (
+            (not load_model)
+            or (not Path(load_model).exists())
+            or (not Path(load_model).is_dir())
+        ):
             logger.error(
-                "For inference mode a model has to be loaded! Pass the --load_model option with the model path!"
+                "For inference mode a model directory has to be loaded! "
+                "Pass the --load_model option with the model directory path!"
             )
             return
+        load_model = Path(load_model)
+
+        # Resolve expected model artifacts (single-model / no k-fold suffixes)
+        deepsad_model_path = load_model / "model_deepsad.tar"
+        ae_model_path = load_model / "model_ae.tar"
+        ocsvm_model_path = load_model / "model_ocsvm.pkl"
+        isoforest_model_path = load_model / "model_isoforest.pkl"
+
+        # Sanity check model files exist
+        model_paths = [
+            deepsad_model_path,
+            ae_model_path,
+            ocsvm_model_path,
+            isoforest_model_path,
+        ]
+        missing = [p.name for p in model_paths if not p.exists() or not p.is_file()]
+        if missing:
+            logger.error(
+                "The following model files do not exist in the provided model directory: "
+                + ", ".join(missing)
+            )
+            return
 
-        deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
-        logger.info("Loading model from %s." % load_model)
-
-        inference_results, all_outputs = deepSAD.inference(
-            dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
-        )
-        inference_results_path = (
-            Path(xp_path)
-            / "inference"
-            / Path(Path(dataset.root).stem).with_suffix(".npy")
-        )
-        inference_outputs_path = (
-            Path(xp_path)
-            / "inference"
-            / Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
-        )
-
-        inference_results_path.parent.mkdir(parents=True, exist_ok=True)
-        np.save(inference_results_path, inference_results, fix_imports=False)
-        np.save(inference_outputs_path, all_outputs, fix_imports=False)
-
+        # Prepare output paths
+        inf_dir = Path(xp_path) / "inference"
+        inf_dir.mkdir(parents=True, exist_ok=True)
+        base_stem = Path(Path(dataset.root).stem)  # keep your previous naming
+        # DeepSAD outputs (keep legacy filenames for backward compatibility)
+        deepsad_scores_path = inf_dir / Path(
+            base_stem.stem + "_deepsad_scores"
+        ).with_suffix(".npy")
+        deepsad_outputs_path = inf_dir / Path(base_stem.stem + "_outputs").with_suffix(
+            ".npy"
+        )
+        # Baselines
+        ocsvm_scores_path = inf_dir / Path(
+            base_stem.stem + "_ocsvm_scores"
+        ).with_suffix(".npy")
+        isoforest_scores_path = inf_dir / Path(
+            base_stem.stem + "_isoforest_scores"
+        ).with_suffix(".npy")
+
+        # Common loader settings
+        _n_jobs = (
+            n_jobs_dataloader
+            if "n_jobs_dataloader" in locals()
+            else cfg.settings.get("n_jobs_dataloader", 0)
+        )
+
+        # ----------------- DeepSAD -----------------
+        deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
+        deepSAD.set_network(cfg.settings["net_name"])
+        deepSAD.load_model(
+            model_path=deepsad_model_path, load_ae=True, map_location=device
+        )
+        logger.info("Loaded DeepSAD model from %s.", deepsad_model_path)
+
+        deepsad_scores, deepsad_all_outputs = deepSAD.inference(
+            dataset, device=device, n_jobs_dataloader=_n_jobs
+        )
+
+        np.save(deepsad_scores_path, deepsad_scores)
+        # np.save(deepsad_outputs_path, deepsad_all_outputs)
+
         logger.info(
-            f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
+            "DeepSAD inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
+            float(np.median(deepsad_scores)),
+            float(np.mean(deepsad_scores)),
+            float(np.min(deepsad_scores)),
+            float(np.max(deepsad_scores)),
         )
+
+        # ----------------- OCSVM (hybrid) -----------------
+        ocsvm_scores = None
+        ocsvm = OCSVM(
+            kernel=cfg.settings["ocsvm_kernel"],
+            nu=cfg.settings["ocsvm_nu"],
+            hybrid=True,
+            latent_space_dim=cfg.settings["latent_space_dim"],
+        )
+        # load AE to build the feature extractor for hybrid OCSVM
+        ocsvm.load_ae(
+            net_name=cfg.settings["net_name"],
+            model_path=ae_model_path,
+            device=device,
+        )
+        ocsvm.load_model(import_path=ocsvm_model_path)
+
+        ocsvm_scores = ocsvm.inference(
+            dataset, device=device, n_jobs_dataloader=_n_jobs, batch_size=32
+        )
+
+        if ocsvm_scores is not None:
+            np.save(ocsvm_scores_path, ocsvm_scores)
+            logger.info(
+                "OCSVM inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
+                float(np.median(ocsvm_scores)),
+                float(np.mean(ocsvm_scores)),
+                float(np.min(ocsvm_scores)),
+                float(np.max(ocsvm_scores)),
+            )
+        else:
+            logger.warning("OCSVM scores could not be determined; no array saved.")
+
+        # ----------------- Isolation Forest -----------------
+        isoforest_scores = None
+        Isoforest = IsoForest(
+            hybrid=False,
+            n_estimators=cfg.settings["isoforest_n_estimators"],
+            max_samples=cfg.settings["isoforest_max_samples"],
+            contamination=cfg.settings["isoforest_contamination"],
+            n_jobs=cfg.settings["isoforest_n_jobs_model"],
+            seed=cfg.settings["seed"],
+        )
+        Isoforest.load_model(import_path=isoforest_model_path, device=device)
+        isoforest_scores = Isoforest.inference(
+            dataset, device=device, n_jobs_dataloader=_n_jobs
+        )
+        if isoforest_scores is not None:
+            np.save(isoforest_scores_path, isoforest_scores)
+            logger.info(
+                "IsolationForest inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
+                float(np.median(isoforest_scores)),
+                float(np.mean(isoforest_scores)),
+                float(np.min(isoforest_scores)),
+                float(np.max(isoforest_scores)),
+            )
+        else:
+            logger.warning(
+                "Isolation Forest scores could not be determined; no array saved."
+            )
+
+        # Final summary (DeepSAD always runs; baselines are best-effort)
+        logger.info(
+            "Inference complete. Saved arrays to %s:\n"
+            " DeepSAD scores: %s\n"
+            " DeepSAD outputs: %s\n"
+            " OCSVM scores: %s\n"
+            " IsoForest scores: %s",
+            inf_dir,
+            deepsad_scores_path.name,
+            deepsad_outputs_path.name,
+            ocsvm_scores_path.name if ocsvm_scores is not None else "(not saved)",
+            isoforest_scores_path.name
+            if isoforest_scores is not None
+            else "(not saved)",
+        )
 
     elif action == "ae_elbow_test":
         # Load data once
         dataset = load_dataset(
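For downstream analysis, the arrays written by this branch can be read back with plain NumPy. A small sketch, assuming the layout produced above (`<xp_path>/inference/<experiment>_{deepsad,ocsvm,isoforest}_scores.npy`); the helper name is illustrative:

```python
from pathlib import Path
import numpy as np

def load_inference_scores(xp_path: str, experiment: str) -> dict:
    """Collect per-model score arrays saved by the 'infer' action (assumed layout)."""
    inf_dir = Path(xp_path) / "inference"
    scores = {}
    for model in ("deepsad", "ocsvm", "isoforest"):
        f = inf_dir / f"{experiment}_{model}_scores.npy"
        if f.exists():
            scores[model] = np.load(f)
    return scores
```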
@@ -177,6 +177,8 @@ class DeepSADTrainer(BaseTrainer):
             batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
         )
 
+        latent_dim = net.rep_dim
+
         # Set device for network
         net = net.to(self.device)
@@ -184,7 +186,9 @@ class DeepSADTrainer(BaseTrainer):
         logger.info("Starting inference...")
         n_batches = 0
         start_time = time.time()
-        all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
+        all_outputs = np.zeros(
+            (len(inference_loader.dataset), latent_dim), dtype=np.float32
+        )
         scores = []
         net.eval()
thesis/Main.pdf (binary file, not shown)
@@ -1166,7 +1166,7 @@ To compare the computational efficiency of the two architectures we show the num
 
-%\todo[inline]{rework table and calculate with actual scripts and network archs in deepsad codebase}
+\todo[inline]{next paragraph does not work anymore?}
 
 As can be seen, the efficient encoder requires an order of magnitude fewer parameters and significantly fewer operations while maintaining a comparable representational capacity. The key reason is the use of depth–wise separable convolutions, aggressive pooling along the densely sampled horizontal axis, and a channel squeezing strategy before the fully connected layer. Interestingly, the Efficient network also processes more intermediate channels (up to 32 compared to only 8 in the LeNet variant), which increases its ability to capture a richer set of patterns despite the reduced computational cost. This combination of efficiency and representational power makes the Efficient encoder a more suitable backbone for our anomaly detection task.
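The parameter saving from depth-wise separable convolutions follows from the standard counting argument (generic kernel size $K$, channels $C_{in}$, $C_{out}$; an illustration, not the exact layer sizes of either encoder): a standard convolution uses $K^2 C_{in} C_{out}$ weights, whereas its depth-wise separable counterpart uses

\begin{equation*}
K^2 C_{in} + C_{in} C_{out},
\qquad
\frac{K^2 C_{in} + C_{in} C_{out}}{K^2 C_{in} C_{out}} = \frac{1}{C_{out}} + \frac{1}{K^2},
\end{equation*}

so with $K=3$ and a few tens of output channels the reduction is roughly an order of magnitude, consistent with the comparison above.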
@@ -1378,7 +1378,7 @@ Pretraining runtimes for the autoencoders are reported in Table~\ref{tab:ae_pret
 \end{tabularx}
 \end{table}
 
-The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15–20 minutes of GPU training per configuration, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretraining encoder as a preprocessing step, although other dimensionality reduction methods may also be used which may require less computational resources for this step.
+The full DeepSAD training times are shown in Table~\ref{tab:train_runtimes_compact}, alongside the two classical baselines Isolation Forest and One-Class SVM. Here the contrast between methods is clear: while DeepSAD requires on the order of 15–20 minutes of GPU training per configuration and fold, both baselines complete training in seconds on CPU. The OCSVM training can only be this fast due to the reduced input dimensionality from utilizing DeepSAD's pretraining encoder as a preprocessing step, although other dimensionality reduction methods may also be used which could require fewer computational resources for this step.
 
 \begin{table}
 \centering
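A minimal sketch of the hybrid-baseline idea described above, assuming a pretrained PyTorch encoder (`ae_net.encoder`) and tensor batches of frames; the scikit-learn calls are standard, all variable names are illustrative and not taken from the codebase:

```python
import numpy as np
import torch
from sklearn.svm import OneClassSVM

@torch.no_grad()
def encode(encoder, inputs: torch.Tensor) -> np.ndarray:
    """Project raw inputs to the low-dimensional latent space of the pretrained AE."""
    z = encoder(inputs)
    return z.view(z.size(0), -1).cpu().numpy()

# Illustrative usage (X_train/X_test are assumed tensors of frames):
# features = encode(ae_net.encoder, X_train)
# ocsvm = OneClassSVM(kernel="rbf", nu=0.1).fit(features)
# anomaly_scores = -ocsvm.decision_function(encode(ae_net.encoder, X_test))
```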
@@ -96,6 +96,21 @@ PRETRAIN_SCHEMA = {
     "config_json": pl.Utf8,  # full config.json as string (for reference)
 }
 
+SCHEMA_INFERENCE = {
+    # identifiers / dims
+    "experiment": pl.Utf8,  # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001"
+    "network": pl.Utf8,  # e.g. "LeNet", "efficient"
+    "latent_dim": pl.Int32,
+    "semi_normals": pl.Int32,
+    "semi_anomalous": pl.Int32,
+    "model": pl.Utf8,  # "deepsad" | "isoforest" | "ocsvm"
+    # metrics
+    "scores": pl.List(pl.Float64),
+    # timings / housekeeping
+    "folder": pl.Utf8,
+    "config_json": pl.Utf8,  # full config.json as string (for reference)
+}
+
 # ------------------------------------------------------------
 # Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
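A small sketch of how a row conforming to `SCHEMA_INFERENCE` might be materialized and type-checked with polars, assuming the schema dict above is in scope; all values are placeholders:

```python
import polars as pl

row = {
    "experiment": "example_experiment_2023-01-23",  # placeholder
    "network": "efficient",
    "latent_dim": 256,
    "semi_normals": 0,
    "semi_anomalous": 0,
    "model": "deepsad",
    "scores": [0.1, 0.2, 0.3],
    "folder": "/tmp/example_exp",
    "config_json": "{}",
}
# Constructing with an explicit schema raises if a value cannot take the declared dtype.
df = pl.DataFrame([row], schema=SCHEMA_INFERENCE)
print(df.dtypes)
```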
@@ -233,11 +248,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]:
 # ------------------------------------------------------------
 # Low-level: read one experiment folder
 # ------------------------------------------------------------
-def read_config(exp_dir: Path) -> dict:
+def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict:
     cfg = exp_dir / "config.json"
     with cfg.open("r") as f:
         c = json.load(f)
-    if not c.get("k_fold"):
+    if k_fold_required and not c.get("k_fold"):
         raise ValueError(f"{exp_dir.name}: not trained as k-fold")
     return c
@@ -589,7 +604,129 @@ def load_pretraining_results_dataframe(
     return df
 
 
+def load_inference_results_dataframe(
+    root: Path,
+    allow_cache: bool = True,
+    models: List[str] = MODELS,
+) -> pl.DataFrame:
+    """Load inference results from experiment folders.
+
+    Args:
+        root: Path to root directory containing experiment folders
+        allow_cache: Whether to use/create cache file
+        models: List of models to look for scores
+
+    Returns:
+        pl.DataFrame: DataFrame containing inference results
+    """
+    if allow_cache:
+        cache = root / "inference_results_cache.parquet"
+        if cache.exists():
+            try:
+                df = pl.read_parquet(cache)
+                print(f"[info] loaded cached inference frame from {cache}")
+                return df
+            except Exception as e:
+                print(f"[warn] failed to load inference cache {cache}: {e}")
+
+    rows: List[dict] = []
+
+    exp_dirs = [p for p in root.iterdir() if p.is_dir()]
+    for exp_dir in sorted(exp_dirs):
+        try:
+            # Load and validate config
+            cfg = read_config(exp_dir, k_fold_required=False)
+            cfg_json = json.dumps(cfg, sort_keys=True)
+
+            # Extract config values
+            network = cfg.get("net_name")
+            latent_dim = int(cfg.get("latent_space_dim"))
+            semi_normals = int(cfg.get("num_known_normal"))
+            semi_anomalous = int(cfg.get("num_known_outlier"))
+
+            # Process each model's scores
+            inference_dir = exp_dir / "inference"
+            if not inference_dir.exists():
+                print(f"[warn] no inference directory for {exp_dir.name}")
+                continue
+
+            # Find all unique experiments in this folder's inference files
+            score_files = list(inference_dir.glob("*_scores.npy"))
+            if not score_files:
+                print(f"[warn] no score files in {inference_dir}")
+                continue
+
+            # Extract unique experiment names from score files
+            # Format: {experiment}_{model}_scores.npy
+            experiments = set()
+            for score_file in score_files:
+                exp_name = score_file.stem.rsplit("_", 2)[0]
+                experiments.add(exp_name)
+
+            # Load scores for each experiment and model
+            for experiment in sorted(experiments):
+                for model in models:
+                    score_file = inference_dir / f"{experiment}_{model}_scores.npy"
+                    if not score_file.exists():
+                        print(f"[warn] missing score file for {experiment}, {model}")
+                        continue
+
+                    try:
+                        scores = np.load(score_file)
+                        rows.append(
+                            {
+                                "experiment": experiment,
+                                "network": network,
+                                "latent_dim": latent_dim,
+                                "semi_normals": semi_normals,
+                                "semi_anomalous": semi_anomalous,
+                                "model": model,
+                                "scores": scores.tolist(),
+                                "folder": str(exp_dir),
+                                "config_json": cfg_json,
+                            }
+                        )
+                    except Exception as e:
+                        print(
+                            f"[warn] failed to load scores for {experiment}, {model}: {e}"
+                        )
+                        continue
+
+        except Exception as e:
+            print(f"[warn] skipping {exp_dir.name}: {e}")
+            continue
+
+    # If empty, return a typed empty frame
+    if not rows:
+        return pl.DataFrame(schema=SCHEMA_INFERENCE)
+
+    df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE)
+
+    # Optimize datatypes
+    df = df.with_columns(
+        [
+            pl.col("experiment", "network", "model").cast(pl.Categorical),
+            pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32),
+        ]
+    )
+
+    # Cache if enabled
+    if allow_cache:
+        try:
+            df.write_parquet(cache)
+            print(f"[info] cached inference frame to {cache}")
+        except Exception as e:
+            print(f"[warn] failed to write cache {cache}: {e}")
+
+    return df
+
+
 def main():
+    inference_root = Path("/home/fedex/mt/results/inference/copy")
+    df_inference = load_inference_results_dataframe(inference_root, allow_cache=True)
+
+    exit(0)
+
     root = Path("/home/fedex/mt/results/copy")
     df1 = load_results_dataframe(root, allow_cache=True)
     exit(0)
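A brief usage sketch for the new loader, assuming `load_inference_results_dataframe` is importable and the results directory follows the layout it expects; the path and aggregation are illustrative (polars >= 0.19 naming for `group_by`):

```python
from pathlib import Path
import polars as pl

df = load_inference_results_dataframe(Path("/path/to/inference/results"), allow_cache=False)
# Summarize per-experiment, per-model score statistics from the list column.
summary = (
    df.explode("scores")
    .group_by(["experiment", "model"])
    .agg(
        pl.col("scores").mean().alias("mean_score"),
        pl.col("scores").max().alias("max_score"),
    )
)
print(summary)
```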
tools/plot_scripts/results_inference_timeline smoothed.py (new file, 269 lines)
@@ -0,0 +1,269 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"

# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
methods_scores_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)

# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")

# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")

# Cache (stats + labels) directory — same as your original script
cache_path = output_path

# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048

# Frames per second for x-axis time
FPS = 10.0

# Whether to try to align score sign so that higher = more degraded.
ALIGN_SCORE_DIRECTION = True

# =========================
# Smoothing configuration
# =========================
# Options: "none", "moving_average", "gaussian", "ema"
SMOOTHING_METHOD = "ema"

# Moving average window size (in frames). Use odd number for symmetry; <=1 disables.
MA_WINDOW = 11

# Gaussian sigma (in frames). ~2-3 frames is mild smoothing.
GAUSSIAN_SIGMA = 2.0

# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start.
EMA_ALPHA = 0.1

# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

# =========================
# Discover experiments
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix != ".bag":
        continue
    if "smoke" in bag_file_path.name:
        anomaly_experiment_paths.append(bag_file_path)
    else:
        normal_experiment_paths.append(bag_file_path)

normal_experiment_paths = sorted(
    normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
    anomaly_experiment_paths, key=lambda p: p.stat().st_size
)

# Find experiment
exp_path, exp_is_anomaly = None, None
for p in anomaly_experiment_paths:
    if p.stem == EXPERIMENT_NAME:
        exp_path, exp_is_anomaly = p, True
        break
if exp_path is None:
    for p in normal_experiment_paths:
        if p.stem == EXPERIMENT_NAME:
            exp_path, exp_is_anomaly = p, False
            break
if exp_path is None:
    raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found")

exp_index = (
    anomaly_experiment_paths.index(exp_path)
    if exp_is_anomaly
    else normal_experiment_paths.index(exp_path)
)

# =========================
# Load cached statistical data
# =========================
with open(cache_path / "missing_points.pkl", "rb") as f:
    missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f:
    near_sensor_normal, near_sensor_anomaly = pickle.load(f)

if exp_is_anomaly:
    missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
    missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)

missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0

# =========================
# Load manual anomaly frame borders
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
    with open(labels_json_path, "r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        manually_labeled_anomaly_frames[file["filename"]] = (
            file.get("semi_target_begin_frame"),
            file.get("semi_target_end_frame"),
        )
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))


# =========================
# Load method scores and normalize
# =========================
def zscore_1d(x, eps=1e-12):
    mu, sigma = np.mean(x), np.std(x, ddof=0)
    return np.zeros_like(x) if sigma < eps else (x - mu) / sigma


def maybe_align_direction(z, window):
    start, end = window
    if start is None or end is None:
        return z
    inside_mean = np.mean(z[start:end]) if end > start else 0
    outside = np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else []
    outside_mean = np.mean(outside) if len(outside) else inside_mean
    return z if inside_mean >= outside_mean else -z


methods = ["deepsad", "ocsvm", "isoforest"]
method_zscores = {}
for m in methods:
    s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy")
    s = np.asarray(s, dtype=float).ravel()
    n = min(len(s), len(missing_points_pct))
    s, missing_points_pct, near_sensor_pct = (
        s[:n],
        missing_points_pct[:n],
        near_sensor_pct[:n],
    )
    z = zscore_1d(s)
    if ALIGN_SCORE_DIRECTION:
        z = maybe_align_direction(z, anomaly_window)
    method_zscores[m] = z


# =========================
# Smoothing
# =========================
def moving_average(x, window):
    if window <= 1:
        return x
    if window % 2 == 0:
        window += 1
    return np.convolve(x, np.ones(window) / window, mode="same")


def gaussian_smooth(x, sigma):
    from scipy.ndimage import gaussian_filter1d

    return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x


def ema(x, alpha):
    y = np.empty_like(x)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y


def apply_smoothing(x):
    m = SMOOTHING_METHOD.lower()
    if m == "none":
        return x
    if m == "moving_average":
        return moving_average(x, MA_WINDOW)
    if m == "gaussian":
        return gaussian_smooth(x, GAUSSIAN_SIGMA)
    if m == "ema":
        return ema(x, EMA_ALPHA)
    raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}")


smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()}
smoothed_missing = apply_smoothing(missing_points_pct)
smoothed_near = apply_smoothing(near_sensor_pct)

# =========================
# Plot
# =========================
t = np.arange(len(missing_points_pct)) / FPS


def plot_series(y2, ylabel, fname, title_suffix):
    fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True)
    axy = axz.twinx()
    for m in methods:
        axz.plot(t, smoothed_z[m], label=f"{m} (z)")
    axy.plot(t, y2, linestyle="--", label=ylabel)
    start, end = anomaly_window
    if start and end:
        axz.axvline(start / FPS, linestyle=":", alpha=0.6)
        axz.axvline(end / FPS, linestyle=":", alpha=0.6)
    axz.set_xlabel("Time (s)")
    axz.set_ylabel("Anomaly score (z)")
    axy.set_ylabel(ylabel)
    axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}")
    lines1, labels1 = axz.get_legend_handles_labels()
    lines2, labels2 = axy.get_legend_handles_labels()
    axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
    axz.grid(True, alpha=0.3)
    fig.savefig(output_datetime_path / fname, dpi=150)
    plt.close(fig)


plot_series(
    smoothed_missing,
    "Missing points (%)",
    f"{EXPERIMENT_NAME}_zscores_vs_missing.png",
    "Degradation vs Missing Points",
)
plot_series(
    smoothed_near,
    "Near-sensor points (%)",
    f"{EXPERIMENT_NAME}_zscores_vs_near.png",
    "Degradation vs Near-Sensor Points (<0.5m)",
)

# =========================
# Save & archive
# =========================
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for f in output_datetime_path.iterdir():
    shutil.copy2(f, latest_folder_path)
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")
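To illustrate the effect of the EMA smoothing option used above, a tiny self-contained example on a noisy step signal (purely illustrative, not part of the script):

```python
import numpy as np

def ema(x, alpha):
    # Same recurrence as in the script: y[i] = alpha*x[i] + (1-alpha)*y[i-1]
    y = np.empty_like(x, dtype=float)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y

rng = np.random.default_rng(0)
signal = np.concatenate([np.zeros(50), np.ones(50)]) + rng.normal(0, 0.3, 100)
smooth = ema(signal, alpha=0.1)  # smaller alpha -> smoother curve, but more lag
print(float(signal.std()), float(smooth.std()))  # smoothed series has lower variance
```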
tools/plot_scripts/results_inference_timeline.py (new file, 304 lines)
@@ -0,0 +1,304 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"

# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
# Adjust this to where you save your per-method scores.
methods_scores_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)

# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")

# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline")

# Cache (stats + labels) directory — same as your original script
cache_path = output_path

# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048

# Frames per second for x-axis time
FPS = 10.0

# Whether to try to align score sign so that higher = more degraded.
# If manual labels exist for this experiment, alignment uses anomaly window mean vs. outside.
ALIGN_SCORE_DIRECTION = True

# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

# =========================
# Discover experiments to reconstruct indices consistent with caches
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
if not all_data_path.exists():
    raise FileNotFoundError(f"all_data_path does not exist: {all_data_path}")

for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix != ".bag":
        continue
    if "smoke" in bag_file_path.name:
        anomaly_experiment_paths.append(bag_file_path)
    else:
        normal_experiment_paths.append(bag_file_path)

# Sort by filesize to match original ordering used when caches were generated
normal_experiment_paths = sorted(
    normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
    anomaly_experiment_paths, key=lambda p: p.stat().st_size
)

# Find the path for the requested experiment
exp_path = None
exp_is_anomaly = None
for p in anomaly_experiment_paths:
    if p.stem == EXPERIMENT_NAME:
        exp_path = p
        exp_is_anomaly = True
        break
if exp_path is None:
    for p in normal_experiment_paths:
        if p.stem == EXPERIMENT_NAME:
            exp_path = p
            exp_is_anomaly = False
            break
if exp_path is None:
    raise FileNotFoundError(
        f"Experiment '{EXPERIMENT_NAME}' not found as a .bag in {all_data_path}"
    )

# Get the index within the appropriate list
if exp_is_anomaly:
    exp_index = anomaly_experiment_paths.index(exp_path)
else:
    exp_index = normal_experiment_paths.index(exp_path)

# =========================
# Load cached statistical data
# =========================
missing_points_cache = Path(cache_path / "missing_points.pkl")
near_sensor_cache = Path(cache_path / "particles_near_sensor_counts_500.pkl")

if not missing_points_cache.exists():
    raise FileNotFoundError(f"Missing points cache not found: {missing_points_cache}")
if not near_sensor_cache.exists():
    raise FileNotFoundError(f"Near-sensor cache not found: {near_sensor_cache}")

with open(missing_points_cache, "rb") as f:
    missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(near_sensor_cache, "rb") as f:
    near_sensor_normal, near_sensor_anomaly = pickle.load(f)

if exp_is_anomaly:
    missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
    missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)

# Convert counts to percentages of total points
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0

# =========================
# Load manual anomaly frame borders (optional; used for sign alignment + vertical markers)
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
    with open(labels_json_path, "r") as frame_borders_file:
        manually_labeled_anomaly_frames_json = json.load(frame_borders_file)
    for file in manually_labeled_anomaly_frames_json.get("files", []):
        manually_labeled_anomaly_frames[file["filename"]] = (
            file.get("semi_target_begin_frame", None),
            file.get("semi_target_end_frame", None),
        )

# The JSON uses .npy filenames (as in original script). Create this experiment's key.
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))


# =========================
# Load method scores and z-score normalize per method
# =========================
def zscore_1d(x: np.ndarray, eps=1e-12):
    x = np.asarray(x, dtype=float)
    mu = np.mean(x)
    sigma = np.std(x, ddof=0)
    if sigma < eps:
        return np.zeros_like(x)
    return (x - mu) / sigma


def maybe_align_direction(z: np.ndarray, window):
    """Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist."""
    start, end = window
    if start is None or end is None:
        return z  # no labels → leave as-is
    start = int(max(0, start))
    end = int(min(len(z), end))
    if end <= start or end > len(z):
        return z
    inside_mean = float(np.mean(z[start:end]))
    # outside: everything except [start:end]; handle edge cases
    if start == 0 and end == len(z):
        return z
    outside_parts = []
    if start > 0:
        outside_parts.append(z[:start])
    if end < len(z):
        outside_parts.append(z[end:])
    if not outside_parts:
        return z
    outside_mean = float(np.mean(np.concatenate(outside_parts)))
    return z if inside_mean >= outside_mean else -z


methods = ["deepsad", "ocsvm", "isoforest"]
method_scores = {}
method_zscores = {}

if not methods_scores_path.exists():
    raise FileNotFoundError(
        f"Methods scores path does not exist: {methods_scores_path}"
    )

for m in methods:
    file_path = methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy"
    if not file_path.exists():
        raise FileNotFoundError(f"Missing scores file for method '{m}': {file_path}")
    s = np.load(file_path)
    s = np.asarray(s, dtype=float).reshape(-1)
    # If needed, truncate or pad to match stats length (should match if generated consistently)
    n = min(len(s), len(missing_points_pct))
    if len(s) != len(missing_points_pct):
        # Align by truncation to the shortest length
        s = s[:n]
        # Also truncate stats to match
        missing_points_pct = missing_points_pct[:n]
        near_sensor_pct = near_sensor_pct[:n]
    z = zscore_1d(s)
    if ALIGN_SCORE_DIRECTION:
        z = maybe_align_direction(z, anomaly_window)
    method_scores[m] = s
    method_zscores[m] = z

# Common time axis in seconds
num_frames = len(missing_points_pct)
t = np.arange(num_frames) / FPS

# =========================
# Plot 1: Missing points (%) vs. method z-scores
# =========================
fig1, axz1 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy1 = axz1.twinx()

# plot z-scores
for m in methods:
    axz1.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)

# plot missing points (%)
axy1.plot(t, missing_points_pct, linestyle="--", alpha=0.7, label="Missing points (%)")

# vertical markers for anomaly window if available
start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
    axz1.axvline(x=start / FPS, linestyle=":", alpha=0.6)
    axz1.axvline(x=end / FPS, linestyle=":", alpha=0.6)

axz1.set_xlabel("Time (s)")
axz1.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy1.set_ylabel("Missing points (%)")
axz1.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Missing Points")

# Build a combined legend
lines1, labels1 = axz1.get_legend_handles_labels()
lines2, labels2 = axy1.get_legend_handles_labels()
axz1.legend(lines1 + lines2, labels1 + labels2, loc="upper right")

axz1.grid(True, alpha=0.3)
fig1.savefig(
    output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_missing_points.png", dpi=150
)
plt.close(fig1)

# =========================
# Plot 2: Near-sensor (%) vs. method z-scores
# =========================
fig2, axz2 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy2 = axz2.twinx()

for m in methods:
    axz2.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)

axy2.plot(t, near_sensor_pct, linestyle="--", alpha=0.7, label="Near-sensor <0.5m (%)")

start, end = anomaly_window
if start is not None and end is not None and 0 <= start < end <= num_frames:
    axz2.axvline(x=start / FPS, linestyle=":", alpha=0.6)
    axz2.axvline(x=end / FPS, linestyle=":", alpha=0.6)

axz2.set_xlabel("Time (s)")
axz2.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
axy2.set_ylabel("Near-sensor points (%)")
axz2.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Near-Sensor Points (<0.5 m)")

lines1, labels1 = axz2.get_legend_handles_labels()
lines2, labels2 = axy2.get_legend_handles_labels()
axz2.legend(lines1 + lines2, labels1 + labels2, loc="upper right")

axz2.grid(True, alpha=0.3)
fig2.savefig(
    output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_near_sensor.png", dpi=150
)
plt.close(fig2)

# =========================
# Preserve latest/, archive/, copy script
# =========================

# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)

# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to preserve the code used
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

print("Done. Plots saved and archived.")