data loading and plotting for results wip

2025-09-03 14:55:54 +02:00
parent 3d968c305c
commit ed80faf1e2
16 changed files with 2732 additions and 952 deletions
--- a/tools/plot_scripts/ae_elbow_lenet.py
+++ b/tools/plot_scripts/ae_elbow_lenet.py
@@ -1,118 +1,176 @@
-import pickle
+# ae_elbow_lenet.py
+
+from __future__ import annotations
+
+import json
 import shutil
-import unittest
 from datetime import datetime
 from pathlib import Path

 import matplotlib.pyplot as plt
 import numpy as np
-from tabulate import tabulate
+import polars as pl

-# Configuration
-results_folders = {
-    "LeNet": {
-        "path": Path(
-            "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/test/DeepSAD/subter_ae_elbow_v2/"
-        ),
-        "batch_size": 256,
-    },
-    "LeNet Efficient": {
-        "path": Path(
-            "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/test/DeepSAD/subter_efficient_ae_elbow"
-        ),
-        "batch_size": 64,
-    },
-}
-output_path = Path("/home/fedex/mt/plots/ae_elbow_lenet")
-datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
+from load_results import load_pretraining_results_dataframe

-latest_folder_path = output_path / "latest"
-archive_folder_path = output_path / "archive"
-output_datetime_path = output_path / datetime_folder_name
+# ----------------------------
+# Config
+# ----------------------------
+ROOT = Path("/home/fedex/mt/results/done")  # experiments root you pass to the loader
+OUTPUT_DIR = Path("/home/fedex/mt/plots/ae_elbow_lenet_from_df")

-# Create output directories
-output_path.mkdir(exist_ok=True, parents=True)
-output_datetime_path.mkdir(exist_ok=True, parents=True)
-latest_folder_path.mkdir(exist_ok=True, parents=True)
-archive_folder_path.mkdir(exist_ok=True, parents=True)
+# Which label field to use from the DF; "labels_exp_based" or "labels_manual_based"
+LABEL_FIELD = "labels_exp_based"


-def calculate_batch_mean_loss(scores, batch_size):
-    """Calculate mean loss over batches similar to the original testing code."""
-    n_samples = len(scores)
-    n_batches = (n_samples + batch_size - 1) // batch_size
-
-    batch_losses = []
-    for i in range(0, n_samples, batch_size):
-        batch_scores = scores[i : i + batch_size]
-        batch_losses.append(np.mean(batch_scores))
-
-    return np.sum(batch_losses) / n_batches
+# ----------------------------
+# Helpers
+# ----------------------------
+def canonicalize_network(name: str) -> str:
+    """Map a stored ``net_name`` string to a clean label for plotting.
+
+    Matching is case-insensitive and substring-based. The more specific
+    "efficient" token is tested before "lenet" so that a name carrying both
+    (for example "LeNet Efficient") is not collapsed into the plain "LeNet"
+    curve.
+
+    Args:
+        name: Raw network name; ``None`` or an empty string is tolerated.
+
+    Returns:
+        "Efficient", "LeNet", the unchanged ``name`` when neither token is
+        present, or "unknown" when ``name`` is empty or ``None``.
+    """
+    low = (name or "").lower()
+    if "efficient" in low:
+        return "Efficient"
+    if "lenet" in low:
+        return "LeNet"
+    # Fallback: show whatever was stored.
+    return name or "unknown"


-def test_loss_calculation(results, batch_size):
-    """Test if our loss calculation matches the original implementation."""
-    test = unittest.TestCase()
-    folds = results["ae_results"]
-    dim = results["dimension"]
-
-    for fold_key in folds:
-        fold_data = folds[fold_key]["test"]
-        scores = np.array(fold_data["scores"])
-        original_loss = fold_data["loss"]
-        calculated_loss = calculate_batch_mean_loss(scores, batch_size)
-
-        try:
-            test.assertAlmostEqual(
-                original_loss,
-                calculated_loss,
-                places=5,
-                msg=f"Loss mismatch for dim={dim}, {fold_key}",
-            )
-        except AssertionError as e:
-            print(f"Warning: {str(e)}")
-            print(f"Original: {original_loss:.6f}, Calculated: {calculated_loss:.6f}")
-            raise
+def calculate_batch_mean_loss(scores: np.ndarray, batch_size: int) -> float:
+    """Average the per-batch mean scores (same scheme as the original test loss).
+
+    Args:
+        scores: Per-sample scores, consumed in consecutive chunks.
+        batch_size: Chunk length; a value <= 0 means "one batch with everything".
+
+    Returns:
+        Sum of the chunk means divided by the number of chunks, or NaN when
+        ``scores`` is empty.
+    """
+    total = len(scores)
+    if not total:
+        return np.nan
+    step = batch_size if batch_size > 0 else total  # single batch fallback
+    starts = range(0, total, step)
+    running = 0.0
+    for start in starts:
+        chunk = scores[start : start + step]
+        running += float(np.mean(chunk))
+    # len(starts) equals ceil(total / step), i.e. the number of batches.
+    return running / len(starts)


-def plot_loss_curve(dims, means, stds, title, color, output_path):
-    """Create and save a single loss curve plot."""
-    plt.figure(figsize=(8, 5))
-    plt.plot(dims, means, marker="o", color=color, label="Mean Test Loss")
-    plt.fill_between(
-        dims,
-        np.array(means) - np.array(stds),
-        np.array(means) + np.array(stds),
-        color=color,
-        alpha=0.2,
-        label="Std Dev",
-    )
-    plt.xlabel("Latent Dimension")
-    plt.ylabel("Test Loss")
-    plt.title(title)
-    plt.legend()
-    plt.grid(True, alpha=0.3)
-    plt.xticks(dims)
-    plt.tight_layout()
-    plt.savefig(output_path, dpi=150, bbox_inches="tight")
-    plt.close()
+def extract_batch_size(cfg_json: str | None) -> int:
+    """Read the batch size from a serialized experiment config.
+
+    The AE-specific ``ae_batch_size`` is preferred, then the general
+    ``batch_size``; 256 is the fallback. Only the raw ``config_json`` text is
+    consulted (no lifted fields).
+
+    Args:
+        cfg_json: JSON text of the config; may be ``None`` or empty.
+
+    Returns:
+        The first truthy, int-convertible value among the two keys, else 256.
+    """
+    default = 256
+    try:
+        cfg = json.loads(cfg_json) if cfg_json else {}
+    except (TypeError, ValueError):
+        # json.JSONDecodeError subclasses ValueError; TypeError covers input
+        # that is not str/bytes.
+        cfg = {}
+    if not isinstance(cfg, dict):
+        # Valid JSON that is not an object (list, number, ...) has no keys.
+        return default
+    for key in ("ae_batch_size", "batch_size"):
+        value = cfg.get(key)
+        if not value:
+            continue
+        try:
+            return int(value)
+        except (TypeError, ValueError, OverflowError):
+            # Unusable entry (e.g. a non-numeric string): try the next key.
+            continue
+    return default
+
+
+def build_arch_curves_from_df(
+    df: pl.DataFrame,
+    label_field: str = "labels_exp_based",
+    only_nets: set[str] | None = None,
+):
+    """Compute per-network loss curves over latent dimension from the AE dataframe.
+
+    Only rows whose ``split`` is "test" are used. Each row yields one loss per
+    category (mean of per-batch means, batch size taken from ``config_json``);
+    rows are then grouped by (network label, latent_dim) and reduced to
+    mean/std.
+
+    Args:
+        df: AE pretraining dataframe. Must contain the columns ``split``,
+            ``scores``, ``network``, ``latent_dim`` and ``label_field``.
+        label_field: Column holding per-sample labels; 1 selects normal
+            samples and -1 anomalous ones.
+        only_nets: Optional set of canonical network labels to keep; ``None``
+            or an empty set keeps every network.
+
+    Returns:
+        { net_label: {
+            "normal":  (dims, means, stds),
+            "anomaly": (dims, means, stds),
+            "overall": (dims, means, stds),
+        } }
+        with ``dims`` sorted ascending. Entries with no usable data are NaN.
+
+    Raises:
+        ValueError: If a required column is missing.
+    """
+    if "split" not in df.columns:
+        raise ValueError("Expected 'split' column in AE dataframe.")
+    if "scores" not in df.columns:
+        raise ValueError("Expected 'scores' column in AE dataframe.")
+    if "network" not in df.columns or "latent_dim" not in df.columns:
+        raise ValueError("Expected 'network' and 'latent_dim' columns in AE dataframe.")
+    if label_field not in df.columns:
+        raise ValueError(f"Expected '{label_field}' column in AE dataframe.")
+
+    # Keep only test split
+    df = df.filter(pl.col("split") == "test")
+
+    # (net_label, latent_dim) -> one loss value per row, for each category
+    groups: dict[tuple[str, int], dict[str, list[float]]] = {}
+
+    for row in df.iter_rows(named=True):
+        net_label = canonicalize_network(row["network"])
+        if only_nets and net_label not in only_nets:
+            continue
+
+        dim = int(row["latent_dim"])
+        batch_size = extract_batch_size(row.get("config_json"))
+        scores = np.asarray(row["scores"] or [], dtype=float)
+
+        labels = row.get(label_field)
+        labels = np.asarray(labels, dtype=int) if labels is not None else None
+
+        overall_loss = calculate_batch_mean_loss(scores, batch_size)
+
+        # Split by labels if available; otherwise we only aggregate overall
+        normal_loss = np.nan
+        anomaly_loss = np.nan
+        if labels is not None and labels.size == scores.size:
+            normal_scores = scores[labels == 1]
+            anomaly_scores = scores[labels == -1]
+            if normal_scores.size > 0:
+                normal_loss = calculate_batch_mean_loss(normal_scores, batch_size)
+            if anomaly_scores.size > 0:
+                anomaly_loss = calculate_batch_mean_loss(anomaly_scores, batch_size)
+
+        bucket = groups.setdefault(
+            (net_label, dim), {"normal": [], "anomaly": [], "overall": []}
+        )
+        bucket["overall"].append(overall_loss)
+        bucket["normal"].append(normal_loss)
+        bucket["anomaly"].append(anomaly_loss)
+
+    # Aggregate across rows -> per (net, dim) mean/std
+    per_net_dims: dict[str, set[int]] = {}
+    for net, dim in groups:
+        per_net_dims.setdefault(net, set()).add(dim)
+
+    result: dict[str, dict[str, tuple[list[int], list[float], list[float]]]] = {}
+    for net, dims in per_net_dims.items():
+        dims_sorted = sorted(dims)
+        result[net] = {
+            kind: _mean_std_per_dim(groups, net, dims_sorted, kind)
+            for kind in ("normal", "anomaly", "overall")
+        }
+
+    return result
+
+
+def _mean_std_per_dim(
+    groups: dict[tuple[str, int], dict[str, list[float]]],
+    net: str,
+    dims_sorted: list[int],
+    kind: str,
+) -> tuple[list[int], list[float], list[float]]:
+    """Reduce one network's collected losses of ``kind`` to (dims, means, stds)."""
+    means: list[float] = []
+    stds: list[float] = []
+    for dim in dims_sorted:
+        # Direct lookup: every (net, dim) pair is a key of ``groups``.
+        # NaN placeholders (no samples of this kind) are left out.
+        xs = [
+            x
+            for x in groups[(net, dim)][kind]
+            if x is not None and not np.isnan(x)
+        ]
+        if xs:
+            means.append(float(np.mean(xs)))
+            stds.append(float(np.std(xs)))
+        else:
+            means.append(np.nan)
+            stds.append(np.nan)
+    return dims_sorted, means, stds


 def plot_multi_loss_curve(arch_results, title, output_path, colors=None):
-    """Create and save a loss curve plot with multiple architectures.
-
-    Args:
-        arch_results: Dict of format {arch_name: (dims, means, stds)}
-        title: Plot title
-        output_path: Where to save the plot
-        colors: Optional dict of colors for each architecture
+    """
+    arch_results: {arch_name: (dims, means, stds)}
    """
    plt.figure(figsize=(10, 6))

+    # default color map if not provided
    if colors is None:
        colors = {
-            "LeNet": "blue",
-            "LeNet Asymmetric": "red",
+            "LeNet": "tab:blue",
+            "Efficient": "tab:orange",
        }

    # Get unique dimensions across all architectures
@@ -121,219 +179,91 @@ def plot_multi_loss_curve(arch_results, title, output_path, colors=None):
    )

    for arch_name, (dims, means, stds) in arch_results.items():
-        color = colors.get(arch_name, "gray")
-        plt.plot(dims, means, marker="o", color=color, label=arch_name)
-        plt.fill_between(
-            dims,
-            np.array(means) - np.array(stds),
-            np.array(means) + np.array(stds),
-            color=color,
-            alpha=0.2,
-        )
+        color = colors.get(arch_name)
+        # Plot line
+        if color is None:
+            plt.plot(dims, means, marker="o", label=arch_name)
+            plt.fill_between(
+                dims,
+                np.array(means) - np.array(stds),
+                np.array(means) + np.array(stds),
+                alpha=0.2,
+            )
+        else:
+            plt.plot(dims, means, marker="o", color=color, label=arch_name)
+            plt.fill_between(
+                dims,
+                np.array(means) - np.array(stds),
+                np.array(means) + np.array(stds),
+                color=color,
+                alpha=0.2,
+            )

-    plt.xlabel("Latent Dimension")
+    plt.xlabel("Latent Dimensionality")
    plt.ylabel("Test Loss")
    plt.title(title)
    plt.legend()
    plt.grid(True, alpha=0.3)
-    plt.xticks(all_dims)  # Set x-axis ticks to match data points
+    plt.xticks(all_dims)
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()


-def evaluate_autoencoder_loss():
-    """Main function to evaluate autoencoder loss across different latent dimensions."""
-    # Results storage for each architecture
-    arch_results = {
-        name: {"dims": [], "normal": [], "anomaly": []} for name in results_folders
-    }
+def main():
+    # Load AE DF (uses your cache if enabled in the loader)
+    df = load_pretraining_results_dataframe(ROOT, allow_cache=True, include_train=False)

-    # Process each architecture
-    for arch_name, config in results_folders.items():
-        results_folder = config["path"]
-        batch_size = config["batch_size"]
-        result_files = sorted(
-            results_folder.glob("ae_elbow_results_subter_*_kfold.pkl")
-        )
+    # Optional: filter to just LeNet vs Efficient; drop this set() to plot all nets
+    wanted_nets = {"LeNet", "Efficient"}

-        dimensions = []
-        normal_means = []
-        normal_stds = []
-        anomaly_means = []
-        anomaly_stds = []
+    curves = build_arch_curves_from_df(
+        df,
+        label_field=LABEL_FIELD,
+        only_nets=wanted_nets,
+    )

-        # Verify loss calculation
-        print(
-            f"\nVerifying loss calculation for {arch_name} (batch_size={batch_size})..."
-        )
-        for result_file in result_files:
-            with open(result_file, "rb") as f:
-                results = pickle.load(f)
-                test_loss_calculation(results, batch_size)
-        print(f"Loss calculation verified successfully for {arch_name}!")
+    # Prepare output dirs
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    ts_dir = OUTPUT_DIR / "archive" / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    ts_dir.mkdir(parents=True, exist_ok=True)

-        # Process files for this architecture
-        for result_file in result_files:
-            with open(result_file, "rb") as f:
-                results = pickle.load(f)
-            dim = int(results["dimension"])
-            folds = results["ae_results"]
+    def pick(kind: str):
+        # kind in {"normal","anomaly","overall"}
+        return {name: payload[kind] for name, payload in curves.items()}

-            normal_fold_losses = []
-            anomaly_fold_losses = []
-
-            all_scores = []  # Collect all scores for overall calculation
-            all_fold_scores = []  # Collect all fold scores for std calculation
-
-            for fold_key in folds:
-                fold_data = folds[fold_key]["test"]
-                scores = np.array(fold_data["scores"])
-                labels = np.array(fold_data["labels_exp_based"])
-
-                normal_scores = scores[labels == 1]
-                anomaly_scores = scores[labels == -1]
-
-                normal_fold_losses.append(
-                    calculate_batch_mean_loss(normal_scores, batch_size)
-                )
-                anomaly_fold_losses.append(
-                    calculate_batch_mean_loss(anomaly_scores, batch_size)
-                )
-
-                all_scores.append(scores)  # Add scores to all_scores
-                all_fold_scores.append(fold_data["scores"])  # Add fold scores
-
-            dimensions.append(dim)
-            normal_means.append(np.mean(normal_fold_losses))
-            normal_stds.append(np.std(normal_fold_losses))
-            anomaly_means.append(np.mean(anomaly_fold_losses))
-            anomaly_stds.append(np.std(anomaly_fold_losses))
-
-        # Sort by dimension
-        sorted_data = sorted(
-            zip(dimensions, normal_means, normal_stds, anomaly_means, anomaly_stds)
-        )
-        dims, n_means, n_stds, a_means, a_stds = zip(*sorted_data)
-
-        # Store results for this architecture
-        arch_results[arch_name] = {
-            "dims": dims,
-            "normal": (dims, n_means, n_stds),
-            "anomaly": (dims, a_means, a_stds),
-            "overall": (
-                dims,
-                [
-                    calculate_batch_mean_loss(scores, batch_size)
-                    for scores in all_scores
-                ],  # Use all scores
-                [
-                    np.std(
-                        [
-                            calculate_batch_mean_loss(fold_scores, batch_size)
-                            for fold_scores in fold_scores_list
-                        ]
-                    )
-                    for fold_scores_list in all_fold_scores
-                ],
-            ),
-        }
-
-    # Create the three plots with all architectures
    plot_multi_loss_curve(
-        {name: results["normal"] for name, results in arch_results.items()},
-        "Normal Class Test Loss vs. Latent Dimension",
-        output_datetime_path / "ae_elbow_test_loss_normal.png",
+        pick("normal"),
+        "Normal Class Test Loss vs. Latent Dimensionality",
+        ts_dir / "ae_elbow_test_loss_normal.png",
    )

    plot_multi_loss_curve(
-        {name: results["anomaly"] for name, results in arch_results.items()},
-        "Anomaly Class Test Loss vs. Latent Dimension",
-        output_datetime_path / "ae_elbow_test_loss_anomaly.png",
+        pick("anomaly"),
+        "Anomaly Class Test Loss vs. Latent Dimensionality",
+        ts_dir / "ae_elbow_test_loss_anomaly.png",
    )

    plot_multi_loss_curve(
-        {name: results["overall"] for name, results in arch_results.items()},
-        "Overall Test Loss vs. Latent Dimension",
-        output_datetime_path / "ae_elbow_test_loss_overall.png",
+        pick("overall"),
+        "Overall Test Loss vs. Latent Dimensionality",
+        ts_dir / "ae_elbow_test_loss_overall.png",
    )

+    # Copy this script to preserve the code used for the outputs
+    script_path = Path(__file__)
+    shutil.copy2(script_path, ts_dir)

-def print_loss_comparison(results_folders):
-    """Print comparison tables of original vs calculated losses for each architecture."""
-    print("\nLoss Comparison Tables")
-    print("=" * 80)
+    # Optionally mirror latest
+    latest = OUTPUT_DIR / "latest"
+    latest.mkdir(exist_ok=True, parents=True)
+    for f in ts_dir.iterdir():
+        if f.is_file():
+            shutil.copy2(f, latest / f.name)

-    for arch_name, config in results_folders.items():
-        results_folder = config["path"]
-        batch_size = config["batch_size"]
-        result_files = sorted(
-            results_folder.glob("ae_elbow_results_subter_*_kfold.pkl")
-        )
-
-        # Prepare table data
-        table_data = []
-        headers = ["Dimension", "Original", "Calculated", "Diff"]
-
-        for result_file in result_files:
-            with open(result_file, "rb") as f:
-                results = pickle.load(f)
-
-            dim = int(results["dimension"])
-            folds = results["ae_results"]
-
-            # Calculate mean original loss across folds
-            orig_losses = []
-            calc_losses = []
-            for fold_key in folds:
-                fold_data = folds[fold_key]["test"]
-                orig_losses.append(fold_data["loss"])
-                calc_losses.append(
-                    calculate_batch_mean_loss(np.array(fold_data["scores"]), batch_size)
-                )
-
-            orig_mean = np.mean(orig_losses)
-            calc_mean = np.mean(calc_losses)
-            diff = abs(orig_mean - calc_mean)
-
-            table_data.append([dim, orig_mean, calc_mean, diff])
-
-        # Sort by dimension
-        table_data.sort(key=lambda x: x[0])
-
-        print(f"\n{arch_name}:")
-        print(
-            tabulate(
-                table_data,
-                headers=headers,
-                floatfmt=".6f",
-                tablefmt="pipe",
-                numalign="right",
-            )
-        )
-
-    print("\n" + "=" * 80)
+    print(f"Saved plots to: {ts_dir}")
+    print(f"Also updated: {latest}")


 if __name__ == "__main__":
-    # Print loss comparisons for all architectures
-    print_loss_comparison(results_folders)
-
-    # Run main analysis
-    evaluate_autoencoder_loss()
-
-    # Archive management
-    # Delete current latest folder
-    shutil.rmtree(latest_folder_path, ignore_errors=True)
-    latest_folder_path.mkdir(exist_ok=True, parents=True)
-
-    # Copy contents to latest folder
-    for file in output_datetime_path.iterdir():
-        shutil.copy2(file, latest_folder_path)
-
-    # Copy this script for reference
-    shutil.copy2(__file__, output_datetime_path)
-    shutil.copy2(__file__, latest_folder_path)
-
-    # Move output to archive
-    shutil.move(output_datetime_path, archive_folder_path)
+    main()
--- a/tools/plot_scripts/data_spherical_projection_as_trained.py
+++ b/tools/plot_scripts/data_spherical_projection_as_trained.py
@@ -0,0 +1,164 @@
+import argparse
+import shutil
+from datetime import datetime
+from pathlib import Path
+
+import matplotlib.patches as mpatches
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.colors import LinearSegmentedColormap, ListedColormap
+from PIL import Image
+
+# --- Setup output folders ---
+# Each run writes into a timestamped folder, which is mirrored to "latest" and
+# finally moved into "archive" at the end of the script.
+output_path = Path("/home/fedex/mt/plots/data_2d_projections_training")
+datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+output_datetime_path = output_path / datetime_folder_name
+latest_folder_path = output_path / "latest"
+archive_folder_path = output_path / "archive"
+
+for folder in (
+    output_path,
+    output_datetime_path,
+    latest_folder_path,
+    archive_folder_path,
+):
+    folder.mkdir(exist_ok=True, parents=True)
+
+# --- Parse command-line arguments ---
+parser = argparse.ArgumentParser(
+    description="Plot two 2D projections as used in training (unstretched, grayscale)"
+)
+parser.add_argument(
+    "--input1",
+    type=Path,
+    default=Path(
+        "/home/fedex/mt/data/subter/new_projection/1_loop_closure_illuminated_2023-01-23.npy"
+    ),
+    help="Path to first .npy file containing 2D projection data",
+)
+parser.add_argument(
+    "--input2",
+    type=Path,
+    default=Path(
+        "/home/fedex/mt/data/subter/new_projection/3_smoke_human_walking_2023-01-23.npy"
+    ),
+    help="Path to second .npy file containing 2D projection data",
+)
+parser.add_argument(
+    "--frame1",
+    type=int,
+    default=955,
+    help="Frame index to plot from first file (0-indexed)",
+)
+parser.add_argument(
+    "--frame2",
+    type=int,
+    default=242,
+    help="Frame index to plot from second file (0-indexed)",
+)
+
+args = parser.parse_args()
+
+# --- Load the numpy projection data ---
+proj_data1 = np.load(args.input1)
+proj_data2 = np.load(args.input2)
+
+# Choose the desired frames
+try:
+    frame1 = proj_data1[args.frame1]
+    frame2 = proj_data2[args.frame2]
+except IndexError as e:
+    # Keep the original IndexError attached as the explicit cause.
+    raise ValueError(f"Frame index out of range: {e}") from e
+
+# Debug info: Print the percentage of missing data in each frame
+print(f"Frame 1 missing data percentage: {np.isnan(frame1).mean() * 100:.2f}%")
+print(f"Frame 2 missing data percentage: {np.isnan(frame2).mean() * 100:.2f}%")
+
+# --- Create a figure with 2 vertical subplots ---
+fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
+
+# Single-colour colormap used to paint missing (NaN) pixels
+missing_color = [1, 0, 0, 1]  # Red with full alpha
+cmap_missing = ListedColormap([missing_color])
+
+for ax, frame, title in zip(
+    (ax1, ax2),
+    (frame1, frame2),
+    (
+        "Normal LiDAR Frame",
+        "Degraded LiDAR Frame (Smoke)",
+    ),
+):
+    # Create mask for missing data (directly from NaN values)
+    missing_mask = np.isnan(frame)
+
+    # Plot the valid data in grayscale
+    frame_valid = np.copy(frame)
+    frame_valid[missing_mask] = 0  # Set missing values to black in base image
+    ax.imshow(frame_valid, cmap="gray", aspect="equal", vmin=0, vmax=0.8)
+
+    # Overlay ONLY the missing pixels in red. Valid pixels are masked out so
+    # they are drawn with the colormap's (transparent) "bad" colour; feeding
+    # the raw boolean mask to a one-colour colormap would paint True and False
+    # alike and tint the whole frame red.
+    missing_overlay = np.ma.masked_where(~missing_mask, missing_mask.astype(float))
+    ax.imshow(
+        missing_overlay,
+        cmap=cmap_missing,
+        vmin=0,  # explicit limits: no autoscaling on a fully masked frame
+        vmax=1,
+        alpha=0.3,  # Reduced alpha for better visibility
+    )
+
+    ax.set_title(title)
+    ax.axis("off")
+
+# Adjust layout
+plt.tight_layout()
+
+# Create a more informative legend
+legend_elements = [
+    mpatches.Patch(facecolor="red", alpha=0.7, label="Missing Data"),
+    mpatches.Patch(facecolor="white", label="Close Distance (0m)"),
+    mpatches.Patch(facecolor="gray", label="Mid Distance"),
+    mpatches.Patch(facecolor="black", label="Far Distance (70m)"),
+]
+
+# Add legend with better positioning and formatting
+fig.legend(
+    handles=legend_elements,
+    loc="center right",
+    bbox_to_anchor=(0.98, 0.5),
+    title="Distance Information",
+    framealpha=1.0,
+)
+
+# Save the plot
+output_file = output_datetime_path / "data_2d_projections_training.png"
+plt.savefig(output_file, dpi=300, bbox_inches="tight", pad_inches=0.1)
+plt.close()
+
+print(f"Plot saved to: {output_file}")
+
+# --- Create grayscale training images ---
+# NaNs become 0 and the frame is stored as a 32-bit float TIFF.
+for degradation_status, frame_number, frame in (
+    ("normal", args.frame1, frame1),
+    ("smoke", args.frame2, frame2),
+):
+    frame_gray = np.nan_to_num(frame, nan=0).astype(np.float32)
+    gray_image = Image.fromarray(frame_gray, mode="F")
+    gray_output_file = (
+        output_datetime_path
+        / f"frame_{frame_number}_training_{degradation_status}.tiff"
+    )
+    gray_image.save(gray_output_file)
+    print(f"Training image saved to: {gray_output_file}")
+
+# --- Handle folder structure ---
+# Rebuild "latest" from scratch so it only holds this run's files.
+shutil.rmtree(latest_folder_path, ignore_errors=True)
+latest_folder_path.mkdir(exist_ok=True, parents=True)
+
+for file in output_datetime_path.iterdir():
+    shutil.copy2(file, latest_folder_path)
+
+# Keep a copy of this script next to the outputs it produced
+script_path = Path(__file__)
+shutil.copy2(script_path, output_datetime_path)
+shutil.copy2(script_path, latest_folder_path)
+
+shutil.move(output_datetime_path, archive_folder_path)
+print(f"Output archived to: {archive_folder_path}")
--- a/tools/plot_scripts/load_results.py
+++ b/tools/plot_scripts/load_results.py
@@ -0,0 +1,597 @@
+from __future__ import annotations
+
+import json
+import pickle
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+import polars as pl
+
+# ------------------------------------------------------------
+# Config you can tweak
+# ------------------------------------------------------------
+# Model identifiers and evaluation label sets; these are the values listed for
+# the "model" and "eval" columns of SCHEMA_STATIC below.
+MODELS = ["deepsad", "isoforest", "ocsvm"]
+EVALS = ["exp_based", "manual_based"]
+
+# Column name -> polars dtype for the evaluation-results table.
+# NOTE(review): how this mapping is consumed is not visible here — confirm
+# against the dataframe-building code.
+SCHEMA_STATIC = {
+    # identifiers / dims
+    "network": pl.Utf8,  # e.g. "LeNet", "efficient"
+    "latent_dim": pl.Int32,
+    "semi_normals": pl.Int32,
+    "semi_anomalous": pl.Int32,
+    "model": pl.Utf8,  # "deepsad" | "isoforest" | "ocsvm"
+    "eval": pl.Utf8,  # "exp_based" | "manual_based"
+    "fold": pl.Int32,
+    # metrics
+    "auc": pl.Float64,
+    "ap": pl.Float64,
+    # per-sample scores: list of (idx, label, score)
+    "scores": pl.List(
+        pl.Struct(
+            {
+                "sample_idx": pl.Int32,  # dataloader idx
+                "orig_label": pl.Int8,  # {-1,0,1}
+                "score": pl.Float64,  # anomaly score
+            }
+        )
+    ),
+    # curves (normalized)
+    "roc_curve": pl.Struct(
+        {
+            "fpr": pl.List(pl.Float64),
+            "tpr": pl.List(pl.Float64),
+            "thr": pl.List(pl.Float64),
+        }
+    ),
+    "prc_curve": pl.Struct(
+        {
+            "precision": pl.List(pl.Float64),
+            "recall": pl.List(pl.Float64),
+            "thr": pl.List(pl.Float64),  # may be len(precision)-1
+        }
+    ),
+    # deepsad-only per-eval arrays (None for other models)
+    "sample_indices": pl.List(pl.Int32),
+    "sample_labels": pl.List(pl.Int8),
+    "valid_mask": pl.List(pl.Boolean),
+    # timings / housekeeping
+    "train_time": pl.Float64,
+    "test_time": pl.Float64,
+    "folder": pl.Utf8,
+    "k_fold_num": pl.Int32,
+    "config_json": pl.Utf8,  # full config.json as string (for reference)
+}
+
+# Pretraining-only (AE) schema — lighter defaults.
+# Column name -> polars dtype for the autoencoder pretraining results table.
+PRETRAIN_SCHEMA = {
+    # identifiers / dims
+    "network": pl.Utf8,  # e.g. "LeNet", "efficient"
+    "latent_dim": pl.Int32,
+    "semi_normals": pl.Int32,
+    "semi_anomalous": pl.Int32,
+    "model": pl.Utf8,  # always "ae"
+    "fold": pl.Int32,
+    "split": pl.Utf8,  # "train" | "test"
+    # timings and optimization
+    "time": pl.Float64,
+    "loss": pl.Float64,
+    # per-sample arrays (as lists)
+    "indices": pl.List(pl.Int32),
+    "labels_exp_based": pl.List(pl.Int32),
+    "labels_manual_based": pl.List(pl.Int32),
+    "semi_targets": pl.List(pl.Int32),
+    "file_ids": pl.List(pl.Int32),
+    "frame_ids": pl.List(pl.Int32),
+    "scores": pl.List(pl.Float32),  # <— use Float32 to match source and save space
+    # file id -> name mapping from the result dict
+    "file_names": pl.List(pl.Struct({"file_id": pl.Int32, "name": pl.Utf8})),
+    # housekeeping
+    "folder": pl.Utf8,
+    "k_fold_num": pl.Int32,
+    "config_json": pl.Utf8,  # full config.json as string (for reference)
+}
+
+
+# ------------------------------------------------------------
+# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
+# ------------------------------------------------------------
+def _tolist(x):
+    """Coerce an array-like or scalar into a plain Python list.
+
+    Returns:
+        ``None`` for ``None``; ``x.tolist()`` for a NumPy array; ``list(x)``
+        for a list or tuple; otherwise ``[x]`` (any other object is wrapped as
+        a single element, not iterated).
+    """
+    if x is None:
+        return None
+    if isinstance(x, np.ndarray):
+        return x.tolist()
+    if isinstance(x, (list, tuple)):
+        return list(x)
+    # Best-effort scalar wrap. Building a one-element list cannot fail, so no
+    # exception handling is needed here.
+    return [x]
+
+
+def normalize_float_list(a) -> Optional[List[float]]:
+    """Convert a sequence or NumPy array into a list of Python floats.
+
+    ``None`` input yields ``None``; ``None`` elements are kept as ``None``.
+    """
+    if a is None:
+        return None
+    values = a.tolist() if isinstance(a, np.ndarray) else a
+    converted: List[Optional[float]] = []
+    for item in values:
+        converted.append(float(item) if item is not None else None)
+    return converted
+
+
+def normalize_file_names(d) -> Optional[List[dict]]:
+    """Convert a ``file_names`` mapping into a list of id/name records.
+
+    Args:
+        d: Mapping from a file id (e.g. ``numpy.int64``) to a file name.
+
+    Returns:
+        ``None`` when ``d`` is not a dict; otherwise a list of
+        ``{"file_id": int, "name": str}`` sorted by ``file_id``. Keys that
+        cannot be converted to ``int`` are skipped.
+    """
+    if not isinstance(d, dict):
+        return None
+    out: List[dict] = []
+    for k, v in d.items():
+        try:
+            file_id = int(k)
+        except (TypeError, ValueError, OverflowError):
+            # Best-effort: drop keys that are not readable as an integer id.
+            # Only the errors int() raises are caught so unrelated bugs surface.
+            continue
+        out.append({"file_id": file_id, "name": str(v)})
+    out.sort(key=lambda x: x["file_id"])
+    return out
+
+
+def normalize_roc(obj: Any) -> Optional[dict]:
+    """Normalize a ROC curve into a dict of plain lists.
+
+    Args:
+        obj: Either a tuple/list ``(fpr, tpr[, thr])`` or a dict using the keys
+            ``fpr``/``x``, ``tpr``/``y`` and ``thr``/``thresholds``.
+
+    Returns:
+        ``{"fpr": [...], "tpr": [...], "thr": [...] | None}``, or ``None`` when
+        ``obj`` is ``None``, has an unsupported type, or lacks fpr/tpr.
+    """
+    if obj is None:
+        return None
+    fpr = tpr = thr = None
+    if isinstance(obj, (tuple, list)):
+        if len(obj) >= 2:
+            fpr, tpr = _tolist(obj[0]), _tolist(obj[1])
+        if len(obj) >= 3:
+            thr = _tolist(obj[2])
+    elif isinstance(obj, dict):
+
+        def pick(*keys):
+            # First key whose value is not None. An ``a or b`` chain would
+            # raise on multi-element NumPy arrays (ambiguous truth value) and
+            # would discard legitimately empty sequences.
+            for key in keys:
+                value = obj.get(key)
+                if value is not None:
+                    return value
+            return None
+
+        fpr = _tolist(pick("fpr", "x"))
+        tpr = _tolist(pick("tpr", "y"))
+        thr = _tolist(pick("thr", "thresholds"))
+    else:
+        return None
+    if fpr is None or tpr is None:
+        return None
+    return {"fpr": fpr, "tpr": tpr, "thr": thr}
+
+
+def normalize_prc(obj: Any) -> Optional[dict]:
+    """Normalize a precision-recall curve into a dict of plain lists.
+
+    Args:
+        obj: Either a tuple/list ``(precision, recall[, thr])`` or a dict using
+            the keys ``precision``/``y``, ``recall``/``x`` and
+            ``thr``/``thresholds``.
+
+    Returns:
+        ``{"precision": [...], "recall": [...], "thr": [...] | None}``, or
+        ``None`` when ``obj`` is ``None``, has an unsupported type, or lacks
+        precision/recall.
+    """
+    if obj is None:
+        return None
+    precision = recall = thr = None
+    if isinstance(obj, (tuple, list)):
+        if len(obj) >= 2:
+            precision, recall = _tolist(obj[0]), _tolist(obj[1])
+        if len(obj) >= 3:
+            thr = _tolist(obj[2])
+    elif isinstance(obj, dict):
+
+        def pick(*keys):
+            # First key whose value is not None. An ``a or b`` chain would
+            # raise on multi-element NumPy arrays (ambiguous truth value) and
+            # would discard legitimately empty sequences.
+            for key in keys:
+                value = obj.get(key)
+                if value is not None:
+                    return value
+            return None
+
+        precision = _tolist(pick("precision", "y"))
+        recall = _tolist(pick("recall", "x"))
+        thr = _tolist(pick("thr", "thresholds"))
+    else:
+        return None
+    if precision is None or recall is None:
+        return None
+    return {"precision": precision, "recall": recall, "thr": thr}
+
+
+def normalize_scores_to_struct(seq) -> Optional[List[dict]]:
+    """Turn per-sample score entries into a list of struct-like dicts.
+
+    Input: a list/tuple (or NumPy array) whose items are (idx, label, score)
+    sequences. Output: one dict per item with the keys ``sample_idx``,
+    ``orig_label`` and ``score``. An item that is not a sequence of length >= 3
+    is kept as a record whose ids are ``None``; its score is the item itself
+    when it is numeric, else ``None``. Returns ``None`` for ``None`` or
+    non-sequence input.
+    """
+    if seq is None:
+        return None
+    rows = seq.tolist() if isinstance(seq, np.ndarray) else seq
+    if not isinstance(rows, (list, tuple)):
+        return None
+
+    numeric_types = (int, float, np.integer, np.floating)
+    records: List[dict] = []
+    for entry in rows:
+        if not (isinstance(entry, (list, tuple)) and len(entry) >= 3):
+            # fallback: single numeric -> score
+            score = float(entry) if isinstance(entry, numeric_types) else None
+            records.append({"sample_idx": None, "orig_label": None, "score": score})
+            continue
+        idx, lab, sc = entry[:3]
+        records.append(
+            {
+                "sample_idx": int(idx) if idx is not None else None,
+                "orig_label": int(lab) if lab is not None else None,
+                "score": float(sc) if sc is not None else None,
+            }
+        )
+    return records
+
+
+def normalize_int_list(a) -> Optional[List[int]]:
+    """
+    Return `a` as a plain Python list of ints.
+
+    numpy arrays are converted via .tolist() first. Every element is cast
+    with int() so the result matches the declared List[int] (mirroring
+    normalize_bool_list, which casts with bool()); None elements are kept
+    as None. Returns None when `a` is None.
+    """
+    if a is None:
+        return None
+    if isinstance(a, np.ndarray):
+        a = a.tolist()
+    return [None if x is None else int(x) for x in a]
+
+
+def normalize_bool_list(a) -> Optional[List[bool]]:
+    """
+    Return the elements of `a` coerced with bool(), as a Python list.
+
+    numpy arrays are converted via .tolist() first; None maps to None.
+    """
+    if a is None:
+        return None
+    values = a.tolist() if isinstance(a, np.ndarray) else a
+    return list(map(bool, values))
+
+
+# ------------------------------------------------------------
+# Low-level: read one experiment folder
+# ------------------------------------------------------------
+def read_config(exp_dir: Path) -> dict:
+    """
+    Load `<exp_dir>/config.json` and return it as a dict.
+
+    Raises:
+        ValueError: if the config has no truthy "k_fold" entry.
+        OSError / json.JSONDecodeError: if the file is missing or malformed.
+    """
+    cfg = exp_dir / "config.json"
+    # JSON is UTF-8 by specification; do not depend on the platform locale.
+    with cfg.open("r", encoding="utf-8") as f:
+        c = json.load(f)
+    if not c.get("k_fold"):
+        raise ValueError(f"{exp_dir.name}: not trained as k-fold")
+    return c
+
+
+def read_pickle(p: Path) -> Any:
+    """Return the object unpickled from the file at `p`."""
+    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
+    return pickle.loads(p.read_bytes())
+
+
+# ------------------------------------------------------------
+# Extractors for each model
+# ------------------------------------------------------------
+def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
+    """
+    Extract per-eval DeepSAD results from data["test"][<eval>].
+
+    For every name in `evals` whose entry under data["test"] is a dict, the
+    result maps that name to a dict with auc, roc, prc, ap, scores,
+    sample_indices, sample_labels, valid_mask, train_time and test_time.
+    train_time is read from data["train"]["time"] and test_time from
+    data["test"]["time"]; both are shared by all evals.
+
+    Returns an empty dict when data["test"] is missing or not a dict.
+    A missing or non-dict data["train"] yields train_time=None.
+    """
+    test = data.get("test")
+    if not isinstance(test, dict):
+        # covers both an absent key and an explicit None/other value
+        return {}
+
+    train = data.get("train")
+    train_time = train.get("time") if isinstance(train, dict) else None
+    test_time = test.get("time")
+
+    def _opt_float(value: Any) -> Optional[float]:
+        return None if value is None else float(value)
+
+    out: Dict[str, dict] = {}
+    for ev in evals:
+        evd = test.get(ev)
+        if not isinstance(evd, dict):
+            continue
+        out[ev] = {
+            "auc": _opt_float(evd.get("auc")),
+            "roc": normalize_roc(evd.get("roc")),
+            "prc": normalize_prc(evd.get("prc")),
+            "ap": _opt_float(evd.get("ap")),
+            "scores": normalize_scores_to_struct(evd.get("scores")),
+            "sample_indices": normalize_int_list(evd.get("indices")),
+            "sample_labels": normalize_int_list(evd.get("labels")),
+            "valid_mask": normalize_bool_list(evd.get("valid_mask")),
+            "train_time": train_time,
+            "test_time": test_time,
+        }
+    return out
+
+
+def rows_from_isoforest(data: dict, evals: List[str]) -> Dict[str, dict]:
+    """
+    Extract per-eval Isolation Forest results from flat keys in `data`.
+
+    Keys read per eval: test_auc_<eval>, test_roc_<eval>, test_prc_<eval>,
+    test_ap_<eval>, test_scores_<eval>; plus the shared train_time and
+    test_time. Evals without a test_auc_<eval> value are omitted.
+    """
+    train_time = data.get("train_time")
+    test_time = data.get("test_time")
+
+    result: Dict[str, dict] = {}
+    for name in evals:
+        auc_raw = data.get(f"test_auc_{name}")
+        if auc_raw is None:
+            continue
+        ap_raw = data.get(f"test_ap_{name}")
+        result[name] = {
+            "auc": float(auc_raw),
+            "roc": normalize_roc(data.get(f"test_roc_{name}")),
+            "prc": normalize_prc(data.get(f"test_prc_{name}")),
+            "ap": None if ap_raw is None else float(ap_raw),
+            "scores": normalize_scores_to_struct(data.get(f"test_scores_{name}")),
+            # this result layout carries no per-sample arrays
+            "sample_indices": None,
+            "sample_labels": None,
+            "valid_mask": None,
+            "train_time": train_time,
+            "test_time": test_time,
+        }
+    return result
+
+
+def rows_from_ocsvm_default(data: dict, evals: List[str]) -> Dict[str, dict]:
+    """
+    Extract per-eval results of the default OCSVM from flat keys in `data`.
+
+    Only the test_<metric>_<eval> keys are read (the linear variant is
+    ignored entirely). Evals without a test_auc_<eval> value are omitted.
+    """
+
+    def _lookup(metric: str, ev: str):
+        return data.get(f"test_{metric}_{ev}")
+
+    per_eval: Dict[str, dict] = {}
+    for ev in evals:
+        auc_value = _lookup("auc", ev)
+        if auc_value is None:
+            continue
+        ap_value = _lookup("ap", ev)
+        entry = {
+            "auc": float(auc_value),
+            "roc": normalize_roc(_lookup("roc", ev)),
+            "prc": normalize_prc(_lookup("prc", ev)),
+            "ap": float(ap_value) if ap_value is not None else None,
+            "scores": normalize_scores_to_struct(_lookup("scores", ev)),
+            "sample_indices": None,
+            "sample_labels": None,
+            "valid_mask": None,
+            "train_time": data.get("train_time"),
+            "test_time": data.get("test_time"),
+        }
+        per_eval[ev] = entry
+    return per_eval
+
+
+# ------------------------------------------------------------
+# Build the Polars DataFrame
+# ------------------------------------------------------------
+def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame:
+    """
+    Collect results from the experiment subdirectories of `root`.
+
+    For every experiment directory, every model in MODELS and every fold
+    with a `results_<model>_<fold>.pkl` file, one row per eval is added.
+
+    Row fields (the frame is built with SCHEMA_STATIC):
+      network, latent_dim, semi_normals, semi_anomalous,
+      model, eval, fold,
+      auc, ap, scores{sample_idx,orig_label,score},
+      roc_curve{fpr,tpr,thr}, prc_curve{precision,recall,thr},
+      sample_indices, sample_labels, valid_mask,
+      train_time, test_time,
+      folder, k_fold_num, config_json
+
+    Args:
+        root: directory whose subdirectories are the experiments.
+        allow_cache: when True, return `root/results_cache.parquet` if it
+            can be read, and write the freshly built frame there otherwise.
+            The cache is never invalidated here; delete the file to force
+            a rebuild.
+
+    Experiments whose config cannot be read or parsed, and pickles that
+    cannot be loaded, are skipped with a "[warn]" message.
+    """
+    cache = root / "results_cache.parquet"
+    if allow_cache and cache.exists():
+        try:
+            df = pl.read_parquet(cache)
+            print(f"[info] loaded cached results frame from {cache}")
+            return df
+        except Exception as e:
+            print(f"[warn] failed to load cache {cache}: {e}")
+
+    # model name -> extractor returning {eval: values}
+    extractors = {
+        "deepsad": rows_from_deepsad,
+        "isoforest": rows_from_isoforest,
+        "ocsvm": rows_from_ocsvm_default,
+    }
+
+    rows: List[dict] = []
+
+    for exp_dir in sorted(p for p in root.iterdir() if p.is_dir()):
+        try:
+            cfg = read_config(exp_dir)
+            cfg_json = json.dumps(cfg, sort_keys=True)
+            # Parsed inside the try so that a missing or non-numeric field
+            # skips this experiment instead of aborting the whole load.
+            network = cfg.get("net_name")
+            latent_dim = int(cfg.get("latent_space_dim"))
+            semi_normals = int(cfg.get("num_known_normal"))
+            semi_anomalous = int(cfg.get("num_known_outlier"))
+            k = int(cfg.get("k_fold_num"))
+        except Exception as e:
+            print(f"[warn] skipping {exp_dir.name}: {e}")
+            continue
+
+        for model in MODELS:
+            extract = extractors.get(model)
+            if extract is None:
+                # no extractor -> no rows; don't bother unpickling
+                continue
+
+            for fold in range(k):
+                pkl = exp_dir / f"results_{model}_{fold}.pkl"
+                if not pkl.exists():
+                    continue
+
+                try:
+                    data = read_pickle(pkl)
+                except Exception as e:
+                    print(f"[warn] failed to read {pkl.name}: {e}")
+                    continue
+
+                per_eval = extract(data, EVALS)  # eval -> dict
+
+                for ev, vals in per_eval.items():
+                    rows.append(
+                        {
+                            "network": network,
+                            "latent_dim": latent_dim,
+                            "semi_normals": semi_normals,
+                            "semi_anomalous": semi_anomalous,
+                            "model": model,
+                            "eval": ev,
+                            "fold": fold,
+                            "auc": vals["auc"],
+                            "ap": vals["ap"],
+                            "scores": vals["scores"],
+                            "roc_curve": vals["roc"],
+                            "prc_curve": vals["prc"],
+                            "sample_indices": vals.get("sample_indices"),
+                            "sample_labels": vals.get("sample_labels"),
+                            "valid_mask": vals.get("valid_mask"),
+                            "train_time": vals["train_time"],
+                            "test_time": vals["test_time"],
+                            "folder": str(exp_dir),
+                            "k_fold_num": k,
+                            "config_json": cfg_json,
+                        }
+                    )
+
+    # If empty, return a typed empty frame (and do not cache it)
+    if not rows:
+        return pl.DataFrame(schema=SCHEMA_STATIC)
+
+    df = pl.DataFrame(rows, schema=SCHEMA_STATIC)
+
+    # Cast to efficient dtypes (categoricals etc.) – no extra sanitation
+    df = df.with_columns(
+        pl.col("network", "model", "eval").cast(pl.Categorical),
+        pl.col(
+            "latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
+        ).cast(pl.Int32),
+        pl.col("auc", "ap", "train_time", "test_time").cast(pl.Float64),
+        # NOTE: no cast on 'scores' here; it's already List(Struct) per schema.
+    )
+
+    if allow_cache:
+        try:
+            df.write_parquet(cache)
+            print(f"[info] cached results frame to {cache}")
+        except Exception as e:
+            print(f"[warn] failed to write cache {cache}: {e}")
+
+    return df
+
+
+def load_pretraining_results_dataframe(
+    root: Path,
+    allow_cache: bool = True,
+    include_train: bool = False,  # <— default: store only TEST to keep cache tiny
+    keep_file_names: bool = False,  # <— drop file_names by default; they’re repeated
+    parquet_compression: str = "zstd",
+    parquet_compression_level: int = 7,  # <— stronger compression than default
+) -> pl.DataFrame:
+    """
+    Load only AE pretraining results: files named `results_ae_<fold>.pkl`.
+
+    Produces one row per (experiment, fold, split), built with
+    PRETRAIN_SCHEMA.
+
+    Args:
+        root: directory whose subdirectories are the experiments.
+        allow_cache: when True, return
+            `root/pretraining_results_cache.parquet` if it can be read, and
+            write the freshly built frame there otherwise. The cache is not
+            invalidated here, so it also ignores changes to the arguments
+            below; delete the file to force a rebuild.
+        include_train: also emit the "train" split (default: "test" only).
+        keep_file_names: keep the file_names mapping (default: stored as
+            None to save space).
+        parquet_compression: compression codec for the cache file.
+        parquet_compression_level: compression level for the cache file.
+
+    Scores are cast to List(Float32). Experiments whose config cannot be
+    read or parsed, and pickles that cannot be loaded, are skipped with a
+    "[warn]" message.
+    """
+    cache = root / "pretraining_results_cache.parquet"
+    if allow_cache and cache.exists():
+        try:
+            df = pl.read_parquet(cache)
+            print(f"[info] loaded cached pretraining frame from {cache}")
+            return df
+        except Exception as e:
+            print(f"[warn] failed to load pretraining cache {cache}: {e}")
+
+    # Only test split by default (include_train=False)
+    splits = ("train", "test") if include_train else ("test",)
+
+    def _opt_float(value):
+        return None if value is None else float(value)
+
+    rows: List[dict] = []
+
+    for exp_dir in sorted(p for p in root.iterdir() if p.is_dir()):
+        try:
+            cfg = read_config(exp_dir)
+            cfg_json = json.dumps(cfg, sort_keys=True)
+            # Parsed inside the try so that a missing or non-numeric field
+            # skips this experiment instead of aborting the whole load.
+            network = cfg.get("net_name")
+            latent_dim = int(cfg.get("latent_space_dim"))
+            semi_normals = int(cfg.get("num_known_normal"))
+            semi_anomalous = int(cfg.get("num_known_outlier"))
+            k = int(cfg.get("k_fold_num"))
+        except Exception as e:
+            print(f"[warn] skipping {exp_dir.name} (pretraining): {e}")
+            continue
+
+        for fold in range(k):
+            pkl = exp_dir / f"results_ae_{fold}.pkl"
+            if not pkl.exists():
+                continue
+
+            try:
+                data = read_pickle(pkl)  # expected: {"train": {...}, "test": {...}}
+            except Exception as e:
+                print(f"[warn] failed to read {pkl.name}: {e}")
+                continue
+
+            for split in splits:
+                splitd = data.get(split)
+                if not isinstance(splitd, dict):
+                    continue
+
+                raw_scores = splitd.get("scores")
+                if isinstance(raw_scores, np.ndarray):
+                    raw_scores = raw_scores.tolist()
+                scores = (
+                    None if raw_scores is None else [float(x) for x in raw_scores]
+                )
+
+                rows.append(
+                    {
+                        "network": network,
+                        "latent_dim": latent_dim,
+                        "semi_normals": semi_normals,
+                        "semi_anomalous": semi_anomalous,
+                        "model": "ae",
+                        "fold": fold,
+                        "split": split,
+                        "time": _opt_float(splitd.get("time")),
+                        "loss": _opt_float(splitd.get("loss")),
+                        "indices": normalize_int_list(splitd.get("indices")),
+                        "labels_exp_based": normalize_int_list(
+                            splitd.get("labels_exp_based")
+                        ),
+                        "labels_manual_based": normalize_int_list(
+                            splitd.get("labels_manual_based")
+                        ),
+                        "semi_targets": normalize_int_list(splitd.get("semi_targets")),
+                        "file_ids": normalize_int_list(splitd.get("file_ids")),
+                        "frame_ids": normalize_int_list(splitd.get("frame_ids")),
+                        # downcast to Float32 happens in the column cast below
+                        "scores": scores,
+                        "file_names": normalize_file_names(splitd.get("file_names"))
+                        if keep_file_names
+                        else None,
+                        "folder": str(exp_dir),
+                        "k_fold_num": k,
+                        "config_json": cfg_json,
+                    }
+                )
+
+    # If empty, return a typed empty frame (and do not cache it)
+    if not rows:
+        return pl.DataFrame(schema=PRETRAIN_SCHEMA)
+
+    df = pl.DataFrame(rows, schema=PRETRAIN_SCHEMA)
+
+    # Cast/optimize a bit (categoricals, ints, floats)
+    df = df.with_columns(
+        pl.col("network", "model", "split").cast(pl.Categorical),
+        pl.col(
+            "latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
+        ).cast(pl.Int32),
+        pl.col("time", "loss").cast(pl.Float64),
+        pl.col("scores").cast(pl.List(pl.Float32)),  # ensure downcast took
+    )
+
+    if allow_cache:
+        try:
+            df.write_parquet(
+                cache,
+                compression=parquet_compression,
+                compression_level=parquet_compression_level,
+                statistics=True,
+            )
+            print(
+                f"[info] cached pretraining frame to {cache} "
+                f"({parquet_compression}, level={parquet_compression_level})"
+            )
+        except Exception as e:
+            print(f"[warn] failed to write pretraining cache {cache}: {e}")
+
+    return df
+
+
+def main():
+    """Load both result frames from the fixed results root and print a preview."""
+    results_root = Path("/home/fedex/mt/results/done")
+
+    results = load_results_dataframe(results_root, allow_cache=True)
+    print(results.shape, results.head())
+
+    pretraining = load_pretraining_results_dataframe(results_root, allow_cache=True)
+    print("pretraining:", pretraining.shape, pretraining.head())
+
+
+# Script entry point: run the preview only when executed directly.
+if __name__ == "__main__":
+    main()
--- a/tools/plot_scripts/results_latent_space_comparisons.py
+++ b/tools/plot_scripts/results_latent_space_comparisons.py
@@ -0,0 +1,358 @@
+from __future__ import annotations
+
+import shutil
+from datetime import datetime
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import polars as pl
+from matplotlib.lines import Line2D
+
+# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
+from load_results import load_results_dataframe
+
+# ----------------------------
+# Config
+# ----------------------------
+ROOT = Path("/home/fedex/mt/results/done")  # experiments root you pass to the loader
+# Plots go to OUTPUT_DIR/archive/<timestamp>/ and are mirrored to OUTPUT_DIR/latest/
+OUTPUT_DIR = Path("/home/fedex/mt/plots/results_latent_space_comparisons")
+
+# (semi_normals, semi_anomalous) pairs; main() renders one set of figures per pair
+SEMI_LABELING_REGIMES = [(0, 0), (50, 10), (500, 100)]
+
+# Semi-supervised setting to select
+# NOTE: not referenced elsewhere in this script; main() iterates
+# SEMI_LABELING_REGIMES instead.
+SEMI_NORMALS = 50
+SEMI_ANOMALOUS = 10
+
+# Which evaluation columns to plot
+EVALS = ["exp_based", "manual_based"]
+
+# Latent dimensions to show as 7 subplots
+# (plot_grid_from_df draws at most the first 7 entries)
+LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
+
+# Interpolation grids: common x-axis (FPR for ROC, recall for PRC) onto which
+# every fold's curve is resampled before averaging
+ROC_GRID = np.linspace(0.0, 1.0, 200)
+PRC_GRID = np.linspace(0.0, 1.0, 200)
+
+
+# ----------------------------
+# Helpers
+# ----------------------------
+def canonicalize_network(name: str) -> str:
+    """
+    Return a short display label for a net_name string.
+
+    Case-insensitive substring match: "lenet" wins over "efficient"; any
+    other non-empty name is returned unchanged, and an empty/None name
+    becomes "unknown".
+    """
+    lowered = name.lower() if name else ""
+    for needle, label in (("lenet", "LeNet"), ("efficient", "Efficient")):
+        if needle in lowered:
+            return label
+    return name if name else "unknown"
+
+
+def _interp_mean_std(curves: list[tuple[np.ndarray, np.ndarray]], grid: np.ndarray):
+    """
+    Resample (x, y) curves onto `grid` and return (mean_y, std_y) per grid point.
+
+    Curves with a None, empty, or length-mismatched x/y are ignored. When no
+    usable curve remains, both returned arrays are filled with NaN.
+    """
+
+    def _resample(x_raw, y_raw):
+        """Return y evaluated on `grid` for one curve, or None if unusable."""
+        if x_raw is None or y_raw is None:
+            return None
+        xs = np.asarray(x_raw, dtype=float)
+        ys = np.asarray(y_raw, dtype=float)
+        if xs.size == 0 or ys.size == 0 or xs.size != ys.size:
+            return None
+        # np.interp needs ascending x: sort, then keep one point per x value
+        by_x = np.argsort(xs)
+        xs, ys = xs[by_x], ys[by_x]
+        xs, keep = np.unique(xs, return_index=True)
+        ys = ys[keep]
+        # evaluate only inside the range covered by both the grid and the curve
+        lo = max(grid[0], xs[0])
+        hi = min(grid[-1], xs[-1])
+        return np.interp(np.clip(grid, lo, hi), xs, ys)
+
+    resampled = []
+    for x, y in curves or ():
+        yi = _resample(x, y)
+        if yi is not None:
+            resampled.append(yi)
+
+    if not resampled:
+        nan_mean = np.full_like(grid, np.nan, dtype=float)
+        nan_std = np.full_like(grid, np.nan, dtype=float)
+        return nan_mean, nan_std
+
+    stacked = np.vstack(resampled)
+    return np.nanmean(stacked, axis=0), np.nanstd(stacked, axis=0)
+
+
+def _net_label_col(df: pl.DataFrame) -> pl.DataFrame:
+    """
+    Return `df` with an added 'net_label' column.
+
+    The label is "LeNet" when the lower-cased network name contains "lenet",
+    else "Efficient" when it contains "efficient", else the network name
+    itself as a string.
+    """
+    network = pl.col("network").cast(pl.Utf8)
+    network_lower = network.str.to_lowercase()
+    net_label = (
+        pl.when(network_lower.str.contains("lenet"))
+        .then(pl.lit("LeNet"))
+        .when(network_lower.str.contains("efficient"))
+        .then(pl.lit("Efficient"))
+        .otherwise(network)
+        .alias("net_label")
+    )
+    return df.with_columns(net_label)
+
+
+def _select_rows(
+    df: pl.DataFrame,
+    *,
+    model: str,
+    eval_type: str,
+    latent_dim: int,
+    net_label: str | None,
+    semi_normals: int,
+    semi_anomalous: int,
+) -> pl.DataFrame:
+    """
+    Return the rows of `df` matching every given column value.
+
+    Filters on model, eval, latent_dim, semi_normals and semi_anomalous;
+    net_label is only filtered on when it is not None.
+    """
+    wanted = {
+        "model": model,
+        "eval": eval_type,
+        "latent_dim": latent_dim,
+        "semi_normals": semi_normals,
+        "semi_anomalous": semi_anomalous,
+    }
+    if net_label is not None:
+        wanted["net_label"] = net_label
+    predicates = [pl.col(column) == value for column, value in wanted.items()]
+    return df.filter(pl.all_horizontal(predicates))
+
+
+def _extract_curves(rows: list[dict], kind: str) -> list[tuple[np.ndarray, np.ndarray]]:
+    """
+    Collect (x, y) float arrays from row dicts.
+
+    kind == "roc" reads row["roc_curve"] as (fpr, tpr); any other kind reads
+    row["prc_curve"] as (recall, precision). Rows whose curve is missing or
+    empty, or whose x or y is None, are skipped.
+    """
+    if kind == "roc":
+        column, x_key, y_key = "roc_curve", "fpr", "tpr"
+    else:
+        column, x_key, y_key = "prc_curve", "recall", "precision"
+
+    collected = []
+    for row in rows:
+        curve = row.get(column)
+        if not curve:
+            continue
+        xs, ys = curve.get(x_key), curve.get(y_key)
+        if xs is None or ys is None:
+            continue
+        collected.append((np.asarray(xs, dtype=float), np.asarray(ys, dtype=float)))
+    return collected
+
+
+def _ensure_dim_axes(fig_title: str):
+    """
+    Create the titled figure and return (fig, axes) with axes flattened.
+
+    The subplots are created as 4 rows x 2 columns (8 axes); the caller
+    uses the last one for the legend.
+    """
+    fig, axes_grid = plt.subplots(
+        nrows=4,
+        ncols=2,
+        figsize=(12, 16),
+        constrained_layout=True,
+    )
+    fig.suptitle(fig_title, fontsize=14)
+    return fig, axes_grid.ravel()
+
+
+def _add_legend_to_axis(ax, handles_labels):
+    """
+    Turn `ax` into a legend-only panel.
+
+    `handles_labels` is a (handles, labels) pair; the axis decorations are
+    switched off and a frameless, single-column legend is centred in it.
+    """
+    ax.axis("off")
+    legend_handles, legend_labels = handles_labels
+    legend_style = dict(
+        loc="center",
+        frameon=False,
+        ncol=1,
+        fontsize=11,
+        borderaxespad=0.5,
+    )
+    ax.legend(legend_handles, legend_labels, **legend_style)
+
+
+def plot_grid_from_df(
+    df: pl.DataFrame,
+    eval_type: str,
+    kind: str,
+    semi_normals: int,
+    semi_anomalous: int,
+    out_path: Path,
+):
+    """
+    Plot mean ± std curves, one panel per latent dimension, and save the figure.
+
+    The axes come from _ensure_dim_axes; the last axis holds the legend and
+    the preceding ones show the entries of LATENT_DIMS in order (extra
+    entries that do not fit are not drawn, spare panels are hidden).
+
+    Args:
+        df: results frame including the 'net_label' column.
+        eval_type: value of the 'eval' column to plot.
+        kind: "roc" for ROC curves; any other value plots PRC curves.
+        semi_normals, semi_anomalous: labeling regime to select.
+        out_path: image file to write; parent directories are created.
+
+    The legend lists every series that was drawn in at least one panel,
+    in the fixed series order.
+    """
+    fig_title = f"{kind.upper()} — {eval_type} (semi = {semi_normals}/{semi_anomalous})"
+    fig, axes = _ensure_dim_axes(fig_title)
+
+    # (model, required net_label, legend label, color) in plotting order
+    series = [
+        ("isoforest", None, "IsolationForest", "tab:purple"),
+        ("ocsvm", None, "OC-SVM", "tab:green"),
+        ("deepsad", "LeNet", "DeepSAD (LeNet)", "tab:blue"),
+        ("deepsad", "Efficient", "DeepSAD (Efficient)", "tab:orange"),
+    ]
+
+    if kind == "roc":
+        curve_col, x_label, y_label, grid = "roc_curve", "FPR", "TPR", ROC_GRID
+    else:
+        curve_col, x_label, y_label, grid = (
+            "prc_curve",
+            "Recall",
+            "Precision",
+            PRC_GRID,
+        )
+
+    legend_ax = axes[-1]  # last slot reserved for legend
+    panel_axes = axes[:-1]
+    dims = LATENT_DIMS[: len(panel_axes)]
+
+    # Labels drawn in ANY panel; collecting across panels keeps the legend
+    # complete even if the first panel with data lacks some series.
+    plotted_labels = set()
+
+    for ax, dim in zip(panel_axes, dims):
+        ax.set_title(f"latent_dim = {dim}")
+        ax.grid(True, alpha=0.3)
+        ax.set_xlim(0, 1)
+        ax.set_ylim(0, 1)
+        ax.set_xlabel(x_label)
+        ax.set_ylabel(y_label)
+
+        plotted_any = False
+
+        for model, net_needed, label, color in series:
+            # baselines: use Efficient only
+            net_filter = "Efficient" if model in ("isoforest", "ocsvm") else net_needed
+
+            sub = _select_rows(
+                df,
+                model=model,
+                eval_type=eval_type,
+                latent_dim=dim,
+                net_label=net_filter,
+                semi_normals=semi_normals,
+                semi_anomalous=semi_anomalous,
+            )
+            if sub.height == 0:
+                continue
+
+            curves = _extract_curves(sub.select(curve_col).to_dicts(), kind)
+            if not curves:
+                continue
+
+            mean_y, std_y = _interp_mean_std(curves, grid)
+            # Guard for all-NaN
+            if np.all(np.isnan(mean_y)):
+                continue
+
+            ax.plot(grid, mean_y, label=label, color=color)
+            ax.fill_between(
+                grid, mean_y - std_y, mean_y + std_y, alpha=0.15, color=color
+            )
+            plotted_any = True
+            plotted_labels.add(label)
+
+        if not plotted_any:
+            ax.text(
+                0.5, 0.5, "No data", ha="center", va="center", fontsize=12, alpha=0.7
+            )
+
+    # Hide panels that have no latent dim assigned
+    for ax in panel_axes[len(dims) :]:
+        ax.axis("off")
+
+    # Legend entries in series order, restricted to what was actually drawn
+    legend_handles = []
+    legend_labels = []
+    for _model, _net, label, color in series:
+        if label in plotted_labels:
+            legend_handles.append(Line2D([0], [0], color=color, lw=2))
+            legend_labels.append(label)
+    _add_legend_to_axis(legend_ax, (legend_handles, legend_labels))
+
+    # Save
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(out_path, dpi=150, bbox_inches="tight")
+    plt.close(fig)
+
+
+def main():
+    """
+    Render ROC and PRC grids for every labeling regime and eval type.
+
+    Figures are written to OUTPUT_DIR/archive/<timestamp>/ together with a
+    copy of this script, then the files of that folder replace the files
+    in OUTPUT_DIR/latest/.
+    """
+    # Load main results DF (uses the loader's cache) and add clean network labels
+    labelled = _net_label_col(load_results_dataframe(ROOT, allow_cache=True))
+
+    # Prepare output dirs
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    archive_dir = OUTPUT_DIR / "archive"
+    archive_dir.mkdir(parents=True, exist_ok=True)
+    ts_dir = archive_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    ts_dir.mkdir(parents=True, exist_ok=True)
+
+    wanted_models = pl.col("model").is_in(["deepsad", "isoforest", "ocsvm"])
+    wanted_evals = pl.col("eval").is_in(EVALS)
+    wanted_dims = pl.col("latent_dim").is_in(LATENT_DIMS)
+
+    for semi_normals, semi_anomalous in SEMI_LABELING_REGIMES:
+        # Restrict to the current semi-supervised setting
+        regime_df = labelled.filter(
+            (pl.col("semi_normals") == semi_normals)
+            & (pl.col("semi_anomalous") == semi_anomalous)
+            & wanted_models
+            & wanted_evals
+            & wanted_dims
+        )
+
+        for eval_type in EVALS:
+            # NOTE(review): ROC file names carry a "semi_" infix that the PRC
+            # names lack — confirm whether this asymmetry is intentional.
+            targets = {
+                "roc": ts_dir
+                / f"roc_semi_{semi_normals}_{semi_anomalous}_{eval_type}.png",
+                "prc": ts_dir
+                / f"prc_{semi_normals}_{semi_anomalous}_{eval_type}.png",
+            }
+            # ROC first, then PRC
+            for kind, target in targets.items():
+                plot_grid_from_df(
+                    regime_df,
+                    eval_type=eval_type,
+                    kind=kind,
+                    semi_normals=semi_normals,
+                    semi_anomalous=semi_anomalous,
+                    out_path=target,
+                )
+
+    # Copy this script to preserve the code used for the outputs
+    shutil.copy2(Path(__file__), ts_dir)
+
+    # Mirror latest: drop the old files, then copy the new ones
+    latest = OUTPUT_DIR / "latest"
+    latest.mkdir(exist_ok=True, parents=True)
+    for stale in latest.iterdir():
+        if stale.is_file():
+            stale.unlink()
+    for produced in ts_dir.iterdir():
+        if produced.is_file():
+            shutil.copy2(produced, latest / produced.name)
+
+    print(f"Saved plots to: {ts_dir}")
+    print(f"Also updated: {latest}")
+
+
+# Script entry point: only render when executed directly.
+if __name__ == "__main__":
+    main()
--- a/tools/plot_scripts/results_semi_labels_comparison.py
+++ b/tools/plot_scripts/results_semi_labels_comparison.py
@@ -0,0 +1,363 @@
+# curves_2x1_by_net_with_regimes_from_df.py
+from __future__ import annotations
+
+import shutil
+from datetime import datetime
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import polars as pl
+from matplotlib.lines import Line2D
+from scipy.stats import sem, t
+
+# CHANGE THIS IMPORT IF YOUR LOADER MODULE NAME IS DIFFERENT
+from load_results import load_results_dataframe
+
+# ---------------------------------
+# Config
+# ---------------------------------
+# Experiments root handed to load_results_dataframe() in main()
+ROOT = Path("/home/fedex/mt/results/done")
+# Base folder for figures; main() writes into archive/<timestamp>/ and latest/
+OUTPUT_DIR = Path("/home/fedex/mt/plots/results_semi_labels_comparison")
+
+# Sweep axes covered by the figures
+LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
+SEMI_REGIMES = [(0, 0), (50, 10), (500, 100)]  # (semi_normals, semi_anomalous)
+EVALS = ["exp_based", "manual_based"]
+
+# Common x-grids that every curve is resampled onto before averaging
+ROC_GRID = np.linspace(0.0, 1.0, num=200)
+PRC_GRID = np.linspace(0.0, 1.0, num=200)
+
+# Baseline rows are duplicated across nets, so only the Efficient ones are
+# plotted to avoid showing the same baseline twice
+BASELINE_NET = "Efficient"
+
+# Line colours for the baseline models
+COLOR_BASELINES = dict(isoforest="tab:purple", ocsvm="tab:green")
+# Per-regime colour and linestyle, keyed by the SEMI_REGIMES tuples (same order)
+COLOR_REGIMES = dict(zip(SEMI_REGIMES, ("tab:blue", "tab:orange", "tab:red")))
+LINESTYLES = dict(zip(SEMI_REGIMES, ("-", "--", "-.")))
+
+
+# ---------------------------------
+# Helpers
+# ---------------------------------
+def _net_label_col(df: pl.DataFrame) -> pl.DataFrame:
+    """Return ``df`` with an added ``net_label`` column derived from ``network``.
+
+    The network name is cast to a string and matched case-insensitively:
+    names containing "lenet" map to "LeNet", names containing "efficient" map
+    to "Efficient", and every other name is kept unchanged. The "lenet" check
+    runs first, so a name containing both substrings is labelled "LeNet".
+    """
+    network_name = pl.col("network").cast(pl.Utf8)
+    lowered = network_name.str.to_lowercase()
+    label = (
+        pl.when(lowered.str.contains("lenet"))
+        .then(pl.lit("LeNet"))
+        .when(lowered.str.contains("efficient"))
+        .then(pl.lit("Efficient"))
+        .otherwise(network_name)
+    )
+    return df.with_columns(label.alias("net_label"))
+
+
+def mean_ci(values: list[float], confidence: float = 0.95) -> tuple[float, float]:
+    """Return the mean and the half-width of a Student-t confidence interval.
+
+    ``None`` and NaN entries are discarded before anything is computed, so the
+    mean, the standard error and the degrees of freedom all refer to the same
+    set of samples.
+
+    Args:
+        values: Samples to summarise; may contain ``None`` or NaN.
+        confidence: Two-sided confidence level, e.g. ``0.95``.
+
+    Returns:
+        ``(mean, half_width)``. ``(nan, nan)`` when no usable sample remains,
+        ``(value, 0.0)`` when exactly one remains.
+    """
+    arr = np.asarray([v for v in values if v is not None], dtype=float)
+    # Drop NaN here rather than via sem(nan_policy="omit"): otherwise the mean
+    # would still be NaN and the t quantile would use the wrong sample count.
+    arr = arr[~np.isnan(arr)]
+    if arr.size == 0:
+        return np.nan, np.nan
+    if arr.size == 1:
+        return float(arr[0]), 0.0
+    m = float(arr.mean())
+    s = sem(arr)
+    h = s * t.ppf((1 + confidence) / 2.0, arr.size - 1)
+    return m, float(h)
+
+
+def _interp_mean_std(curves: list[tuple[np.ndarray, np.ndarray]], grid: np.ndarray):
+    """Resample many (x, y) curves onto ``grid`` and return their mean and std.
+
+    Each curve is cleaned before interpolation: samples with a non-finite x or
+    y are dropped, points are sorted by x with a stable sort, and for repeated
+    x values the first occurrence in the input order is kept. Curves that are
+    ``None``, empty, of mismatched length, or empty after cleaning are skipped.
+
+    Args:
+        curves: ``(x, y)`` pairs; each side is array-like or ``None``.
+        grid: x positions at which every curve is evaluated.
+
+    Returns:
+        ``(mean, std)`` arrays shaped like ``grid`` (population std, ddof=0);
+        both are all-NaN when no usable curve exists.
+    """
+    if not curves:
+        return np.full_like(grid, np.nan, float), np.full_like(grid, np.nan, float)
+    resampled = []
+    for x, y in curves:
+        if x is None or y is None:
+            continue
+        x = np.asarray(x, float)
+        y = np.asarray(y, float)
+        if x.size == 0 or x.size != y.size:
+            continue
+        # A single NaN in x would otherwise turn the whole resampled curve NaN.
+        finite = np.isfinite(x) & np.isfinite(y)
+        x, y = x[finite], y[finite]
+        if x.size == 0:
+            continue
+        # Stable sort so the y kept for duplicate x values is deterministic
+        # (the default argsort algorithm does not preserve tie order).
+        order = np.argsort(x, kind="stable")
+        x, y = x[order], y[order]
+        x, first_idx = np.unique(x, return_index=True)
+        y = y[first_idx]
+        # np.interp already clamps to y[0]/y[-1] outside [x[0], x[-1]].
+        resampled.append(np.interp(grid, x, y))
+    if not resampled:
+        return np.full_like(grid, np.nan, float), np.full_like(grid, np.nan, float)
+    stacked = np.vstack(resampled)
+    return np.nanmean(stacked, axis=0), np.nanstd(stacked, axis=0)
+
+
+def _extract_curves(rows: list[dict], kind: str) -> list[tuple[np.ndarray, np.ndarray]]:
+    """Collect the (x, y) arrays of every usable curve found in ``rows``.
+
+    For ``kind == "roc"`` the ``roc_curve`` entry is read as (fpr, tpr); for
+    any other ``kind`` the ``prc_curve`` entry is read as (recall, precision).
+    Rows whose curve entry is missing or empty, or whose x or y is ``None``,
+    are skipped.
+    """
+    if kind == "roc":
+        curve_key, x_key, y_key = "roc_curve", "fpr", "tpr"
+    else:
+        curve_key, x_key, y_key = "prc_curve", "recall", "precision"
+
+    extracted = []
+    for row in rows:
+        curve = row.get(curve_key)
+        if not curve:
+            continue
+        xs = curve.get(x_key)
+        ys = curve.get(y_key)
+        if xs is None or ys is None:
+            continue
+        extracted.append((np.asarray(xs, float), np.asarray(ys, float)))
+    return extracted
+
+
+def _select_rows(
+    df: pl.DataFrame,
+    *,
+    model: str,
+    eval_type: str,
+    latent_dim: int,
+    semi_normals: int | None = None,
+    semi_anomalous: int | None = None,
+    net_label: str | None = None,
+) -> pl.DataFrame:
+    """Return the rows of ``df`` that match every given criterion.
+
+    ``model``, ``eval_type`` (column ``eval``) and ``latent_dim`` are always
+    applied. ``semi_normals``, ``semi_anomalous`` and ``net_label`` are only
+    applied when they are not ``None``.
+    """
+    # Filters that apply to every query
+    conditions = [
+        pl.col("model") == model,
+        pl.col("eval") == eval_type,
+        pl.col("latent_dim") == latent_dim,
+    ]
+    # Optional filters: None means "do not restrict on this column"
+    optional = (
+        ("semi_normals", semi_normals),
+        ("semi_anomalous", semi_anomalous),
+        ("net_label", net_label),
+    )
+    conditions.extend(
+        pl.col(column) == wanted for column, wanted in optional if wanted is not None
+    )
+    return df.filter(pl.all_horizontal(conditions))
+
+
+def _auc_list(sub: pl.DataFrame) -> list[float]:
+    """Return the non-null values of the ``auc`` column as a Python list."""
+    return sub.get_column("auc").drop_nulls().to_list()
+
+
+def _ap_list(sub: pl.DataFrame) -> list[float]:
+    """Return the non-null values of the ``ap`` column as a Python list."""
+    return sub.get_column("ap").drop_nulls().to_list()
+
+
+def _draw_mean_curve(
+    ax,
+    sub: pl.DataFrame,
+    *,
+    kind: str,
+    grid: np.ndarray,
+    name: str,
+    color: str,
+    linestyle: str = "-",
+) -> None:
+    """Draw the mean curve of ``sub`` with a ±1 std band; do nothing if no data."""
+    if sub.height == 0:
+        return
+    is_roc = kind == "roc"
+    rows = sub.select("roc_curve" if is_roc else "prc_curve").to_dicts()
+    mean_y, std_y = _interp_mean_std(_extract_curves(rows, kind), grid)
+    if np.all(np.isnan(mean_y)):
+        return
+
+    # Scalar metric shown in the legend: mean ± CI half-width over the rows
+    m, ci = mean_ci(_auc_list(sub) if is_roc else _ap_list(sub))
+    label = f"{name} ({'AUC' if is_roc else 'AP'}={m:.3f}±{ci:.3f})"
+
+    ax.plot(grid, mean_y, lw=2, color=color, linestyle=linestyle, label=label)
+    ax.fill_between(grid, mean_y - std_y, mean_y + std_y, alpha=0.15, color=color)
+
+
+def _plot_panel(
+    ax,
+    df: pl.DataFrame,
+    *,
+    eval_type: str,
+    net_for_deepsad: str,
+    latent_dim: int,
+    kind: str,
+):
+    """
+    Plot one panel: DeepSAD (net_for_deepsad) with 3 regimes + baselines (from Efficient).
+
+    Each curve is the mean over the selected rows on a shared grid, with a
+    shaded ±1 std band. Legend entries include mean±CI of AUC/AP. Baseline
+    rows are not filtered by semi-labeling regime, so they pool all regimes
+    present in ``df``. Groups with no rows or no usable curve are skipped.
+
+    Args:
+        ax: Matplotlib axes to draw on.
+        df: Results frame that already has a ``net_label`` column.
+        eval_type: Value of the ``eval`` column to plot.
+        net_for_deepsad: ``net_label`` whose DeepSAD runs are plotted.
+        latent_dim: Latent dimension to plot.
+        kind: ``"roc"`` for ROC curves; anything else draws PR curves.
+    """
+    is_roc = kind == "roc"
+    ax.grid(True, alpha=0.3)
+    ax.set_xlim(0, 1)
+    ax.set_ylim(0, 1)
+    ax.set_xlabel("FPR" if is_roc else "Recall")
+    ax.set_ylabel("TPR" if is_roc else "Precision")
+    grid = ROC_GRID if is_roc else PRC_GRID
+
+    # ----- Baselines (Efficient)
+    for model in ("isoforest", "ocsvm"):
+        sub_b = _select_rows(
+            df,
+            model=model,
+            eval_type=eval_type,
+            latent_dim=latent_dim,
+            net_label=BASELINE_NET,
+        )
+        _draw_mean_curve(
+            ax,
+            sub_b,
+            kind=kind,
+            grid=grid,
+            name=model,
+            color=COLOR_BASELINES[model],
+        )
+
+    # ----- DeepSAD (this panel's net) across semi-regimes
+    for regime in SEMI_REGIMES:
+        sn, sa = regime
+        sub_d = _select_rows(
+            df,
+            model="deepsad",
+            eval_type=eval_type,
+            latent_dim=latent_dim,
+            semi_normals=sn,
+            semi_anomalous=sa,
+            net_label=net_for_deepsad,
+        )
+        _draw_mean_curve(
+            ax,
+            sub_d,
+            kind=kind,
+            grid=grid,
+            name=f"DeepSAD {net_for_deepsad} — semi {sn}/{sa}",
+            color=COLOR_REGIMES[regime],
+            linestyle=LINESTYLES[regime],
+        )
+
+    # Chance line for ROC
+    if is_roc:
+        ax.plot([0, 1], [0, 1], "k--", alpha=0.6, label="Chance")
+
+    # Legend (built from the labels attached to the plotted lines)
+    ax.legend(loc="lower right", fontsize=9, frameon=True)
+
+
+def make_figures_for_dim(
+    df: pl.DataFrame, eval_type: str, latent_dim: int, out_dir: Path
+):
+    """Render and save the ROC and the PRC figure for one eval type and latent dim.
+
+    Each figure is a 1-row × 2-column grid: DeepSAD on LeNet in the left panel
+    and DeepSAD on Efficient in the right panel, each with the baselines.
+    The files are written to ``out_dir`` as ``roc_<latent_dim>_<eval_type>.png``
+    and ``prc_<latent_dim>_<eval_type>.png``.
+    """
+    panel_nets = ("LeNet", "Efficient")
+
+    for kind in ("roc", "prc"):
+        fig, axes = plt.subplots(
+            nrows=1, ncols=2, figsize=(14, 5), constrained_layout=True
+        )
+        fig.suptitle(
+            f"{kind.upper()} — {eval_type} — latent_dim={latent_dim}", fontsize=14
+        )
+
+        for ax, net in zip(axes, panel_nets):
+            _plot_panel(
+                ax,
+                df,
+                eval_type=eval_type,
+                net_for_deepsad=net,
+                latent_dim=latent_dim,
+                kind=kind,
+            )
+            ax.set_title(f"DeepSAD ({net}) + baselines")
+
+        target = out_dir / f"{kind}_{latent_dim}_{eval_type}.png"
+        fig.savefig(target, dpi=150, bbox_inches="tight")
+        # Close right away so figures do not pile up across the sweep
+        plt.close(fig)
+
+
+def main():
+    """Load the results, render all figures and archive them.
+
+    Figures for every eval type and latent dimension are written to
+    ``OUTPUT_DIR/archive/<timestamp>/``. A copy of this script is stored next
+    to them on a best-effort basis, and the files in ``OUTPUT_DIR/latest/``
+    are replaced by the files of this run.
+    """
+    # Load dataframe and prep
+    df = load_results_dataframe(ROOT, allow_cache=True)
+    df = _net_label_col(df)
+
+    # Filter to relevant models/evals only once
+    df = df.filter(
+        (pl.col("model").is_in(["deepsad", "isoforest", "ocsvm"]))
+        & (pl.col("eval").is_in(EVALS))
+    )
+
+    # Output/archiving: one timestamped folder per run
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    archive = OUTPUT_DIR / "archive"
+    archive.mkdir(parents=True, exist_ok=True)
+    ts_dir = archive / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    ts_dir.mkdir(parents=True, exist_ok=True)
+
+    # Generate figures
+    for eval_type in EVALS:
+        for dim in LATENT_DIMS:
+            make_figures_for_dim(
+                df, eval_type=eval_type, latent_dim=dim, out_dir=ts_dir
+            )
+
+    # Copy this script for provenance (best effort). The __file__ lookup sits
+    # inside the try so that environments without __file__ are covered too.
+    try:
+        shutil.copy2(Path(__file__), ts_dir)
+    except (NameError, OSError) as exc:
+        print(f"Warning: could not archive a copy of the script: {exc}")
+
+    # Update "latest": remove stale files, then mirror this run's files
+    latest = OUTPUT_DIR / "latest"
+    latest.mkdir(parents=True, exist_ok=True)
+    for f in latest.iterdir():
+        if f.is_file():
+            f.unlink()
+    for f in ts_dir.iterdir():
+        if f.is_file():
+            shutil.copy2(f, latest / f.name)
+
+    print(f"Saved plots to: {ts_dir}")
+    print(f"Also updated: {latest}")
+
+
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()