Jan Kowalczyk
2025-09-10 19:41:00 +02:00
parent ef0c36eed5
commit cf15d5501e
17 changed files with 1198 additions and 720 deletions

View File

@@ -12,7 +12,7 @@ import numpy as np
import polars as pl
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
from load_results import load_pretraining_results_dataframe
from plot_scripts.load_results import load_pretraining_results_dataframe
# ----------------------------
# Config
@@ -212,7 +212,7 @@ def plot_multi_loss_curve(arch_results, title, output_path, colors=None):
def main():
# Load AE DF (uses your cache if enabled in the loader)
df = load_pretraining_results_dataframe(ROOT, allow_cache=True, include_train=False)
df = load_pretraining_results_dataframe(ROOT, allow_cache=True)
# Optional: filter to just LeNet vs Efficient; drop this set() to plot all nets
wanted_nets = {"LeNet", "Efficient"}

View File

@@ -0,0 +1,544 @@
import json
import math
from typing import Any, Dict, Iterable, List, Optional, Tuple
import polars as pl
Number = (int, float)
FLOAT_DTYPES = {pl.Float32, pl.Float64}
SIMPLE_CASTABLE_DTYPES = (
pl.Int8,
pl.Int16,
pl.Int32,
pl.Int64,
pl.UInt8,
pl.UInt16,
pl.UInt32,
pl.UInt64,
pl.Float32,
pl.Float64,
pl.Utf8,
pl.Boolean,
pl.Date,
pl.Datetime,
pl.Time,
pl.Duration,
)
def _is_nan(x):
try:
return isinstance(x, float) and math.isnan(x)
except Exception:
return False
def _repr_safe(v):
try:
return json.dumps(v, default=str, ensure_ascii=False)
except Exception:
return repr(v)
def _to_python(v):
"""
Convert any leaf-ish object to plain Python types:
- pl.Series -> list (or scalar if length==1)
- objects with .to_list()/.tolist() -> list
- dict stays dict; list/tuple become list
"""
# Polars Series
if isinstance(v, pl.Series):
seq = v.to_list()
return seq[0] if len(seq) == 1 else seq
# Numpy scalars/arrays or anything with tolist()
if hasattr(v, "tolist"):
try:
return v.tolist()
except Exception:
pass
# Polars expressions should not appear here; if one does, it falls through and is
# stringified later by _repr_safe
# Anything iterable that isn't list/dict/str -> convert carefully
if isinstance(v, tuple):
return [_to_python(x) for x in v]
if isinstance(v, list):
return [_to_python(x) for x in v]
if isinstance(v, dict):
return {k: _to_python(val) for k, val in v.items()}
return v
def _safe_equal(a, b):
"""
Return a plain bool saying whether a and b are equal,
without ever producing a vector/Series.
"""
# exact same object
if a is b:
return True
# normalize
a_n = _to_python(a)
b_n = _to_python(b)
# handle NaNs
if _is_nan(a_n) and _is_nan(b_n):
return True
# plain scalars/containers
try:
eq = a_n == b_n
if isinstance(eq, bool):
return eq
except Exception:
pass
# fallback: compare stable JSON-ish reprs
return _repr_safe(a_n) == _repr_safe(b_n)
def _num_close(a: float, b: float, atol: float, rtol: float) -> bool:
# NaN==NaN treated equal
if _is_nan(a) and _is_nan(b):
return True
return abs(a - b) <= (atol + rtol * abs(b))
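# Worked illustration of the atol/rtol rule (hypothetical numbers); the same check is
# applied inline to numeric leaves in _recursive_leaf_diffs below:
#   _num_close(1.23, 1.29, atol=0.1, rtol=0.0)    -> True   (|diff| = 0.06 <= 0.1)
#   _num_close(1.23, 1.40, atol=0.1, rtol=0.0)    -> False  (|diff| = 0.17 >  0.1)
#   _num_close(100.0, 100.9, atol=0.0, rtol=0.01) -> True   (|diff| = 0.9 <= 0.01 * 100.9)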
# NOTE: values obtained via Series[i] / .to_list() are usually plain Python already,
# so _to_python above often no-ops for them.
def _iter_dict_keys(d: Dict[str, Any]) -> Iterable[str]:
# stable order, useful for predictable output
return sorted(d.keys())
def _recursive_leaf_diffs(a, b, path, out, float_atol, float_rtol):
# treat None==None
if a is None and b is None:
return
# normalize early
a = _to_python(a)
b = _to_python(b)
# tuples -> lists
if isinstance(a, tuple):
a = list(a)
if isinstance(b, tuple):
b = list(b)
# numbers
if isinstance(a, (int, float)) and isinstance(b, (int, float)):
if _is_nan(a) and _is_nan(b):
return
# |a-b| <= atol + rtol*|b|
if abs(float(a) - float(b)) > (float_atol + float_rtol * abs(float(b))):
out.append(
{
"path": path or "$",
"left": a,
"right": b,
"abs_delta": abs(float(a) - float(b)),
}
)
return
# exact types for strings/bools
if type(a) is type(b) and isinstance(a, (str, bool)):
if not _safe_equal(a, b):
out.append({"path": path or "$", "left": a, "right": b, "abs_delta": None})
return
# lists
if isinstance(a, list) and isinstance(b, list):
if len(a) != len(b):
out.append(
{
"path": f"{path or '$'}.length",
"left": len(a),
"right": len(b),
"abs_delta": None,
}
)
n = min(len(a), len(b))
for i in range(n):
_recursive_leaf_diffs(
a[i], b[i], f"{path or '$'}[{i}]", out, float_atol, float_rtol
)
for i in range(n, len(a)):
out.append(
{
"path": f"{path or '$'}[{i}]",
"left": a[i],
"right": None,
"abs_delta": None,
}
)
for i in range(n, len(b)):
out.append(
{
"path": f"{path or '$'}[{i}]",
"left": None,
"right": b[i],
"abs_delta": None,
}
)
return
# dicts
if isinstance(a, dict) and isinstance(b, dict):
keys = sorted(set(a.keys()) | set(b.keys()))
for k in keys:
ak = a.get(k, None)
bk = b.get(k, None)
if k not in a:
out.append(
{
"path": f"{path or '$'}.{k}",
"left": None,
"right": bk,
"abs_delta": None,
}
)
elif k not in b:
out.append(
{
"path": f"{path or '$'}.{k}",
"left": ak,
"right": None,
"abs_delta": None,
}
)
else:
_recursive_leaf_diffs(
ak, bk, f"{path or '$'}.{k}", out, float_atol, float_rtol
)
return
# fallback (type mismatch / opaque objects)
if not _safe_equal(a, b):
out.append({"path": path or "$", "left": a, "right": b, "abs_delta": None})
def _boolean_mask_simple_equals(s1: pl.Series, s2: pl.Series) -> pl.Series:
both_null = s1.is_null() & s2.is_null()
return ((s1 == s2) | both_null).fill_null(True)
def _boolean_mask_float_close(
s1: pl.Series, s2: pl.Series, atol: float, rtol: float
) -> pl.Series:
both_null = s1.is_null() & s2.is_null()
both_nan = s1.is_nan() & s2.is_nan()
abs_diff = (s1 - s2).abs()
near = abs_diff <= (atol + rtol * s2.abs())
return (near | both_null | both_nan).fill_null(False)
def _candidate_rows_for_nested(col_left: pl.Series, col_right: pl.Series) -> List[int]:
"""
Cheap way to find rows that might differ for nested types:
compare JSON dumps of values. This is only a prefilter.
"""
a = col_left.to_list()
b = col_right.to_list()
cand = []
for i, (x, y) in enumerate(zip(a, b)):
if _repr_safe(x) != _repr_safe(y):
cand.append(i)
return cand
def recursive_diff_frames(
left: pl.DataFrame,
right: pl.DataFrame,
ignore: Optional[List[str]] = None,
float_atol: float = 0.0,
float_rtol: float = 0.0,
max_rows_per_column: int = 20,
max_leafs_per_row: int = 200,
) -> Tuple[pl.DataFrame, pl.DataFrame]:
"""
Deep diff DataFrames, recursing into List/Struct/dict-like values.
Returns (diff_summary, diff_leaves).
- diff_summary: [column, n_rows_with_diffs]
- diff_leaves: [column, row, path, left, right, abs_delta]
left/right are stored as text (Utf8): JSON representations where possible.
"""
ignore = set(ignore or [])
# basic guards
if left.height != right.height:
raise ValueError(f"Row count differs: {left.height} vs {right.height}")
lcols = set(left.columns) - ignore
rcols = set(right.columns) - ignore
if lcols != rcols:
raise ValueError(
f"Column sets differ after ignoring.\nleft_only={sorted(lcols - rcols)}\nright_only={sorted(rcols - lcols)}"
)
cols = sorted(lcols)
summary_rows: List[Tuple[str, int]] = []
leaves_rows: List[Dict[str, Any]] = []
for c in cols:
s1, s2 = left[c], right[c]
# Fast path for simple, non-nested types with vectorized comparison
simple_dtype = (
s1.dtype in SIMPLE_CASTABLE_DTYPES and s2.dtype in SIMPLE_CASTABLE_DTYPES
)
is_floaty = s1.dtype in FLOAT_DTYPES and s2.dtype in FLOAT_DTYPES
if simple_dtype and not is_floaty:
equal_mask = _boolean_mask_simple_equals(s1, s2)
diff_idx = [i for i, ok in enumerate(equal_mask) if not ok]
elif simple_dtype and is_floaty:
close_mask = _boolean_mask_float_close(s1, s2, float_atol, float_rtol)
diff_idx = [i for i, ok in enumerate(close_mask) if not ok]
else:
# nested or exotic dtype → candidate rows via JSON compare
diff_idx = _candidate_rows_for_nested(s1, s2)
if not diff_idx:
continue
summary_rows.append((c, len(diff_idx)))
# limit how many rows per column we fully expand
for row in diff_idx[:max_rows_per_column]:
a = s1[row]
b = s2[row]
leaf_diffs: List[Dict[str, Any]] = []
_recursive_leaf_diffs(
a,
b,
path="",
out=leaf_diffs,
float_atol=float_atol,
float_rtol=float_rtol,
)
# Numeric leaves within tolerance were already dropped inside _recursive_leaf_diffs,
# so anything left in leaf_diffs is a genuine difference (it may well be empty for nested rows).
# cap the number of leaf diffs to avoid explosion
for d in leaf_diffs[:max_leafs_per_row]:
left_norm = _repr_safe(_to_python(d["left"])) # -> str
right_norm = _repr_safe(_to_python(d["right"])) # -> str
abs_delta_val = d.get("abs_delta", None)
try:
abs_delta_norm = (
float(abs_delta_val) if abs_delta_val is not None else None
)
except Exception:
abs_delta_norm = None # just in case something weird sneaks in
leaves_rows.append(
{
"column": str(c),
"row": int(row),
"path": str(d["path"] or "$"),
"left": left_norm, # str
"right": right_norm, # str
"abs_delta": abs_delta_norm, # float or None
}
)
diff_summary = (
pl.DataFrame(summary_rows, schema=["column", "n_rows_with_diffs"]).sort(
"n_rows_with_diffs", descending=True
)
if summary_rows
else pl.DataFrame(
{
"column": pl.Series([], pl.Utf8),
"n_rows_with_diffs": pl.Series([], pl.Int64),
}
)
)
# Build diff_leaves with stable schema; stringify complex left/right to avoid concat issues
if leaves_rows:
diff_leaves = pl.DataFrame(
{
"column": [r["column"] for r in leaves_rows],
"row": pl.Series([r["row"] for r in leaves_rows], dtype=pl.Int64),
"path": [r["path"] for r in leaves_rows],
"left": [r["left"] for r in leaves_rows], # Utf8
"right": [r["right"] for r in leaves_rows], # Utf8
"abs_delta": pl.Series(
[r["abs_delta"] for r in leaves_rows], dtype=pl.Float64
),
},
schema={
"column": pl.Utf8,
"row": pl.Int64,
"path": pl.Utf8,
"left": pl.Utf8,
"right": pl.Utf8,
"abs_delta": pl.Float64,
},
)
else:
diff_leaves = pl.DataFrame(
schema={
"column": pl.Utf8,
"row": pl.Int64,
"path": pl.Utf8,
"left": pl.Utf8,
"right": pl.Utf8,
"abs_delta": pl.Float64,
}
)
return diff_summary, diff_leaves
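# --- Illustrative usage sketch (hypothetical two-row frames; values chosen only to show the output shape) ---
if __name__ == "__main__":
    left_df = pl.DataFrame({"id": [1, 2], "scores": [[0.10, 0.20], [0.30, 0.40]]})
    right_df = pl.DataFrame({"id": [1, 2], "scores": [[0.10, 0.21], [0.30, 0.40]]})
    # Only the nested "scores" column differs, and only beyond the 0.005 absolute tolerance.
    summary, leaves = recursive_diff_frames(left_df, right_df, float_atol=0.005)
    print(summary)  # -> one row: column="scores", n_rows_with_diffs=1
    print(leaves)   # -> one leaf: row=0, path="$[1]", left="0.2", right="0.21", abs_delta≈0.01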
# FLOAT_DTYPES = {pl.Float32, pl.Float64}
# def diff_frames(
# left: pl.DataFrame,
# right: pl.DataFrame,
# ignore: Optional[List[str]] = None,
# float_atol: float = 0.0,
# float_rtol: float = 0.0,
# sample: int = 20,
# ) -> Tuple[pl.DataFrame, pl.DataFrame]:
# ignore = set(ignore or [])
# if left.height != right.height:
# raise ValueError(f"Row count differs: {left.height} vs {right.height}")
# lcols = set(left.columns) - ignore
# rcols = set(right.columns) - ignore
# if lcols != rcols:
# raise ValueError(
# f"Column sets differ after ignoring.\nleft_only={sorted(lcols - rcols)}\nright_only={sorted(rcols - lcols)}"
# )
# cols = sorted(lcols)
# row_idx = pl.Series("row", range(left.height), dtype=pl.Int64)
# def _float_diff_mask(s1: pl.Series, s2: pl.Series) -> pl.Series:
# both_null = s1.is_null() & s2.is_null()
# both_nan = s1.is_nan() & s2.is_nan()
# abs_diff = (s1 - s2).abs()
# near = abs_diff <= (float_atol + float_rtol * s2.abs())
# return ~(near | both_null | both_nan)
# def _nonfloat_diff_mask(s1: pl.Series, s2: pl.Series) -> pl.Series:
# both_null = s1.is_null() & s2.is_null()
# return ~((s1 == s2) | both_null).fill_null(True)
# examples_frames = []
# summary_rows = []
# for c in cols:
# s1, s2 = left[c], right[c]
# if s1.dtype in FLOAT_DTYPES and s2.dtype in FLOAT_DTYPES:
# diff_mask = _float_diff_mask(s1, s2)
# abs_delta = (s1 - s2).abs()
# else:
# diff_mask = _nonfloat_diff_mask(s1, s2)
# abs_delta = None
# diff_mask = diff_mask.cast(pl.Boolean)
# n_diff = int(diff_mask.sum())
# if n_diff == 0:
# continue
# summary_rows.append((c, n_diff))
# k = min(sample, n_diff)
# idx = row_idx.filter(diff_mask)[:k]
# def to_utf8_safe(s: pl.Series) -> pl.Series:
# # Fast path for simple scalars
# if s.dtype in (
# pl.Int8,
# pl.Int16,
# pl.Int32,
# pl.Int64,
# pl.UInt8,
# pl.UInt16,
# pl.UInt32,
# pl.UInt64,
# pl.Float32,
# pl.Float64,
# pl.Utf8,
# pl.Boolean,
# pl.Date,
# pl.Datetime,
# pl.Time,
# pl.Duration,
# ):
# return s.cast(pl.Utf8)
# # Fallback for nested/complex types: List, Struct, etc.
# return s.map_elements(
# lambda v: json.dumps(v, default=str, allow_nan=True),
# return_dtype=pl.Utf8,
# )
# ex_left = to_utf8_safe(s1.filter(diff_mask)[:k])
# ex_right = to_utf8_safe(s2.filter(diff_mask)[:k])
# ex = pl.DataFrame(
# {
# "column": [c] * k,
# "row": idx,
# "left": ex_left,
# "right": ex_right,
# "dtype_left": [str(s1.dtype)] * k,
# "dtype_right": [str(s2.dtype)] * k,
# }
# )
# # unify schema: always have abs_delta as Float64 (None for non-floats)
# if abs_delta is not None:
# ex = ex.with_columns(
# abs_delta.filter(diff_mask)[:k].cast(pl.Float64).alias("abs_delta")
# )
# else:
# ex = ex.with_columns(pl.lit(None, dtype=pl.Float64).alias("abs_delta"))
# examples_frames.append(ex)
# diff_summary = (
# pl.DataFrame(summary_rows, schema=["column", "n_different"]).sort(
# "n_different", descending=True
# )
# if summary_rows
# else pl.DataFrame(
# {
# "column": pl.Series([], pl.Utf8),
# "n_different": pl.Series([], pl.Int64),
# }
# )
# )
# diff_examples = (
# pl.concat(examples_frames) if examples_frames else pl.DataFrame()
# )
# return diff_summary, diff_examples
# # --- usage ---
# # diff_summary: one row per column with a count of differing rows
# # diff_examples: sample rows showing left/right values (and abs_delta for floats)
# summary, examples = diff_frames(
# df1, df2, ignore=["timestamp"], float_atol=0.1, float_rtol=0.0, sample=25
# )
# print(summary) # which columns differ and how much
# print(examples) # sample mismatches with row indices

View File

@@ -3,10 +3,12 @@ from __future__ import annotations
import json
import pickle
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import polars as pl
from diff_df import recursive_diff_frames
from polars.testing import assert_frame_equal
# ------------------------------------------------------------
# Config you can tweak
@@ -75,7 +77,8 @@ PRETRAIN_SCHEMA = {
"fold": pl.Int32,
"split": pl.Utf8, # "train" | "test"
# timings and optimization
"time": pl.Float64,
"train_time": pl.Float64,
"test_time": pl.Float64,
"loss": pl.Float64,
# per-sample arrays (as lists)
"indices": pl.List(pl.Int32),
@@ -247,6 +250,14 @@ def read_pickle(p: Path) -> Any:
# ------------------------------------------------------------
# Extractors for each model
# ------------------------------------------------------------
counting = {
(label_method, eval_method): []
for label_method in ["exp_based", "manual_based"]
for eval_method in ["roc", "prc"]
}
def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
"""
deepsad under data['test'][eval], with extra per-eval arrays and AP present.
@@ -257,6 +268,8 @@ def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
evd = test.get(ev)
if not isinstance(evd, dict):
continue
counting[(ev, "roc")].append(len(evd["roc"][0]))
counting[(ev, "prc")].append(len(evd["prc"][0]))
out[ev] = {
"auc": float(evd["auc"])
if "auc" in evd and evd["auc"] is not None
@@ -444,7 +457,6 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
def load_pretraining_results_dataframe(
root: Path,
allow_cache: bool = True,
include_train: bool = False, # <— default: store only TEST to keep cache tiny
keep_file_names: bool = False, # <— drop file_names by default; they're repeated
parquet_compression: str = "zstd",
parquet_compression_level: int = 7, # <— stronger compression than default
@@ -484,9 +496,6 @@ def load_pretraining_results_dataframe(
semi_anomalous = int(cfg.get("num_known_outlier"))
k = int(cfg.get("k_fold_num"))
# Only test split by default (include_train=False)
splits = ("train", "test") if include_train else ("test",)
for fold in range(k):
pkl = exp_dir / f"results_ae_{fold}.pkl"
if not pkl.exists():
@@ -498,57 +507,53 @@ def load_pretraining_results_dataframe(
print(f"[warn] failed to read {pkl.name}: {e}")
continue
for split in splits:
splitd = data.get(split)
if not isinstance(splitd, dict):
continue
train_time = data.get("train", {}).get("time")
data = data.get("test", {})
rows.append(
{
"network": network,
"latent_dim": latent_dim,
"semi_normals": semi_normals,
"semi_anomalous": semi_anomalous,
"model": "ae",
"fold": fold,
"split": split,
"time": float(splitd.get("time"))
if splitd.get("time") is not None
else None,
"loss": float(splitd.get("loss"))
if splitd.get("loss") is not None
else None,
# ints as Int32, scores as Float32 to save space
"indices": normalize_int_list(splitd.get("indices")),
"labels_exp_based": normalize_int_list(
splitd.get("labels_exp_based")
),
"labels_manual_based": normalize_int_list(
splitd.get("labels_manual_based")
),
"semi_targets": normalize_int_list(splitd.get("semi_targets")),
"file_ids": normalize_int_list(splitd.get("file_ids")),
"frame_ids": normalize_int_list(splitd.get("frame_ids")),
"scores": (
None
if splitd.get("scores") is None
else [
float(x)
for x in (
splitd["scores"].tolist()
if isinstance(splitd["scores"], np.ndarray)
else splitd["scores"]
)
]
),
"file_names": normalize_file_names(splitd.get("file_names"))
if keep_file_names
else None,
"folder": str(exp_dir),
"k_fold_num": k,
"config_json": cfg_json,
}
)
rows.append(
{
"network": network,
"latent_dim": latent_dim,
"semi_normals": semi_normals,
"semi_anomalous": semi_anomalous,
"model": "ae",
"fold": fold,
"train_time": train_time,
"test_time": data.get("time"),
"loss": float(data.get("loss"))
if data.get("loss") is not None
else None,
# ints as Int32, scores as Float32 to save space
"indices": normalize_int_list(data.get("indices")),
"labels_exp_based": normalize_int_list(
data.get("labels_exp_based")
),
"labels_manual_based": normalize_int_list(
data.get("labels_manual_based")
),
"semi_targets": normalize_int_list(data.get("semi_targets")),
"file_ids": normalize_int_list(data.get("file_ids")),
"frame_ids": normalize_int_list(data.get("frame_ids")),
"scores": (
None
if data.get("scores") is None
else [
float(x)
for x in (
data["scores"].tolist()
if isinstance(data["scores"], np.ndarray)
else data["scores"]
)
]
),
"file_names": normalize_file_names(data.get("file_names"))
if keep_file_names
else None,
"folder": str(exp_dir),
"k_fold_num": k,
"config_json": cfg_json,
}
)
if not rows:
return pl.DataFrame(schema=PRETRAIN_SCHEMA)
@@ -561,7 +566,7 @@ def load_pretraining_results_dataframe(
pl.col(
"latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
).cast(pl.Int32),
pl.col("time", "loss").cast(pl.Float64),
pl.col("test_time", "train_time", "loss").cast(pl.Float64),
pl.col("scores").cast(pl.List(pl.Float32)), # ensure downcast took
)
@@ -585,12 +590,53 @@ def load_pretraining_results_dataframe(
def main():
root = Path("/home/fedex/mt/results/done")
df = load_results_dataframe(root, allow_cache=True)
print(df.shape, df.head())
root = Path("/home/fedex/mt/results/copy")
df1 = load_results_dataframe(root, allow_cache=True)
exit(0)
df_pre = load_pretraining_results_dataframe(root, allow_cache=True)
print("pretraining:", df_pre.shape, df_pre.head())
retest_root = Path("/home/fedex/mt/results/copy/retest_nodrop")
df2 = load_results_dataframe(retest_root, allow_cache=False).drop("folder")
# exact schema & shape first (optional but helpful messages)
assert df1.shape == df2.shape, f"Shape differs: {df1.shape} vs {df2.shape}"
assert set(df1.columns) == set(df2.columns), (
f"Column sets differ: {df1.columns} vs {df2.columns}"
)
# allow small float diffs, ignore column order differences if you want
df1_sorted = df1.select(sorted(df1.columns))
df2_sorted = df2.select(sorted(df2.columns))
# Optionally pre-align/sort both frames by a stable key before diffing.
summary, leaves = recursive_diff_frames(
df1,
df2,
ignore=["timestamp"], # columns to ignore
float_atol=0.1, # absolute tolerance for floats
float_rtol=0.0, # relative tolerance for floats
max_rows_per_column=20, # limit expansion per column
max_leafs_per_row=200, # cap leaves per row
)
pl.Config.set_fmt_table_cell_list_len(100)
pl.Config.set_tbl_rows(100)
print(summary) # which columns differ & how many rows
print(leaves) # exact nested paths + scalar diffs
# check_exact=False lets us use atol/rtol for floats
assert_frame_equal(
df1_sorted,
df2_sorted,
check_exact=False,
atol=0.1, # absolute tolerance for floats
rtol=0.0, # relative tolerance (set if you want % based)
check_dtypes=True, # set False if you only care about values
)
print("DataFrames match within tolerance ✅")
# df_pre = load_pretraining_results_dataframe(root, allow_cache=True)
# print("pretraining:", df_pre.shape, df_pre.head())
if __name__ == "__main__":

View File

@@ -10,7 +10,7 @@ import polars as pl
from matplotlib.lines import Line2D
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
from load_results import load_results_dataframe
from plot_scripts.load_results import load_results_dataframe
# ----------------------------
# Config

View File

@@ -12,7 +12,7 @@ from matplotlib.lines import Line2D
from scipy.stats import sem, t
# CHANGE THIS IMPORT IF YOUR LOADER MODULE NAME IS DIFFERENT
from load_results import load_results_dataframe
from plot_scripts.load_results import load_results_dataframe
# ---------------------------------
# Config

View File

@@ -0,0 +1,704 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
import numpy as np
import pandas as pd
import polars as pl
from load_results import (
load_pretraining_results_dataframe,
load_results_dataframe,
)
# ----------------------------
# Config
# ----------------------------
RESULTS_ROOT = Path("/home/fedex/mt/results/done") # folder with experiment subdirs
OUTPUT_DIR = Path("/home/fedex/mt/plots/setup_runtime_tables") # where .tex goes
# If you want to optionally prefer a specific network label for baselines in column names,
# set to a substring to detect (e.g. "efficient"). If None, keep network as-is.
BASELINE_NETWORK_HINT: Optional[str] = None # e.g., "efficient" or None
# ----------------------------
# Helpers
# ----------------------------
def _net_label_for_display(net: str | None) -> str:
s = (net or "").lower()
if "effic" in s:
return "Efficient"
if "lenet" in s:
return "LeNet"
return net or ""
def _fmt_mean_std_n(
mean: float | None, std: float | None, n: int | None, unit: str = ""
) -> str:
if mean is None or (isinstance(mean, float) and (np.isnan(mean) or np.isinf(mean))):
return "-"
base = f"{mean:.2f}"
if std is not None and not (
isinstance(std, float) and (np.isnan(std) or np.isinf(std))
):
base = f"{base} ± {std:.2f}"
if unit:
base = f"{base} {unit}"
if n is not None and n > 0:
base = f"{base} (n={n})"
return base
def _fmt_pair(n: int, m: int) -> str:
return f"{n}/{m}"
def _fmt_mean_std(mean: float | None, std: float | None, n: int | None) -> str:
if mean is None or (isinstance(mean, float) and (np.isnan(mean) or np.isinf(mean))):
return "-"
if std is None or (isinstance(std, float) and (np.isnan(std) or np.isinf(std))):
return f"{mean:.2f}"
if n is None or n < 1:
return f"{mean:.2f} ± {std:.2f}"
return f"{mean:.2f} ± {std:.2f} (n={n})"
def _parse_cfg(cfg_json: Optional[str]) -> Dict[str, Any]:
if not cfg_json:
return {}
try:
return json.loads(cfg_json)
except Exception:
return {}
def _key_params(model: str, cfg: Dict[str, Any]) -> str:
"""Compact, model-specific parameter string for the table."""
if model == "deepsad":
bs = cfg.get("batch_size")
ne = cfg.get("n_epochs")
lr = cfg.get("lr")
wd = cfg.get("weight_decay")
return f"bs={bs}, epochs={ne}, lr={lr}, wd={wd}"
if model == "isoforest":
est = cfg.get("isoforest_n_estimators")
ms = cfg.get("isoforest_max_samples")
cont = cfg.get("isoforest_contamination")
return f"n_estimators={est}, max_samples={ms}, cont={cont}"
if model == "ocsvm":
ker = cfg.get("ocsvm_kernel")
nu = cfg.get("ocsvm_nu")
return f"kernel={ker}, nu={nu}"
return "-"
def _method_col_name(model: str, network: str) -> str:
"""
Column heading for pivot tables:
- deepsad carries the network (e.g., 'DeepSAD / LeNet')
- baselines carry their own model name; optionally annotate network
"""
label = model.lower()
if label == "deepsad":
return f"DeepSAD / {network}"
# baselines; optionally simplify/standardize network name
if (
BASELINE_NETWORK_HINT
and BASELINE_NETWORK_HINT.lower() not in (network or "").lower()
):
# If you want to collapse baseline duplicates to a single name, you can force it here
return model.capitalize()
# Otherwise, keep network variant explicit
return f"{model.capitalize()} / {network}"
def _prepare_per_fold_metrics(df: pl.DataFrame) -> pl.DataFrame:
"""
Returns one row per (folder, model, fold) with:
- train_time, test_time
- n_test (len(scores))
- n_epochs (from config_json; DeepSAD only)
- latency_ms = 1000 * test_time / n_test
- time_per_epoch = train_time / n_epochs (DeepSAD only)
"""
base = (
df.select(
"folder",
"network",
"model",
"latent_dim",
"semi_normals",
"semi_anomalous",
"fold",
"train_time",
"test_time",
"scores",
"config_json",
)
.with_columns(
n_test=pl.col("scores").list.len(),
n_epochs=pl.col("config_json")
.str.json_path_match("$.n_epochs")
.cast(pl.Int64),
)
.drop("scores")
)
# de-dup across evals
uniq = base.unique(subset=["folder", "model", "fold"])
# derived metrics
uniq = uniq.with_columns(
latency_ms=pl.when((pl.col("test_time") > 0) & (pl.col("n_test") > 0))
.then(1000.0 * pl.col("test_time") / pl.col("n_test"))
.otherwise(None)
.cast(pl.Float64),
time_per_epoch=pl.when(
(pl.col("model") == "deepsad") & (pl.col("n_epochs") > 0)
)
.then(pl.col("train_time") / pl.col("n_epochs"))
.otherwise(None)
.cast(pl.Float64),
network_disp=pl.col("network")
.cast(pl.Utf8)
.map_elements(_net_label_for_display, return_dtype=pl.Utf8),
)
return uniq
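# Numeric sketch of the derived per-fold metrics (hypothetical fold):
#   test_time=2.5 s over n_test=5000 samples  -> latency_ms = 1000 * 2.5 / 5000 = 0.5 ms/sample
#   train_time=600 s over n_epochs=50         -> time_per_epoch = 600 / 50 = 12.0 s/epoch (DeepSAD only)
# Baselines (isoforest/ocsvm) carry no n_epochs, so their time_per_epoch stays null.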
def _prepare_aggregates(df: pl.DataFrame) -> pl.DataFrame:
"""
Deduplicate across evals, then aggregate times across folds for each
(network, model, latent_dim, semi_normals, semi_anomalous).
"""
# Keep only columns we need
base = df.select(
"folder",
"network",
"model",
"latent_dim",
"semi_normals",
"semi_anomalous",
"fold",
"train_time",
"test_time",
"config_json",
)
# Drop duplicates across evals: same (folder, model, fold) should have identical timings
uniq = base.unique(subset=["folder", "model", "fold"]).with_columns(
# Keep network as plain Utf8; display-label normalization (if any) happens downstream
pl.col("network").cast(pl.Utf8)
)
# Group across folds
agg = (
uniq.group_by(
["network", "model", "latent_dim", "semi_normals", "semi_anomalous"]
)
.agg(
pl.len().alias("n_folds"),
pl.col("train_time").mean().alias("train_mean"),
pl.col("train_time").std(ddof=1).alias("train_std"),
pl.col("test_time").mean().alias("test_mean"),
pl.col("test_time").std(ddof=1).alias("test_std"),
pl.col("config_json")
.first()
.alias("config_json"), # one exemplar cfg per group
)
.sort(["semi_normals", "semi_anomalous", "latent_dim", "network", "model"])
)
return agg
def make_training_runtime_table(df: pl.DataFrame) -> str:
"""
Returns a LaTeX table (string) for TRAIN runtimes: mean ± std (seconds) across folds.
Rows: Semi (N/O), Latent Dim
Columns: methods split (DeepSAD/LeNet, DeepSAD/Efficient, IsoForest[/net], OCSVM[/net])
"""
agg = _prepare_aggregates(df)
# Prepare display strings and column keys
tbl = agg.with_columns(
pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
"semi"
),
pl.col("model").cast(pl.Utf8),
pl.col("network").cast(pl.Utf8),
pl.col("latent_dim").cast(pl.Int64),
# return_dtype=pl.Utf8 pins the output dtype of map_elements
pl.struct(["train_mean", "train_std", "n_folds"])
.map_elements(
lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n_folds"]),
return_dtype=pl.Utf8,
)
.alias("train_fmt"),
# same: pin return_dtype for the method-name mapping
pl.struct(["model", "network"])
.map_elements(
lambda s: _method_col_name(s["model"], s["network"]),
return_dtype=pl.Utf8,
)
.alias("method"),
).select("semi", "latent_dim", "method", "train_fmt")
# Pivot to wide form: one cell per (semi, latent_dim, method)
wide = tbl.pivot(
values="train_fmt",
index=["semi", "latent_dim"],
columns="method",
aggregate_function="first",
).sort(["semi", "latent_dim"])
# Fill missing with '-' and export
pdf = wide.fill_null("-").to_pandas()
pdf.index = pd.MultiIndex.from_frame(pdf[["semi", "latent_dim"]])
pdf = pdf.drop(columns=["semi", "latent_dim"])
latex = pdf.to_latex(
index=True,
escape=True,
na_rep="-",
multicolumn=True,
multicolumn_format="c",
bold_rows=False,
caption="Training runtime (seconds): mean ± std across folds (n in parentheses).",
label="tab:train_runtimes",
)
return latex
def make_inference_runtime_table(df: pl.DataFrame) -> str:
"""
Returns a LaTeX table (string) for TEST/INFERENCE runtimes: mean ± std (seconds) across folds.
Same layout as training table.
"""
agg = _prepare_aggregates(df)
tbl = agg.with_columns(
pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
"semi"
),
pl.col("model").cast(pl.Utf8),
pl.col("network").cast(pl.Utf8),
pl.col("latent_dim").cast(pl.Int64),
pl.struct(["test_mean", "test_std", "n_folds"])
.map_elements(
lambda s: _fmt_mean_std(s["test_mean"], s["test_std"], s["n_folds"]),
return_dtype=pl.Utf8,
)
.alias("test_fmt"),
pl.struct(["model", "network"])
.map_elements(
lambda s: _method_col_name(s["model"], s["network"]),
return_dtype=pl.Utf8,
)
.alias("method"),
).select("semi", "latent_dim", "method", "test_fmt")
wide = tbl.pivot(
values="test_fmt",
index=["semi", "latent_dim"],
columns="method",
aggregate_function="first",
).sort(["semi", "latent_dim"])
pdf = wide.fill_null("-").to_pandas()
pdf.index = pd.MultiIndex.from_frame(pdf[["semi", "latent_dim"]])
pdf = pdf.drop(columns=["semi", "latent_dim"])
latex = pdf.to_latex(
index=True,
escape=True,
na_rep="-",
multicolumn=True,
multicolumn_format="c",
bold_rows=False,
caption="Inference/Test runtime (seconds): mean ± std across folds (n in parentheses).",
label="tab:test_runtimes",
)
return latex
def make_longform_train_table_with_params(df: pl.DataFrame) -> str:
"""
(Optional) Long-form table that includes a 'Params' column extracted from config_json.
Useful if you want to show per-model settings alongside the runtimes.
"""
agg = _prepare_aggregates(df)
# Build params column from JSON for readability
long = (
agg.with_columns(
pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
"semi"
),
pl.col("latent_dim").cast(pl.Int64),
pl.struct(["model", "config_json"])
.map_elements(
lambda s: _key_params(s["model"], _parse_cfg(s["config_json"])),
return_dtype=pl.Utf8,
)
.alias("params"),
pl.struct(["train_mean", "train_std", "n_folds"])
.map_elements(
lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n_folds"])
)
.alias("train_time_fmt"),
)
.select(
"network",
"model",
"latent_dim",
"semi",
"params",
"train_time_fmt",
)
.sort(["semi", "latent_dim", "network", "model"])
)
pdf = long.to_pandas()
pdf.rename(
columns={
"network": "Network",
"model": "Method",
"latent_dim": "Latent Dim",
"semi": "Semi (N/O)",
"params": "Params",
"train_time_fmt": "Train time [s] (mean ± std)",
},
inplace=True,
)
latex = pdf.to_latex(
index=False,
escape=True,
longtable=False,
caption="Training runtime with key parameters.",
label="tab:train_runtimes_params",
)
return latex
def make_training_runtime_table_compact(df: pl.DataFrame) -> str:
per_fold = _prepare_per_fold_metrics(df)
# DeepSAD: keep LeNet vs Efficient, collapse semis
ds = (
per_fold.filter(pl.col("model") == "deepsad")
.group_by(["model", "network_disp", "latent_dim"])
.agg(
n=pl.len(),
train_mean=pl.mean("train_time"),
train_std=pl.std("train_time", ddof=1),
tpe_mean=pl.mean("time_per_epoch"),
tpe_std=pl.std("time_per_epoch", ddof=1),
)
.with_columns(
method=pl.format("DeepSAD / {}", pl.col("network_disp")),
)
)
# Baselines: collapse networks & semis; only vary by latent_dim
bl = (
per_fold.filter(pl.col("model").is_in(["isoforest", "ocsvm"]))
.group_by(["model", "latent_dim"])
.agg(
n=pl.len(),
train_mean=pl.mean("train_time"),
train_std=pl.std("train_time", ddof=1),
)
.with_columns(
method=pl.when(pl.col("model") == "isoforest")
.then(pl.lit("IsoForest"))
.when(pl.col("model") == "ocsvm")
.then(pl.lit("OCSVM"))
.otherwise(pl.lit("Baseline"))
)
)
# --- Standardize schemas before concat ---
ds_std = ds.select(
pl.col("latent_dim").cast(pl.Int64),
pl.col("method").cast(pl.Utf8),
pl.col("train_mean").cast(pl.Float64),
pl.col("train_std").cast(pl.Float64),
pl.col("tpe_mean").cast(pl.Float64),
pl.col("tpe_std").cast(pl.Float64),
pl.col("n").cast(pl.Int64),
)
bl_std = bl.select(
pl.col("latent_dim").cast(pl.Int64),
pl.col("method").cast(pl.Utf8),
pl.col("train_mean").cast(pl.Float64),
pl.col("train_std").cast(pl.Float64),
pl.lit(None, dtype=pl.Float64).alias("tpe_mean"),
pl.lit(None, dtype=pl.Float64).alias("tpe_std"),
pl.col("n").cast(pl.Int64),
)
agg = pl.concat([ds_std, bl_std], how="vertical")
# Format cell: total [s]; DeepSAD also appends (italic) per-epoch
def _fmt_train_cell(s: dict) -> str:
total = _fmt_mean_std_n(s["train_mean"], s["train_std"], s["n"], "s")
if s.get("tpe_mean") is None or (
isinstance(s.get("tpe_mean"), float) and np.isnan(s["tpe_mean"])
):
return total
tpe = _fmt_mean_std_n(s["tpe_mean"], s["tpe_std"], None, "s/epoch")
return f"{total} (\\textit{{{tpe}}})"
tbl = agg.with_columns(
pl.struct(["train_mean", "train_std", "tpe_mean", "tpe_std", "n"])
.map_elements(_fmt_train_cell, return_dtype=pl.Utf8)
.alias("train_fmt"),
).select("latent_dim", "method", "train_fmt")
# Pivot and order columns nicely
wide = tbl.pivot(
values="train_fmt",
index=["latent_dim"],
columns="method",
aggregate_function="first",
).sort("latent_dim")
pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")
desired_cols = [
c
for c in ["DeepSAD / LeNet", "DeepSAD / Efficient", "IsoForest", "OCSVM"]
if c in pdf.columns
]
if desired_cols:
pdf = pdf.reindex(columns=desired_cols)
latex = pdf.to_latex(
index=True,
escape=True,
na_rep="-",
multicolumn=True,
multicolumn_format="c",
bold_rows=False,
caption="Training runtime: total seconds (mean ± std). DeepSAD cells also show \\textit{seconds per epoch} in parentheses.",
label="tab:train_runtimes_compact",
)
return latex
def make_inference_latency_table_compact(df: pl.DataFrame) -> str:
per_fold = _prepare_per_fold_metrics(df)
# DeepSAD: keep networks; collapse semis
ds = (
per_fold.filter(pl.col("model") == "deepsad")
.group_by(["model", "network_disp", "latent_dim"])
.agg(
n=pl.len(),
lat_mean=pl.mean("latency_ms"),
lat_std=pl.std("latency_ms", ddof=1),
)
.with_columns(
method=pl.format("DeepSAD / {}", pl.col("network_disp")),
)
)
# Baselines: collapse networks & semis
bl = (
per_fold.filter(pl.col("model").is_in(["isoforest", "ocsvm"]))
.group_by(["model", "latent_dim"])
.agg(
n=pl.len(),
lat_mean=pl.mean("latency_ms"),
lat_std=pl.std("latency_ms", ddof=1),
)
.with_columns(
method=pl.when(pl.col("model") == "isoforest")
.then(pl.lit("IsoForest"))
.when(pl.col("model") == "ocsvm")
.then(pl.lit("OCSVM"))
.otherwise(pl.lit("Baseline"))
)
)
# --- Standardize schemas before concat ---
ds_std = ds.select(
pl.col("latent_dim").cast(pl.Int64),
pl.col("method").cast(pl.Utf8),
pl.col("lat_mean").cast(pl.Float64),
pl.col("lat_std").cast(pl.Float64),
pl.col("n").cast(pl.Int64),
)
bl_std = bl.select(
pl.col("latent_dim").cast(pl.Int64),
pl.col("method").cast(pl.Utf8),
pl.col("lat_mean").cast(pl.Float64),
pl.col("lat_std").cast(pl.Float64),
pl.col("n").cast(pl.Int64),
)
agg = pl.concat([ds_std, bl_std], how="vertical")
def _fmt_lat_cell(s: dict) -> str:
return _fmt_mean_std_n(s["lat_mean"], s["lat_std"], s["n"], "ms")
tbl = agg.with_columns(
pl.struct(["lat_mean", "lat_std", "n"])
.map_elements(_fmt_lat_cell, return_dtype=pl.Utf8)
.alias("lat_fmt"),
).select("latent_dim", "method", "lat_fmt")
wide = tbl.pivot(
values="lat_fmt",
index=["latent_dim"],
columns="method",
aggregate_function="first",
).sort("latent_dim")
pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")
desired_cols = [
c
for c in ["DeepSAD / LeNet", "DeepSAD / Efficient", "IsoForest", "OCSVM"]
if c in pdf.columns
]
if desired_cols:
pdf = pdf.reindex(columns=desired_cols)
latex = pdf.to_latex(
index=True,
escape=True,
na_rep="-",
multicolumn=True,
multicolumn_format="c",
bold_rows=False,
caption="Inference latency (ms/sample): mean ± std across folds; baselines collapsed across networks and semi-labeling.",
label="tab:inference_latency_compact",
)
return latex
def make_ae_pretraining_runtime_table(df_pre: pl.DataFrame) -> str:
"""
LaTeX table: Autoencoder (pretraining) runtime per latent dim.
Rows: latent_dim
Cols: AE / LeNet, AE / Efficient (mean ± std seconds across folds)
"""
# minimal columns we need
base = df_pre.select(
pl.col("network").cast(pl.Utf8),
pl.col("latent_dim").cast(pl.Int64),
pl.col("fold").cast(pl.Int64),
pl.col("train_time").cast(pl.Float64),
).drop_nulls(subset=["network", "latent_dim", "train_time"])
# Nice display label for network
network_disp = (
pl.when(pl.col("network").str.contains("efficient"))
.then(pl.lit("Efficient"))
.when(pl.col("network").str.contains("LeNet"))
.then(pl.lit("LeNet"))
.otherwise(pl.col("network"))
.alias("network_disp")
)
agg = (
base.with_columns(network_disp)
.group_by(["network_disp", "latent_dim"])
.agg(
n=pl.len(),
train_mean=pl.mean("train_time"),
train_std=pl.std("train_time", ddof=1),
)
.with_columns(
pl.format("AE / {}", pl.col("network_disp")).alias("method"),
pl.struct(["train_mean", "train_std", "n"])
.map_elements(
lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n"]),
return_dtype=pl.Utf8,
)
.alias("train_fmt"),
)
.select("latent_dim", "method", "train_fmt")
.sort(["latent_dim", "method"])
)
wide = agg.pivot(
values="train_fmt",
index=["latent_dim"],
columns="method",
aggregate_function="first",
).sort("latent_dim")
pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")
# Order columns if both exist (method names are built as "AE / <network>" above)
desired = [
c for c in ["AE / LeNet", "AE / Efficient"] if c in pdf.columns
]
if desired:
pdf = pdf.reindex(columns=desired)
latex = pdf.to_latex(
index=True,
escape=True,
na_rep="-",
multicolumn=True,
multicolumn_format="c",
bold_rows=False,
caption="Autoencoder pretraining runtime (seconds): mean ± std across folds.",
label="tab:ae_pretrain_runtimes",
)
return latex
# ----------------------------
# Main
# ----------------------------
def main():
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Main results
df = load_results_dataframe(RESULTS_ROOT, allow_cache=True)
if "config_json" not in df.columns:
df = df.with_columns(pl.lit(None).alias("config_json"))
# AE pretraining results
df_pre = load_pretraining_results_dataframe(RESULTS_ROOT, allow_cache=True)
# Build LaTeX tables
latex_train = make_training_runtime_table(df)
latex_test = make_inference_runtime_table(df)
latex_train_params = make_longform_train_table_with_params(df)
latex_ae = make_ae_pretraining_runtime_table(df_pre)
# Timestamped output dirs
ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
ts_dir = OUTPUT_DIR / "archive" / ts
ts_dir.mkdir(parents=True, exist_ok=True)
# Write files
(ts_dir / "train_runtimes.tex").write_text(latex_train)
(ts_dir / "test_runtimes.tex").write_text(latex_test)
(ts_dir / "train_runtimes_with_params.tex").write_text(latex_train_params)
(ts_dir / "ae_pretraining_runtimes.tex").write_text(latex_ae)
# Save script & mirror latest
script_path = Path(__file__)
shutil.copy2(script_path, ts_dir)
latest = OUTPUT_DIR / "latest"
latest.mkdir(exist_ok=True, parents=True)
for f in ts_dir.iterdir():
if f.is_file():
shutil.copy2(f, latest / f.name)
print(f"Saved LaTeX tables to: {ts_dir}")
print(f"Also updated: {latest}")
if __name__ == "__main__":
main()