Jan Kowalczyk
2025-09-22 08:15:54 +02:00
parent a20a4a0832
commit 8e7c210872
5 changed files with 454 additions and 194 deletions

View File

@@ -26,7 +26,8 @@ SCHEMA_STATIC = {
"eval": pl.Utf8, # "exp_based" | "manual_based"
"fold": pl.Int32,
# metrics
"auc": pl.Float64,
"roc_auc": pl.Float64, # <-- renamed from 'auc'
"prc_auc": pl.Float64, # <-- new
"ap": pl.Float64,
# per-sample scores: list of (idx, label, score)
"scores": pl.List(
@@ -114,6 +115,43 @@ SCHEMA_INFERENCE = {
# ------------------------------------------------------------
# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
# ------------------------------------------------------------
def compute_prc_auc_from_curve(prc_curve: dict | None) -> float:
"""
Compute AUC of the Precision-Recall curve via trapezoidal rule.
Expects prc_curve = {"precision": [...], "recall": [...], "thr": [...] (optional)}.
Robust to NaNs, unsorted recall, and missing endpoints; returns np.nan if empty.
"""
if not prc_curve:
return np.nan
precision = np.asarray(prc_curve.get("precision", []), dtype=float)
recall = np.asarray(prc_curve.get("recall", []), dtype=float)
if precision.size == 0 or recall.size == 0:
return np.nan
mask = ~(np.isnan(precision) | np.isnan(recall))
precision, recall = precision[mask], recall[mask]
if recall.size == 0:
return np.nan
# Sort by recall, clip to [0,1]
order = np.argsort(recall)
recall = np.clip(recall[order], 0.0, 1.0)
precision = np.clip(precision[order], 0.0, 1.0)
# Ensure curve spans [0,1] in recall (hold precision constant at ends)
if recall[0] > 0.0:
recall = np.insert(recall, 0, 0.0)
precision = np.insert(precision, 0, precision[0])
if recall[-1] < 1.0:
recall = np.append(recall, 1.0)
precision = np.append(precision, precision[-1])
# Trapezoidal AUC
return float(np.trapezoid(precision, recall))
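# Illustrative check (made-up values, not from this commit): a three-point curve that
# already spans recall 0..1, integrated with the trapezoidal rule above.
_toy_prc = {"precision": [1.0, 0.8, 0.5], "recall": [0.0, 0.5, 1.0]}
assert abs(compute_prc_auc_from_curve(_toy_prc) - 0.775) < 1e-9  # 0.5*0.9 + 0.5*0.65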
def _tolist(x):
if x is None:
return None
@@ -357,23 +395,28 @@ def rows_from_ocsvm_default(data: dict, evals: List[str]) -> Dict[str, dict]:
# Build the Polars DataFrame
# ------------------------------------------------------------
def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame:
"""
Walks experiment subdirs under `root`. For each (model, fold) it adds rows:
Columns (SCHEMA_STATIC):
network, latent_dim, semi_normals, semi_anomalous,
model, eval, fold,
roc_auc, prc_auc, ap, scores{sample_idx,orig_label,score},
roc_curve{fpr,tpr,thr}, prc_curve{precision,recall,thr},
sample_indices, sample_labels, valid_mask,
train_time, test_time,
folder, k_fold_num
"""
if allow_cache:
cache = root / "results_cache.parquet"
if cache.exists():
try:
df = pl.read_parquet(cache)
print(f"[info] loaded cached results frame from {cache}")
# Backward-compat: old caches may have 'auc' but no 'roc_auc'/'prc_auc'
if "roc_auc" not in df.columns and "auc" in df.columns:
df = df.rename({"auc": "roc_auc"})
if "prc_auc" not in df.columns and "prc_curve" in df.columns:
df = df.with_columns(
pl.struct(
pl.col("prc_curve").struct.field("precision"),
pl.col("prc_curve").struct.field("recall"),
)
.map_elements(
    # Polars passes each struct element to the callable as a dict keyed by field name
    lambda s: compute_prc_auc_from_curve(
        {"precision": s["precision"], "recall": s["recall"]}
    ),
    return_dtype=pl.Float64,
)
.alias("prc_auc")
)
return df
except Exception as e:
print(f"[warn] failed to load cache {cache}: {e}")
@@ -408,15 +451,17 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
continue
if model == "deepsad":
per_eval = rows_from_deepsad(data, EVALS) # eval -> dict
per_eval = rows_from_deepsad(data, EVALS)
elif model == "isoforest":
per_eval = rows_from_isoforest(data, EVALS) # eval -> dict
per_eval = rows_from_isoforest(data, EVALS)
elif model == "ocsvm":
per_eval = rows_from_ocsvm_default(data, EVALS) # eval -> dict
per_eval = rows_from_ocsvm_default(data, EVALS)
else:
per_eval = {}
for ev, vals in per_eval.items():
# compute prc_auc now (fast), rename auc->roc_auc
prc_auc_val = compute_prc_auc_from_curve(vals.get("prc"))
rows.append(
{
"network": network,
@@ -426,7 +471,8 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
"model": model,
"eval": ev,
"fold": fold,
"auc": vals["auc"],
"roc_auc": vals["auc"], # renamed
"prc_auc": prc_auc_val, # new
"ap": vals["ap"],
"scores": vals["scores"],
"roc_curve": vals["roc"],
@@ -442,20 +488,19 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
}
)
# If empty, return a typed empty frame
if not rows:
# Return a typed empty frame (new schema)
return pl.DataFrame(schema=SCHEMA_STATIC)
df = pl.DataFrame(rows, schema=SCHEMA_STATIC)
# Cast to efficient dtypes (categoricals etc.) no extra sanitation
# Cast to efficient dtypes (categoricals etc.)
df = df.with_columns(
pl.col("network", "model", "eval").cast(pl.Categorical),
pl.col(
"latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
).cast(pl.Int32),
pl.col("auc", "ap", "train_time", "test_time").cast(pl.Float64),
# NOTE: no cast on 'scores' here; it's already List(Struct) per schema.
pl.col("roc_auc", "prc_auc", "ap", "train_time", "test_time").cast(pl.Float64),
)
if allow_cache:

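# Usage sketch (the "results/" root below is a placeholder, not taken from this commit):
#   from pathlib import Path
#   df = load_results_dataframe(Path("results/"), allow_cache=True)
#   assert {"roc_auc", "prc_auc"} <= set(df.columns)   # renamed + newly added metrics
#   df.select("model", "eval", "fold", "roc_auc", "prc_auc", "ap").head()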
View File

@@ -7,10 +7,10 @@ from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from matplotlib.lines import Line2D
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
from plot_scripts.load_results import load_results_dataframe
from load_results import load_results_dataframe
from matplotlib.lines import Line2D
# ----------------------------
# Config
@@ -26,6 +26,10 @@ SEMI_ANOMALOUS = 10
# Which evaluation columns to plot
EVALS = ["exp_based", "manual_based"]
EVALS_LABELS = {
"exp_based": "Experiment-Label-Based",
"manual_based": "Manually-Labeled",
}
# Latent dimensions to show as 7 subplots
LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
@@ -188,7 +192,7 @@ def plot_grid_from_df(
Create a 2x4 grid of subplots, one per latent dim; 8th panel holds legend.
kind: 'roc' or 'prc'
"""
fig_title = f"{kind.upper()}{eval_type} (semi = {semi_normals}/{semi_anomalous})"
fig_title = f"{kind.upper()}{EVALS_LABELS[eval_type]} (Semi-Labeling Regime = {semi_normals}/{semi_anomalous})"
fig, axes = _ensure_dim_axes(fig_title)
# plotting order & colors
@@ -213,7 +217,7 @@ def plot_grid_from_df(
if i >= 7:
break # last slot reserved for legend
ax = axes[i]
ax.set_title(f"latent_dim = {dim}")
ax.set_title(f"Latent Dim. = {dim}")
ax.grid(True, alpha=0.3)
if kind == "roc":

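# Sketch (assumed shape; `_ensure_dim_axes` itself is not shown in this diff, and the
# name below is hypothetical): a 2x4 grid where the first seven panels hold the latent
# dims and the eighth is kept free for the shared legend, as the plot_grid_from_df
# docstring describes.
def _sketch_dim_axes(fig_title: str):
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    fig.suptitle(fig_title)
    axes = axes.ravel()          # flatten to index panels 0..7
    axes[7].axis("off")          # panel 8 carries only the legend, no data
    return fig, axes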
View File

@@ -5,6 +5,7 @@ from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
import numpy as np
import polars as pl
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
@@ -41,6 +42,17 @@ DECIMALS = 3 # cells look like 1.000 or 0.928 (3 decimals)
# ----------------------------
# Helpers
# ----------------------------
def _fmt_mean_std(mean: float | None, std: float | None) -> str:
"""Format mean ± std with 3 decimals (leading zero), or '--' if missing."""
if mean is None or not (mean == mean): # NaN check
return "--"
if std is None or not (std == std):
return f"{mean:.3f}"
return f"{mean:.3f}$\\,\\pm\\,{std:.3f}$"
def _with_net_label(df: pl.DataFrame) -> pl.DataFrame:
"""Add a canonical 'net_label' column like the plotting script (LeNet/Efficient/fallback)."""
return df.with_columns(
@@ -68,7 +80,7 @@ def _filter_base(df: pl.DataFrame) -> pl.DataFrame:
"net_label",
"latent_dim",
"fold",
"auc",
"ap",
"eval",
"semi_normals",
"semi_anomalous",
@@ -84,7 +96,7 @@ class Cell:
def _compute_cells(df: pl.DataFrame) -> dict[tuple[str, int, str, str, int, int], Cell]:
"""
Compute per-(eval, latent_dim, model, net_label, semi_normals, semi_anomalous)
mean/std for AUC across folds.
mean/std for AP across folds.
"""
if df.is_empty():
return {}
@@ -107,9 +119,7 @@ def _compute_cells(df: pl.DataFrame) -> dict[tuple[str, int, str, str, int, int]
"semi_anomalous",
]
)
.agg(
pl.col("auc").mean().alias("mean_auc"), pl.col("auc").std().alias("std_auc")
)
.agg(pl.col("ap").mean().alias("mean_ap"), pl.col("ap").std().alias("std_ap"))
.to_dicts()
)
@@ -123,10 +133,96 @@ def _compute_cells(df: pl.DataFrame) -> dict[tuple[str, int, str, str, int, int]
int(row["semi_normals"]),
int(row["semi_anomalous"]),
)
out[key] = Cell(mean=row.get("mean_auc"), std=row.get("std_auc"))
out[key] = Cell(mean=row.get("mean_ap"), std=row.get("std_ap"))
return out
def method_label(model: str, net_label: str) -> str:
"""Map (model, net_label) to the four method names used in headers/caption."""
if model == "deepsad" and net_label == "LeNet":
return "DeepSAD (LeNet)"
if model == "deepsad" and net_label == "Efficient":
return "DeepSAD (Efficient)"
if model == "isoforest":
return "IsoForest"
if model == "ocsvm":
return "OC-SVM"
# ignore anything else (e.g., other backbones)
return ""
def per_method_median_std_from_cells(
cells: dict[tuple[str, int, str, str, int, int], Cell],
) -> dict[str, float]:
"""Compute the median std across all cells, per method."""
stds_by_method: dict[str, list[float]] = {
"DeepSAD (LeNet)": [],
"DeepSAD (Efficient)": [],
"IsoForest": [],
"OC-SVM": [],
}
for key, cell in cells.items():
(ev, dim, model, net, semi_n, semi_a) = key
name = method_label(model, net)
if name and (cell.std is not None) and (cell.std == cell.std): # not NaN
stds_by_method[name].append(cell.std)
return {
name: float(np.median(vals)) if vals else float("nan")
for name, vals in stds_by_method.items()
}
def per_method_max_std_from_cells(
cells: dict[tuple[str, int, str, str, int, int], Cell],
) -> tuple[dict[str, float], dict[str, tuple]]:
"""
Scan the aggregated 'cells' and return:
- max_std_by_method: dict {"DeepSAD (LeNet)": 0.037, ...}
- argmax_key_by_method: which cell (eval, dim, model, net, semi_n, semi_a) produced that max
Only considers the four methods shown in the table.
"""
max_std_by_method: dict[str, float] = {
"DeepSAD (LeNet)": float("nan"),
"DeepSAD (Efficient)": float("nan"),
"IsoForest": float("nan"),
"OC-SVM": float("nan"),
}
argmax_key_by_method: dict[str, tuple] = {}
for key, cell in cells.items():
(ev, dim, model, net, semi_n, semi_a) = key
name = method_label(model, net)
if name == "" or cell.std is None or not (cell.std == cell.std): # empty/NaN
continue
cur = max_std_by_method.get(name, float("nan"))
if (cur != cur) or (cell.std > cur): # handle NaN initial
max_std_by_method[name] = cell.std
argmax_key_by_method[name] = key
# Replace remaining NaNs with 0.0 for nice formatting
for k, v in list(max_std_by_method.items()):
if not (v == v): # NaN
max_std_by_method[k] = 0.0
return max_std_by_method, argmax_key_by_method
def _fmt_val(val: float | None) -> str:
"""
Format value as:
- '--' if None/NaN
- '1.0' if exactly 1 (within 1e-9)
- '.xx' otherwise (2 decimals, no leading 0)
"""
if val is None or not (val == val): # None or NaN
return "--"
if abs(val - 1.0) < 1e-9:
return "1.0"
return f"{val:.2f}".lstrip("0")
def _fmt_mean(mean: float | None) -> str:
return "--" if (mean is None or not (mean == mean)) else f"{mean:.{DECIMALS}f}"
@@ -150,6 +246,61 @@ def _bold_best_mask_display(values: list[float | None], decimals: int) -> list[b
return [(v is not None and v == maxv) for v in rounded]
def _build_exp_based_table(
cells: dict[tuple[str, int, str, str, int, int], Cell],
*,
semi_labeling_regimes: list[tuple[int, int]],
) -> str:
"""
Build LaTeX table with mean ± std values for experiment-based evaluation only.
"""
header_cols = [
r"\rotheader{DeepSAD\\(LeNet)}",
r"\rotheader{DeepSAD\\(Efficient)}",
r"\rotheader{IsoForest}",
r"\rotheader{OC-SVM}",
]
lines: list[str] = []
lines.append(r"\begin{table}[t]")
lines.append(r"\centering")
lines.append(r"\setlength{\tabcolsep}{4pt}")
lines.append(r"\renewcommand{\arraystretch}{1.2}")
lines.append(r"\begin{tabularx}{\textwidth}{c*{4}{Y}}")
lines.append(r"\toprule")
lines.append(r"Latent Dim. & " + " & ".join(header_cols) + r" \\")
lines.append(r"\midrule")
for idx, (semi_n, semi_a) in enumerate(semi_labeling_regimes):
# regime label row
lines.append(
rf"\multicolumn{{5}}{{l}}{{\textbf{{Labeling regime: }}\(\mathbf{{{semi_n}/{semi_a}}}\)}} \\"
)
lines.append(r"\addlinespace[2pt]")
for dim in LATENT_DIMS:
row_vals = []
for model, net in METHOD_COLUMNS:
key = ("exp_based", dim, model, net, semi_n, semi_a)
cell = cells.get(key, Cell(None, None))
row_vals.append(_fmt_mean_std(cell.mean, cell.std))
lines.append(f"{dim} & " + " & ".join(row_vals) + r" \\")
if idx < len(semi_labeling_regimes) - 1:
lines.append(r"\midrule")
lines.append(r"\bottomrule")
lines.append(r"\end{tabularx}")
lines.append(
r"\caption{AP means $\pm$ std across 5 folds for experiment-based evaluation only, grouped by labeling regime.}"
)
lines.append(r"\end{table}")
return "\n".join(lines)
def _build_single_table(
cells: dict[tuple[str, int, str, str, int, int], Cell],
*,
@@ -224,6 +375,12 @@ def _build_single_table(
cell = cells.get(key, Cell(None, None))
means_left.append(cell.mean)
cell_strs_left.append(_fmt_mean(cell.mean))
# mean_str = _fmt_val(cell.mean)
# std_str = _fmt_val(cell.std)
# if mean_str == "--":
# cell_strs_left.append("--")
# else:
# cell_strs_left.append(f"{mean_str} $\\textpm$ {std_str}")
push_std(cell.std)
# Right group: manual_based
@@ -233,6 +390,12 @@ def _build_single_table(
cell = cells.get(key, Cell(None, None))
means_right.append(cell.mean)
cell_strs_right.append(_fmt_mean(cell.mean))
# mean_str = _fmt_val(cell.mean)
# std_str = _fmt_val(cell.std)
# if mean_str == "--":
# cell_strs_right.append("--")
# else:
# cell_strs_right.append(f"{mean_str} $\\textpm$ {std_str}")
push_std(cell.std)
# Bolding per group based on displayed precision
@@ -264,11 +427,23 @@ def _build_single_table(
lines.append(r"\bottomrule")
lines.append(r"\end{tabularx}")
# Caption with max std (not shown in table)
max_std_str = "n/a" if max_std is None else f"{max_std:.{DECIMALS}f}"
# Per-method spread across all cells included in the table (median of per-cell stds);
# the max-based variant is kept commented out for reference.
# max_std_by_method, argmax_key = per_method_max_std_from_cells(cells)
median_std_by_method = per_method_median_std_from_cells(cells)
# Optional: print the per-method median std (helps verify the caption numbers)
for name, v in median_std_by_method.items():
    print(f"[median-std] {name}: {v:.3f}")
cap_parts = []
for name in ["DeepSAD (LeNet)", "DeepSAD (Efficient)", "IsoForest", "OC-SVM"]:
v = median_std_by_method.get(name, 0.0)
cap_parts.append(f"{name} {v:.3f}")
cap_str = "; ".join(cap_parts)
lines.append(
rf"\caption{{AUC means across 5 folds for both evaluations, grouped by labeling regime. "
rf"Maximum observed standard deviation across all cells (not shown in table): {max_std_str}.}}"
rf"\caption{{AP means across 5 folds for both evaluations, grouped by labeling regime. "
rf"Maximum observed standard deviation per method (not shown in table): {cap_str}.}}"
)
lines.append(r"\end{table}")
@@ -296,10 +471,17 @@ def main():
cells, semi_labeling_regimes=SEMI_LABELING_REGIMES
)
out_name = "auc_table_all_evals_all_regimes.tex"
out_name = "ap_table_all_evals_all_regimes.tex"
out_path = ts_dir / out_name
out_path.write_text(tex, encoding="utf-8")
# Build experiment-based table with mean ± std
tex_exp = _build_exp_based_table(cells, semi_labeling_regimes=SEMI_LABELING_REGIMES)
out_name_exp = "ap_table_exp_based_mean_std.tex"
out_path_exp = ts_dir / out_name_exp
out_path_exp.write_text(tex_exp, encoding="utf-8")
# Copy this script to preserve the code used for the outputs
script_path = Path(__file__)
shutil.copy2(script_path, ts_dir / script_path.name)