results inference discussion

This commit is contained in:
Jan Kowalczyk
2025-09-22 09:41:58 +02:00
parent 8e7c210872
commit 9ec73c5992
8 changed files with 234 additions and 107 deletions

View File

@@ -12,6 +12,8 @@ from typing import Dict, Optional, Tuple
import matplotlib.pyplot as plt
import numpy as np
import polars as pl
from load_results import load_inference_results_dataframe
from matplotlib.lines import Line2D
# =====================================
# User-configurable params
@@ -21,7 +23,7 @@ import polars as pl
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")
# Cached stats + manual labels (same location as your earlier scripts)
CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline")
CACHE_PATH = Path("/home/fedex/mt/plots/results_inference_exp_compare")
# .bag directory (used only to rebuild experiment order for mapping stats)
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")
@@ -35,8 +37,8 @@ EXPERIMENT_DEGRADED = "3_smoke_human_walking_2023-01-23"
# Shared model configuration for BOTH experiments
LATENT_DIM = 32
SEMI_NORMALS = 50
SEMI_ANOMALOUS = 10
SEMI_NORMALS = 0
SEMI_ANOMALOUS = 0
# Comparison y-axis mode for methods: "baseline_z" or "baseline_tailprob"
Y_MODE = "baseline_z"
@@ -75,7 +77,6 @@ output_datetime_path.mkdir(exist_ok=True, parents=True)
# =====================================
# Load Polars DataFrame via your helper
# =====================================
from load_results import load_inference_results_dataframe
df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)
@@ -267,11 +268,11 @@ def pick_method_series(gdf: pl.DataFrame, label: str) -> Optional[np.ndarray]:
sel = gdf.filter(
(pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad")
)
elif label == "OCSVM (LeNet)":
elif label == "OCSVM":
sel = gdf.filter(
(pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm")
)
elif label == "IsoForest (LeNet)":
elif label == "Isolation Forest":
sel = gdf.filter(
(pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest")
)
@@ -312,8 +313,8 @@ def compare_two_experiments_progress(
methods = [
"DeepSAD (LeNet)",
"DeepSAD (efficient)",
"OCSVM (LeNet)",
"IsoForest (LeNet)",
"OCSVM",
"Isolation Forest",
]
g_clean = group_slice(
@@ -359,117 +360,211 @@ def compare_two_experiments_progress(
x = np.linspace(0, 100, PROGRESS_BINS)
# ---- Figure 1: scores only
fig1, ax1 = plt.subplots(figsize=(14, 6), constrained_layout=True)
for label in methods:
# Prep stats: absolute %, EMA, progress-binned
def prep_stat_pair(a, b):
if a is None or len(a) == 0 or b is None or len(b) == 0:
return None, None
a_s = ema(a.astype(float), EMA_ALPHA_STATS)
b_s = ema(b.astype(float), EMA_ALPHA_STATS)
return _bin_to_progress(a_s, PROGRESS_BINS), _bin_to_progress(
b_s, PROGRESS_BINS
)
mp_c, mp_d = prep_stat_pair(mp_clean, mp_deg)
ns_c, ns_d = prep_stat_pair(ns_clean, ns_deg)
# Colors & styles
COLOR_METHOD = "#d62728" # vibrant red
COLOR_MISSING = "#9ecae1" # pale blue
COLOR_NEAR = "#a1d99b" # pale green
LS_CLEAN = "--" # dashed for normal/clean
LS_DEG = "-" # solid for anomalous/degraded
LW_METHOD = 1.8
LW_METHOD_CLEAN = 1.2
LW_STATS = 1.6
ALPHA_STATS = 0.95
# Build the 4x1 stack of subplots (one row per method)
fig, axes = plt.subplots(
4, 1, figsize=(12, 16), constrained_layout=True, sharex=False
)
axes = axes.ravel()
method_to_axidx = {
"DeepSAD (LeNet)": 0,
"DeepSAD (efficient)": 1,
"OCSVM": 2,
"Isolation Forest": 3,
}
stats_available = (
mp_c is not None and mp_d is not None and ns_c is not None and ns_d is not None
)
if not stats_available:
print("[WARN] One or both stats missing. Subplots will include methods only.")
for label, axidx in method_to_axidx.items():
ax = axes[axidx]
yc = curves_clean.get(label)
yd = curves_deg.get(label)
if yc is None or yd is None:
ax.text(
0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes
)
ax.set_title(label)
ax.grid(True, alpha=0.3)
continue
ax1.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
ax1.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
ax1.set_xlabel("Progress through experiment (%)")
ax1.set_ylabel(y_label)
ax1.set_title(
f"Methods across experiments (progress-normalized)\n"
f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS})"
)
ax1.grid(True, alpha=0.3)
ax1.legend(ncol=2, loc="upper right")
out1 = (
f"compare_{experiment_clean}_vs_{experiment_degraded}"
f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
f"_methods_{y_mode}.png"
)
fig1.savefig(output_datetime_path / out1, dpi=150)
plt.close(fig1)
made = 1
# Left axis: method score (z or tailprob)
ax.plot(
x,
yd,
linestyle=LS_DEG,
color=COLOR_METHOD,
linewidth=LW_METHOD,
label=f"{label} — degraded",
)
ax.plot(
x,
yc,
linestyle=LS_CLEAN,
color=COLOR_METHOD,
linewidth=LW_METHOD_CLEAN,
label=f"{label} — clean",
)
ax.set_ylabel(y_label)
ax.set_title(label)
ax.grid(True, alpha=0.3)
if include_stats:
# Prep stats: absolute %, EMA, progress-binned
def prep_stat_pair(a, b):
if a is None or len(a) == 0 or b is None or len(b) == 0:
return None, None
a_s = ema(a.astype(float), EMA_ALPHA_STATS)
b_s = ema(b.astype(float), EMA_ALPHA_STATS)
return _bin_to_progress(a_s, PROGRESS_BINS), _bin_to_progress(
b_s, PROGRESS_BINS
)
mp_c, mp_d = prep_stat_pair(mp_clean, mp_deg)
ns_c, ns_d = prep_stat_pair(ns_clean, ns_deg)
# ---- Figure 2: + Missing points (%)
# Right axis #1 (closest to plot): Missing points (%)
axy_miss = ax.twinx()
if mp_c is not None and mp_d is not None:
fig2, ax2 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy2 = ax2.twinx()
for label in methods:
yc = curves_clean.get(label)
yd = curves_deg.get(label)
if yc is None or yd is None:
continue
ax2.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
ax2.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
axy2.plot(x, mp_d, linestyle="-.", label="Missing points — degraded (%)")
axy2.plot(x, mp_c, linestyle=":", label="Missing points — clean (%)")
ax2.set_xlabel("Progress through experiment (%)")
ax2.set_ylabel(y_label)
axy2.set_ylabel("Missing points (%)")
ax2.set_title(
f"Methods vs Missing points (absolute %) — progress-normalized\n"
f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})"
axy_miss.plot(
x,
mp_d,
linestyle=LS_DEG,
color=COLOR_MISSING,
alpha=ALPHA_STATS,
linewidth=LW_STATS,
label="Missing points — degraded (%)",
)
ax2.grid(True, alpha=0.3)
L1, N1 = ax2.get_legend_handles_labels()
L2, N2 = axy2.get_legend_handles_labels()
ax2.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2)
out2 = (
f"compare_{experiment_clean}_vs_{experiment_degraded}"
f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
f"_{y_mode}_missing.png"
axy_miss.plot(
x,
mp_c,
linestyle=LS_CLEAN,
color=COLOR_MISSING,
alpha=ALPHA_STATS,
linewidth=LW_STATS,
label="Missing points — clean (%)",
)
fig2.savefig(output_datetime_path / out2, dpi=150)
plt.close(fig2)
made += 1
axy_miss.set_ylabel("Missing points (%)")
axy_miss.tick_params(axis="y") # , colors=COLOR_MISSING)
# axy_miss.spines["right"].set_edgecolor(COLOR_MISSING)
# Right axis #2 (slightly offset): Near-sensor points (%)
axy_near = ax.twinx()
# push this spine outward so it doesn't overlap the first right axis
axy_near.spines["right"].set_position(("axes", 1.08))
# make patch invisible so only spine shows
axy_near.set_frame_on(True)
axy_near.patch.set_visible(False)
# ---- Figure 3: + Near-sensor (%)
if ns_c is not None and ns_d is not None:
fig3, ax3 = plt.subplots(figsize=(14, 6), constrained_layout=True)
axy3 = ax3.twinx()
for label in methods:
yc = curves_clean.get(label)
yd = curves_deg.get(label)
if yc is None or yd is None:
continue
ax3.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
ax3.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
axy3.plot(x, ns_d, linestyle="-.", label="Near-sensor — degraded (%)")
axy3.plot(x, ns_c, linestyle=":", label="Near-sensor — clean (%)")
ax3.set_xlabel("Progress through experiment (%)")
ax3.set_ylabel(y_label)
axy3.set_ylabel("Near-sensor points (%)")
ax3.set_title(
f"Methods vs Near-sensor (absolute %) — progress-normalized\n"
f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})"
axy_near.plot(
x,
ns_d,
linestyle=LS_DEG,
color=COLOR_NEAR,
alpha=ALPHA_STATS,
linewidth=LW_STATS,
label="Near-sensor — degraded (%)",
)
ax3.grid(True, alpha=0.3)
L1, N1 = ax3.get_legend_handles_labels()
L2, N2 = axy3.get_legend_handles_labels()
ax3.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2)
out3 = (
f"compare_{experiment_clean}_vs_{experiment_degraded}"
f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
f"_{y_mode}_nearsensor.png"
axy_near.plot(
x,
ns_c,
linestyle=LS_CLEAN,
color=COLOR_NEAR,
alpha=ALPHA_STATS,
linewidth=LW_STATS,
label="Near-sensor — clean (%)",
)
fig3.savefig(output_datetime_path / out3, dpi=150)
plt.close(fig3)
made += 1
axy_near.set_ylabel("Near-sensor points (%)")
axy_near.tick_params(axis="y") # , colors=COLOR_NEAR)
# axy_near.spines["right"].set_edgecolor(COLOR_NEAR)
return made
# Compose legend: show *method name* explicitly, plus the two stats
handles = [
Line2D(
[0],
[0],
color=COLOR_METHOD,
lw=LW_METHOD,
ls=LS_DEG,
label=f"{label} — degraded",
),
Line2D(
[0],
[0],
color=COLOR_METHOD,
lw=LW_METHOD_CLEAN,
ls=LS_CLEAN,
label=f"{label} — clean",
),
Line2D(
[0],
[0],
color=COLOR_MISSING,
lw=LW_STATS,
ls=LS_DEG,
label="Missing points — degraded",
),
Line2D(
[0],
[0],
color=COLOR_MISSING,
lw=LW_STATS,
ls=LS_CLEAN,
label="Missing points — clean",
),
Line2D(
[0],
[0],
color=COLOR_NEAR,
lw=LW_STATS,
ls=LS_DEG,
label="Near-sensor — degraded",
),
Line2D(
[0],
[0],
color=COLOR_NEAR,
lw=LW_STATS,
ls=LS_CLEAN,
label="Near-sensor — clean",
),
]
ax.legend(handles=handles, loc="upper left", fontsize=9, framealpha=0.9)
# Shared labels / super-title
for ax in axes:
ax.set_xlabel("Progress through experiment (%)")
fig.suptitle(
f"AD Method vs Stats Inference — progress-normalized\n"
f"Transform: z-score normalized to non-degraded experiment | EMA(α={EMA_ALPHA_METHODS})",
fontsize=14,
)
fig.tight_layout(rect=[0, 0, 1, 0.99])
out_name = (
f"4up_{EXPERIMENT_CLEAN}_vs_{EXPERIMENT_DEGRADED}"
f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}_{y_mode}_methods_vs_stats.png"
)
fig.savefig(output_datetime_path / out_name, dpi=150)
plt.close(fig)
return 1
# =====================================