inference plots, results structure wip
BIN  thesis/Main.pdf
Binary file not shown.
@@ -1426,7 +1426,15 @@ Together, these results provide a comprehensive overview of the computational re
\newchapter{results_discussion}{Results and Discussion}

\newsection{results}{Results}

\todo[inline]{some results, ROC curves, for both global and local}

% \threadtodo
% {give overview about hardware setup and how long things take to train}
% {we know what we trained but not how long that takes}
% {table of hardware and of how long different trainings took}
% {experiment setup understood $\rightarrow$ what were the experiments' results}

\todo{}

\newsection{hyperparameter_analysis}{Hyperparameter Analysis}

\todo[inline]{result for different amounts of labeled data}
@@ -1,269 +0,0 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# =========================
# User-configurable params
# =========================
# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"

# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
methods_scores_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)

# Root data path containing .bag files used to build the cached stats
all_data_path = Path("/home/fedex/mt/data/subter")

# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")

# Cache (stats + labels) directory — same as your original script
cache_path = output_path

# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
data_resolution = 32 * 2048

# Frames per second for x-axis time
FPS = 10.0

# Whether to try to align score sign so that higher = more degraded.
ALIGN_SCORE_DIRECTION = True

# =========================
# Smoothing configuration
# =========================
# Options: "none", "moving_average", "gaussian", "ema"
SMOOTHING_METHOD = "ema"

# Moving average window size (in frames). Use odd number for symmetry; <=1 disables.
MA_WINDOW = 11

# Gaussian sigma (in frames). ~2-3 frames is mild smoothing.
GAUSSIAN_SIGMA = 2.0

# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start.
EMA_ALPHA = 0.1

# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

# =========================
# Discover experiments
# =========================
normal_experiment_paths, anomaly_experiment_paths = [], []
for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix != ".bag":
        continue
    if "smoke" in bag_file_path.name:
        anomaly_experiment_paths.append(bag_file_path)
    else:
        normal_experiment_paths.append(bag_file_path)

normal_experiment_paths = sorted(
    normal_experiment_paths, key=lambda p: p.stat().st_size
)
anomaly_experiment_paths = sorted(
    anomaly_experiment_paths, key=lambda p: p.stat().st_size
)

# Find experiment
exp_path, exp_is_anomaly = None, None
for p in anomaly_experiment_paths:
    if p.stem == EXPERIMENT_NAME:
        exp_path, exp_is_anomaly = p, True
        break
if exp_path is None:
    for p in normal_experiment_paths:
        if p.stem == EXPERIMENT_NAME:
            exp_path, exp_is_anomaly = p, False
            break
if exp_path is None:
    raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found")

exp_index = (
    anomaly_experiment_paths.index(exp_path)
    if exp_is_anomaly
    else normal_experiment_paths.index(exp_path)
)

# =========================
# Load cached statistical data
# =========================
with open(cache_path / "missing_points.pkl", "rb") as f:
    missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f:
    near_sensor_normal, near_sensor_anomaly = pickle.load(f)

if exp_is_anomaly:
    missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float)
else:
    missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float)
    near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float)

missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0

# =========================
# Load manual anomaly frame borders
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
    with open(labels_json_path, "r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        manually_labeled_anomaly_frames[file["filename"]] = (
            file.get("semi_target_begin_frame"),
            file.get("semi_target_end_frame"),
        )
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))


# =========================
# Load method scores and normalize
# =========================
def zscore_1d(x, eps=1e-12):
    mu, sigma = np.mean(x), np.std(x, ddof=0)
    return np.zeros_like(x) if sigma < eps else (x - mu) / sigma


def maybe_align_direction(z, window):
    start, end = window
    if start is None or end is None:
        return z
    inside_mean = np.mean(z[start:end]) if end > start else 0
    outside = np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else []
    outside_mean = np.mean(outside) if len(outside) else inside_mean
    return z if inside_mean >= outside_mean else -z


methods = ["deepsad", "ocsvm", "isoforest"]
method_zscores = {}
for m in methods:
    s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy")
    s = np.asarray(s, dtype=float).ravel()
    n = min(len(s), len(missing_points_pct))
    s, missing_points_pct, near_sensor_pct = (
        s[:n],
        missing_points_pct[:n],
        near_sensor_pct[:n],
    )
    z = zscore_1d(s)
    if ALIGN_SCORE_DIRECTION:
        z = maybe_align_direction(z, anomaly_window)
    method_zscores[m] = z


# =========================
# Smoothing
# =========================
def moving_average(x, window):
    if window <= 1:
        return x
    if window % 2 == 0:
        window += 1
    return np.convolve(x, np.ones(window) / window, mode="same")


def gaussian_smooth(x, sigma):
    from scipy.ndimage import gaussian_filter1d

    return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x


def ema(x, alpha):
    y = np.empty_like(x)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y


def apply_smoothing(x):
    m = SMOOTHING_METHOD.lower()
    if m == "none":
        return x
    if m == "moving_average":
        return moving_average(x, MA_WINDOW)
    if m == "gaussian":
        return gaussian_smooth(x, GAUSSIAN_SIGMA)
    if m == "ema":
        return ema(x, EMA_ALPHA)
    raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}")


smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()}
smoothed_missing = apply_smoothing(missing_points_pct)
smoothed_near = apply_smoothing(near_sensor_pct)

# =========================
# Plot
# =========================
t = np.arange(len(missing_points_pct)) / FPS


def plot_series(y2, ylabel, fname, title_suffix):
    fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True)
    axy = axz.twinx()
    for m in methods:
        axz.plot(t, smoothed_z[m], label=f"{m} (z)")
    axy.plot(t, y2, linestyle="--", label=ylabel)
    start, end = anomaly_window
    if start and end:
        axz.axvline(start / FPS, linestyle=":", alpha=0.6)
        axz.axvline(end / FPS, linestyle=":", alpha=0.6)
    axz.set_xlabel("Time (s)")
    axz.set_ylabel("Anomaly score (z)")
    axy.set_ylabel(ylabel)
    axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}")
    lines1, labels1 = axz.get_legend_handles_labels()
    lines2, labels2 = axy.get_legend_handles_labels()
    axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
    axz.grid(True, alpha=0.3)
    fig.savefig(output_datetime_path / fname, dpi=150)
    plt.close(fig)


plot_series(
    smoothed_missing,
    "Missing points (%)",
    f"{EXPERIMENT_NAME}_zscores_vs_missing.png",
    "Degradation vs Missing Points",
)
plot_series(
    smoothed_near,
    "Near-sensor points (%)",
    f"{EXPERIMENT_NAME}_zscores_vs_near.png",
    "Degradation vs Near-Sensor Points (<0.5m)",
)

# =========================
# Save & archive
# =========================
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for f in output_datetime_path.iterdir():
    shutil.copy2(f, latest_folder_path)
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)
shutil.move(output_datetime_path, archive_folder_path)
print("Done. Plots saved and archived.")
459  tools/plot_scripts/results_inference_timeline_smoothed.py  Normal file
@@ -0,0 +1,459 @@
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl

# =====================================
# User-configurable params
# =====================================
# Root directory that contains per-run outputs (your loader will scan this)
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")

# Path that holds cached stats (same as before)
CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline")

# Root data path containing .bag files to rebuild ordering (for stats mapping)
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")

# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")

# Frames per second for x-axis time
FPS = 10.0

# ---- Smoothing: EMA only ----
EMA_ALPHA = 0.1  # models (0,1], smaller = smoother
STATS_EMA_ALPHA = 0.1  # stats (absolute %); tweak independently if desired

# Whether to z-score per-curve for the model methods (recommended)
Z_SCORE_MODELS = True

# If some model's series is longer/shorter than others in a group, align to min length
ALIGN_TO_MIN_LENGTH = True

# Whether to try to align model score sign so that higher = more degraded using manual window
ALIGN_SCORE_DIRECTION = True

# LiDAR points per frame (for stats -> percent)
DATA_RESOLUTION = 32 * 2048

# =====================================
# Setup output folders
# =====================================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = OUTPUT_PATH / "latest"
archive_folder_path = OUTPUT_PATH / "archive"
output_datetime_path = OUTPUT_PATH / datetime_folder_name

OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)

# =====================================
# Load Polars DataFrame via your helper
# =====================================
from load_results import load_inference_results_dataframe

df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)

# sanity
expected_cols = {
    "experiment",
    "network",
    "latent_dim",
    "semi_normals",
    "semi_anomalous",
    "model",
    "scores",
    "folder",
    "config_json",
}
missing_cols = expected_cols - set(df.columns)
if missing_cols:
    raise KeyError(f"DataFrame missing required columns: {sorted(missing_cols)}")


# =====================================
# Rebuild experiment → stats mapping (like your original)
# =====================================
def rebuild_experiment_index():
    normals, anomalies = [], []
    if not ALL_DATA_PATH.exists():
        return [], [], {}
    for bag in ALL_DATA_PATH.iterdir():
        if bag.suffix != ".bag":
            continue
        if "smoke" in bag.name:
            anomalies.append(bag)
        else:
            normals.append(bag)
    normals = sorted(normals, key=lambda p: p.stat().st_size)
    anomalies = sorted(anomalies, key=lambda p: p.stat().st_size)
    mapping = {}
    for i, p in enumerate(normals):
        mapping[p.stem] = (False, i, p)
    for i, p in enumerate(anomalies):
        mapping[p.stem] = (True, i, p)
    return normals, anomalies, mapping


normal_paths, anomaly_paths, exp_map = rebuild_experiment_index()

# Load cached statistical data (+ manual labels)
missing_points_cache = CACHE_PATH / "missing_points.pkl"
near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl"
labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json"

missing_points_normal = missing_points_anomaly = None
near_sensor_normal = near_sensor_anomaly = None
if missing_points_cache.exists():
    with open(missing_points_cache, "rb") as f:
        missing_points_normal, missing_points_anomaly = pickle.load(f)
if near_sensor_cache.exists():
    with open(near_sensor_cache, "rb") as f:
        near_sensor_normal, near_sensor_anomaly = pickle.load(f)

manual_windows = {}
if labels_json_path.exists():
    with open(labels_json_path, "r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        manual_windows[file["filename"]] = (
            file.get("semi_target_begin_frame"),
            file.get("semi_target_end_frame"),
        )


def get_stats_for_experiment(exp_name: str):
    """
    Returns:
        missing_pct (np.ndarray) | None,
        near_pct (np.ndarray) | None,
        anomaly_window (tuple(start,end)) | (None,None)
    """
    if exp_name not in exp_map:
        return None, None, (None, None)
    is_anomaly, idx, path = exp_map[exp_name]
    missing = None
    near = None
    if missing_points_normal is not None and missing_points_anomaly is not None:
        series = (
            missing_points_anomaly[idx] if is_anomaly else missing_points_normal[idx]
        )
        missing = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0
    if near_sensor_normal is not None and near_sensor_anomaly is not None:
        series = near_sensor_anomaly[idx] if is_anomaly else near_sensor_normal[idx]
        near = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0
    npy_key = path.with_suffix(".npy").name
    window = manual_windows.get(npy_key, (None, None))
    return missing, near, window


# =====================================
# Helpers
# =====================================
def to_np(a):
    """Convert a Polars list cell to a 1D NumPy array of float."""
    if a is None:
        return None
    return np.asarray(a, dtype=float).ravel()


def zscore_1d(x, eps=1e-12):
    if x is None or len(x) == 0:
        return x
    mu = float(np.mean(x))
    sigma = float(np.std(x, ddof=0))
    return np.zeros_like(x) if sigma < eps else (x - mu) / sigma


def ema(x, alpha):
    if x is None or len(x) == 0:
        return x
    y = np.empty_like(x, dtype=float)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y


def apply_ema_models(x):
    return ema(x, EMA_ALPHA)


def apply_ema_stats(x):
    return ema(x, STATS_EMA_ALPHA)


def align_lengths(series_dict):
    """Truncate all series to the shortest available length."""
    valid_lengths = [
        len(v) for v in series_dict.values() if v is not None and len(v) > 0
    ]
    if not valid_lengths:
        return series_dict
    min_len = min(valid_lengths)
    return {k: (v[:min_len] if v is not None else None) for k, v in series_dict.items()}


def maybe_align_direction(z: np.ndarray, window):
    """Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist."""
    if z is None:
        return z
    start, end = window
    if start is None or end is None:
        return z
    start = int(max(0, start))
    end = int(min(len(z), end))
    if end <= start or end > len(z):
        return z
    inside_mean = float(np.mean(z[start:end]))
    if start == 0 and end == len(z):
        return z
    outside_parts = []
    if start > 0:
        outside_parts.append(z[:start])
    if end < len(z):
        outside_parts.append(z[end:])
    if not outside_parts:
        return z
    outside_mean = float(np.mean(np.concatenate(outside_parts)))
    return z if inside_mean >= outside_mean else -z


def safe_title(s: str) -> str:
    return s.replace("_", " ")


# =====================================
# Model selection per group (network names updated)
# =====================================
group_cols = ["experiment", "latent_dim", "semi_normals", "semi_anomalous"]


def pick_rows(gdf: pl.DataFrame):
    sel = {}
    sel["DeepSAD (LeNet)"] = gdf.filter(
        (pl.col("network") == "subter_LeNet") & (pl.col("model") == "deepsad")
    )
    sel["DeepSAD (efficient)"] = gdf.filter(
        (pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad")
    )
    sel["OCSVM (LeNet)"] = gdf.filter(
        (pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm")
    )
    sel["IsoForest (LeNet)"] = gdf.filter(
        (pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest")
    )
    chosen = {}
    for k, dfk in sel.items():
        chosen[k] = dfk.row(0) if dfk.height > 0 else None
    return chosen


# =====================================
# Iterate groups and plot
# =====================================
plots_made = 0

for keys, g in df.group_by(group_cols, maintain_order=True):
    experiment, latent_dim, semi_normals, semi_anomalous = keys

    chosen = pick_rows(g)

    # Extract series for models
    curves_raw = {}
    for label, row in chosen.items():
        if row is None:
            curves_raw[label] = None
            continue
        row_dict = {c: row[i] for i, c in enumerate(df.columns)}
        scores = to_np(row_dict["scores"])
        curves_raw[label] = scores

    # If nothing to plot, skip group
    if all(v is None or len(v) == 0 for v in curves_raw.values()):
        continue

    # Stats for this experiment (absolute %; no z-scoring)
    missing_pct, near_pct, anomaly_window = get_stats_for_experiment(experiment)

    # Optionally align lengths among model curves
    curves = curves_raw.copy()
    if ALIGN_TO_MIN_LENGTH:
        curves = align_lengths(curves)

    # Prepare processed model curves: z-score (if enabled) + EMA smoothing
    proc = {}
    for k, v in curves.items():
        if v is None:
            continue
        x = zscore_1d(v) if Z_SCORE_MODELS else v.astype(float)
        if ALIGN_SCORE_DIRECTION and anomaly_window != (None, None):
            x = maybe_align_direction(x, anomaly_window)
        x = apply_ema_models(x)
        proc[k] = x

    if not proc:
        continue

    # Establish time axis for model curves
    any_len = len(next(iter(proc.values())))
    t_models = np.arange(any_len) / FPS

    # =========== Plot A: Scores-only (models z-scored; stats not shown) ===========
    figA, axA = plt.subplots(figsize=(14, 6), constrained_layout=True)
    for label, y in proc.items():
        if y is not None:
            axA.plot(t_models, y, label=label)
    axA.set_xlabel("Time (s)")
    axA.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else ""))
    titleA = (
        f"{safe_title(experiment)} | latent_dim={latent_dim}, "
        f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n"
        f"Smoothing: EMA(alpha={EMA_ALPHA})"
    )
    axA.set_title(titleA)
    axA.grid(True, alpha=0.3)
    axA.legend(loc="upper right")
    fnameA = (
        f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
        f"_scores_EMA-{EMA_ALPHA}{'_z' if Z_SCORE_MODELS else ''}.png"
    )
    figA.savefig(output_datetime_path / fnameA, dpi=150)
    plt.close(figA)

    # =========== Plot B: Models (z-scored) + Missing Points (%) absolute ===========
    if missing_pct is not None and len(missing_pct) > 0:
        mp = missing_pct
        if ALIGN_TO_MIN_LENGTH:
            mp = mp[:any_len]
        mp_s = apply_ema_stats(mp)
        t_stats = np.arange(len(mp_s)) / FPS

        figB, axB = plt.subplots(figsize=(14, 6), constrained_layout=True)
        axBy = axB.twinx()
        for label, y in proc.items():
            if y is not None:
                axB.plot(t_models, y, label=label)
        axBy.plot(t_stats, mp_s, linestyle="--", label="Missing points (%)")

        if anomaly_window != (None, None):
            start, end = anomaly_window
            if isinstance(start, int) and isinstance(end, int) and 0 <= start < end:
                axB.axvline(start / FPS, linestyle=":", alpha=0.6)
                axB.axvline(end / FPS, linestyle=":", alpha=0.6)

        axB.set_xlabel("Time (s)")
        axB.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else ""))
        axBy.set_ylabel("Missing points (%)")
        titleB = (
            f"{safe_title(experiment)} | latent_dim={latent_dim}, "
            f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n"
            f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Missing points (absolute %)"
        )
        axB.set_title(titleB)
        axB.grid(True, alpha=0.3)
        lines1, labels1 = axB.get_legend_handles_labels()
        lines2, labels2 = axBy.get_legend_handles_labels()
        axB.legend(lines1 + lines2, labels1 + labels2, loc="upper right")

        fnameB = (
            f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
            f"_scores_plus_missing_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}"
            f"{'_z' if Z_SCORE_MODELS else ''}.png"
        )
        figB.savefig(output_datetime_path / fnameB, dpi=150)
        plt.close(figB)

    # =========== Plot C: Models (z-scored) + Near-sensor Points (%) absolute ===========
    if near_pct is not None and len(near_pct) > 0:
        ns = near_pct
        if ALIGN_TO_MIN_LENGTH:
            ns = ns[:any_len]
        ns_s = apply_ema_stats(ns)
        t_stats = np.arange(len(ns_s)) / FPS

        figC, axC = plt.subplots(figsize=(14, 6), constrained_layout=True)
        axCy = axC.twinx()
        for label, y in proc.items():
            if y is not None:
                axC.plot(t_models, y, label=label)
        axCy.plot(t_stats, ns_s, linestyle="--", label="Near-sensor <0.5m (%)")

        if anomaly_window != (None, None):
            start, end = anomaly_window
            if isinstance(start, int) and isinstance(end, int) and 0 <= start < end:
                axC.axvline(start / FPS, linestyle=":", alpha=0.6)
                axC.axvline(end / FPS, linestyle=":", alpha=0.6)

        axC.set_xlabel("Time (s)")
        axC.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else ""))
        axCy.set_ylabel("Near-sensor points (%)")
        titleC = (
            f"{safe_title(experiment)} | latent_dim={latent_dim}, "
            f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n"
            f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Near-sensor <0.5m (absolute %)"
        )
        axC.set_title(titleC)
        axC.grid(True, alpha=0.3)
        lines1, labels1 = axC.get_legend_handles_labels()
        lines2, labels2 = axCy.get_legend_handles_labels()
        axC.legend(lines1 + lines2, labels1 + labels2, loc="upper right")

        fnameC = (
            f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
            f"_scores_plus_nearsensor_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}"
            f"{'_z' if Z_SCORE_MODELS else ''}.png"
        )
        figC.savefig(output_datetime_path / fnameC, dpi=150)
        plt.close(figC)

    plots_made += 1

# =====================================
# Preserve latest/, archive/, copy script
# =====================================
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to preserve the code used
try:
    shutil.copy2(__file__, output_datetime_path)
    shutil.copy2(__file__, latest_folder_path)
except Exception:
    # If running interactively, fall back to saving the config snapshot
    (output_datetime_path / "run_config.json").write_text(
        json.dumps(
            {
                "INFERENCE_ROOT": str(INFERENCE_ROOT),
                "CACHE_PATH": str(CACHE_PATH),
                "ALL_DATA_PATH": str(ALL_DATA_PATH),
                "FPS": FPS,
                "EMA_ALPHA": EMA_ALPHA,
                "STATS_EMA_ALPHA": STATS_EMA_ALPHA,
                "Z_SCORE_MODELS": Z_SCORE_MODELS,
                "ALIGN_TO_MIN_LENGTH": ALIGN_TO_MIN_LENGTH,
                "ALIGN_SCORE_DIRECTION": ALIGN_SCORE_DIRECTION,
                "timestamp": datetime_folder_name,
            },
            indent=2,
        )
    )

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

print(f"Done. Plotted {plots_made} groups. Archived under: {archive_folder_path}")
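Note: both new scripts import load_inference_results_dataframe from load_results, which is not part of this commit. A minimal sketch of what such a loader might look like is given below; the folder layout, config.json keys, and score-file naming are assumptions for illustration, not the actual helper.

import json
from pathlib import Path

import numpy as np
import polars as pl


def load_inference_results_dataframe(root: Path) -> pl.DataFrame:
    # Hypothetical sketch: scan per-run folders, each assumed to contain a
    # config.json and one {model}_scores.npy per method, and emit one row
    # per (run, model) with the columns the plotting scripts check for.
    rows = []
    for run_dir in sorted(p for p in root.iterdir() if p.is_dir()):
        config_path = run_dir / "config.json"
        if not config_path.exists():
            continue
        config = json.loads(config_path.read_text())
        for scores_file in run_dir.glob("*_scores.npy"):
            rows.append(
                {
                    "experiment": config.get("experiment"),
                    "network": config.get("net_name"),
                    "latent_dim": config.get("latent_dim"),
                    "semi_normals": config.get("semi_normals"),
                    "semi_anomalous": config.get("semi_anomalous"),
                    "model": scores_file.stem.replace("_scores", ""),
                    "scores": np.load(scores_file).astype(float).tolist(),
                    "folder": str(run_dir),
                    "config_json": json.dumps(config),
                }
            )
    return pl.DataFrame(rows)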
533  tools/plot_scripts/results_inference_timelines_exp_compare.py  Normal file
@@ -0,0 +1,533 @@
#!/usr/bin/env python3
# results_inference_timelines_exp_compare.py

import json
import pickle
import re
import shutil
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import polars as pl

# =====================================
# User-configurable params
# =====================================

# Root directory that contains per-run outputs (your loader will scan this)
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")

# Cached stats + manual labels (same location as your earlier scripts)
CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline")

# .bag directory (used only to rebuild experiment order for mapping stats)
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")

# Output base directory (timestamped subfolder will be created here, archived, and copied to latest/)
OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_exp_compare")

# Two experiments to compare (exact strings as they appear in your DF’s `experiment` column)
EXPERIMENT_CLEAN = "2_static_no_artifacts_illuminated_2023-01-23-001"
EXPERIMENT_DEGRADED = "3_smoke_human_walking_2023-01-23"

# Shared model configuration for BOTH experiments
LATENT_DIM = 32
SEMI_NORMALS = 50
SEMI_ANOMALOUS = 10

# Comparison y-axis mode for methods: "baseline_z" or "baseline_tailprob"
Y_MODE = "baseline_z"

# Progress axis resolution (number of bins from 0% to 100%)
PROGRESS_BINS = 100

# Frames per second for building time axes before progress-binning (informational only)
FPS = 10.0

# ---- EMA smoothing only ----
# Methods (scores) EMA alpha
EMA_ALPHA_METHODS = 0.1  # (0,1], smaller = smoother
# Stats (absolute %) EMA alpha
EMA_ALPHA_STATS = 0.1  # (0,1], smaller = smoother

# LiDAR points per frame (for stats -> percent)
DATA_RESOLUTION = 32 * 2048

# Copy this script into outputs for provenance (best-effort if not running as a file)
COPY_SELF = True

# =====================================
# Setup output folders
# =====================================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = OUTPUT_PATH / "latest"
archive_folder_path = OUTPUT_PATH / "archive"
output_datetime_path = OUTPUT_PATH / datetime_folder_name

OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)

# =====================================
# Load Polars DataFrame via your helper
# =====================================
from load_results import load_inference_results_dataframe

df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)

required_cols = {
    "experiment",
    "network",
    "latent_dim",
    "semi_normals",
    "semi_anomalous",
    "model",
    "scores",
    "folder",
    "config_json",
}
missing = required_cols - set(df.columns)
if missing:
    raise KeyError(f"DataFrame missing required columns: {sorted(missing)}")


# =====================================
# Rebuild experiment → stats mapping (like your original)
# =====================================
def rebuild_experiment_index():
    normals, anomalies = [], []
    if not ALL_DATA_PATH.exists():
        return [], [], {}
    for bag in ALL_DATA_PATH.iterdir():
        if bag.suffix != ".bag":
            continue
        if "smoke" in bag.name:
            anomalies.append(bag)
        else:
            normals.append(bag)
    normals = sorted(normals, key=lambda p: p.stat().st_size)
    anomalies = sorted(anomalies, key=lambda p: p.stat().st_size)
    mapping = {}
    for i, p in enumerate(normals):
        mapping[p.stem] = (False, i, p)
    for i, p in enumerate(anomalies):
        mapping[p.stem] = (True, i, p)
    return normals, anomalies, mapping


normal_paths, anomaly_paths, exp_map = rebuild_experiment_index()

# Load cached statistical data and manual labels
missing_points_cache = CACHE_PATH / "missing_points.pkl"
near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl"
labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json"

missing_points_normal = missing_points_anomaly = None
near_sensor_normal = near_sensor_anomaly = None
if missing_points_cache.exists():
    with open(missing_points_cache, "rb") as f:
        missing_points_normal, missing_points_anomaly = pickle.load(f)
if near_sensor_cache.exists():
    with open(near_sensor_cache, "rb") as f:
        near_sensor_normal, near_sensor_anomaly = pickle.load(f)

manual_windows = {}
if labels_json_path.exists():
    with open(labels_json_path, "r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        manual_windows[file["filename"]] = (
            file.get("semi_target_begin_frame"),
            file.get("semi_target_end_frame"),
        )


# =====================================
# Helpers
# =====================================
def ema(x: np.ndarray, alpha: float) -> np.ndarray:
    if x is None or len(x) == 0:
        return x
    y = np.empty_like(x, dtype=float)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y


def to_np_list(list_cell) -> Optional[np.ndarray]:
    if list_cell is None:
        return None
    return np.asarray(list_cell, dtype=float).ravel()


def normalize_exp_name(name: str) -> str:
    # strip trailing run suffix like -001, -002 if present
    return re.sub(r"-\d{3}$", "", name)


def map_experiment_to_stats_stem(exp_name: str) -> Optional[str]:
    """Try exact match, then prefix match with / without -### suffix stripped."""
    if exp_name in exp_map:
        return exp_name
    base = normalize_exp_name(exp_name)
    if base in exp_map:
        return base
    for stem in exp_map.keys():
        if stem.startswith(exp_name) or stem.startswith(base):
            return stem
    return None


def get_stats_for_experiment(
    exp_name: str,
) -> Tuple[
    Optional[np.ndarray], Optional[np.ndarray], Tuple[Optional[int], Optional[int]]
]:
    key = map_experiment_to_stats_stem(exp_name)
    if key is None:
        return None, None, (None, None)
    is_anomaly, idx, path = exp_map[key]
    missing = near = None
    if missing_points_normal is not None and missing_points_anomaly is not None:
        series = (
            missing_points_anomaly[idx] if is_anomaly else missing_points_normal[idx]
        )
        missing = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0
    if near_sensor_normal is not None and near_sensor_anomaly is not None:
        series = near_sensor_anomaly[idx] if is_anomaly else near_sensor_normal[idx]
        near = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0
    npy_key = path.with_suffix(".npy").name
    window = manual_windows.get(npy_key, (None, None))
    return missing, near, window


def _bin_to_progress(x: np.ndarray, bins: int = PROGRESS_BINS) -> np.ndarray:
    """Average x into fixed #bins across its length (progress-normalized timeline)."""
    if x is None or len(x) == 0:
        return x
    n = len(x)
    edges = np.linspace(0, n, bins + 1, dtype=int)
    out = np.empty(bins, dtype=float)
    for i in range(bins):
        a, b = edges[i], edges[i + 1]
        if b <= a:
            out[i] = out[i - 1] if i > 0 else x[0]
        else:
            out[i] = float(np.mean(x[a:b]))
    return out


def _ecdf(x: np.ndarray):
    xs = np.sort(np.asarray(x, dtype=float))
    n = len(xs)

    def F(t):
        return float(np.searchsorted(xs, t, side="right")) / n

    return F


def baseline_transform(clean: np.ndarray, other: np.ndarray, mode: str):
    """Transform using stats from clean only."""
    assert mode in ("baseline_z", "baseline_tailprob")
    if clean is None or len(clean) == 0:
        return clean, other, "raw"
    if mode == "baseline_z":
        mu = float(np.mean(clean))
        sd = float(np.std(clean, ddof=0))
        if sd < 1e-12:
            zc = clean - mu
            zo = other - mu if other is not None else None
        else:
            zc = (clean - mu) / sd
            zo = (other - mu) / sd if other is not None else None
        return zc, zo, "Anomaly score (σ above clean)"
    else:
        F = _ecdf(clean)
        tp_clean = np.array([1.0 - F(v) for v in clean], dtype=float)
        tp_other = (
            np.array([1.0 - F(v) for v in other], dtype=float)
            if other is not None
            else None
        )
        return tp_clean, tp_other, "Tail probability vs clean (1 - F_clean)"


def pick_method_series(gdf: pl.DataFrame, label: str) -> Optional[np.ndarray]:
    if label == "DeepSAD (LeNet)":
        sel = gdf.filter(
            (pl.col("network") == "subter_LeNet") & (pl.col("model") == "deepsad")
        )
    elif label == "DeepSAD (efficient)":
        sel = gdf.filter(
            (pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad")
        )
    elif label == "OCSVM (LeNet)":
        sel = gdf.filter(
            (pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm")
        )
    elif label == "IsoForest (LeNet)":
        sel = gdf.filter(
            (pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest")
        )
    else:
        sel = pl.DataFrame()
    if sel.height == 0:
        return None
    row = sel.row(0)
    row_dict = {c: row[i] for i, c in enumerate(sel.columns)}
    return to_np_list(row_dict["scores"])


def group_slice(
    df: pl.DataFrame,
    experiment: str,
    latent_dim: int,
    semi_normals: int,
    semi_anomalous: int,
) -> pl.DataFrame:
    return df.filter(
        (pl.col("experiment") == experiment)
        & (pl.col("latent_dim") == latent_dim)
        & (pl.col("semi_normals") == semi_normals)
        & (pl.col("semi_anomalous") == semi_anomalous)
    )


def compare_two_experiments_progress(
    df: pl.DataFrame,
    experiment_clean: str,
    experiment_degraded: str,
    latent_dim: int,
    semi_normals: int,
    semi_anomalous: int,
    y_mode: str = "baseline_z",
    include_stats: bool = True,
):
    methods = [
        "DeepSAD (LeNet)",
        "DeepSAD (efficient)",
        "OCSVM (LeNet)",
        "IsoForest (LeNet)",
    ]

    g_clean = group_slice(
        df, experiment_clean, latent_dim, semi_normals, semi_anomalous
    )
    g_deg = group_slice(
        df, experiment_degraded, latent_dim, semi_normals, semi_anomalous
    )
    if g_clean.is_empty() or g_deg.is_empty():
        print(
            f"[WARN] Missing one of the experiment groups: clean({g_clean.height}), degraded({g_deg.height}). Skipping."
        )
        return 0

    # Stats (% absolute, EMA smoothed later)
    mp_clean, ns_clean, _ = get_stats_for_experiment(experiment_clean)
    mp_deg, ns_deg, _ = get_stats_for_experiment(experiment_degraded)

    # Build baseline-anchored, progress-binned curves per method
    curves_clean: Dict[str, np.ndarray] = {}
    curves_deg: Dict[str, np.ndarray] = {}
    y_label = "Anomaly"

    for label in methods:
        s_clean = pick_method_series(g_clean, label)
        s_deg = pick_method_series(g_deg, label)
        if s_clean is None or s_deg is None:
            continue

        # Smooth raw with EMA for stability before fitting baseline
        s_clean_sm = ema(s_clean.astype(float), EMA_ALPHA_METHODS)
        s_deg_sm = ema(s_deg.astype(float), EMA_ALPHA_METHODS)

        t_clean, t_deg, y_label = baseline_transform(s_clean_sm, s_deg_sm, y_mode)

        # Progress-bin both
        curves_clean[label] = _bin_to_progress(t_clean, PROGRESS_BINS)
        curves_deg[label] = _bin_to_progress(t_deg, PROGRESS_BINS)

    if not curves_clean:
        print("[WARN] No method curves available for comparison in this config.")
        return 0

    x = np.linspace(0, 100, PROGRESS_BINS)

    # ---- Figure 1: scores only
    fig1, ax1 = plt.subplots(figsize=(14, 6), constrained_layout=True)
    for label in methods:
        yc = curves_clean.get(label)
        yd = curves_deg.get(label)
        if yc is None or yd is None:
            continue
        ax1.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
        ax1.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
    ax1.set_xlabel("Progress through experiment (%)")
    ax1.set_ylabel(y_label)
    ax1.set_title(
        f"Methods across experiments (progress-normalized)\n"
        f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
        f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS})"
    )
    ax1.grid(True, alpha=0.3)
    ax1.legend(ncol=2, loc="upper right")
    out1 = (
        f"compare_{experiment_clean}_vs_{experiment_degraded}"
        f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
        f"_methods_{y_mode}.png"
    )
    fig1.savefig(output_datetime_path / out1, dpi=150)
    plt.close(fig1)

    made = 1

    if include_stats:
        # Prep stats: absolute %, EMA, progress-binned
        def prep_stat_pair(a, b):
            if a is None or len(a) == 0 or b is None or len(b) == 0:
                return None, None
            a_s = ema(a.astype(float), EMA_ALPHA_STATS)
            b_s = ema(b.astype(float), EMA_ALPHA_STATS)
            return _bin_to_progress(a_s, PROGRESS_BINS), _bin_to_progress(
                b_s, PROGRESS_BINS
            )

        mp_c, mp_d = prep_stat_pair(mp_clean, mp_deg)
        ns_c, ns_d = prep_stat_pair(ns_clean, ns_deg)

        # ---- Figure 2: + Missing points (%)
        if mp_c is not None and mp_d is not None:
            fig2, ax2 = plt.subplots(figsize=(14, 6), constrained_layout=True)
            axy2 = ax2.twinx()
            for label in methods:
                yc = curves_clean.get(label)
                yd = curves_deg.get(label)
                if yc is None or yd is None:
                    continue
                ax2.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
                ax2.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
            axy2.plot(x, mp_d, linestyle="-.", label="Missing points — degraded (%)")
            axy2.plot(x, mp_c, linestyle=":", label="Missing points — clean (%)")
            ax2.set_xlabel("Progress through experiment (%)")
            ax2.set_ylabel(y_label)
            axy2.set_ylabel("Missing points (%)")
            ax2.set_title(
                f"Methods vs Missing points (absolute %) — progress-normalized\n"
                f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
                f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})"
            )
            ax2.grid(True, alpha=0.3)
            L1, N1 = ax2.get_legend_handles_labels()
            L2, N2 = axy2.get_legend_handles_labels()
            ax2.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2)
            out2 = (
                f"compare_{experiment_clean}_vs_{experiment_degraded}"
                f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
                f"_{y_mode}_missing.png"
            )
            fig2.savefig(output_datetime_path / out2, dpi=150)
            plt.close(fig2)
            made += 1

        # ---- Figure 3: + Near-sensor (%)
        if ns_c is not None and ns_d is not None:
            fig3, ax3 = plt.subplots(figsize=(14, 6), constrained_layout=True)
            axy3 = ax3.twinx()
            for label in methods:
                yc = curves_clean.get(label)
                yd = curves_deg.get(label)
                if yc is None or yd is None:
                    continue
                ax3.plot(x, yd, label=f"{label} — degraded", linewidth=1.8)
                ax3.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2)
            axy3.plot(x, ns_d, linestyle="-.", label="Near-sensor — degraded (%)")
            axy3.plot(x, ns_c, linestyle=":", label="Near-sensor — clean (%)")
            ax3.set_xlabel("Progress through experiment (%)")
            ax3.set_ylabel(y_label)
            axy3.set_ylabel("Near-sensor points (%)")
            ax3.set_title(
                f"Methods vs Near-sensor (absolute %) — progress-normalized\n"
                f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n"
                f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})"
            )
            ax3.grid(True, alpha=0.3)
            L1, N1 = ax3.get_legend_handles_labels()
            L2, N2 = axy3.get_legend_handles_labels()
            ax3.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2)
            out3 = (
                f"compare_{experiment_clean}_vs_{experiment_degraded}"
                f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
                f"_{y_mode}_nearsensor.png"
            )
            fig3.savefig(output_datetime_path / out3, dpi=150)
            plt.close(fig3)
            made += 1

    return made


# =====================================
# Run comparison & save
# =====================================
plots_made = compare_two_experiments_progress(
    df=df,
    experiment_clean=EXPERIMENT_CLEAN,
    experiment_degraded=EXPERIMENT_DEGRADED,
    latent_dim=LATENT_DIM,
    semi_normals=SEMI_NORMALS,
    semi_anomalous=SEMI_ANOMALOUS,
    y_mode=Y_MODE,
    include_stats=True,
)

# =====================================
# Preserve latest/, archive/, copy script
# =====================================
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to preserve the code used (best effort)
if COPY_SELF:
    try:
        shutil.copy2(__file__, output_datetime_path)
        shutil.copy2(__file__, latest_folder_path)
    except Exception:
        (output_datetime_path / "run_config.json").write_text(
            json.dumps(
                {
                    "INFERENCE_ROOT": str(INFERENCE_ROOT),
                    "CACHE_PATH": str(CACHE_PATH),
                    "ALL_DATA_PATH": str(ALL_DATA_PATH),
                    "EXPERIMENT_CLEAN": EXPERIMENT_CLEAN,
                    "EXPERIMENT_DEGRADED": EXPERIMENT_DEGRADED,
                    "LATENT_DIM": LATENT_DIM,
                    "SEMI_NORMALS": SEMI_NORMALS,
                    "SEMI_ANOMALOUS": SEMI_ANOMALOUS,
                    "Y_MODE": Y_MODE,
                    "PROGRESS_BINS": PROGRESS_BINS,
                    "FPS": FPS,
                    "EMA_ALPHA_METHODS": EMA_ALPHA_METHODS,
                    "EMA_ALPHA_STATS": EMA_ALPHA_STATS,
                    "DATA_RESOLUTION": DATA_RESOLUTION,
                    "timestamp": datetime_folder_name,
                },
                indent=2,
            )
        )

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

print(f"Done. Wrote {plots_made} figure(s). Archived under: {archive_folder_path}")
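Note: the two Y_MODE transforms above are anchored to the clean run only. "baseline_z" expresses both score series in standard deviations above the clean run's mean; "baseline_tailprob" maps each score to the fraction of clean scores it exceeds. A small illustrative check with synthetic numbers (not data from the experiments):

import numpy as np

clean = np.array([1.0, 1.2, 0.9, 1.1, 1.0])     # scores on the clean run
degraded = np.array([1.0, 1.8, 2.4, 2.2, 1.9])  # scores on the degraded run

# baseline_z: standard deviations above the clean-run mean
mu, sd = clean.mean(), clean.std()
print((degraded - mu) / sd)  # large positive values sit well above the clean baseline

# baseline_tailprob: fraction of clean scores that each degraded score exceeds
xs = np.sort(clean)
print(1.0 - np.searchsorted(xs, degraded, side="right") / len(xs))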