import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl

# =====================================
# User-configurable params
# =====================================

# Root directory that contains per-run outputs (your loader will scan this)
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")

# Path that holds cached stats (same as before)
CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline")

# Root data path containing .bag files to rebuild ordering (for stats mapping)
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")

# Output base directory (a timestamped subfolder is created here, copied to "latest/", then moved to "archive/")
OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")

# Frames per second for x-axis time
FPS = 10.0

# ---- Smoothing: EMA only ----
EMA_ALPHA = 0.1  # model scores; alpha in (0, 1], smaller = smoother
STATS_EMA_ALPHA = 0.1  # stats (absolute %); tweak independently if desired

# Whether to z-score per curve for the model methods (recommended)
Z_SCORE_MODELS = True

# If some model's series is longer/shorter than others in a group, align to min length
ALIGN_TO_MIN_LENGTH = True

# Whether to try to align model score sign so that higher = more degraded, using the manual window
ALIGN_SCORE_DIRECTION = True

# LiDAR points per frame (for stats -> percent)
DATA_RESOLUTION = 32 * 2048

# =====================================
# Setup output folders
# =====================================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = OUTPUT_PATH / "latest"
archive_folder_path = OUTPUT_PATH / "archive"
output_datetime_path = OUTPUT_PATH / datetime_folder_name

OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)

# =====================================
# Load Polars DataFrame via your helper
# =====================================
from load_results import load_inference_results_dataframe

df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)

# sanity check
expected_cols = {
    "experiment",
    "network",
    "latent_dim",
    "semi_normals",
    "semi_anomalous",
    "model",
    "scores",
    "folder",
    "config_json",
}
missing_cols = expected_cols - set(df.columns)
if missing_cols:
    raise KeyError(f"DataFrame missing required columns: {sorted(missing_cols)}")

# =====================================
# Rebuild experiment → stats mapping (like your original)
# =====================================
def rebuild_experiment_index():
    normals, anomalies = [], []
    if not ALL_DATA_PATH.exists():
        return [], [], {}
    for bag in ALL_DATA_PATH.iterdir():
        if bag.suffix != ".bag":
            continue
        if "smoke" in bag.name:
            anomalies.append(bag)
        else:
            normals.append(bag)
    normals = sorted(normals, key=lambda p: p.stat().st_size)
    anomalies = sorted(anomalies, key=lambda p: p.stat().st_size)
    mapping = {}
    for i, p in enumerate(normals):
        mapping[p.stem] = (False, i, p)
    for i, p in enumerate(anomalies):
        mapping[p.stem] = (True, i, p)
    return normals, anomalies, mapping


normal_paths, anomaly_paths, exp_map = rebuild_experiment_index()

# Load cached statistical data (+ manual labels)
missing_points_cache = CACHE_PATH / "missing_points.pkl"
near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl"
labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json"
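# NOTE (assumption, inferred from the loading code below rather than verified against the
# cache files): each stats pickle is expected to hold a 2-tuple (normal_series, anomaly_series),
# where each element is indexable by the size-ordered bag index stored in exp_map and yields a
# per-frame point count. get_stats_for_experiment() relies on exactly this layout.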
missing_points_normal = missing_points_anomaly = None
near_sensor_normal = near_sensor_anomaly = None

if missing_points_cache.exists():
    with open(missing_points_cache, "rb") as f:
        missing_points_normal, missing_points_anomaly = pickle.load(f)
if near_sensor_cache.exists():
    with open(near_sensor_cache, "rb") as f:
        near_sensor_normal, near_sensor_anomaly = pickle.load(f)

manual_windows = {}
if labels_json_path.exists():
    with open(labels_json_path, "r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        manual_windows[file["filename"]] = (
            file.get("semi_target_begin_frame"),
            file.get("semi_target_end_frame"),
        )


def get_stats_for_experiment(exp_name: str):
    """
    Returns:
        missing_pct (np.ndarray) | None,
        near_pct (np.ndarray) | None,
        anomaly_window (tuple(start, end)) | (None, None)
    """
    if exp_name not in exp_map:
        return None, None, (None, None)
    is_anomaly, idx, path = exp_map[exp_name]

    missing = None
    near = None
    if missing_points_normal is not None and missing_points_anomaly is not None:
        series = (
            missing_points_anomaly[idx] if is_anomaly else missing_points_normal[idx]
        )
        missing = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0
    if near_sensor_normal is not None and near_sensor_anomaly is not None:
        series = near_sensor_anomaly[idx] if is_anomaly else near_sensor_normal[idx]
        near = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0

    npy_key = path.with_suffix(".npy").name
    window = manual_windows.get(npy_key, (None, None))
    return missing, near, window


# =====================================
# Helpers
# =====================================
def to_np(a):
    """Convert a Polars list cell to a 1D NumPy array of float."""
    if a is None:
        return None
    return np.asarray(a, dtype=float).ravel()


def zscore_1d(x, eps=1e-12):
    if x is None or len(x) == 0:
        return x
    mu = float(np.mean(x))
    sigma = float(np.std(x, ddof=0))
    return np.zeros_like(x) if sigma < eps else (x - mu) / sigma


def ema(x, alpha):
    if x is None or len(x) == 0:
        return x
    y = np.empty_like(x, dtype=float)
    y[0] = x[0]
    for i in range(1, len(x)):
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
    return y


def apply_ema_models(x):
    return ema(x, EMA_ALPHA)


def apply_ema_stats(x):
    return ema(x, STATS_EMA_ALPHA)


def align_lengths(series_dict):
    """Truncate all series to the shortest available length."""
    valid_lengths = [
        len(v) for v in series_dict.values() if v is not None and len(v) > 0
    ]
    if not valid_lengths:
        return series_dict
    min_len = min(valid_lengths)
    return {k: (v[:min_len] if v is not None else None) for k, v in series_dict.items()}


def maybe_align_direction(z: np.ndarray, window):
    """Flip sign so that the anomaly-window mean is higher than the outside mean, if labels exist."""
    if z is None:
        return z
    start, end = window
    if start is None or end is None:
        return z
    start = int(max(0, start))
    end = int(min(len(z), end))
    if end <= start or end > len(z):
        return z
    inside_mean = float(np.mean(z[start:end]))
    if start == 0 and end == len(z):
        return z
    outside_parts = []
    if start > 0:
        outside_parts.append(z[:start])
    if end < len(z):
        outside_parts.append(z[end:])
    if not outside_parts:
        return z
    outside_mean = float(np.mean(np.concatenate(outside_parts)))
    return z if inside_mean >= outside_mean else -z


def safe_title(s: str) -> str:
    return s.replace("_", " ")
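
# Hedged sanity check (not part of the original pipeline): exercise the ema() helper above on a
# tiny synthetic series. With alpha = 0.5 every step moves halfway toward the new sample, so the
# smoothed values should be [0.0, 0.5, 0.75, 0.875].
_ema_demo = ema(np.array([0.0, 1.0, 1.0, 1.0]), alpha=0.5)
assert np.allclose(_ema_demo, [0.0, 0.5, 0.75, 0.875])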
(pl.col("network") == "subter_LeNet") & (pl.col("model") == "deepsad") ) sel["DeepSAD (efficient)"] = gdf.filter( (pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad") ) sel["OCSVM (LeNet)"] = gdf.filter( (pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm") ) sel["IsoForest (LeNet)"] = gdf.filter( (pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest") ) chosen = {} for k, dfk in sel.items(): chosen[k] = dfk.row(0) if dfk.height > 0 else None return chosen # ===================================== # Iterate groups and plot # ===================================== plots_made = 0 for keys, g in df.group_by(group_cols, maintain_order=True): experiment, latent_dim, semi_normals, semi_anomalous = keys chosen = pick_rows(g) # Extract series for models curves_raw = {} for label, row in chosen.items(): if row is None: curves_raw[label] = None continue row_dict = {c: row[i] for i, c in enumerate(df.columns)} scores = to_np(row_dict["scores"]) curves_raw[label] = scores # If nothing to plot, skip group if all(v is None or len(v) == 0 for v in curves_raw.values()): continue # Stats for this experiment (absolute %; no z-scoring) missing_pct, near_pct, anomaly_window = get_stats_for_experiment(experiment) # Optionally align lengths among model curves curves = curves_raw.copy() if ALIGN_TO_MIN_LENGTH: curves = align_lengths(curves) # Prepare processed model curves: z-score (if enabled) + EMA smoothing proc = {} for k, v in curves.items(): if v is None: continue x = zscore_1d(v) if Z_SCORE_MODELS else v.astype(float) if ALIGN_SCORE_DIRECTION and anomaly_window != (None, None): x = maybe_align_direction(x, anomaly_window) x = apply_ema_models(x) proc[k] = x if not proc: continue # Establish time axis for model curves any_len = len(next(iter(proc.values()))) t_models = np.arange(any_len) / FPS # =========== Plot A: Scores-only (models z-scored; stats not shown) =========== figA, axA = plt.subplots(figsize=(14, 6), constrained_layout=True) for label, y in proc.items(): if y is not None: axA.plot(t_models, y, label=label) axA.set_xlabel("Time (s)") axA.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) titleA = ( f"{safe_title(experiment)} | latent_dim={latent_dim}, " f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" f"Smoothing: EMA(alpha={EMA_ALPHA})" ) axA.set_title(titleA) axA.grid(True, alpha=0.3) axA.legend(loc="upper right") fnameA = ( f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" f"_scores_EMA-{EMA_ALPHA}{'_z' if Z_SCORE_MODELS else ''}.png" ) figA.savefig(output_datetime_path / fnameA, dpi=150) plt.close(figA) # =========== Plot B: Models (z-scored) + Missing Points (%) absolute =========== if missing_pct is not None and len(missing_pct) > 0: mp = missing_pct if ALIGN_TO_MIN_LENGTH: mp = mp[:any_len] mp_s = apply_ema_stats(mp) t_stats = np.arange(len(mp_s)) / FPS figB, axB = plt.subplots(figsize=(14, 6), constrained_layout=True) axBy = axB.twinx() for label, y in proc.items(): if y is not None: axB.plot(t_models, y, label=label) axBy.plot(t_stats, mp_s, linestyle="--", label="Missing points (%)") if anomaly_window != (None, None): start, end = anomaly_window if isinstance(start, int) and isinstance(end, int) and 0 <= start < end: axB.axvline(start / FPS, linestyle=":", alpha=0.6) axB.axvline(end / FPS, linestyle=":", alpha=0.6) axB.set_xlabel("Time (s)") axB.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) axBy.set_ylabel("Missing points (%)") titleB = ( 
f"{safe_title(experiment)} | latent_dim={latent_dim}, " f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Missing points (absolute %)" ) axB.set_title(titleB) axB.grid(True, alpha=0.3) lines1, labels1 = axB.get_legend_handles_labels() lines2, labels2 = axBy.get_legend_handles_labels() axB.legend(lines1 + lines2, labels1 + labels2, loc="upper right") fnameB = ( f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" f"_scores_plus_missing_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}" f"{'_z' if Z_SCORE_MODELS else ''}.png" ) figB.savefig(output_datetime_path / fnameB, dpi=150) plt.close(figB) # =========== Plot C: Models (z-scored) + Near-sensor Points (%) absolute =========== if near_pct is not None and len(near_pct) > 0: ns = near_pct if ALIGN_TO_MIN_LENGTH: ns = ns[:any_len] ns_s = apply_ema_stats(ns) t_stats = np.arange(len(ns_s)) / FPS figC, axC = plt.subplots(figsize=(14, 6), constrained_layout=True) axCy = axC.twinx() for label, y in proc.items(): if y is not None: axC.plot(t_models, y, label=label) axCy.plot(t_stats, ns_s, linestyle="--", label="Near-sensor <0.5m (%)") if anomaly_window != (None, None): start, end = anomaly_window if isinstance(start, int) and isinstance(end, int) and 0 <= start < end: axC.axvline(start / FPS, linestyle=":", alpha=0.6) axC.axvline(end / FPS, linestyle=":", alpha=0.6) axC.set_xlabel("Time (s)") axC.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) axCy.set_ylabel("Near-sensor points (%)") titleC = ( f"{safe_title(experiment)} | latent_dim={latent_dim}, " f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Near-sensor <0.5m (absolute %)" ) axC.set_title(titleC) axC.grid(True, alpha=0.3) lines1, labels1 = axC.get_legend_handles_labels() lines2, labels2 = axCy.get_legend_handles_labels() axC.legend(lines1 + lines2, labels1 + labels2, loc="upper right") fnameC = ( f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" f"_scores_plus_nearsensor_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}" f"{'_z' if Z_SCORE_MODELS else ''}.png" ) figC.savefig(output_datetime_path / fnameC, dpi=150) plt.close(figC) plots_made += 1 # ===================================== # Preserve latest/, archive/, copy script # ===================================== # delete current latest folder shutil.rmtree(latest_folder_path, ignore_errors=True) # create new latest folder latest_folder_path.mkdir(exist_ok=True, parents=True) # copy contents of output folder to the latest folder for file in output_datetime_path.iterdir(): shutil.copy2(file, latest_folder_path) # copy this python script to preserve the code used try: shutil.copy2(__file__, output_datetime_path) shutil.copy2(__file__, latest_folder_path) except Exception: # If running interactively, fall back to saving the config snapshot (output_datetime_path / "run_config.json").write_text( json.dumps( { "INFERENCE_ROOT": str(INFERENCE_ROOT), "CACHE_PATH": str(CACHE_PATH), "ALL_DATA_PATH": str(ALL_DATA_PATH), "FPS": FPS, "EMA_ALPHA": EMA_ALPHA, "STATS_EMA_ALPHA": STATS_EMA_ALPHA, "Z_SCORE_MODELS": Z_SCORE_MODELS, "ALIGN_TO_MIN_LENGTH": ALIGN_TO_MIN_LENGTH, "ALIGN_SCORE_DIRECTION": ALIGN_SCORE_DIRECTION, "timestamp": datetime_folder_name, }, indent=2, ) ) # move output date folder to archive shutil.move(output_datetime_path, archive_folder_path) print(f"Done. Plotted {plots_made} groups. Archived under: {archive_folder_path}")