data loading and plotting for results wip
tools/verify_loaded_results.py (new file, 295 lines)
from __future__ import annotations

from itertools import product
from pathlib import Path
from typing import Sequence

import polars as pl

from load_results import load_results_dataframe

# --- configure your intended grid here (use the *canonical* strings used in df) ---
NETWORKS_EXPECTED = ["subter_LeNet", "subter_efficient"]
LATENT_DIMS_EXPECTED = [32, 64, 128, 256, 512, 768, 1024]
SEMI_LABELS_EXPECTED = [(0, 0), (50, 10), (500, 100)]
MODELS_EXPECTED = ["deepsad", "isoforest", "ocsvm"]
EVALS_EXPECTED = ["exp_based", "manual_based"]

# If k-fold is uniform, set it. If None, we infer it *per combo* from df.
EXPECTED_K_FOLD: int | None = None  # e.g., 3
# utils/shape_checks.py


def equal_within_tolerance(lengths: Sequence[int], tol: int = 1) -> bool:
    """
    True iff max(lengths) - min(lengths) <= tol.
    Empty/one-item sequences return True.
    """
    if not lengths:
        return True
    mn = min(lengths)
    mx = max(lengths)
    return (mx - mn) <= tol
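
# A quick, illustrative sanity check of the tolerance helper (added as an
# example; not part of the original WIP script). With the default tol=1, a
# spread of at most 1 passes, anything larger fails, and empty input counts
# as consistent.
assert equal_within_tolerance([100, 101, 100])  # max - min == 1 -> True
assert not equal_within_tolerance([100, 103])  # max - min == 3 -> False
assert equal_within_tolerance([])  # empty sequence -> True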


def add_shape_columns(df: pl.DataFrame) -> pl.DataFrame:
    return df.with_columns(
        # scores length
        scores_len=pl.when(pl.col("scores").is_null())
        .then(None)
        .otherwise(pl.col("scores").list.len()),
        # deepsad-only arrays (None for others)
        idxs_len=pl.when(pl.col("sample_indices").is_null())
        .then(None)
        .otherwise(pl.col("sample_indices").list.len()),
        labels_len=pl.when(pl.col("sample_labels").is_null())
        .then(None)
        .otherwise(pl.col("sample_labels").list.len()),
        vmask_len=pl.when(pl.col("valid_mask").is_null())
        .then(None)
        .otherwise(pl.col("valid_mask").list.len()),
    )
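

# Minimal usage sketch for add_shape_columns (added as an example; not part of
# the original WIP script). The column names mirror the schema the checker
# expects, but the values are made up purely to show the derived *_len columns.
# It is not called anywhere; run it manually if you want to see the output.
def _demo_add_shape_columns() -> None:
    demo = pl.DataFrame(
        {
            "scores": [[0.1, 0.9, 0.4], None],
            "sample_indices": [[0, 1, 2], None],
            "sample_labels": [[0, 1, 0], None],
            "valid_mask": [[True, True, False], None],
        }
    )
    # Expected: row 0 has every *_len equal to 3, row 1 has nulls throughout.
    print(
        add_shape_columns(demo).select(
            "scores_len", "idxs_len", "labels_len", "vmask_len"
        )
    )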


def check_grid_coverage_and_shapes(
    df: pl.DataFrame,
    networks=NETWORKS_EXPECTED,
    latent_dims=LATENT_DIMS_EXPECTED,
    semi_labels=SEMI_LABELS_EXPECTED,
    models=MODELS_EXPECTED,
    evals=EVALS_EXPECTED,
    expected_k_fold: int | None = EXPECTED_K_FOLD,
):
    dfx = add_shape_columns(df)

    # helper: get rows for a specific base combo
    def subframe(net, lat, s_norm, s_anom, mdl, ev):
        return dfx.filter(
            (pl.col("network") == net)
            & (pl.col("latent_dim") == lat)
            & (pl.col("semi_normals") == s_norm)
            & (pl.col("semi_anomalous") == s_anom)
            & (pl.col("model") == mdl)
            & (pl.col("eval") == ev)
        )

    missing = []
    incomplete = []  # combos missing folds
    shape_inconsistent = []  # within-combo, across-fold score/idx/label/vmask mismatches
    cross_model_diffs = []  # across models at fixed (net,lat,semi,eval): scores_len only

    # 1) Coverage + within-combo shapes
    for net, lat, (s_norm, s_anom), mdl, ev in product(
        networks, latent_dims, semi_labels, models, evals
    ):
        sf = subframe(net, lat, s_norm, s_anom, mdl, ev).select(
            "fold",
            "k_fold_num",
            "scores_len",
            "idxs_len",
            "labels_len",
            "vmask_len",
        )

        if sf.height == 0:
            missing.append(
                dict(
                    network=net,
                    latent_dim=lat,
                    semi_normals=s_norm,
                    semi_anomalous=s_anom,
                    model=mdl,
                    eval=ev,
                )
            )
            continue

        # folds present vs expected
        folds_present = sorted(sf.get_column("fold").unique().to_list())
        if expected_k_fold is not None:
            kexp = expected_k_fold
        else:
            kexp = int(sf.get_column("k_fold_num").max())
        all_expected_folds = list(range(kexp))
        if folds_present != all_expected_folds:
            incomplete.append(
                dict(
                    network=net,
                    latent_dim=lat,
                    semi_normals=s_norm,
                    semi_anomalous=s_anom,
                    model=mdl,
                    eval=ev,
                    expected_folds=all_expected_folds,
                    present_folds=folds_present,
                )
            )

        # shape consistency across folds (for this combo)
        shape_cols = ["scores_len", "idxs_len", "labels_len", "vmask_len"]
        for colname in shape_cols:
            # Collect the distinct lengths for this combo; drop nulls up front
            # so None and int never end up in the same sorted() call.
            vals = sf.select(colname).to_series().to_list()
            uniq = sorted({v for v in vals if v is not None})
            # allow None-only columns (e.g., deepsad-only fields for other models)
            if len(uniq) > 1:
                per_fold = (
                    sf.select("fold", pl.col(colname))
                    .sort("fold")
                    .to_dict(as_series=False)
                )
                shape_inconsistent.append(
                    dict(
                        network=net,
                        latent_dim=lat,
                        semi_normals=s_norm,
                        semi_anomalous=s_anom,
                        model=mdl,
                        eval=ev,
                        metric=colname,
                        per_fold=per_fold,
                    )
                )

    # 2) Cross-model comparability at fixed (net,lat,semi,eval)
    # Only check number of test scores; ignore ROC/PRC binning entirely.
    base_keys = (
        df.select("network", "latent_dim", "semi_normals", "semi_anomalous", "eval")
        .unique()
        .iter_rows()
    )
    for net, lat, s_norm, s_anom, ev in base_keys:
        rows = (
            dfx.filter(
                (pl.col("network") == net)
                & (pl.col("latent_dim") == lat)
                & (pl.col("semi_normals") == s_norm)
                & (pl.col("semi_anomalous") == s_anom)
                & (pl.col("eval") == ev)
            )
            .group_by("model")
            .agg(
                pl.col("scores_len")
                .drop_nulls()
                .unique()
                .sort()
                .alias("scores_len_set"),
            )
            .to_dict(as_series=False)
        )
        if not rows:
            continue

        mdls = rows["model"]
        s_sets = [rows["scores_len_set"][i] for i in range(len(mdls))]
        # normalize: empty => ignore that model (no scores); single value => int; else => list
        norm = {}
        for m, vals in zip(mdls, s_sets):
            if len(vals) == 0:
                continue
            norm[m] = vals[0] if len(vals) == 1 else list(vals)

        if len(norm) > 1:
            # Compare as tuples to allow list values
            normalized_keys = [
                v if isinstance(v, int) else tuple(v) for v in norm.values()
            ]
            if len(set(normalized_keys)) > 1:
                cross_model_diffs.append(
                    dict(
                        network=net,
                        latent_dim=lat,
                        semi_normals=s_norm,
                        semi_anomalous=s_anom,
                        eval=ev,
                        metric="scores_len",
                        by_model=norm,
                    )
                )

    # --- Print a readable report ---
    print("\n=== GRID COVERAGE ===")
    print(f"Missing combos: {len(missing)}")
    for m in missing[:20]:
        print(" ", m)
    if len(missing) > 20:
        print(f" ... (+{len(missing) - 20} more)")

    print("\nIncomplete combos (folds missing):", len(incomplete))
    for inc in incomplete[:20]:
        print(
            " ",
            {
                k: inc[k]
                for k in [
                    "network",
                    "latent_dim",
                    "semi_normals",
                    "semi_anomalous",
                    "model",
                    "eval",
                ]
            },
            "expected",
            inc["expected_folds"],
            "present",
            inc["present_folds"],
        )
    if len(incomplete) > 20:
        print(f" ... (+{len(incomplete) - 20} more)")

    print("\n=== WITHIN-COMBO SHAPE CONSISTENCY (across folds) ===")
    print(f"Mismatching groups: {len(shape_inconsistent)}")
    for s in shape_inconsistent[:15]:
        hdr = {
            k: s[k]
            for k in [
                "network",
                "latent_dim",
                "semi_normals",
                "semi_anomalous",
                "model",
                "eval",
                "metric",
            ]
        }
        print(" ", hdr, "values:", s["per_fold"])
    if len(shape_inconsistent) > 15:
        print(f" ... (+{len(shape_inconsistent) - 15} more)")

    print("\n=== CROSS-MODEL COMPARABILITY (by shape) ===")
    print(
        f"Differences across models at fixed (net,lat,semi,eval): {len(cross_model_diffs)}"
    )
    for s in cross_model_diffs[:15]:
        hdr = {
            k: s[k]
            for k in [
                "network",
                "latent_dim",
                "semi_normals",
                "semi_anomalous",
                "eval",
                "metric",
            ]
        }
        print(" ", hdr, "by_model:", s["by_model"])
    if len(cross_model_diffs) > 15:
        print(f" ... (+{len(cross_model_diffs) - 15} more)")

    return {
        "missing": missing,
        "incomplete": incomplete,
        "shape_inconsistent": shape_inconsistent,
        "cross_model_diffs": cross_model_diffs,
    }


def main():
    root = Path("/home/fedex/mt/results/done")
    df = load_results_dataframe(root, allow_cache=True)
    report = check_grid_coverage_and_shapes(df)
    print(report)


if __name__ == "__main__":
    main()
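
# Usage note (added here; the path and behavior come from main() above,
# nothing else is assumed):
#
#     python tools/verify_loaded_results.py
#
# This loads the results dataframe from /home/fedex/mt/results/done (with
# caching allowed), prints the coverage/shape report, and then prints the
# returned dict with the keys "missing", "incomplete", "shape_inconsistent",
# and "cross_model_diffs".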