wip inference
@@ -96,6 +96,21 @@ PRETRAIN_SCHEMA = {
     "config_json": pl.Utf8,  # full config.json as string (for reference)
 }
 
+SCHEMA_INFERENCE = {
+    # identifiers / dims
+    "experiment": pl.Utf8,  # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001"
+    "network": pl.Utf8,  # e.g. "LeNet", "efficient"
+    "latent_dim": pl.Int32,
+    "semi_normals": pl.Int32,
+    "semi_anomalous": pl.Int32,
+    "model": pl.Utf8,  # "deepsad" | "isoforest" | "ocsvm"
+    # metrics
+    "scores": pl.List(pl.Float64),
+    # housekeeping
+    "folder": pl.Utf8,
+    "config_json": pl.Utf8,  # full config.json as string (for reference)
+}
+
 
 # ------------------------------------------------------------
 # Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
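For context on how the new schema is consumed later in this diff: polars can build a correctly typed empty frame straight from a schema dict, which is what the loader's empty-result fallback below relies on. A minimal sketch, not part of the commit, using an abbreviated stand-in for SCHEMA_INFERENCE:

import polars as pl

# Abbreviated stand-in for SCHEMA_INFERENCE above
schema = {
    "experiment": pl.Utf8,
    "model": pl.Utf8,
    "scores": pl.List(pl.Float64),
}

# A frame built from a schema dict carries the declared dtypes even
# with zero rows, so downstream code can rely on column types.
empty = pl.DataFrame(schema=schema)
print(empty.schema)  # experiment: Utf8, model: Utf8, scores: List(Float64)
print(empty.height)  # 0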
@@ -233,11 +248,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]:
 
 # ------------------------------------------------------------
 # Low-level: read one experiment folder
 # ------------------------------------------------------------
-def read_config(exp_dir: Path) -> dict:
+def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict:
     cfg = exp_dir / "config.json"
     with cfg.open("r") as f:
         c = json.load(f)
-    if not c.get("k_fold"):
+    if k_fold_required and not c.get("k_fold"):
         raise ValueError(f"{exp_dir.name}: not trained as k-fold")
     return c
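The new k_fold_required flag keeps the strict check as the default, so existing training-analysis callers are unaffected, while inference folders (which are not k-fold trained) can opt out. A hedged usage sketch, assuming read_config is imported from this module and exp_dir points at a hypothetical experiment folder:

from pathlib import Path

exp_dir = Path("results/some_experiment")  # hypothetical folder

try:
    cfg = read_config(exp_dir)  # default: rejects configs without a truthy "k_fold"
except ValueError:
    # lenient path, as used by the inference loader below
    cfg = read_config(exp_dir, k_fold_required=False)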
@@ -589,7 +604,129 @@ def load_pretraining_results_dataframe(
     return df
 
 
+def load_inference_results_dataframe(
+    root: Path,
+    allow_cache: bool = True,
+    models: List[str] = MODELS,
+) -> pl.DataFrame:
+    """Load inference results from experiment folders.
+
+    Args:
+        root: Path to the root directory containing experiment folders
+        allow_cache: Whether to use/create a Parquet cache file
+        models: List of model names whose score files to load
+
+    Returns:
+        pl.DataFrame: DataFrame containing inference results
+    """
+    if allow_cache:
+        cache = root / "inference_results_cache.parquet"
+        if cache.exists():
+            try:
+                df = pl.read_parquet(cache)
+                print(f"[info] loaded cached inference frame from {cache}")
+                return df
+            except Exception as e:
+                print(f"[warn] failed to load inference cache {cache}: {e}")
+
+    rows: List[dict] = []
+
+    exp_dirs = [p for p in root.iterdir() if p.is_dir()]
+    for exp_dir in sorted(exp_dirs):
+        try:
+            # Load the config; inference runs need not be k-fold trained
+            cfg = read_config(exp_dir, k_fold_required=False)
+            cfg_json = json.dumps(cfg, sort_keys=True)
+
+            # Extract config values
+            network = cfg.get("net_name")
+            latent_dim = int(cfg.get("latent_space_dim"))
+            semi_normals = int(cfg.get("num_known_normal"))
+            semi_anomalous = int(cfg.get("num_known_outlier"))
+
+            # Process each model's scores
+            inference_dir = exp_dir / "inference"
+            if not inference_dir.exists():
+                print(f"[warn] no inference directory for {exp_dir.name}")
+                continue
+
+            # Find all unique experiments in this folder's inference files
+            score_files = list(inference_dir.glob("*_scores.npy"))
+            if not score_files:
+                print(f"[warn] no score files in {inference_dir}")
+                continue
+
+            # Extract unique experiment names from score files
+            # Filename format: {experiment}_{model}_scores.npy
+            experiments = set()
+            for score_file in score_files:
+                exp_name = score_file.stem.rsplit("_", 2)[0]
+                experiments.add(exp_name)
+
+            # Load scores for each experiment/model combination
+            for experiment in sorted(experiments):
+                for model in models:
+                    score_file = inference_dir / f"{experiment}_{model}_scores.npy"
+                    if not score_file.exists():
+                        print(f"[warn] missing score file for {experiment}, {model}")
+                        continue
+
+                    try:
+                        scores = np.load(score_file)
+                        rows.append(
+                            {
+                                "experiment": experiment,
+                                "network": network,
+                                "latent_dim": latent_dim,
+                                "semi_normals": semi_normals,
+                                "semi_anomalous": semi_anomalous,
+                                "model": model,
+                                "scores": scores.tolist(),
+                                "folder": str(exp_dir),
+                                "config_json": cfg_json,
+                            }
+                        )
+                    except Exception as e:
+                        print(
+                            f"[warn] failed to load scores for {experiment}, {model}: {e}"
+                        )
+                        continue
+
+        except Exception as e:
+            print(f"[warn] skipping {exp_dir.name}: {e}")
+            continue
+
+    # If empty, return a typed empty frame
+    if not rows:
+        return pl.DataFrame(schema=SCHEMA_INFERENCE)
+
+    df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE)
+
+    # Optimize datatypes: low-cardinality strings become categoricals
+    df = df.with_columns(
+        [
+            pl.col("experiment", "network", "model").cast(pl.Categorical),
+            pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32),
+        ]
+    )
+
+    # Cache if enabled
+    if allow_cache:
+        try:
+            df.write_parquet(cache)
+            print(f"[info] cached inference frame to {cache}")
+        except Exception as e:
+            print(f"[warn] failed to write cache {cache}: {e}")
+
+    return df
+
+
 def main():
+    inference_root = Path("/home/fedex/mt/results/inference/copy")
+    df_inference = load_inference_results_dataframe(inference_root, allow_cache=True)
+
+    exit(0)  # wip: stop after the inference load while iterating
+
     root = Path("/home/fedex/mt/results/copy")
     df1 = load_results_dataframe(root, allow_cache=True)
     exit(0)
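The loader leans on the {experiment}_{model}_scores.npy naming convention: rsplit("_", 2) peels off the two trailing underscore-separated tokens (the model name and the literal "scores"), so experiment names may themselves contain underscores as long as model names do not. A standalone check, reusing the example experiment name from the schema comment:

from pathlib import Path

stem = Path(
    "2_static_no_artifacts_illuminated_2023-01-23-001_deepsad_scores.npy"
).stem
experiment, model, suffix = stem.rsplit("_", 2)
print(experiment)  # 2_static_no_artifacts_illuminated_2023-01-23-001
print(model)       # deepsad
print(suffix)      # scores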
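Once loaded, the per-row scores lists flatten naturally for model-level statistics. A sketch of one possible downstream use, not part of the commit; the two-row frame here stands in for the output of load_inference_results_dataframe:

import polars as pl

# Stand-in for df = load_inference_results_dataframe(root)
df = pl.DataFrame(
    {
        "model": ["deepsad", "isoforest"],
        "scores": [[0.1, 0.9, 0.4], [0.2, 0.3]],
    }
)

# One row per individual score, then summary stats per model
stats = (
    df.explode("scores")
    .group_by("model")
    .agg(
        pl.col("scores").mean().alias("mean_score"),
        pl.col("scores").max().alias("max_score"),
    )
)
print(stats)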