Compare commits
57 Commits
86d9d96ca4
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b5accb6c5 | ||
|
|
8f983b890f | ||
|
|
6cd2c7fbef | ||
|
|
62c424cd54 | ||
|
|
bd9171f68e | ||
|
|
efdc33035b | ||
|
|
f2c8fe241d | ||
|
|
ece887860b | ||
|
|
c3830db913 | ||
|
|
3d21171a40 | ||
|
|
5aca00ad67 | ||
|
|
374420727b | ||
|
|
8697c07c0f | ||
|
|
5287f2c557 | ||
|
|
b7faf6e1b6 | ||
|
|
0354ad37e1 | ||
|
|
32ab4e6a11 | ||
|
|
055d403dfb | ||
|
|
28b6eba094 | ||
|
|
436a25df11 | ||
|
|
5d0610a875 | ||
|
|
545b65d3d5 | ||
|
|
8db244901e | ||
|
|
72afe9ebdc | ||
|
|
81c1e5b7af | ||
|
|
6040f5f144 | ||
|
|
d5f5a09d6f | ||
|
|
a6f5ecaba2 | ||
|
|
1f3e607e8d | ||
|
|
3bf457f2cf | ||
|
|
3eb7e662b0 | ||
|
|
2411f8b1a7 | ||
|
|
fe45de00ca | ||
|
|
1e71600102 | ||
|
|
d93f1a52a9 | ||
|
|
e34a374adc | ||
|
|
f36477ed9b | ||
|
|
52dabf0f89 | ||
|
|
e00d1a33e3 | ||
|
|
c270783225 | ||
|
|
cfb77dccab | ||
|
|
4c8df5cae0 | ||
|
|
f93bbaeec1 | ||
|
|
9ec73c5992 | ||
|
|
8e7c210872 | ||
|
|
a20a4a0832 | ||
|
|
8f36bd2e07 | ||
|
|
936d2ecb6e | ||
|
|
95867bde7a | ||
|
|
cc5a8d25d3 | ||
|
|
e20c2235ed | ||
|
|
e7624d2786 | ||
|
|
e4b298cf06 | ||
|
|
35766b9028 | ||
|
|
85cd33cd5b | ||
|
|
cf15d5501e | ||
|
|
ef0c36eed5 |
@@ -4,9 +4,11 @@ let
|
||||
torch-bin
|
||||
torchvision-bin
|
||||
aggdraw # for visualtorch
|
||||
nvidia-ml-py
|
||||
];
|
||||
tools = with pkgs; [
|
||||
ruff
|
||||
dmidecode
|
||||
];
|
||||
in
|
||||
{
|
||||
|
||||
54
Deep-SAD-PyTorch/hardware_survey/hardware_survey_deepio.tex
Normal file
@@ -0,0 +1,54 @@
|
||||
|
||||
% ---- Add to your LaTeX preamble ----
|
||||
% \usepackage{booktabs}
|
||||
% \usepackage{array}
|
||||
% ------------------------------------
|
||||
\begin{table}[p]
|
||||
\centering
|
||||
\caption{Computational Environment (Hardware \& Software)} \label{tab:system_setup}
|
||||
\begin{tabular}{p{0.34\linewidth} p{0.62\linewidth}}
|
||||
\toprule
|
||||
\textbf{Item} & \textbf{Details} \\
|
||||
\midrule
|
||||
\multicolumn{2}{l}{\textbf{System}} \\
|
||||
Operating System & \ttfamily Ubuntu 22.04.5 LTS \\
|
||||
Kernel & \ttfamily 6.5.0-44-generic \\
|
||||
Architecture & \ttfamily x86\_64 \\
|
||||
CPU Model & \ttfamily AMD Ryzen Threadripper 3970X 32-Core Processor \\
|
||||
CPU Cores (physical) & \ttfamily 32 × 1 \\
|
||||
CPU Threads (logical) & \ttfamily 64 \\
|
||||
CPU Base Frequency & \ttfamily 2200 MHz \\
|
||||
CPU Max Frequency & \ttfamily 3700 MHz \\
|
||||
Total RAM & \ttfamily 94.14 GiB \\
|
||||
\addlinespace
|
||||
\multicolumn{2}{l}{\textbf{GPU (Selected Newer Device)}} \\
|
||||
Selected GPU Name & \ttfamily NVIDIA GeForce RTX 4090 \\
|
||||
Selected GPU Memory & \ttfamily 23.99 GiB \\
|
||||
Selected GPU Compute Capability & \ttfamily 8.9 \\
|
||||
NVIDIA Driver Version & \ttfamily 535.161.07 \\
|
||||
CUDA (Driver) Version & \ttfamily 12.2 \\
|
||||
\addlinespace
|
||||
\multicolumn{2}{l}{\textbf{Software Environment}} \\
|
||||
Python & \ttfamily 3.11.13 \\
|
||||
PyTorch & \ttfamily 2.7.1+cu126 \\
|
||||
PyTorch Built CUDA & \ttfamily 12.6 \\
|
||||
cuDNN (PyTorch build) & \ttfamily 90501 \\
|
||||
scikit-learn & \ttfamily 1.7.0 \\
|
||||
NumPy & \ttfamily 2.3.0 \\
|
||||
SciPy & \ttfamily 1.15.3 \\
|
||||
NumPy Build Config & \begin{minipage}[t]{\linewidth}\ttfamily\small "blas": \{
|
||||
"name": "scipy-openblas",
|
||||
"include directory": "/opt/\_internal/cpython-3.11.12/lib/python3.11/site-packages/scipy\_openblas64/include",
|
||||
"lib directory": "/opt/\_internal/cpython-3.11.12/lib/python3.11/site-packages/scipy\_openblas64/lib",
|
||||
"openblas configuration": "OpenBLAS 0.3.29 USE64BITINT DYNAMIC\_ARCH NO\_AFFINITY Haswell MAX\_THREADS=64",
|
||||
"pc file directory": "/project/.openblas"
|
||||
"lapack": \{
|
||||
"name": "scipy-openblas",
|
||||
"include directory": "/opt/\_internal/cpython-3.11.12/lib/python3.11/site-packages/scipy\_openblas64/include",
|
||||
"lib directory": "/opt/\_internal/cpython-3.11.12/lib/python3.11/site-packages/scipy\_openblas64/lib",
|
||||
"openblas configuration": "OpenBLAS 0.3.29 USE64BITINT DYNAMIC\_ARCH NO\_AFFINITY Haswell MAX\_THREADS=64",
|
||||
"pc file directory": "/project/.openblas"\end{minipage} \\
|
||||
\addlinespace
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\end{table}
|
||||
501
Deep-SAD-PyTorch/hardware_survey/main.py
Normal file
@@ -0,0 +1,501 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Generate a LaTeX table (a booktabs-styled `table` float) with system + software info for a thesis (Linux + NVIDIA).
|
||||
|
||||
Requirements (preflight will check and error if missing):
|
||||
- Linux OS
|
||||
- lscpu (util-linux)
|
||||
- Python packages: nvidia-ml-py (provides the pynvml module), torch, numpy, scipy, scikit-learn
|
||||
- NVIDIA driver present and at least one GPU visible via NVML
|
||||
|
||||
What it reports (per user’s list):
|
||||
System:
|
||||
- OS name + version + distribution (Linux) + kernel version + system arch
|
||||
- CPU model name, number of cores and threads, base frequencies (best-effort via lscpu)
|
||||
- Total RAM capacity
|
||||
- GPU(s): model name (only the newer one; prefer a name matching “4090”, else highest compute capability),
|
||||
memory size, driver version, CUDA (driver) version, cuDNN version (if used via PyTorch)
|
||||
|
||||
Software environment:
|
||||
- Python version
|
||||
- PyTorch version + built CUDA/cuDNN version
|
||||
- scikit-learn version
|
||||
- NumPy / SciPy version (+ NumPy build config summary: MKL/OpenBLAS/etc.)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# -------------------- Helper --------------------
|
||||
|
||||
|
||||
def _import_nvml():
|
||||
"""
|
||||
Try to import NVML from the supported packages:
|
||||
- 'nvidia-ml-py' (preferred, maintained): provides module 'pynvml'
|
||||
- legacy 'pynvml' (deprecated but still widely installed)
|
||||
Returns the imported module object (with nvml... symbols).
|
||||
"""
|
||||
try:
|
||||
import pynvml as _nvml # provided by 'nvidia-ml-py' or old 'pynvml'
|
||||
|
||||
return _nvml
|
||||
except Exception as e:
|
||||
raise ImportError(
|
||||
"NVML not importable. Please install the maintained package:\n"
|
||||
" pip install nvidia-ml-py\n"
|
||||
"(and uninstall deprecated ones: pip uninstall nvidia-ml-py3 pynvml)"
|
||||
) from e
|
||||
|
||||
|
||||
def _to_text(x) -> str:
|
||||
"""Return a clean str whether NVML gives bytes or str."""
|
||||
if isinstance(x, bytes):
|
||||
try:
|
||||
return x.decode(errors="ignore")
|
||||
except Exception:
|
||||
return x.decode("utf-8", "ignore")
|
||||
return str(x)
|
||||
|
||||
|
||||
# -------------------- Utilities --------------------
|
||||
|
||||
|
||||
def which(cmd: str) -> str:
    """Return the path ``shutil.which`` resolves for ``cmd``, or "" if none."""
    resolved = shutil.which(cmd)
    return resolved if resolved else ""
|
||||
|
||||
|
||||
def run(cmd: List[str], timeout: int = 6) -> str:
    """Run ``cmd`` and return its combined stdout/stderr as stripped text.

    Best-effort: any failure (missing executable, non-zero exit status,
    timeout, ...) yields an empty string instead of an exception.

    Args:
        cmd: Argument vector passed to ``subprocess.check_output``.
        timeout: Seconds to wait before giving up.

    Returns:
        The decoded, stripped output, or "" on any error.
    """
    text = ""
    try:
        captured = subprocess.check_output(
            cmd, stderr=subprocess.STDOUT, timeout=timeout
        )
        text = captured.decode(errors="ignore").strip()
    except Exception:
        text = ""
    return text
|
||||
|
||||
|
||||
def human_bytes(nbytes: int) -> str:
    """Format a byte count using 1024-based units (B .. TiB), two decimals.

    Args:
        nbytes: Anything ``float()`` accepts.

    Returns:
        A string such as ``"1.50 KiB"``, or "" when ``nbytes`` cannot be
        converted to ``float``.
    """
    try:
        amount = float(nbytes)
    except Exception:
        return ""

    suffixes = ("B", "KiB", "MiB", "GiB", "TiB")
    # Walk every unit except the largest; the largest absorbs whatever is left.
    for suffix in suffixes[:-1]:
        if not amount >= 1024:
            return f"{amount:.2f} {suffix}"
        amount /= 1024.0
    return f"{amount:.2f} {suffixes[-1]}"
|
||||
|
||||
|
||||
# Single characters that LaTeX treats specially, mapped to their escaped form.
LATEX_SPECIALS = {
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}


def tex_escape(s: str) -> str:
    """Escape every character listed in ``LATEX_SPECIALS``.

    Args:
        s: Text to escape; non-string values are converted with ``str()``.

    Returns:
        The escaped text, or "" when ``s`` is ``None``.
    """
    if s is None:
        return ""
    # Each source character is translated independently, so the braces and
    # backslashes inside a replacement are never escaped a second time.
    return str(s).translate(str.maketrans(LATEX_SPECIALS))
|
||||
|
||||
|
||||
def _table_body_rows(sections: List[Tuple[str, Dict[str, str]]]) -> List[str]:
    """Render the section headers and ``key & value`` rows shared by both table styles.

    Args:
        sections: ``(title, mapping)`` pairs; sections with an empty mapping
            are skipped entirely.

    Returns:
        LaTeX source lines, one per list entry, with an ``\\addlinespace``
        after each rendered section.
    """
    rows: List[str] = []
    for title, kv in sections:
        if not kv:
            continue  # an empty section would only leave a dangling header
        rows.append(r"\multicolumn{2}{l}{\textbf{" + tex_escape(title) + r"}} \\")
        for key, value in kv.items():
            # Tolerate None / non-string values instead of failing on `in` / len().
            text = "" if value is None else str(value)
            if "\n" in text or len(text) > 120:
                # LaTeX treats a bare newline as a space, so multi-line values
                # need an explicit \newline between lines to keep their layout.
                escaped = " \\newline\n".join(
                    tex_escape(line) for line in text.split("\n")
                )
                cell = (
                    r"\begin{minipage}[t]{\linewidth}\ttfamily\small "
                    + escaped
                    + r"\end{minipage}"
                )
            else:
                cell = r"\ttfamily " + tex_escape(text)
            rows.append(tex_escape(key) + " & " + cell + r" \\")
        rows.append(r"\addlinespace")
    return rows


def latex_table(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    """Build a floating two-column ``table``/``tabular`` with booktabs rules.

    Args:
        sections: ``(title, mapping)`` pairs; each non-empty mapping becomes a
            titled group of ``Item & Details`` rows.
        caption: Table caption (escaped for LaTeX).

    Returns:
        LaTeX source, prefixed with a commented hint listing the packages
        to add to the preamble.
    """
    lines = [
        r"\begin{table}[p]",  # float; use [p] or [tbp] as you prefer
        r"\centering",
        r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}",
        r"\begin{tabular}{p{0.34\linewidth} p{0.62\linewidth}}",
        r"\toprule",
        r"\textbf{Item} & \textbf{Details} \\",
        r"\midrule",
    ]
    lines.extend(_table_body_rows(sections))
    lines.extend([r"\bottomrule", r"\end{tabular}", r"\end{table}"])

    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)


def latex_longtable(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    """Build a page-breaking ``longtable`` variant of :func:`latex_table`.

    The header row is repeated on every page and a bottom rule closes each
    page as well as the table.

    Args:
        sections: ``(title, mapping)`` pairs; each non-empty mapping becomes a
            titled group of ``Item & Details`` rows.
        caption: Table caption (escaped for LaTeX).

    Returns:
        LaTeX source, prefixed with a commented hint listing the packages
        to add to the preamble.
    """
    lines = [
        r"\begin{longtable}{p{0.34\linewidth} p{0.62\linewidth}}",
        r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}\\",
        r"\toprule",
        r"\textbf{Item} & \textbf{Details} \\",
        r"\midrule",
        r"\endfirsthead",
        r"\toprule \textbf{Item} & \textbf{Details} \\ \midrule",
        r"\endhead",
        r"\bottomrule",
        r"\endfoot",
        r"\bottomrule",
        r"\endlastfoot",
    ]
    lines.extend(_table_body_rows(sections))
    lines.append(r"\end{longtable}")

    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{longtable}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)
|
||||
|
||||
|
||||
# -------------------- Preflight --------------------
|
||||
|
||||
# External commands that must be resolvable on PATH.
REQUIRED_CMDS = ["lscpu"]
# Importable module names (not distribution names) the collectors rely on.
REQUIRED_MODULES = [
    "torch",
    "numpy",
    "scipy",
    "sklearn",
    "pynvml",  # provided by nvidia-ml-py
]


def preflight() -> List[str]:
    """Check the runtime prerequisites and describe every problem found.

    Verifies, in order: the OS is Linux, each command in ``REQUIRED_CMDS``
    is on PATH, each module in ``REQUIRED_MODULES`` imports, and (only when
    ``pynvml`` imported) NVML initialises and reports at least one GPU.

    Returns:
        Human-readable error messages; an empty list means all checks passed.
    """
    errors: List[str] = []
    if platform.system().lower() != "linux":
        errors.append(
            f"This script supports Linux only (detected: {platform.system()})."
        )

    for c in REQUIRED_CMDS:
        if not which(c):
            errors.append(f"Missing required command: {c}")

    # Track failed module names separately: `errors` holds full messages, so a
    # bare module name can never be found in it by membership test.
    missing_modules = set()
    for m in REQUIRED_MODULES:
        try:
            __import__(m)
        except Exception:
            missing_modules.add(m)
            errors.append(f"Missing required Python package: {m}")

    # NVML driver availability (pointless to probe when pynvml is missing;
    # that is already reported above).
    if "pynvml" not in missing_modules:
        try:
            pynvml = _import_nvml()
            pynvml.nvmlInit()
            try:
                if pynvml.nvmlDeviceGetCount() < 1:
                    errors.append("No NVIDIA GPUs detected by NVML.")
            finally:
                # Always pair nvmlInit with nvmlShutdown, even if the query fails.
                pynvml.nvmlShutdown()
        except Exception as e:
            errors.append(f"NVIDIA NVML not available / driver not loaded: {e}")

    return errors
|
||||
|
||||
|
||||
# -------------------- Collectors --------------------
|
||||
|
||||
|
||||
def _parse_colon_pairs(text: str) -> Dict[str, str]:
    """Split ``key: value`` lines (the layout lscpu prints) into a dict."""
    pairs: Dict[str, str] = {}
    for line in text.splitlines():
        key, sep, value = line.partition(":")
        if sep:
            pairs[key.strip()] = value.strip()
    return pairs


def _format_mhz(raw: str) -> str:
    """Format an lscpu MHz field as whole MHz, or "" when it is missing/unparseable."""
    try:
        # A decimal comma (possible under some locales) is accepted as well.
        return f"{float(raw.replace(',', '.')):.0f} MHz"
    except (AttributeError, ValueError):
        return ""


def collect_system() -> Dict[str, str]:
    """Collect OS, kernel, CPU and RAM facts for the "System" table section.

    Sources: ``/etc/os-release`` (PRETTY_NAME), ``platform``, the output of
    ``lscpu`` and ``/proc/meminfo`` (MemTotal). Every source is best-effort:
    a missing file or field leaves the entry empty or absent, it does not raise.

    Returns:
        Ordered mapping of row label -> value.
    """
    info: Dict[str, str] = {}

    # OS / distro / kernel / arch
    os_pretty = ""
    try:
        with open("/etc/os-release", "r", encoding="utf-8", errors="replace") as f:
            txt = f.read()
        m = re.search(r'^PRETTY_NAME="?(.*?)"?$', txt, flags=re.M)
        if m:
            os_pretty = m.group(1)
    except OSError:
        pass  # fall back to the platform module below
    info["Operating System"] = os_pretty or f"{platform.system()} {platform.release()}"
    info["Kernel"] = platform.release()
    info["Architecture"] = platform.machine()

    # CPU (via lscpu)
    d = _parse_colon_pairs(run(["lscpu"]))
    info["CPU Model"] = d.get("Model name", d.get("Model Name", ""))

    # cores / threads
    sockets = d.get("Socket(s)", "")
    cores_per_socket = d.get("Core(s) per socket", "")
    threads_total = d.get("CPU(s)", "")
    if sockets and cores_per_socket:
        info["CPU Cores (physical)"] = f"{cores_per_socket} × {sockets}"
    else:
        info["CPU Cores (physical)"] = cores_per_socket or ""
    info["CPU Threads (logical)"] = threads_total or str(os.cpu_count() or "")

    # base / max freq
    # Prefer "CPU max MHz" and "CPU min MHz"; lscpu sometimes exposes "CPU MHz" (current)
    # NOTE(review): the "Base" row is filled from lscpu's "CPU min MHz" field,
    # which may differ from the vendor's nominal base clock; confirm the label.
    base = _format_mhz(d.get("CPU min MHz", ""))
    if base:
        info["CPU Base Frequency"] = base
    elif "@" in info["CPU Model"]:
        # fallback: parse from model string like "Intel(R) ... @ 2.30GHz"
        m = re.search(r"@\s*([\d.]+)\s*([GM]Hz)", info["CPU Model"])
        if m:
            info["CPU Base Frequency"] = f"{m.group(1)} {m.group(2)}"
    else:
        cur = _format_mhz(d.get("CPU MHz", ""))
        if cur:
            info["CPU (Current) Frequency"] = cur
    maxf = _format_mhz(d.get("CPU max MHz", ""))
    if maxf:
        info["CPU Max Frequency"] = maxf

    # RAM total (/proc/meminfo)
    try:
        with open("/proc/meminfo", "r", encoding="utf-8", errors="replace") as f:
            meminfo = f.read()
        m = re.search(r"^MemTotal:\s+(\d+)\s+kB", meminfo, flags=re.M)
        if m:
            # The field is matched with a "kB" suffix; scale by 1024 to bytes.
            info["Total RAM"] = human_bytes(int(m.group(1)) * 1024)
    except OSError:
        pass  # RAM row is simply omitted when /proc/meminfo is unreadable

    return info
|
||||
|
||||
|
||||
def collect_gpu(preferred_name_pattern: str = "4090") -> Dict[str, str]:
    """
    Use NVML to enumerate GPUs and select the 'newer' one:
    1) Prefer a device whose name matches ``preferred_name_pattern``
       (case-insensitive regex; default /4090/i)
    2) Else highest CUDA compute capability (major, minor), tiebreaker by total memory
    Also reports driver version and CUDA driver version.

    Args:
        preferred_name_pattern: Regex searched in each device name; pass an
            empty string to skip the name preference and rank purely by
            compute capability and memory.

    Returns:
        Row label -> value mapping for the selected GPU, or
        ``{"Error": ...}`` when NVML reports no devices.
    """
    pynvml = _import_nvml()
    pynvml.nvmlInit()
    try:
        count = pynvml.nvmlDeviceGetCount()
        if count < 1:
            return {"Error": "No NVIDIA GPUs detected by NVML."}

        devices = []
        for i in range(count):
            h = pynvml.nvmlDeviceGetHandleByIndex(i)

            # name can be bytes or str depending on wheel; normalize
            name = _to_text(pynvml.nvmlDeviceGetName(h))
            total_mem = getattr(pynvml.nvmlDeviceGetMemoryInfo(h), "total", 0)

            # compute capability may not exist on very old drivers
            try:
                maj, minr = pynvml.nvmlDeviceGetCudaComputeCapability(h)
            except Exception:
                maj, minr = (0, 0)

            devices.append({"name": name, "mem": total_mem, "cc": (maj, minr)})

        # Prefer an explicit name match
        pick = None
        if preferred_name_pattern:
            pick = next(
                (
                    d
                    for d in devices
                    if re.search(preferred_name_pattern, d["name"], flags=re.I)
                ),
                None,
            )
        if pick is None:
            # Highest compute capability, then largest memory. max() returns the
            # first maximal element, matching a stable descending sort.
            pick = max(devices, key=lambda x: (x["cc"][0], x["cc"][1], x["mem"]))

        # Driver version can be bytes or str
        drv = _to_text(pynvml.nvmlSystemGetDriverVersion())

        # CUDA driver version (integer like 12040 -> 12.4). Best-effort: a
        # failure of either query leaves the field empty instead of aborting.
        cuda_drv_ver = ""
        try:
            try:
                v = pynvml.nvmlSystemGetCudaDriverVersion_v2()
            except Exception:
                v = pynvml.nvmlSystemGetCudaDriverVersion()
            major, rest = divmod(int(v), 1000)
            minor, patch = divmod(rest, 10)
            cuda_drv_ver = f"{major}.{minor}.{patch}" if patch else f"{major}.{minor}"
        except Exception:
            cuda_drv_ver = ""

        return {
            "Selected GPU Name": pick["name"],
            "Selected GPU Memory": human_bytes(pick["mem"]),
            "Selected GPU Compute Capability": f"{pick['cc'][0]}.{pick['cc'][1]}",
            "NVIDIA Driver Version": drv,
            "CUDA (Driver) Version": cuda_drv_ver,
        }
    finally:
        pynvml.nvmlShutdown()
|
||||
|
||||
|
||||
def summarize_numpy_build_config() -> str:
    """
    Capture numpy.__config__.show() and try to extract the BLAS/LAPACK backend line(s).

    Returns:
        Up to 20 captured lines that mention MKL, OpenBLAS, BLAS, LAPACK or
        BLIS (case-insensitive), joined by newlines and stripped. When no
        line matches, the first 12 lines of the output are used instead.
    """
    import contextlib
    from io import StringIO

    import numpy as np

    # show() prints to stdout; redirect_stdout restores the real stream even
    # if show() raises.
    buf = StringIO()
    with contextlib.redirect_stdout(buf):
        np.__config__.show()
    captured = buf.getvalue().splitlines()

    # Heuristic: capture lines mentioning MKL, OpenBLAS, BLIS, LAPACK
    backend_re = re.compile(r"(MKL|OpenBLAS|BLAS|LAPACK|BLIS)", re.I)
    lines = [line for line in captured if backend_re.search(line)]
    if not lines:
        # fall back to first ~12 lines
        lines = captured[:12]
    # Keep it compact
    return "\n".join(lines[:20]).strip()
|
||||
|
||||
|
||||
def collect_software() -> Dict[str, str]:
    """Collect interpreter and library versions for the "Software Environment" section.

    Returns:
        Ordered mapping of row label -> value covering Python, PyTorch (with
        the CUDA and cuDNN versions it reports), scikit-learn, NumPy, SciPy
        and a summary of the NumPy build configuration.
    """
    import sys as _sys
    import torch
    import numpy as _np
    import scipy as _sp
    import sklearn as _sk

    report: Dict[str, str] = {"Python": _sys.version.split()[0]}

    # PyTorch + the CUDA version exposed as torch.version.cuda
    report["PyTorch"] = torch.__version__
    built_cuda = getattr(torch.version, "cuda", "")
    report["PyTorch Built CUDA"] = built_cuda if built_cuda else ""

    # cuDNN row is skipped entirely when the query itself fails
    try:
        cudnn_version = torch.backends.cudnn.version()  # integer
        report["cuDNN (PyTorch build)"] = str(cudnn_version) if cudnn_version else ""
    except Exception:
        pass

    # scikit-learn, then NumPy / SciPy + build config
    report.update(
        {
            "scikit-learn": _sk.__version__,
            "NumPy": _np.__version__,
            "SciPy": _sp.__version__,
            "NumPy Build Config": summarize_numpy_build_config(),
        }
    )
    return report
|
||||
|
||||
|
||||
# -------------------- Main --------------------
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: run preflight, collect facts, emit the LaTeX table.

    Exits with status 1 (after printing the problems to stderr) when the
    preflight check reports any error. Otherwise the table is written to the
    ``--output`` file if given, else printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Generate LaTeX table of system/software environment for thesis (Linux + NVIDIA)."
    )
    parser.add_argument(
        "--output", "-o", type=str, help="Write LaTeX to this file instead of stdout."
    )
    parser.add_argument(
        "--caption", type=str, default="Computational Environment (Hardware & Software)"
    )
    options = parser.parse_args()

    problems = preflight()
    if problems:
        bullet_list = "\n- ".join(problems)
        failure_report = (
            "Preflight check failed:\n- "
            + bullet_list
            + "\n"
            + "Please install missing components and re-run."
        )
        print(failure_report, file=sys.stderr)
        sys.exit(1)

    # Collectors run in table order: system, GPU, software.
    sections: List[Tuple[str, Dict[str, str]]] = [
        ("System", collect_system()),
        ("GPU (Selected Newer Device)", collect_gpu()),
        ("Software Environment", collect_software()),
    ]
    latex = latex_table(sections, caption=options.caption)

    if not options.output:
        print(latex)
        return

    with open(options.output, "w", encoding="utf-8") as out_file:
        out_file.write(latex)
    print(f"Wrote LaTeX to: {options.output}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -23,6 +23,8 @@ dependencies = [
|
||||
"scipy>=1.16.0",
|
||||
"seaborn>=0.13.2",
|
||||
"six>=1.17.0",
|
||||
"tabulate>=0.9.0",
|
||||
"thop>=0.1.1.post2209072238",
|
||||
"torch-receptive-field",
|
||||
"torchscan>=0.1.1",
|
||||
"visualtorch>=0.2.4",
|
||||
|
||||
@@ -261,6 +261,80 @@ class IsoForest(object):
|
||||
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
|
||||
logger.info("Finished testing.")
|
||||
|
||||
def inference(
    self,
    dataset: BaseADDataset,
    device: str = "cpu",
    n_jobs_dataloader: int = 0,
    batch_size: int = 32,
):
    """Score every sample of ``dataset`` with the trained Isolation Forest model.

    Iterates the third loader returned by ``dataset.loaders``, optionally
    encodes each batch with ``self.ae_net.encoder`` (when ``self.hybrid``),
    flattens it, and feeds all rows to ``self.model.decision_function``.

    Side effects: sets ``inference_time``, ``inference_indices``,
    ``inference_file_ids``, ``inference_frame_ids`` and
    ``inference_index_mapping`` on ``self`` and logs summary statistics.

    Returns:
        1-D array of negated decision-function values, one per sample
        (presumably larger = more anomalous; confirm against the model).
    """
    logger = logging.getLogger()

    # Only the last of the three loaders is used for inference
    _first, _second, inference_loader = dataset.loaders(
        batch_size=batch_size, num_workers=n_jobs_dataloader
    )

    # Per-batch accumulators
    feature_batches = []
    sample_indices = []
    sample_file_ids = []
    sample_frame_ids = []

    logger.info("Starting inference...")
    start_time = time.time()

    for inputs, idx, (file_id, frame_id) in inference_loader:
        features = inputs.to(device)
        if self.hybrid:
            # Hybrid variant: score the encoder output instead of raw inputs
            features = self.ae_net.encoder(features)
        flattened = features.view(features.size(0), -1)
        feature_batches.append(flattened.cpu().data.numpy())

        # Keep indices and metadata aligned with the feature rows
        sample_indices.extend(idx.cpu().data.numpy().tolist())
        sample_file_ids.extend(file_id.cpu().data.numpy().tolist())
        sample_frame_ids.extend(frame_id.cpu().data.numpy().tolist())

    X = np.concatenate(feature_batches)

    # Anomaly scores: sign-flipped decision function, flattened to 1-D
    scores = ((-1.0) * self.model.decision_function(X)).flatten()

    # Store inference results
    self.inference_time = time.time() - start_time
    self.inference_indices = np.array(sample_indices)
    self.inference_file_ids = np.array(sample_file_ids)
    self.inference_frame_ids = np.array(sample_frame_ids)

    # Index mapping similar to DeepSAD trainer
    self.inference_index_mapping = {
        "indices": self.inference_indices,
        "file_ids": self.inference_file_ids,
        "frame_ids": self.inference_frame_ids,
    }

    # Log inference statistics
    logger.info(f"Number of inference samples: {len(self.inference_indices)}")
    logger.info(
        f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
    )
    logger.info("Inference Time: {:.3f}s".format(self.inference_time))
    logger.info(
        "Score statistics: "
        f"min={scores.min():.3f}, "
        f"max={scores.max():.3f}, "
        f"mean={scores.mean():.3f}, "
        f"std={scores.std():.3f}"
    )
    logger.info("Finished inference.")

    return scores
|
||||
|
||||
def load_ae(self, dataset_name, model_path):
|
||||
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model."""
|
||||
|
||||
|
||||
@@ -453,6 +453,80 @@ class OCSVM(object):
|
||||
logger.info("Test Time: {:.3f}s".format(self.results["test_time"]))
|
||||
logger.info("Finished testing.")
|
||||
|
||||
def inference(
    self,
    dataset: BaseADDataset,
    device: str = "cpu",
    n_jobs_dataloader: int = 0,
    batch_size: int = 32,
):
    """Score every sample of ``dataset`` with the trained OC-SVM model.

    Iterates the third loader returned by ``dataset.loaders``, optionally
    encodes each batch with ``self.ae_net.encoder`` (when ``self.hybrid``),
    flattens it, and feeds all rows to ``self.model.decision_function``.

    Side effects: sets ``inference_time``, ``inference_indices``,
    ``inference_file_ids``, ``inference_frame_ids`` and
    ``inference_index_mapping`` on ``self`` and logs summary statistics.

    Returns:
        1-D array of negated decision-function values, one per sample
        (presumably larger = more anomalous; confirm against the model).
    """
    logger = logging.getLogger()

    # Only the last of the three loaders is used for inference
    _first, _second, inference_loader = dataset.loaders(
        batch_size=batch_size, num_workers=n_jobs_dataloader
    )

    # Per-batch accumulators
    feature_batches = []
    sample_indices = []
    sample_file_ids = []
    sample_frame_ids = []

    logger.info("Starting inference...")
    start_time = time.time()

    for inputs, idx, (file_id, frame_id) in inference_loader:
        features = inputs.to(device)
        if self.hybrid:
            # Hybrid variant: score the encoder output instead of raw inputs
            features = self.ae_net.encoder(features)
        flattened = features.view(features.size(0), -1)
        feature_batches.append(flattened.cpu().data.numpy())

        # Keep indices and metadata aligned with the feature rows
        sample_indices.extend(idx.cpu().data.numpy().tolist())
        sample_file_ids.extend(file_id.cpu().data.numpy().tolist())
        sample_frame_ids.extend(frame_id.cpu().data.numpy().tolist())

    X = np.concatenate(feature_batches)

    # Anomaly scores: sign-flipped decision function, flattened to 1-D
    scores = ((-1.0) * self.model.decision_function(X)).flatten()

    # Store inference results
    self.inference_time = time.time() - start_time
    self.inference_indices = np.array(sample_indices)
    self.inference_file_ids = np.array(sample_file_ids)
    self.inference_frame_ids = np.array(sample_frame_ids)

    # Index mapping similar to DeepSAD trainer
    self.inference_index_mapping = {
        "indices": self.inference_indices,
        "file_ids": self.inference_file_ids,
        "frame_ids": self.inference_frame_ids,
    }

    # Log inference statistics
    logger.info(f"Number of inference samples: {len(self.inference_indices)}")
    logger.info(
        f"Number of unique files: {len(np.unique(self.inference_file_ids))}"
    )
    logger.info("Inference Time: {:.3f}s".format(self.inference_time))
    logger.info(
        "Score statistics: "
        f"min={scores.min():.3f}, "
        f"max={scores.max():.3f}, "
        f"mean={scores.mean():.3f}, "
        f"std={scores.std():.3f}"
    )
    logger.info("Finished inference.")

    return scores
|
||||
|
||||
def load_ae(self, model_path, net_name, device="cpu"):
|
||||
"""Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model."""
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ def load_dataset(
|
||||
k_fold_num: int = None,
|
||||
num_known_normal: int = 0,
|
||||
num_known_outlier: int = 0,
|
||||
split: float = 0.7,
|
||||
):
|
||||
"""Loads the dataset."""
|
||||
|
||||
@@ -49,6 +50,7 @@ def load_dataset(
|
||||
k_fold_num=k_fold_num,
|
||||
num_known_normal=num_known_normal,
|
||||
num_known_outlier=num_known_outlier,
|
||||
split=split,
|
||||
)
|
||||
|
||||
if dataset_name == "subtersplit":
|
||||
|
||||
@@ -338,6 +338,8 @@ class SubTerInference(VisionDataset):
|
||||
self.frame_ids = np.arange(self.data.shape[0], dtype=np.int32)
|
||||
self.file_names = {0: experiment_file.name}
|
||||
|
||||
self.transform = transform if transform else transforms.ToTensor()
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
|
||||
@@ -152,6 +152,12 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
default=0.001,
|
||||
help="Initial learning rate for Deep SAD network training. Default=0.001",
|
||||
)
|
||||
@click.option(
|
||||
"--train_test_split",
|
||||
type=float,
|
||||
default=0.7,
|
||||
help="Ratio of training data in the train-test split (default: 0.7).",
|
||||
)
|
||||
@click.option("--n_epochs", type=int, default=50, help="Number of epochs to train.")
|
||||
@click.option(
|
||||
"--lr_milestone",
|
||||
@@ -307,6 +313,7 @@ def main(
|
||||
seed,
|
||||
optimizer_name,
|
||||
lr,
|
||||
train_test_split,
|
||||
n_epochs,
|
||||
lr_milestone,
|
||||
batch_size,
|
||||
@@ -416,6 +423,7 @@ def main(
|
||||
k_fold_num=k_fold_num,
|
||||
num_known_normal=num_known_normal,
|
||||
num_known_outlier=num_known_outlier,
|
||||
split=train_test_split,
|
||||
)
|
||||
# Log random sample of known anomaly classes if more than 1 class
|
||||
if n_known_outlier_classes > 1:
|
||||
@@ -630,57 +638,185 @@ def main(
|
||||
cfg.save_config(export_json=xp_path + "/config.json")
|
||||
|
||||
elif action == "infer":
|
||||
# Inference uses a deterministic, non-shuffled loader to preserve temporal order
|
||||
dataset = load_dataset(
|
||||
dataset_name,
|
||||
cfg.settings["dataset_name"],
|
||||
data_path,
|
||||
normal_class,
|
||||
known_outlier_class,
|
||||
n_known_outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
cfg.settings["normal_class"],
|
||||
cfg.settings["known_outlier_class"],
|
||||
cfg.settings["n_known_outlier_classes"],
|
||||
cfg.settings["ratio_known_normal"],
|
||||
cfg.settings["ratio_known_outlier"],
|
||||
cfg.settings["ratio_pollution"],
|
||||
random_state=np.random.RandomState(cfg.settings["seed"]),
|
||||
k_fold_num=False,
|
||||
inference=True,
|
||||
)
|
||||
|
||||
# Log random sample of known anomaly classes if more than 1 class
|
||||
if n_known_outlier_classes > 1:
|
||||
logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))
|
||||
|
||||
# Initialize DeepSAD model and set neural network phi
|
||||
deepSAD = DeepSAD(latent_space_dim, cfg.settings["eta"])
|
||||
deepSAD.set_network(net_name)
|
||||
|
||||
# If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
|
||||
if not load_model:
|
||||
# --- Expect a model DIRECTORY (aligned with 'retest') ---
|
||||
if (
|
||||
(not load_model)
|
||||
or (not Path(load_model).exists())
|
||||
or (not Path(load_model).is_dir())
|
||||
):
|
||||
logger.error(
|
||||
"For inference mode a model has to be loaded! Pass the --load_model option with the model path!"
|
||||
"For inference mode a model directory has to be loaded! "
|
||||
"Pass the --load_model option with the model directory path!"
|
||||
)
|
||||
return
|
||||
load_model = Path(load_model)
|
||||
|
||||
# Resolve expected model artifacts (single-model / no k-fold suffixes)
|
||||
deepsad_model_path = load_model / "model_deepsad.tar"
|
||||
ae_model_path = load_model / "model_ae.tar"
|
||||
ocsvm_model_path = load_model / "model_ocsvm.pkl"
|
||||
isoforest_model_path = load_model / "model_isoforest.pkl"
|
||||
|
||||
# Sanity check model files exist
|
||||
model_paths = [
|
||||
deepsad_model_path,
|
||||
ae_model_path,
|
||||
ocsvm_model_path,
|
||||
isoforest_model_path,
|
||||
]
|
||||
missing = [p.name for p in model_paths if not p.exists() or not p.is_file()]
|
||||
if missing:
|
||||
logger.error(
|
||||
"The following model files do not exist in the provided model directory: "
|
||||
+ ", ".join(missing)
|
||||
)
|
||||
return
|
||||
|
||||
deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
|
||||
logger.info("Loading model from %s." % load_model)
|
||||
# Prepare output paths
|
||||
inf_dir = Path(xp_path) / "inference"
|
||||
inf_dir.mkdir(parents=True, exist_ok=True)
|
||||
base_stem = Path(Path(dataset.root).stem) # keep your previous naming
|
||||
# DeepSAD outputs (keep legacy filenames for backward compatibility)
|
||||
deepsad_scores_path = inf_dir / Path(
|
||||
base_stem.stem + "_deepsad_scores"
|
||||
).with_suffix(".npy")
|
||||
deepsad_outputs_path = inf_dir / Path(base_stem.stem + "_outputs").with_suffix(
|
||||
".npy"
|
||||
)
|
||||
# Baselines
|
||||
ocsvm_scores_path = inf_dir / Path(
|
||||
base_stem.stem + "_ocsvm_scores"
|
||||
).with_suffix(".npy")
|
||||
isoforest_scores_path = inf_dir / Path(
|
||||
base_stem.stem + "_isoforest_scores"
|
||||
).with_suffix(".npy")
|
||||
|
||||
inference_results, all_outputs = deepSAD.inference(
|
||||
dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
|
||||
)
|
||||
inference_results_path = (
|
||||
Path(xp_path)
|
||||
/ "inference"
|
||||
/ Path(Path(dataset.root).stem).with_suffix(".npy")
|
||||
)
|
||||
inference_outputs_path = (
|
||||
Path(xp_path)
|
||||
/ "inference"
|
||||
/ Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
|
||||
# Common loader settings
|
||||
_n_jobs = (
|
||||
n_jobs_dataloader
|
||||
if "n_jobs_dataloader" in locals()
|
||||
else cfg.settings.get("n_jobs_dataloader", 0)
|
||||
)
|
||||
|
||||
inference_results_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
np.save(inference_results_path, inference_results, fix_imports=False)
|
||||
np.save(inference_outputs_path, all_outputs, fix_imports=False)
|
||||
# ----------------- DeepSAD -----------------
|
||||
|
||||
deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
|
||||
deepSAD.set_network(cfg.settings["net_name"])
|
||||
deepSAD.load_model(
|
||||
model_path=deepsad_model_path, load_ae=True, map_location=device
|
||||
)
|
||||
logger.info("Loaded DeepSAD model from %s.", deepsad_model_path)
|
||||
|
||||
deepsad_scores, deepsad_all_outputs = deepSAD.inference(
|
||||
dataset, device=device, n_jobs_dataloader=_n_jobs
|
||||
)
|
||||
|
||||
np.save(deepsad_scores_path, deepsad_scores)
|
||||
# np.save(deepsad_outputs_path, deepsad_all_outputs)
|
||||
|
||||
logger.info(
|
||||
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
|
||||
"DeepSAD inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
|
||||
float(np.median(deepsad_scores)),
|
||||
float(np.mean(deepsad_scores)),
|
||||
float(np.min(deepsad_scores)),
|
||||
float(np.max(deepsad_scores)),
|
||||
)
|
||||
|
||||
# ----------------- OCSVM (hybrid) -----------------
|
||||
ocsvm_scores = None
|
||||
ocsvm = OCSVM(
|
||||
kernel=cfg.settings["ocsvm_kernel"],
|
||||
nu=cfg.settings["ocsvm_nu"],
|
||||
hybrid=True,
|
||||
latent_space_dim=cfg.settings["latent_space_dim"],
|
||||
)
|
||||
# load AE to build the feature extractor for hybrid OCSVM
|
||||
ocsvm.load_ae(
|
||||
net_name=cfg.settings["net_name"],
|
||||
model_path=ae_model_path,
|
||||
device=device,
|
||||
)
|
||||
ocsvm.load_model(import_path=ocsvm_model_path)
|
||||
|
||||
ocsvm_scores = ocsvm.inference(
|
||||
dataset, device=device, n_jobs_dataloader=_n_jobs, batch_size=32
|
||||
)
|
||||
|
||||
if ocsvm_scores is not None:
|
||||
np.save(ocsvm_scores_path, ocsvm_scores)
|
||||
logger.info(
|
||||
"OCSVM inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
|
||||
float(np.median(ocsvm_scores)),
|
||||
float(np.mean(ocsvm_scores)),
|
||||
float(np.min(ocsvm_scores)),
|
||||
float(np.max(ocsvm_scores)),
|
||||
)
|
||||
else:
|
||||
logger.warning("OCSVM scores could not be determined; no array saved.")
|
||||
|
||||
# ----------------- Isolation Forest -----------------
|
||||
isoforest_scores = None
|
||||
Isoforest = IsoForest(
|
||||
hybrid=False,
|
||||
n_estimators=cfg.settings["isoforest_n_estimators"],
|
||||
max_samples=cfg.settings["isoforest_max_samples"],
|
||||
contamination=cfg.settings["isoforest_contamination"],
|
||||
n_jobs=cfg.settings["isoforest_n_jobs_model"],
|
||||
seed=cfg.settings["seed"],
|
||||
)
|
||||
Isoforest.load_model(import_path=isoforest_model_path, device=device)
|
||||
isoforest_scores = Isoforest.inference(
|
||||
dataset, device=device, n_jobs_dataloader=_n_jobs
|
||||
)
|
||||
if isoforest_scores is not None:
|
||||
np.save(isoforest_scores_path, isoforest_scores)
|
||||
logger.info(
|
||||
"IsolationForest inference: median=%.6f mean=%.6f min=%.6f max=%.6f",
|
||||
float(np.median(isoforest_scores)),
|
||||
float(np.mean(isoforest_scores)),
|
||||
float(np.min(isoforest_scores)),
|
||||
float(np.max(isoforest_scores)),
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Isolation Forest scores could not be determined; no array saved."
|
||||
)
|
||||
|
||||
# Final summary (DeepSAD always runs; baselines are best-effort)
|
||||
logger.info(
|
||||
"Inference complete. Saved arrays to %s:\n"
|
||||
" DeepSAD scores: %s\n"
|
||||
" DeepSAD outputs: %s\n"
|
||||
" OCSVM scores: %s\n"
|
||||
" IsoForest scores: %s",
|
||||
inf_dir,
|
||||
deepsad_scores_path.name,
|
||||
deepsad_outputs_path.name,
|
||||
ocsvm_scores_path.name if ocsvm_scores is not None else "(not saved)",
|
||||
isoforest_scores_path.name
|
||||
if isoforest_scores is not None
|
||||
else "(not saved)",
|
||||
)
|
||||
|
||||
elif action == "ae_elbow_test":
|
||||
# Load data once
|
||||
dataset = load_dataset(
|
||||
@@ -694,6 +830,7 @@ def main(
|
||||
ratio_pollution,
|
||||
random_state=np.random.RandomState(cfg.settings["seed"]),
|
||||
k_fold_num=k_fold_num,
|
||||
split=train_test_split,
|
||||
)
|
||||
|
||||
# Set up k-fold passes
|
||||
@@ -804,12 +941,14 @@ def main(
|
||||
k_fold_num=cfg.settings["k_fold_num"],
|
||||
num_known_normal=cfg.settings["num_known_normal"],
|
||||
num_known_outlier=cfg.settings["num_known_outlier"],
|
||||
split=train_test_split,
|
||||
)
|
||||
|
||||
train_passes = (
|
||||
range(cfg.settings["k_fold_num"]) if cfg.settings["k_fold"] else [None]
|
||||
)
|
||||
|
||||
retest_autoencoder = False
|
||||
retest_isoforest = True
|
||||
retest_ocsvm = True
|
||||
retest_deepsad = True
|
||||
@@ -865,6 +1004,25 @@ def main(
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
|
||||
if retest_autoencoder:
|
||||
# Initialize DeepSAD model and set neural network phi
|
||||
deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
|
||||
deepSAD.set_network(cfg.settings["net_name"])
|
||||
deepSAD.load_model(
|
||||
model_path=ae_model_path, load_ae=True, map_location=device
|
||||
)
|
||||
logger.info("Loading model from %s." % load_model)
|
||||
# Save pretraining results
|
||||
if fold_idx is None:
|
||||
deepSAD.save_ae_results(
|
||||
export_pkl=load_model / "results_ae_retest.pkl"
|
||||
)
|
||||
else:
|
||||
deepSAD.save_ae_results(
|
||||
export_pkl=load_model / f"results_ae_retest_{fold_idx}.pkl"
|
||||
)
|
||||
del deepSAD
|
||||
|
||||
# Initialize DeepSAD model and set neural network phi
|
||||
if retest_deepsad:
|
||||
deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"])
|
||||
|
||||
101
Deep-SAD-PyTorch/src/network_statistics.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import torch
|
||||
from thop import profile
|
||||
|
||||
from networks.subter_LeNet import SubTer_LeNet, SubTer_LeNet_Autoencoder
|
||||
from networks.subter_LeNet_rf import SubTer_Efficient_AE, SubTer_EfficientEncoder
|
||||
|
||||
# Configuration
|
||||
LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
|
||||
BATCH_SIZE = 1
|
||||
INPUT_SHAPE = (BATCH_SIZE, 1, 32, 2048)
|
||||
|
||||
|
||||
def count_parameters(model, input_shape, verbose=False):
    """Count MACs and parameters for a model.

    Puts the model in eval mode and runs one thop ``profile`` pass, with
    gradients disabled, on a random tensor of shape ``input_shape``.

    Args:
        model: Network to profile; handed unchanged to ``thop.profile``.
        input_shape: Shape passed to ``torch.randn`` to build the dummy input.
        verbose: Forwarded to ``thop.profile``. Defaults to ``False`` so that
            thop's "[INFO] Register ..." lines are not written to stdout,
            where they would end up in front of the LaTeX table when the
            script output is redirected into a ``.tex`` file.

    Returns:
        A dict with the keys ``"MACs"`` and ``"Parameters"`` holding the two
        values returned by ``profile``.
    """
    model.eval()
    with torch.no_grad():
        input_tensor = torch.randn(input_shape)
        macs, params = profile(model, inputs=(input_tensor,), verbose=verbose)
    return {"MACs": macs, "Parameters": params}
|
||||
|
||||
|
||||
def format_number(num: float) -> str:
    """Format large numbers with K, M, B, T suffixes.

    The value is divided by 1000 until its magnitude, rounded to the two
    decimals that are printed, is below 1000 or the largest suffix ("T")
    is reached.

    Args:
        num: Value to format. Negative values keep their sign.

    Returns:
        The scaled value with two decimals followed by its suffix,
        e.g. ``"525.29K"``; values below 1000 get no suffix.
    """
    for unit in ["", "K", "M", "B"]:
        # Test the rounded magnitude: otherwise 999_999 would be rendered
        # as "1000.00K" instead of rolling over to "1.00M".
        if round(abs(num), 2) < 1000.0:
            return f"{num:.2f}{unit}"
        num /= 1000.0
    # Largest supported suffix: no further scaling, whatever the magnitude.
    return f"{num:.2f}T"
|
||||
|
||||
|
||||
def main():
    """Profile every SubTer network per latent size and print a LaTeX table.

    For each entry of ``LATENT_DIMS`` the two encoders and the two
    autoencoders are instantiated, profiled with ``count_parameters`` on
    ``INPUT_SHAPE`` and their parameter/MAC counts formatted with
    ``format_number``. The resulting ``tabularx`` table is printed to stdout.
    """
    # Column order of the table: encoders first, then autoencoders.
    builders = {
        "lenet_enc": SubTer_LeNet,
        "eff_enc": SubTer_EfficientEncoder,
        "lenet_ae": SubTer_LeNet_Autoencoder,
        "eff_ae": SubTer_Efficient_AE,
    }

    # dim -> the 8 formatted cells (params, MACs per network, in column order)
    cells_by_dim = {}
    for dim in LATENT_DIMS:
        # Build all four networks first, then profile them one after another.
        nets = {tag: build(rep_dim=dim) for tag, build in builders.items()}
        stats = {
            tag: count_parameters(net, INPUT_SHAPE) for tag, net in nets.items()
        }

        cells = []
        for tag in builders:
            cells.append(format_number(stats[tag]["Parameters"]))
            cells.append(format_number(stats[tag]["MACs"]))
        cells_by_dim[dim] = cells

    # Table preamble and the two header rows.
    header = "".join(
        [
            "\\begin{table}[!ht]\n",
            "\\centering\n",
            "\\renewcommand{\\arraystretch}{1.15}\n",
            "\\begin{tabularx}{\\linewidth}{lXXXXXXXX}\n",
            "\\hline\n",
            " & \\multicolumn{4}{c}{\\textbf{Encoders}} & ",
            "\\multicolumn{4}{c}{\\textbf{Autoencoders}} \\\\\n",
            "\\cline{2-9}\n",
            "\\textbf{Latent $z$} & ",
            "\\textbf{LeNet Params} & \\textbf{LeNet MACs} & ",
            "\\textbf{Eff. Params} & \\textbf{Eff. MACs} & ",
            "\\textbf{LeNet Params} & \\textbf{LeNet MACs} & ",
            "\\textbf{Eff. Params} & \\textbf{Eff. MACs} \\\\\n",
            "\\hline\n",
        ]
    )

    # One table row per latent dimension: "<dim> & <cell> & ... & <cell> \\"
    body = "\n".join(
        " & ".join([str(dim), *cells_by_dim[dim]]) + " \\\\" for dim in LATENT_DIMS
    )

    footer = "".join(
        [
            "\\hline\n",
            "\\end{tabularx}\n",
            "\\caption{Parameter and MAC counts for SubTer variants across latent dimensionalities.}\n",
            "\\label{tab:subter_counts}\n",
            "\\end{table}\n",
        ]
    )

    print(header + body + "\n" + footer)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
155
Deep-SAD-PyTorch/src/network_statistics.tex
Normal file
@@ -0,0 +1,155 @@
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.ConvTranspose2d'>.
|
||||
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
|
||||
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
|
||||
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
|
||||
[INFO] Register count_upsample() for <class 'torch.nn.modules.upsampling.Upsample'>.
|
||||
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
|
||||
\begin{table}[!ht]
|
||||
\centering
|
||||
\renewcommand{\arraystretch}{1.15}
|
||||
\begin{tabularx}{\linewidth}{lXXXXXXXX}
|
||||
\hline
|
||||
& \multicolumn{4}{c}{\textbf{Encoders}} & \multicolumn{4}{c}{\textbf{Autoencoders}} \\
|
||||
\cline{2-9}
|
||||
\textbf{Latent $z$} & \textbf{LeNet Params} & \textbf{LeNet MACs} & \textbf{Eff. Params} & \textbf{Eff. MACs} & \textbf{LeNet Params} & \textbf{LeNet MACs} & \textbf{Eff. Params} & \textbf{Eff. MACs} \\
|
||||
\hline
|
||||
32 & 525.29K & 27.92M & 263.80K & 29.82M & 1.05M & 54.95M & 532.35K & 168.49M \\
|
||||
64 & 1.05M & 28.44M & 525.94K & 30.08M & 2.10M & 56.00M & 1.06M & 169.02M \\
|
||||
128 & 2.10M & 29.49M & 1.05M & 30.61M & 4.20M & 58.10M & 2.11M & 170.07M \\
|
||||
256 & 4.20M & 31.59M & 2.10M & 31.65M & 8.39M & 62.29M & 4.20M & 172.16M \\
|
||||
512 & 8.39M & 35.78M & 4.20M & 33.75M & 16.78M & 70.68M & 8.40M & 176.36M \\
|
||||
768 & 12.58M & 39.98M & 6.29M & 35.85M & 25.17M & 79.07M & 12.59M & 180.55M \\
|
||||
1024 & 16.78M & 44.17M & 8.39M & 37.95M & 33.56M & 87.46M & 16.79M & 184.75M \\
|
||||
\hline
|
||||
\end{tabularx}
|
||||
\caption{Parameter and MAC counts for SubTer variants across latent dimensionalities.}
|
||||
\label{tab:subter_counts}
|
||||
\end{table}
|
||||
|
||||
@@ -177,6 +177,8 @@ class DeepSADTrainer(BaseTrainer):
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
|
||||
latent_dim = net.rep_dim
|
||||
|
||||
# Set device for network
|
||||
net = net.to(self.device)
|
||||
|
||||
@@ -184,7 +186,9 @@ class DeepSADTrainer(BaseTrainer):
|
||||
logger.info("Starting inference...")
|
||||
n_batches = 0
|
||||
start_time = time.time()
|
||||
all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
|
||||
all_outputs = np.zeros(
|
||||
(len(inference_loader.dataset), latent_dim), dtype=np.float32
|
||||
)
|
||||
scores = []
|
||||
net.eval()
|
||||
|
||||
|
||||
24
Deep-SAD-PyTorch/uv.lock
generated
@@ -141,6 +141,8 @@ dependencies = [
|
||||
{ name = "scipy" },
|
||||
{ name = "seaborn" },
|
||||
{ name = "six" },
|
||||
{ name = "tabulate" },
|
||||
{ name = "thop" },
|
||||
{ name = "torch-receptive-field" },
|
||||
{ name = "torchscan" },
|
||||
{ name = "visualtorch" },
|
||||
@@ -166,6 +168,8 @@ requires-dist = [
|
||||
{ name = "scipy", specifier = ">=1.16.0" },
|
||||
{ name = "seaborn", specifier = ">=0.13.2" },
|
||||
{ name = "six", specifier = ">=1.17.0" },
|
||||
{ name = "tabulate", specifier = ">=0.9.0" },
|
||||
{ name = "thop", specifier = ">=0.1.1.post2209072238" },
|
||||
{ name = "torch-receptive-field", git = "https://github.com/Fangyh09/pytorch-receptive-field.git" },
|
||||
{ name = "torchscan", specifier = ">=0.1.1" },
|
||||
{ name = "visualtorch", specifier = ">=0.2.4" },
|
||||
@@ -882,6 +886,26 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tabulate"
|
||||
version = "0.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thop"
|
||||
version = "0.1.1.post2209072238"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "torch" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/0f/72beeab4ff5221dc47127c80f8834b4bcd0cb36f6ba91c0b1d04a1233403/thop-0.1.1.post2209072238-py3-none-any.whl", hash = "sha256:01473c225231927d2ad718351f78ebf7cffe6af3bed464c4f1ba1ef0f7cdda27", size = 15443, upload-time = "2022-09-07T14:38:37.211Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "threadpoolctl"
|
||||
version = "3.6.0"
|
||||
|
||||
1052
thesis/Main.bbl
BIN
thesis/Main.pdf
1362
thesis/Main.tex
@@ -24,15 +24,12 @@
|
||||
not used other than the declared sources/resources, and that I have
|
||||
explicitly indicated all material which has been quoted either
|
||||
literally or by content from the sources used.
|
||||
\ifthenelse{\equal{\ThesisTitle}{master's thesis} \or
|
||||
\equal{\ThesisTitle}{diploma thesis} \or
|
||||
\equal{\ThesisTitle}{doctoral thesis}}
|
||||
{The text document uploaded to TUGRAZonline is identical to the present \ThesisTitle.}{\reminder{TODO: fix \textbackslash ThesisTitle}}
|
||||
The text document uploaded to TUGRAZonline is identical to the present \ThesisTitle.
|
||||
|
||||
|
||||
\par\vspace*{4cm}
|
||||
\centerline{
|
||||
\begin{tabular}{m{1.5cm}cm{1.5cm}m{3cm}m{1.5cm}cm{1.5cm}}
|
||||
\cline{1-3} \cline{5-7}
|
||||
& date & & & & (signature) &\\
|
||||
\end{tabular}}
|
||||
\begin{tabular}{m{1.5cm}cm{1.5cm}m{3cm}m{1.5cm}cm{1.5cm}}
|
||||
\cline{1-3} \cline{5-7}
|
||||
& date & & & & (signature) & \\
|
||||
\end{tabular}}
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
\makeatother
|
||||
|
||||
% header and footer texts
|
||||
\clearscrheadfoot % clear everything
|
||||
\clearpairofpagestyles % clear everything
|
||||
\KOMAoptions{headlines=1} % header needs two lines here
|
||||
% [plain]{actual (scrheadings)}
|
||||
\ihead[]{}%
|
||||
@@ -141,46 +141,46 @@
|
||||
\ifthenelse{\equal{\DocumentLanguage}{en}}{\renewcaptionname{USenglish}{\figurename}{Figure}}{}%
|
||||
\ifthenelse{\equal{\DocumentLanguage}{de}}{\renewcaptionname{ngerman}{\figurename}{Abbildung}}{}%
|
||||
\captionsetup{%
|
||||
format=hang,% hanging captions
|
||||
labelformat=simple,% just name and number
|
||||
labelsep=colon,% colon and space
|
||||
justification=justified,%
|
||||
singlelinecheck=true,% center single line captions
|
||||
font={footnotesize,it},% font style of label and text
|
||||
margin=0.025\textwidth,% margin left/right of the caption (to textwidth)
|
||||
indention=0pt,% no further indention (just hanging)
|
||||
hangindent=0pt,% no further indention (just hanging)}
|
||||
aboveskip=8pt,% same spacing above and...
|
||||
belowskip=8pt}% ...below the float (this way tables shouldn't be a problem, either)
|
||||
format=hang,% hanging captions
|
||||
labelformat=simple,% just name and number
|
||||
labelsep=colon,% colon and space
|
||||
justification=justified,%
|
||||
singlelinecheck=true,% center single line captions
|
||||
font={footnotesize,it},% font style of label and text
|
||||
margin=0.025\textwidth,% margin left/right of the caption (to textwidth)
|
||||
indention=0pt,% no further indention (just hanging)
|
||||
hangindent=0pt,% no further indention (just hanging)}
|
||||
aboveskip=8pt,% same spacing above and...
|
||||
belowskip=8pt}% ...below the float (this way tables shouldn't be a problem, either)
|
||||
|
||||
% code listings
|
||||
\lstloadlanguages{VHDL,Matlab,[ANSI]C,Java,[LaTeX]TeX}
|
||||
\lstset{%
|
||||
% general
|
||||
breaklines=true,% automatically break long lines
|
||||
breakatwhitespace=true,% break only at white spaces
|
||||
breakindent=1cm,% additional indentation for broken lines
|
||||
% positioning
|
||||
linewidth=\linewidth,% set width of whole thing to \linewidth
|
||||
xleftmargin=0.1\linewidth,%
|
||||
% frame and caption
|
||||
frame=tlrb,% frame the entire thing
|
||||
framexleftmargin=1cm,% to include linenumbering into frame
|
||||
captionpos=b,% caption at bottom
|
||||
% format parameters
|
||||
basicstyle=\ttfamily\tiny,% small true type font
|
||||
keywordstyle=\color{black},%
|
||||
identifierstyle=\color{black},%
|
||||
commentstyle=\color[rgb]{0.45,0.45,0.45},% gray
|
||||
stringstyle=\color{black},%
|
||||
showstringspaces=false,%
|
||||
showtabs=false,%
|
||||
tabsize=2,%
|
||||
% linenumbers
|
||||
numberstyle=\tiny,%
|
||||
numbers=left,%
|
||||
numbersep=3mm,%
|
||||
firstnumber=1,%
|
||||
stepnumber=1,% number every line (0: off)
|
||||
numberblanklines=true%
|
||||
% general
|
||||
breaklines=true,% automatically break long lines
|
||||
breakatwhitespace=true,% break only at white spaces
|
||||
breakindent=1cm,% additional indentation for broken lines
|
||||
% positioning
|
||||
linewidth=\linewidth,% set width of whole thing to \linewidth
|
||||
xleftmargin=0.1\linewidth,%
|
||||
% frame and caption
|
||||
frame=tlrb,% frame the entire thing
|
||||
framexleftmargin=1cm,% to include linenumbering into frame
|
||||
captionpos=b,% caption at bottom
|
||||
% format parameters
|
||||
basicstyle=\ttfamily\tiny,% small true type font
|
||||
keywordstyle=\color{black},%
|
||||
identifierstyle=\color{black},%
|
||||
commentstyle=\color[rgb]{0.45,0.45,0.45},% gray
|
||||
stringstyle=\color{black},%
|
||||
showstringspaces=false,%
|
||||
showtabs=false,%
|
||||
tabsize=2,%
|
||||
% linenumbers
|
||||
numberstyle=\tiny,%
|
||||
numbers=left,%
|
||||
numbersep=3mm,%
|
||||
firstnumber=1,%
|
||||
stepnumber=1,% number every line (0: off)
|
||||
numberblanklines=true%
|
||||
}
|
||||
|
||||
@@ -147,22 +147,22 @@
|
||||
% standard
|
||||
\newcommand{\fig}[3]{\begin{figure}\centering\includegraphics[width=\textwidth]{#2}\caption{#3}\label{fig:#1}\end{figure}}%
|
||||
% with controllable parameters
|
||||
\newcommand{\figc}[4]{\begin{figure}\centering\includegraphics[#1]{#2}\caption{#3}\label{fig:#4}\end{figure}}%
|
||||
\newcommand{\figc}[4]{\begin{figure}\centering\includegraphics[#4]{#2}\caption{#3}\label{fig:#1}\end{figure}}%
|
||||
% two subfigures
|
||||
\newcommand{\twofig}[6]{\begin{figure}\centering%
|
||||
\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}}%
|
||||
\subfigure[#4]{\includegraphics[width=0.495\textwidth]{#3}}%
|
||||
\caption{#5}\label{fig:#6}\end{figure}}%
|
||||
\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}}%
|
||||
\subfigure[#4]{\includegraphics[width=0.495\textwidth]{#3}}%
|
||||
\caption{#5}\label{fig:#6}\end{figure}}%
|
||||
% two subfigures with labels for each subplot
|
||||
\newcommand{\twofigs}[8]{\begin{figure}\centering%
|
||||
\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}\label{fig:#8#3}}%
|
||||
\subfigure[#5]{\includegraphics[width=0.495\textwidth]{#4}\label{fig:#8#6}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}\label{fig:#8#3}}%
|
||||
\subfigure[#5]{\includegraphics[width=0.495\textwidth]{#4}\label{fig:#8#6}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
% two subfigures and controllable parameters
|
||||
\newcommand{\twofigc}[8]{\begin{figure}\centering%
|
||||
\subfigure[#3]{\includegraphics[#1]{#2}}%
|
||||
\subfigure[#6]{\includegraphics[#4]{#5}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
\subfigure[#3]{\includegraphics[#1]{#2}}%
|
||||
\subfigure[#6]{\includegraphics[#4]{#5}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
|
||||
% framed figures
|
||||
% standard
|
||||
@@ -171,19 +171,19 @@
|
||||
\newcommand{\figcf}[4]{\begin{figure}\centering\fbox{\includegraphics[#1]{#2}}\caption{#3}\label{fig:#4}\end{figure}}%
|
||||
% two subfigures
|
||||
\newcommand{\twofigf}[6]{\begin{figure}\centering%
|
||||
\fbox{\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}}}%
|
||||
\fbox{\subfigure[#4]{\includegraphics[width=0.495\textwidth]{#3}}}%
|
||||
\caption{#5}\label{fig:#6}\end{figure}}%
|
||||
\fbox{\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}}}%
|
||||
\fbox{\subfigure[#4]{\includegraphics[width=0.495\textwidth]{#3}}}%
|
||||
\caption{#5}\label{fig:#6}\end{figure}}%
|
||||
% two subfigures with labels for each subplot
|
||||
\newcommand{\twofigsf}[8]{\begin{figure}\centering%
|
||||
\fbox{\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}\label{fig:#8#3}}}%
|
||||
\fbox{\subfigure[#5]{\includegraphics[width=0.495\textwidth]{#4}\label{fig:#8#6}}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
\fbox{\subfigure[#2]{\includegraphics[width=0.495\textwidth]{#1}\label{fig:#8#3}}}%
|
||||
\fbox{\subfigure[#5]{\includegraphics[width=0.495\textwidth]{#4}\label{fig:#8#6}}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
% two subfigures and controllable parameters
|
||||
\newcommand{\twofigcf}[8]{\begin{figure}\centering%
|
||||
\fbox{\subfigure[#3]{\includegraphics[#1]{#2}}}%
|
||||
\fbox{\subfigure[#6]{\includegraphics[#4]{#5}}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
\fbox{\subfigure[#3]{\includegraphics[#1]{#2}}}%
|
||||
\fbox{\subfigure[#6]{\includegraphics[#4]{#5}}}%
|
||||
\caption{#7}\label{fig:#8}\end{figure}}%
|
||||
|
||||
% listings
|
||||
\newcommand{\filelisting}[5][]{\lstinputlisting[style=#2,caption={#4},label={lst:#5},#1]{#3}}
|
||||
|
||||
@@ -47,33 +47,33 @@
|
||||
\usepackage{fixltx2e}% LaTeX 2e bugfixes
|
||||
\usepackage{ifthen}% for optional parts
|
||||
\ifthenelse{\equal{\PaperSize}{a4paper}}{
|
||||
\usepackage[paper=\PaperSize,twoside=\Twosided,%
|
||||
textheight=246mm,%
|
||||
textwidth=160mm,%
|
||||
heightrounded=true,% round textheight to multiple of lines (avoids overfull vboxes)
|
||||
ignoreall=true,% do not include header, footer, and margins in calculations
|
||||
marginparsep=5pt,% marginpar only used for signs (centered), thus only small sep. needed
|
||||
marginparwidth=10mm,% prevent margin notes to be out of page
|
||||
hmarginratio=2:1,% set margin ratio (inner:outer for twoside) - (2:3 is default)
|
||||
]{geometry}}{}%
|
||||
\usepackage[paper=\PaperSize,twoside=\Twosided,%
|
||||
textheight=246mm,%
|
||||
textwidth=160mm,%
|
||||
heightrounded=true,% round textheight to multiple of lines (avoids overfull vboxes)
|
||||
ignoreall=true,% do not include header, footer, and margins in calculations
|
||||
marginparsep=5pt,% marginpar only used for signs (centered), thus only small sep. needed
|
||||
marginparwidth=10mm,% prevent margin notes to be out of page
|
||||
hmarginratio=2:1,% set margin ratio (inner:outer for twoside) - (2:3 is default)
|
||||
]{geometry}}{}%
|
||||
\ifthenelse{\equal{\PaperSize}{letterpaper}}{
|
||||
\usepackage[paper=\PaperSize,twoside=\Twosided,%
|
||||
textheight=9in,%
|
||||
textwidth=6.5in,%
|
||||
heightrounded=true,% round textheight to multiple of lines (avoids overfull vboxes)
|
||||
ignoreheadfoot=false,% include header and footer in the layout calculations
|
||||
marginparsep=5pt,% marginpar only used for signs (centered), thus only small sep. needed
|
||||
marginparwidth=10mm,% prevent margin notes to be out of page
|
||||
hmarginratio=3:2,% set margin ratio (inner:outer for twoside) - (2:3 is default)
|
||||
]{geometry}}{}%
|
||||
\usepackage[paper=\PaperSize,twoside=\Twosided,%
|
||||
textheight=9in,%
|
||||
textwidth=6.5in,%
|
||||
heightrounded=true,% round textheight to multiple of lines (avoids overfull vboxes)
|
||||
ignoreheadfoot=false,% include header and footer in the layout calculations
|
||||
marginparsep=5pt,% marginpar only used for signs (centered), thus only small sep. needed
|
||||
marginparwidth=10mm,% prevent margin notes to be out of page
|
||||
hmarginratio=3:2,% set margin ratio (inner:outer for twoside) - (2:3 is default)
|
||||
]{geometry}}{}%
|
||||
\ifthenelse{\equal{\DocumentLanguage}{en}}{\usepackage[T1]{fontenc}\usepackage[utf8]{inputenc}\usepackage[USenglish]{babel}}{}%
|
||||
\ifthenelse{\equal{\DocumentLanguage}{de}}{\usepackage[T1]{fontenc}\usepackage[utf8]{inputenc}\usepackage[ngerman]{babel}}{}%
|
||||
\usepackage[%
|
||||
headtopline,plainheadtopline,% activate all lines (header and footer)
|
||||
headsepline,plainheadsepline,%
|
||||
footsepline,plainfootsepline,%
|
||||
footbotline,plainfootbotline,%
|
||||
automark% auto update \..mark
|
||||
headtopline,plainheadtopline,% activate all lines (header and footer)
|
||||
headsepline,plainheadsepline,%
|
||||
footsepline,plainfootsepline,%
|
||||
footbotline,plainfootbotline,%
|
||||
automark% auto update \..mark
|
||||
]{scrlayer-scrpage}% (KOMA)
|
||||
\usepackage{imakeidx}
|
||||
\usepackage[]{caption}% customize captions
|
||||
@@ -91,7 +91,7 @@ automark% auto update \..mark
|
||||
\usepackage[normalem]{ulem}% cross-out, strike-out, underlines (normalem: keep \emph italic)
|
||||
%\usepackage[safe]{textcomp}% loading in safe mode to avoid problems (see LaTeX companion)
|
||||
%\usepackage[geometry,misc]{ifsym}% technical symbols
|
||||
\usepackage{remreset}%\@removefromreset commands (e.g., for continuous footnote numbering)
|
||||
%\usepackage{remreset}%\@removefromreset commands (e.g., for continuous footnote numbering)
|
||||
\usepackage{paralist}% extended list environments
|
||||
% \usepackage[Sonny]{fncychap}
|
||||
\usepackage[avantgarde]{quotchap}
|
||||
@@ -140,35 +140,35 @@ automark% auto update \..mark
|
||||
\usepackage{mdwlist} %list extensions
|
||||
\ifthenelse{\equal{\DocumentLanguage}{de}}
|
||||
{
|
||||
\usepackage[german]{fancyref} %Bessere Querverweise
|
||||
\usepackage[locale=DE]{siunitx} %Zahlen und SI Einheiten => Binary units aktivieren...
|
||||
\usepackage[autostyle=true, %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
german=quotes]{csquotes} %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
\usepackage[german]{fancyref} %Bessere Querverweise
|
||||
\usepackage[locale=DE]{siunitx} %Zahlen und SI Einheiten => Binary units aktivieren...
|
||||
\usepackage[autostyle=true, %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
german=quotes]{csquotes} %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
}
|
||||
{
|
||||
\usepackage[english]{fancyref} %Bessere Querverweise
|
||||
\usepackage[locale=US]{siunitx} %Zahlen und SI Einheiten => Binary units aktivieren...
|
||||
\usepackage[autostyle=true] %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
{csquotes}
|
||||
\usepackage[english]{fancyref} %Bessere Querverweise
|
||||
\usepackage[locale=US]{siunitx} %Zahlen und SI Einheiten => Binary units aktivieren...
|
||||
\usepackage[autostyle=true] %Anführungszeichen und Übersetzung der Literaturverweise
|
||||
{csquotes}
|
||||
}
|
||||
\sisetup{detect-weight=true, detect-family=true} %format like surrounding environment
|
||||
%extending fancyref for listings in both languages:
|
||||
\newcommand*{\fancyreflstlabelprefix}{lst}
|
||||
\fancyrefaddcaptions{english}{%
|
||||
\providecommand*{\freflstname}{listing}%
|
||||
\providecommand*{\Freflstname}{Listing}%
|
||||
\providecommand*{\freflstname}{listing}%
|
||||
\providecommand*{\Freflstname}{Listing}%
|
||||
}
|
||||
\fancyrefaddcaptions{german}{%
|
||||
\providecommand*{\freflstname}{Listing}%
|
||||
\providecommand*{\Freflstname}{Listing}%
|
||||
\providecommand*{\freflstname}{Listing}%
|
||||
\providecommand*{\Freflstname}{Listing}%
|
||||
}
|
||||
\frefformat{plain}{\fancyreflstlabelprefix}{\freflstname\fancyrefdefaultspacing#1}
|
||||
\Frefformat{plain}{\fancyreflstlabelprefix}{\Freflstname\fancyrefdefaultspacing#1}
|
||||
\frefformat{vario}{\fancyreflstlabelprefix}{%
|
||||
\freflstname\fancyrefdefaultspacing#1#3%
|
||||
\freflstname\fancyrefdefaultspacing#1#3%
|
||||
}
|
||||
\Frefformat{vario}{\fancyreflstlabelprefix}{%
|
||||
\Freflstname\fancyrefdefaultspacing#1#3%
|
||||
\Freflstname\fancyrefdefaultspacing#1#3%
|
||||
}
|
||||
|
||||
\sisetup{separate-uncertainty} %enable uncertainty for siunitx
|
||||
@@ -176,30 +176,30 @@ automark% auto update \..mark
|
||||
\DeclareSIUnit\permille{\text{\textperthousand}} %add \permille to siunitx
|
||||
\usepackage{xfrac} %Schönere brüche für SI Einheiten
|
||||
\sisetup{per-mode=fraction, %Bruchstriche bei SI Einheiten aktivieren
|
||||
fraction-function=\sfrac} %xfrac als Bruchstrichfunktion verwenden
|
||||
fraction-function=\sfrac} %xfrac als Bruchstrichfunktion verwenden
|
||||
\usepackage[scaled=0.78]{inconsolata}%Schreibmaschinenschrift für Quellcode
|
||||
|
||||
\usepackage[backend=biber, %Literaturverweiserweiterung Backend auswählen
|
||||
bibencoding=utf8, %.bib-File ist utf8-codiert...
|
||||
maxbibnames=99, %Immer alle Autoren in der Bibliographie darstellen...
|
||||
style=ieee
|
||||
bibencoding=utf8, %.bib-File ist utf8-codiert...
|
||||
maxbibnames=99, %Immer alle Autoren in der Bibliographie darstellen...
|
||||
style=ieee
|
||||
]{biblatex}
|
||||
\bibliography{bib/bibliography} %literatur.bib wird geladen und als Literaturverweis Datei verwendet
|
||||
|
||||
\ifthenelse{\equal{\FramedLinks}{true}}
|
||||
{
|
||||
\usepackage[%
|
||||
breaklinks=true,% allow line break in links
|
||||
colorlinks=false,% if false: framed link
|
||||
linkcolor=black,anchorcolor=black,citecolor=black,filecolor=black,%
|
||||
menucolor=black,urlcolor=black,bookmarksnumbered=true]{hyperref}% hyperlinks for references
|
||||
\usepackage[%
|
||||
breaklinks=true,% allow line break in links
|
||||
colorlinks=false,% if false: framed link
|
||||
linkcolor=black,anchorcolor=black,citecolor=black,filecolor=black,%
|
||||
menucolor=black,urlcolor=black,bookmarksnumbered=true]{hyperref}% hyperlinks for references
|
||||
}
|
||||
{
|
||||
\usepackage[%
|
||||
breaklinks=true,% allow line break in links
|
||||
colorlinks=true,% if false: framed link
|
||||
linkcolor=black,anchorcolor=black,citecolor=black,filecolor=black,%
|
||||
menucolor=black,urlcolor=black,bookmarksnumbered=true]{hyperref}% hyperlinks for references
|
||||
\usepackage[%
|
||||
breaklinks=true,% allow line break in links
|
||||
colorlinks=true,% if false: framed link
|
||||
linkcolor=black,anchorcolor=black,citecolor=black,filecolor=black,%
|
||||
menucolor=black,urlcolor=black,bookmarksnumbered=true]{hyperref}% hyperlinks for references
|
||||
}
|
||||
|
||||
\setcounter{biburlnumpenalty}{100}%Urls in Bibliographie Zeilenbrechbar machen
|
||||
@@ -213,8 +213,8 @@ style=ieee
|
||||
|
||||
\ifthenelse{\equal{\DocumentLanguage}{de}}
|
||||
{
|
||||
\deftranslation[to=ngerman] %Dem Paket babel den deutschen Abkürzungsverzeichnis-Kapitelnamen
|
||||
{Acronyms}{Abkürzungsverzeichnis} %beibringen
|
||||
\deftranslation[to=ngerman] %Dem Paket babel den deutschen Abkürzungsverzeichnis-Kapitelnamen
|
||||
{Acronyms}{Abkürzungsverzeichnis} %beibringen
|
||||
}{}
|
||||
|
||||
% misc
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
numpages = {58},
|
||||
keywords = {outlier detection, Anomaly detection},
|
||||
},
|
||||
@dataset{alexander_kyuroson_2023_7913307,
|
||||
dataset{alexander_kyuroson_2023_7913307,
|
||||
author = {Alexander Kyuroson and Niklas Dahlquist and Nikolaos Stathoulopoulos
|
||||
and Vignesh Kottayam Viswanathan and Anton Koval and George
|
||||
Nikolakopoulos},
|
||||
@@ -85,37 +85,6 @@
|
||||
pages = {716–721},
|
||||
}
|
||||
,
|
||||
@inproceedings{deepsvdd,
|
||||
title = {Deep One-Class Classification},
|
||||
author = {Ruff, Lukas and Vandermeulen, Robert and Goernitz, Nico and Deecke,
|
||||
Lucas and Siddiqui, Shoaib Ahmed and Binder, Alexander and M{\"u}ller
|
||||
, Emmanuel and Kloft, Marius},
|
||||
booktitle = {Proceedings of the 35th International Conference on Machine
|
||||
Learning},
|
||||
pages = {4393--4402},
|
||||
year = {2018},
|
||||
editor = {Dy, Jennifer and Krause, Andreas},
|
||||
volume = {80},
|
||||
series = {Proceedings of Machine Learning Research},
|
||||
month = {10--15 Jul},
|
||||
publisher = {PMLR},
|
||||
pdf = {http://proceedings.mlr.press/v80/ruff18a/ruff18a.pdf},
|
||||
url = {https://proceedings.mlr.press/v80/ruff18a.html},
|
||||
abstract = {Despite the great advances made by deep learning in many machine
|
||||
learning problems, there is a relative dearth of deep learning
|
||||
approaches for anomaly detection. Those approaches which do exist
|
||||
involve networks trained to perform a task other than anomaly
|
||||
detection, namely generative models or compression, which are in
|
||||
turn adapted for use in anomaly detection; they are not trained on
|
||||
an anomaly detection based objective. In this paper we introduce a
|
||||
new anomaly detection method—Deep Support Vector Data Description—,
|
||||
which is trained on an anomaly detection based objective. The
|
||||
adaptation to the deep regime necessitates that our neural network
|
||||
and training procedure satisfy certain properties, which we
|
||||
demonstrate theoretically. We show the effectiveness of our method
|
||||
on MNIST and CIFAR-10 image benchmark datasets as well as on the
|
||||
detection of adversarial examples of GTSRB stop signs.},
|
||||
},
|
||||
@inproceedings{deep_svdd,
|
||||
title = {Deep One-Class Classification},
|
||||
author = {Ruff, Lukas and Vandermeulen, Robert and Goernitz, Nico and Deecke,
|
||||
@@ -235,7 +204,7 @@
|
||||
performance;Current measurement},
|
||||
doi = {10.1109/IROS51168.2021.9636694},
|
||||
},
|
||||
@article{deep_learning_overview,
|
||||
article{deep_learning_overview,
|
||||
title = {Deep learning in neural networks: An overview},
|
||||
journal = {Neural Networks},
|
||||
volume = {61},
|
||||
@@ -289,7 +258,7 @@
|
||||
autoencoder algorithm are summarized, and prospected for its future
|
||||
development directions are addressed.},
|
||||
},
|
||||
@article{semi_overview,
|
||||
article{semi_overview,
|
||||
author = {Yang, Xiangli and Song, Zixing and King, Irwin and Xu, Zenglin},
|
||||
journal = {IEEE Transactions on Knowledge and Data Engineering},
|
||||
title = {A Survey on Deep Semi-Supervised Learning},
|
||||
@@ -302,7 +271,7 @@
|
||||
learning;semi-supervised learning;deep learning},
|
||||
doi = {10.1109/TKDE.2022.3220219},
|
||||
},
|
||||
@book{ai_fundamentals_book,
|
||||
book{ai_fundamentals_book,
|
||||
title = {Fundamentals of Artificial Intelligence},
|
||||
url = {http://dx.doi.org/10.1007/978-81-322-3972-7},
|
||||
DOI = {10.1007/978-81-322-3972-7},
|
||||
@@ -312,7 +281,7 @@
|
||||
language = {en},
|
||||
},
|
||||
|
||||
@article{machine_learning_overview,
|
||||
article{machine_learning_overview,
|
||||
title = {Machine Learning from Theory to Algorithms: An Overview},
|
||||
volume = {1142},
|
||||
ISSN = {1742-6596},
|
||||
@@ -550,7 +519,7 @@
|
||||
year = {1998},
|
||||
pages = {2278–2324},
|
||||
},
|
||||
@article{ef_concept_source,
|
||||
article{ef_concept_source,
|
||||
title = {Multi-Year ENSO Forecasts Using Parallel Convolutional Neural
|
||||
Networks With Heterogeneous Architecture},
|
||||
volume = {8},
|
||||
@@ -563,8 +532,226 @@
|
||||
and Tian, Hao and Song, Dehai and Wei, Zhiqiang},
|
||||
year = {2021},
|
||||
month = aug,
|
||||
},
|
||||
@article{ml_supervised_unsupervised_figure_source,
|
||||
title = {Virtual reality in biology: could we become virtual naturalists?},
|
||||
volume = {14},
|
||||
ISSN = {1936-6434},
|
||||
url = {http://dx.doi.org/10.1186/s12052-021-00147-x},
|
||||
DOI = {10.1186/s12052-021-00147-x},
|
||||
number = {1},
|
||||
journal = {Evolution: Education and Outreach},
|
||||
publisher = {Springer Science and Business Media LLC},
|
||||
author = {Morimoto, Juliano and Ponton, Fleur},
|
||||
year = {2021},
|
||||
month = may,
|
||||
},
|
||||
@article{ml_autoencoder_figure_source,
|
||||
title = "From Autoencoder to Beta-VAE",
|
||||
author = "Weng, Lilian",
|
||||
journal = "lilianweng.github.io",
|
||||
year = "2018",
|
||||
url = "https://lilianweng.github.io/posts/2018-08-12-vae/",
|
||||
},
|
||||
|
||||
@conference{bg_lidar_figure_source,
|
||||
title = "1D MEMS Micro-Scanning LiDAR",
|
||||
author = "Norbert Druml and Ievgeniia Maksymova and Thomas Thurner and Lierop,
|
||||
{D. van} and Hennecke, {Marcus E.} and Andreas Foroutan",
|
||||
year = "2018",
|
||||
month = sep,
|
||||
day = "16",
|
||||
language = "English",
|
||||
},
|
||||
@book{deep_learning_book,
|
||||
title = {Deep Learning},
|
||||
author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
|
||||
publisher = {MIT Press},
|
||||
note = {\url{http://www.deeplearningbook.org}},
|
||||
year = {2016},
|
||||
},
|
||||
@misc{mobilenet,
|
||||
doi = {10.48550/ARXIV.1704.04861},
|
||||
url = {https://arxiv.org/abs/1704.04861},
|
||||
author = {Howard, Andrew G. and Zhu, Menglong and Chen, Bo and Kalenichenko,
|
||||
Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and
|
||||
Adam, Hartwig},
|
||||
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and
|
||||
information sciences, FOS: Computer and information sciences},
|
||||
title = {MobileNets: Efficient Convolutional Neural Networks for Mobile Vision
|
||||
Applications},
|
||||
publisher = {arXiv},
|
||||
year = {2017},
|
||||
copyright = {arXiv.org perpetual, non-exclusive license},
|
||||
},
|
||||
@inproceedings{shufflenet,
|
||||
title = {ShuffleNet: An Extremely Efficient Convolutional Neural Network for
|
||||
Mobile Devices},
|
||||
url = {http://dx.doi.org/10.1109/CVPR.2018.00716},
|
||||
DOI = {10.1109/cvpr.2018.00716},
|
||||
booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern
|
||||
Recognition},
|
||||
publisher = {IEEE},
|
||||
author = {Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
|
||||
year = {2018},
|
||||
month = jun,
|
||||
},
|
||||
@article{bg_svm,
|
||||
title = {Support-vector networks},
|
||||
author = {Cortes, Corinna and Vapnik, Vladimir},
|
||||
journal = {Machine learning},
|
||||
volume = {20},
|
||||
number = {3},
|
||||
pages = {273--297},
|
||||
year = {1995},
|
||||
publisher = {Springer},
|
||||
},
|
||||
|
||||
@article{bg_kmeans,
|
||||
author = {Lloyd, S.},
|
||||
journal = {IEEE Transactions on Information Theory},
|
||||
title = {Least squares quantization in PCM},
|
||||
year = {1982},
|
||||
volume = {28},
|
||||
number = {2},
|
||||
pages = {129-137},
|
||||
keywords = {Noise;Quantization (signal);Voltage;Receivers;Pulse
|
||||
modulation;Sufficient conditions;Stochastic processes;Probabilistic
|
||||
logic;Urban areas;Q measurement},
|
||||
doi = {10.1109/TIT.1982.1056489},
|
||||
},
|
||||
|
||||
@inproceedings{bg_dbscan,
|
||||
added-at = {2023-12-13T07:32:13.000+0100},
|
||||
author = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Xu,
|
||||
Xiaowei},
|
||||
biburl = {
|
||||
https://www.bibsonomy.org/bibtex/279a9f3560daefa3775bd35543b4482e1/admin
|
||||
},
|
||||
booktitle = {KDD},
|
||||
crossref = {conf/kdd/1996},
|
||||
editor = {Simoudis, Evangelos and Han, Jiawei and Fayyad, Usama M.},
|
||||
ee = {http://www.aaai.org/Library/KDD/1996/kdd96-037.php},
|
||||
interhash = {ba33e4d6b4e5b26bd9f543f26b7d250a},
|
||||
intrahash = {79a9f3560daefa3775bd35543b4482e1},
|
||||
isbn = {1-57735-004-9},
|
||||
keywords = {},
|
||||
pages = {226-231},
|
||||
publisher = {AAAI Press},
|
||||
timestamp = {2023-12-13T07:32:13.000+0100},
|
||||
title = {A Density-Based Algorithm for Discovering Clusters in Large Spatial
|
||||
Databases with Noise.},
|
||||
url = {http://dblp.uni-trier.de/db/conf/kdd/kdd96.html#EsterKSX96},
|
||||
year = 1996,
|
||||
},
|
||||
@article{bg_pca,
|
||||
author = { Karl Pearson F.R.S. },
|
||||
title = {LIII. On lines and planes of closest fit to systems of points in
|
||||
space},
|
||||
journal = {The London, Edinburgh, and Dublin Philosophical Magazine and
|
||||
Journal of Science},
|
||||
volume = {2},
|
||||
number = {11},
|
||||
pages = {559-572},
|
||||
year = {1901},
|
||||
publisher = {Taylor \& Francis},
|
||||
doi = {10.1080/14786440109462720},
|
||||
},
|
||||
@article{bg_infomax,
|
||||
author = {Linsker, R.},
|
||||
journal = {Computer},
|
||||
title = {Self-organization in a perceptual network},
|
||||
year = {1988},
|
||||
volume = {21},
|
||||
number = {3},
|
||||
pages = {105-117},
|
||||
keywords = {Intelligent networks;Biological information
|
||||
theory;Circuits;Biology computing;Animal
|
||||
structures;Neuroscience;Genetics;System testing;Neural
|
||||
networks;Constraint theory},
|
||||
doi = {10.1109/2.36},
|
||||
},
|
||||
@article{bg_slam,
|
||||
title = {On the Representation and Estimation of Spatial Uncertainty},
|
||||
volume = {5},
|
||||
ISSN = {1741-3176},
|
||||
url = {http://dx.doi.org/10.1177/027836498600500404},
|
||||
DOI = {10.1177/027836498600500404},
|
||||
number = {4},
|
||||
journal = {The International Journal of Robotics Research},
|
||||
publisher = {SAGE Publications},
|
||||
author = {Smith, Randall C. and Cheeseman, Peter},
|
||||
year = {1986},
|
||||
month = dec,
|
||||
pages = {56–68},
|
||||
},
|
||||
@article{roc_vs_prc2,
|
||||
title = {Context discovery for anomaly detection},
|
||||
volume = {19},
|
||||
ISSN = {2364-4168},
|
||||
url = {http://dx.doi.org/10.1007/s41060-024-00586-x},
|
||||
DOI = {10.1007/s41060-024-00586-x},
|
||||
number = {1},
|
||||
journal = {International Journal of Data Science and Analytics},
|
||||
publisher = {Springer Science and Business Media LLC},
|
||||
author = {Calikus, Ece and Nowaczyk, Slawomir and Dikmen, Onur},
|
||||
year = {2024},
|
||||
month = jun,
|
||||
pages = {99–113},
|
||||
},
|
||||
@article{roc_vs_prc,
|
||||
title = {On the evaluation of unsupervised outlier detection: measures,
|
||||
datasets, and an empirical study},
|
||||
volume = {30},
|
||||
ISSN = {1573-756X},
|
||||
url = {http://dx.doi.org/10.1007/s10618-015-0444-8},
|
||||
DOI = {10.1007/s10618-015-0444-8},
|
||||
number = {4},
|
||||
journal = {Data Mining and Knowledge Discovery},
|
||||
publisher = {Springer Science and Business Media LLC},
|
||||
author = {Campos, Guilherme O. and Zimek, Arthur and Sander, J\"{o}rg and
|
||||
Campello, Ricardo J. G. B. and Micenková, Barbora and Schubert, Erich
|
||||
and Assent, Ira and Houle, Michael E.},
|
||||
year = {2016},
|
||||
month = jan,
|
||||
pages = {891–927},
|
||||
},
|
||||
@inproceedings{roc,
|
||||
title = {Basic principles of ROC analysis},
|
||||
author = {Metz, Charles E},
|
||||
booktitle = {Seminars in nuclear medicine},
|
||||
volume = {8},
|
||||
number = {4},
|
||||
pages = {283--298},
|
||||
year = {1978},
|
||||
organization = {Elsevier},
|
||||
},
|
||||
@article{prc,
|
||||
title = {A critical investigation of recall and precision as measures of
|
||||
retrieval system performance},
|
||||
volume = {7},
|
||||
ISSN = {1558-2868},
|
||||
url = {http://dx.doi.org/10.1145/65943.65945},
|
||||
DOI = {10.1145/65943.65945},
|
||||
number = {3},
|
||||
journal = {ACM Transactions on Information Systems},
|
||||
publisher = {Association for Computing Machinery (ACM)},
|
||||
author = {Raghavan, Vijay and Bollmann, Peter and Jung, Gwang S.},
|
||||
year = {1989},
|
||||
month = jul,
|
||||
pages = {205–229},
|
||||
},
|
||||
@article{zscore,
|
||||
title = {Advanced engineering mathematics},
|
||||
author = {Kreyszig, Erwin and Stroud, K and Stephenson, G},
|
||||
journal = {Integration},
|
||||
volume = {9},
|
||||
number = {4},
|
||||
pages = {1014},
|
||||
year = {2008},
|
||||
publisher = {John Wiley \& Sons, Inc. 9 th edition, 2006 Page 2 of 6 Teaching
|
||||
methods~…},
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
\documentclass[tikz,border=10pt]{standalone}
|
||||
\usepackage{tikz}
|
||||
\usepackage{amsfonts}
|
||||
\usetikzlibrary{positioning, shapes.geometric, fit, arrows, arrows.meta, backgrounds}
|
||||
|
||||
% Define box styles
|
||||
@@ -7,9 +8,9 @@
|
||||
databox/.style={rectangle, align=center, draw=black, fill=blue!50, thick, rounded corners},%, inner sep=4},
|
||||
procbox/.style={rectangle, align=center, draw=black, fill=orange!30, thick, rounded corners},
|
||||
hyperbox/.style={rectangle, align=center, draw=black, fill=green!30, thick, rounded corners},
|
||||
stepsbox/.style={rectangle, align=left, draw=black,fill=white, rounded corners, minimum width=6cm, minimum height=1.5cm, font=\small},
|
||||
outputbox/.style={rectangle, align=center, draw=red!80, fill=red!20, rounded corners, minimum width=6cm, minimum height=1.5cm, font=\small},
|
||||
hlabelbox/.style={rectangle, align=center, draw=black,fill=white, rounded corners, minimum width=6cm, minimum height=1.5cm, font=\small},
|
||||
stepsbox/.style={rectangle, align=left, draw=black,fill=white, rounded corners, minimum width=5.2cm, minimum height=1.5cm, font=\small},
|
||||
outputbox/.style={rectangle, align=center, draw=red!80, fill=red!20, rounded corners, minimum width=5.2cm, minimum height=1.5cm, font=\small},
|
||||
hlabelbox/.style={rectangle, align=center, draw=black,fill=white, rounded corners, minimum width=5.2cm, minimum height=1.5cm, font=\small},
|
||||
vlabelbox/.style={rectangle, align=center, draw=black,fill=white, rounded corners, minimum width=3cm, minimum height=1.8cm, font=\small},
|
||||
arrow/.style={-{Latex[length=3mm]}},
|
||||
arrowlabel/.style={fill=white,inner sep=2pt,midway}
|
||||
@@ -25,11 +26,11 @@
|
||||
\begin{tikzpicture}[node distance=1cm and 2cm]
|
||||
|
||||
\node (data) {Data};
|
||||
\node[right=7 of data] (process) {Procedure};
|
||||
\node[right=7 of process] (hyper) {Hyperparameters};
|
||||
\node[right=4.9 of data] (process) {Procedure};
|
||||
\node[right=4.1 of process] (hyper) {Hyperparameters};
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[hlabelbox, below=of data] (unlabeled) {\boxtitle{Unlabeled Data} More normal than \\ anomalous samples required};
|
||||
\node[hlabelbox, below=1.29 of data] (unlabeled) {\boxtitle{Unlabeled Data} Significantly more normal than \\ anomalous samples required};
|
||||
\node[hlabelbox, below=.1 of unlabeled] (labeled) {\boxtitle{Labeled Data} No requirement regarding ratio \\ +1 = normal, -1 = anomalous};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
@@ -39,16 +40,16 @@
|
||||
%\draw[arrow] (latent.east) -- node{} (autoenc.west);
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[stepsbox, below=of process] (pretrainproc) {Train Autoencoder for $E_A$ Epochs \\ with $L_A$ Learning Rate \\ No Labels Used};
|
||||
\node[outputbox, below=.1 of pretrainproc] (pretrainout) {\boxtitle{Outputs} Encoder Network \\ $\mathbf{w}$: Network Weights};
|
||||
\node[stepsbox, below=of process] (pretrainproc) {Train Autoencoder $\mathcal{\phi}_{AE}$ \\ optimize Autoencoding Objective \\ for $E_A$ Epochs \\ with $L_A$ Learning Rate \\ No Labels Used / Required};
|
||||
\node[outputbox, below=.1 of pretrainproc] (pretrainout) {\boxtitle{Outputs} $\mathcal{\phi}$: Encoder / DeepSAD Network \\ $\mathcal{W}_E$: Encoder Network Weights};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
\node[procbox, fit=(pretrainproc) (pretrainout), label={[label distance = 1, name=pretrainlab]above:{\textbf{Pre-Training of Autoencoder}}}] (pretrain) {};
|
||||
\end{pgfonlayer}
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[hlabelbox, below=of hyper] (autoencarch) {\boxtitle{Autoencoder Architecture} Choose based on data type \\ Latent Space Size (based on complexity)};
|
||||
\node[hlabelbox, below=.1 of autoencarch] (pretrainhyper) {\boxtitle{Hyperparameters} $E_A$: Number of Epochs \\ $L_A$: Learning Rate};
|
||||
\node[hlabelbox, below=1.26 of hyper] (autoencarch) {\boxtitle{Autoencoder Architecture} $\mathcal{\phi}_{AE}$: Autoencoder Network \\ $\mathbb{R}^d$: Latent Space Size };
|
||||
\node[hlabelbox, below=.1 of autoencarch] (pretrainhyper) {\boxtitle{Hyperparameters} $E_A$: Number of Epochs \\ $L_A$: Learning Rate AE};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
\node[hyperbox, fit=(autoencarch) (pretrainhyper), label={[label distance = 1, name=autoenclabel]above:{\textbf{Pre-Training Hyperparameters}}}] (pretrainhyp) {};
|
||||
@@ -61,7 +62,7 @@
|
||||
% \draw[arrow] (node cs:name=autoenc,angle=196) |- (node cs:name=pretrain,angle=5);
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[stepsbox, below=1.4 of pretrain] (calccproc) {1. Init Encoder with $\mathbf{w}$ \\ 2. Forward Pass on all data \\ 3. $\mathbf{c}$ = Mean Latent Representation};
|
||||
\node[stepsbox, below=1.4 of pretrain] (calccproc) {Init Network $\mathcal{\phi}$ with $\mathcal{W}_E$ \\ Forward Pass on all data \\ Hypersphere center $\mathbf{c}$ is the mean \\ of all Latent Representations};
|
||||
\node[outputbox, below=.1 of calccproc] (calccout) {\boxtitle{Outputs} $\mathbf{c}$: Hypersphere Center};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
@@ -76,21 +77,21 @@
|
||||
%\draw[arrow] (node cs:name=traindata,angle=-45) |- node[arrowlabel]{all training data, labels removed} (node cs:name=calcc,angle=200);
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[stepsbox, below=1.4 of calcc] (maintrainproc) {Train Network for $E_M$ Epochs \\ with $L_M$ Learning Rate \\ Considers Labels with $\eta$ strength};
|
||||
\node[outputbox, below=.1 of maintrainproc] (maintrainout) {\boxtitle{Outputs} Encoder Network \\ $\mathbf{w}$: Network Weights \\ $\mathbf{c}$: Hypersphere Center};
|
||||
\node[stepsbox, below=1.4 of calcc] (maintrainproc) {Init Network $\mathcal{\phi}$ with $\mathcal{W}_E$ \\ Train Network $\mathcal{\phi}$ \\ optimize DeepSAD Objective\\ for $E_M$ Epochs \\ with $L_M$ Learning Rate \\ Considers Labels with $\eta$ strength};
|
||||
\node[outputbox, below=.1 of maintrainproc] (maintrainout) {\boxtitle{Outputs} $\mathcal{\phi}$: DeepSAD Network \\ $\mathcal{W}$: DeepSAD Network Weights \\ $\mathbf{c}$: Hypersphere Center};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
\node[procbox, fit=(maintrainproc) (maintrainout), label={[label distance = 1, name=maintrainlab]above:{\textbf{Main Training}}}] (maintrain) {};
|
||||
\end{pgfonlayer}
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[hlabelbox, below=11.25 of hyper] (maintrainhyper) {$E_M$: Number of Epochs \\ $L_M$: Learning Rate \\ $\eta$: Strength Labeled/Unlabeled};
|
||||
\node[hlabelbox, below=12.48 of hyper] (maintrainhyper) {$E_M$: Number of Epochs \\ $L_M$: Learning Rate \\ $\eta$: Weight Labeled/Unlabeled};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
\node[hyperbox, fit=(maintrainhyper), label={[label distance = 1, name=autoenclabel]above:{\textbf{Main-Training Hyperparameters}}}] (maintrainhyp) {};
|
||||
\end{pgfonlayer}
|
||||
|
||||
\draw[arrow] (node cs:name=pretrain,angle=-20) -- +(1, 0) |- (node cs:name=maintrain,angle=20);
|
||||
\draw[arrow] (node cs:name=pretrain,angle=-50) |- +(1.5, -0.55) -- +(1.5,-5.4) -| (node cs:name=maintrain,angle=50);
|
||||
|
||||
|
||||
%\draw[arrow] (pretrainoutput.south) -- (node cs:name=maintrain,angle=22);
|
||||
@@ -101,7 +102,7 @@
|
||||
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[stepsbox, below=1.4 of maintrain] (inferenceproc) {Forward Pass through Network = $\mathbf{p}$ \\ Calculate Geometric Distance $\mathbf{p} \rightarrow \mathbf{c}$ \\ Anomaly Score = Geometric Distance};
|
||||
\node[stepsbox, below=1.4 of maintrain] (inferenceproc) {Init Network $\mathcal{\phi}$ with $\mathcal{W}$ \\Forward Pass on sample = $\mathbf{p}$ \\ Calculate Distance $\mathbf{p} \rightarrow \mathbf{c}$ \\ Distance = Anomaly Score};
|
||||
\node[outputbox, below=.1 of inferenceproc] (inferenceout) {\boxtitle{Outputs} Anomaly Score (Analog Value) \\ Higher for Anomalies};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
@@ -109,7 +110,7 @@
|
||||
\end{pgfonlayer}
|
||||
|
||||
\begin{pgfonlayer}{foreground}
|
||||
\node[hlabelbox, below=11.8 of traindata] (newdatasample) {\boxtitle{New Data Sample} Same data type as training data};
|
||||
\node[hlabelbox, below=13.32 of traindata] (newdatasample) {\boxtitle{New Data Sample} Same data type as training data};
|
||||
\end{pgfonlayer}
|
||||
\begin{pgfonlayer}{background}
|
||||
\node[databox, fit=(newdatasample), label={[label distance = 1] above:{\textbf{Unseen Data}}}] (newdata) {};
|
||||
|
||||
BIN
thesis/figures/ae_elbow_test_loss_anomaly.png
Normal file
|
After Width: | Height: | Size: 85 KiB |
BIN
thesis/figures/ae_elbow_test_loss_overall.png
Normal file
|
After Width: | Height: | Size: 88 KiB |
BIN
thesis/figures/autoencoder_principle.png
Normal file
|
After Width: | Height: | Size: 134 KiB |
|
Before Width: | Height: | Size: 211 KiB |
BIN
thesis/figures/bg_lidar_principle.png
Normal file
|
After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 1.4 MiB After Width: | Height: | Size: 1.4 MiB |
|
Before Width: | Height: | Size: 220 KiB After Width: | Height: | Size: 211 KiB |
|
Before Width: | Height: | Size: 31 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 37 KiB |
BIN
thesis/figures/ml_learning_schema_concept.png
Normal file
|
After Width: | Height: | Size: 199 KiB |
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 36 KiB |
BIN
thesis/figures/results_ap_over_latent.png
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
thesis/figures/results_inference_normal_vs_degraded.png
Normal file
|
After Width: | Height: | Size: 718 KiB |
BIN
thesis/figures/results_prc.png
Normal file
|
After Width: | Height: | Size: 691 KiB |
BIN
thesis/figures/results_prc_over_semi.png
Normal file
|
After Width: | Height: | Size: 365 KiB |
11
thesis/filters/drop-images.lua
Normal file
@@ -0,0 +1,11 @@
|
||||
-- drop-images.lua
|
||||
-- Replaces all images (figures, graphics) with a short placeholder.
|
||||
-- Pandoc filter hook for inline images: every image is swapped for a fixed
-- textual placeholder so the plain-text output carries no image markup.
function Image(el)
  local placeholder = pandoc.Str("[image omitted]")
  return placeholder
end
|
||||
|
||||
-- For LaTeX figures that are still raw
|
||||
-- Pandoc filter hook for raw blocks: a raw TeX block that opens a `figure`
-- environment is replaced by a one-line placeholder. Returning nothing leaves
-- any other raw block unchanged.
function RawBlock(el)
  if el.format ~= "tex" then
    return
  end
  local opens_figure = el.text:match("\\begin%s*{%s*figure%s*}")
  if not opens_figure then
    return
  end
  return pandoc.Plain({ pandoc.Str("[figure omitted]") })
end
|
||||
|
||||
11
thesis/filters/drop-tables.lua
Normal file
@@ -0,0 +1,11 @@
|
||||
-- drop-tables.lua
|
||||
-- Removes LaTeX tabular and tabularx environments (and their contents).
|
||||
-- Pandoc filter hook for raw blocks: a raw TeX block containing the start of
-- a `tabular` or `tabularx` environment is replaced by a placeholder.
-- Returning nothing leaves every other raw block unchanged.
function RawBlock(el)
  if el.format ~= "tex" then
    return
  end
  -- `tabularx?` accepts both environment names with a single pattern
  local has_table = el.text:match("\\begin%s*{%s*tabularx?%s*}")
  if has_table then
    return pandoc.Plain({ pandoc.Str("[table omitted]") })
  end
end
|
||||
|
||||
43
thesis/filters/keep-citations.lua
Normal file
@@ -0,0 +1,43 @@
|
||||
-- keep-citations.lua
|
||||
-- Replace citations with a placeholder and eat any preceding space.
|
||||
local PH = "[citation]"
|
||||
|
||||
-- Pandoc-native citations (if the reader produced Cite nodes)
|
||||
-- Pandoc filter hook for native citations: the whole Cite node is replaced
-- by the placeholder string.
function Cite(el)
  local placeholder = pandoc.Str(PH)
  return placeholder
end
|
||||
|
||||
-- Raw LaTeX \cite-like macros (when not parsed as Cite)
|
||||
-- Pandoc filter hook for raw inlines: fallback for \cite-like macros that the
-- reader did not turn into Cite nodes (e.g. \cite, \citep, \autocite,
-- \parencite*). Matching raw TeX inlines are replaced by the placeholder;
-- everything else is left unchanged (no return value).
function RawInline(el)
  local is_tex = el.format and el.format:match("tex")
  -- Pattern: a backslash, any (possibly empty) run of letters, then "cite".
  -- The earlier pattern "\\%a-*cite%*?" contained a stray "*" after "%a-";
  -- Lua treats that as a literal asterisk, so plain \cite{...} never matched.
  -- The match is unanchored, so suffixed/starred variants are covered too.
  if is_tex and el.text:match("\\%a-cite") then
    return pandoc.Str(PH)
  end
end
|
||||
|
||||
-- Remove a single leading Space before our placeholder
|
||||
-- Return a copy of `inlines` in which a Space that sits directly in front of
-- the citation placeholder has been dropped; all other inlines are kept in
-- their original order.
local function squash_spaces(inlines)
  local kept = {}
  for idx = 1, #inlines do
    local item = inlines[idx]
    local following = inlines[idx + 1]
    local is_space_before_ph = item.t == "Space"
      and following ~= nil
      and following.t == "Str"
      and following.text == PH
    if not is_space_before_ph then
      kept[#kept + 1] = item
    end
  end
  return kept
end
|
||||
|
||||
-- Pandoc filter hook for paragraphs: drop the space in front of each
-- citation placeholder, then hand the paragraph back.
function Para(el)
  local cleaned = squash_spaces(el.content)
  el.content = cleaned
  return el
end
|
||||
|
||||
-- Pandoc filter hook for Plain blocks: same clean-up as for paragraphs,
-- dropping the space in front of each citation placeholder.
function Plain(el)
  local cleaned = squash_spaces(el.content)
  el.content = cleaned
  return el
end
|
||||
|
||||
48
thesis/filters/math-omit.lua
Normal file
@@ -0,0 +1,48 @@
|
||||
-- math-omit.lua
|
||||
-- Replace any math with a placeholder and ensure a space before it when appropriate.
|
||||
local PH = "[math omitted]"
|
||||
|
||||
-- Pandoc filter hook for math: the formula is replaced by the placeholder
-- string. Spacing around it is repaired afterwards by the Para/Plain hooks.
function Math(el)
  local placeholder = pandoc.Str(PH)
  return placeholder
end
|
||||
|
||||
-- Return a copy of `inlines` in which every math placeholder is preceded by
-- a Space, unless the element emitted just before it makes one unnecessary.
local function ensure_space_before_ph(inlines)
  -- True when no Space should be inserted after `prev`: the placeholder is
  -- the first token of the block, a Space is already there, or the preceding
  -- Str ends in an opening bracket/paren/brace, slash, hyphen or whitespace.
  local function suppresses_space(prev)
    if not prev then
      return true
    end
    if prev.t == "Space" then
      return true
    end
    if prev.t ~= "Str" then
      return false
    end
    local tail = prev.text:sub(-1)
    if tail:match("[%(%[%{%/%-]") or tail:match("%s") then
      return true
    end
    return false
  end

  local result = {}
  for idx = 1, #inlines do
    local item = inlines[idx]
    local is_placeholder = item.t == "Str" and item.text == PH
    if is_placeholder and not suppresses_space(result[#result]) then
      result[#result + 1] = pandoc.Space()
    end
    result[#result + 1] = item
  end
  return result
end
|
||||
|
||||
-- Pandoc filter hook for paragraphs: make sure each math placeholder is
-- separated from the preceding text, then hand the paragraph back.
function Para(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||
|
||||
-- Pandoc filter hook for Plain blocks: same spacing repair as for
-- paragraphs, applied to the block's inline content.
function Plain(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||
@@ -15,6 +15,8 @@
|
||||
let
|
||||
pkgs = import nixpkgs { inherit system; };
|
||||
|
||||
aspellWithDicts = pkgs.aspellWithDicts (d: [ d.en ]);
|
||||
|
||||
latex-packages = with pkgs; [
|
||||
texlive.combined.scheme-full
|
||||
which
|
||||
@@ -26,16 +28,42 @@
|
||||
zathura
|
||||
wmctrl
|
||||
python312
|
||||
pandoc
|
||||
pandoc-lua-filters
|
||||
];
|
||||
filtersPath = "${pkgs.pandoc-lua-filters}/share/pandoc/filters";
|
||||
in
|
||||
{
|
||||
devShell = pkgs.mkShell {
  buildInputs = [
    latex-packages
    dev-packages
    aspellWithDicts
  ];

  # The hook must be an attribute of mkShell to run when the shell is
  # entered; placed next to devShell it is only a stray flake output.
  # No `set -eu` here: the hook is evaluated in the interactive shell, where
  # those options would stay active and terminate the session on the first
  # failing command or unset variable.
  shellHook = ''
    # local folder in your repo to reference in commands
    link_target="pandoc-filters"
    # refresh symlink each time you enter the shell
    ln -sfn ${filtersPath} "$link_target"
    echo "Linked $link_target -> ${filtersPath}"

    # (optional) write a defaults file that uses the relative symlink
    if [ ! -f pandoc.defaults.yaml ]; then
      # heredoc body and terminator stay at the string's base indentation so
      # that they end up in column 0 after Nix strips the common indent
      cat > pandoc.defaults.yaml <<'YAML'
    from: latex
    to: plain
    wrap: none
    lua-filter:
      - pandoc-filters/latex-hyphen.lua
      - pandoc-filters/pandoc-quotes.lua
    YAML
      echo "Wrote pandoc.defaults.yaml"
    fi
  '';
};
|
||||
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
61
thesis/tex2plaintext.sh
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Flatten a LaTeX document, strip its tables, convert it to plain text with
# pandoc (citations and math replaced by placeholders) and split the result
# into two parts at a marker line.
#
# Usage:
#   ./tex2plaintext.sh [INPUT_TEX] [OUT_BASENAME]
#
# Defaults:
#   INPUT_TEX    = Main.tex  (your original file name)
#   OUT_BASENAME = thesis    (produces thesis.txt, thesis_part1.txt, thesis_part2.txt)
#
# Intermediate files flat.tex and flat_notables.tex are written to the
# current directory and left in place.

INPUT_TEX="${1:-Main.tex}"
OUT_BASE="${2:-thesis}"

FLAT_TEX="flat.tex"
NO_TABLES_TEX="flat_notables.tex"
PLAIN_TXT="${OUT_BASE}.txt"
PART1_TXT="${OUT_BASE}_part1.txt"
PART2_TXT="${OUT_BASE}_part2.txt"
MARKER="Data and Preprocessing"

echo "[1/5] Flattening with latexpand -> ${FLAT_TEX}"
latexpand "${INPUT_TEX}" > "${FLAT_TEX}"

echo "[2/5] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
# Replace entire tabular / tabularx environments with a placeholder.
# -0777 slurps the whole file so the non-greedy match can span lines.
perl -0777 -pe 's/\\begin\{(tabularx?)\}.*?\\end\{\1\}/[table omitted]/gs' \
  "${FLAT_TEX}" > "${NO_TABLES_TEX}"

echo "[3/5] Converting to plain text with pandoc -> ${PLAIN_TXT}"
pandoc -f latex -t plain --wrap=none \
  --lua-filter=filters/keep-citations.lua \
  --lua-filter=filters/math-omit.lua \
  "${NO_TABLES_TEX}" -o "${PLAIN_TXT}"

echo "[4/5] Replacing [] placeholders with [figure]"
sed -i 's/\[\]/[figure]/g' "${PLAIN_TXT}"

echo "[5/5] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""

# Ensure the marker exists exactly on its own line.
# -F: compare the marker as a fixed string, matching the literal comparison
# awk performs below (a regex match could disagree for markers with
# metacharacters); -x: the whole line must equal the marker.
if ! grep -Fxq -- "${MARKER}" "${PLAIN_TXT}"; then
  echo "ERROR: Marker line not found exactly as \"${MARKER}\" in ${PLAIN_TXT}."
  echo "       (It must be the only content on that line.)"
  exit 1
fi

# Clean previous outputs if present
rm -f -- "${PART1_TXT}" "${PART2_TXT}"

# Split so the marker line becomes the FIRST line of part 2
awk -v marker="${MARKER}" -v out1="${PART1_TXT}" -v out2="${PART2_TXT}" '
  BEGIN { current = out1 }
  $0 == marker { current = out2; print $0 > current; next }
  { print $0 > current }
' "${PLAIN_TXT}"

echo "Done."
echo "  - ${PLAIN_TXT}"
echo "  - ${PART1_TXT}"
echo "  - ${PART2_TXT}"
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
\addcontentsline{toc}{chapter}{Abstract (English)}
|
||||
\begin{center}\Large\bfseries Abstract (English)\end{center}\vspace*{1cm}\noindent
|
||||
Write some fancy abstract here!
|
||||
\addcontentsline{toc}{chapter}{Abstract}
|
||||
\begin{center}\Large\bfseries Abstract\end{center}\vspace*{1cm}\noindent
|
||||
Autonomous robots are increasingly used in search and rescue (SAR) missions. In these missions, LiDAR sensors are often the most important source of environmental data. However, LiDAR data can degrade under hazardous conditions, especially when airborne particles such as smoke or dust are present. This degradation can lead to errors in mapping and navigation and may endanger both the robot and humans. Therefore, robots need a way to estimate the reliability of their LiDAR data, so that they can make better-informed decisions.
|
||||
\bigskip
|
||||
|
||||
This thesis investigates whether anomaly detection methods can be used to quantify LiDAR data degradation caused by airborne particles such as smoke and dust. We apply a semi-supervised deep learning approach called DeepSAD, which produces an anomaly score for each LiDAR scan, serving as a measure of data reliability.
|
||||
\bigskip
|
||||
|
||||
We evaluate this method against baseline methods on a subterranean dataset that includes LiDAR scans degraded by artificial smoke. Our results show that DeepSAD consistently outperforms the baselines and can clearly distinguish degraded from normal scans. At the same time, we find that the limited availability of labeled data and the lack of robust ground truth remain major challenges. Despite these limitations, our work demonstrates that anomaly detection methods are a promising tool for LiDAR degradation quantification in SAR scenarios.
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
\addcontentsline{toc}{chapter}{Acknowledgements}
|
||||
\begin{center}\Large\bfseries Acknowledgements\end{center}\vspace*{1cm}\noindent
|
||||
Here you can tell us, how thankful you are for this amazing template ;)
|
||||
\addcontentsline{toc}{chapter}{Artificial Intelligence Usage Disclaimer}
|
||||
\begin{center}\Large\bfseries Artificial Intelligence Usage Disclaimer\end{center}\vspace*{1cm}\noindent
|
||||
During the creation of this thesis, an LLM-based Artificial Intelligence tool was used for stylistic and grammatical revision of the author's own work.
|
||||
|
||||
BIN
thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.pdf
vendored
Normal file
@@ -30,7 +30,8 @@ arch = [
|
||||
height=H8 * 1.6,
|
||||
depth=D1,
|
||||
width=W1,
|
||||
caption=f"Latent Space",
|
||||
caption="Latent Space",
|
||||
captionshift=0,
|
||||
),
|
||||
# to_connection("fc1", "latent"),
|
||||
# --------------------------- DECODER ---------------------------
|
||||
@@ -39,19 +40,20 @@ arch = [
|
||||
"fc3",
|
||||
n_filer="{{8×128×8}}",
|
||||
zlabeloffset=0.5,
|
||||
offset="(2,0,0)",
|
||||
offset="(2,-.5,0)",
|
||||
to="(latent-east)",
|
||||
height=H1,
|
||||
depth=D512,
|
||||
width=W1,
|
||||
caption=f"FC",
|
||||
captionshift=20,
|
||||
),
|
||||
to_Conv(
|
||||
"unsqueeze",
|
||||
s_filer="{{128×8}}",
|
||||
zlabeloffset=0.4,
|
||||
n_filer=32,
|
||||
offset="(2,0,0)",
|
||||
offset="(1.4,0,0)",
|
||||
to="(fc3-east)",
|
||||
height=H8,
|
||||
depth=D128,
|
||||
@@ -62,7 +64,7 @@ arch = [
|
||||
# Reshape to 4×8×512
|
||||
to_UnPool(
|
||||
"up1",
|
||||
offset="(2,0,0)",
|
||||
offset="(1.2,0,0)",
|
||||
n_filer=32,
|
||||
to="(unsqueeze-east)",
|
||||
height=H16,
|
||||
@@ -101,7 +103,8 @@ arch = [
|
||||
height=H16,
|
||||
depth=D1024,
|
||||
width=W32,
|
||||
caption="",
|
||||
caption="Deconv2",
|
||||
captionshift=20,
|
||||
),
|
||||
to_Conv(
|
||||
"dwdeconv3",
|
||||
@@ -112,7 +115,7 @@ arch = [
|
||||
height=H16,
|
||||
depth=D1024,
|
||||
width=W1,
|
||||
caption="Deconv2",
|
||||
caption="",
|
||||
),
|
||||
to_Conv(
|
||||
"dwdeconv4",
|
||||
@@ -134,7 +137,8 @@ arch = [
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
width=W16,
|
||||
caption="",
|
||||
caption="Deconv3",
|
||||
captionshift=10,
|
||||
),
|
||||
to_Conv(
|
||||
"dwdeconv5",
|
||||
@@ -145,7 +149,7 @@ arch = [
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
width=W1,
|
||||
caption="Deconv3",
|
||||
caption="",
|
||||
),
|
||||
to_Conv(
|
||||
"dwdeconv6",
|
||||
@@ -164,7 +168,7 @@ arch = [
|
||||
s_filer="{{2048×32}}",
|
||||
zlabeloffset=0.15,
|
||||
n_filer=1,
|
||||
offset="(2,0,0)",
|
||||
offset="(1.5,0,0)",
|
||||
to="(dwdeconv6-east)",
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
@@ -178,12 +182,13 @@ arch = [
|
||||
s_filer="{{2048×32}}",
|
||||
zlabeloffset=0.15,
|
||||
n_filer=1,
|
||||
offset="(2,0,0)",
|
||||
offset="(1.5,0,0)",
|
||||
to="(outconv-east)",
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
width=W1,
|
||||
caption="Output",
|
||||
captionshift=5,
|
||||
),
|
||||
# to_connection("deconv2", "out"),
|
||||
to_end(),
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
{Box={
|
||||
name=latent,
|
||||
caption=Latent Space,
|
||||
captionshift=0,
|
||||
xlabel={{, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=latent dim,
|
||||
@@ -39,10 +40,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (latent-east)
|
||||
\pic[shift={(2,-.5,0)}] at (latent-east)
|
||||
{Box={
|
||||
name=fc3,
|
||||
caption=FC,
|
||||
captionshift=20,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabeloffset=0.5,
|
||||
zlabel={{8×128×8}},
|
||||
@@ -55,10 +57,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (fc3-east)
|
||||
\pic[shift={(1.4,0,0)}] at (fc3-east)
|
||||
{Box={
|
||||
name=unsqueeze,
|
||||
caption=Unsqueeze,
|
||||
captionshift=0,
|
||||
xlabel={{32, }},
|
||||
zlabeloffset=0.4,
|
||||
zlabel={{128×8}},
|
||||
@@ -70,10 +73,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={ (2,0,0) }] at (unsqueeze-east)
|
||||
\pic[shift={ (1.2,0,0) }] at (unsqueeze-east)
|
||||
{Box={
|
||||
name=up1,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\UnpoolColor,
|
||||
opacity=0.5,
|
||||
xlabel={{32, }},
|
||||
@@ -88,6 +92,7 @@
|
||||
{Box={
|
||||
name=dwdeconv1,
|
||||
caption=Deconv1,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=,
|
||||
@@ -103,6 +108,7 @@
|
||||
{Box={
|
||||
name=dwdeconv2,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{32, }},
|
||||
zlabeloffset=0.4,
|
||||
zlabel={{256×16}},
|
||||
@@ -117,7 +123,8 @@
|
||||
\pic[shift={ (2,0,0) }] at (dwdeconv2-east)
|
||||
{Box={
|
||||
name=up2,
|
||||
caption=,
|
||||
caption=Deconv2,
|
||||
captionshift=20,
|
||||
fill=\UnpoolColor,
|
||||
opacity=0.5,
|
||||
xlabel={{32, }},
|
||||
@@ -131,7 +138,8 @@
|
||||
\pic[shift={(0,0,0)}] at (up2-east)
|
||||
{Box={
|
||||
name=dwdeconv3,
|
||||
caption=Deconv2,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=,
|
||||
@@ -147,6 +155,7 @@
|
||||
{Box={
|
||||
name=dwdeconv4,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{16, }},
|
||||
zlabeloffset=0.17,
|
||||
zlabel={{1024×16}},
|
||||
@@ -161,7 +170,8 @@
|
||||
\pic[shift={ (2,0,0) }] at (dwdeconv4-east)
|
||||
{Box={
|
||||
name=up3,
|
||||
caption=,
|
||||
caption=Deconv3,
|
||||
captionshift=10,
|
||||
fill=\UnpoolColor,
|
||||
opacity=0.5,
|
||||
xlabel={{16, }},
|
||||
@@ -175,7 +185,8 @@
|
||||
\pic[shift={(0,0,0)}] at (up3-east)
|
||||
{Box={
|
||||
name=dwdeconv5,
|
||||
caption=Deconv3,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=,
|
||||
@@ -191,6 +202,7 @@
|
||||
{Box={
|
||||
name=dwdeconv6,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{8, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -202,10 +214,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (dwdeconv6-east)
|
||||
\pic[shift={(1.5,0,0)}] at (dwdeconv6-east)
|
||||
{Box={
|
||||
name=outconv,
|
||||
caption=Deconv4,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -217,10 +230,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (outconv-east)
|
||||
\pic[shift={(1.5,0,0)}] at (outconv-east)
|
||||
{Box={
|
||||
name=out,
|
||||
caption=Output,
|
||||
captionshift=5,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
|
||||
BIN
thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.pdf
vendored
Normal file
@@ -125,7 +125,7 @@ arch = [
|
||||
n_filer=8,
|
||||
zlabeloffset=0.45,
|
||||
s_filer="{{128×8}}",
|
||||
offset="(2,0,0)",
|
||||
offset="(1,0,0)",
|
||||
to="(pool3-east)",
|
||||
height=H8,
|
||||
depth=D128,
|
||||
@@ -137,12 +137,13 @@ arch = [
|
||||
"fc1",
|
||||
n_filer="{{8×128×8}}",
|
||||
zlabeloffset=0.5,
|
||||
offset="(2,0,0)",
|
||||
offset="(2,-.5,0)",
|
||||
to="(squeeze-east)",
|
||||
height=H1,
|
||||
depth=D512,
|
||||
width=W1,
|
||||
caption=f"FC",
|
||||
caption="FC",
|
||||
captionshift=0,
|
||||
),
|
||||
# to_connection("pool2", "fc1"),
|
||||
# --------------------------- LATENT ---------------------------
|
||||
@@ -150,7 +151,7 @@ arch = [
|
||||
"latent",
|
||||
n_filer="",
|
||||
s_filer="latent dim",
|
||||
offset="(2,0,0)",
|
||||
offset="(1.3,0.5,0)",
|
||||
to="(fc1-east)",
|
||||
height=H8 * 1.6,
|
||||
depth=D1,
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
{Box={
|
||||
name=input,
|
||||
caption=Input,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.2,
|
||||
zlabel={{2048×32}},
|
||||
@@ -43,6 +44,7 @@
|
||||
{Box={
|
||||
name=dwconv1,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=,
|
||||
@@ -58,6 +60,7 @@
|
||||
{Box={
|
||||
name=dwconv2,
|
||||
caption=Conv1,
|
||||
captionshift=0,
|
||||
xlabel={{16, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -76,6 +79,7 @@
|
||||
zlabeloffset=0.3,
|
||||
zlabel={{512×32}},
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\PoolColor,
|
||||
opacity=0.5,
|
||||
height=26,
|
||||
@@ -89,6 +93,7 @@
|
||||
{Box={
|
||||
name=dwconv3,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=,
|
||||
@@ -104,6 +109,7 @@
|
||||
{Box={
|
||||
name=dwconv4,
|
||||
caption=Conv2,
|
||||
captionshift=0,
|
||||
xlabel={{32, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel={{512×32}},
|
||||
@@ -122,6 +128,7 @@
|
||||
zlabeloffset=0.45,
|
||||
zlabel={{256×16}},
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\PoolColor,
|
||||
opacity=0.5,
|
||||
height=18,
|
||||
@@ -138,6 +145,7 @@
|
||||
zlabeloffset=0.45,
|
||||
zlabel={{128×8}},
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\PoolColor,
|
||||
opacity=0.5,
|
||||
height=12,
|
||||
@@ -147,10 +155,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (pool3-east)
|
||||
\pic[shift={(1,0,0)}] at (pool3-east)
|
||||
{Box={
|
||||
name=squeeze,
|
||||
caption=Squeeze,
|
||||
captionshift=0,
|
||||
xlabel={{8, }},
|
||||
zlabeloffset=0.45,
|
||||
zlabel={{128×8}},
|
||||
@@ -162,10 +171,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (squeeze-east)
|
||||
\pic[shift={(2,-.5,0)}] at (squeeze-east)
|
||||
{Box={
|
||||
name=fc1,
|
||||
caption=FC,
|
||||
captionshift=0,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabeloffset=0.5,
|
||||
zlabel={{8×128×8}},
|
||||
@@ -178,10 +188,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (fc1-east)
|
||||
\pic[shift={(1.3,0.5,0)}] at (fc1-east)
|
||||
{Box={
|
||||
name=latent,
|
||||
caption=Latent Space,
|
||||
captionshift=0,
|
||||
xlabel={{, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=latent dim,
|
||||
|
||||
BIN
thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.pdf
vendored
Normal file
@@ -39,19 +39,20 @@ arch = [
|
||||
"fc3",
|
||||
n_filer="{{4×512×8}}",
|
||||
zlabeloffset=0.35,
|
||||
offset="(2,0,0)",
|
||||
offset="(2,-.5,0)",
|
||||
to="(latent-east)",
|
||||
height=1.3,
|
||||
depth=D512,
|
||||
width=W1,
|
||||
caption=f"FC",
|
||||
captionshift=20,
|
||||
),
|
||||
# to_connection("latent", "fc3"),
|
||||
# Reshape to 4×8×512
|
||||
to_UnPool(
|
||||
"up1",
|
||||
n_filer=4,
|
||||
offset="(2,0,0)",
|
||||
offset="(2.5,0,0)",
|
||||
to="(fc3-east)",
|
||||
height=H16,
|
||||
depth=D1024,
|
||||
@@ -82,7 +83,8 @@ arch = [
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
width=W8,
|
||||
caption="",
|
||||
caption="Deconv2",
|
||||
captionshift=10,
|
||||
),
|
||||
# to_connection("deconv1", "up2"),
|
||||
# DeConv2 (5×5, same): 8->1, 32×2048
|
||||
@@ -96,7 +98,7 @@ arch = [
|
||||
height=H32,
|
||||
depth=D2048,
|
||||
width=W1,
|
||||
caption="Deconv2",
|
||||
caption="",
|
||||
),
|
||||
# to_connection("up2", "deconv2"),
|
||||
# Output
|
||||
@@ -111,6 +113,7 @@ arch = [
|
||||
depth=D2048,
|
||||
width=1.0,
|
||||
caption="Output",
|
||||
captionshift=5,
|
||||
),
|
||||
# to_connection("deconv2", "out"),
|
||||
to_end(),
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
{Box={
|
||||
name=latent,
|
||||
caption=Latent Space,
|
||||
captionshift=0,
|
||||
xlabel={{, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=latent dim,
|
||||
@@ -39,10 +40,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (latent-east)
|
||||
\pic[shift={(2,-.5,0)}] at (latent-east)
|
||||
{Box={
|
||||
name=fc3,
|
||||
caption=FC,
|
||||
captionshift=20,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabeloffset=0.35,
|
||||
zlabel={{4×512×8}},
|
||||
@@ -55,10 +57,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={ (2,0,0) }] at (fc3-east)
|
||||
\pic[shift={ (2.5,0,0) }] at (fc3-east)
|
||||
{Box={
|
||||
name=up1,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\UnpoolColor,
|
||||
opacity=0.5,
|
||||
xlabel={{4, }},
|
||||
@@ -73,6 +76,7 @@
|
||||
{Box={
|
||||
name=deconv1,
|
||||
caption=Deconv1,
|
||||
captionshift=0,
|
||||
xlabel={{8, }},
|
||||
zlabeloffset=0.2,
|
||||
zlabel={{1024×16}},
|
||||
@@ -87,7 +91,8 @@
|
||||
\pic[shift={ (2,0,0) }] at (deconv1-east)
|
||||
{Box={
|
||||
name=up2,
|
||||
caption=,
|
||||
caption=Deconv2,
|
||||
captionshift=10,
|
||||
fill=\UnpoolColor,
|
||||
opacity=0.5,
|
||||
xlabel={{8, }},
|
||||
@@ -101,7 +106,8 @@
|
||||
\pic[shift={(0,0,0)}] at (up2-east)
|
||||
{Box={
|
||||
name=deconv2,
|
||||
caption=Deconv2,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -117,6 +123,7 @@
|
||||
{Box={
|
||||
name=out,
|
||||
caption=Output,
|
||||
captionshift=5,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
|
||||
BIN
thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.pdf
vendored
Normal file
@@ -91,13 +91,14 @@ arch = [
|
||||
to_fc(
|
||||
"fc1",
|
||||
n_filer="{{4×512×8}}",
|
||||
offset="(2,0,0)",
|
||||
offset="(2,-.5,0)",
|
||||
zlabeloffset=0.5,
|
||||
to="(pool2-east)",
|
||||
height=1.3,
|
||||
depth=D512,
|
||||
width=W1,
|
||||
caption=f"FC",
|
||||
captionshift=20,
|
||||
),
|
||||
# to_connection("pool2", "fc1"),
|
||||
# --------------------------- LATENT ---------------------------
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
{Box={
|
||||
name=input,
|
||||
caption=Input,
|
||||
captionshift=0,
|
||||
xlabel={{1, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -43,6 +44,7 @@
|
||||
{Box={
|
||||
name=conv1,
|
||||
caption=Conv1,
|
||||
captionshift=0,
|
||||
xlabel={{8, }},
|
||||
zlabeloffset=0.15,
|
||||
zlabel={{2048×32}},
|
||||
@@ -61,6 +63,7 @@
|
||||
zlabeloffset=0.3,
|
||||
zlabel={{1024×16}},
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\PoolColor,
|
||||
opacity=0.5,
|
||||
height=18,
|
||||
@@ -74,6 +77,7 @@
|
||||
{Box={
|
||||
name=conv2,
|
||||
caption=Conv2,
|
||||
captionshift=0,
|
||||
xlabel={{4, }},
|
||||
zlabeloffset=0.4,
|
||||
zlabel={{1024×16\hspace{2.5em}512×8}},
|
||||
@@ -92,6 +96,7 @@
|
||||
zlabeloffset=0.3,
|
||||
zlabel={{}},
|
||||
caption=,
|
||||
captionshift=0,
|
||||
fill=\PoolColor,
|
||||
opacity=0.5,
|
||||
height=12,
|
||||
@@ -101,10 +106,11 @@
|
||||
};
|
||||
|
||||
|
||||
\pic[shift={(2,0,0)}] at (pool2-east)
|
||||
\pic[shift={(2,-.5,0)}] at (pool2-east)
|
||||
{Box={
|
||||
name=fc1,
|
||||
caption=FC,
|
||||
captionshift=20,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabeloffset=0.5,
|
||||
zlabel={{4×512×8}},
|
||||
@@ -121,6 +127,7 @@
|
||||
{Box={
|
||||
name=latent,
|
||||
caption=Latent Space,
|
||||
captionshift=0,
|
||||
xlabel={{, }},
|
||||
zlabeloffset=0.3,
|
||||
zlabel=latent dim,
|
||||
|
||||
10
thesis/third_party/PlotNeuralNet/layers/Box.sty
vendored
@@ -57,8 +57,12 @@
|
||||
\path (b1) edge ["\ylabel",midway] (a1); %height label
|
||||
|
||||
|
||||
\tikzstyle{captionlabel}=[text width=15*\LastEastx/\scale,text centered]
|
||||
\path (\LastEastx/2,-\y/2,+\z/2) + (0,-25pt) coordinate (cap)
|
||||
% \tikzstyle{captionlabel}=[text width=15*\LastEastx/\scale,text centered,xshift=\captionshift pt]
|
||||
% \path (\LastEastx/2,-\y/2,+\z/2) + (0,-25pt) coordinate (cap)
|
||||
% edge ["\textcolor{black}{ \bf \caption}"',captionlabel](cap) ; %Block caption/pic object label
|
||||
|
||||
% Place caption: shift the coordinate by captionshift (NEW)
|
||||
\path (\LastEastx/2,-\y/2,+\z/2) + (\captionshift pt,-25pt) coordinate (cap)
|
||||
edge ["\textcolor{black}{ \bf \caption}"',captionlabel](cap) ; %Block caption/pic object label
|
||||
|
||||
%Define nodes to be used outside on the pic object
|
||||
@@ -103,6 +107,7 @@ ylabel/.store in=\ylabel,
|
||||
zlabel/.store in=\zlabel,
|
||||
zlabeloffset/.store in=\zlabeloffset,
|
||||
caption/.store in=\caption,
|
||||
captionshift/.store in=\captionshift,
|
||||
name/.store in=\name,
|
||||
fill/.store in=\fill,
|
||||
opacity/.store in=\opacity,
|
||||
@@ -117,5 +122,6 @@ ylabel=,
|
||||
zlabel=,
|
||||
zlabeloffset=0.3,
|
||||
caption=,
|
||||
captionshift=0,
|
||||
name=,
|
||||
}
|
||||
|
||||
@@ -75,6 +75,7 @@ def to_Conv(
|
||||
height=40,
|
||||
depth=40,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
):
|
||||
return (
|
||||
r"""
|
||||
@@ -90,6 +91,9 @@ def to_Conv(
|
||||
caption="""
|
||||
+ caption
|
||||
+ r""",
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ """,
|
||||
xlabel={{"""
|
||||
+ str(n_filer)
|
||||
+ """, }},
|
||||
@@ -182,6 +186,7 @@ def to_Pool(
|
||||
depth=32,
|
||||
opacity=0.5,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
):
|
||||
return (
|
||||
r"""
|
||||
@@ -206,6 +211,9 @@ def to_Pool(
|
||||
caption="""
|
||||
+ caption
|
||||
+ r""",
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ """,
|
||||
fill=\PoolColor,
|
||||
opacity="""
|
||||
+ str(opacity)
|
||||
@@ -236,6 +244,7 @@ def to_UnPool(
|
||||
depth=32,
|
||||
opacity=0.5,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
):
|
||||
return (
|
||||
r"""
|
||||
@@ -251,6 +260,9 @@ def to_UnPool(
|
||||
caption="""
|
||||
+ caption
|
||||
+ r""",
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ r""",
|
||||
fill=\UnpoolColor,
|
||||
opacity="""
|
||||
+ str(opacity)
|
||||
@@ -335,6 +347,7 @@ def to_ConvSoftMax(
|
||||
height=40,
|
||||
depth=40,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
):
|
||||
return (
|
||||
r"""
|
||||
@@ -350,6 +363,9 @@ def to_ConvSoftMax(
|
||||
caption="""
|
||||
+ caption
|
||||
+ """,
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ """,
|
||||
zlabel="""
|
||||
+ str(s_filer)
|
||||
+ """,
|
||||
@@ -380,6 +396,7 @@ def to_SoftMax(
|
||||
depth=25,
|
||||
opacity=0.8,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
z_label_offset=0,
|
||||
):
|
||||
return (
|
||||
@@ -396,6 +413,9 @@ def to_SoftMax(
|
||||
caption="""
|
||||
+ caption
|
||||
+ """,
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ """,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabel="""
|
||||
+ str(s_filer)
|
||||
@@ -455,6 +475,7 @@ def to_fc(
|
||||
height=2,
|
||||
depth=10,
|
||||
caption=" ",
|
||||
captionshift=0,
|
||||
# titlepos=0,
|
||||
):
|
||||
return (
|
||||
@@ -471,6 +492,9 @@ def to_fc(
|
||||
caption="""
|
||||
+ caption
|
||||
+ """,
|
||||
captionshift="""
|
||||
+ str(captionshift)
|
||||
+ """,
|
||||
xlabel={{" ","dummy"}},
|
||||
zlabeloffset="""
|
||||
+ str(zlabeloffset)
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
|
||||
from load_results import load_pretraining_results_dataframe, load_results_dataframe
|
||||
from load_results import (
|
||||
load_pretraining_results_dataframe,
|
||||
load_results_dataframe,
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{ pkgs, ... }:
|
||||
let
|
||||
native_dependencies = with pkgs.python312Packages; [
|
||||
native_dependencies = with pkgs.python311Packages; [
|
||||
torch-bin
|
||||
torchvision-bin
|
||||
aggdraw # for visualtorch
|
||||
@@ -16,7 +16,7 @@ in
|
||||
packages = native_dependencies ++ tools;
|
||||
languages.python = {
|
||||
enable = true;
|
||||
package = pkgs.python312;
|
||||
package = pkgs.python311;
|
||||
uv = {
|
||||
enable = true;
|
||||
sync.enable = true;
|
||||
|
||||
@@ -1,651 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from polars.testing import assert_frame_equal
|
||||
|
||||
from diff_df import recursive_diff_frames
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Config you can tweak
|
||||
# ------------------------------------------------------------
|
||||
# Model families whose per-fold result pickles are looked up.
MODELS = ["deepsad", "isoforest", "ocsvm"]
# Label/evaluation variants extracted for every model.
EVALS = ["exp_based", "manual_based"]

# Column layout of the main results frame (key order == column order).
SCHEMA_STATIC = dict(
    # --- identifiers / dimensions ---
    network=pl.Utf8,  # e.g. "LeNet", "efficient"
    latent_dim=pl.Int32,
    semi_normals=pl.Int32,
    semi_anomalous=pl.Int32,
    model=pl.Utf8,  # "deepsad" | "isoforest" | "ocsvm"
    eval=pl.Utf8,  # "exp_based" | "manual_based"
    fold=pl.Int32,
    # --- metrics ---
    auc=pl.Float64,
    ap=pl.Float64,
    # --- per-sample scores: one struct per (idx, label, score) entry ---
    scores=pl.List(
        pl.Struct(
            dict(
                sample_idx=pl.Int32,  # dataloader idx
                orig_label=pl.Int8,  # {-1,0,1}
                score=pl.Float64,  # anomaly score
            )
        )
    ),
    # --- curves (normalized) ---
    roc_curve=pl.Struct(
        dict(
            fpr=pl.List(pl.Float64),
            tpr=pl.List(pl.Float64),
            thr=pl.List(pl.Float64),
        )
    ),
    prc_curve=pl.Struct(
        dict(
            precision=pl.List(pl.Float64),
            recall=pl.List(pl.Float64),
            thr=pl.List(pl.Float64),  # may be len(precision)-1
        )
    ),
    # --- deepsad-only per-eval arrays (None for other models) ---
    sample_indices=pl.List(pl.Int32),
    sample_labels=pl.List(pl.Int8),
    valid_mask=pl.List(pl.Boolean),
    # --- timings / housekeeping ---
    train_time=pl.Float64,
    test_time=pl.Float64,
    folder=pl.Utf8,
    k_fold_num=pl.Int32,
    config_json=pl.Utf8,  # full config.json as string (for reference)
)
|
||||
|
||||
# Pretraining-only (AE) schema, with lighter defaults than the main schema
# (key order == column order).
PRETRAIN_SCHEMA = dict(
    # --- identifiers / dimensions ---
    network=pl.Utf8,  # e.g. "LeNet", "efficient"
    latent_dim=pl.Int32,
    semi_normals=pl.Int32,
    semi_anomalous=pl.Int32,
    model=pl.Utf8,  # always "ae"
    fold=pl.Int32,
    split=pl.Utf8,  # "train" | "test"
    # --- timings and optimization ---
    time=pl.Float64,
    loss=pl.Float64,
    # --- per-sample arrays (as lists) ---
    indices=pl.List(pl.Int32),
    labels_exp_based=pl.List(pl.Int32),
    labels_manual_based=pl.List(pl.Int32),
    semi_targets=pl.List(pl.Int32),
    file_ids=pl.List(pl.Int32),
    frame_ids=pl.List(pl.Int32),
    scores=pl.List(pl.Float32),  # Float32 to match the source and save space
    # --- file id -> name mapping from the result dict ---
    file_names=pl.List(pl.Struct(dict(file_id=pl.Int32, name=pl.Utf8))),
    # --- housekeeping ---
    folder=pl.Utf8,
    k_fold_num=pl.Int32,
    config_json=pl.Utf8,  # full config.json as string (for reference)
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
|
||||
# ------------------------------------------------------------
|
||||
def _tolist(x):
|
||||
if x is None:
|
||||
return None
|
||||
if isinstance(x, np.ndarray):
|
||||
return x.tolist()
|
||||
if isinstance(x, (list, tuple)):
|
||||
return list(x)
|
||||
# best-effort scalar wrap
|
||||
try:
|
||||
return [x]
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def normalize_float_list(a) -> Optional[List[float]]:
    """Return the elements of *a* as Python floats.

    ``None`` input yields ``None``. A numpy array is converted with
    ``.tolist()`` first. ``None`` elements are kept as ``None``.
    """
    if a is None:
        return None
    values = a.tolist() if isinstance(a, np.ndarray) else a
    converted = []
    for item in values:
        converted.append(float(item) if item is not None else None)
    return converted
|
||||
|
||||
|
||||
def normalize_file_names(d) -> Optional[List[dict]]:
    """Turn a ``file_id -> name`` mapping into records sorted by file id.

    Each record is ``{"file_id": int, "name": str}``. Keys that cannot be
    cast with ``int()`` are skipped (best effort). Anything that is not a
    ``dict`` yields ``None``.
    """
    if not isinstance(d, dict):
        return None
    records: List[dict] = []
    for raw_id, raw_name in d.items():
        try:
            numeric_id = int(raw_id)
        except Exception:
            # Best-effort cast: drop entries whose key is not int-like.
            continue
        records.append({"file_id": numeric_id, "name": str(raw_name)})
    return sorted(records, key=lambda rec: rec["file_id"])
|
||||
|
||||
|
||||
def normalize_roc(obj: Any) -> Optional[dict]:
    """Normalize a ROC curve into ``{"fpr": list, "tpr": list, "thr": list|None}``.

    Accepted inputs:
      * a tuple/list ``(fpr, tpr[, thr])``;
      * a dict with keys ``fpr``/``x``, ``tpr``/``y`` and ``thr``/``thresholds``
        (the first key of each pair that holds a non-``None`` value wins).

    Returns ``None`` for ``None``, for unsupported types, and whenever
    ``fpr`` or ``tpr`` cannot be determined.
    """

    def first_present(*keys):
        # Explicit ``is not None`` test: ``a or b`` would raise ValueError on
        # multi-element numpy arrays and would discard empty sequences.
        for key in keys:
            value = obj.get(key)
            if value is not None:
                return value
        return None

    if isinstance(obj, (tuple, list)):
        if len(obj) < 2:
            return None
        fpr, tpr = _tolist(obj[0]), _tolist(obj[1])
        thr = _tolist(obj[2]) if len(obj) >= 3 else None
    elif isinstance(obj, dict):
        fpr = _tolist(first_present("fpr", "x"))
        tpr = _tolist(first_present("tpr", "y"))
        thr = _tolist(first_present("thr", "thresholds"))
    else:
        # Covers None as well as any unsupported type.
        return None

    if fpr is None or tpr is None:
        return None
    return {"fpr": fpr, "tpr": tpr, "thr": thr}
|
||||
|
||||
|
||||
def normalize_prc(obj: Any) -> Optional[dict]:
    """Normalize a precision-recall curve into a dict of lists.

    The result has the keys ``precision``, ``recall`` and ``thr`` (``thr``
    may be ``None``).

    Accepted inputs:
      * a tuple/list ``(precision, recall[, thr])``;
      * a dict with keys ``precision``/``y``, ``recall``/``x`` and
        ``thr``/``thresholds`` (the first key of each pair that holds a
        non-``None`` value wins).

    Returns ``None`` for ``None``, for unsupported types, and whenever
    ``precision`` or ``recall`` cannot be determined.
    """

    def first_present(*keys):
        # Explicit ``is not None`` test: ``a or b`` would raise ValueError on
        # multi-element numpy arrays and would discard empty sequences.
        for key in keys:
            value = obj.get(key)
            if value is not None:
                return value
        return None

    if isinstance(obj, (tuple, list)):
        if len(obj) < 2:
            return None
        precision, recall = _tolist(obj[0]), _tolist(obj[1])
        thr = _tolist(obj[2]) if len(obj) >= 3 else None
    elif isinstance(obj, dict):
        precision = _tolist(first_present("precision", "y"))
        recall = _tolist(first_present("recall", "x"))
        thr = _tolist(first_present("thr", "thresholds"))
    else:
        # Covers None as well as any unsupported type.
        return None

    if precision is None or recall is None:
        return None
    return {"precision": precision, "recall": recall, "thr": thr}
|
||||
|
||||
|
||||
def normalize_scores_to_struct(seq) -> Optional[List[dict]]:
    """Convert per-sample score entries into a list of struct-like dicts.

    Each element of *seq* that is a list/tuple with at least three items is
    read as ``(idx, label, score)``. Any other element is treated as a bare
    score when it is a Python/numpy int or float, and as a missing score
    otherwise. Every output dict has the keys ``sample_idx``,
    ``orig_label`` and ``score``.

    Returns ``None`` when *seq* is ``None`` or (after converting numpy
    arrays with ``.tolist()``) is not a list or tuple.
    """
    if seq is None:
        return None
    entries = seq.tolist() if isinstance(seq, np.ndarray) else seq
    if not isinstance(entries, (list, tuple)):
        return None

    def to_record(entry) -> dict:
        if isinstance(entry, (list, tuple)) and len(entry) >= 3:
            raw_idx, raw_label, raw_score = entry[0], entry[1], entry[2]
            return {
                "sample_idx": int(raw_idx) if raw_idx is not None else None,
                "orig_label": int(raw_label) if raw_label is not None else None,
                "score": float(raw_score) if raw_score is not None else None,
            }
        # Fallback: a single numeric value is taken as the score.
        is_numeric = isinstance(entry, (int, float, np.integer, np.floating))
        return {
            "sample_idx": None,
            "orig_label": None,
            "score": float(entry) if is_numeric else None,
        }

    return [to_record(entry) for entry in entries]
|
||||
|
||||
|
||||
def normalize_int_list(a) -> Optional[List[int]]:
    """Return the elements of *a* as Python ints.

    ``None`` input yields ``None``. A numpy array is converted with
    ``.tolist()`` first. ``None`` elements are kept as ``None``; every other
    element is cast with ``int()`` so numpy scalar types do not leak into
    the result (mirrors ``normalize_float_list`` / ``normalize_bool_list``).
    """
    if a is None:
        return None
    if isinstance(a, np.ndarray):
        a = a.tolist()
    return [None if x is None else int(x) for x in a]
|
||||
|
||||
|
||||
def normalize_bool_list(a) -> Optional[List[bool]]:
    """Return the truth value of every element of *a* as a list of bools.

    ``None`` input yields ``None``. A numpy array is converted with
    ``.tolist()`` before the elements are cast.
    """
    if a is None:
        return None
    flags = a.tolist() if isinstance(a, np.ndarray) else a
    return list(map(bool, flags))
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Low-level: read one experiment folder
|
||||
# ------------------------------------------------------------
|
||||
def read_config(exp_dir: Path) -> dict:
    """Load ``config.json`` from an experiment folder.

    Args:
        exp_dir: Experiment directory that contains ``config.json``.

    Returns:
        The parsed configuration dict.

    Raises:
        ValueError: If the config has no truthy ``k_fold`` entry.
        OSError / json.JSONDecodeError: If the file is missing or malformed.
    """
    config_path = exp_dir / "config.json"
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with config_path.open("r", encoding="utf-8") as f:
        c = json.load(f)
    if not c.get("k_fold"):
        raise ValueError(f"{exp_dir.name}: not trained as k-fold")
    return c
|
||||
|
||||
|
||||
def read_pickle(p: Path) -> Any:
    """Unpickle and return the object stored in file *p*.

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with p.open("rb") as handle:
        payload = pickle.load(handle)
    return payload
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Extractors for each model
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# Module-level bookkeeping: one list per (label method, curve type) pair.
# rows_from_deepsad appends the length of each raw curve it encounters.
counting = {
    ("exp_based", "roc"): [],
    ("exp_based", "prc"): [],
    ("manual_based", "roc"): [],
    ("manual_based", "prc"): [],
}
|
||||
|
||||
|
||||
def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
    """Extract per-eval result rows from a DeepSAD result dict.

    Results are read from ``data["test"][<eval>]``; evals whose entry is not
    a dict are skipped. The training time comes from ``data["train"]["time"]``
    and the test time from ``data["test"]["time"]``.

    Side effect: for every tuple/list curve found, the length of its first
    element is appended to the module-level ``counting`` bookkeeping dict.

    Args:
        data: Unpickled DeepSAD result dict.
        evals: Evaluation variants to extract.

    Returns:
        Mapping ``eval -> row dict`` with the keys ``auc``, ``roc``, ``prc``,
        ``ap``, ``scores``, ``sample_indices``, ``sample_labels``,
        ``valid_mask``, ``train_time`` and ``test_time``.
    """

    def opt_float(value):
        return None if value is None else float(value)

    out: Dict[str, dict] = {}
    test = data.get("test", {})
    train_time = data.get("train", {}).get("time")

    for ev in evals:
        evd = test.get(ev)
        if not isinstance(evd, dict):
            continue

        # Bookkeeping must not be stricter than the extraction below, which
        # tolerates missing curves: only count curves that are really there.
        for curve in ("roc", "prc"):
            raw = evd.get(curve)
            if isinstance(raw, (tuple, list)) and raw:
                counting.setdefault((ev, curve), []).append(len(raw[0]))

        out[ev] = {
            "auc": opt_float(evd.get("auc")),
            "roc": normalize_roc(evd.get("roc")),
            "prc": normalize_prc(evd.get("prc")),
            "ap": opt_float(evd.get("ap")),
            "scores": normalize_scores_to_struct(evd.get("scores")),
            "sample_indices": normalize_int_list(evd.get("indices")),
            "sample_labels": normalize_int_list(evd.get("labels")),
            "valid_mask": normalize_bool_list(evd.get("valid_mask")),
            "train_time": train_time,
            "test_time": test.get("time"),
        }
    return out
|
||||
|
||||
|
||||
def rows_from_isoforest(data: dict, evals: List[str]) -> Dict[str, dict]:
    """Extract per-eval result rows from an isolation-forest result dict.

    The dict is flat; values are read from the keys ``test_auc_<eval>``,
    ``test_roc_<eval>``, ``test_prc_<eval>``, ``test_ap_<eval>`` and
    ``test_scores_<eval>``. Evals without an AUC entry are skipped. The
    DeepSAD-only fields are filled with ``None``.
    """
    results: Dict[str, dict] = {}
    for variant in evals:

        def lookup(stem, variant=variant):
            return data.get(f"test_{stem}_{variant}")

        raw_auc = lookup("auc")
        if raw_auc is None:
            continue

        row = {"auc": float(raw_auc)}
        row["roc"] = normalize_roc(lookup("roc"))
        row["prc"] = normalize_prc(lookup("prc"))
        raw_ap = lookup("ap")
        row["ap"] = float(raw_ap) if raw_ap is not None else None
        row["scores"] = normalize_scores_to_struct(lookup("scores"))
        row["sample_indices"] = None
        row["sample_labels"] = None
        row["valid_mask"] = None
        row["train_time"] = data.get("train_time")
        row["test_time"] = data.get("test_time")
        results[variant] = row
    return results
|
||||
|
||||
|
||||
def rows_from_ocsvm_default(data: dict, evals: List[str]) -> Dict[str, dict]:
    """Extract per-eval result rows for the default OCSVM.

    Only the default variant's keys (``test_auc_<eval>``, ``test_roc_<eval>``,
    ``test_prc_<eval>``, ``test_ap_<eval>``, ``test_scores_<eval>``) are read;
    the linear variant is ignored entirely. Evals without an AUC entry are
    skipped and the DeepSAD-only fields are filled with ``None``.
    """
    collected: Dict[str, dict] = {}
    for name in evals:
        auc_value = data.get(f"test_auc_{name}")
        if auc_value is None:
            continue

        auc_as_float = float(auc_value)
        roc_curve = normalize_roc(data.get(f"test_roc_{name}"))
        prc_curve = normalize_prc(data.get(f"test_prc_{name}"))
        ap_value = data.get(f"test_ap_{name}")
        ap_as_float = None if ap_value is None else float(ap_value)
        score_structs = normalize_scores_to_struct(data.get(f"test_scores_{name}"))

        collected[name] = {
            "auc": auc_as_float,
            "roc": roc_curve,
            "prc": prc_curve,
            "ap": ap_as_float,
            "scores": score_structs,
            "sample_indices": None,
            "sample_labels": None,
            "valid_mask": None,
            "train_time": data.get("train_time"),
            "test_time": data.get("test_time"),
        }
    return collected
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Build the Polars DataFrame
|
||||
# ------------------------------------------------------------
|
||||
def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame:
    """Collect the per-fold results of all experiments under ``root``.

    Every sub-directory of ``root`` is treated as one experiment. For each
    model in ``MODELS`` and each fold, ``results_<model>_<fold>.pkl`` is read
    (if present) and one row per evaluation variant is produced.

    Columns (``SCHEMA_STATIC``):
        network, latent_dim, semi_normals, semi_anomalous,
        model, eval, fold,
        auc, ap, scores{sample_idx,orig_label,score},
        roc_curve{fpr,tpr,thr}, prc_curve{precision,recall,thr},
        sample_indices, sample_labels, valid_mask,
        train_time, test_time,
        folder, k_fold_num, config_json

    Args:
        root: Directory containing the experiment folders.
        allow_cache: When true, ``root/results_cache.parquet`` is returned if
            it can be read, and a freshly built frame is written back to it.

    Returns:
        The results frame; a typed empty frame if nothing was found.
    """
    cache = root / "results_cache.parquet"
    if allow_cache and cache.exists():
        try:
            df = pl.read_parquet(cache)
            print(f"[info] loaded cached results frame from {cache}")
            return df
        except Exception as e:
            print(f"[warn] failed to load cache {cache}: {e}")

    # model name -> function turning a result pickle into {eval: row values}
    extractors = {
        "deepsad": rows_from_deepsad,
        "isoforest": rows_from_isoforest,
        "ocsvm": rows_from_ocsvm_default,
    }

    rows: List[dict] = []

    for exp_dir in sorted(p for p in root.iterdir() if p.is_dir()):
        # Parse *all* required config fields inside the guard so that a
        # folder with an incomplete config is skipped instead of aborting
        # the whole load.
        try:
            cfg = read_config(exp_dir)
            cfg_json = json.dumps(cfg, sort_keys=True)
            network = cfg.get("net_name")
            latent_dim = int(cfg["latent_space_dim"])
            semi_normals = int(cfg["num_known_normal"])
            semi_anomalous = int(cfg["num_known_outlier"])
            k = int(cfg["k_fold_num"])
        except Exception as e:
            print(f"[warn] skipping {exp_dir.name}: {e}")
            continue

        for model in MODELS:
            extractor = extractors.get(model)
            for fold in range(k):
                pkl = exp_dir / f"results_{model}_{fold}.pkl"
                if not pkl.exists():
                    continue

                try:
                    data = read_pickle(pkl)
                except Exception as e:
                    print(f"[warn] failed to read {pkl.name}: {e}")
                    continue

                per_eval = extractor(data, EVALS) if extractor else {}

                for ev, vals in per_eval.items():
                    rows.append(
                        {
                            "network": network,
                            "latent_dim": latent_dim,
                            "semi_normals": semi_normals,
                            "semi_anomalous": semi_anomalous,
                            "model": model,
                            "eval": ev,
                            "fold": fold,
                            "auc": vals["auc"],
                            "ap": vals["ap"],
                            "scores": vals["scores"],
                            "roc_curve": vals["roc"],
                            "prc_curve": vals["prc"],
                            "sample_indices": vals.get("sample_indices"),
                            "sample_labels": vals.get("sample_labels"),
                            "valid_mask": vals.get("valid_mask"),
                            "train_time": vals["train_time"],
                            "test_time": vals["test_time"],
                            "folder": str(exp_dir),
                            "k_fold_num": k,
                            "config_json": cfg_json,
                        }
                    )

    # If empty, return a typed empty frame (nothing is cached in that case).
    if not rows:
        return pl.DataFrame(schema=SCHEMA_STATIC)

    df = pl.DataFrame(rows, schema=SCHEMA_STATIC)

    # Cast to efficient dtypes (categoricals etc.); no extra sanitation.
    df = df.with_columns(
        pl.col("network", "model", "eval").cast(pl.Categorical),
        pl.col(
            "latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
        ).cast(pl.Int32),
        pl.col("auc", "ap", "train_time", "test_time").cast(pl.Float64),
        # NOTE: no cast on 'scores' here; it's already List(Struct) per schema.
    )

    if allow_cache:
        try:
            df.write_parquet(cache)
            print(f"[info] cached results frame to {cache}")
        except Exception as e:
            print(f"[warn] failed to write cache {cache}: {e}")

    return df
|
||||
|
||||
|
||||
def load_pretraining_results_dataframe(
    root: Path,
    allow_cache: bool = True,
    include_train: bool = False,  # default: store only TEST to keep cache tiny
    keep_file_names: bool = False,  # drop file_names by default; they're repeated
    parquet_compression: str = "zstd",
    parquet_compression_level: int = 7,  # stronger compression than default
) -> pl.DataFrame:
    """Load the autoencoder pretraining results under ``root``.

    Only files named ``results_ae_<fold>.pkl`` are read. One row is produced
    per (experiment, fold, split), following ``PRETRAIN_SCHEMA``.

    Args:
        root: Directory containing the experiment folders.
        allow_cache: When true, ``root/pretraining_results_cache.parquet`` is
            returned if it can be read, and a freshly built frame is written
            back to it.
        include_train: Also emit rows for the ``"train"`` split; by default
            only the ``"test"`` split is kept.
        keep_file_names: Keep the ``file_names`` mapping; by default the
            column is filled with ``None``.
        parquet_compression: Compression codec used for the cache file.
        parquet_compression_level: Compression level used for the cache file.

    Returns:
        The pretraining frame (scores stored as Float32); a typed empty frame
        if nothing was found.
    """
    cache = root / "pretraining_results_cache.parquet"
    if allow_cache and cache.exists():
        try:
            df = pl.read_parquet(cache)
            print(f"[info] loaded cached pretraining frame from {cache}")
            return df
        except Exception as e:
            print(f"[warn] failed to load pretraining cache {cache}: {e}")

    def opt_float(value):
        return None if value is None else float(value)

    # Only test split by default (include_train=False)
    splits = ("train", "test") if include_train else ("test",)

    rows: List[dict] = []

    for exp_dir in sorted(p for p in root.iterdir() if p.is_dir()):
        # Parse *all* required config fields inside the guard so that a
        # folder with an incomplete config is skipped instead of aborting
        # the whole load.
        try:
            cfg = read_config(exp_dir)
            cfg_json = json.dumps(cfg, sort_keys=True)
            network = cfg.get("net_name")
            latent_dim = int(cfg["latent_space_dim"])
            semi_normals = int(cfg["num_known_normal"])
            semi_anomalous = int(cfg["num_known_outlier"])
            k = int(cfg["k_fold_num"])
        except Exception as e:
            print(f"[warn] skipping {exp_dir.name} (pretraining): {e}")
            continue

        for fold in range(k):
            pkl = exp_dir / f"results_ae_{fold}.pkl"
            if not pkl.exists():
                continue

            try:
                data = read_pickle(pkl)  # expected: {"train": {...}, "test": {...}}
            except Exception as e:
                print(f"[warn] failed to read {pkl.name}: {e}")
                continue

            for split in splits:
                splitd = data.get(split)
                if not isinstance(splitd, dict):
                    continue

                rows.append(
                    {
                        "network": network,
                        "latent_dim": latent_dim,
                        "semi_normals": semi_normals,
                        "semi_anomalous": semi_anomalous,
                        "model": "ae",
                        "fold": fold,
                        "split": split,
                        "time": opt_float(splitd.get("time")),
                        "loss": opt_float(splitd.get("loss")),
                        # ints as Int32, scores as Float32 to save space
                        "indices": normalize_int_list(splitd.get("indices")),
                        "labels_exp_based": normalize_int_list(
                            splitd.get("labels_exp_based")
                        ),
                        "labels_manual_based": normalize_int_list(
                            splitd.get("labels_manual_based")
                        ),
                        "semi_targets": normalize_int_list(splitd.get("semi_targets")),
                        "file_ids": normalize_int_list(splitd.get("file_ids")),
                        "frame_ids": normalize_int_list(splitd.get("frame_ids")),
                        # Shared helper instead of a hand-rolled conversion.
                        "scores": normalize_float_list(splitd.get("scores")),
                        "file_names": normalize_file_names(splitd.get("file_names"))
                        if keep_file_names
                        else None,
                        "folder": str(exp_dir),
                        "k_fold_num": k,
                        "config_json": cfg_json,
                    }
                )

    if not rows:
        return pl.DataFrame(schema=PRETRAIN_SCHEMA)

    df = pl.DataFrame(rows, schema=PRETRAIN_SCHEMA)

    # Cast/optimize a bit (categoricals, ints, floats)
    df = df.with_columns(
        pl.col("network", "model", "split").cast(pl.Categorical),
        pl.col(
            "latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
        ).cast(pl.Int32),
        pl.col("time", "loss").cast(pl.Float64),
        pl.col("scores").cast(pl.List(pl.Float32)),  # ensure downcast took
    )

    if allow_cache:
        try:
            df.write_parquet(
                cache,
                compression=parquet_compression,
                compression_level=parquet_compression_level,
                statistics=True,
            )
            print(
                f"[info] cached pretraining frame to {cache} "
                f"({parquet_compression}, level={parquet_compression_level})"
            )
        except Exception as e:
            print(f"[warn] failed to write pretraining cache {cache}: {e}")

    return df
|
||||
|
||||
|
||||
def main(compare_with_retest: bool = False):
    """Build (or load) the cached results frame; optionally diff it against a retest.

    Args:
        compare_with_retest: When false (default) the function only loads or
            builds the results frame for the main results folder and returns.
            When true, the frame is additionally compared against the results
            in the ``retest_nodrop`` folder; a summary of the differences is
            printed and an ``AssertionError`` is raised if the frames differ
            beyond the tolerance.
    """
    root = Path("/home/fedex/mt/results/copy")
    df1 = load_results_dataframe(root, allow_cache=True)
    if not compare_with_retest:
        # Default run: just (re)build the cache. This replaces a leftover
        # ``exit(0)`` that made everything below unreachable.
        return

    retest_root = Path("/home/fedex/mt/results/copy/retest_nodrop")
    # 'folder' necessarily differs between the two roots, so drop it from
    # *both* frames; otherwise the shape check below can never pass.
    df1 = df1.drop("folder")
    df2 = load_results_dataframe(retest_root, allow_cache=False).drop("folder")

    # exact schema & shape first (optional but helpful messages)
    assert df1.shape == df2.shape, f"Shape differs: {df1.shape} vs {df2.shape}"
    assert set(df1.columns) == set(df2.columns), (
        f"Column sets differ: {df1.columns} vs {df2.columns}"
    )

    # Ignore column order differences for the final equality check.
    df1_sorted = df1.select(sorted(df1.columns))
    df2_sorted = df2.select(sorted(df2.columns))

    # Optionally pre-align/sort both frames by a stable key before diffing.
    summary, leaves = recursive_diff_frames(
        df1,
        df2,
        ignore=["timestamp"],  # columns to ignore
        float_atol=0.1,  # absolute tolerance for floats
        float_rtol=0.0,  # relative tolerance for floats
        max_rows_per_column=20,  # limit expansion per column
        max_leafs_per_row=200,  # cap leaves per row
    )

    pl.Config.set_fmt_table_cell_list_len(100)
    pl.Config.set_tbl_rows(100)

    print(summary)  # which columns differ & how many rows
    print(leaves)  # exact nested paths + scalar diffs

    # check_exact=False lets us use atol/rtol for floats
    assert_frame_equal(
        df1_sorted,
        df2_sorted,
        check_exact=False,
        atol=0.1,  # absolute tolerance for floats
        rtol=0.0,  # relative tolerance (set if you want % based)
        check_dtypes=True,  # set False if you only care about values
    )
    print("DataFrames match within tolerance ✅")
|
||||
|
||||
# df_pre = load_pretraining_results_dataframe(root, allow_cache=True)
|
||||
# print("pretraining:", df_pre.shape, df_pre.head())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -78,8 +78,8 @@ def build_arch_curves_from_df(
|
||||
"overall": (dims, means, stds),
|
||||
} }
|
||||
"""
|
||||
if "split" not in df.columns:
|
||||
raise ValueError("Expected 'split' column in AE dataframe.")
|
||||
# if "split" not in df.columns:
|
||||
# raise ValueError("Expected 'split' column in AE dataframe.")
|
||||
if "scores" not in df.columns:
|
||||
raise ValueError("Expected 'scores' column in AE dataframe.")
|
||||
if "network" not in df.columns or "latent_dim" not in df.columns:
|
||||
@@ -88,7 +88,7 @@ def build_arch_curves_from_df(
|
||||
raise ValueError(f"Expected '{label_field}' column in AE dataframe.")
|
||||
|
||||
# Keep only test split
|
||||
df = df.filter(pl.col("split") == "test")
|
||||
# df = df.filter(pl.col("split") == "test")
|
||||
|
||||
groups: dict[tuple[str, int], dict[str, list[float]]] = {}
|
||||
|
||||
@@ -201,7 +201,7 @@ def plot_multi_loss_curve(arch_results, title, output_path, colors=None):
|
||||
|
||||
plt.xlabel("Latent Dimensionality")
|
||||
plt.ylabel("Test Loss")
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.legend()
|
||||
plt.grid(True, alpha=0.3)
|
||||
plt.xticks(all_dims)
|
||||
@@ -212,7 +212,7 @@ def plot_multi_loss_curve(arch_results, title, output_path, colors=None):
|
||||
|
||||
def main():
|
||||
# Load AE DF (uses your cache if enabled in the loader)
|
||||
df = load_pretraining_results_dataframe(ROOT, allow_cache=True, include_train=False)
|
||||
df = load_pretraining_results_dataframe(ROOT, allow_cache=True)
|
||||
|
||||
# Optional: filter to just LeNet vs Efficient; drop this set() to plot all nets
|
||||
wanted_nets = {"LeNet", "Efficient"}
|
||||
|
||||
@@ -171,28 +171,28 @@ def plot_combined_timeline(
|
||||
range(num_bins), near_sensor_binned, color=color, linestyle="--", alpha=0.6
|
||||
)
|
||||
|
||||
# Add vertical lines for manually labeled frames if available
|
||||
if all_paths[i].with_suffix(".npy").name in manually_labeled_anomaly_frames:
|
||||
begin_frame, end_frame = manually_labeled_anomaly_frames[
|
||||
all_paths[i].with_suffix(".npy").name
|
||||
]
|
||||
# Convert frame numbers to normalized timeline positions
|
||||
begin_pos = (begin_frame / exp_len) * (num_bins - 1)
|
||||
end_pos = (end_frame / exp_len) * (num_bins - 1)
|
||||
# # Add vertical lines for manually labeled frames if available
|
||||
# if all_paths[i].with_suffix(".npy").name in manually_labeled_anomaly_frames:
|
||||
# begin_frame, end_frame = manually_labeled_anomaly_frames[
|
||||
# all_paths[i].with_suffix(".npy").name
|
||||
# ]
|
||||
# # Convert frame numbers to normalized timeline positions
|
||||
# begin_pos = (begin_frame / exp_len) * (num_bins - 1)
|
||||
# end_pos = (end_frame / exp_len) * (num_bins - 1)
|
||||
|
||||
# Add vertical lines with matching color and loose dotting
|
||||
ax1.axvline(
|
||||
x=begin_pos,
|
||||
color=color,
|
||||
linestyle=":",
|
||||
alpha=0.6,
|
||||
)
|
||||
ax1.axvline(
|
||||
x=end_pos,
|
||||
color=color,
|
||||
linestyle=":",
|
||||
alpha=0.6,
|
||||
)
|
||||
# # Add vertical lines with matching color and loose dotting
|
||||
# ax1.axvline(
|
||||
# x=begin_pos,
|
||||
# color=color,
|
||||
# linestyle=":",
|
||||
# alpha=0.6,
|
||||
# )
|
||||
# ax1.axvline(
|
||||
# x=end_pos,
|
||||
# color=color,
|
||||
# linestyle=":",
|
||||
# alpha=0.6,
|
||||
# )
|
||||
|
||||
# Customize axes
|
||||
ax1.set_xlabel("Normalized Timeline")
|
||||
@@ -202,7 +202,7 @@ def plot_combined_timeline(
|
||||
ax1.set_ylabel("Missing Points (%)")
|
||||
ax2.set_ylabel("Points with <0.5m Range (%)")
|
||||
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
|
||||
# Create legends without fixed positions
|
||||
# First get all lines and labels for experiments
|
||||
@@ -221,7 +221,8 @@ def plot_combined_timeline(
|
||||
)
|
||||
|
||||
# Create single legend in top right corner with consistent margins
|
||||
fig.legend(all_handles, all_labels, loc="upper right", borderaxespad=4.8)
|
||||
# fig.legend(all_handles, all_labels, loc="upper right", borderaxespad=2.8)
|
||||
fig.legend(all_handles, all_labels, bbox_to_anchor=(0.95, 0.99))
|
||||
|
||||
plt.grid(True, alpha=0.3)
|
||||
|
||||
|
||||
@@ -122,8 +122,8 @@ def plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames):
|
||||
|
||||
# prepare data for pie chart
|
||||
labels = [
|
||||
"Normal Lidar Frames\nNon-Degraded Pointclouds",
|
||||
"Anomalous Lidar Frames\nDegraded Pointclouds",
|
||||
"Normal Lidar Frames\nNon-Degraded Point Clouds",
|
||||
"Anomalous Lidar Frames\nDegraded Point Clouds",
|
||||
]
|
||||
sizes = [total_normal_frames, total_anomaly_frames]
|
||||
explode = (0.1, 0) # explode the normal slice
|
||||
@@ -150,9 +150,9 @@ def plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames):
|
||||
va="center",
|
||||
color="black",
|
||||
)
|
||||
plt.title(
|
||||
"Distribution of Normal and Anomalous\nPointclouds in all Experiments (Lidar Frames)"
|
||||
)
|
||||
# plt.title(
|
||||
# "Distribution of Normal and Anomalous\nPointclouds in all Experiments (Lidar Frames)"
|
||||
# )
|
||||
plt.tight_layout()
|
||||
|
||||
# save the plot
|
||||
|
||||
@@ -5,7 +5,6 @@ from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from pointcloudset import Dataset
|
||||
|
||||
# define data path containing the bag files
|
||||
all_data_path = Path("/home/fedex/mt/data/subter")
|
||||
@@ -82,7 +81,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
|
||||
plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Number of Pointclouds")
|
||||
plt.legend()
|
||||
@@ -109,7 +108,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
label="Anomaly Experiments",
|
||||
orientation="horizontal",
|
||||
)
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.xlabel("Number of Pointclouds")
|
||||
plt.ylabel("Number of Missing Points")
|
||||
plt.legend()
|
||||
@@ -142,7 +141,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
label="Anomaly Experiments",
|
||||
density=True,
|
||||
)
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Density")
|
||||
plt.legend()
|
||||
@@ -169,7 +168,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
label="Anomaly Experiments (With Artifical Smoke)",
|
||||
density=True,
|
||||
)
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.xlabel("Percentage of Missing Lidar Measurements")
|
||||
plt.ylabel("Density")
|
||||
# display the x axis as percentages
|
||||
@@ -210,7 +209,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
alpha=0.5,
|
||||
label="Anomaly Experiments",
|
||||
)
|
||||
plt.title(title)
|
||||
# plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Normalized Density")
|
||||
plt.legend()
|
||||
|
||||
@@ -5,7 +5,6 @@ from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from pointcloudset import Dataset
|
||||
|
||||
# define data path containing the bag files
|
||||
all_data_path = Path("/home/fedex/mt/data/subter")
|
||||
@@ -164,7 +163,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
plt.gca().set_yticklabels(
|
||||
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
|
||||
)
|
||||
plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
# plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
plt.ylabel("Percentage of measurements closer than 0.5m")
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / f"particles_near_sensor_boxplot_{rt}.png")
|
||||
@@ -186,7 +185,7 @@ def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
plt.gca().set_yticklabels(
|
||||
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
|
||||
)
|
||||
plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
# plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
plt.ylabel("Percentage of measurements closer than 0.5m")
|
||||
plt.ylim(0, 0.05)
|
||||
plt.tight_layout()
|
||||
|
||||
@@ -112,18 +112,27 @@ cmap = get_colormap_with_special_missing_color(
|
||||
args.colormap, args.missing_data_color, args.reverse_colormap
|
||||
)
|
||||
|
||||
# --- Create a figure with 2 vertical subplots ---
|
||||
# --- Create a figure with 2 vertical subplots and move titles to the left ---
|
||||
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
|
||||
for ax, frame, title in zip(
|
||||
# leave extra left margin for the left-side labels
|
||||
fig.subplots_adjust(left=0.14, hspace=0.05)
|
||||
|
||||
for ax, frame, label in zip(
|
||||
(ax1, ax2),
|
||||
(frame1, frame2),
|
||||
(
|
||||
"Projection of Lidar Frame without Degradation",
|
||||
"Projection of Lidar Frame with Degradation (Artifical Smoke)",
|
||||
),
|
||||
("(a)", "(b)"),
|
||||
):
|
||||
im = ax.imshow(frame, cmap=cmap, aspect="auto", vmin=global_vmin, vmax=global_vmax)
|
||||
ax.set_title(title)
|
||||
# place the "title" to the left, vertically centered relative to the axes
|
||||
ax.text(
|
||||
-0.02, # negative x places text left of the axes (in axes coordinates)
|
||||
0.5,
|
||||
label,
|
||||
transform=ax.transAxes,
|
||||
va="center",
|
||||
ha="right",
|
||||
fontsize=12,
|
||||
)
|
||||
ax.axis("off")
|
||||
|
||||
# Adjust layout to fit margins for a paper
|
||||
|
||||
@@ -3,10 +3,12 @@ from __future__ import annotations
|
||||
import json
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from diff_df import recursive_diff_frames
|
||||
from polars.testing import assert_frame_equal
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Config you can tweak
|
||||
@@ -24,7 +26,8 @@ SCHEMA_STATIC = {
|
||||
"eval": pl.Utf8, # "exp_based" | "manual_based"
|
||||
"fold": pl.Int32,
|
||||
# metrics
|
||||
"auc": pl.Float64,
|
||||
"roc_auc": pl.Float64, # <-- renamed from 'auc'
|
||||
"prc_auc": pl.Float64, # <-- new
|
||||
"ap": pl.Float64,
|
||||
# per-sample scores: list of (idx, label, score)
|
||||
"scores": pl.List(
|
||||
@@ -73,9 +76,9 @@ PRETRAIN_SCHEMA = {
|
||||
"semi_anomalous": pl.Int32,
|
||||
"model": pl.Utf8, # always "ae"
|
||||
"fold": pl.Int32,
|
||||
"split": pl.Utf8, # "train" | "test"
|
||||
# timings and optimization
|
||||
"time": pl.Float64,
|
||||
"train_time": pl.Float64,
|
||||
"test_time": pl.Float64,
|
||||
"loss": pl.Float64,
|
||||
# per-sample arrays (as lists)
|
||||
"indices": pl.List(pl.Int32),
|
||||
@@ -93,10 +96,62 @@ PRETRAIN_SCHEMA = {
|
||||
"config_json": pl.Utf8, # full config.json as string (for reference)
|
||||
}
|
||||
|
||||
SCHEMA_INFERENCE = {
|
||||
# identifiers / dims
|
||||
"experiment": pl.Utf8, # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001"
|
||||
"network": pl.Utf8, # e.g. "LeNet", "efficient"
|
||||
"latent_dim": pl.Int32,
|
||||
"semi_normals": pl.Int32,
|
||||
"semi_anomalous": pl.Int32,
|
||||
"model": pl.Utf8, # "deepsad" | "isoforest" | "ocsvm"
|
||||
# metrics
|
||||
"scores": pl.List(pl.Float64),
|
||||
# timings / housekeeping
|
||||
"folder": pl.Utf8,
|
||||
"config_json": pl.Utf8, # full config.json as string (for reference)
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list)
|
||||
# ------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_prc_auc_from_curve(prc_curve: dict | None) -> float | None:
    """Return the area under a precision-recall curve (trapezoidal rule).

    Args:
        prc_curve: Mapping with equally long "precision" and "recall"
            sequences. Any other key (e.g. "thr") is ignored.

    Returns:
        The area as a Python float, or ``np.nan`` when the curve is missing,
        empty, or contains no point where both coordinates are non-NaN.
    """
    if not prc_curve:
        return np.nan

    raw_precision = prc_curve.get("precision")
    raw_recall = prc_curve.get("recall")
    # A key that is present but holds None carries no curve at all.
    if raw_precision is None or raw_recall is None:
        return np.nan

    precision = np.asarray(raw_precision, dtype=float)
    recall = np.asarray(raw_recall, dtype=float)
    if precision.size == 0 or recall.size == 0:
        return np.nan

    # Drop every point where either coordinate is NaN.
    mask = ~(np.isnan(precision) | np.isnan(recall))
    precision, recall = precision[mask], recall[mask]
    if recall.size == 0:
        return np.nan

    # Sort by recall, clip to [0,1]
    order = np.argsort(recall)
    recall = np.clip(recall[order], 0.0, 1.0)
    precision = np.clip(precision[order], 0.0, 1.0)

    # Ensure curve spans [0,1] in recall (hold precision constant at ends)
    if recall[0] > 0.0:
        recall = np.insert(recall, 0, 0.0)
        precision = np.insert(precision, 0, precision[0])
    if recall[-1] < 1.0:
        recall = np.append(recall, 1.0)
        precision = np.append(precision, precision[-1])

    # np.trapezoid only exists in NumPy >= 2.0; older releases expose the same
    # integration as np.trapz. The fallback is looked up lazily so NumPy 2.x
    # never touches the deprecated name.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    return float(integrate(precision, recall))
|
||||
|
||||
|
||||
def _tolist(x):
|
||||
if x is None:
|
||||
return None
|
||||
@@ -230,11 +285,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]:
|
||||
# ------------------------------------------------------------
|
||||
# Low-level: read one experiment folder
|
||||
# ------------------------------------------------------------
|
||||
def read_config(exp_dir: Path) -> dict:
|
||||
def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict:
|
||||
cfg = exp_dir / "config.json"
|
||||
with cfg.open("r") as f:
|
||||
c = json.load(f)
|
||||
if not c.get("k_fold"):
|
||||
if k_fold_required and not c.get("k_fold"):
|
||||
raise ValueError(f"{exp_dir.name}: not trained as k-fold")
|
||||
return c
|
||||
|
||||
@@ -247,6 +302,14 @@ def read_pickle(p: Path) -> Any:
|
||||
# ------------------------------------------------------------
|
||||
# Extractors for each model
|
||||
# ------------------------------------------------------------
|
||||
|
||||
counting = {
|
||||
(label_method, eval_method): []
|
||||
for label_method in ["exp_based", "manual_based"]
|
||||
for eval_method in ["roc", "prc"]
|
||||
}
|
||||
|
||||
|
||||
def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
|
||||
"""
|
||||
deepsad under data['test'][eval], with extra per-eval arrays and AP present.
|
||||
@@ -257,6 +320,8 @@ def rows_from_deepsad(data: dict, evals: List[str]) -> Dict[str, dict]:
|
||||
evd = test.get(ev)
|
||||
if not isinstance(evd, dict):
|
||||
continue
|
||||
counting[(ev, "roc")].append(len(evd["roc"][0]))
|
||||
counting[(ev, "prc")].append(len(evd["prc"][0]))
|
||||
out[ev] = {
|
||||
"auc": float(evd["auc"])
|
||||
if "auc" in evd and evd["auc"] is not None
|
||||
@@ -330,23 +395,28 @@ def rows_from_ocsvm_default(data: dict, evals: List[str]) -> Dict[str, dict]:
|
||||
# Build the Polars DataFrame
|
||||
# ------------------------------------------------------------
|
||||
def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame:
|
||||
"""
|
||||
Walks experiment subdirs under `root`. For each (model, fold) it adds rows:
|
||||
Columns (SCHEMA_STATIC):
|
||||
network, latent_dim, semi_normals, semi_anomalous,
|
||||
model, eval, fold,
|
||||
auc, ap, scores{sample_idx,orig_label,score},
|
||||
roc_curve{fpr,tpr,thr}, prc_curve{precision,recall,thr},
|
||||
sample_indices, sample_labels, valid_mask,
|
||||
train_time, test_time,
|
||||
folder, k_fold_num
|
||||
"""
|
||||
if allow_cache:
|
||||
cache = root / "results_cache.parquet"
|
||||
if cache.exists():
|
||||
try:
|
||||
df = pl.read_parquet(cache)
|
||||
print(f"[info] loaded cached results frame from {cache}")
|
||||
# Backward-compat: old caches may have 'auc' but no 'roc_auc'/'prc_auc'
|
||||
if "roc_auc" not in df.columns and "auc" in df.columns:
|
||||
df = df.rename({"auc": "roc_auc"})
|
||||
if "prc_auc" not in df.columns and "prc_curve" in df.columns:
|
||||
df = df.with_columns(
|
||||
pl.struct(
|
||||
pl.col("prc_curve").struct.field("precision"),
|
||||
pl.col("prc_curve").struct.field("recall"),
|
||||
)
|
||||
.map_elements(
|
||||
lambda s: compute_prc_auc_from_curve(
|
||||
{"precision": s[0], "recall": s[1]}
|
||||
)
|
||||
)
|
||||
.alias("prc_auc")
|
||||
)
|
||||
return df
|
||||
except Exception as e:
|
||||
print(f"[warn] failed to load cache {cache}: {e}")
|
||||
@@ -381,15 +451,17 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
|
||||
continue
|
||||
|
||||
if model == "deepsad":
|
||||
per_eval = rows_from_deepsad(data, EVALS) # eval -> dict
|
||||
per_eval = rows_from_deepsad(data, EVALS)
|
||||
elif model == "isoforest":
|
||||
per_eval = rows_from_isoforest(data, EVALS) # eval -> dict
|
||||
per_eval = rows_from_isoforest(data, EVALS)
|
||||
elif model == "ocsvm":
|
||||
per_eval = rows_from_ocsvm_default(data, EVALS) # eval -> dict
|
||||
per_eval = rows_from_ocsvm_default(data, EVALS)
|
||||
else:
|
||||
per_eval = {}
|
||||
|
||||
for ev, vals in per_eval.items():
|
||||
# compute prc_auc now (fast), rename auc->roc_auc
|
||||
prc_auc_val = compute_prc_auc_from_curve(vals.get("prc"))
|
||||
rows.append(
|
||||
{
|
||||
"network": network,
|
||||
@@ -399,7 +471,8 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
|
||||
"model": model,
|
||||
"eval": ev,
|
||||
"fold": fold,
|
||||
"auc": vals["auc"],
|
||||
"roc_auc": vals["auc"], # renamed
|
||||
"prc_auc": prc_auc_val, # new
|
||||
"ap": vals["ap"],
|
||||
"scores": vals["scores"],
|
||||
"roc_curve": vals["roc"],
|
||||
@@ -415,20 +488,19 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
|
||||
}
|
||||
)
|
||||
|
||||
# If empty, return a typed empty frame
|
||||
if not rows:
|
||||
# Return a typed empty frame (new schema)
|
||||
return pl.DataFrame(schema=SCHEMA_STATIC)
|
||||
|
||||
df = pl.DataFrame(rows, schema=SCHEMA_STATIC)
|
||||
|
||||
# Cast to efficient dtypes (categoricals etc.) – no extra sanitation
|
||||
# Cast to efficient dtypes (categoricals etc.)
|
||||
df = df.with_columns(
|
||||
pl.col("network", "model", "eval").cast(pl.Categorical),
|
||||
pl.col(
|
||||
"latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
|
||||
).cast(pl.Int32),
|
||||
pl.col("auc", "ap", "train_time", "test_time").cast(pl.Float64),
|
||||
# NOTE: no cast on 'scores' here; it's already List(Struct) per schema.
|
||||
pl.col("roc_auc", "prc_auc", "ap", "train_time", "test_time").cast(pl.Float64),
|
||||
)
|
||||
|
||||
if allow_cache:
|
||||
@@ -444,7 +516,6 @@ def load_results_dataframe(root: Path, allow_cache: bool = True) -> pl.DataFrame
|
||||
def load_pretraining_results_dataframe(
|
||||
root: Path,
|
||||
allow_cache: bool = True,
|
||||
include_train: bool = False, # <— default: store only TEST to keep cache tiny
|
||||
keep_file_names: bool = False, # <— drop file_names by default; they’re repeated
|
||||
parquet_compression: str = "zstd",
|
||||
parquet_compression_level: int = 7, # <— stronger compression than default
|
||||
@@ -484,9 +555,6 @@ def load_pretraining_results_dataframe(
|
||||
semi_anomalous = int(cfg.get("num_known_outlier"))
|
||||
k = int(cfg.get("k_fold_num"))
|
||||
|
||||
# Only test split by default (include_train=False)
|
||||
splits = ("train", "test") if include_train else ("test",)
|
||||
|
||||
for fold in range(k):
|
||||
pkl = exp_dir / f"results_ae_{fold}.pkl"
|
||||
if not pkl.exists():
|
||||
@@ -498,57 +566,53 @@ def load_pretraining_results_dataframe(
|
||||
print(f"[warn] failed to read {pkl.name}: {e}")
|
||||
continue
|
||||
|
||||
for split in splits:
|
||||
splitd = data.get(split)
|
||||
if not isinstance(splitd, dict):
|
||||
continue
|
||||
train_time = data.get("train", {}).get("time")
|
||||
data = data.get("test", {})
|
||||
|
||||
rows.append(
|
||||
{
|
||||
"network": network,
|
||||
"latent_dim": latent_dim,
|
||||
"semi_normals": semi_normals,
|
||||
"semi_anomalous": semi_anomalous,
|
||||
"model": "ae",
|
||||
"fold": fold,
|
||||
"split": split,
|
||||
"time": float(splitd.get("time"))
|
||||
if splitd.get("time") is not None
|
||||
else None,
|
||||
"loss": float(splitd.get("loss"))
|
||||
if splitd.get("loss") is not None
|
||||
else None,
|
||||
# ints as Int32, scores as Float32 to save space
|
||||
"indices": normalize_int_list(splitd.get("indices")),
|
||||
"labels_exp_based": normalize_int_list(
|
||||
splitd.get("labels_exp_based")
|
||||
),
|
||||
"labels_manual_based": normalize_int_list(
|
||||
splitd.get("labels_manual_based")
|
||||
),
|
||||
"semi_targets": normalize_int_list(splitd.get("semi_targets")),
|
||||
"file_ids": normalize_int_list(splitd.get("file_ids")),
|
||||
"frame_ids": normalize_int_list(splitd.get("frame_ids")),
|
||||
"scores": (
|
||||
None
|
||||
if splitd.get("scores") is None
|
||||
else [
|
||||
float(x)
|
||||
for x in (
|
||||
splitd["scores"].tolist()
|
||||
if isinstance(splitd["scores"], np.ndarray)
|
||||
else splitd["scores"]
|
||||
)
|
||||
]
|
||||
),
|
||||
"file_names": normalize_file_names(splitd.get("file_names"))
|
||||
if keep_file_names
|
||||
else None,
|
||||
"folder": str(exp_dir),
|
||||
"k_fold_num": k,
|
||||
"config_json": cfg_json,
|
||||
}
|
||||
)
|
||||
rows.append(
|
||||
{
|
||||
"network": network,
|
||||
"latent_dim": latent_dim,
|
||||
"semi_normals": semi_normals,
|
||||
"semi_anomalous": semi_anomalous,
|
||||
"model": "ae",
|
||||
"fold": fold,
|
||||
"train_time": train_time,
|
||||
"test_time": data.get("time"),
|
||||
"loss": float(data.get("loss"))
|
||||
if data.get("loss") is not None
|
||||
else None,
|
||||
# ints as Int32, scores as Float32 to save space
|
||||
"indices": normalize_int_list(data.get("indices")),
|
||||
"labels_exp_based": normalize_int_list(
|
||||
data.get("labels_exp_based")
|
||||
),
|
||||
"labels_manual_based": normalize_int_list(
|
||||
data.get("labels_manual_based")
|
||||
),
|
||||
"semi_targets": normalize_int_list(data.get("semi_targets")),
|
||||
"file_ids": normalize_int_list(data.get("file_ids")),
|
||||
"frame_ids": normalize_int_list(data.get("frame_ids")),
|
||||
"scores": (
|
||||
None
|
||||
if data.get("scores") is None
|
||||
else [
|
||||
float(x)
|
||||
for x in (
|
||||
data["scores"].tolist()
|
||||
if isinstance(data["scores"], np.ndarray)
|
||||
else data["scores"]
|
||||
)
|
||||
]
|
||||
),
|
||||
"file_names": normalize_file_names(data.get("file_names"))
|
||||
if keep_file_names
|
||||
else None,
|
||||
"folder": str(exp_dir),
|
||||
"k_fold_num": k,
|
||||
"config_json": cfg_json,
|
||||
}
|
||||
)
|
||||
|
||||
if not rows:
|
||||
return pl.DataFrame(schema=PRETRAIN_SCHEMA)
|
||||
@@ -557,11 +621,11 @@ def load_pretraining_results_dataframe(
|
||||
|
||||
# Cast/optimize a bit (categoricals, ints, floats)
|
||||
df = df.with_columns(
|
||||
pl.col("network", "model", "split").cast(pl.Categorical),
|
||||
pl.col("network", "model").cast(pl.Categorical),
|
||||
pl.col(
|
||||
"latent_dim", "semi_normals", "semi_anomalous", "fold", "k_fold_num"
|
||||
).cast(pl.Int32),
|
||||
pl.col("time", "loss").cast(pl.Float64),
|
||||
pl.col("test_time", "train_time", "loss").cast(pl.Float64),
|
||||
pl.col("scores").cast(pl.List(pl.Float32)), # ensure downcast took
|
||||
)
|
||||
|
||||
@@ -584,13 +648,176 @@ def load_pretraining_results_dataframe(
|
||||
return df
|
||||
|
||||
|
||||
def main():
|
||||
root = Path("/home/fedex/mt/results/done")
|
||||
df = load_results_dataframe(root, allow_cache=True)
|
||||
print(df.shape, df.head())
|
||||
def load_inference_results_dataframe(
|
||||
root: Path,
|
||||
allow_cache: bool = True,
|
||||
models: List[str] = MODELS,
|
||||
) -> pl.DataFrame:
|
||||
"""Load inference results from experiment folders.
|
||||
|
||||
df_pre = load_pretraining_results_dataframe(root, allow_cache=True)
|
||||
print("pretraining:", df_pre.shape, df_pre.head())
|
||||
Args:
|
||||
root: Path to root directory containing experiment folders
|
||||
allow_cache: Whether to use/create cache file
|
||||
models: List of models to look for scores
|
||||
|
||||
Returns:
|
||||
pl.DataFrame: DataFrame containing inference results
|
||||
"""
|
||||
if allow_cache:
|
||||
cache = root / "inference_results_cache.parquet"
|
||||
if cache.exists():
|
||||
try:
|
||||
df = pl.read_parquet(cache)
|
||||
print(f"[info] loaded cached inference frame from {cache}")
|
||||
return df
|
||||
except Exception as e:
|
||||
print(f"[warn] failed to load inference cache {cache}: {e}")
|
||||
|
||||
rows: List[dict] = []
|
||||
|
||||
exp_dirs = [p for p in root.iterdir() if p.is_dir()]
|
||||
for exp_dir in sorted(exp_dirs):
|
||||
try:
|
||||
# Load and validate config
|
||||
cfg = read_config(exp_dir, k_fold_required=False)
|
||||
cfg_json = json.dumps(cfg, sort_keys=True)
|
||||
|
||||
# Extract config values
|
||||
network = cfg.get("net_name")
|
||||
latent_dim = int(cfg.get("latent_space_dim"))
|
||||
semi_normals = int(cfg.get("num_known_normal"))
|
||||
semi_anomalous = int(cfg.get("num_known_outlier"))
|
||||
|
||||
# Process each model's scores
|
||||
inference_dir = exp_dir / "inference"
|
||||
if not inference_dir.exists():
|
||||
print(f"[warn] no inference directory for {exp_dir.name}")
|
||||
continue
|
||||
|
||||
# Find all unique experiments in this folder's inference files
|
||||
score_files = list(inference_dir.glob("*_scores.npy"))
|
||||
if not score_files:
|
||||
print(f"[warn] no score files in {inference_dir}")
|
||||
continue
|
||||
|
||||
# Extract unique experiment names from score files
|
||||
# Format: {experiment}_{model}_scores.npy
|
||||
experiments = set()
|
||||
for score_file in score_files:
|
||||
exp_name = score_file.stem.rsplit("_", 2)[0]
|
||||
experiments.add(exp_name)
|
||||
|
||||
# Load scores for each experiment and model
|
||||
for experiment in sorted(experiments):
|
||||
for model in models:
|
||||
score_file = inference_dir / f"{experiment}_{model}_scores.npy"
|
||||
if not score_file.exists():
|
||||
print(f"[warn] missing score file for {experiment}, {model}")
|
||||
continue
|
||||
|
||||
try:
|
||||
scores = np.load(score_file)
|
||||
rows.append(
|
||||
{
|
||||
"experiment": experiment,
|
||||
"network": network,
|
||||
"latent_dim": latent_dim,
|
||||
"semi_normals": semi_normals,
|
||||
"semi_anomalous": semi_anomalous,
|
||||
"model": model,
|
||||
"scores": scores.tolist(),
|
||||
"folder": str(exp_dir),
|
||||
"config_json": cfg_json,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
print(
|
||||
f"[warn] failed to load scores for {experiment}, {model}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"[warn] skipping {exp_dir.name}: {e}")
|
||||
continue
|
||||
|
||||
# If empty, return a typed empty frame
|
||||
if not rows:
|
||||
return pl.DataFrame(schema=SCHEMA_INFERENCE)
|
||||
|
||||
df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE)
|
||||
|
||||
# Optimize datatypes
|
||||
df = df.with_columns(
|
||||
[
|
||||
pl.col("experiment", "network", "model").cast(pl.Categorical),
|
||||
pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32),
|
||||
]
|
||||
)
|
||||
|
||||
# Cache if enabled
|
||||
if allow_cache:
|
||||
try:
|
||||
df.write_parquet(cache)
|
||||
print(f"[info] cached inference frame to {cache}")
|
||||
except Exception as e:
|
||||
print(f"[warn] failed to write cache {cache}: {e}")
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def main():
    """Ad-hoc driver used while developing the loaders.

    As written it only builds (or loads from cache) the inference results
    frame and then terminates; the comparison code further down is kept for
    manual re-activation.
    """
    inference_root = Path("/home/fedex/mt/results/inference/copy")
    df_inference = load_inference_results_dataframe(inference_root, allow_cache=True)

    # NOTE(review): this exit(0) ends the process here, so every statement
    # below is unreachable until it is removed.
    exit(0)

    root = Path("/home/fedex/mt/results/copy")
    df1 = load_results_dataframe(root, allow_cache=True)
    # NOTE(review): second early exit; the retest comparison below never runs.
    exit(0)

    retest_root = Path("/home/fedex/mt/results/copy/retest_nodrop")
    # NOTE(review): "folder" is dropped from df2 only; if df1 still carries
    # that column the shape/column assertions below will fail — confirm.
    df2 = load_results_dataframe(retest_root, allow_cache=False).drop("folder")

    # exact schema & shape first (optional but helpful messages)
    assert df1.shape == df2.shape, f"Shape differs: {df1.shape} vs {df2.shape}"
    assert set(df1.columns) == set(df2.columns), (
        f"Column sets differ: {df1.columns} vs {df2.columns}"
    )

    # allow small float diffs, ignore column order differences if you want
    df1_sorted = df1.select(sorted(df1.columns))
    df2_sorted = df2.select(sorted(df2.columns))

    # Optionally pre-align/sort both frames by a stable key before diffing.
    summary, leaves = recursive_diff_frames(
        df1,
        df2,
        ignore=["timestamp"],  # columns to ignore
        float_atol=0.1,  # absolute tolerance for floats
        float_rtol=0.0,  # relative tolerance for floats
        max_rows_per_column=20,  # limit expansion per column
        max_leafs_per_row=200,  # cap leaves per row
    )

    # widen polars' printing limits so the diff tables are shown in full
    pl.Config.set_fmt_table_cell_list_len(100)
    pl.Config.set_tbl_rows(100)

    print(summary)  # which columns differ & how many rows
    print(leaves)  # exact nested paths + scalar diffs

    # check_exact=False lets us use atol/rtol for floats
    assert_frame_equal(
        df1_sorted,
        df2_sorted,
        check_exact=False,
        atol=0.1,  # absolute tolerance for floats
        rtol=0.0,  # relative tolerance (set if you want % based)
        check_dtypes=True,  # set False if you only care about values
    )
    print("DataFrames match within tolerance ✅")

    # df_pre = load_pretraining_results_dataframe(root, allow_cache=True)
    # print("pretraining:", df_pre.shape, df_pre.head())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
306
tools/plot_scripts/results_ae_table.py
Normal file
@@ -0,0 +1,306 @@
|
||||
# ae_losses_table_from_df.py
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
|
||||
from load_results import load_pretraining_results_dataframe
|
||||
|
||||
# ----------------------------
|
||||
# Config
|
||||
# ----------------------------
|
||||
ROOT = Path("/home/fedex/mt/results/copy") # experiments root you pass to the loader
|
||||
OUTPUT_DIR = Path("/home/fedex/mt/plots/results_ae_table")
|
||||
|
||||
# Which label field to use from the DF; "labels_exp_based" or "labels_manual_based"
|
||||
LABEL_FIELD = "labels_exp_based"
|
||||
|
||||
# Which architectures to include (labels must match canonicalize_network)
|
||||
WANTED_NETS = {"LeNet", "Efficient"}
|
||||
|
||||
# Formatting
|
||||
DECIMALS = 4 # how many decimals to display for losses
|
||||
BOLD_BEST = False # set True to bold per-group best (lower is better)
|
||||
LOWER_IS_BETTER = True # for losses we want the minimum
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Helpers (ported/minified from your plotting script)
|
||||
# ----------------------------
|
||||
def canonicalize_network(name: str) -> str:
    """Map a raw network name onto its canonical table label.

    Matching is a case-insensitive substring test: names containing "lenet"
    become "LeNet", names containing "efficient" become "Efficient" ("lenet"
    wins when both occur). Any other non-empty name is returned unchanged and
    an empty/None name becomes "unknown".
    """
    lowered = name.lower() if name else ""
    for needle, canonical in (("lenet", "LeNet"), ("efficient", "Efficient")):
        if needle in lowered:
            return canonical
    return name if name else "unknown"
|
||||
|
||||
|
||||
def calculate_batch_mean_loss(scores: np.ndarray, batch_size: int) -> float:
    """Average the per-batch means of ``scores``.

    ``scores`` is cut into consecutive chunks of ``batch_size`` items (the
    last chunk may be shorter); the mean of each chunk is taken and those
    means are averaged with equal weight. A non-positive ``batch_size`` puts
    everything into one chunk. Returns ``np.nan`` for empty input.
    """
    total = len(scores)
    if total == 0:
        return np.nan

    step = batch_size if batch_size > 0 else total
    chunk_means = [
        float(np.mean(scores[start : start + step]))
        for start in range(0, total, step)
    ]
    # One entry per chunk, so len(chunk_means) == ceil(total / step).
    return sum(chunk_means, 0.0) / len(chunk_means)
|
||||
|
||||
|
||||
def extract_batch_size(cfg_json: str) -> int:
    """Read the autoencoder batch size out of a serialized config.

    Args:
        cfg_json: JSON text of an experiment config; may be empty or None.

    Returns:
        ``ae_batch_size`` if set and truthy, else ``batch_size`` if set and
        truthy, else 256. The default is also used when the text cannot be
        parsed or does not decode to a JSON object.
    """
    import json

    try:
        cfg = json.loads(cfg_json) if cfg_json else {}
    except (TypeError, ValueError):
        # TypeError: not str/bytes; ValueError covers json.JSONDecodeError.
        cfg = {}

    # Valid JSON need not be an object (e.g. a list or a bare number); such a
    # payload has no keys to look up, so treat it like an empty config instead
    # of failing on ``.get``.
    if not isinstance(cfg, dict):
        cfg = {}

    return int(cfg.get("ae_batch_size") or cfg.get("batch_size") or 256)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Cell:
    """Immutable (mean, std) pair backing one cell of the losses table."""

    # aggregated mean loss for the cell; None when the cell has no data
    mean: float | None
    # aggregated standard deviation for the cell; None when unavailable
    std: float | None
|
||||
|
||||
|
||||
def _fmt(mean: float | None) -> str:
    """Render a cell value with ``DECIMALS`` fractional digits.

    Missing values (``None`` or NaN) are rendered as ``"--"``.
    """
    if mean is None:
        return "--"
    if mean != mean:  # NaN is the only float that differs from itself
        return "--"
    return f"{mean:.{DECIMALS}f}"
|
||||
|
||||
|
||||
def _bold_mask_display(
    values: List[float | None], decimals: int, lower_is_better: bool
) -> List[bool]:
    """Flag which entries are "best" once rounded to the displayed precision.

    Every value is rounded the way it will be printed (``decimals`` fractional
    digits); ``None`` and NaN count as missing. An entry is flagged when its
    rounded value equals the minimum (``lower_is_better=True``) or maximum
    (``lower_is_better=False``) of the non-missing rounded values, so ties at
    display precision are all flagged. With no usable value nothing is flagged.
    """
    shown: List[float | None] = []
    for raw in values:
        if raw is None or raw != raw:
            shown.append(None)
        else:
            # round-trip through the format string to match display rounding
            shown.append(float(f"{raw:.{decimals}f}"))

    candidates = [item for item in shown if item is not None]
    if not candidates:
        return [False] * len(values)

    pick = min if lower_is_better else max
    best = pick(candidates)
    return [item is not None and item == best for item in shown]
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Core
|
||||
# ----------------------------
|
||||
def build_losses_table_from_df(
    df: pl.DataFrame, label_field: str
) -> Tuple[str, float | None]:
    """Build the LaTeX table of autoencoder pre-training losses.

    The table has one row per latent dimension and two column groups,
    "Overall loss" and "Anomaly loss", each with one column per network
    present in the data (LeNet before Efficient when both occur).

    Args:
        df: Pre-training results frame. Must contain "scores", "network",
            "latent_dim" and the column named by ``label_field``;
            "config_json" is used, when present, to look up the batch size.
        label_field: Name of the per-sample label column. Samples whose label
            equals -1 form the anomaly subset.

    Returns:
        ``(latex, max_std)`` where ``latex`` is the complete ``table``
        environment and ``max_std`` is the largest non-NaN standard deviation
        over all aggregated cells, or ``None`` if there is none.

    Raises:
        ValueError: if a required column is missing or no row survives the
            ``WANTED_NETS`` filter.
    """
    # Basic validation
    required_cols = {"scores", "network", "latent_dim"}
    missing = required_cols - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns in AE dataframe: {missing}")
    if label_field not in df.columns:
        raise ValueError(f"Expected '{label_field}' column in AE dataframe.")

    # Canonicalize nets, compute per-row overall/anomaly losses
    rows: List[dict] = []
    for row in df.iter_rows(named=True):
        net = canonicalize_network(row["network"])
        if WANTED_NETS and net not in WANTED_NETS:
            continue
        dim = int(row["latent_dim"])
        batch_size = extract_batch_size(row.get("config_json"))
        scores = np.asarray(row["scores"] or [], dtype=float)

        labels = row.get(label_field)
        labels = np.asarray(labels, dtype=int) if labels is not None else None

        overall_loss = calculate_batch_mean_loss(scores, batch_size)

        # The anomaly loss stays NaN unless labels line up with the scores
        # and at least one sample is labelled -1.
        anomaly_loss = np.nan
        if labels is not None and labels.size == scores.size:
            anomaly_scores = scores[labels == -1]
            if anomaly_scores.size > 0:
                anomaly_loss = calculate_batch_mean_loss(anomaly_scores, batch_size)

        rows.append(
            {
                "net": net,
                "latent_dim": dim,
                "overall": overall_loss,
                "anomaly": anomaly_loss,
            }
        )

    if not rows:
        raise ValueError(
            "No rows available after filtering; check WANTED_NETS or input data."
        )

    df2 = pl.DataFrame(rows)

    # Aggregate across folds per (net, latent_dim)
    agg = df2.group_by(["net", "latent_dim"]).agg(
        pl.col("overall").mean().alias("overall_mean"),
        pl.col("overall").std().alias("overall_std"),
        pl.col("anomaly").mean().alias("anomaly_mean"),
        pl.col("anomaly").std().alias("anomaly_std"),
    )

    # Collect union of dims across both nets
    dims = sorted(set(agg.get_column("latent_dim").to_list()))

    # Build lookup
    keymap: Dict[Tuple[str, int], Cell] = {}
    keymap_anom: Dict[Tuple[str, int], Cell] = {}

    max_std: float | None = None

    def push_std(v: float | None):
        """Track the largest non-missing standard deviation seen so far."""
        nonlocal max_std
        if v is None or not (v == v):
            return
        if max_std is None or v > max_std:
            max_std = v

    for r in agg.iter_rows(named=True):
        k = (r["net"], int(r["latent_dim"]))
        keymap[k] = Cell(r.get("overall_mean"), r.get("overall_std"))
        keymap_anom[k] = Cell(r.get("anomaly_mean"), r.get("anomaly_std"))
        push_std(r.get("overall_std"))
        push_std(r.get("anomaly_std"))

    # Ensure nets order consistent
    nets_order = ["LeNet", "Efficient"]
    nets_present = [n for n in nets_order if any(k[0] == n for k in keymap.keys())]
    if not nets_present:
        nets_present = sorted({k[0] for k in keymap.keys()})

    # The data rows below emit one cell per entry of nets_present in each
    # group, so the column spec, group headers and rules must be derived from
    # the same list; hard-coding two networks would produce a malformed table
    # whenever only one (or a different set) is present.
    n_nets = len(nets_present)
    width = str(n_nets)
    overall_span = f"2-{n_nets + 1}"
    anomaly_span = f"{n_nets + 2}-{2 * n_nets + 1}"

    # Build LaTeX table
    lines: List[str] = []
    lines.append(r"\begin{table}[t]")
    lines.append(r"\centering")
    lines.append(r"\setlength{\tabcolsep}{4pt}")
    lines.append(r"\renewcommand{\arraystretch}{1.2}")
    # vertical bar between the two groups
    lines.append(
        r"\begin{tabularx}{\textwidth}{c*{" + width + r"}{Y}|*{" + width + r"}{Y}}"
    )
    lines.append(r"\toprule")
    lines.append(
        r" & \multicolumn{"
        + width
        + r"}{c}{Overall loss} & \multicolumn{"
        + width
        + r"}{c}{Anomaly loss} \\"
    )
    lines.append(
        r"\cmidrule(lr){" + overall_span + r"} \cmidrule(lr){" + anomaly_span + "}"
    )
    lines.append(
        r"Latent Dim. & "
        + " & ".join(nets_present)
        + " & "
        + " & ".join(nets_present)
        + r" \\"
    )
    lines.append(r"\midrule")

    for d in dims:
        # Gather values in header order: overall group first, then anomaly
        overall_vals = [keymap.get((n, d), Cell(None, None)).mean for n in nets_present]
        anomaly_vals = [
            keymap_anom.get((n, d), Cell(None, None)).mean for n in nets_present
        ]
        overall_strs = [_fmt(v) for v in overall_vals]
        anomaly_strs = [_fmt(v) for v in anomaly_vals]

        if BOLD_BEST:
            mask_overall = _bold_mask_display(overall_vals, DECIMALS, LOWER_IS_BETTER)
            mask_anom = _bold_mask_display(anomaly_vals, DECIMALS, LOWER_IS_BETTER)
            overall_strs = [
                (r"\textbf{" + s + "}") if (m and s != "--") else s
                for s, m in zip(overall_strs, mask_overall)
            ]
            anomaly_strs = [
                (r"\textbf{" + s + "}") if (m and s != "--") else s
                for s, m in zip(anomaly_strs, mask_anom)
            ]

        lines.append(
            f"{d} & "
            + " & ".join(overall_strs)
            + " & "
            + " & ".join(anomaly_strs)
            + r" \\"
        )

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabularx}")

    max_std_str = "n/a" if max_std is None else f"{max_std:.{DECIMALS}f}"
    lines.append(
        rf"\caption{{Autoencoder pre-training MSE losses (test split) across latent dimensions. "
        rf"Left: overall loss; Right: anomaly-only loss. "
        rf"Cells show means across folds (no $\pm$std). "
        rf"Maximum observed standard deviation across all cells (not shown): {max_std_str}.}}"
    )
    lines.append(r"\end{table}")

    return "\n".join(lines), max_std
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Entry
|
||||
# ----------------------------
|
||||
def main():
    """Build the AE pre-training loss table and store it under archive/ and latest/.

    The LaTeX table is written to a fresh timestamped folder below
    ``OUTPUT_DIR / "archive"`` together with a copy of this script, and the
    files of that folder are then mirrored into ``OUTPUT_DIR / "latest"``.
    """
    df = load_pretraining_results_dataframe(ROOT, allow_cache=True)

    # Build LaTeX table (the max-std value is already embedded in the caption)
    tex, _max_std = build_losses_table_from_df(df, LABEL_FIELD)

    # Output dirs
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    ts_dir = OUTPUT_DIR / "archive" / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    ts_dir.mkdir(parents=True, exist_ok=True)

    out_name = "ae_pretraining_losses_table.tex"
    out_path = ts_dir / out_name
    out_path.write_text(tex, encoding="utf-8")

    # Save a copy of this script. This is best effort: a failure must not
    # abort the run, but it is reported instead of being swallowed silently.
    try:
        script_path = Path(__file__)
        shutil.copy2(script_path, ts_dir / script_path.name)
    except (OSError, NameError) as exc:
        print(f"Warning: could not archive a copy of this script: {exc}")

    # Mirror latest: drop the old files, then copy the new ones over
    latest = OUTPUT_DIR / "latest"
    latest.mkdir(parents=True, exist_ok=True)
    for stale in latest.iterdir():
        if stale.is_file():
            stale.unlink()
    for produced in ts_dir.iterdir():
        if produced.is_file():
            shutil.copy2(produced, latest / produced.name)

    print(f"Saved table to: {ts_dir}")
    print(f"Also updated: {latest}")
    print(f" - {out_name}")


if __name__ == "__main__":
    main()
|
||||
273
tools/plot_scripts/results_ap_over_latent.py
Normal file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
|
||||
# =========================
|
||||
# Config
|
||||
# =========================
|
||||
# Where the result files are read from and where the figures are written to.
ROOT = Path("/home/fedex/mt/results/copy")
OUTPUT_DIR = Path("/home/fedex/mt/plots/results_ap_over_latent")

# One subplot per labeling regime, given as (semi_normals, semi_anomalous).
SEMI_LABELING_REGIMES: list[tuple[int, int]] = [(0, 0), (50, 10), (500, 100)]

# One figure per evaluation type.
EVALS: list[str] = ["exp_based", "manual_based"]

# Latent dimensions shown along the x-axis, in plotting order.
LATENT_DIMS: list[int] = [32, 64, 128, 256, 512, 768, 1024]

# --- Visual style ---
FIGSIZE = (8, 8)  # one tall figure with 3 compact subplots
MARKERSIZE = 7
SCATTER_ALPHA = 0.95
LINEWIDTH = 2.0
TREND_LINEWIDTH = 2.2
BAND_ALPHA = 0.18

# Draw the ±1 std bands (k-fold variability); set to False to hide them.
SHOW_STD_BANDS = True

# Line/marker colors of the two DeepSAD backbones.
COLOR_LENET = "#1f77b4"  # blue
COLOR_EFFICIENT = "#ff7f0e"  # orange
|
||||
|
||||
# =========================
|
||||
# Loader
|
||||
# =========================
|
||||
from load_results import load_results_dataframe
|
||||
|
||||
|
||||
# =========================
|
||||
# Helpers
|
||||
# =========================
|
||||
def _with_net_label(df: pl.DataFrame) -> pl.DataFrame:
    """Add a ``net_label`` column derived from the ``network`` column.

    A network name containing "lenet" (case-insensitive) is labelled
    "LeNet", one containing "efficient" is labelled "Efficient"; any other
    name is passed through unchanged as a string.
    """
    network = pl.col("network").cast(pl.Utf8)
    lowered = network.str.to_lowercase()
    label = (
        pl.when(lowered.str.contains("lenet"))
        .then(pl.lit("LeNet"))
        .when(lowered.str.contains("efficient"))
        .then(pl.lit("Efficient"))
        .otherwise(network)
    )
    return df.with_columns(label.alias("net_label"))
|
||||
|
||||
|
||||
def _filter_deepsad(df: pl.DataFrame) -> pl.DataFrame:
    """Keep only the DeepSAD rows and columns needed for the AP plots.

    Rows must belong to model "deepsad", to one of ``EVALS``, to one of
    ``LATENT_DIMS`` and to the "LeNet" or "Efficient" network label.
    """
    wanted_rows = (
        (pl.col("model") == "deepsad")
        & pl.col("eval").is_in(EVALS)
        & pl.col("latent_dim").is_in(LATENT_DIMS)
        & pl.col("net_label").is_in(["LeNet", "Efficient"])
    )
    wanted_columns = [
        "eval",
        "net_label",
        "latent_dim",
        "semi_normals",
        "semi_anomalous",
        "fold",
        "ap",
    ]
    return df.filter(wanted_rows).select(wanted_columns)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Agg:
    """Immutable (mean, std) pair describing one aggregated group of values."""

    mean: float  # average of the group
    std: float  # standard deviation of the group
|
||||
|
||||
|
||||
def aggregate_ap(df: pl.DataFrame) -> Dict[Tuple[str, str, int, int, int], Agg]:
    """Aggregate the ``ap`` column over all rows of each configuration.

    Rows are grouped by (eval, net_label, latent_dim, semi_normals,
    semi_anomalous) and each group is reduced to the mean and standard
    deviation of ``ap``.

    Returns:
        Mapping from the group key tuple to an ``Agg``. A statistic that is
        missing (null) or NaN is stored as ``np.nan``.
    """

    def _as_float(value) -> float:
        # A null aggregate arrives here as None (presumably e.g. the sample
        # std of a single-row group); float(None) would raise a TypeError,
        # so it is mapped to NaN. float() keeps an existing NaN as NaN.
        return np.nan if value is None else float(value)

    grouped = (
        df.group_by(
            ["eval", "net_label", "latent_dim", "semi_normals", "semi_anomalous"]
        )
        .agg(pl.col("ap").mean().alias("mean"), pl.col("ap").std().alias("std"))
        .to_dicts()
    )

    out: Dict[Tuple[str, str, int, int, int], Agg] = {}
    for row in grouped:
        key = (
            str(row["eval"]),
            str(row["net_label"]),
            int(row["latent_dim"]),
            int(row["semi_normals"]),
            int(row["semi_anomalous"]),
        )
        out[key] = Agg(mean=_as_float(row["mean"]), std=_as_float(row["std"]))
    return out
|
||||
|
||||
|
||||
def _lin_trend(xs: List[int], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
    """Fit a least-squares line through the points and sample it for plotting.

    Args:
        xs: x positions of the points.
        ys: y values; NaN/inf entries are left out of the fit.

    Returns:
        ``(x_fit, y_fit)``: 200 evenly spaced x values spanning the fitted
        points and the fitted line evaluated there. When fewer than two
        usable points are available no line can be fitted and the points
        themselves are returned as float arrays.
    """
    x = np.array(xs, dtype=float)
    y = np.array(ys, dtype=float)
    if len(x) < 2:
        return x, y

    # Non-finite values would make the fit fail or return NaN coefficients,
    # so only finite points take part in it.
    usable = np.isfinite(x) & np.isfinite(y)
    x, y = x[usable], y[usable]
    if len(x) < 2:
        return x, y

    slope, intercept = np.polyfit(x, y, 1)
    x_fit = np.linspace(x.min(), x.max(), 200)
    y_fit = slope * x_fit + intercept
    return x_fit, y_fit
|
||||
|
||||
|
||||
def _dynamic_ylim(all_vals: List[float], all_errs: List[float]) -> Tuple[float, float]:
    """Compute y-axis limits that tightly frame the values (and their bands).

    Args:
        all_vals: plotted mean values; non-finite entries are ignored.
        all_errs: matching error margins, only used when ``SHOW_STD_BANDS``
            is set. A non-finite margin counts as zero.

    Returns:
        ``(y0, y1)`` clipped to [0, 1], padded by 8 % of the data span and
        widened around the midpoint when the window would be under 0.08.
        ``(0.0, 1.0)`` when there is no finite value at all.
    """
    vals = np.array(all_vals, dtype=float)
    if SHOW_STD_BANDS:
        errs = np.array(all_errs, dtype=float)
        # An undefined std must not turn the limits into NaN (which would
        # silently fall back to the full 0..1 range); it adds no margin.
        errs = np.where(np.isfinite(errs), errs, 0.0)
    else:
        errs = np.zeros_like(vals)

    valid = np.isfinite(vals)
    if not np.any(valid):
        return (0.0, 1.0)

    v, e = vals[valid], errs[valid]
    lo = np.min(v - e)
    hi = np.max(v + e)
    span = max(1e-3, hi - lo)
    pad = 0.08 * span
    y0 = max(0.0, lo - pad)
    y1 = min(1.0, hi + pad)
    if (y1 - y0) < 0.08:
        # Avoid an unreadably thin axis range.
        mid = 0.5 * (y0 + y1)
        y0 = max(0.0, mid - 0.04)
        y1 = min(1.0, mid + 0.04)
    return (float(y0), float(y1))
|
||||
|
||||
|
||||
def _get_dim_mapping(dims: list[int]) -> dict[int, int]:
    """Return the evenly spaced x position (0, 1, 2, ...) of each dimension."""
    return dict(zip(dims, range(len(dims))))
|
||||
|
||||
|
||||
def plot_eval(ev: str, agg: Dict[Tuple[str, str, int, int, int], Agg], outdir: Path):
    """Render the figure for evaluation ``ev`` and save it as a PNG.

    The figure stacks one subplot per entry of ``SEMI_LABELING_REGIMES``.
    For the LeNet and Efficient backbones each subplot shows the mean AP per
    latent dimension (scatter), a linear trend line and, when
    ``SHOW_STD_BANDS`` is set, a ±1 std band. Latent dimensions sit at
    evenly spaced x positions and are labelled with their actual values.

    Args:
        ev: evaluation name; selects the entries of ``agg`` and appears in
            the title and the file name.
        agg: aggregated AP keyed by
            (eval, net_label, latent_dim, semi_normals, semi_anomalous).
        outdir: directory that receives ``ap_trends_{ev}.png``; it is not
            created here.
    """
    networks = [("LeNet", COLOR_LENET), ("Efficient", COLOR_EFFICIENT)]

    fig, axes = plt.subplots(
        len(SEMI_LABELING_REGIMES),
        1,
        figsize=FIGSIZE,
        constrained_layout=True,
        sharex=True,
    )
    if len(SEMI_LABELING_REGIMES) == 1:
        axes = [axes]

    # Evenly spaced x positions labelled with the real latent dimensions
    dim_mapping = _get_dim_mapping(LATENT_DIMS)
    tick_positions = list(dim_mapping.values())

    for ax, (semi_n, semi_a) in zip(axes, SEMI_LABELING_REGIMES):
        # Axis decoration is independent of the data: set it once per
        # subplot (also when the subplot ends up without any data) rather
        # than once per plotted network.
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(LATENT_DIMS)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        all_vals, all_errs = [], []
        for net, color in networks:
            xs, ys, es = [], [], []
            for dim in LATENT_DIMS:
                cell = agg.get((ev, net, dim, semi_n, semi_a))
                if cell is None:
                    continue
                xs.append(dim_mapping[dim])  # mapped position, not the dim
                ys.append(cell.mean)
                es.append(cell.std)
            all_vals.extend(ys)
            all_errs.extend(es)
            if not xs:
                continue

            ax.scatter(
                xs, ys, s=35, color=color, alpha=SCATTER_ALPHA, label=f"{net} (points)"
            )
            x_fit, y_fit = _lin_trend(xs, ys)  # fitted on mapped positions
            ax.plot(
                x_fit,
                y_fit,
                color=color,
                linewidth=TREND_LINEWIDTH,
                label=f"{net} (trend)",
            )
            if SHOW_STD_BANDS and es and np.any(np.isfinite(es)):
                ylo = np.clip(np.array(ys) - np.array(es), 0.0, 1.0)
                yhi = np.clip(np.array(ys) + np.array(es), 0.0, 1.0)
                ax.fill_between(
                    xs, ylo, yhi, color=color, alpha=BAND_ALPHA, linewidth=0
                )

        y0, y1 = _dynamic_ylim(all_vals, all_errs)
        ax.set_ylim(y0, y1)

        ax.set_title(f"Labeling regime {semi_n}/{semi_a}", fontsize=11)
        ax.grid(True, alpha=0.35)

    axes[-1].set_xlabel("Latent dimension")
    for ax in axes:
        ax.set_ylabel("AP")

    # A single legend for the whole figure, taken from the first subplot
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, ncol=2, loc="upper center", bbox_to_anchor=(0.75, 0.97))
    fig.suptitle(f"AP vs. Latent Dimensionality — {ev.replace('_', ' ')}", y=1.05)

    fname = f"ap_trends_{ev}.png"
    fig.savefig(outdir / fname, dpi=150)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_all(agg: Dict[Tuple[str, str, int, int, int], Agg], outdir: Path):
    """Create ``outdir`` if necessary and write one figure per entry of ``EVALS``."""
    outdir.mkdir(parents=True, exist_ok=True)
    for evaluation in EVALS:
        plot_eval(evaluation, agg, outdir)
|
||||
|
||||
|
||||
def main():
    """Load the results, aggregate AP and write the plots to archive/ and latest/.

    Plots go to a fresh timestamped folder below ``OUTPUT_DIR / "archive"``
    together with a copy of this script; the files of that folder are then
    mirrored into ``OUTPUT_DIR / "latest"``.
    """
    df = load_results_dataframe(ROOT, allow_cache=True)
    df = _with_net_label(df)
    df = _filter_deepsad(df)
    agg = aggregate_ap(df)

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    archive_dir = OUTPUT_DIR / "archive"
    archive_dir.mkdir(parents=True, exist_ok=True)
    ts_dir = archive_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    ts_dir.mkdir(parents=True, exist_ok=True)

    plot_all(agg, ts_dir)

    # Archiving the script is best effort: a failure must not abort the run,
    # but it is reported instead of being swallowed silently.
    try:
        script_path = Path(__file__)
        shutil.copy2(script_path, ts_dir / script_path.name)
    except (OSError, NameError) as exc:
        print(f"Warning: could not archive a copy of this script: {exc}")

    # Mirror latest: drop the old files, then copy the new ones over
    latest = OUTPUT_DIR / "latest"
    latest.mkdir(parents=True, exist_ok=True)
    for stale in latest.iterdir():
        if stale.is_file():
            stale.unlink()
    for produced in ts_dir.iterdir():
        if produced.is_file():
            shutil.copy2(produced, latest / produced.name)

    print(f"Saved plots to: {ts_dir}")
    print(f"Also updated: {latest}")


if __name__ == "__main__":
    main()
|
||||
260
tools/plot_scripts/results_ap_over_semi.py
Normal file
@@ -0,0 +1,260 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
|
||||
# =========================
|
||||
# Config
|
||||
# =========================
|
||||
# Where the result files are read from and where the figures are written to.
ROOT = Path("/home/fedex/mt/results/copy")
OUTPUT_DIR = Path("/home/fedex/mt/plots/results_ap_over_semi")

# Labeling regimes (shown as separate subplots), as (semi_normals, semi_anomalous)
SEMI_LABELING_REGIMES: list[tuple[int, int]] = [(0, 0), (50, 10), (500, 100)]

# Evaluations: separate figure per eval
EVALS: list[str] = ["exp_based", "manual_based"]

# X-axis (latent dims)
LATENT_DIMS: list[int] = [32, 64, 128, 256, 512, 768, 1024]
# Duplicate of LATENT_DIMS that was annotated as ``int`` although it holds a
# list. Kept under its old name (as an independent copy) so existing
# references keep working.
LATENT_DIM: list[int] = list(LATENT_DIMS)

# Visual style
FIGSIZE = (8, 8)  # one tall figure with 3 compact subplots
MARKERSIZE = 7
SCATTER_ALPHA = 0.95
LINEWIDTH = 2.0
TREND_LINEWIDTH = 2.2
BAND_ALPHA = 0.18

# Toggle: show ±1 std bands (k-fold variability)
SHOW_STD_BANDS = True  # <<< set to False to hide the bands

# Colors for the two DeepSAD backbones
COLOR_LENET = "#1f77b4"  # blue
COLOR_EFFICIENT = "#ff7f0e"  # orange
|
||||
|
||||
# =========================
|
||||
# Loader
|
||||
# =========================
|
||||
from load_results import load_results_dataframe
|
||||
|
||||
|
||||
# =========================
|
||||
# Helpers
|
||||
# =========================
|
||||
def _with_net_label(df: pl.DataFrame) -> pl.DataFrame:
    """Add a ``net_label`` column derived from the ``network`` column.

    A network name containing "lenet" (case-insensitive) is labelled
    "LeNet", one containing "efficient" is labelled "Efficient"; any other
    name is passed through unchanged as a string.
    """
    network = pl.col("network").cast(pl.Utf8)
    lowered = network.str.to_lowercase()
    label = (
        pl.when(lowered.str.contains("lenet"))
        .then(pl.lit("LeNet"))
        .when(lowered.str.contains("efficient"))
        .then(pl.lit("Efficient"))
        .otherwise(network)
    )
    return df.with_columns(label.alias("net_label"))
|
||||
|
||||
|
||||
def _filter_deepsad(df: pl.DataFrame) -> pl.DataFrame:
    """Keep only the DeepSAD rows and columns needed for the AP plots.

    Rows must belong to model "deepsad", to one of ``EVALS``, to one of
    ``LATENT_DIMS`` and to the "LeNet" or "Efficient" network label.
    """
    wanted_rows = (
        (pl.col("model") == "deepsad")
        & pl.col("eval").is_in(EVALS)
        & pl.col("latent_dim").is_in(LATENT_DIMS)
        & pl.col("net_label").is_in(["LeNet", "Efficient"])
    )
    wanted_columns = [
        "eval",
        "net_label",
        "latent_dim",
        "semi_normals",
        "semi_anomalous",
        "fold",
        "ap",
    ]
    return df.filter(wanted_rows).select(wanted_columns)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Agg:
    """Immutable (mean, std) pair describing one aggregated group of values."""

    mean: float  # average of the group
    std: float  # standard deviation of the group
|
||||
|
||||
|
||||
def aggregate_ap(df: pl.DataFrame) -> Dict[Tuple[str, str, int, int, int], Agg]:
    """Aggregate the ``ap`` column over all rows of each configuration.

    Rows are grouped by (eval, net_label, latent_dim, semi_normals,
    semi_anomalous) and each group is reduced to the mean and standard
    deviation of ``ap``.

    Returns:
        Mapping from the group key tuple to an ``Agg``. A statistic that is
        missing (null) or NaN is stored as ``np.nan``.
    """

    def _as_float(value) -> float:
        # A null aggregate arrives here as None (presumably e.g. the sample
        # std of a single-row group); float(None) would raise a TypeError,
        # so it is mapped to NaN. float() keeps an existing NaN as NaN.
        return np.nan if value is None else float(value)

    grouped = (
        df.group_by(
            ["eval", "net_label", "latent_dim", "semi_normals", "semi_anomalous"]
        )
        .agg(pl.col("ap").mean().alias("mean"), pl.col("ap").std().alias("std"))
        .to_dicts()
    )

    out: Dict[Tuple[str, str, int, int, int], Agg] = {}
    for row in grouped:
        key = (
            str(row["eval"]),
            str(row["net_label"]),
            int(row["latent_dim"]),
            int(row["semi_normals"]),
            int(row["semi_anomalous"]),
        )
        out[key] = Agg(mean=_as_float(row["mean"]), std=_as_float(row["std"]))
    return out
|
||||
|
||||
|
||||
def _lin_trend(xs: List[int], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
    """Fit a least-squares line through the points and sample it for plotting.

    Args:
        xs: x positions of the points.
        ys: y values; NaN/inf entries are left out of the fit.

    Returns:
        ``(x_fit, y_fit)``: 200 evenly spaced x values spanning the fitted
        points and the fitted line evaluated there. When fewer than two
        usable points are available no line can be fitted and the points
        themselves are returned as float arrays.
    """
    x = np.array(xs, dtype=float)
    y = np.array(ys, dtype=float)
    if len(x) < 2:
        return x, y

    # Non-finite values would make the fit fail or return NaN coefficients,
    # so only finite points take part in it.
    usable = np.isfinite(x) & np.isfinite(y)
    x, y = x[usable], y[usable]
    if len(x) < 2:
        return x, y

    slope, intercept = np.polyfit(x, y, 1)
    x_fit = np.linspace(x.min(), x.max(), 200)
    y_fit = slope * x_fit + intercept
    return x_fit, y_fit
|
||||
|
||||
|
||||
def _dynamic_ylim(all_vals: List[float], all_errs: List[float]) -> Tuple[float, float]:
    """Compute y-axis limits that tightly frame the values (and their bands).

    Args:
        all_vals: plotted mean values; non-finite entries are ignored.
        all_errs: matching error margins, only used when ``SHOW_STD_BANDS``
            is set. A non-finite margin counts as zero.

    Returns:
        ``(y0, y1)`` clipped to [0, 1], padded by 8 % of the data span and
        widened around the midpoint when the window would be under 0.08.
        ``(0.0, 1.0)`` when there is no finite value at all.
    """
    vals = np.array(all_vals, dtype=float)
    if SHOW_STD_BANDS:
        errs = np.array(all_errs, dtype=float)
        # An undefined std must not turn the limits into NaN (which would
        # silently fall back to the full 0..1 range); it adds no margin.
        errs = np.where(np.isfinite(errs), errs, 0.0)
    else:
        errs = np.zeros_like(vals)

    valid = np.isfinite(vals)
    if not np.any(valid):
        return (0.0, 1.0)

    v, e = vals[valid], errs[valid]
    lo = np.min(v - e)
    hi = np.max(v + e)
    span = max(1e-3, hi - lo)
    pad = 0.08 * span
    y0 = max(0.0, lo - pad)
    y1 = min(1.0, hi + pad)
    if (y1 - y0) < 0.08:
        # Avoid an unreadably thin axis range.
        mid = 0.5 * (y0 + y1)
        y0 = max(0.0, mid - 0.04)
        y1 = min(1.0, mid + 0.04)
    return (float(y0), float(y1))
|
||||
|
||||
|
||||
def plot_eval(ev: str, agg: Dict[Tuple[str, str, int, int, int], Agg], outdir: Path):
    """Render the figure for evaluation ``ev`` and save it as a PNG.

    The figure stacks one subplot per entry of ``SEMI_LABELING_REGIMES``.
    For the LeNet and Efficient backbones each subplot shows the mean AP per
    latent dimension (scatter), a linear trend line and, when
    ``SHOW_STD_BANDS`` is set, a ±1 std band. The x-axis uses the latent
    dimension values themselves.

    Args:
        ev: evaluation name; selects the entries of ``agg`` and appears in
            the title and the file name.
        agg: aggregated AP keyed by
            (eval, net_label, latent_dim, semi_normals, semi_anomalous).
        outdir: directory that receives ``ap_trends_{ev}.png``; it is not
            created here.
    """
    networks = [("LeNet", COLOR_LENET), ("Efficient", COLOR_EFFICIENT)]

    fig, axes = plt.subplots(
        len(SEMI_LABELING_REGIMES),
        1,
        figsize=FIGSIZE,
        constrained_layout=True,
        sharex=True,
    )
    if len(SEMI_LABELING_REGIMES) == 1:
        axes = [axes]

    for ax, (semi_n, semi_a) in zip(axes, SEMI_LABELING_REGIMES):
        # Axis decoration is independent of the data: set it once per
        # subplot (also when the subplot ends up without any data) rather
        # than once per plotted network.
        ax.set_xticks(LATENT_DIMS)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))  # e.g., always 5 ticks

        all_vals, all_errs = [], []
        for net, color in networks:
            xs, ys, es = [], [], []
            for dim in LATENT_DIMS:
                cell = agg.get((ev, net, dim, semi_n, semi_a))
                if cell is None:
                    continue
                xs.append(dim)
                ys.append(cell.mean)
                es.append(cell.std)
            all_vals.extend(ys)
            all_errs.extend(es)
            if not xs:
                continue

            ax.scatter(
                xs, ys, s=35, color=color, alpha=SCATTER_ALPHA, label=f"{net} (points)"
            )
            x_fit, y_fit = _lin_trend(xs, ys)
            ax.plot(
                x_fit,
                y_fit,
                color=color,
                linewidth=TREND_LINEWIDTH,
                label=f"{net} (trend)",
            )
            if SHOW_STD_BANDS and es and np.any(np.isfinite(es)):
                ylo = np.clip(np.array(ys) - np.array(es), 0.0, 1.0)
                yhi = np.clip(np.array(ys) + np.array(es), 0.0, 1.0)
                ax.fill_between(
                    xs, ylo, yhi, color=color, alpha=BAND_ALPHA, linewidth=0
                )

        y0, y1 = _dynamic_ylim(all_vals, all_errs)
        ax.set_ylim(y0, y1)

        ax.set_title(f"Labeling regime {semi_n}/{semi_a}", fontsize=11)
        ax.grid(True, alpha=0.35)

    axes[-1].set_xlabel("Latent dimension")
    for ax in axes:
        ax.set_ylabel("AP")

    # A single legend for the whole figure, taken from the first subplot
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, ncol=2, loc="upper center", bbox_to_anchor=(0.75, 0.97))
    fig.suptitle(f"AP vs. Latent Dimensionality — {ev.replace('_', ' ')}", y=1.05)

    fname = f"ap_trends_{ev}.png"
    fig.savefig(outdir / fname, dpi=150)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_all(agg: Dict[Tuple[str, str, int, int, int], Agg], outdir: Path):
    """Create ``outdir`` if necessary and write one figure per entry of ``EVALS``."""
    outdir.mkdir(parents=True, exist_ok=True)
    for evaluation in EVALS:
        plot_eval(evaluation, agg, outdir)
|
||||
|
||||
|
||||
def main():
    """Load the results, aggregate AP and write the plots to archive/ and latest/.

    Plots go to a fresh timestamped folder below ``OUTPUT_DIR / "archive"``
    together with a copy of this script; the files of that folder are then
    mirrored into ``OUTPUT_DIR / "latest"``.
    """
    df = load_results_dataframe(ROOT, allow_cache=True)
    df = _with_net_label(df)
    df = _filter_deepsad(df)
    agg = aggregate_ap(df)

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    archive_dir = OUTPUT_DIR / "archive"
    archive_dir.mkdir(parents=True, exist_ok=True)
    ts_dir = archive_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    ts_dir.mkdir(parents=True, exist_ok=True)

    plot_all(agg, ts_dir)

    # Archiving the script is best effort: a failure must not abort the run,
    # but it is reported instead of being swallowed silently.
    try:
        script_path = Path(__file__)
        shutil.copy2(script_path, ts_dir / script_path.name)
    except (OSError, NameError) as exc:
        print(f"Warning: could not archive a copy of this script: {exc}")

    # Mirror latest: drop the old files, then copy the new ones over
    latest = OUTPUT_DIR / "latest"
    latest.mkdir(parents=True, exist_ok=True)
    for stale in latest.iterdir():
        if stale.is_file():
            stale.unlink()
    for produced in ts_dir.iterdir():
        if produced.is_file():
            shutil.copy2(produced, latest / produced.name)

    print(f"Saved plots to: {ts_dir}")
    print(f"Also updated: {latest}")


if __name__ == "__main__":
    main()
|
||||
304
tools/plot_scripts/results_inference_timeline.py
Normal file
@@ -0,0 +1,304 @@
|
||||
import json
|
||||
import pickle
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
# =========================
|
||||
# User-configurable params
|
||||
# =========================
|
||||
# Experiment to plot, given as the stem of its .bag file.
EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"

# Folder holding {EXPERIMENT_NAME}_{method}_scores.npy for the methods
# "deepsad", "ocsvm" and "isoforest"; point it at the per-method score dump.
methods_scores_path = Path(
    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
)

# Folder with the .bag recordings the cached statistics were built from.
all_data_path = Path("/home/fedex/mt/data/subter")

# Base output folder: a timestamped run folder is created here, later moved
# to archive/ and mirrored into latest/.
output_path = Path("/home/fedex/mt/plots/results_inference_timeline")

# The cached statistics and the manual labels are read from the output folder.
cache_path = output_path

# Points per LiDAR frame, used to turn point counts into percentages.
data_resolution = 32 * 2048

# Frame rate used to convert frame indices into seconds.
FPS = 10.0

# Flip a method's z-scores when needed so that higher means more degraded
# (only possible when a manual anomaly window exists for the experiment).
ALIGN_SCORE_DIRECTION = True

# =========================
# Setup output folders
# =========================
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

for folder in (
    output_path,
    output_datetime_path,
    latest_folder_path,
    archive_folder_path,
):
    folder.mkdir(exist_ok=True, parents=True)

# =========================
# Discover experiments to reconstruct indices consistent with caches
# =========================
if not all_data_path.exists():
    raise FileNotFoundError(f"all_data_path does not exist: {all_data_path}")

bag_files = [p for p in all_data_path.iterdir() if p.suffix == ".bag"]

# Recordings with "smoke" in the file name are the anomaly experiments. Both
# lists are ordered by file size, the ordering the caches were generated with.
anomaly_experiment_paths = sorted(
    (p for p in bag_files if "smoke" in p.name), key=lambda p: p.stat().st_size
)
normal_experiment_paths = sorted(
    (p for p in bag_files if "smoke" not in p.name), key=lambda p: p.stat().st_size
)

# Locate the requested experiment, looking at the anomaly list first.
exp_path = None
exp_is_anomaly = None
for is_anomaly, candidates in (
    (True, anomaly_experiment_paths),
    (False, normal_experiment_paths),
):
    match = next((p for p in candidates if p.stem == EXPERIMENT_NAME), None)
    if match is not None:
        exp_path, exp_is_anomaly = match, is_anomaly
        break
if exp_path is None:
    raise FileNotFoundError(
        f"Experiment '{EXPERIMENT_NAME}' not found as a .bag in {all_data_path}"
    )

# Position of the experiment inside its own (size-ordered) list
exp_index = (
    anomaly_experiment_paths if exp_is_anomaly else normal_experiment_paths
).index(exp_path)

# =========================
# Load cached statistical data
# =========================
missing_points_cache = cache_path / "missing_points.pkl"
near_sensor_cache = cache_path / "particles_near_sensor_counts_500.pkl"

if not missing_points_cache.exists():
    raise FileNotFoundError(f"Missing points cache not found: {missing_points_cache}")
if not near_sensor_cache.exists():
    raise FileNotFoundError(f"Near-sensor cache not found: {near_sensor_cache}")

# Each cache unpacks into a (normal, anomaly) pair indexed by experiment.
with open(missing_points_cache, "rb") as f:
    missing_points_normal, missing_points_anomaly = pickle.load(f)
with open(near_sensor_cache, "rb") as f:
    near_sensor_normal, near_sensor_anomaly = pickle.load(f)

missing_points_source = (
    missing_points_anomaly if exp_is_anomaly else missing_points_normal
)
near_sensor_source = near_sensor_anomaly if exp_is_anomaly else near_sensor_normal
missing_points_series = np.asarray(missing_points_source[exp_index], dtype=float)
near_sensor_series = np.asarray(near_sensor_source[exp_index], dtype=float)

# Point counts -> percent of all points in a frame
missing_points_pct = (missing_points_series / data_resolution) * 100.0
near_sensor_pct = (near_sensor_series / data_resolution) * 100.0

# =========================
# Load manual anomaly frame borders (optional; used for sign alignment + vertical markers)
# =========================
manually_labeled_anomaly_frames = {}
labels_json_path = cache_path / "manually_labeled_anomaly_frames.json"
if labels_json_path.exists():
    with open(labels_json_path, "r") as frame_borders_file:
        manually_labeled_anomaly_frames_json = json.load(frame_borders_file)
    for entry in manually_labeled_anomaly_frames_json.get("files", []):
        manually_labeled_anomaly_frames[entry["filename"]] = (
            entry.get("semi_target_begin_frame", None),
            entry.get("semi_target_end_frame", None),
        )

# The label file is keyed by .npy file names, so derive this experiment's key.
exp_npy_filename = exp_path.with_suffix(".npy").name
anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None))
|
||||
|
||||
|
||||
# =========================
|
||||
# Load method scores and z-score normalize per method
|
||||
# =========================
|
||||
def zscore_1d(x: np.ndarray, eps=1e-12):
    """Standardise a 1-D series to zero mean and unit (population) std.

    Mean and std are computed from the finite entries only, so a single
    NaN/inf score no longer turns the whole curve into NaN; non-finite
    entries stay non-finite in the result.

    Args:
        x: input values (anything ``np.asarray`` accepts).
        eps: a std below this threshold is treated as zero.

    Returns:
        Float array of the same shape. A (near-)constant series maps to
        zeros at its finite positions; an empty or entirely non-finite
        series maps to an all-NaN array of the same shape.
    """
    x = np.asarray(x, dtype=float)
    finite = np.isfinite(x)
    if not finite.any():
        # Nothing to standardise (also avoids numpy's empty-slice warnings).
        return np.full_like(x, np.nan)

    mu = np.mean(x[finite])
    sigma = np.std(x[finite], ddof=0)
    if sigma < eps:
        return np.where(finite, 0.0, np.nan)
    return (x - mu) / sigma
|
||||
|
||||
|
||||
def maybe_align_direction(z: np.ndarray, window):
    """Flip the sign of ``z`` so its mean inside the window exceeds the mean outside.

    Args:
        z: 1-D score series.
        window: ``(start, end)`` frame indices of the labelled anomaly
            window, used as the slice ``z[start:end]``; either may be None.

    Returns:
        ``z`` itself when no decision is possible (missing labels, empty
        window after clamping to the series, or a window covering the whole
        series) or when the inside mean is already >= the outside mean;
        otherwise ``-z``.
    """
    start, end = window
    if start is None or end is None:
        return z  # no labels → leave as-is

    # Clamp the window to the series bounds
    start = int(max(0, start))
    end = int(min(len(z), end))
    if end <= start:
        return z

    outside = np.concatenate((z[:start], z[end:]))
    if outside.size == 0:
        return z  # the window spans everything: nothing to compare against

    inside_mean = float(np.mean(z[start:end]))
    outside_mean = float(np.mean(outside))
    return z if inside_mean >= outside_mean else -z
|
||||
|
||||
|
||||
methods = ["deepsad", "ocsvm", "isoforest"]
method_scores = {}
method_zscores = {}

if not methods_scores_path.exists():
    raise FileNotFoundError(
        f"Methods scores path does not exist: {methods_scores_path}"
    )

# Pass 1: load the raw score series of every method.
for m in methods:
    file_path = methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy"
    if not file_path.exists():
        raise FileNotFoundError(f"Missing scores file for method '{m}': {file_path}")
    method_scores[m] = np.asarray(np.load(file_path), dtype=float).reshape(-1)

# Pass 2: align everything to the shortest series. This has to happen after
# all files are loaded: truncating while loading would leave the series
# stored in earlier iterations longer than the time axis whenever a later
# method turns out to be shorter.
num_frames = min(
    len(missing_points_pct), *(len(scores) for scores in method_scores.values())
)
missing_points_pct = missing_points_pct[:num_frames]
near_sensor_pct = near_sensor_pct[:num_frames]

for m in methods:
    s = method_scores[m][:num_frames]
    z = zscore_1d(s)
    if ALIGN_SCORE_DIRECTION:
        z = maybe_align_direction(z, anomaly_window)
    method_scores[m] = s
    method_zscores[m] = z

# Common time axis in seconds
t = np.arange(num_frames) / FPS
|
||||
|
||||
# =========================
# Plots: method z-scores vs. a per-frame statistic (%)
# =========================
def _plot_zscores_vs_stat(stat_pct, stat_label, stat_axis_label, title_suffix, file_suffix):
    """Plot all method z-scores against one statistic on a twin y-axis and save it.

    Args:
        stat_pct: per-frame statistic in percent, same length as ``t``.
        stat_label: legend label of the statistic curve.
        stat_axis_label: label of the right-hand y-axis.
        title_suffix: second line of the plot title.
        file_suffix: the PNG is saved as ``{EXPERIMENT_NAME}_{file_suffix}.png``
            inside ``output_datetime_path``.
    """
    fig, ax_scores = plt.subplots(figsize=(14, 6), constrained_layout=True)
    ax_stat = ax_scores.twinx()

    # z-scores on the left axis, the statistic on the right axis
    for m in methods:
        ax_scores.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9)
    ax_stat.plot(t, stat_pct, linestyle="--", alpha=0.7, label=stat_label)

    # vertical markers for the anomaly window, if one is available and valid
    start, end = anomaly_window
    if start is not None and end is not None and 0 <= start < end <= num_frames:
        ax_scores.axvline(x=start / FPS, linestyle=":", alpha=0.6)
        ax_scores.axvline(x=end / FPS, linestyle=":", alpha=0.6)

    ax_scores.set_xlabel("Time (s)")
    ax_scores.set_ylabel("Anomaly score (z-score, ↑ = more degraded)")
    ax_stat.set_ylabel(stat_axis_label)
    ax_scores.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}")

    # One legend combining the curves of both axes
    score_handles, score_labels = ax_scores.get_legend_handles_labels()
    stat_handles, stat_labels = ax_stat.get_legend_handles_labels()
    ax_scores.legend(
        score_handles + stat_handles, score_labels + stat_labels, loc="upper right"
    )

    ax_scores.grid(True, alpha=0.3)
    fig.savefig(output_datetime_path / f"{EXPERIMENT_NAME}_{file_suffix}.png", dpi=150)
    plt.close(fig)


# Plot 1: Missing points (%) vs. method z-scores
_plot_zscores_vs_stat(
    missing_points_pct,
    "Missing points (%)",
    "Missing points (%)",
    "Degradation vs. Missing Points",
    "zscores_vs_missing_points",
)

# Plot 2: Near-sensor (%) vs. method z-scores
_plot_zscores_vs_stat(
    near_sensor_pct,
    "Near-sensor <0.5m (%)",
    "Near-sensor points (%)",
    "Degradation vs. Near-Sensor Points (<0.5 m)",
    "zscores_vs_near_sensor",
)
|
||||
|
||||
# =========================
|
||||
# Preserve latest/, archive/, copy script
|
||||
# =========================
|
||||
|
||||
# Rebuild latest/ from scratch so it only holds the files of this run.
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for produced in output_datetime_path.iterdir():
    shutil.copy2(produced, latest_folder_path)

# Keep the exact script version next to the plots (run folder and latest/).
for destination in (output_datetime_path, latest_folder_path):
    shutil.copy2(__file__, destination)

# Finally the timestamped run folder is moved into archive/.
shutil.move(output_datetime_path, archive_folder_path)

print("Done. Plots saved and archived.")
|
||||
459
tools/plot_scripts/results_inference_timeline_smoothed.py
Normal file
@@ -0,0 +1,459 @@
|
||||
import json
|
||||
import pickle
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
# =====================================
|
||||
# User-configurable params
|
||||
# =====================================
|
||||
# Root directory that contains per-run outputs (your loader will scan this)
|
||||
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")
|
||||
|
||||
# Path that holds cached stats (same as before)
|
||||
CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline")
|
||||
|
||||
# Root data path containing .bag files to rebuild ordering (for stats mapping)
|
||||
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")
|
||||
|
||||
# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
|
||||
OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")
|
||||
|
||||
# Frames per second for x-axis time
|
||||
FPS = 10.0
|
||||
|
||||
# ---- Smoothing: EMA only ----
|
||||
EMA_ALPHA = 0.1 # models (0,1], smaller = smoother
|
||||
STATS_EMA_ALPHA = 0.1 # stats (absolute %); tweak independently if desired
|
||||
|
||||
# Whether to z-score per-curve for the model methods (recommended)
|
||||
Z_SCORE_MODELS = True
|
||||
|
||||
# If some model's series is longer/shorter than others in a group, align to min length
|
||||
ALIGN_TO_MIN_LENGTH = True
|
||||
|
||||
# Whether to try to align model score sign so that higher = more degraded using manual window
|
||||
ALIGN_SCORE_DIRECTION = True
|
||||
|
||||
# LiDAR points per frame (for stats -> percent)
|
||||
DATA_RESOLUTION = 32 * 2048
|
||||
|
||||
# =====================================
|
||||
# Setup output folders
|
||||
# =====================================
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
latest_folder_path = OUTPUT_PATH / "latest"
|
||||
archive_folder_path = OUTPUT_PATH / "archive"
|
||||
output_datetime_path = OUTPUT_PATH / datetime_folder_name
|
||||
|
||||
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# =====================================
|
||||
# Load Polars DataFrame via your helper
|
||||
# =====================================
|
||||
from load_results import load_inference_results_dataframe
|
||||
|
||||
df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)
|
||||
|
||||
# sanity
|
||||
expected_cols = {
|
||||
"experiment",
|
||||
"network",
|
||||
"latent_dim",
|
||||
"semi_normals",
|
||||
"semi_anomalous",
|
||||
"model",
|
||||
"scores",
|
||||
"folder",
|
||||
"config_json",
|
||||
}
|
||||
missing_cols = expected_cols - set(df.columns)
|
||||
if missing_cols:
|
||||
raise KeyError(f"DataFrame missing required columns: {sorted(missing_cols)}")
|
||||
|
||||
|
||||
# =====================================
|
||||
# Rebuild experiment → stats mapping (like your original)
|
||||
# =====================================
|
||||
def rebuild_experiment_index():
    """Index the .bag files under ALL_DATA_PATH by file stem.

    Bags whose file name contains "smoke" are classed as anomalous, all
    others as normal. Each class is ordered by ascending file size; the
    position in that ordering is the index stored in the mapping.

    Returns:
        (normals, anomalies, mapping) with
        mapping[stem] = (is_anomaly, index_within_class, path).
        All three are empty when ALL_DATA_PATH does not exist.
    """
    if not ALL_DATA_PATH.exists():
        return [], [], {}

    bags = [entry for entry in ALL_DATA_PATH.iterdir() if entry.suffix == ".bag"]

    def size_of(path):
        return path.stat().st_size

    normals = sorted((b for b in bags if "smoke" not in b.name), key=size_of)
    anomalies = sorted((b for b in bags if "smoke" in b.name), key=size_of)

    mapping = {}
    for is_anomaly, ordered in ((False, normals), (True, anomalies)):
        for position, bag_path in enumerate(ordered):
            mapping[bag_path.stem] = (is_anomaly, position, bag_path)
    return normals, anomalies, mapping
|
||||
|
||||
|
||||
normal_paths, anomaly_paths, exp_map = rebuild_experiment_index()

# Locations of the cached statistics and the manual anomaly labels
missing_points_cache = CACHE_PATH / "missing_points.pkl"
near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl"
labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json"

# Each pickle unpacks into a (normal, anomaly) pair; both halves stay None
# when the cache file is absent.
# NOTE: unpickling executes arbitrary code -- CACHE_PATH must only contain
# files produced by trusted scripts.
missing_points_normal = missing_points_anomaly = None
near_sensor_normal = near_sensor_anomaly = None

if missing_points_cache.exists():
    with missing_points_cache.open("rb") as f:
        missing_points_normal, missing_points_anomaly = pickle.load(f)

if near_sensor_cache.exists():
    with near_sensor_cache.open("rb") as f:
        near_sensor_normal, near_sensor_anomaly = pickle.load(f)

# filename -> (begin_frame, end_frame); either bound may be None
manual_windows = {}
if labels_json_path.exists():
    with labels_json_path.open("r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        window_begin = file.get("semi_target_begin_frame")
        window_end = file.get("semi_target_end_frame")
        manual_windows[file["filename"]] = (window_begin, window_end)
|
||||
|
||||
|
||||
def get_stats_for_experiment(exp_name: str):
    """Fetch the cached statistics and labeled window for one experiment.

    Cached per-frame counts are converted to percent of DATA_RESOLUTION.

    Returns:
        missing_pct (np.ndarray) | None,
        near_pct (np.ndarray) | None,
        anomaly_window (tuple(start, end)) | (None, None)

    Everything is None when exp_name is not a key of exp_map; a statistic is
    None when its cache was not loaded.
    """
    entry = exp_map.get(exp_name)
    if entry is None:
        return None, None, (None, None)
    is_anomaly, idx, path = entry

    def as_percent(normal_cache, anomaly_cache):
        # Both halves of a cache must be present before it is used.
        if normal_cache is None or anomaly_cache is None:
            return None
        counts = anomaly_cache[idx] if is_anomaly else normal_cache[idx]
        return (np.asarray(counts, dtype=float) / DATA_RESOLUTION) * 100.0

    missing = as_percent(missing_points_normal, missing_points_anomaly)
    near = as_percent(near_sensor_normal, near_sensor_anomaly)

    # Manual labels are keyed by the bag's file name with a .npy suffix.
    window = manual_windows.get(path.with_suffix(".npy").name, (None, None))
    return missing, near, window
|
||||
|
||||
|
||||
# =====================================
|
||||
# Helpers
|
||||
# =====================================
|
||||
def to_np(a):
    """Flatten a Polars list cell into a 1-D float NumPy array (None passes through)."""
    return None if a is None else np.ravel(np.asarray(a, dtype=float))
|
||||
|
||||
|
||||
def zscore_1d(x, eps=1e-12):
    """Standardize a series to zero mean and unit population standard deviation.

    Args:
        x: Sequence or array of numbers. None or an empty input is returned
            unchanged.
        eps: Standard deviations below this are treated as zero.

    Returns:
        A float array. It is all zeros when the series is (numerically)
        constant, so no division by ~0 takes place.
    """
    if x is None or len(x) == 0:
        return x
    # Coerce once so lists work and every branch yields a float array.
    values = np.asarray(x, dtype=float)
    mu = float(np.mean(values))
    sigma = float(np.std(values, ddof=0))
    if sigma < eps:
        return np.zeros_like(values)
    return (values - mu) / sigma
|
||||
|
||||
|
||||
def ema(x, alpha):
    """Exponential moving average seeded with the first sample.

    y[0] = x[0]; y[i] = alpha * x[i] + (1 - alpha) * y[i - 1].
    None or an empty input is returned unchanged.
    """
    if x is None or len(x) == 0:
        return x
    smoothed = np.empty_like(x, dtype=float)
    smoothed[0] = x[0]
    keep = 1 - alpha  # weight of the previous smoothed value
    for idx in range(1, len(x)):
        smoothed[idx] = alpha * x[idx] + keep * smoothed[idx - 1]
    return smoothed
|
||||
|
||||
|
||||
def apply_ema_models(x):
    """Smooth a model score series with the EMA_ALPHA smoothing factor."""
    smoothed = ema(x, EMA_ALPHA)
    return smoothed
|
||||
|
||||
|
||||
def apply_ema_stats(x):
    """Smooth a statistics series with the STATS_EMA_ALPHA smoothing factor."""
    smoothed = ema(x, STATS_EMA_ALPHA)
    return smoothed
|
||||
|
||||
|
||||
def align_lengths(series_dict):
    """Clip every series to the length of the shortest non-empty one.

    None entries stay None. When no series is non-empty the input dict itself
    is returned.
    """
    shortest = None
    for series in series_dict.values():
        if series is None or len(series) == 0:
            continue
        if shortest is None or len(series) < shortest:
            shortest = len(series)
    if shortest is None:
        return series_dict

    clipped = {}
    for key, series in series_dict.items():
        clipped[key] = None if series is None else series[:shortest]
    return clipped
|
||||
|
||||
|
||||
def maybe_align_direction(z: np.ndarray, window):
    """Flip the sign of ``z`` so the labeled window scores higher than the rest.

    Args:
        z: Score series, or None.
        window: (start, end) frame indices of the labeled anomaly window;
            either may be None. Bounds are clamped to [0, len(z)].

    Returns:
        ``z`` itself when it is None, the window is unlabeled, empty after
        clamping, or covers the whole series, or when the mean inside the
        window is already >= the mean outside it. Otherwise ``-z``.
    """
    if z is None:
        return z
    start, end = window
    if start is None or end is None:
        return z

    n = len(z)
    start = int(max(0, start))
    end = int(min(n, end))
    # Nothing to compare against: empty/inverted window, or no frames outside it.
    if end <= start or (start == 0 and end == n):
        return z

    inside_mean = float(np.mean(z[start:end]))
    # At least one of the two outside slices is non-empty at this point.
    outside_mean = float(np.mean(np.concatenate((z[:start], z[end:]))))
    return z if inside_mean >= outside_mean else -z
|
||||
|
||||
|
||||
def safe_title(s: str) -> str:
    """Return ``s`` with every underscore replaced by a space, for plot titles."""
    return " ".join(s.split("_"))
|
||||
|
||||
|
||||
# =====================================
|
||||
# Model selection per group (network names updated)
|
||||
# =====================================
|
||||
group_cols = ["experiment", "latent_dim", "semi_normals", "semi_anomalous"]
|
||||
|
||||
|
||||
def pick_rows(gdf: pl.DataFrame):
    """Select one row per plotted method from a single group's DataFrame.

    Returns a dict from display label to the first row matching that
    method's (network, model) pair, or None when the group has no such row.
    """
    wanted = (
        ("DeepSAD (LeNet)", "subter_LeNet", "deepsad"),
        ("DeepSAD (efficient)", "subter_efficient", "deepsad"),
        ("OCSVM (LeNet)", "subter_LeNet", "ocsvm"),
        ("IsoForest (LeNet)", "subter_LeNet", "isoforest"),
    )
    chosen = {}
    for label, network, model in wanted:
        matches = gdf.filter(
            (pl.col("network") == network) & (pl.col("model") == model)
        )
        chosen[label] = matches.row(0) if matches.height > 0 else None
    return chosen
|
||||
|
||||
|
||||
# =====================================
|
||||
# Iterate groups and plot
|
||||
# =====================================
|
||||
plots_made = 0
|
||||
|
||||
def _plot_models_with_stat(
    proc,
    stat_pct,
    anomaly_window,
    *,
    n_frames,
    stat_legend,
    stat_ylabel,
    title,
    out_file,
):
    """Plot processed model curves (left axis) against one smoothed statistic (right axis).

    Shared by plots B and C, which differ only in the statistic and its labels.

    Args:
        proc: label -> processed (non-empty) model score series.
        stat_pct: per-frame statistic in percent.
        anomaly_window: (start, end) frame indices; drawn as dotted lines only
            when both are ints with 0 <= start < end.
        n_frames: length the statistic is clipped to when ALIGN_TO_MIN_LENGTH is set.
        stat_legend: legend entry for the statistic.
        stat_ylabel: right-hand y-axis label.
        title: figure title.
        out_file: path of the PNG to write.
    """
    stat = stat_pct[:n_frames] if ALIGN_TO_MIN_LENGTH else stat_pct
    stat_s = apply_ema_stats(stat)

    fig, ax = plt.subplots(figsize=(14, 6), constrained_layout=True)
    ax_stat = ax.twinx()
    for label, y in proc.items():
        # Per-curve time axis: curves can differ in length when alignment is off.
        ax.plot(np.arange(len(y)) / FPS, y, label=label)
    ax_stat.plot(
        np.arange(len(stat_s)) / FPS, stat_s, linestyle="--", label=stat_legend
    )

    # The isinstance checks also reject unlabeled (None) bounds.
    start, end = anomaly_window
    if isinstance(start, int) and isinstance(end, int) and 0 <= start < end:
        ax.axvline(start / FPS, linestyle=":", alpha=0.6)
        ax.axvline(end / FPS, linestyle=":", alpha=0.6)

    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else ""))
    ax_stat.set_ylabel(stat_ylabel)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
    lines1, labels1 = ax.get_legend_handles_labels()
    lines2, labels2 = ax_stat.get_legend_handles_labels()
    ax.legend(lines1 + lines2, labels1 + labels2, loc="upper right")

    fig.savefig(out_file, dpi=150)
    plt.close(fig)


for keys, g in df.group_by(group_cols, maintain_order=True):
    experiment, latent_dim, semi_normals, semi_anomalous = keys

    # Raw score series per method (None where the group has no matching row)
    scores_idx = g.columns.index("scores")
    curves = {
        label: (None if row is None else to_np(row[scores_idx]))
        for label, row in pick_rows(g).items()
    }

    # If nothing to plot, skip group
    if all(v is None or len(v) == 0 for v in curves.values()):
        continue

    # Stats for this experiment (absolute %; no z-scoring)
    missing_pct, near_pct, anomaly_window = get_stats_for_experiment(experiment)

    # Optionally align lengths among model curves
    if ALIGN_TO_MIN_LENGTH:
        curves = align_lengths(curves)

    # Processed model curves: z-score (if enabled) + direction fix + EMA smoothing.
    # Empty series are dropped here as well as None: they cannot be plotted and
    # would otherwise break the length bookkeeping below.
    proc = {}
    for label, v in curves.items():
        if v is None or len(v) == 0:
            continue
        x = zscore_1d(v) if Z_SCORE_MODELS else v.astype(float)
        if ALIGN_SCORE_DIRECTION and anomaly_window != (None, None):
            x = maybe_align_direction(x, anomaly_window)
        proc[label] = apply_ema_models(x)

    # Non-empty by construction: the skip check above guarantees at least one
    # non-empty curve. With alignment on, every curve has exactly this length.
    n_frames = min(len(y) for y in proc.values())

    title_head = (
        f"{safe_title(experiment)} | latent_dim={latent_dim}, "
        f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n"
    )
    file_stem = f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}"
    z_suffix = "_z" if Z_SCORE_MODELS else ""

    # =========== Plot A: Scores-only (models z-scored; stats not shown) ===========
    figA, axA = plt.subplots(figsize=(14, 6), constrained_layout=True)
    for label, y in proc.items():
        axA.plot(np.arange(len(y)) / FPS, y, label=label)
    axA.set_xlabel("Time (s)")
    axA.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else ""))
    axA.set_title(title_head + f"Smoothing: EMA(alpha={EMA_ALPHA})")
    axA.grid(True, alpha=0.3)
    axA.legend(loc="upper right")
    figA.savefig(
        output_datetime_path / f"{file_stem}_scores_EMA-{EMA_ALPHA}{z_suffix}.png",
        dpi=150,
    )
    plt.close(figA)

    # =========== Plot B: Models (z-scored) + Missing Points (%) absolute ===========
    if missing_pct is not None and len(missing_pct) > 0:
        _plot_models_with_stat(
            proc,
            missing_pct,
            anomaly_window,
            n_frames=n_frames,
            stat_legend="Missing points (%)",
            stat_ylabel="Missing points (%)",
            title=title_head
            + f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Missing points (absolute %)",
            out_file=output_datetime_path
            / (
                f"{file_stem}_scores_plus_missing_EMA-{EMA_ALPHA}"
                f"_stats-{STATS_EMA_ALPHA}{z_suffix}.png"
            ),
        )

    # =========== Plot C: Models (z-scored) + Near-sensor Points (%) absolute ===========
    if near_pct is not None and len(near_pct) > 0:
        _plot_models_with_stat(
            proc,
            near_pct,
            anomaly_window,
            n_frames=n_frames,
            stat_legend="Near-sensor <0.5m (%)",
            stat_ylabel="Near-sensor points (%)",
            title=title_head
            + f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Near-sensor <0.5m (absolute %)",
            out_file=output_datetime_path
            / (
                f"{file_stem}_scores_plus_nearsensor_EMA-{EMA_ALPHA}"
                f"_stats-{STATS_EMA_ALPHA}{z_suffix}.png"
            ),
        )

    plots_made += 1
|
||||
|
||||
# =====================================
|
||||
# Preserve latest/, archive/, copy script
|
||||
# =====================================
|
||||
# Replace latest/ with a fresh copy of this run's outputs
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)

for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to preserve the code used
try:
    shutil.copy2(__file__, output_datetime_path)
    shutil.copy2(__file__, latest_folder_path)
except (NameError, OSError):
    # __file__ is undefined when running interactively (NameError) and the copy
    # itself can fail (OSError). Fall back to a snapshot of the configuration,
    # written to both folders so latest/ still mirrors the archived run.
    run_config = json.dumps(
        {
            "INFERENCE_ROOT": str(INFERENCE_ROOT),
            "CACHE_PATH": str(CACHE_PATH),
            "ALL_DATA_PATH": str(ALL_DATA_PATH),
            "FPS": FPS,
            "EMA_ALPHA": EMA_ALPHA,
            "STATS_EMA_ALPHA": STATS_EMA_ALPHA,
            "Z_SCORE_MODELS": Z_SCORE_MODELS,
            "ALIGN_TO_MIN_LENGTH": ALIGN_TO_MIN_LENGTH,
            "ALIGN_SCORE_DIRECTION": ALIGN_SCORE_DIRECTION,
            "timestamp": datetime_folder_name,
        },
        indent=2,
    )
    for folder in (output_datetime_path, latest_folder_path):
        (folder / "run_config.json").write_text(run_config)

# move output date folder to archive
shutil.move(output_datetime_path, archive_folder_path)

print(f"Done. Plotted {plots_made} groups. Archived under: {archive_folder_path}")
|
||||
631
tools/plot_scripts/results_inference_timelines_exp_compare.py
Normal file
@@ -0,0 +1,631 @@
|
||||
#!/usr/bin/env python3
|
||||
# results_inference_timelines_exp_compare.py
|
||||
|
||||
import json
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from load_results import load_inference_results_dataframe
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
# =====================================
|
||||
# User-configurable params
|
||||
# =====================================
|
||||
|
||||
# Root directory that contains per-run outputs (your loader will scan this)
|
||||
INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy")
|
||||
|
||||
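# Cached stats + manual labels. NOTE(review): this is the same directory as OUTPUT_PATH below, not the data_anomalies_timeline cache directory used by results_inference_timeline_smoothed.py; confirm the .pkl/.json files really live here.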
|
||||
CACHE_PATH = Path("/home/fedex/mt/plots/results_inference_exp_compare")
|
||||
|
||||
# .bag directory (used only to rebuild experiment order for mapping stats)
|
||||
ALL_DATA_PATH = Path("/home/fedex/mt/data/subter")
|
||||
|
||||
# Output base directory (timestamped subfolder will be created here, archived, and copied to latest/)
|
||||
OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_exp_compare")
|
||||
|
||||
# Two experiments to compare (exact strings as they appear in your DF’s `experiment` column)
|
||||
EXPERIMENT_CLEAN = "2_static_no_artifacts_illuminated_2023-01-23-001"
|
||||
EXPERIMENT_DEGRADED = "3_smoke_human_walking_2023-01-23"
|
||||
|
||||
# Shared model configuration for BOTH experiments
|
||||
LATENT_DIM = 32
|
||||
SEMI_NORMALS = 0
|
||||
SEMI_ANOMALOUS = 0
|
||||
|
||||
# Comparison y-axis mode for methods: "baseline_z" or "baseline_tailprob"
|
||||
Y_MODE = "baseline_z"
|
||||
|
||||
# Progress axis resolution (number of bins from 0% to 100%)
|
||||
PROGRESS_BINS = 100
|
||||
|
||||
# Frames per second for building time axes before progress-binning (informational only)
|
||||
FPS = 10.0
|
||||
|
||||
# ---- EMA smoothing only ----
|
||||
# Methods (scores) EMA alpha
|
||||
EMA_ALPHA_METHODS = 0.1 # (0,1], smaller = smoother
|
||||
# Stats (absolute %) EMA alpha
|
||||
EMA_ALPHA_STATS = 0.1 # (0,1], smaller = smoother
|
||||
|
||||
# LiDAR points per frame (for stats -> percent)
|
||||
DATA_RESOLUTION = 32 * 2048
|
||||
|
||||
# Copy this script into outputs for provenance (best-effort if not running as a file)
|
||||
COPY_SELF = True
|
||||
|
||||
# =====================================
|
||||
# Setup output folders
|
||||
# =====================================
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
latest_folder_path = OUTPUT_PATH / "latest"
|
||||
archive_folder_path = OUTPUT_PATH / "archive"
|
||||
output_datetime_path = OUTPUT_PATH / datetime_folder_name
|
||||
|
||||
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# =====================================
|
||||
# Load Polars DataFrame via your helper
|
||||
# =====================================
|
||||
|
||||
df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT)
|
||||
|
||||
required_cols = {
|
||||
"experiment",
|
||||
"network",
|
||||
"latent_dim",
|
||||
"semi_normals",
|
||||
"semi_anomalous",
|
||||
"model",
|
||||
"scores",
|
||||
"folder",
|
||||
"config_json",
|
||||
}
|
||||
missing = required_cols - set(df.columns)
|
||||
if missing:
|
||||
raise KeyError(f"DataFrame missing required columns: {sorted(missing)}")
|
||||
|
||||
|
||||
# =====================================
|
||||
# Rebuild experiment → stats mapping (like your original)
|
||||
# =====================================
|
||||
def rebuild_experiment_index():
    """Build the stem -> (is_anomaly, index, path) lookup for the .bag files.

    Files in ALL_DATA_PATH with a ".bag" suffix are split into anomalous
    (file name contains "smoke") and normal ones, and each list is sorted by
    ascending file size. The index stored in the mapping is the position
    within the file's own list.

    Returns:
        (normals, anomalies, mapping); all empty when ALL_DATA_PATH is missing.
    """
    normals, anomalies, mapping = [], [], {}
    if ALL_DATA_PATH.exists():
        for candidate in ALL_DATA_PATH.iterdir():
            if candidate.suffix == ".bag":
                bucket = anomalies if "smoke" in candidate.name else normals
                bucket.append(candidate)
        normals.sort(key=lambda p: p.stat().st_size)
        anomalies.sort(key=lambda p: p.stat().st_size)
        for flag, group in ((False, normals), (True, anomalies)):
            mapping.update({p.stem: (flag, i, p) for i, p in enumerate(group)})
    return normals, anomalies, mapping
|
||||
|
||||
|
||||
normal_paths, anomaly_paths, exp_map = rebuild_experiment_index()

# Cache files for the per-frame statistics and the manual anomaly labels
missing_points_cache = CACHE_PATH / "missing_points.pkl"
near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl"
labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json"

# Each pickle holds a (normal, anomaly) pair; both stay None if the file is absent.
# NOTE: unpickling executes arbitrary code -- only load caches you produced yourself.
missing_points_normal = missing_points_anomaly = None
near_sensor_normal = near_sensor_anomaly = None

if missing_points_cache.exists():
    with missing_points_cache.open("rb") as f:
        missing_points_normal, missing_points_anomaly = pickle.load(f)

if near_sensor_cache.exists():
    with near_sensor_cache.open("rb") as f:
        near_sensor_normal, near_sensor_anomaly = pickle.load(f)

# filename -> (begin_frame, end_frame); either bound may be None
manual_windows = {}
if labels_json_path.exists():
    with labels_json_path.open("r") as f:
        labeled_json = json.load(f)
    for file in labeled_json.get("files", []):
        window_begin = file.get("semi_target_begin_frame")
        window_end = file.get("semi_target_end_frame")
        manual_windows[file["filename"]] = (window_begin, window_end)
|
||||
|
||||
|
||||
# =====================================
|
||||
# Helpers
|
||||
# =====================================
|
||||
def ema(x: np.ndarray, alpha: float) -> np.ndarray:
    """Exponentially smooth ``x``, seeding the output with its first sample.

    out[0] = x[0]; out[i] = alpha * x[i] + (1 - alpha) * out[i - 1].
    None or an empty input is handed back untouched.
    """
    if x is None or len(x) == 0:
        return x
    out = np.empty_like(x, dtype=float)
    out[0] = x[0]
    for pos, sample in enumerate(x[1:], start=1):
        out[pos] = alpha * sample + (1 - alpha) * out[pos - 1]
    return out
|
||||
|
||||
|
||||
def to_np_list(list_cell) -> Optional[np.ndarray]:
    """Convert a list cell to a flat float array; None is passed through."""
    if list_cell is not None:
        flat = np.asarray(list_cell, dtype=float)
        return flat.ravel()
    return None
|
||||
|
||||
|
||||
def normalize_exp_name(name: str) -> str:
    """Drop a trailing three-digit run suffix such as "-001" if one is present."""
    stripped = re.sub(pattern=r"-\d{3}$", repl="", string=name)
    return stripped
|
||||
|
||||
|
||||
def map_experiment_to_stats_stem(exp_name: str) -> Optional[str]:
    """Resolve an experiment name to a key of ``exp_map``.

    Candidates are tried from most to least specific:
      1. exact match on ``exp_name``
      2. exact match on the name with its -### run suffix stripped
      3. first stem that starts with ``exp_name``
      4. first stem that starts with the stripped name

    Returns:
        The matching stem, or None when nothing matches.
    """
    base = normalize_exp_name(exp_name)

    for candidate in (exp_name, base):
        if candidate in exp_map:
            return candidate

    # Separate passes so that a stem matching the full name always wins over
    # one that only matches the shorter, stripped name.
    for prefix in (exp_name, base):
        for stem in exp_map:
            if stem.startswith(prefix):
                return stem
    return None
|
||||
|
||||
|
||||
def get_stats_for_experiment(
    exp_name: str,
) -> Tuple[
    Optional[np.ndarray], Optional[np.ndarray], Tuple[Optional[int], Optional[int]]
]:
    """Return (missing %, near-sensor %, labeled window) for an experiment.

    The experiment name is resolved with map_experiment_to_stats_stem; when
    that yields nothing the result is (None, None, (None, None)). Cached
    counts are expressed as percent of DATA_RESOLUTION, and a statistic is
    None when its cache was not loaded.
    """
    key = map_experiment_to_stats_stem(exp_name)
    if key is None:
        return None, None, (None, None)
    is_anomaly, idx, path = exp_map[key]

    def percent_series(normal_cache, anomaly_cache):
        # Both halves of a cache must be present before it is used.
        if normal_cache is None or anomaly_cache is None:
            return None
        source = anomaly_cache if is_anomaly else normal_cache
        return (np.asarray(source[idx], dtype=float) / DATA_RESOLUTION) * 100.0

    missing = percent_series(missing_points_normal, missing_points_anomaly)
    near = percent_series(near_sensor_normal, near_sensor_anomaly)

    # Manual labels are keyed by the bag's file name with a .npy suffix.
    label_key = path.with_suffix(".npy").name
    return missing, near, manual_windows.get(label_key, (None, None))
|
||||
|
||||
|
||||
def _bin_to_progress(x: np.ndarray, bins: int = PROGRESS_BINS) -> np.ndarray:
    """Resample ``x`` onto ``bins`` equal-progress bins by averaging.

    Bin edges are integer indices spread evenly over [0, len(x)]. A bin that
    ends up empty (fewer samples than bins) repeats the previous bin's value,
    or x[0] if it is the very first bin. None or an empty input is returned
    unchanged.
    """
    if x is None or len(x) == 0:
        return x
    edges = np.linspace(0, len(x), bins + 1, dtype=int)
    out = np.empty(bins, dtype=float)
    for i, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
        if hi > lo:
            out[i] = float(np.mean(x[lo:hi]))
        elif i > 0:
            out[i] = out[i - 1]
        else:
            out[i] = x[0]
    return out
|
||||
|
||||
|
||||
def _ecdf(x: np.ndarray):
|
||||
xs = np.sort(np.asarray(x, dtype=float))
|
||||
n = len(xs)
|
||||
|
||||
def F(t):
|
||||
return float(np.searchsorted(xs, t, side="right")) / n
|
||||
|
||||
return F
|
||||
|
||||
|
||||
def baseline_transform(clean: np.ndarray, other: np.ndarray, mode: str):
    """Express both series relative to the distribution of ``clean`` only.

    Args:
        clean: Baseline series; its statistics define the transform.
        other: Series to transform with the baseline statistics, or None.
        mode: "baseline_z" (standardize with the clean mean and population
            std; centered only when the std is ~0) or "baseline_tailprob"
            (1 - empirical CDF of clean, i.e. the fraction of clean samples
            strictly greater than each value).

    Returns:
        (clean_transformed, other_transformed, y_axis_label). When ``clean``
        is None or empty the inputs are returned untouched with label "raw".
    """
    assert mode in ("baseline_z", "baseline_tailprob"), f"unknown mode: {mode!r}"
    if clean is None or len(clean) == 0:
        return clean, other, "raw"

    clean_arr = np.asarray(clean, dtype=float)
    other_arr = None if other is None else np.asarray(other, dtype=float)

    if mode == "baseline_z":
        mu = float(np.mean(clean_arr))
        sd = float(np.std(clean_arr, ddof=0))
        # A (numerically) constant baseline cannot be scaled; only center it.
        scale = 1.0 if sd < 1e-12 else sd
        zc = (clean_arr - mu) / scale
        zo = None if other_arr is None else (other_arr - mu) / scale
        return zc, zo, "Anomaly score (σ above clean)"

    # Tail probability, vectorized: one searchsorted call against the sorted
    # baseline instead of evaluating a scalar ECDF per element.
    ordered = np.sort(clean_arr)
    n = len(ordered)

    def tail_prob(values):
        return 1.0 - np.searchsorted(ordered, values, side="right") / n

    tp_clean = tail_prob(clean_arr)
    tp_other = None if other_arr is None else tail_prob(other_arr)
    return tp_clean, tp_other, "Tail probability vs clean (1 - F_clean)"
|
||||
|
||||
|
||||
def pick_method_series(gdf: pl.DataFrame, label: str) -> Optional[np.ndarray]:
    """Return the score series of one method from a group's DataFrame.

    ``label`` selects a (network, model) pair; the scores of the first
    matching row are returned as a flat float array. Returns None for an
    unknown label or when the group has no matching row.
    """
    selectors = {
        "DeepSAD LeNet": ("subter_LeNet", "deepsad"),
        "DeepSAD Efficient": ("subter_efficient", "deepsad"),
        "OCSVM": ("subter_LeNet", "ocsvm"),
        "Isolation Forest": ("subter_LeNet", "isoforest"),
    }
    if label not in selectors:
        return None
    network, model = selectors[label]
    sel = gdf.filter((pl.col("network") == network) & (pl.col("model") == model))
    if sel.height == 0:
        return None
    first_row = dict(zip(sel.columns, sel.row(0)))
    return to_np_list(first_row["scores"])
|
||||
|
||||
|
||||
def group_slice(
    df: pl.DataFrame,
    experiment: str,
    latent_dim: int,
    semi_normals: int,
    semi_anomalous: int,
) -> pl.DataFrame:
    """Return the rows of ``df`` matching one experiment / model configuration."""
    criteria = {
        "experiment": experiment,
        "latent_dim": latent_dim,
        "semi_normals": semi_normals,
        "semi_anomalous": semi_anomalous,
    }
    # AND the per-column equality tests together, left to right.
    mask = None
    for column, wanted in criteria.items():
        term = pl.col(column) == wanted
        mask = term if mask is None else mask & term
    return df.filter(mask)
|
||||
|
||||
|
||||
def compare_two_experiments_progress(
|
||||
df: pl.DataFrame,
|
||||
experiment_clean: str,
|
||||
experiment_degraded: str,
|
||||
latent_dim: int,
|
||||
semi_normals: int,
|
||||
semi_anomalous: int,
|
||||
y_mode: str = "baseline_z",
|
||||
include_stats: bool = True,
|
||||
):
|
||||
methods = [
|
||||
"DeepSAD LeNet",
|
||||
"DeepSAD Efficient",
|
||||
"OCSVM",
|
||||
"Isolation Forest",
|
||||
]
|
||||
|
||||
g_clean = group_slice(
|
||||
df, experiment_clean, latent_dim, semi_normals, semi_anomalous
|
||||
)
|
||||
g_deg = group_slice(
|
||||
df, experiment_degraded, latent_dim, semi_normals, semi_anomalous
|
||||
)
|
||||
if g_clean.is_empty() or g_deg.is_empty():
|
||||
print(
|
||||
f"[WARN] Missing one of the experiment groups: clean({g_clean.height}), degraded({g_deg.height}). Skipping."
|
||||
)
|
||||
return 0
|
||||
|
||||
# Stats (% absolute, EMA smoothed later)
|
||||
mp_clean, ns_clean, _ = get_stats_for_experiment(experiment_clean)
|
||||
mp_deg, ns_deg, _ = get_stats_for_experiment(experiment_degraded)
|
||||
|
||||
# Build baseline-anchored, progress-binned curves per method
|
||||
curves_clean: Dict[str, np.ndarray] = {}
|
||||
curves_deg: Dict[str, np.ndarray] = {}
|
||||
y_label = "Anomaly"
|
||||
|
||||
for label in methods:
|
||||
s_clean = pick_method_series(g_clean, label)
|
||||
s_deg = pick_method_series(g_deg, label)
|
||||
if s_clean is None or s_deg is None:
|
||||
continue
|
||||
|
||||
# Smooth raw with EMA for stability before fitting baseline
|
||||
s_clean_sm = ema(s_clean.astype(float), EMA_ALPHA_METHODS)
|
||||
s_deg_sm = ema(s_deg.astype(float), EMA_ALPHA_METHODS)
|
||||
|
||||
t_clean, t_deg, y_label = baseline_transform(s_clean_sm, s_deg_sm, y_mode)
|
||||
|
||||
# Progress-bin both
|
||||
curves_clean[label] = _bin_to_progress(t_clean, PROGRESS_BINS)
|
||||
curves_deg[label] = _bin_to_progress(t_deg, PROGRESS_BINS)
|
||||
|
||||
if not curves_clean:
|
||||
print("[WARN] No method curves available for comparison in this config.")
|
||||
return 0
|
||||
|
||||
x = np.linspace(0, 100, PROGRESS_BINS)
|
||||
|
||||
# Prep stats: absolute %, EMA, progress-binned
|
||||
def prep_stat_pair(a, b):
|
||||
if a is None or len(a) == 0 or b is None or len(b) == 0:
|
||||
return None, None
|
||||
a_s = ema(a.astype(float), EMA_ALPHA_STATS)
|
||||
b_s = ema(b.astype(float), EMA_ALPHA_STATS)
|
||||
return _bin_to_progress(a_s, PROGRESS_BINS), _bin_to_progress(
|
||||
b_s, PROGRESS_BINS
|
||||
)
|
||||
|
||||
mp_c, mp_d = prep_stat_pair(mp_clean, mp_deg)
|
||||
ns_c, ns_d = prep_stat_pair(ns_clean, ns_deg)
|
||||
|
||||
# Colors & styles
|
||||
COLOR_METHOD = "#d62728" # vibrant red
|
||||
COLOR_MISSING = "#9ecae1" # pale blue
|
||||
COLOR_NEAR = "#a1d99b" # pale green
|
||||
|
||||
LS_CLEAN = "--" # dashed for normal/clean
|
||||
LS_DEG = "-" # solid for anomalous/degraded
|
||||
LW_METHOD = 1.8
|
||||
LW_METHOD_CLEAN = 1.2
|
||||
LW_STATS = 1.6
|
||||
ALPHA_STATS = 0.95
|
||||
|
||||
# Build the 2x2 subplots
|
||||
fig, axes = plt.subplots(
|
||||
4, 1, figsize=(12, 16), constrained_layout=True, sharex=False
|
||||
)
|
||||
axes = axes.ravel()
|
||||
|
||||
method_to_axidx = {
|
||||
"DeepSAD LeNet": 0,
|
||||
"DeepSAD Efficient": 1,
|
||||
"OCSVM": 2,
|
||||
"Isolation Forest": 3,
|
||||
}
|
||||
|
||||
stats_available = (
|
||||
mp_c is not None and mp_d is not None and ns_c is not None and ns_d is not None
|
||||
)
|
||||
if not stats_available:
|
||||
print("[WARN] One or both stats missing. Subplots will include methods only.")
|
||||
|
||||
letters = ["a", "b", "c", "d"]
|
||||
|
||||
for label, axidx in method_to_axidx.items():
|
||||
ax = axes[axidx]
|
||||
yc = curves_clean.get(label)
|
||||
yd = curves_deg.get(label)
|
||||
if yc is None or yd is None:
|
||||
ax.text(
|
||||
0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes
|
||||
)
|
||||
ax.set_title(f"({letters[axidx]}) {label}")
|
||||
ax.grid(True, alpha=0.3)
|
||||
continue
|
||||
|
||||
# Left axis: method score (z or tailprob)
|
||||
ax.plot(
|
||||
x,
|
||||
yd,
|
||||
linestyle=LS_DEG,
|
||||
color=COLOR_METHOD,
|
||||
linewidth=LW_METHOD,
|
||||
label=f"{label} — degraded",
|
||||
)
|
||||
ax.plot(
|
||||
x,
|
||||
yc,
|
||||
linestyle=LS_CLEAN,
|
||||
color=COLOR_METHOD,
|
||||
linewidth=LW_METHOD_CLEAN,
|
||||
label=f"{label} — clean",
|
||||
)
|
||||
ax.set_ylabel(y_label)
|
||||
ax.set_title(label)
|
||||
ax.set_title(f"({letters[axidx]}) {label}")
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
# Right axis #1 (closest to plot): Missing points (%)
|
||||
axy_miss = ax.twinx()
|
||||
if mp_c is not None and mp_d is not None:
|
||||
axy_miss.plot(
|
||||
x,
|
||||
mp_d,
|
||||
linestyle=LS_DEG,
|
||||
color=COLOR_MISSING,
|
||||
alpha=ALPHA_STATS,
|
||||
linewidth=LW_STATS,
|
||||
label="Missing points — degraded (%)",
|
||||
)
|
||||
axy_miss.plot(
|
||||
x,
|
||||
mp_c,
|
||||
linestyle=LS_CLEAN,
|
||||
color=COLOR_MISSING,
|
||||
alpha=ALPHA_STATS,
|
||||
linewidth=LW_STATS,
|
||||
label="Missing points — clean (%)",
|
||||
)
|
||||
axy_miss.set_ylabel("Missing points (%)")
|
||||
axy_miss.tick_params(axis="y") # , colors=COLOR_MISSING)
|
||||
# axy_miss.spines["right"].set_edgecolor(COLOR_MISSING)
|
||||
|
||||
# Right axis #2 (slightly offset): Near-sensor points (%)
|
||||
axy_near = ax.twinx()
|
||||
# push this spine outward so it doesn't overlap the first right axis
|
||||
axy_near.spines["right"].set_position(("axes", 1.08))
|
||||
# make patch invisible so only spine shows
|
||||
axy_near.set_frame_on(True)
|
||||
axy_near.patch.set_visible(False)
|
||||
|
||||
if ns_c is not None and ns_d is not None:
|
||||
axy_near.plot(
|
||||
x,
|
||||
ns_d,
|
||||
linestyle=LS_DEG,
|
||||
color=COLOR_NEAR,
|
||||
alpha=ALPHA_STATS,
|
||||
linewidth=LW_STATS,
|
||||
label="Near-sensor — degraded (%)",
|
||||
)
|
||||
axy_near.plot(
|
||||
x,
|
||||
ns_c,
|
||||
linestyle=LS_CLEAN,
|
||||
color=COLOR_NEAR,
|
||||
alpha=ALPHA_STATS,
|
||||
linewidth=LW_STATS,
|
||||
label="Near-sensor — clean (%)",
|
||||
)
|
||||
axy_near.set_ylabel("Near-sensor points (%)")
|
||||
axy_near.tick_params(axis="y") # , colors=COLOR_NEAR)
|
||||
# axy_near.spines["right"].set_edgecolor(COLOR_NEAR)
|
||||
|
||||
# Compose legend: show *method name* explicitly, plus the two stats
|
||||
handles = [
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_METHOD,
|
||||
lw=LW_METHOD,
|
||||
ls=LS_DEG,
|
||||
label=f"{label} — degraded",
|
||||
),
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_METHOD,
|
||||
lw=LW_METHOD_CLEAN,
|
||||
ls=LS_CLEAN,
|
||||
label=f"{label} — clean",
|
||||
),
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_MISSING,
|
||||
lw=LW_STATS,
|
||||
ls=LS_DEG,
|
||||
label="Missing points — degraded",
|
||||
),
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_MISSING,
|
||||
lw=LW_STATS,
|
||||
ls=LS_CLEAN,
|
||||
label="Missing points — clean",
|
||||
),
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_NEAR,
|
||||
lw=LW_STATS,
|
||||
ls=LS_DEG,
|
||||
label="Near-sensor — degraded",
|
||||
),
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
color=COLOR_NEAR,
|
||||
lw=LW_STATS,
|
||||
ls=LS_CLEAN,
|
||||
label="Near-sensor — clean",
|
||||
),
|
||||
]
|
||||
ax.legend(handles=handles, loc="upper left", fontsize=9, framealpha=0.9)
|
||||
|
||||
# Shared labels / super-title
|
||||
for ax in axes:
|
||||
ax.set_xlabel("Progress through experiment (%)")
|
||||
|
||||
# fig.suptitle(
|
||||
# f"AD Method vs Stats Inference — progress-normalized\n"
|
||||
# f"Transform: z-score normalized to non-degraded experiment | EMA(α={EMA_ALPHA_METHODS})",
|
||||
# fontsize=14,
|
||||
# )
|
||||
fig.tight_layout(rect=[0, 0, 1, 0.99])
|
||||
|
||||
out_name = (
|
||||
f"4up_{EXPERIMENT_CLEAN}_vs_{EXPERIMENT_DEGRADED}"
|
||||
f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}_{y_mode}_methods_vs_stats.png"
|
||||
)
|
||||
fig.savefig(output_datetime_path / out_name, dpi=150)
|
||||
plt.close(fig)
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
# =====================================
# Run comparison & save
# =====================================
# Produce the comparison figure for the configured experiment pair. The return
# value is only used as the figure count in the final status message below.
plots_made = compare_two_experiments_progress(
    df=df,
    experiment_clean=EXPERIMENT_CLEAN,
    experiment_degraded=EXPERIMENT_DEGRADED,
    latent_dim=LATENT_DIM,
    semi_normals=SEMI_NORMALS,
    semi_anomalous=SEMI_ANOMALOUS,
    y_mode=Y_MODE,
    include_stats=True,
)

# =====================================
# Preserve latest/, archive/, copy script
# =====================================
# delete current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)
# create new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy contents of output folder to the latest folder
# NOTE(review): shutil.copy2 copies single files; presumably the output folder
# never contains sub-directories — confirm.
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this python script to preserve the code used (best effort)
if COPY_SELF:
    try:
        shutil.copy2(__file__, output_datetime_path)
        shutil.copy2(__file__, latest_folder_path)
    except Exception:
        # Fallback when the script itself cannot be copied: record the run
        # configuration as JSON next to the outputs instead.
        # NOTE(review): the fallback file is written to the dated output folder
        # only, not to latest/ — confirm that this is intended.
        (output_datetime_path / "run_config.json").write_text(
            json.dumps(
                {
                    "INFERENCE_ROOT": str(INFERENCE_ROOT),
                    "CACHE_PATH": str(CACHE_PATH),
                    "ALL_DATA_PATH": str(ALL_DATA_PATH),
                    "EXPERIMENT_CLEAN": EXPERIMENT_CLEAN,
                    "EXPERIMENT_DEGRADED": EXPERIMENT_DEGRADED,
                    "LATENT_DIM": LATENT_DIM,
                    "SEMI_NORMALS": SEMI_NORMALS,
                    "SEMI_ANOMALOUS": SEMI_ANOMALOUS,
                    "Y_MODE": Y_MODE,
                    "PROGRESS_BINS": PROGRESS_BINS,
                    "FPS": FPS,
                    "EMA_ALPHA_METHODS": EMA_ALPHA_METHODS,
                    "EMA_ALPHA_STATS": EMA_ALPHA_STATS,
                    "DATA_RESOLUTION": DATA_RESOLUTION,
                    "timestamp": datetime_folder_name,
                },
                indent=2,
            )
        )

# move output date folder to archive
# (runs after latest/ has been populated, so latest/ keeps its own copies)
shutil.move(output_datetime_path, archive_folder_path)

print(f"Done. Wrote {plots_made} figure(s). Archived under: {archive_folder_path}")
|
||||
@@ -7,10 +7,10 @@ from pathlib import Path
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
|
||||
from load_results import load_results_dataframe
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
# ----------------------------
|
||||
# Config
|
||||
@@ -26,6 +26,10 @@ SEMI_ANOMALOUS = 10
|
||||
|
||||
# Which evaluation columns to plot
|
||||
EVALS = ["exp_based", "manual_based"]
|
||||
EVALS_LABELS = {
|
||||
"exp_based": "Experiment-Label-Based",
|
||||
"manual_based": "Manually-Labeled",
|
||||
}
|
||||
|
||||
# Latent dimensions to show as 7 subplots
|
||||
LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
|
||||
@@ -157,7 +161,7 @@ def _ensure_dim_axes(fig_title: str):
|
||||
fig, axes = plt.subplots(
|
||||
nrows=4, ncols=2, figsize=(12, 16), constrained_layout=True
|
||||
)
|
||||
fig.suptitle(fig_title, fontsize=14)
|
||||
# fig.suptitle(fig_title, fontsize=14)
|
||||
axes = axes.ravel()
|
||||
return fig, axes
|
||||
|
||||
@@ -188,7 +192,7 @@ def plot_grid_from_df(
|
||||
Create a 2x4 grid of subplots, one per latent dim; 8th panel holds legend.
|
||||
kind: 'roc' or 'prc'
|
||||
"""
|
||||
fig_title = f"{kind.upper()} — {eval_type} (semi = {semi_normals}/{semi_anomalous})"
|
||||
fig_title = f"{kind.upper()} — {EVALS_LABELS[eval_type]} (Semi-Labeling Regime = {semi_normals}/{semi_anomalous})"
|
||||
fig, axes = _ensure_dim_axes(fig_title)
|
||||
|
||||
# plotting order & colors
|
||||
@@ -209,11 +213,13 @@ def plot_grid_from_df(
|
||||
legend_labels = []
|
||||
have_legend = False
|
||||
|
||||
letters = ["a", "b", "c", "d", "e", "f", "g", "h"]
|
||||
|
||||
for i, dim in enumerate(LATENT_DIMS):
|
||||
if i >= 7:
|
||||
break # last slot reserved for legend
|
||||
ax = axes[i]
|
||||
ax.set_title(f"latent_dim = {dim}")
|
||||
ax.set_title(f"({letters[i]}) Latent Dim. = {dim}")
|
||||
ax.grid(True, alpha=0.3)
|
||||
|
||||
if kind == "roc":
|
||||
|
||||
505
tools/plot_scripts/results_latent_space_tables.py
Normal file
@@ -0,0 +1,505 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
# CHANGE THIS IMPORT IF YOUR LOADER MODULE IS NAMED DIFFERENTLY
|
||||
from load_results import load_results_dataframe
|
||||
|
||||
# ----------------------------
# Config
# ----------------------------
ROOT = Path("/home/fedex/mt/results/copy")  # experiments root you pass to the loader
# Base output folder; main() creates archive/<timestamp>/ and latest/ below it.
OUTPUT_DIR = Path("/home/fedex/mt/plots/results_latent_space_tables")

# Semi-labeling regimes (semi_normals, semi_anomalous) in display order
SEMI_LABELING_REGIMES: list[tuple[int, int]] = [(0, 0), (50, 10), (500, 100)]

# Both evals are shown side-by-side in one table
# (index 0 is the left column group, index 1 the right column group)
EVALS_BOTH: tuple[str, str] = ("exp_based", "manual_based")

# Row order (latent dims)
LATENT_DIMS: list[int] = [32, 64, 128, 256, 512, 768, 1024]

# Column order (method shown to the user)
# We split DeepSAD into the two network backbones, like your plots.
# Each entry is a (model, net_label) pair used to look up aggregated cells.
METHOD_COLUMNS: list[tuple[str, str]] = [
    ("deepsad", "LeNet"),  # DeepSAD (LeNet)
    ("deepsad", "Efficient"),  # DeepSAD (Efficient)
    ("isoforest", "Efficient"),  # IsolationForest (Efficient baseline)
    ("ocsvm", "Efficient"),  # OC-SVM (Efficient baseline)
]

# Formatting
DECIMALS = 3  # cells look like 1.000 or 0.928 (3 decimals)
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Helpers
|
||||
# ----------------------------
|
||||
|
||||
|
||||
def _fmt_mean_std(mean: float | None, std: float | None) -> str:
|
||||
"""Format mean ± std with 3 decimals (leading zero), or '--' if missing."""
|
||||
if mean is None or not (mean == mean): # NaN check
|
||||
return "--"
|
||||
if std is None or not (std == std):
|
||||
return f"{mean:.3f}"
|
||||
return f"{mean:.3f}$\\,\\pm\\,{std:.3f}$"
|
||||
|
||||
|
||||
def _with_net_label(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with an extra ``net_label`` column derived from ``network``.

    The label is ``"LeNet"`` when the lower-cased network name contains
    ``"lenet"``, ``"Efficient"`` when it contains ``"efficient"``, and the
    network name itself (cast to string) otherwise.
    """
    network_name = pl.col("network").cast(pl.Utf8)
    network_lower = network_name.str.to_lowercase()

    # The first matching branch wins, so "lenet" is checked before "efficient".
    net_label = (
        pl.when(network_lower.str.contains("lenet"))
        .then(pl.lit("LeNet"))
        .when(network_lower.str.contains("efficient"))
        .then(pl.lit("Efficient"))
        .otherwise(network_name)
        .alias("net_label")
    )
    return df.with_columns(net_label)
|
||||
|
||||
|
||||
def _filter_base(df: pl.DataFrame) -> pl.DataFrame:
    """Keep only the rows and columns the tables need.

    Rows are restricted to the latent dims in ``LATENT_DIMS``, the three models
    (deepsad / isoforest / ocsvm) and the evaluation types in ``EVALS_BOTH``.
    No filtering on the semi-labeling regime happens here.
    """
    keep_rows = (
        pl.col("latent_dim").is_in(LATENT_DIMS)
        & pl.col("model").is_in(["deepsad", "isoforest", "ocsvm"])
        & pl.col("eval").is_in(list(EVALS_BOTH))
    )
    needed_columns = [
        "model",
        "net_label",
        "latent_dim",
        "fold",
        "ap",
        "eval",
        "semi_normals",
        "semi_anomalous",
    ]
    return df.filter(keep_rows).select(needed_columns)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Cell:
    """Immutable aggregate for one table cell (AP statistics of one group)."""

    # Mean AP of the group; None when no value is available.
    mean: float | None
    # Standard deviation of AP within the group; None when not available.
    std: float | None
|
||||
|
||||
|
||||
def _compute_cells(df: pl.DataFrame) -> dict[tuple[str, int, str, str, int, int], Cell]:
    """
    Aggregate AP into one ``Cell`` per group.

    Groups are keyed by (eval, latent_dim, model, net_label, semi_normals,
    semi_anomalous); each cell holds the mean and the standard deviation of the
    ``ap`` column over the rows of that group (as computed by polars'
    ``mean()`` / ``std()``). An empty frame yields an empty dict.
    """
    if df.is_empty():
        return {}

    # Baseline rows (isoforest/ocsvm) are kept only for the Efficient backbone;
    # every other row passes through unchanged.
    baseline_on_efficient = (
        pl.when(pl.col("model").is_in(["isoforest", "ocsvm"]))
        .then(pl.col("net_label") == "Efficient")
        .otherwise(True)
    )
    group_keys = [
        "eval",
        "latent_dim",
        "model",
        "net_label",
        "semi_normals",
        "semi_anomalous",
    ]
    stats = (
        df.filter(baseline_on_efficient)
        .group_by(group_keys)
        .agg(
            pl.col("ap").mean().alias("mean_ap"),
            pl.col("ap").std().alias("std_ap"),
        )
    )

    return {
        (
            str(rec["eval"]),
            int(rec["latent_dim"]),
            str(rec["model"]),
            str(rec["net_label"]),
            int(rec["semi_normals"]),
            int(rec["semi_anomalous"]),
        ): Cell(mean=rec.get("mean_ap"), std=rec.get("std_ap"))
        for rec in stats.to_dicts()
    }
|
||||
|
||||
|
||||
def method_label(model: str, net_label: str) -> str:
    """Return the display name for a (model, net_label) pair.

    DeepSAD is split by backbone (LeNet / Efficient); the two baselines are
    named regardless of ``net_label``. Any other combination yields ``""`` so
    callers can skip it.
    """
    if model == "deepsad":
        deepsad_names = {
            "LeNet": "DeepSAD (LeNet)",
            "Efficient": "DeepSAD (Efficient)",
        }
        return deepsad_names.get(net_label, "")

    baseline_names = {"isoforest": "IsoForest", "ocsvm": "OC-SVM"}
    return baseline_names.get(model, "")
|
||||
|
||||
|
||||
def per_method_median_std_from_cells(
    cells: dict[tuple[str, int, str, str, int, int], Cell],
) -> dict[str, float]:
    """Return, per method, the median of the per-cell standard deviations.

    Cells whose std is ``None``/NaN, or whose (model, net_label) pair is not
    one of the four table methods, are ignored. A method without any usable
    cell maps to NaN.
    """
    method_names = ("DeepSAD (LeNet)", "DeepSAD (Efficient)", "IsoForest", "OC-SVM")
    collected: dict[str, list[float]] = {name: [] for name in method_names}

    for (_ev, _dim, model, net, _semi_n, _semi_a), cell in cells.items():
        std = cell.std
        if std is None or std != std:  # missing or NaN
            continue
        name = method_label(model, net)
        if not name:
            continue
        collected[name].append(std)

    medians: dict[str, float] = {}
    for name, stds in collected.items():
        medians[name] = float(np.median(stds)) if stds else float("nan")
    return medians
|
||||
|
||||
|
||||
def per_method_max_std_from_cells(
    cells: dict[tuple[str, int, str, str, int, int], Cell],
) -> tuple[dict[str, float], dict[str, tuple]]:
    """
    Find the largest per-cell standard deviation for each table method.

    Returns a pair:
      - max std per method name; methods without any usable cell get 0.0
      - the cell key (eval, dim, model, net, semi_n, semi_a) that produced each
        maximum; methods without any usable cell have no entry
    Cells with a ``None``/NaN std or an unknown method are skipped. On ties the
    first cell encountered is kept.
    """
    method_names = ("DeepSAD (LeNet)", "DeepSAD (Efficient)", "IsoForest", "OC-SVM")
    # None marks "no value seen yet" for a method.
    best: dict[str, float | None] = {name: None for name in method_names}
    argmax_key_by_method: dict[str, tuple] = {}

    for key, cell in cells.items():
        _ev, _dim, model, net, _semi_n, _semi_a = key
        name = method_label(model, net)
        std = cell.std
        if not name or std is None or std != std:
            continue
        current = best.get(name)
        if current is None or std > current:
            best[name] = std
            argmax_key_by_method[name] = key

    # Methods that never received a value are reported as 0.0 for formatting.
    max_std_by_method = {
        name: (0.0 if value is None else value) for name, value in best.items()
    }
    return max_std_by_method, argmax_key_by_method
|
||||
|
||||
|
||||
def _fmt_val(val: float | None) -> str:
|
||||
"""
|
||||
Format value as:
|
||||
- '--' if None/NaN
|
||||
- '1.0' if exactly 1 (within 1e-9)
|
||||
- '.xx' otherwise (2 decimals, no leading 0)
|
||||
"""
|
||||
if val is None or not (val == val): # None or NaN
|
||||
return "--"
|
||||
if abs(val - 1.0) < 1e-9:
|
||||
return "1.0"
|
||||
return f"{val:.2f}".lstrip("0")
|
||||
|
||||
|
||||
def _fmt_mean(mean: float | None) -> str:
    """Format a mean with ``DECIMALS`` decimals, or '--' when it is None/NaN."""
    if mean is None or mean != mean:
        return "--"
    return f"{mean:.{DECIMALS}f}"
|
||||
|
||||
|
||||
def _bold_best_mask_display(values: list[float | None], decimals: int) -> list[bool]:
|
||||
"""
|
||||
Bolding mask based on *displayed* precision. Any entries that round (via f-string)
|
||||
to the maximum at 'decimals' places are bolded (ties bolded).
|
||||
"""
|
||||
|
||||
def disp(v: float | None) -> float | None:
|
||||
if v is None or not (v == v):
|
||||
return None
|
||||
return float(f"{v:.{decimals}f}")
|
||||
|
||||
rounded = [disp(v) for v in values]
|
||||
finite = [v for v in rounded if v is not None]
|
||||
if not finite:
|
||||
return [False] * len(values)
|
||||
maxv = max(finite)
|
||||
return [(v is not None and v == maxv) for v in rounded]
|
||||
|
||||
|
||||
def _build_exp_based_table(
    cells: dict[tuple[str, int, str, str, int, int], Cell],
    *,
    semi_labeling_regimes: list[tuple[int, int]],
) -> str:
    """
    Render the experiment-based ("exp_based") AP results as a LaTeX table.

    One block per labeling regime, one row per latent dim in ``LATENT_DIMS``,
    one column per entry of ``METHOD_COLUMNS``; each cell shows mean ± std.
    Missing cells are rendered by ``_fmt_mean_std`` as '--'.

    NOTE(review): the output uses ``\\rotheader`` and a ``Y`` column type,
    which are presumably defined in the LaTeX document preamble — confirm.
    """
    method_headers = " & ".join(
        (
            r"\rotheader{DeepSAD\\(LeNet)}",
            r"\rotheader{DeepSAD\\(Efficient)}",
            r"\rotheader{IsoForest}",
            r"\rotheader{OC-SVM}",
        )
    )
    missing = Cell(None, None)

    out: list[str] = [
        r"\begin{table}[t]",
        r"\centering",
        r"\setlength{\tabcolsep}{4pt}",
        r"\renewcommand{\arraystretch}{1.2}",
        r"\begin{tabularx}{\textwidth}{c*{4}{Y}}",
        r"\toprule",
        r"Latent Dim. & " + method_headers + r" \\",
        r"\midrule",
    ]

    final_idx = len(semi_labeling_regimes) - 1
    for idx, (semi_n, semi_a) in enumerate(semi_labeling_regimes):
        # Full-width row naming the regime of the following block.
        out.append(
            rf"\multicolumn{{5}}{{l}}{{\textbf{{Labeling regime: }}\(\mathbf{{{semi_n}/{semi_a}}}\)}} \\"
        )
        out.append(r"\addlinespace[2pt]")

        for dim in LATENT_DIMS:
            found = (
                cells.get(("exp_based", dim, model, net, semi_n, semi_a), missing)
                for model, net in METHOD_COLUMNS
            )
            row_vals = [_fmt_mean_std(cell.mean, cell.std) for cell in found]
            out.append(f"{dim} & " + " & ".join(row_vals) + r" \\")

        # Rule between regime blocks, but not after the last one.
        if idx < final_idx:
            out.append(r"\midrule")

    out.extend(
        [
            r"\bottomrule",
            r"\end{tabularx}",
            r"\caption{AP means $\pm$ std across 5 folds for experiment-based evaluation only, grouped by labeling regime.}",
            r"\end{table}",
        ]
    )
    return "\n".join(out)
|
||||
|
||||
|
||||
def _build_single_table(
    cells: dict[tuple[str, int, str, str, int, int], Cell],
    *,
    semi_labeling_regimes: list[tuple[int, int]],
) -> tuple[str, float | None]:
    """
    Build the combined LaTeX table (both evaluations side by side).

    Layout: one block per labeling regime, one row per latent dim in
    ``LATENT_DIMS``; the left four columns hold ``EVALS_BOTH[0]`` and the right
    four ``EVALS_BOTH[1]``, each in ``METHOD_COLUMNS`` order. Cells show the
    mean only; within each group of four, the best mean at displayed precision
    is bolded (ties included).

    The caption reports, per method, the median of the per-cell standard
    deviations as returned by ``per_method_median_std_from_cells``.

    Returns:
        (latex, max_std_overall) where ``max_std_overall`` is the largest
        non-NaN std among the cells placed in the table, or None if there is
        none.

    NOTE(review): the output uses ``\\rotheader`` and a ``Y`` column type,
    which are presumably defined in the LaTeX document preamble — confirm.
    """
    # Rotated header labels, repeated once per evaluation group
    header_cols = [
        r"\rotheader{DeepSAD\\(LeNet)}",
        r"\rotheader{DeepSAD\\(Efficient)}",
        r"\rotheader{IsoForest}",
        r"\rotheader{OC-SVM}",
    ]
    missing = Cell(None, None)

    # Largest std seen among the cells placed in the table
    max_std: float | None = None

    lines: list[str] = [
        r"\begin{table}[t]",
        r"\centering",
        r"\setlength{\tabcolsep}{4pt}",
        r"\renewcommand{\arraystretch}{1.2}",
        # Vertical rule between the two evaluation groups for data/header rows
        r"\begin{tabularx}{\textwidth}{c*{4}{Y}|*{4}{Y}}",
        r"\toprule",
        r" & \multicolumn{4}{c}{Experiment-based eval.} & \multicolumn{4}{c}{Handlabeled eval.} \\",
        r"\cmidrule(lr){2-5} \cmidrule(lr){6-9}",
        r"Latent Dim. & "
        + " & ".join(header_cols)
        + " & "
        + " & ".join(header_cols)
        + r" \\",
        r"\midrule",
    ]

    # The header above has exactly two groups: left = exp_based, right = manual_based.
    eval_groups = (EVALS_BOTH[0], EVALS_BOTH[1])
    final_idx = len(semi_labeling_regimes) - 1

    for idx, (semi_n, semi_a) in enumerate(semi_labeling_regimes):
        # Regime label row (multicolumn suppresses the vertical bar in this row)
        lines.append(
            rf"\multicolumn{{9}}{{l}}{{\textbf{{Labeling regime: }}\(\mathbf{{{semi_n}/{semi_a}}}\) "
            rf"\textit{{(normal/anomalous samples labeled)}}}} \\"
        )
        lines.append(r"\addlinespace[2pt]")

        for dim in LATENT_DIMS:
            row_cells: list[str] = []
            for eval_type in eval_groups:
                group = [
                    cells.get((eval_type, dim, model, net, semi_n, semi_a), missing)
                    for model, net in METHOD_COLUMNS
                ]

                for cell in group:
                    std = cell.std
                    if std is None or std != std:  # missing or NaN
                        continue
                    if max_std is None or std > max_std:
                        max_std = std

                # Bolding is decided per group, on the displayed precision.
                means = [cell.mean for cell in group]
                mask = _bold_best_mask_display(means, DECIMALS)
                for mean, do_bold in zip(means, mask):
                    text = _fmt_mean(mean)
                    if do_bold and text != "--":
                        text = r"\textbf{" + text + "}"
                    row_cells.append(text)

            # The vertical bar between groups comes from the column spec.
            lines.append(f"{dim} & " + " & ".join(row_cells) + r" \\")

        # Separator between regime blocks (but not after the last one)
        if idx < final_idx:
            lines.append(r"\midrule")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabularx}")

    # Per-method median of the per-cell stds, reported in the caption.
    median_std_by_method = per_method_median_std_from_cells(cells)

    def _fmt_median(value: float) -> str:
        # A method without any usable cell yields NaN; show it as missing.
        return "--" if value != value else f"{value:.3f}"

    for name, v in median_std_by_method.items():
        print(f"[median-std] {name}: {_fmt_median(v)}")

    cap_str = "; ".join(
        f"{name} {_fmt_median(median_std_by_method.get(name, 0.0))}"
        for name in ["DeepSAD (LeNet)", "DeepSAD (Efficient)", "IsoForest", "OC-SVM"]
    )

    lines.append(
        rf"\caption{{AP means across 5 folds for both evaluations, grouped by labeling regime. "
        rf"Median per-cell standard deviation per method (not shown in table): {cap_str}.}}"
    )
    lines.append(r"\end{table}")

    return "\n".join(lines), max_std
|
||||
|
||||
|
||||
def main():
    """Build both LaTeX tables and store them under ``OUTPUT_DIR``.

    Steps: load the results frame, add ``net_label`` and filter it, aggregate
    the cells, write the two ``.tex`` files into a new timestamped folder below
    ``archive/``, copy this script next to them, then replace the files in
    ``latest/`` with copies of the new outputs.
    """
    # Load full results DF (cache behavior handled by your loader)
    df = load_results_dataframe(ROOT, allow_cache=True)
    df = _with_net_label(df)
    df = _filter_base(df)

    # Prepare output dirs
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    archive_dir = OUTPUT_DIR / "archive"
    archive_dir.mkdir(parents=True, exist_ok=True)
    ts_dir = archive_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    ts_dir.mkdir(parents=True, exist_ok=True)

    # Pre-compute aggregated cells (mean/std) for all evals/regimes
    cells = _compute_cells(df)

    # Build the single big table (the overall max std is not needed here)
    tex, _max_std = _build_single_table(
        cells, semi_labeling_regimes=SEMI_LABELING_REGIMES
    )

    out_name = "ap_table_all_evals_all_regimes.tex"
    out_path = ts_dir / out_name
    out_path.write_text(tex, encoding="utf-8")

    # Build experiment-based table with mean ± std
    tex_exp = _build_exp_based_table(cells, semi_labeling_regimes=SEMI_LABELING_REGIMES)

    out_name_exp = "ap_table_exp_based_mean_std.tex"
    out_path_exp = ts_dir / out_name_exp
    out_path_exp.write_text(tex_exp, encoding="utf-8")

    # Copy this script to preserve the code used for the outputs
    script_path = Path(__file__)
    shutil.copy2(script_path, ts_dir / script_path.name)

    # Mirror latest: drop the old files, then copy the new ones
    # (only regular files are touched; sub-directories are left alone)
    latest = OUTPUT_DIR / "latest"
    latest.mkdir(exist_ok=True, parents=True)
    for f in latest.iterdir():
        if f.is_file():
            f.unlink()
    for f in ts_dir.iterdir():
        if f.is_file():
            shutil.copy2(f, latest / f.name)

    print(f"Saved table to: {ts_dir}")
    print(f"Also updated:   {latest}")
    print(f"  - {out_name}")
    # Report the second table as well; it is written above but was not listed.
    print(f"  - {out_name_exp}")
|
||||
|
||||
|
||||
# Run the table generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -8,11 +8,11 @@ from pathlib import Path
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from matplotlib.lines import Line2D
|
||||
from scipy.stats import sem, t
|
||||
|
||||
# CHANGE THIS IMPORT IF YOUR LOADER MODULE NAME IS DIFFERENT
|
||||
from load_results import load_results_dataframe
|
||||
from matplotlib.lines import Line2D
|
||||
from scipy.stats import sem, t
|
||||
|
||||
# ---------------------------------
|
||||
# Config
|
||||
@@ -23,6 +23,10 @@ OUTPUT_DIR = Path("/home/fedex/mt/plots/results_semi_labels_comparison")
|
||||
LATENT_DIMS = [32, 64, 128, 256, 512, 768, 1024]
|
||||
SEMI_REGIMES = [(0, 0), (50, 10), (500, 100)]
|
||||
EVALS = ["exp_based", "manual_based"]
|
||||
EVALS_LABELS = {
|
||||
"exp_based": "Experiment-Based Labels",
|
||||
"manual_based": "Manually-Labeled",
|
||||
}
|
||||
|
||||
# Interp grids
|
||||
ROC_GRID = np.linspace(0.0, 1.0, 200)
|
||||
@@ -30,6 +34,10 @@ PRC_GRID = np.linspace(0.0, 1.0, 200)
|
||||
|
||||
# Baselines are duplicated across nets; use Efficient-only to avoid repetition
|
||||
BASELINE_NET = "Efficient"
|
||||
BASELINE_LABELS = {
|
||||
"isoforest": "Isolation Forest",
|
||||
"ocsvm": "One-Class SVM",
|
||||
}
|
||||
|
||||
# Colors/styles
|
||||
COLOR_BASELINES = {
|
||||
@@ -147,12 +155,8 @@ def _select_rows(
|
||||
return df.filter(pl.all_horizontal(exprs))
|
||||
|
||||
|
||||
def _auc_list(sub: pl.DataFrame) -> list[float]:
|
||||
return [x for x in sub.select("auc").to_series().to_list() if x is not None]
|
||||
|
||||
|
||||
def _ap_list(sub: pl.DataFrame) -> list[float]:
|
||||
return [x for x in sub.select("ap").to_series().to_list() if x is not None]
|
||||
def _auc_list(sub: pl.DataFrame, kind: str) -> list[float]:
|
||||
return [x for x in sub.select(f"{kind}_auc").to_series().to_list() if x is not None]
|
||||
|
||||
|
||||
def _plot_panel(
|
||||
@@ -165,7 +169,7 @@ def _plot_panel(
|
||||
kind: str,
|
||||
):
|
||||
"""
|
||||
Plot one panel: DeepSAD (net_for_deepsad) with 3 regimes + baselines (from Efficient).
|
||||
Plot one panel: DeepSAD (net_for_deepsad) with 3 regimes + Baselines (from Efficient).
|
||||
Legend entries include mean±CI of AUC/AP.
|
||||
"""
|
||||
ax.grid(True, alpha=0.3)
|
||||
@@ -200,9 +204,9 @@ def _plot_panel(
|
||||
continue
|
||||
|
||||
# Metric for legend
|
||||
metric_vals = _auc_list(sub_b) if kind == "roc" else _ap_list(sub_b)
|
||||
metric_vals = _auc_list(sub_b, kind)
|
||||
m, ci = mean_ci(metric_vals)
|
||||
lab = f"{model} ({'AUC' if kind == 'roc' else 'AP'}={m:.3f}±{ci:.3f})"
|
||||
lab = f"{BASELINE_LABELS[model]}\n(AUC={m:.3f}±{ci:.3f})"
|
||||
|
||||
color = COLOR_BASELINES[model]
|
||||
h = ax.plot(grid, mean_y, lw=2, color=color, label=lab)[0]
|
||||
@@ -230,9 +234,9 @@ def _plot_panel(
|
||||
if np.all(np.isnan(mean_y)):
|
||||
continue
|
||||
|
||||
metric_vals = _auc_list(sub_d) if kind == "roc" else _ap_list(sub_d)
|
||||
metric_vals = _auc_list(sub_d, kind)
|
||||
m, ci = mean_ci(metric_vals)
|
||||
lab = f"DeepSAD {net_for_deepsad} — semi {sn}/{sa} ({'AUC' if kind == 'roc' else 'AP'}={m:.3f}±{ci:.3f})"
|
||||
lab = f"DeepSAD {net_for_deepsad} — {sn}/{sa}\n(AUC={m:.3f}±{ci:.3f})"
|
||||
|
||||
color = COLOR_REGIMES[regime]
|
||||
ls = LINESTYLES[regime]
|
||||
@@ -246,7 +250,7 @@ def _plot_panel(
|
||||
ax.plot([0, 1], [0, 1], "k--", alpha=0.6, label="Chance")
|
||||
|
||||
# Legend
|
||||
ax.legend(loc="lower right", fontsize=9, frameon=True)
|
||||
ax.legend(loc="upper right", fontsize=9, frameon=True)
|
||||
|
||||
|
||||
def make_figures_for_dim(
|
||||
@@ -254,9 +258,11 @@ def make_figures_for_dim(
|
||||
):
|
||||
# ROC: 2×1
|
||||
fig_roc, axes = plt.subplots(
|
||||
nrows=1, ncols=2, figsize=(14, 5), constrained_layout=True
|
||||
nrows=2, ncols=1, figsize=(7, 10), constrained_layout=True
|
||||
)
|
||||
fig_roc.suptitle(f"ROC — {eval_type} — latent_dim={latent_dim}", fontsize=14)
|
||||
# fig_roc.suptitle(
|
||||
# f"ROC — {EVALS_LABELS[eval_type]} — Latent Dim.={latent_dim}", fontsize=14
|
||||
# )
|
||||
|
||||
_plot_panel(
|
||||
axes[0],
|
||||
@@ -266,7 +272,7 @@ def make_figures_for_dim(
|
||||
latent_dim=latent_dim,
|
||||
kind="roc",
|
||||
)
|
||||
axes[0].set_title("DeepSAD (LeNet) + baselines")
|
||||
axes[0].set_title("(a) DeepSAD (LeNet) + Baselines")
|
||||
|
||||
_plot_panel(
|
||||
axes[1],
|
||||
@@ -276,7 +282,7 @@ def make_figures_for_dim(
|
||||
latent_dim=latent_dim,
|
||||
kind="roc",
|
||||
)
|
||||
axes[1].set_title("DeepSAD (Efficient) + baselines")
|
||||
axes[1].set_title("(b) DeepSAD (Efficient) + Baselines")
|
||||
|
||||
out_roc = out_dir / f"roc_{latent_dim}_{eval_type}.png"
|
||||
fig_roc.savefig(out_roc, dpi=150, bbox_inches="tight")
|
||||
@@ -284,9 +290,11 @@ def make_figures_for_dim(
|
||||
|
||||
# PRC: 2×1
|
||||
fig_prc, axes = plt.subplots(
|
||||
nrows=1, ncols=2, figsize=(14, 5), constrained_layout=True
|
||||
nrows=2, ncols=1, figsize=(7, 10), constrained_layout=True
|
||||
)
|
||||
fig_prc.suptitle(f"PRC — {eval_type} — latent_dim={latent_dim}", fontsize=14)
|
||||
# fig_prc.suptitle(
|
||||
# f"PRC — {EVALS_LABELS[eval_type]} — Latent Dim.={latent_dim}", fontsize=14
|
||||
# )
|
||||
|
||||
_plot_panel(
|
||||
axes[0],
|
||||
@@ -296,7 +304,7 @@ def make_figures_for_dim(
|
||||
latent_dim=latent_dim,
|
||||
kind="prc",
|
||||
)
|
||||
axes[0].set_title("DeepSAD (LeNet) + baselines")
|
||||
axes[0].set_title("(a)")
|
||||
|
||||
_plot_panel(
|
||||
axes[1],
|
||||
@@ -306,7 +314,7 @@ def make_figures_for_dim(
|
||||
latent_dim=latent_dim,
|
||||
kind="prc",
|
||||
)
|
||||
axes[1].set_title("DeepSAD (Efficient) + baselines")
|
||||
axes[1].set_title("(b)")
|
||||
|
||||
out_prc = out_dir / f"prc_{latent_dim}_{eval_type}.png"
|
||||
fig_prc.savefig(out_prc, dpi=150, bbox_inches="tight")
|
||||
|
||||
704
tools/plot_scripts/setup_runtime_tables.py
Normal file
@@ -0,0 +1,704 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import polars as pl
|
||||
from load_results import (
|
||||
load_pretraining_results_dataframe,
|
||||
load_results_dataframe,
|
||||
)
|
||||
|
||||
# ----------------------------
# Config
# ----------------------------
# Directory that holds one sub-directory per finished experiment.
RESULTS_ROOT = Path("/home/fedex/mt/results/done")
# Directory that receives the generated .tex files.
OUTPUT_DIR = Path("/home/fedex/mt/plots/setup_runtime_tables")

# Optional substring (e.g. "efficient") used when naming baseline columns:
# baselines whose network does not contain it lose the network suffix.
# Leave as None to always keep the network in the column name.
BASELINE_NETWORK_HINT: Optional[str] = None
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Helpers
|
||||
# ----------------------------
|
||||
def _net_label_for_display(net: str | None) -> str:
|
||||
s = (net or "").lower()
|
||||
if "effic" in s:
|
||||
return "Efficient"
|
||||
if "lenet" in s:
|
||||
return "LeNet"
|
||||
return net or ""
|
||||
|
||||
|
||||
def _fmt_mean_std_n(
|
||||
mean: float | None, std: float | None, n: int | None, unit: str = ""
|
||||
) -> str:
|
||||
if mean is None or (isinstance(mean, float) and (np.isnan(mean) or np.isinf(mean))):
|
||||
return "-"
|
||||
base = f"{mean:.2f}"
|
||||
if std is not None and not (
|
||||
isinstance(std, float) and (np.isnan(std) or np.isinf(std))
|
||||
):
|
||||
base = f"{base} ± {std:.2f}"
|
||||
if unit:
|
||||
base = f"{base} {unit}"
|
||||
if n is not None and n > 0:
|
||||
base = f"{base} (n={n})"
|
||||
return base
|
||||
|
||||
|
||||
def _fmt_pair(n: int, m: int) -> str:
|
||||
return f"{n}/{m}"
|
||||
|
||||
|
||||
def _fmt_mean_std(mean: float | None, std: float | None, n: int | None) -> str:
|
||||
if mean is None or (isinstance(mean, float) and (np.isnan(mean) or np.isinf(mean))):
|
||||
return "-"
|
||||
if std is None or (isinstance(std, float) and (np.isnan(std) or np.isinf(std))):
|
||||
return f"{mean:.2f}"
|
||||
if n is None or n < 1:
|
||||
return f"{mean:.2f} ± {std:.2f}"
|
||||
return f"{mean:.2f} ± {std:.2f} (n={n})"
|
||||
|
||||
|
||||
def _parse_cfg(cfg_json: Optional[str]) -> Dict[str, Any]:
|
||||
if not cfg_json:
|
||||
return {}
|
||||
try:
|
||||
return json.loads(cfg_json)
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def _key_params(model: str, cfg: Dict[str, Any]) -> str:
|
||||
"""Compact, model-specific parameter string for the table."""
|
||||
if model == "deepsad":
|
||||
bs = cfg.get("batch_size")
|
||||
ne = cfg.get("n_epochs")
|
||||
lr = cfg.get("lr")
|
||||
wd = cfg.get("weight_decay")
|
||||
return f"bs={bs}, epochs={ne}, lr={lr}, wd={wd}"
|
||||
if model == "isoforest":
|
||||
est = cfg.get("isoforest_n_estimators")
|
||||
ms = cfg.get("isoforest_max_samples")
|
||||
cont = cfg.get("isoforest_contamination")
|
||||
return f"n_estimators={est}, max_samples={ms}, cont={cont}"
|
||||
if model == "ocsvm":
|
||||
ker = cfg.get("ocsvm_kernel")
|
||||
nu = cfg.get("ocsvm_nu")
|
||||
return f"kernel={ker}, nu={nu}"
|
||||
return "-"
|
||||
|
||||
|
||||
def _method_col_name(model: str, network: str) -> str:
|
||||
"""
|
||||
Column heading for pivot tables:
|
||||
- deepsad carries the network (e.g., 'DeepSAD / LeNet')
|
||||
- baselines carry their own model name; optionally annotate network
|
||||
"""
|
||||
label = model.lower()
|
||||
if label == "deepsad":
|
||||
return f"DeepSAD / {network}"
|
||||
# baselines; optionally simplify/standardize network name
|
||||
if (
|
||||
BASELINE_NETWORK_HINT
|
||||
and BASELINE_NETWORK_HINT.lower() not in (network or "").lower()
|
||||
):
|
||||
# If you want to collapse baseline duplicates to a single name, you can force it here
|
||||
return model.capitalize()
|
||||
# Otherwise, keep network variant explicit
|
||||
return f"{model.capitalize()} / {network}"
|
||||
|
||||
|
||||
def _prepare_per_fold_metrics(df: pl.DataFrame) -> pl.DataFrame:
    """
    Reduce the results frame to one row per (folder, model, fold).

    Added columns:
      - n_test: length of the ``scores`` list (``scores`` itself is dropped)
      - n_epochs: ``$.n_epochs`` extracted from ``config_json``, as Int64
      - latency_ms: 1000 * test_time / n_test, null unless both are > 0
      - time_per_epoch: train_time / n_epochs, only for model == "deepsad"
        with n_epochs > 0, null otherwise
      - network_disp: display label from ``_net_label_for_display``
    """
    kept_columns = [
        "folder",
        "network",
        "model",
        "latent_dim",
        "semi_normals",
        "semi_anomalous",
        "fold",
        "train_time",
        "test_time",
        "scores",
        "config_json",
    ]
    n_test_expr = pl.col("scores").list.len().alias("n_test")
    n_epochs_expr = (
        pl.col("config_json")
        .str.json_path_match("$.n_epochs")
        .cast(pl.Int64)
        .alias("n_epochs")
    )
    per_row = (
        df.select(kept_columns).with_columns(n_test_expr, n_epochs_expr).drop("scores")
    )

    # The same (folder, model, fold) appears once per eval; keep a single row.
    deduped = per_row.unique(subset=["folder", "model", "fold"])

    has_timing = (pl.col("test_time") > 0) & (pl.col("n_test") > 0)
    latency_expr = (
        pl.when(has_timing)
        .then(1000.0 * pl.col("test_time") / pl.col("n_test"))
        .otherwise(None)
        .cast(pl.Float64)
        .alias("latency_ms")
    )

    is_trained_deepsad = (pl.col("model") == "deepsad") & (pl.col("n_epochs") > 0)
    per_epoch_expr = (
        pl.when(is_trained_deepsad)
        .then(pl.col("train_time") / pl.col("n_epochs"))
        .otherwise(None)
        .cast(pl.Float64)
        .alias("time_per_epoch")
    )

    display_expr = (
        pl.col("network")
        .cast(pl.Utf8)
        .map_elements(_net_label_for_display, return_dtype=pl.Utf8)
        .alias("network_disp")
    )

    return deduped.with_columns(latency_expr, per_epoch_expr, display_expr)
|
||||
|
||||
|
||||
def _prepare_aggregates(df: pl.DataFrame) -> pl.DataFrame:
    """
    Aggregate train/test times across folds.

    Rows are first de-duplicated on (folder, model, fold), then grouped by
    (network, model, latent_dim, semi_normals, semi_anomalous). Each group
    yields n_folds, train_mean/train_std, test_mean/test_std (sample std,
    ddof=1) and the first ``config_json`` seen as an exemplar.
    """
    needed = [
        "folder",
        "network",
        "model",
        "latent_dim",
        "semi_normals",
        "semi_anomalous",
        "fold",
        "train_time",
        "test_time",
        "config_json",
    ]
    group_keys = ["network", "model", "latent_dim", "semi_normals", "semi_anomalous"]

    # One row per (folder, model, fold): the evals repeat the same timings.
    per_fold = (
        df.select(needed)
        .unique(subset=["folder", "model", "fold"])
        .with_columns(pl.col("network").cast(pl.Utf8))
    )

    stats = [
        pl.len().alias("n_folds"),
        pl.col("train_time").mean().alias("train_mean"),
        pl.col("train_time").std(ddof=1).alias("train_std"),
        pl.col("test_time").mean().alias("test_mean"),
        pl.col("test_time").std(ddof=1).alias("test_std"),
        # one exemplar cfg per group
        pl.col("config_json").first().alias("config_json"),
    ]
    sort_keys = ["semi_normals", "semi_anomalous", "latent_dim", "network", "model"]
    return per_fold.group_by(group_keys).agg(stats).sort(sort_keys)
|
||||
|
||||
|
||||
def make_training_runtime_table(df: pl.DataFrame) -> str:
    """
    Build a LaTeX table of TRAIN runtimes: mean ± std (seconds) across folds.

    Rows are indexed by (semi, latent_dim), where ``semi`` is the
    "semi_normals/semi_anomalous" pair. There is one column per method label
    produced by ``_method_col_name``; missing combinations render as "-".

    Args:
        df: Results frame accepted by ``_prepare_aggregates``.

    Returns:
        The LaTeX source of the table.
    """
    agg = _prepare_aggregates(df)

    # Prepare display strings and column keys
    tbl = agg.with_columns(
        pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
            "semi"
        ),
        pl.col("model").cast(pl.Utf8),
        pl.col("network").cast(pl.Utf8),
        pl.col("latent_dim").cast(pl.Int64),
        pl.struct(["train_mean", "train_std", "n_folds"])
        .map_elements(
            lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n_folds"]),
            return_dtype=pl.Utf8,
        )
        .alias("train_fmt"),
        pl.struct(["model", "network"])
        .map_elements(
            lambda s: _method_col_name(s["model"], s["network"]),
            return_dtype=pl.Utf8,
        )
        .alias("method"),
    ).select("semi", "latent_dim", "method", "train_fmt")

    # Pivot to wide form: one cell per (semi, latent_dim, method).
    # `on=` replaces the `columns=` keyword deprecated since Polars 1.0.
    wide = tbl.pivot(
        on="method",
        index=["semi", "latent_dim"],
        values="train_fmt",
        aggregate_function="first",
    ).sort(["semi", "latent_dim"])

    # Fill missing with '-' and export
    pdf = wide.fill_null("-").to_pandas()
    pdf.index = pd.MultiIndex.from_frame(pdf[["semi", "latent_dim"]])
    pdf = pdf.drop(columns=["semi", "latent_dim"])
    latex = pdf.to_latex(
        index=True,
        escape=True,
        na_rep="-",
        multicolumn=True,
        multicolumn_format="c",
        bold_rows=False,
        caption="Training runtime (seconds): mean ± std across folds (n in parentheses).",
        label="tab:train_runtimes",
    )
    return latex
|
||||
|
||||
|
||||
def make_inference_runtime_table(df: pl.DataFrame) -> str:
    """
    Build a LaTeX table of TEST/INFERENCE runtimes: mean ± std (seconds) across folds.

    Same layout as the training table: rows are indexed by (semi, latent_dim),
    one column per method label from ``_method_col_name``, and missing
    combinations render as "-".

    Args:
        df: Results frame accepted by ``_prepare_aggregates``.

    Returns:
        The LaTeX source of the table.
    """
    agg = _prepare_aggregates(df)

    tbl = agg.with_columns(
        pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
            "semi"
        ),
        pl.col("model").cast(pl.Utf8),
        pl.col("network").cast(pl.Utf8),
        pl.col("latent_dim").cast(pl.Int64),
        pl.struct(["test_mean", "test_std", "n_folds"])
        .map_elements(
            lambda s: _fmt_mean_std(s["test_mean"], s["test_std"], s["n_folds"]),
            return_dtype=pl.Utf8,
        )
        .alias("test_fmt"),
        pl.struct(["model", "network"])
        .map_elements(
            lambda s: _method_col_name(s["model"], s["network"]),
            return_dtype=pl.Utf8,
        )
        .alias("method"),
    ).select("semi", "latent_dim", "method", "test_fmt")

    # `on=` replaces the `columns=` keyword deprecated since Polars 1.0.
    wide = tbl.pivot(
        on="method",
        index=["semi", "latent_dim"],
        values="test_fmt",
        aggregate_function="first",
    ).sort(["semi", "latent_dim"])

    pdf = wide.fill_null("-").to_pandas()
    pdf.index = pd.MultiIndex.from_frame(pdf[["semi", "latent_dim"]])
    pdf = pdf.drop(columns=["semi", "latent_dim"])
    latex = pdf.to_latex(
        index=True,
        escape=True,
        na_rep="-",
        multicolumn=True,
        multicolumn_format="c",
        bold_rows=False,
        caption="Inference/Test runtime (seconds): mean ± std across folds (n in parentheses).",
        label="tab:test_runtimes",
    )
    return latex
|
||||
|
||||
|
||||
def make_longform_train_table_with_params(df: pl.DataFrame) -> str:
    """
    (Optional) Long-form LaTeX table with a 'Params' column taken from config_json.

    One row per aggregated (network, model, latent_dim, semi) group, showing
    the model-specific parameter summary from ``_key_params`` next to the
    formatted training time.

    Args:
        df: Results frame accepted by ``_prepare_aggregates``.

    Returns:
        The LaTeX source of the table.
    """
    agg = _prepare_aggregates(df)
    # Build params column from JSON for readability
    long = (
        agg.with_columns(
            pl.format("{}/{}", pl.col("semi_normals"), pl.col("semi_anomalous")).alias(
                "semi"
            ),
            pl.col("latent_dim").cast(pl.Int64),
            pl.struct(["model", "config_json"])
            .map_elements(
                lambda s: _key_params(s["model"], _parse_cfg(s["config_json"])),
                return_dtype=pl.Utf8,
            )
            .alias("params"),
            pl.struct(["train_mean", "train_std", "n_folds"])
            .map_elements(
                lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n_folds"]),
                # Declared explicitly, like every other map_elements call in
                # this module, instead of leaving the dtype to inference.
                return_dtype=pl.Utf8,
            )
            .alias("train_time_fmt"),
        )
        .select(
            "network",
            "model",
            "latent_dim",
            "semi",
            "params",
            "train_time_fmt",
        )
        .sort(["semi", "latent_dim", "network", "model"])
    )

    pdf = long.to_pandas()
    pdf.rename(
        columns={
            "network": "Network",
            "model": "Method",
            "latent_dim": "Latent Dim",
            "semi": "Semi (N/O)",
            "params": "Params",
            "train_time_fmt": "Train time [s] (mean ± std)",
        },
        inplace=True,
    )
    latex = pdf.to_latex(
        index=False,
        escape=True,
        longtable=False,
        caption="Training runtime with key parameters.",
        label="tab:train_runtimes_params",
    )
    return latex
|
||||
|
||||
|
||||
def make_training_runtime_table_compact(df: pl.DataFrame) -> str:
    """
    Build a compact LaTeX table of training runtimes, one row per latent_dim.

    DeepSAD is split by network display label and pooled over the
    semi-labeling settings; its cells also carry the per-epoch time in
    italics. The isoforest/ocsvm baselines are pooled over networks and
    semi-labeling settings.

    Args:
        df: Results frame accepted by ``_prepare_per_fold_metrics``.

    Returns:
        The LaTeX source of the table.
    """
    per_fold = _prepare_per_fold_metrics(df)

    # DeepSAD: keep LeNet vs Efficient, collapse semis
    ds = (
        per_fold.filter(pl.col("model") == "deepsad")
        .group_by(["model", "network_disp", "latent_dim"])
        .agg(
            n=pl.len(),
            train_mean=pl.mean("train_time"),
            train_std=pl.std("train_time", ddof=1),
            tpe_mean=pl.mean("time_per_epoch"),
            tpe_std=pl.std("time_per_epoch", ddof=1),
        )
        .with_columns(
            method=pl.format("DeepSAD / {}", pl.col("network_disp")),
        )
    )

    # Baselines: collapse networks & semis; only vary by latent_dim
    bl = (
        per_fold.filter(pl.col("model").is_in(["isoforest", "ocsvm"]))
        .group_by(["model", "latent_dim"])
        .agg(
            n=pl.len(),
            train_mean=pl.mean("train_time"),
            train_std=pl.std("train_time", ddof=1),
        )
        .with_columns(
            method=pl.when(pl.col("model") == "isoforest")
            .then(pl.lit("IsoForest"))
            .when(pl.col("model") == "ocsvm")
            .then(pl.lit("OCSVM"))
            .otherwise(pl.lit("Baseline"))
        )
    )

    # --- Standardize schemas before concat ---
    ds_std = ds.select(
        pl.col("latent_dim").cast(pl.Int64),
        pl.col("method").cast(pl.Utf8),
        pl.col("train_mean").cast(pl.Float64),
        pl.col("train_std").cast(pl.Float64),
        pl.col("tpe_mean").cast(pl.Float64),
        pl.col("tpe_std").cast(pl.Float64),
        pl.col("n").cast(pl.Int64),
    )

    bl_std = bl.select(
        pl.col("latent_dim").cast(pl.Int64),
        pl.col("method").cast(pl.Utf8),
        pl.col("train_mean").cast(pl.Float64),
        pl.col("train_std").cast(pl.Float64),
        # Baselines have no epochs: keep the columns, filled with nulls.
        pl.lit(None, dtype=pl.Float64).alias("tpe_mean"),
        pl.lit(None, dtype=pl.Float64).alias("tpe_std"),
        pl.col("n").cast(pl.Int64),
    )

    agg = pl.concat([ds_std, bl_std], how="vertical")

    # Format cell: total [s]; DeepSAD also appends (italic) per-epoch
    def _fmt_train_cell(s: dict) -> str:
        total = _fmt_mean_std_n(s["train_mean"], s["train_std"], s["n"], "s")
        if s.get("tpe_mean") is None or (
            isinstance(s.get("tpe_mean"), float) and np.isnan(s["tpe_mean"])
        ):
            return total
        tpe = _fmt_mean_std_n(s["tpe_mean"], s["tpe_std"], None, "s/epoch")
        return f"{total} (\\textit{{{tpe}}})"

    tbl = agg.with_columns(
        pl.struct(["train_mean", "train_std", "tpe_mean", "tpe_std", "n"])
        .map_elements(_fmt_train_cell, return_dtype=pl.Utf8)
        .alias("train_fmt"),
    ).select("latent_dim", "method", "train_fmt")

    # Pivot and order columns nicely.
    # `on=` replaces the `columns=` keyword deprecated since Polars 1.0.
    wide = tbl.pivot(
        on="method",
        index=["latent_dim"],
        values="train_fmt",
        aggregate_function="first",
    ).sort("latent_dim")

    pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")
    desired_cols = [
        c
        for c in ["DeepSAD / LeNet", "DeepSAD / Efficient", "IsoForest", "OCSVM"]
        if c in pdf.columns
    ]
    if desired_cols:
        pdf = pdf.reindex(columns=desired_cols)

    # The cells contain real LaTeX markup (\textit{...}); escape=True would
    # turn it into literal text. Escaping is therefore disabled, and the one
    # label that needs it (the underscore in the index name) is escaped by hand.
    pdf.index.name = "latent\\_dim"

    latex = pdf.to_latex(
        index=True,
        escape=False,
        na_rep="-",
        multicolumn=True,
        multicolumn_format="c",
        bold_rows=False,
        caption="Training runtime: total seconds (mean ± std). DeepSAD cells also show \\textit{seconds per epoch} in parentheses.",
        label="tab:train_runtimes_compact",
    )
    return latex
|
||||
|
||||
|
||||
def make_inference_latency_table_compact(df: pl.DataFrame) -> str:
    """
    Build a compact LaTeX table of inference latency (ms/sample), one row per latent_dim.

    DeepSAD is split by network display label and pooled over the
    semi-labeling settings; the isoforest/ocsvm baselines are pooled over
    networks and semi-labeling settings.

    Args:
        df: Results frame accepted by ``_prepare_per_fold_metrics``.

    Returns:
        The LaTeX source of the table.
    """
    per_fold = _prepare_per_fold_metrics(df)

    # DeepSAD: keep networks; collapse semis
    ds = (
        per_fold.filter(pl.col("model") == "deepsad")
        .group_by(["model", "network_disp", "latent_dim"])
        .agg(
            n=pl.len(),
            lat_mean=pl.mean("latency_ms"),
            lat_std=pl.std("latency_ms", ddof=1),
        )
        .with_columns(
            method=pl.format("DeepSAD / {}", pl.col("network_disp")),
        )
    )

    # Baselines: collapse networks & semis
    bl = (
        per_fold.filter(pl.col("model").is_in(["isoforest", "ocsvm"]))
        .group_by(["model", "latent_dim"])
        .agg(
            n=pl.len(),
            lat_mean=pl.mean("latency_ms"),
            lat_std=pl.std("latency_ms", ddof=1),
        )
        .with_columns(
            method=pl.when(pl.col("model") == "isoforest")
            .then(pl.lit("IsoForest"))
            .when(pl.col("model") == "ocsvm")
            .then(pl.lit("OCSVM"))
            .otherwise(pl.lit("Baseline"))
        )
    )

    # --- Standardize schemas before concat ---
    ds_std = ds.select(
        pl.col("latent_dim").cast(pl.Int64),
        pl.col("method").cast(pl.Utf8),
        pl.col("lat_mean").cast(pl.Float64),
        pl.col("lat_std").cast(pl.Float64),
        pl.col("n").cast(pl.Int64),
    )

    bl_std = bl.select(
        pl.col("latent_dim").cast(pl.Int64),
        pl.col("method").cast(pl.Utf8),
        pl.col("lat_mean").cast(pl.Float64),
        pl.col("lat_std").cast(pl.Float64),
        pl.col("n").cast(pl.Int64),
    )

    agg = pl.concat([ds_std, bl_std], how="vertical")

    def _fmt_lat_cell(s: dict) -> str:
        return _fmt_mean_std_n(s["lat_mean"], s["lat_std"], s["n"], "ms")

    tbl = agg.with_columns(
        pl.struct(["lat_mean", "lat_std", "n"])
        .map_elements(_fmt_lat_cell, return_dtype=pl.Utf8)
        .alias("lat_fmt"),
    ).select("latent_dim", "method", "lat_fmt")

    # `on=` replaces the `columns=` keyword deprecated since Polars 1.0.
    wide = tbl.pivot(
        on="method",
        index=["latent_dim"],
        values="lat_fmt",
        aggregate_function="first",
    ).sort("latent_dim")

    pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")
    desired_cols = [
        c
        for c in ["DeepSAD / LeNet", "DeepSAD / Efficient", "IsoForest", "OCSVM"]
        if c in pdf.columns
    ]
    if desired_cols:
        pdf = pdf.reindex(columns=desired_cols)

    latex = pdf.to_latex(
        index=True,
        escape=True,
        na_rep="-",
        multicolumn=True,
        multicolumn_format="c",
        bold_rows=False,
        caption="Inference latency (ms/sample): mean ± std across folds; baselines collapsed across networks and semi-labeling.",
        label="tab:inference_latency_compact",
    )
    return latex
|
||||
|
||||
|
||||
def make_ae_pretraining_runtime_table(df_pre: pl.DataFrame) -> str:
    """
    LaTeX table: Autoencoder (pretraining) runtime per latent dim.

    Rows: latent_dim
    Cols: AE / LeNet, AE / Efficient (mean ± std seconds across folds)

    Args:
        df_pre: Pretraining results with at least the columns ``network``,
            ``latent_dim``, ``fold`` and ``train_time``.

    Returns:
        The LaTeX source of the table.
    """
    # minimal columns we need
    base = df_pre.select(
        pl.col("network").cast(pl.Utf8),
        pl.col("latent_dim").cast(pl.Int64),
        pl.col("fold").cast(pl.Int64),
        pl.col("train_time").cast(pl.Float64),
    ).drop_nulls(subset=["network", "latent_dim", "train_time"])

    # Nice display label for network
    network_disp = (
        pl.when(pl.col("network").str.contains("efficient"))
        .then(pl.lit("Efficient"))
        .when(pl.col("network").str.contains("LeNet"))
        .then(pl.lit("LeNet"))
        .otherwise(pl.col("network"))
        .alias("network_disp")
    )

    agg = (
        base.with_columns(network_disp)
        .group_by(["network_disp", "latent_dim"])
        .agg(
            n=pl.len(),
            train_mean=pl.mean("train_time"),
            train_std=pl.std("train_time", ddof=1),
        )
        .with_columns(
            pl.format("AE / {}", pl.col("network_disp")).alias("method"),
            pl.struct(["train_mean", "train_std", "n"])
            .map_elements(
                lambda s: _fmt_mean_std(s["train_mean"], s["train_std"], s["n"]),
                return_dtype=pl.Utf8,
            )
            .alias("train_fmt"),
        )
        .select("latent_dim", "method", "train_fmt")
        .sort(["latent_dim", "method"])
    )

    # `on=` replaces the `columns=` keyword deprecated since Polars 1.0.
    wide = agg.pivot(
        on="method",
        index=["latent_dim"],
        values="train_fmt",
        aggregate_function="first",
    ).sort("latent_dim")

    pdf = wide.fill_null("-").to_pandas().set_index("latent_dim")

    # Order columns if both exist. The names must match the "AE / <network>"
    # labels built above, otherwise the reordering silently never happens.
    desired = [c for c in ["AE / LeNet", "AE / Efficient"] if c in pdf.columns]
    if desired:
        pdf = pdf.reindex(columns=desired)

    latex = pdf.to_latex(
        index=True,
        escape=True,
        na_rep="-",
        multicolumn=True,
        multicolumn_format="c",
        bold_rows=False,
        caption="Autoencoder pretraining runtime (seconds): mean ± std across folds.",
        label="tab:ae_pretrain_runtimes",
    )
    return latex
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Main
|
||||
# ----------------------------
|
||||
def main():
    """
    Generate all runtime tables and write them as .tex files.

    Loads the main and pretraining results from RESULTS_ROOT, renders four
    LaTeX tables, writes them together with a copy of this script into
    OUTPUT_DIR/archive/<timestamp>, and mirrors the files of that directory
    into OUTPUT_DIR/latest.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Main results; add an empty config_json column if the loader gave none.
    results = load_results_dataframe(RESULTS_ROOT, allow_cache=True)
    if "config_json" not in results.columns:
        results = results.with_columns(pl.lit(None).alias("config_json"))

    # AE pretraining results
    pretraining = load_pretraining_results_dataframe(RESULTS_ROOT, allow_cache=True)

    # File name -> LaTeX source (tables are built in this order)
    tex_files = {
        "train_runtimes.tex": make_training_runtime_table(results),
        "test_runtimes.tex": make_inference_runtime_table(results),
        "train_runtimes_with_params.tex": make_longform_train_table_with_params(
            results
        ),
        "ae_pretraining_runtimes.tex": make_ae_pretraining_runtime_table(pretraining),
    }

    # Timestamped archive directory for this run
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    archive_dir = OUTPUT_DIR / "archive" / stamp
    archive_dir.mkdir(parents=True, exist_ok=True)

    for filename, content in tex_files.items():
        (archive_dir / filename).write_text(content)

    # Keep the generating script next to its output.
    shutil.copy2(Path(__file__), archive_dir)

    # Mirror every file of this run into "latest".
    latest_dir = OUTPUT_DIR / "latest"
    latest_dir.mkdir(exist_ok=True, parents=True)
    for entry in archive_dir.iterdir():
        if entry.is_file():
            shutil.copy2(entry, latest_dir / entry.name)

    print(f"Saved LaTeX tables to: {archive_dir}")
    print(f"Also updated: {latest_dir}")


if __name__ == "__main__":
    main()
|
||||
20
tools/print_mat.py
Normal file
@@ -0,0 +1,20 @@
|
||||
rows = 5
cols = 4

# Row i holds the consecutive integers cols*i .. cols*i + cols - 1.
mat = [range(0 + (cols * i), cols + (cols * i), 1) for i in range(rows)]


def print_mat(mat):
    """Print each row of ``mat`` on its own line, values separated by spaces."""
    for s in mat:
        print(*s)


def rotate_mat(mat):
    """
    Return ``mat`` rotated 90 degrees clockwise as a new list of lists.

    The dimensions are taken from ``mat`` itself rather than from the
    module-level ``rows``/``cols``, so any rectangular matrix works, including
    the result of a previous rotation. An empty matrix yields ``[]``.
    """
    # Reading the rows bottom-to-top and transposing gives the clockwise turn:
    # new row c is the old column c, listed from the last row to the first.
    return [list(column) for column in zip(*mat[::-1])]


print_mat(mat)
mat = rotate_mat(mat)
print("rotated:")
print_mat(mat)
|
||||
@@ -5,5 +5,9 @@ description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11.9"
|
||||
dependencies = [
|
||||
"pandas>=2.3.2",
|
||||
"pointcloudset>=0.11.0",
|
||||
"polars>=1.33.0",
|
||||
"pyarrow>=21.0.0",
|
||||
"tabulate>=0.9.0",
|
||||
]
|
||||
|
||||
1958
tools/uv.lock
generated
@@ -6,7 +6,7 @@ from typing import Sequence
|
||||
|
||||
import polars as pl
|
||||
|
||||
from load_results import load_results_dataframe
|
||||
from plot_scripts.load_results import load_results_dataframe
|
||||
|
||||
# --- configure your intended grid here (use the *canonical* strings used in df) ---
|
||||
NETWORKS_EXPECTED = ["subter_LeNet", "subter_efficient"]
|
||||
|
||||