#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate a LaTeX longtable with system + software info for a thesis (Linux + NVIDIA).
Requirements (preflight will check and error if missing):
- Linux OS
- lscpu (util-linux)
- Python packages: nvidia-ml-py3 (pynvml), torch, numpy, scipy, scikit-learn
- NVIDIA driver present and at least one GPU visible via NVML
What it reports (per users list):
System:
- OS name + version + distribution (Linux) + kernel version + system arch
- CPU model name, number of cores and threads, base frequencies (best-effort via lscpu)
- Total RAM capacity
- GPU(s): model name (only the newer one; prefer a name matching “4090”, else highest compute capability),
memory size, driver version, CUDA (driver) version, cuDNN version (if used via PyTorch)
Software environment:
- Python version
- PyTorch version + built CUDA/cuDNN version
- scikit-learn version
- NumPy / SciPy version (+ NumPy build config summary: MKL/OpenBLAS/etc.)
"""
import argparse
import os
import platform
import re
import shutil
import subprocess
import sys
from typing import Dict, List, Tuple
# -------------------- Helper --------------------
def _import_nvml():
    """
    Try to import NVML from the supported packages:
    - 'nvidia-ml-py' (preferred, maintained): provides module 'pynvml'
    - legacy 'pynvml' (deprecated but still widely installed)
    Returns the imported module object (with nvml... symbols).
    """
    try:
        import pynvml as _nvml  # provided by 'nvidia-ml-py' or the old 'pynvml'
        return _nvml
    except Exception as e:
        raise ImportError(
            "NVML not importable. Please install the maintained package:\n"
            "  pip install nvidia-ml-py\n"
            "(and uninstall deprecated ones: pip uninstall nvidia-ml-py3 pynvml)"
        ) from e


def _to_text(x) -> str:
    """Return a clean str whether NVML gives bytes or str."""
    if isinstance(x, bytes):
        try:
            return x.decode(errors="ignore")
        except Exception:
            return x.decode("utf-8", "ignore")
    return str(x)
# -------------------- Utilities --------------------
def which(cmd: str) -> str:
    return shutil.which(cmd) or ""


def run(cmd: List[str], timeout: int = 6) -> str:
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=timeout)
        return out.decode(errors="ignore").strip()
    except Exception:
        return ""


def human_bytes(nbytes: int) -> str:
    try:
        n = float(nbytes)
    except Exception:
        return ""
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    i = 0
    while n >= 1024 and i < len(units) - 1:
        n /= 1024.0
        i += 1
    return f"{n:.2f} {units[i]}"
LATEX_SPECIALS = {
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}


def tex_escape(s: str) -> str:
    if s is None:
        return ""
    return "".join(LATEX_SPECIALS.get(ch, ch) for ch in str(s))
def latex_table(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    lines = []
    lines.append(r"\begin{table}[p]")  # float; use [p] or [tbp] as you prefer
    lines.append(r"\centering")
    lines.append(r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}")
    lines.append(r"\begin{tabular}{p{0.34\linewidth} p{0.62\linewidth}}")
    lines.append(r"\toprule")
    lines.append(r"\textbf{Item} & \textbf{Details} \\")
    lines.append(r"\midrule")
    for title, kv in sections:
        if not kv:
            continue
        lines.append(r"\multicolumn{2}{l}{\textbf{" + tex_escape(title) + r"}} \\")
        for k, v in kv.items():
            val = tex_escape(v)
            if "\n" in v or len(v) > 120:
                val = (
                    r"\begin{minipage}[t]{\linewidth}\ttfamily\small "
                    + tex_escape(v)
                    + r"\end{minipage}"
                )
            else:
                val = r"\ttfamily " + val
            lines.append(tex_escape(k) + " & " + val + r" \\")
        lines.append(r"\addlinespace")
    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")
    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)


def latex_longtable(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    lines = []
    lines.append(r"\begin{longtable}{p{0.34\linewidth} p{0.62\linewidth}}")
    lines.append(r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}\\")
    lines.append(r"\toprule")
    lines.append(r"\textbf{Item} & \textbf{Details} \\")
    lines.append(r"\midrule")
    lines.append(r"\endfirsthead")
    lines.append(r"\toprule \textbf{Item} & \textbf{Details} \\ \midrule")
    lines.append(r"\endhead")
    lines.append(r"\bottomrule")
    lines.append(r"\endfoot")
    lines.append(r"\bottomrule")
    lines.append(r"\endlastfoot")
    for title, kv in sections:
        if not kv:
            continue
        lines.append(r"\multicolumn{2}{l}{\textbf{" + tex_escape(title) + r"}} \\")
        for k, v in kv.items():
            val = tex_escape(v)
            if "\n" in v or len(v) > 120:
                val = (
                    r"\begin{minipage}[t]{\linewidth}\ttfamily\small "
                    + tex_escape(v)
                    + r"\end{minipage}"
                )
            else:
                val = r"\ttfamily " + val
            lines.append(tex_escape(k) + " & " + val + r" \\")
        lines.append(r"\addlinespace")
    lines.append(r"\end{longtable}")
    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{longtable}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)
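
# Note: main() below emits the floating `table` variant via latex_table(); swap in
# latex_longtable() there if the environment listing outgrows a single page.
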
# -------------------- Preflight --------------------
REQUIRED_CMDS = ["lscpu"]
REQUIRED_MODULES = [
    "torch",
    "numpy",
    "scipy",
    "sklearn",
    "pynvml",  # provided by nvidia-ml-py
]


def preflight() -> List[str]:
    errors = []
    if platform.system().lower() != "linux":
        errors.append(
            f"This script supports Linux only (detected: {platform.system()})."
        )
    for c in REQUIRED_CMDS:
        if not which(c):
            errors.append(f"Missing required command: {c}")
    for m in REQUIRED_MODULES:
        try:
            __import__(m)
        except Exception:
            errors.append(f"Missing required Python package: {m}")
    # NVML driver availability (only checked if the pynvml import above succeeded)
    if not any("pynvml" in e for e in errors):
        try:
            pynvml = _import_nvml()
            pynvml.nvmlInit()
            count = pynvml.nvmlDeviceGetCount()
            if count < 1:
                errors.append("No NVIDIA GPUs detected by NVML.")
            pynvml.nvmlShutdown()
        except Exception as e:
            errors.append(f"NVIDIA NVML not available / driver not loaded: {e}")
    return errors
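
# Illustrative: preflight() returns [] when the environment is usable; otherwise it
# returns human-readable messages such as ["Missing required command: lscpu"].
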
# -------------------- Collectors --------------------
def collect_system() -> Dict[str, str]:
    info: Dict[str, str] = {}

    # OS / distro / kernel / arch
    os_pretty = ""
    try:
        with open("/etc/os-release", "r") as f:
            txt = f.read()
        m = re.search(r'^PRETTY_NAME="?(.*?)"?$', txt, flags=re.M)
        if m:
            os_pretty = m.group(1)
    except Exception:
        pass
    info["Operating System"] = os_pretty or f"{platform.system()} {platform.release()}"
    info["Kernel"] = platform.release()
    info["Architecture"] = platform.machine()

    # CPU (via lscpu)
    lscpu = run(["lscpu"])

    def kvs(text: str) -> Dict[str, str]:
        out = {}
        for line in text.splitlines():
            if ":" in line:
                k, v = line.split(":", 1)
                out[k.strip()] = v.strip()
        return out

    d = kvs(lscpu)
    info["CPU Model"] = d.get("Model name", d.get("Model Name", ""))

    # cores / threads
    sockets = d.get("Socket(s)", "")
    cores_per_socket = d.get("Core(s) per socket", "")
    threads_total = d.get("CPU(s)", "")
    if sockets and cores_per_socket:
        info["CPU Cores (physical)"] = f"{cores_per_socket} × {sockets}"
    else:
        info["CPU Cores (physical)"] = cores_per_socket or ""
    info["CPU Threads (logical)"] = threads_total or str(os.cpu_count() or "")

    # base / max freq
    # Prefer "CPU max MHz" and "CPU min MHz"; lscpu sometimes exposes "CPU MHz" (current)
    base = d.get("CPU min MHz", "")
    maxf = d.get("CPU max MHz", "")
    if base:
        info["CPU Base Frequency"] = f"{float(base):.0f} MHz"
    elif "@" in info["CPU Model"]:
        # fallback: parse from a model string like "Intel(R) ... @ 2.30GHz"
        m = re.search(r"@\s*([\d.]+)\s*([GM]Hz)", info["CPU Model"])
        if m:
            info["CPU Base Frequency"] = f"{m.group(1)} {m.group(2)}"
    else:
        cur = d.get("CPU MHz", "")
        if cur:
            info["CPU (Current) Frequency"] = f"{float(cur):.0f} MHz"
    if maxf:
        info["CPU Max Frequency"] = f"{float(maxf):.0f} MHz"

    # RAM total (/proc/meminfo)
    try:
        meminfo = open("/proc/meminfo").read()
        m = re.search(r"^MemTotal:\s+(\d+)\s+kB", meminfo, flags=re.M)
        if m:
            total_bytes = int(m.group(1)) * 1024
            info["Total RAM"] = human_bytes(total_bytes)
    except Exception:
        pass

    return info


def collect_gpu() -> Dict[str, str]:
    """
    Use NVML to enumerate GPUs and select the 'newer' one:
      1) Prefer a device whose name matches /4090/i
      2) Else highest CUDA compute capability (major, minor), tiebreaker by total memory
    Also reports driver version and CUDA driver version.
    """
    pynvml = _import_nvml()
    pynvml.nvmlInit()
    try:
        count = pynvml.nvmlDeviceGetCount()
        if count < 1:
            return {"Error": "No NVIDIA GPUs detected by NVML."}
        devices = []
        for i in range(count):
            h = pynvml.nvmlDeviceGetHandleByIndex(i)
            # name can be bytes or str depending on the wheel; normalize
            raw_name = pynvml.nvmlDeviceGetName(h)
            name = _to_text(raw_name)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(h)
            total_mem = getattr(mem_info, "total", 0)
            # compute capability may not exist on very old drivers
            try:
                maj, minr = pynvml.nvmlDeviceGetCudaComputeCapability(h)
            except Exception:
                maj, minr = (0, 0)
            devices.append(
                {
                    "index": i,
                    "handle": h,
                    "name": name,
                    "mem": total_mem,
                    "cc": (maj, minr),
                }
            )
        # Prefer an explicit "4090"
        pick = next(
            (d for d in devices if re.search(r"4090", d["name"], flags=re.I)), None
        )
        if pick is None:
            # Highest compute capability, then largest memory
            devices.sort(key=lambda x: (x["cc"][0], x["cc"][1], x["mem"]), reverse=True)
            pick = devices[0]
        # Driver version and CUDA driver version can be bytes or str
        drv_raw = pynvml.nvmlSystemGetDriverVersion()
        drv = _to_text(drv_raw)
        # CUDA driver version (integer like 12040 -> 12.4)
        cuda_drv_ver = ""
        try:
            v = pynvml.nvmlSystemGetCudaDriverVersion_v2()
        except Exception:
            v = pynvml.nvmlSystemGetCudaDriverVersion()
        try:
            major = v // 1000
            minor = (v % 1000) // 10
            patch = v % 10
            cuda_drv_ver = f"{major}.{minor}.{patch}" if patch else f"{major}.{minor}"
        except Exception:
            cuda_drv_ver = ""
        gpu_info = {
            "Selected GPU Name": pick["name"],
            "Selected GPU Memory": human_bytes(pick["mem"]),
            "Selected GPU Compute Capability": f"{pick['cc'][0]}.{pick['cc'][1]}",
            "NVIDIA Driver Version": drv,
            "CUDA (Driver) Version": cuda_drv_ver,
        }
        return gpu_info
    finally:
        pynvml.nvmlShutdown()


def summarize_numpy_build_config() -> str:
    """
    Capture numpy.__config__.show() and try to extract the BLAS/LAPACK backend line(s).
    """
    import numpy as np
    from io import StringIO
    import sys as _sys

    buf = StringIO()
    _stdout = _sys.stdout
    try:
        _sys.stdout = buf
        np.__config__.show()
    finally:
        _sys.stdout = _stdout
    txt = buf.getvalue()
    # Heuristic: capture lines mentioning MKL, OpenBLAS, BLIS, LAPACK
    lines = [
        l
        for l in txt.splitlines()
        if re.search(r"(MKL|OpenBLAS|BLAS|LAPACK|BLIS)", l, re.I)
    ]
    if not lines:
        # fall back to the first ~12 lines
        lines = txt.splitlines()[:12]
    # Keep it compact
    return "\n".join(lines[:20]).strip()


def collect_software() -> Dict[str, str]:
    info: Dict[str, str] = {}
    import sys as _sys
    import torch
    import numpy as _np
    import scipy as _sp
    import sklearn as _sk

    info["Python"] = _sys.version.split()[0]

    # PyTorch + built CUDA/cuDNN
    info["PyTorch"] = torch.__version__
    info["PyTorch Built CUDA"] = getattr(torch.version, "cuda", "") or ""
    try:
        cudnn_build = torch.backends.cudnn.version()  # integer
        info["cuDNN (PyTorch build)"] = str(cudnn_build) if cudnn_build else ""
    except Exception:
        pass

    # scikit-learn
    info["scikit-learn"] = _sk.__version__

    # NumPy / SciPy + build config
    info["NumPy"] = _np.__version__
    info["SciPy"] = _sp.__version__
    info["NumPy Build Config"] = summarize_numpy_build_config()

    return info
# -------------------- Main --------------------
def main():
    ap = argparse.ArgumentParser(
        description="Generate LaTeX table of system/software environment for thesis (Linux + NVIDIA)."
    )
    ap.add_argument(
        "--output", "-o", type=str, help="Write LaTeX to this file instead of stdout."
    )
    ap.add_argument(
        "--caption", type=str, default="Computational Environment (Hardware & Software)"
    )
    args = ap.parse_args()

    errs = preflight()
    if errs:
        msg = (
            "Preflight check failed:\n- "
            + "\n- ".join(errs)
            + "\n"
            + "Please install missing components and re-run."
        )
        print(msg, file=sys.stderr)
        sys.exit(1)

    sections: List[Tuple[str, Dict[str, str]]] = []
    sections.append(("System", collect_system()))
    sections.append(("GPU (Selected Newer Device)", collect_gpu()))
    sections.append(("Software Environment", collect_software()))

    latex = latex_table(sections, caption=args.caption)
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(latex)
        print(f"Wrote LaTeX to: {args.output}")
    else:
        print(latex)


if __name__ == "__main__":
    main()