# NOTE: removed web-viewer scrape residue that preceded the shebang
# ("Files", "502 lines", "15 KiB", "Raw Permalink Normal View History",
# and a timestamp) — it was not Python and would be a syntax error.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Generate a LaTeX longtable with system + software info for a thesis (Linux + NVIDIA).
Requirements (preflight will check and error if missing):
- Linux OS
- lscpu (util-linux)
- Python packages: nvidia-ml-py3 (pynvml), torch, numpy, scipy, scikit-learn
- NVIDIA driver present and at least one GPU visible via NVML
What it reports (per users list):
System:
- OS name + version + distribution (Linux) + kernel version + system arch
- CPU model name, number of cores and threads, base frequencies (best-effort via lscpu)
- Total RAM capacity
- GPU(s): model name (only the newer one; prefer a name matching 4090, else highest compute capability),
memory size, driver version, CUDA (driver) version, cuDNN version (if used via PyTorch)
Software environment:
- Python version
- PyTorch version + built CUDA/cuDNN version
- scikit-learn version
- NumPy / SciPy version (+ NumPy build config summary: MKL/OpenBLAS/etc.)
"""
import argparse
import os
import platform
import re
import shutil
import subprocess
import sys
from typing import Dict, List, Tuple
# -------------------- Helper --------------------
def _import_nvml():
"""
Try to import NVML from the supported packages:
- 'nvidia-ml-py' (preferred, maintained): provides module 'pynvml'
- legacy 'pynvml' (deprecated but still widely installed)
Returns the imported module object (with nvml... symbols).
"""
try:
import pynvml as _nvml # provided by 'nvidia-ml-py' or old 'pynvml'
return _nvml
except Exception as e:
raise ImportError(
"NVML not importable. Please install the maintained package:\n"
" pip install nvidia-ml-py\n"
"(and uninstall deprecated ones: pip uninstall nvidia-ml-py3 pynvml)"
) from e
def _to_text(x) -> str:
"""Return a clean str whether NVML gives bytes or str."""
if isinstance(x, bytes):
try:
return x.decode(errors="ignore")
except Exception:
return x.decode("utf-8", "ignore")
return str(x)
# -------------------- Utilities --------------------
def which(cmd: str) -> str:
    """Return the resolved path of *cmd* on PATH, or "" when not found."""
    path = shutil.which(cmd)
    return path if path else ""
def run(cmd: List[str], timeout: int = 6) -> str:
    """Execute *cmd*, returning stripped combined stdout+stderr.

    Any failure (missing binary, non-zero exit status, timeout, decode
    trouble) is swallowed and yields "" — callers treat output as optional.
    """
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=timeout)
        text = raw.decode(errors="ignore")
        return text.strip()
    except Exception:
        return ""
def human_bytes(nbytes: int) -> str:
    """Format a byte count as a human-readable string, e.g. ``1.50 KiB``.

    Returns "" when *nbytes* cannot be converted to float.
    """
    try:
        value = float(nbytes)
    except Exception:
        return ""
    # Divide through the unit ladder; `not value >= 1024` (rather than
    # `value < 1024`) keeps the original behavior for NaN inputs.
    for unit in ("B", "KiB", "MiB", "GiB"):
        if not value >= 1024:
            return f"{value:.2f} {unit}"
        value /= 1024.0
    return f"{value:.2f} TiB"
# Single characters that LaTeX treats specially, mapped to safe replacements.
# tex_escape() substitutes per character, so mapping order is irrelevant.
LATEX_SPECIALS = {
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}


def tex_escape(s: str) -> str:
    """Escape LaTeX special characters in *s*; ``None`` becomes ""."""
    if s is None:
        return ""
    pieces = [LATEX_SPECIALS.get(ch, ch) for ch in str(s)]
    return "".join(pieces)
def latex_table(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    """
    Render *sections* as a floating two-column LaTeX ``table`` (Item/Details).

    Args:
        sections: list of (section title, {item: detail}) pairs; sections with
            an empty mapping are skipped entirely.
        caption: table caption text (LaTeX-escaped).

    Returns:
        LaTeX source preceded by a commented preamble hint (booktabs/array).
    """
    lines = []
    lines.append(r"\begin{table}[p]")  # float; use [p] or [tbp] as you prefer
    lines.append(r"\centering")
    lines.append(r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}")
    lines.append(r"\begin{tabular}{p{0.34\linewidth} p{0.62\linewidth}}")
    lines.append(r"\toprule")
    lines.append(r"\textbf{Item} & \textbf{Details} \\")
    lines.append(r"\midrule")
    for title, kv in sections:
        if not kv:
            continue
        lines.append(r"\multicolumn{2}{l}{\textbf{" + tex_escape(title) + r"}} \\")
        for k, v in kv.items():
            # Escape once (the original escaped the value twice in the
            # long-value branch). Long or multi-line values go into a
            # top-aligned minipage so they wrap inside the cell.
            escaped = tex_escape(v)
            if "\n" in v or len(v) > 120:
                val = (
                    r"\begin{minipage}[t]{\linewidth}\ttfamily\small "
                    + escaped
                    + r"\end{minipage}"
                )
            else:
                val = r"\ttfamily " + escaped
            lines.append(tex_escape(k) + " & " + val + r" \\")
        lines.append(r"\addlinespace")
    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")
    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)
def latex_longtable(sections: List[Tuple[str, Dict[str, str]]], caption: str) -> str:
    """
    Render *sections* as a multi-page LaTeX ``longtable`` (Item/Details).

    Same content model as :func:`latex_table`, but the table may break across
    pages; the header row repeats on continuation pages and a ``\\bottomrule``
    closes every page as well as the final one.

    Args:
        sections: list of (section title, {item: detail}) pairs; sections with
            an empty mapping are skipped entirely.
        caption: table caption text (LaTeX-escaped).

    Returns:
        LaTeX source preceded by a commented preamble hint
        (booktabs/longtable/array).
    """
    lines = []
    lines.append(r"\begin{longtable}{p{0.34\linewidth} p{0.62\linewidth}}")
    lines.append(r"\caption{" + tex_escape(caption) + r"} \label{tab:system_setup}\\")
    lines.append(r"\toprule")
    lines.append(r"\textbf{Item} & \textbf{Details} \\")
    lines.append(r"\midrule")
    lines.append(r"\endfirsthead")
    lines.append(r"\toprule \textbf{Item} & \textbf{Details} \\ \midrule")
    lines.append(r"\endhead")
    lines.append(r"\bottomrule")
    lines.append(r"\endfoot")
    lines.append(r"\bottomrule")
    lines.append(r"\endlastfoot")
    for title, kv in sections:
        if not kv:
            continue
        lines.append(r"\multicolumn{2}{l}{\textbf{" + tex_escape(title) + r"}} \\")
        for k, v in kv.items():
            # Escape once (the original escaped the value twice in the
            # long-value branch). Long or multi-line values go into a
            # top-aligned minipage so they wrap inside the cell.
            escaped = tex_escape(v)
            if "\n" in v or len(v) > 120:
                val = (
                    r"\begin{minipage}[t]{\linewidth}\ttfamily\small "
                    + escaped
                    + r"\end{minipage}"
                )
            else:
                val = r"\ttfamily " + escaped
            lines.append(tex_escape(k) + " & " + val + r" \\")
        lines.append(r"\addlinespace")
    lines.append(r"\end{longtable}")
    preamble_hint = r"""
% ---- Add to your LaTeX preamble ----
% \usepackage{booktabs}
% \usepackage{longtable}
% \usepackage{array}
% ------------------------------------
"""
    return preamble_hint + "\n".join(lines)
# -------------------- Preflight --------------------
# Commands that must be available on PATH (checked in preflight()).
REQUIRED_CMDS = ["lscpu"]
# Importable Python module names required by the collectors below.
# 'pynvml' is the NVML binding module; it is provided by the maintained
# 'nvidia-ml-py' distribution (see _import_nvml()).
REQUIRED_MODULES = [
    "torch",
    "numpy",
    "scipy",
    "sklearn",
    "pynvml",
]
def preflight() -> List[str]:
    """
    Verify the runtime prerequisites (Linux, lscpu, Python packages, NVML GPU).

    Returns:
        Human-readable error strings; empty list when everything is available.
    """
    errors: List[str] = []
    if platform.system().lower() != "linux":
        errors.append(
            f"This script supports Linux only (detected: {platform.system()})."
        )
    # Use the module-level constant instead of re-hardcoding "lscpu".
    for c in REQUIRED_CMDS:
        if not which(c):
            errors.append(f"Missing required command: {c}")
    for m in REQUIRED_MODULES:
        try:
            __import__(m)
        except Exception:
            errors.append(f"Missing required Python package: {m}")
    # NVML driver availability. Only probe when the pynvml bindings imported.
    # (The previous check `"pynvml" not in errors` tested list membership
    # against whole message strings, so it was always True and never skipped
    # the probe when the package was missing.)
    if not any("pynvml" in e for e in errors):
        try:
            pynvml = _import_nvml()
            pynvml.nvmlInit()
            count = pynvml.nvmlDeviceGetCount()
            if count < 1:
                errors.append("No NVIDIA GPUs detected by NVML.")
            pynvml.nvmlShutdown()
        except Exception as e:
            errors.append(f"NVIDIA NVML not available / driver not loaded: {e}")
    return errors
# -------------------- Collectors --------------------
def collect_system() -> Dict[str, str]:
    """
    Collect OS / CPU / RAM facts (Linux only; CPU details best-effort via lscpu).

    Returns:
        Mapping of human-readable item names to values; undeterminable items
        are omitted or left empty rather than raising.
    """
    info: Dict[str, str] = {}

    # OS / distro / kernel / arch
    os_pretty = ""
    try:
        with open("/etc/os-release", "r") as f:
            txt = f.read()
        m = re.search(r'^PRETTY_NAME="?(.*?)"?$', txt, flags=re.M)
        if m:
            os_pretty = m.group(1)
    except Exception:
        pass  # best-effort; fall back to platform module below
    info["Operating System"] = os_pretty or f"{platform.system()} {platform.release()}"
    info["Kernel"] = platform.release()
    info["Architecture"] = platform.machine()

    # CPU (via lscpu)
    lscpu = run(["lscpu"])

    def kvs(text: str) -> Dict[str, str]:
        """Parse 'Key: value' lines into a dict (later keys win)."""
        out: Dict[str, str] = {}
        for line in text.splitlines():
            if ":" in line:
                k, v = line.split(":", 1)
                out[k.strip()] = v.strip()
        return out

    def fmt_mhz(raw: str) -> str:
        """Format an lscpu MHz value like '3000.0000' as '3000 MHz' ('' on junk)."""
        try:
            return f"{float(raw):.0f} MHz"
        except Exception:
            # Previously an unparsable value crashed the whole collector.
            return ""

    d = kvs(lscpu)
    info["CPU Model"] = d.get("Model name", d.get("Model Name", ""))

    # cores / threads
    sockets = d.get("Socket(s)", "")
    cores_per_socket = d.get("Core(s) per socket", "")
    threads_total = d.get("CPU(s)", "")
    if sockets and cores_per_socket:
        info["CPU Cores (physical)"] = f"{cores_per_socket} × {sockets}"
    else:
        info["CPU Cores (physical)"] = cores_per_socket or ""
    info["CPU Threads (logical)"] = threads_total or str(os.cpu_count() or "")

    # base / max freq
    # Prefer "CPU min MHz"/"CPU max MHz"; some lscpu versions only expose the
    # current "CPU MHz".
    base = d.get("CPU min MHz", "")
    maxf = d.get("CPU max MHz", "")
    if base:
        freq = fmt_mhz(base)
        if freq:
            info["CPU Base Frequency"] = freq
    elif "@" in info["CPU Model"]:
        # fallback: parse from model string like "Intel(R) ... @ 2.30GHz"
        m = re.search(r"@\s*([\d.]+)\s*([GM]Hz)", info["CPU Model"])
        if m:
            info["CPU Base Frequency"] = f"{m.group(1)} {m.group(2)}"
    else:
        cur = d.get("CPU MHz", "")
        if cur:
            freq = fmt_mhz(cur)
            if freq:
                info["CPU (Current) Frequency"] = freq
    if maxf:
        freq = fmt_mhz(maxf)
        if freq:
            info["CPU Max Frequency"] = freq

    # RAM total (/proc/meminfo); context manager closes the handle promptly
    # (the original left the file object to the garbage collector).
    try:
        with open("/proc/meminfo") as f:
            meminfo = f.read()
        m = re.search(r"^MemTotal:\s+(\d+)\s+kB", meminfo, flags=re.M)
        if m:
            total_bytes = int(m.group(1)) * 1024
            info["Total RAM"] = human_bytes(total_bytes)
    except Exception:
        pass  # non-Linux or unreadable /proc
    return info
def collect_gpu() -> Dict[str, str]:
    """
    Use NVML to enumerate GPUs and select the 'newer' one:
      1) Prefer a device whose name matches /4090/i
      2) Else highest CUDA compute capability (major, minor), tiebreaker by
         total memory
    Also reports driver version and CUDA (driver) version.

    Returns:
        Mapping of labels to values for the selected GPU, or
        {"Error": ...} when NVML reports zero devices.
    """
    pynvml = _import_nvml()
    pynvml.nvmlInit()
    try:
        count = pynvml.nvmlDeviceGetCount()
        if count < 1:
            return {"Error": "No NVIDIA GPUs detected by NVML."}
        devices = []
        for i in range(count):
            h = pynvml.nvmlDeviceGetHandleByIndex(i)
            # name can be bytes or str depending on wheel; normalize
            raw_name = pynvml.nvmlDeviceGetName(h)
            name = _to_text(raw_name)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(h)
            total_mem = getattr(mem_info, "total", 0)
            # compute capability may not exist on very old drivers
            try:
                maj, minr = pynvml.nvmlDeviceGetCudaComputeCapability(h)
            except Exception:
                maj, minr = (0, 0)
            devices.append(
                {
                    "index": i,
                    "handle": h,
                    "name": name,
                    "mem": total_mem,
                    "cc": (maj, minr),
                }
            )
        # Prefer explicit "4090" (case-insensitive substring of the name)
        pick = next(
            (d for d in devices if re.search(r"4090", d["name"], flags=re.I)), None
        )
        if pick is None:
            # Highest compute capability, then largest memory
            devices.sort(key=lambda x: (x["cc"][0], x["cc"][1], x["mem"]), reverse=True)
            pick = devices[0]
        # Driver version and CUDA driver version can be bytes or str
        drv_raw = pynvml.nvmlSystemGetDriverVersion()
        drv = _to_text(drv_raw)
        # CUDA driver version (integer like 12040 -> 12.4)
        cuda_drv_ver = ""
        try:
            v = pynvml.nvmlSystemGetCudaDriverVersion_v2()
        except Exception:
            # NOTE(review): if this fallback also raises, the exception
            # propagates to the caller (shutdown still runs via finally).
            v = pynvml.nvmlSystemGetCudaDriverVersion()
        try:
            major = v // 1000
            minor = (v % 1000) // 10
            patch = v % 10
            # Patch digit is rarely non-zero in NVML's encoding; drop it when 0.
            cuda_drv_ver = f"{major}.{minor}.{patch}" if patch else f"{major}.{minor}"
        except Exception:
            cuda_drv_ver = ""
        gpu_info = {
            "Selected GPU Name": pick["name"],
            "Selected GPU Memory": human_bytes(pick["mem"]),
            "Selected GPU Compute Capability": f"{pick['cc'][0]}.{pick['cc'][1]}",
            "NVIDIA Driver Version": drv,
            "CUDA (Driver) Version": cuda_drv_ver,
        }
        return gpu_info
    finally:
        # Always release NVML, even on errors above.
        pynvml.nvmlShutdown()
def summarize_numpy_build_config() -> str:
    """
    Summarize NumPy's build configuration (BLAS/LAPACK backend).

    Captures the stdout of ``numpy.__config__.show()`` and keeps the lines
    mentioning a known backend (MKL / OpenBLAS / BLIS / BLAS / LAPACK); if
    none match, falls back to the first ~12 lines of the raw dump.

    Returns:
        A compact newline-joined summary (at most 20 lines), stripped.
    """
    import contextlib
    import numpy as np
    from io import StringIO

    buf = StringIO()
    # contextlib.redirect_stdout restores sys.stdout even on exceptions,
    # replacing the manual save/assign/restore dance.
    with contextlib.redirect_stdout(buf):
        np.__config__.show()
    txt = buf.getvalue()
    # Heuristic: keep lines mentioning a BLAS/LAPACK implementation.
    keep = [
        line
        for line in txt.splitlines()
        if re.search(r"(MKL|OpenBLAS|BLAS|LAPACK|BLIS)", line, re.I)
    ]
    if not keep:
        # fall back to first ~12 lines
        keep = txt.splitlines()[:12]
    # Keep it compact
    return "\n".join(keep[:20]).strip()
def collect_software() -> Dict[str, str]:
    """
    Collect versions of Python, PyTorch (+ built CUDA/cuDNN), scikit-learn,
    NumPy and SciPy, plus a summary of NumPy's BLAS/LAPACK build config.
    """
    import sys as _sys
    import numpy as _np
    import scipy as _sp
    import sklearn as _sk
    import torch

    info: Dict[str, str] = {}
    info["Python"] = _sys.version.split()[0]

    # PyTorch plus the CUDA / cuDNN versions it was built against
    info["PyTorch"] = torch.__version__
    info["PyTorch Built CUDA"] = getattr(torch.version, "cuda", "") or ""
    try:
        cudnn_build = torch.backends.cudnn.version()  # integer
        info["cuDNN (PyTorch build)"] = "" if not cudnn_build else str(cudnn_build)
    except Exception:
        pass  # cuDNN backend not queryable in this build

    # scikit-learn
    info["scikit-learn"] = _sk.__version__

    # NumPy / SciPy + build config
    info["NumPy"] = _np.__version__
    info["SciPy"] = _sp.__version__
    info["NumPy Build Config"] = summarize_numpy_build_config()
    return info
# -------------------- Main --------------------
def main() -> None:
    """CLI entry point: preflight checks, collect info, emit LaTeX."""
    ap = argparse.ArgumentParser(
        description="Generate LaTeX table of system/software environment for thesis (Linux + NVIDIA)."
    )
    ap.add_argument(
        "--output", "-o", type=str, help="Write LaTeX to this file instead of stdout."
    )
    ap.add_argument(
        "--caption", type=str, default="Computational Environment (Hardware & Software)"
    )
    ap.add_argument(
        "--longtable",
        action="store_true",
        help="Emit a multi-page longtable instead of a floating table.",
    )
    args = ap.parse_args()

    errs = preflight()
    if errs:
        msg = (
            "Preflight check failed:\n- "
            + "\n- ".join(errs)
            + "\n"
            + "Please install missing components and re-run."
        )
        print(msg, file=sys.stderr)
        sys.exit(1)

    sections: List[Tuple[str, Dict[str, str]]] = []
    sections.append(("System", collect_system()))
    sections.append(("GPU (Selected Newer Device)", collect_gpu()))
    sections.append(("Software Environment", collect_software()))

    # latex_longtable was defined but never reachable, although the module
    # docstring promises a longtable; expose it behind an opt-in flag so the
    # default output stays exactly as before.
    render = latex_longtable if args.longtable else latex_table
    latex = render(sections, caption=args.caption)

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(latex)
        print(f"Wrote LaTeX to: {args.output}")
    else:
        print(latex)


if __name__ == "__main__":
    main()