# Source code for simvx.graphics.gpu.capabilities

"""Unified, immutable render-capability snapshot (Vulkan backend).

``RenderCapabilities`` is built once during ``init_vulkan`` after device
selection and holds **probed facts only** about the host interpreter and the
selected GPU. It subsumes the former ``_query_device_features`` dict: there is
one capability object, read identically everywhere, with no shim.

The web backend mirrors this shape in
``packages/web/src/simvx/web/runtime/js/capabilities.js`` (``Capabilities``):
both expose a ``meets()`` predicate with the same truth-table semantics so a
pass can gate on ``{features, limits}`` regardless of backend.

Probing is deliberately separated from the dataclass. The module-level probe
helpers take raw Vulkan handles and return plain scalars; the frozen dataclass
is constructible directly from those scalars. Unit tests therefore build a
``RenderCapabilities`` from injected values with no GPU, and only the
``probe`` classmethod touches Vulkan.
"""

from __future__ import annotations

import logging
import sys
import sysconfig
from dataclasses import dataclass, field
from typing import Any

import vulkan as vk

# Public API of this module: the capability dataclass plus the standalone
# probe helpers it is built from.
__all__ = [
    "RenderCapabilities",
    "probe_free_threaded",
    "probe_gil_disabled_build",
    "probe_device_features",
    "probe_physical_device_count",
    "probe_device_group",
    "probe_external_memory_fd",
    "probe_dedicated_queue_families",
    "probe_gpu_timing",
]

# Module-scoped logger; the probe helpers use it to report Vulkan enumeration
# errors they deliberately swallow.
log = logging.getLogger(__name__)



[docs]
def probe_free_threaded() -> bool:
    """Report whether the GIL is disabled in the running interpreter.

    The answer comes from ``sys._is_gil_enabled()``. Interpreters that predate
    the flag (before 3.13) do not expose that function and always run with the
    GIL, so a missing function is reported as ``False``.
    """
    gil_query = getattr(sys, "_is_gil_enabled", None)
    # No query function -> an older build that cannot be free-threaded.
    return False if gil_query is None else not gil_query()




[docs]
def probe_gil_disabled_build() -> bool:
    """Report whether the interpreter was *built* free-threaded.

    Reads the ``Py_GIL_DISABLED`` build-config variable, which is independent
    of whether the GIL has been re-enabled at runtime via ``PYTHON_GIL=1``.
    """
    build_flag = sysconfig.get_config_var("Py_GIL_DISABLED")
    # The variable is absent (None) or 0 on regular builds; collapse to a bool.
    return True if build_flag else False




[docs]
def probe_device_features(physical_device: Any) -> dict[str, bool]:
    """Read the optional Vulkan device-feature bits SimVX cares about.

    The result is a plain ``dict[str, bool]`` so that the logical-device
    creation path (see ``create_logical_device``) can request exactly what the
    device reports. ``RenderCapabilities.probe`` folds the same dict into typed
    fields.
    """
    reported = vk.vkGetPhysicalDeviceFeatures(physical_device)
    # (SimVX key, VkPhysicalDeviceFeatures member), in the order the keys are
    # emitted.
    wanted = (
        ("multi_draw_indirect", "multiDrawIndirect"),
        # imageCubeArray gates the samplerCubeArray usage in the forward
        # shader's reflection-probe path. Near-universally supported on
        # desktop GPUs.
        ("image_cube_array", "imageCubeArray"),
        # Compressed-texture feature gates. Each is the coarse "is this family
        # legal at all" bit; the per-format SAMPLED feature is checked
        # separately via Engine.format_supported(). BC is near-universal on
        # desktop.
        ("texture_compression_bc", "textureCompressionBC"),
        ("texture_compression_etc2", "textureCompressionETC2"),
        ("texture_compression_astc_ldr", "textureCompressionASTC_LDR"),
    )
    return {key: bool(getattr(reported, member)) for key, member in wanted}




[docs]
def probe_physical_device_count(instance: Any) -> int:
    """Count the Vulkan-capable physical devices visible on ``instance``.

    Any enumeration failure is logged as a warning and reported as ``0`` so
    that capability probing cannot abort init.
    """
    try:
        devices = vk.vkEnumeratePhysicalDevices(instance)
        count = len(devices)
    except Exception as exc:  # noqa: BLE001 (enumeration must never crash init)
        log.warning("vkEnumeratePhysicalDevices failed during capability probe: %s", exc)
        count = 0
    return count




[docs]
def probe_device_group(instance: Any) -> bool:
    """Report whether linked device groups (``VK_KHR_device_group`` / 1.1 core)
    are usable on this instance.

    ``vkEnumeratePhysicalDeviceGroups`` may be unresolvable through the loader
    on an instance created without the right API version / extension (observed
    on the dev box's Iris Xe + 1.2 loader path). That failure is caught and
    reported as ``False`` instead of letting an unresolvable-proc error escape
    init.
    """
    try:
        groups = vk.vkEnumeratePhysicalDeviceGroups(instance)
    except Exception as exc:  # noqa: BLE001 (unresolvable proc / missing ext -> no device groups)
        log.debug("vkEnumeratePhysicalDeviceGroups unavailable: %s", exc)
        return False
    # Only a group holding more than one physical device signals true linking;
    # a single-device group is the trivial degenerate case (== one GPU).
    for group in groups:
        if int(group.physicalDeviceCount) > 1:
            return True
    return False




[docs]
def probe_external_memory_fd(physical_device: Any) -> bool:
    """Report whether the device exposes ``VK_KHR_external_memory_fd``.

    Gates the dma-buf zero-copy cross-device transfer path (D8 multi-GPU). The
    probe reports *availability* only: the extension must also be enabled at
    logical-device creation to be usable. Absent on this dev box -> the
    staging-copy floor is always chosen.
    """
    try:
        available = vk.vkEnumerateDeviceExtensionProperties(physical_device, None)
    except Exception as exc:  # noqa: BLE001 (enumeration must never crash init)
        log.debug("vkEnumerateDeviceExtensionProperties failed: %s", exc)
        return False

    # Prefer the binding's constant; fall back to the literal name if the
    # binding does not define it.
    wanted = getattr(vk, "VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME", "VK_KHR_external_memory_fd")

    def _as_text(raw: Any) -> str:
        # extensionName is handled as either str or bytes; normalise to str.
        return raw if isinstance(raw, str) else raw.decode("utf-8")

    return any(_as_text(prop.extensionName) == wanted for prop in available)




[docs]
def probe_dedicated_queue_families(physical_device: Any) -> tuple[int | None, int | None]:
    """Find the dedicated compute and dedicated transfer queue families.

    Returns ``(dedicated_compute_qf, dedicated_transfer_qf)`` where:

    - dedicated compute is the first family offering COMPUTE but **not**
      GRAPHICS (async compute),
    - dedicated transfer is the first family offering TRANSFER but neither
      GRAPHICS nor COMPUTE (DMA-only copy engine). COMPUTE/GRAPHICS families
      implicitly support transfer, so only the genuinely transfer-only family
      qualifies.

    An entry is ``None`` when the device exposes no such family (e.g.
    integrated GPUs with a single universal queue family).
    """
    graphics_bit = vk.VK_QUEUE_GRAPHICS_BIT
    compute_bit = vk.VK_QUEUE_COMPUTE_BIT
    transfer_bit = vk.VK_QUEUE_TRANSFER_BIT

    found_compute: int | None = None
    found_transfer: int | None = None
    families = vk.vkGetPhysicalDeviceQueueFamilyProperties(physical_device)
    for index, family in enumerate(families):
        caps = family.queueFlags
        if caps & graphics_bit:
            # A graphics-capable family is never "dedicated" for either role.
            continue
        if caps & compute_bit:
            # Compute without graphics; keep only the first such family.
            if found_compute is None:
                found_compute = index
        elif caps & transfer_bit and found_transfer is None:
            # Transfer with neither graphics nor compute; first one wins.
            found_transfer = index
    return found_compute, found_transfer




[docs]
def probe_gpu_timing(physical_device: Any) -> bool:
    """Report whether the device supports GPU timestamp queries.

    Support is inferred from the device limits: ``timestampPeriod > 0``.
    """
    device_props = vk.vkGetPhysicalDeviceProperties(physical_device)
    tick_period = float(device_props.limits.timestampPeriod)
    return tick_period > 0.0




[docs]
@dataclass(frozen=True)
class RenderCapabilities:
    """Immutable snapshot of host + GPU render capabilities.

    Probed facts only: no policy, no derived "should we" decisions. Consumers
    (the renderer, the future threading/queue/multi-GPU paths) read these fields
    and decide. Build via :meth:`probe` at init, or construct directly from
    scalar fields in tests.

    Instances are hashable: the ``limits`` dict is excluded from the generated
    ``__hash__`` (a dict is unhashable) but still takes part in equality.
    """

    # Fields whose value is an optional queue-family *index*. For these,
    # "present" means "not None": index 0 is a valid family, so a plain truth
    # test would misreport it as missing. Deliberately unannotated so the
    # dataclass machinery does not treat it as a field.
    _OPTIONAL_INDEX_FIELDS = frozenset({"dedicated_compute_qf", "dedicated_transfer_qf"})

    # --- Host interpreter -------------------------------------------------
    free_threaded: bool = False
    """GIL is disabled at runtime (``not sys._is_gil_enabled()``)."""
    gil_disabled_build: bool = False
    """Interpreter was built free-threaded (``Py_GIL_DISABLED`` config var)."""

    # --- Device topology --------------------------------------------------
    physical_device_count: int = 1
    device_group: bool = False
    """A linked device group with >1 physical device is available."""
    external_memory_fd: bool = False
    """``VK_KHR_external_memory_fd`` is *available* (probed) on the selected device.

    Availability only: it does not mean the extension was enabled at logical-device
    creation. Use :attr:`external_memory_fd_enabled` to gate the dma-buf zero-copy
    path; this field reports whether enabling it is even possible. ``False`` on this
    dev box."""
    external_memory_fd_enabled: bool = False
    """``VK_KHR_external_memory_fd`` was *enabled* at logical-device creation.

    This is the gate the D8 multi-GPU cross-device transfer reads to select the
    dma-buf zero-copy path (:mod:`simvx.graphics.gpu.multi_device`): a probed-but-
    not-enabled extension cannot export/import fds, so DMABUF stays unselected and
    the always-available staging-copy floor is used until the rig opts in and the
    secondary + primary devices both enable it. ``False`` on this dev box (the
    single-GPU path never requests it), so the dma-buf raise is never reached."""

    # --- Queues -----------------------------------------------------------
    dedicated_compute_qf: int | None = None
    dedicated_transfer_qf: int | None = None

    # --- Timing -----------------------------------------------------------
    gpu_timing: bool = False

    # --- Device feature bits (folded from the former _query_device_features)
    multi_draw_indirect: bool = False
    image_cube_array: bool = False
    texture_compression_bc: bool = False
    texture_compression_etc2: bool = False
    texture_compression_astc_ldr: bool = False

    # --- Raw GPU limits snapshot (plain dict, mirrors capabilities.js) -----
    # hash=False: a dict is unhashable, so including it in the generated
    # __hash__ would make hash(instance) raise TypeError. Equality still
    # compares it.
    limits: dict[str, int] = field(default_factory=dict, hash=False)

    def meets(
        self,
        *,
        features: list[str] | None = None,
        limits: dict[str, int] | None = None,
    ) -> bool:
        """Predicate mirroring the web ``Capabilities.meets()`` truth table.

        Args:
            features: Capability field names that must all be satisfied (e.g.
                ``"multi_draw_indirect"``, ``"free_threaded"``,
                ``"gpu_timing"``). Only declared dataclass fields count; any
                other name fails, including methods and dunder attributes, so a
                typo never silently passes (matching the web ``has()``
                semantics). Optional queue-family index fields are satisfied
                when they are not ``None`` (index ``0`` is valid); every other
                field must be truthy.
            limits: Maps a GPU-limit key to a minimum required value. Every
                requested key must be present in :attr:`limits` and ``>=`` the
                requested value.

        Returns:
            ``True`` when every requirement holds. Empty requirements pass.
        """
        declared = self.__dataclass_fields__
        for name in features or ():
            if name not in declared:
                # Not a capability field: a typo, a method name, a dunder...
                return False
            value = getattr(self, name)
            if name in self._OPTIONAL_INDEX_FIELDS:
                if value is None:
                    return False
            elif not value:
                return False
        for key, minimum in (limits or {}).items():
            have = self.limits.get(key)
            if have is None or have < minimum:
                return False
        return True

    @classmethod
    def probe(cls, instance: Any, physical_device: Any) -> RenderCapabilities:
        """Build a capability snapshot from live Vulkan handles + the host build.

        Pure probing: all GPU access is delegated to the module-level
        ``probe_*`` helpers so this stays the single Vulkan touch point.

        NOTE(review): ``limits`` and ``external_memory_fd_enabled`` are not set
        here and keep their defaults (``{}`` / ``False``). With an empty
        ``limits``, ``meets(limits=...)`` fails for any requested key on a
        snapshot built this way; confirm they are filled in elsewhere.
        """
        feats = probe_device_features(physical_device)
        compute_qf, transfer_qf = probe_dedicated_queue_families(physical_device)
        return cls(
            free_threaded=probe_free_threaded(),
            gil_disabled_build=probe_gil_disabled_build(),
            physical_device_count=probe_physical_device_count(instance),
            device_group=probe_device_group(instance),
            external_memory_fd=probe_external_memory_fd(physical_device),
            dedicated_compute_qf=compute_qf,
            dedicated_transfer_qf=transfer_qf,
            gpu_timing=probe_gpu_timing(physical_device),
            multi_draw_indirect=feats["multi_draw_indirect"],
            image_cube_array=feats["image_cube_array"],
            texture_compression_bc=feats["texture_compression_bc"],
            texture_compression_etc2=feats["texture_compression_etc2"],
            texture_compression_astc_ldr=feats["texture_compression_astc_ldr"],
        )