"""Unified, immutable render-capability snapshot (Vulkan backend).
``RenderCapabilities`` is built once during ``init_vulkan`` after device
selection and holds **probed facts only** about the host interpreter and the
selected GPU. It subsumes the former ``_query_device_features`` dict: there is
one capability object, read identically everywhere, with no shim.
The web backend mirrors this shape in
``packages/web/src/simvx/web/runtime/js/capabilities.js`` (``Capabilities``):
both expose a ``meets()`` predicate with the same truth-table semantics so a
pass can gate on ``{features, limits}`` regardless of backend.
Probing is deliberately separated from the dataclass. The module-level probe
helpers take raw Vulkan handles and return plain scalars; the frozen dataclass
is constructible directly from those scalars. Unit tests therefore build a
``RenderCapabilities`` from injected values with no GPU, and only the
``probe`` classmethod touches Vulkan.
"""
from __future__ import annotations

import logging
import sys
import sysconfig
from dataclasses import dataclass, field, fields
from typing import Any

import vulkan as vk
# Public API: the capability snapshot class plus each standalone probe helper.
__all__ = [
"RenderCapabilities",
"probe_free_threaded",
"probe_gil_disabled_build",
"probe_device_features",
"probe_physical_device_count",
"probe_device_group",
"probe_external_memory_fd",
"probe_dedicated_queue_families",
"probe_gpu_timing",
]
# Module-level logger; the probe helpers use it to report enumeration failures.
log = logging.getLogger(__name__)
[docs]
def probe_free_threaded() -> bool:
    """Report whether the running interpreter currently has the GIL disabled.

    Only builds that know about the free-threading flag (3.13+) expose
    ``sys._is_gil_enabled()``. When the hook is missing the interpreter is
    necessarily GIL-enabled, so the answer is ``False``.

    Returns:
        ``True`` when the hook exists and reports the GIL as disabled.
    """
    gil_probe = getattr(sys, "_is_gil_enabled", None)
    # No hook -> pre-free-threading build -> the GIL is always on.
    return False if gil_probe is None else not gil_probe()
[docs]
def probe_gil_disabled_build() -> bool:
    """Report whether the interpreter was *built* free-threaded.

    Reads the ``Py_GIL_DISABLED`` build-config variable, which is independent
    of whether the GIL has been re-enabled at runtime via ``PYTHON_GIL=1``.

    Returns:
        ``True`` when the config variable is set to a truthy value.
    """
    build_flag = sysconfig.get_config_var("Py_GIL_DISABLED")
    # The variable may be missing (None) or 0 on regular builds; coerce to bool.
    return True if build_flag else False
[docs]
def probe_device_features(physical_device: Any) -> dict[str, bool]:
    """Read the optional Vulkan device-feature bits SimVX cares about.

    The result is a plain ``dict[str, bool]`` so the logical-device creation
    path can request exactly the features the device reports (see
    ``create_logical_device``). ``RenderCapabilities.probe`` folds the same
    dict into typed fields.

    Args:
        physical_device: Vulkan physical-device handle to query.

    Returns:
        Mapping from snake_case feature name to whether the device reports it.
    """
    reported = vk.vkGetPhysicalDeviceFeatures(physical_device)
    # (result key, VkPhysicalDeviceFeatures member), in output order.
    feature_members = (
        ("multi_draw_indirect", "multiDrawIndirect"),
        # imageCubeArray gates the samplerCubeArray usage in the forward
        # shader's reflection-probe path. Near-universal on desktop GPUs.
        ("image_cube_array", "imageCubeArray"),
        # Compressed-texture family gates: each is the coarse "is this family
        # legal at all" bit; the per-format SAMPLED feature is checked
        # separately via Engine.format_supported(). BC is near-universal on
        # desktop.
        ("texture_compression_bc", "textureCompressionBC"),
        ("texture_compression_etc2", "textureCompressionETC2"),
        ("texture_compression_astc_ldr", "textureCompressionASTC_LDR"),
    )
    return {key: bool(getattr(reported, member)) for key, member in feature_members}
[docs]
def probe_physical_device_count(instance: Any) -> int:
    """Count the Vulkan-capable physical devices visible on *instance*.

    Args:
        instance: Vulkan instance handle to enumerate.

    Returns:
        The number of enumerated devices, or ``0`` when enumeration raised
        (the failure is logged as a warning instead of propagating).
    """
    try:
        devices = vk.vkEnumeratePhysicalDevices(instance)
        device_total = len(devices)
    except Exception as exc:  # noqa: BLE001 (enumeration must never crash init)
        log.warning("vkEnumeratePhysicalDevices failed during capability probe: %s", exc)
        return 0
    return device_total
[docs]
def probe_device_group(instance: Any) -> bool:
    """Report whether a linked device group is usable on *instance*.

    Covers ``VK_KHR_device_group`` / Vulkan 1.1 core.
    ``vkEnumeratePhysicalDeviceGroups`` may be unresolvable through the loader
    on an instance created without the right API version / extension (observed
    on the dev box's Iris Xe + 1.2 loader path); that error is caught and
    reported as ``False`` rather than escaping init.

    Args:
        instance: Vulkan instance handle to enumerate.

    Returns:
        ``True`` only when at least one group links more than one physical
        device; ``False`` otherwise or when enumeration raised.
    """
    try:
        device_groups = vk.vkEnumeratePhysicalDeviceGroups(instance)
    except Exception as exc:  # noqa: BLE001 (unresolvable proc / missing ext -> no device groups)
        log.debug("vkEnumeratePhysicalDeviceGroups unavailable: %s", exc)
        return False
    # Only a group spanning >1 physical device signals true linking; a
    # single-device group is the trivial degenerate case (== one GPU).
    for group in device_groups:
        if int(group.physicalDeviceCount) > 1:
            return True
    return False
[docs]
def probe_external_memory_fd(physical_device: Any) -> bool:
    """Report whether the device exposes ``VK_KHR_external_memory_fd``.

    Gates the dma-buf zero-copy cross-device transfer path (D8 multi-GPU). The
    extension must also be *enabled* at logical-device creation to be usable;
    this probe only reports availability. Absent on this dev box -> the
    staging-copy floor is always chosen.

    Args:
        physical_device: Vulkan physical-device handle to query.

    Returns:
        ``True`` when the extension is listed by the device; ``False`` when it
        is not listed or when enumeration raised.
    """
    try:
        extensions = vk.vkEnumerateDeviceExtensionProperties(physical_device, None)
    except Exception as exc:  # noqa: BLE001 (enumeration must never crash init)
        log.debug("vkEnumerateDeviceExtensionProperties failed: %s", exc)
        return False

    # Prefer the binding's constant; fall back to the literal extension name.
    wanted = getattr(vk, "VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME", "VK_KHR_external_memory_fd")

    def _as_text(raw: Any) -> str:
        # The binding may hand back either ``str`` or ``bytes`` names.
        return raw if isinstance(raw, str) else raw.decode("utf-8")

    return any(_as_text(ext.extensionName) == wanted for ext in extensions)
[docs]
def probe_dedicated_queue_families(physical_device: Any) -> tuple[int | None, int | None]:
    """Find the dedicated compute and transfer queue families, if any.

    - Dedicated compute: the first family with COMPUTE but **not** GRAPHICS
      (async compute).
    - Dedicated transfer: the first family with TRANSFER but neither GRAPHICS
      nor COMPUTE (DMA-only copy engine). COMPUTE/GRAPHICS families implicitly
      support transfer, so only the genuinely transfer-only one qualifies.

    Args:
        physical_device: Vulkan physical-device handle to query.

    Returns:
        ``(dedicated_compute_qf, dedicated_transfer_qf)`` as queue-family
        indices; either is ``None`` when the device exposes no such family
        (e.g. integrated GPUs with a single universal queue family).
    """
    family_flags = [
        family.queueFlags
        for family in vk.vkGetPhysicalDeviceQueueFamilyProperties(physical_device)
    ]
    graphics_bit = vk.VK_QUEUE_GRAPHICS_BIT
    compute_bit = vk.VK_QUEUE_COMPUTE_BIT
    transfer_bit = vk.VK_QUEUE_TRANSFER_BIT

    # First family that can compute without also being a graphics family.
    async_compute = next(
        (
            index
            for index, flags in enumerate(family_flags)
            if flags & compute_bit and not flags & graphics_bit
        ),
        None,
    )
    # First family that can transfer and do nothing else of interest.
    copy_only = next(
        (
            index
            for index, flags in enumerate(family_flags)
            if flags & transfer_bit and not flags & (graphics_bit | compute_bit)
        ),
        None,
    )
    return async_compute, copy_only
[docs]
def probe_gpu_timing(physical_device: Any) -> bool:
    """Report whether the device supports GPU timestamp queries.

    Support is inferred from the device limits: ``timestampPeriod > 0``.

    Args:
        physical_device: Vulkan physical-device handle to query.

    Returns:
        ``True`` when the reported timestamp period is strictly positive.
    """
    device_props = vk.vkGetPhysicalDeviceProperties(physical_device)
    period = float(device_props.limits.timestampPeriod)
    return period > 0.0
[docs]
@dataclass(frozen=True)
class RenderCapabilities:
    """Immutable snapshot of host + GPU render capabilities.

    Probed facts only: no policy, no derived "should we" decisions. Consumers
    (the renderer, the future threading/queue/multi-GPU paths) read these fields
    and decide. Build via :meth:`probe` at init, or construct directly from
    scalar fields in tests.
    """

    # Fields that hold a queue-family *index* rather than a flag. Index 0 is a
    # valid family, so ``meets()`` tests these for ``is not None`` instead of
    # truthiness. Unannotated on purpose: this is not a dataclass field.
    _QUEUE_INDEX_FIELDS = frozenset({"dedicated_compute_qf", "dedicated_transfer_qf"})

    # --- Host interpreter -------------------------------------------------
    free_threaded: bool = False
    """GIL is disabled at runtime (``not sys._is_gil_enabled()``)."""
    gil_disabled_build: bool = False
    """Interpreter was built free-threaded (``Py_GIL_DISABLED`` config var)."""

    # --- Device topology --------------------------------------------------
    physical_device_count: int = 1
    """Number of Vulkan physical devices enumerated on the instance."""
    device_group: bool = False
    """A linked device group with >1 physical device is available."""
    external_memory_fd: bool = False
    """``VK_KHR_external_memory_fd`` is *available* (probed) on the selected device.
    Availability only: it does not mean the extension was enabled at logical-device
    creation. Use :attr:`external_memory_fd_enabled` to gate the dma-buf zero-copy
    path; this field reports whether enabling it is even possible. ``False`` on this
    dev box."""
    external_memory_fd_enabled: bool = False
    """``VK_KHR_external_memory_fd`` was *enabled* at logical-device creation.
    This is the gate the D8 multi-GPU cross-device transfer reads to select the
    dma-buf zero-copy path (:mod:`simvx.graphics.gpu.multi_device`): a probed-but-
    not-enabled extension cannot export/import fds, so DMABUF stays unselected and
    the always-available staging-copy floor is used until the rig opts in and the
    secondary + primary devices both enable it. ``False`` on this dev box (the
    single-GPU path never requests it), so the dma-buf raise is never reached."""

    # --- Queues -----------------------------------------------------------
    dedicated_compute_qf: int | None = None
    """Index of a COMPUTE-but-not-GRAPHICS queue family, or ``None`` if absent."""
    dedicated_transfer_qf: int | None = None
    """Index of a TRANSFER-only queue family, or ``None`` if absent."""

    # --- Timing -----------------------------------------------------------
    gpu_timing: bool = False
    """Device reports a positive ``timestampPeriod`` (timestamp queries usable)."""

    # --- Device feature bits (folded from the former _query_device_features)
    multi_draw_indirect: bool = False
    image_cube_array: bool = False
    texture_compression_bc: bool = False
    texture_compression_etc2: bool = False
    texture_compression_astc_ldr: bool = False

    # --- Raw GPU limits snapshot (plain dict, mirrors capabilities.js) -----
    limits: dict[str, int] = field(default_factory=dict)
    """GPU limit name -> value; consulted by the ``limits`` argument of :meth:`meets`."""

    def meets(
        self,
        *,
        features: list[str] | None = None,
        limits: dict[str, int] | None = None,
    ) -> bool:
        """Predicate mirroring the web ``Capabilities.meets()`` truth table.

        Args:
            features: Capability field names that must all be satisfied (e.g.
                ``"multi_draw_indirect"``, ``"free_threaded"``,
                ``"gpu_timing"``). A flag/count field is satisfied when truthy;
                a queue-family index field (``dedicated_compute_qf``,
                ``dedicated_transfer_qf``) is satisfied when it is not
                ``None``, because index ``0`` is a valid family.
            limits: Maps a GPU-limit key to a minimum required value; every
                requested limit must be present in :attr:`limits` and ``>=``
                the requested value.

        Returns:
            ``True`` when every requirement holds. Empty requirements pass. A
            name that is not a declared capability field fails (a typo never
            silently passes), matching the web ``has()`` semantics. This
            includes names of methods or other attributes such as ``"meets"``.
        """
        # Only declared dataclass fields count as capabilities; a bare
        # getattr() would let methods and dunder attributes pass as "truthy".
        declared = {f.name for f in fields(self)}
        for name in features or ():
            if name not in declared:
                return False
            value = getattr(self, name)
            if name in self._QUEUE_INDEX_FIELDS:
                # Presence check: 0 is a legitimate queue-family index.
                if value is None:
                    return False
            elif not value:
                return False

        for key, minimum in (limits or {}).items():
            available = self.limits.get(key)
            if available is None or available < minimum:
                return False
        return True

    @classmethod
    def probe(cls, instance: Any, physical_device: Any) -> RenderCapabilities:
        """Build a capability snapshot from live Vulkan handles + the host build.

        Pure probing: all GPU access is delegated to the module-level
        ``probe_*`` helpers so this stays the single Vulkan touch point.

        Args:
            instance: Vulkan instance handle (device count / device groups).
            physical_device: Selected Vulkan physical-device handle.

        Returns:
            A new frozen snapshot populated from the probe helpers.

        NOTE(review): ``limits`` and ``external_memory_fd_enabled`` are not set
        here and keep their defaults (``{}`` / ``False``), so any ``limits``
        requirement passed to :meth:`meets` fails on a snapshot straight from
        ``probe`` -- confirm whether they are filled in elsewhere.
        """
        feats = probe_device_features(physical_device)
        compute_qf, transfer_qf = probe_dedicated_queue_families(physical_device)
        return cls(
            free_threaded=probe_free_threaded(),
            gil_disabled_build=probe_gil_disabled_build(),
            physical_device_count=probe_physical_device_count(instance),
            device_group=probe_device_group(instance),
            external_memory_fd=probe_external_memory_fd(physical_device),
            dedicated_compute_qf=compute_qf,
            dedicated_transfer_qf=transfer_qf,
            gpu_timing=probe_gpu_timing(physical_device),
            multi_draw_indirect=feats["multi_draw_indirect"],
            image_cube_array=feats["image_cube_array"],
            texture_compression_bc=feats["texture_compression_bc"],
            texture_compression_etc2=feats["texture_compression_etc2"],
            texture_compression_astc_ldr=feats["texture_compression_astc_ldr"],
        )