"""Secondary-engine facade for the D8 explicit-multi-adapter (multi-GPU) renderer.
The default :class:`~simvx.graphics.renderer.forward.Renderer` and every resource
manager it owns (``PipelineManager`` / ``BufferManager`` / ``PassOrchestrator`` /
``SceneContentRenderer`` / ``mesh_registry`` / the texture system) read their GPU
handles off **one** engine object: ``engine.ctx.device``, ``engine.mesh_registry``,
``engine.texture_manager`` and so on. A ``VkPipeline`` / ``VkBuffer`` / descriptor
created on one ``VkDevice`` cannot be used on another, so to render an offloaded
SubViewport SRU on a *secondary* GPU we need a second, fully independent renderer
whose resources live on the secondary device.
Option A (the one implemented here) is to **duplicate the renderer via an engine
facade**: rather than refactor the renderer to take a device, we give it a
lightweight object that quacks exactly like the bits of ``Engine`` the renderer
reads, but backed by a secondary :class:`~.multi_device.DeviceSlot`. Then
``Renderer(facade)`` + ``facade.attach_renderer()`` + ``renderer.setup()`` builds
all GPU resources on the *secondary* device, reusing every line of existing
renderer code unchanged. Nothing on the single-GPU path constructs a facade, so
that path is byte-identical to today.
Engine attribute surface the renderer + its managers + the passes + scene_adapter
read off the engine object (grep-derived from ``forward.py`` /
``pipeline_manager.py`` / ``buffer_manager.py`` / ``pass_orchestrator.py`` /
``scene_renderer.py`` / ``mesh_registry.py`` / ``scene_adapter.py``):
==============================  ================================================
attribute                       how the facade provides it
==============================  ================================================
``ctx``                         a secondary :class:`~.context.GPUContext` (device +
                                queues + a ``CommandContext`` on the secondary device)
``ctx.device``                  secondary ``VkDevice`` (the single most-read handle)
``ctx.physical_device``         secondary ``VkPhysicalDevice``
``ctx.graphics_queue``          secondary graphics queue
``ctx.command_pool``            secondary command pool (via ``cmd_ctx``)
``mesh_registry``               per-device :class:`~..renderer.mesh_registry.MeshRegistry`
``texture_manager``             per-device :class:`~..materials.texture.TextureManager`
                                (the facade itself is the registrar)
``capabilities``                the shared probed snapshot (read-only facts)
``_has_mdi``                    mirrored from capabilities
``shader_dir``                  shared ``SHADER_DIR`` (SPIR-V is device-independent)
``content_scale``               mirrored from the primary engine (SRU sizing)
``extent``                      the offscreen SRU size (no swapchain on a secondary)
``render_pass``                 a secondary offscreen-compatible ``VkRenderPass``
``push_constants``              records ``vkCmdPushConstants`` (device-independent)
``register_texture`` /          bindless texture registrar methods, recorded on the
``upload_texture_pixels`` /     secondary device's descriptor set
``update_texture`` /
``unregister_texture``
``texture_descriptor_set``      secondary bindless descriptor set
``texture_descriptor_layout``   secondary bindless descriptor layout
``current_timestamp_pool``      ``None`` (per-pass GPU timing is optional)
``renderer``                    the secondary :class:`Renderer` once attached
``capture_frame``               raises (a secondary never owns the swapchain)
``_window`` / ``_sync``         ``None`` (a secondary never presents)
==============================  ================================================
Honest scope: this module builds the facade + per-device residency helpers and is
covered by GPU-free unit tests asserting the full attribute surface. The actual
``Renderer(facade).setup()`` GPU build + the offload-record-and-composite loop run
on the 4x Arc Pro B70 rig; they cannot be functionally verified on this single-GPU
box. Textured-SRU residency mirrors the bindless uploads the SRU samples; a
no-texture / vertex-colour SRU is the minimal first rig case and needs no texture
mirroring (see :class:`SecondaryResidency`).
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
import numpy as np
import vulkan as vk
from ..types import SHADER_DIR
from .commands import CommandContext
from .context import GPUContext
if TYPE_CHECKING:
from ..renderer.mesh_registry import MeshRegistry
from .capabilities import RenderCapabilities
from .multi_device import DeviceSlot
log = logging.getLogger(__name__)
__all__ = ["SecondaryRenderContext", "SecondaryResidency"]
[docs]
class SecondaryRenderContext:
    """Engine-like facade backed by a secondary :class:`DeviceSlot`.

    Exposes the attribute surface the renderer + its managers + the passes +
    scene_adapter read off ``Engine`` (see the module docstring table), but every
    GPU handle is the *secondary* device's. ``Renderer(facade)`` therefore builds
    all its pipelines / buffers / descriptors on the secondary device, reusing the
    existing renderer code unchanged.

    Construction does **not** touch the primary device: it only stores scalar
    facts (``content_scale``, ``capabilities``) handed in by the caller and wraps
    the secondary slot's handles in a ``CommandContext`` + a :class:`GPUContext`.
    The expensive GPU build (pipelines, descriptor pools, SSBOs) happens later in
    ``Renderer(facade).setup()``, on the rig.
    """

    def __init__(
        self,
        slot: DeviceSlot,
        *,
        capabilities: RenderCapabilities | None,
        content_scale: tuple[float, float] = (1.0, 1.0),
        extent: tuple[int, int] | None = None,
        render_pass: Any = None,
        max_textures: int = 4096,
    ) -> None:
        """Wrap ``slot`` in an engine-shaped facade.

        Args:
            slot: The secondary device slot whose handles back every attribute.
            capabilities: Shared capability snapshot (may be ``None``).
            content_scale: Content scale mirrored from the primary engine.
            extent: Offscreen SRU target size; may be set later per offload.
            render_pass: Pre-built offscreen render pass, or ``None`` to have
                :meth:`ensure_offscreen_render_pass` create one on demand.
            max_textures: Capacity of the secondary bindless texture table.
        """
        self._slot = slot
        self._capabilities = capabilities
        self._content_scale = content_scale
        # A secondary renders only offscreen SRUs; its "extent" is the SRU target
        # size, set per offload. There is no swapchain on a secondary device.
        self._extent = extent
        # The offscreen-compatible render pass 3D pipelines compile against on the
        # secondary device. Created on the rig (device-bound); None until then.
        self._render_pass = render_pass
        # True only when ensure_offscreen_render_pass() created the pass, so
        # destroy() never frees a render pass the caller handed in.
        self._owns_render_pass = False
        self._max_textures = max_textures
        # multiDrawIndirect mirrors the shared capability snapshot: the draw path
        # is identical on both devices when both expose MDI (the rig does).
        self._has_mdi = bool(getattr(capabilities, "multi_draw_indirect", True))
        # A CommandContext on the SECONDARY device (its own pool). The secondary's
        # queue family index comes from the slot's resolved QueueFamilies. The pool
        # itself is created lazily (:meth:`ensure_command_pool`) the first time the
        # offload path needs to record on the secondary, so plain construction stays
        # GPU-free (it must not touch any device).
        self._cmd_ctx = CommandContext(slot.device, slot.queue_families.graphics)
        # The secondary GPUContext: the single object the renderer reads device +
        # queues off. ``multi_gpu`` is False here on purpose: a secondary renders a
        # plain single-device offscreen frame; it never itself splits work further.
        self._ctx = GPUContext(
            device=slot.device,
            physical_device=slot.physical_device,
            graphics_queue=slot.graphics_queue,
            present_queue=None,
            graphics_qf=slot.queue_families.graphics,
            cmd_ctx=self._cmd_ctx,
            compute_queue=slot.compute_queue,
            transfer_queue=slot.transfer_queue,
            multi_gpu=False,
            device_count=1,
        )
        # Lazily-built per-device subsystems (mirror Engine's lazy properties).
        self._mesh_registry: MeshRegistry | None = None
        self._texture_manager: Any = None
        self._renderer: Any = None
        # Bindless texture system on the secondary device (lazy, mirrors Engine).
        self._texture_descriptor_pool: Any = None
        self._texture_descriptor_layout: Any = None
        self._texture_descriptor_set: Any = None
        self._default_sampler: Any = None
        self._nearest_sampler: Any = None
        self._user_samplers: list[Any] = []
        self._user_images: list[tuple[Any, Any, Any]] = []
        self._next_texture_index = 0
        self._free_texture_slots: list[int] = []
        self._texture_sizes: dict[int, tuple[int, int]] = {}

    # -- the engine attribute surface (see module docstring table) --------------

    @property
    def ctx(self) -> GPUContext:
        """The secondary :class:`GPUContext` built in ``__init__``."""
        return self._ctx

    @property
    def capabilities(self) -> RenderCapabilities | None:
        """The capability snapshot passed at construction (may be ``None``)."""
        return self._capabilities

    @property
    def content_scale(self) -> tuple[float, float]:
        """The content scale passed at construction."""
        return self._content_scale

    @property
    def extent(self) -> tuple[int, int] | None:
        """Current offscreen target size, or ``None`` if not yet set."""
        return self._extent

    @extent.setter
    def extent(self, value: tuple[int, int] | None) -> None:
        # Set per offloaded SRU: a secondary's "extent" is the SRU target size.
        self._extent = value

    @property
    def shader_dir(self) -> Any:
        """The shared ``SHADER_DIR``."""
        # SPIR-V is device-independent; both devices compile the same shaders.
        return SHADER_DIR

    @property
    def render_pass(self) -> Any:
        """The offscreen render pass, or ``None`` until one is set/created."""
        return self._render_pass

    @render_pass.setter
    def render_pass(self, value: Any) -> None:
        self._render_pass = value

    @property
    def current_timestamp_pool(self) -> Any:
        """Always ``None``: a secondary skips per-pass GPU timing."""
        # Per-pass GPU timing is optional and primary-only; a secondary skips it.
        return None

    @property
    def _window(self) -> Any:
        # A secondary never presents, so it owns no window.
        return None

    @property
    def _sync(self) -> Any:
        # No swapchain on a secondary => no per-frame swapchain sync.
        return None

    @property
    def mesh_registry(self) -> MeshRegistry:
        """Per-device mesh registry (lazy), mirroring ``Engine.mesh_registry``."""
        if self._mesh_registry is None:
            from ..renderer.mesh_registry import MeshRegistry

            self._mesh_registry = MeshRegistry(self._slot.device, self._slot.physical_device)
        return self._mesh_registry

    @property
    def texture_manager(self) -> Any:
        """Per-device bindless texture manager (lazy), mirroring ``Engine.texture_manager``.

        The facade is its own registrar (it implements ``register_texture`` /
        ``upload_texture_pixels`` on the secondary device), so the manager is
        constructed with ``self`` as its registrar.
        """
        if self._texture_manager is None:
            from ..materials.texture import TextureManager

            self._texture_manager = TextureManager(self)
        return self._texture_manager

    @property
    def renderer(self) -> Any:
        """The secondary :class:`Renderer` once bound via :meth:`attach_renderer`.

        Unlike ``Engine.renderer`` this does NOT lazily create a renderer: the
        offload coordinator constructs + sets up the secondary renderer explicitly
        (it is a heavy, rig-only GPU build) and binds it here. ``None`` until then.
        """
        return self._renderer

    def attach_renderer(self, renderer: Any) -> None:
        """Bind the secondary :class:`Renderer` built against this facade."""
        self._renderer = renderer

    def ensure_command_pool(self) -> Any:
        """Create the secondary device's command pool on first use, return it.

        Construction is GPU-free, so the pool is built here the first time the
        offload path needs to record on the secondary (the secondary ``Renderer``
        build + every one-shot layout transition read ``ctx.command_pool``). Rig-
        side; never reached on the single-GPU path (no facade is constructed).
        """
        if self._cmd_ctx.pool is None:
            self._cmd_ctx.create_pool()
        return self._cmd_ctx.pool

    def ensure_offscreen_render_pass(self) -> Any:
        """Create + own an SRU-compatible offscreen render pass on first use.

        The secondary ``Renderer``'s pipelines are built (in ``setup()``) against
        ``facade.render_pass``, so it MUST exist BEFORE the renderer factory runs,
        and MUST be format-compatible with the per-SRU ``RenderTarget`` created
        later (R16G16B16A16_SFLOAT colour + D32 depth, samplable). Pipelines use
        dynamic viewport/scissor, so the placeholder extent does not bind them to a
        size and the same pipelines serve any SRU size. Rig-side; never reached on
        the single-GPU path (no facade is constructed).

        Returns:
            The existing render pass if one is already set, else the new one.
        """
        if self._render_pass is None:
            from ..renderer.passes import create_offscreen_pass

            self._render_pass = create_offscreen_pass(
                self._slot.device,
                vk.VK_FORMAT_R16G16B16A16_SFLOAT,
                vk.VK_FORMAT_D32_SFLOAT,
                samplable_depth=True,
            )
            # Only a pass created here is freed by destroy().
            self._owns_render_pass = True
        if self._extent is None:
            # Placeholder size; the real SRU size is assigned per offload.
            self._extent = (16, 16)
        return self._render_pass

    # -- texture registrar surface (mirrors Engine; secondary-device handles) ---

    @property
    def texture_descriptor_layout(self) -> Any:
        """Secondary bindless descriptor layout; builds the texture system on first read."""
        if not self._texture_descriptor_layout:
            self._init_texture_system()
        return self._texture_descriptor_layout

    @property
    def texture_descriptor_set(self) -> Any:
        """Secondary bindless descriptor set, or ``None`` before the texture system exists."""
        return self._texture_descriptor_set

    def _init_texture_system(self) -> None:
        """Build the secondary device's bindless descriptor pool / layout / set.

        Mirrors ``Engine._init_texture_system`` but on the secondary device. Rig-
        side (GPU): on this single-GPU box no facade is constructed so this never
        runs. Also creates the default sampler and records it for teardown.
        """
        from .descriptors import (
            allocate_descriptor_set,
            create_texture_descriptor_layout,
            create_texture_descriptor_pool,
        )
        from .memory import create_sampler

        dev = self._slot.device
        self._texture_descriptor_pool = create_texture_descriptor_pool(dev, self._max_textures)
        self._texture_descriptor_layout = create_texture_descriptor_layout(dev, self._max_textures)
        self._texture_descriptor_set = allocate_descriptor_set(
            dev, self._texture_descriptor_pool, self._texture_descriptor_layout
        )
        self._default_sampler = create_sampler(dev)
        self._user_samplers.append(self._default_sampler)

    def register_texture(self, image_view: Any, *, filter: str = "linear", mip_count: int = 1) -> int:
        """Register a bindless texture on the SECONDARY device. Mirrors ``Engine.register_texture``.

        Args:
            image_view: Image view (created on the secondary device) to bind.
            filter: ``"nearest"`` selects the nearest sampler; any other value
                selects the default sampler.
            mip_count: Accepted for signature parity; not used by this method.

        Returns:
            The bindless slot index the view was written to.

        Raises:
            RuntimeError: No free slot remains in the ``max_textures``-sized table.
        """
        # Fail fast, before any GPU work: a write past the descriptor array would
        # be out of bounds for the layout built with ``max_textures`` entries.
        if not self._free_texture_slots and self._next_texture_index >= self._max_textures:
            raise RuntimeError(
                f"secondary bindless texture table is full ({self._max_textures} slots)"
            )

        from .descriptors import write_texture_descriptor
        from .memory import create_sampler

        if not self._texture_descriptor_set:
            self._init_texture_system()

        if filter == "nearest":
            if self._nearest_sampler is None:
                self._nearest_sampler = create_sampler(self._slot.device, filter_mode=vk.VK_FILTER_NEAREST)
                self._user_samplers.append(self._nearest_sampler)
            sampler = self._nearest_sampler
        else:
            sampler = self._default_sampler

        # Recycle a freed slot before growing the table.
        if self._free_texture_slots:
            idx = self._free_texture_slots.pop()
        else:
            idx = self._next_texture_index
            self._next_texture_index += 1

        try:
            write_texture_descriptor(self._slot.device, self._texture_descriptor_set, idx, image_view, sampler)
        except Exception:
            # The slot was never populated; hand it back so it is not lost.
            self._free_texture_slots.append(idx)
            raise
        return idx

    def upload_texture_pixels(self, pixels: np.ndarray, width: int, height: int, *, filter: str = "linear") -> int:
        """Upload RGBA pixels to the SECONDARY device. Mirrors ``Engine.upload_texture_pixels``.

        Args:
            pixels: Pixel data; made contiguous before upload.
            width: Image width in pixels.
            height: Image height in pixels.
            filter: Forwarded to :meth:`register_texture`.

        Returns:
            The bindless slot index of the uploaded texture.
        """
        from .memory import upload_image_data

        dev = self._slot.device
        # The pool is created lazily, so make sure it exists before recording
        # the upload instead of reading a possibly-None ``_cmd_ctx.pool``.
        pool = self.ensure_command_pool()
        image, memory = upload_image_data(
            dev,
            self._slot.physical_device,
            self._slot.graphics_queue,
            pool,
            np.ascontiguousarray(pixels),
            width,
            height,
        )
        image_view = None
        try:
            view_info = vk.VkImageViewCreateInfo(
                image=image,
                viewType=vk.VK_IMAGE_VIEW_TYPE_2D,
                format=vk.VK_FORMAT_R8G8B8A8_UNORM,
                subresourceRange=vk.VkImageSubresourceRange(
                    aspectMask=vk.VK_IMAGE_ASPECT_COLOR_BIT,
                    baseMipLevel=0,
                    levelCount=1,
                    baseArrayLayer=0,
                    layerCount=1,
                ),
            )
            image_view = vk.vkCreateImageView(dev, view_info, None)
            tex_idx = self.register_texture(image_view, filter=filter)
        except Exception:
            # Nothing tracks these handles yet, so release them here or they
            # would outlive destroy().
            if image_view is not None:
                vk.vkDestroyImageView(dev, image_view, None)
            vk.vkDestroyImage(dev, image, None)
            vk.vkFreeMemory(dev, memory, None)
            raise
        self._user_images.append((image, memory, image_view))
        self._texture_sizes[tex_idx] = (width, height)
        return tex_idx

    def update_texture(self, slot: int, image_view: Any) -> None:
        """Rewrite a bindless slot on the SECONDARY device. Mirrors ``Engine.update_texture``.

        A no-op while the bindless descriptor set has not been created. The slot
        is always rewritten with the default sampler.
        """
        from .descriptors import write_texture_descriptor

        if not self._texture_descriptor_set:
            return
        write_texture_descriptor(
            self._slot.device, self._texture_descriptor_set, slot, image_view, self._default_sampler
        )

    def unregister_texture(self, slot: int) -> None:
        """Free a bindless slot for reuse. Mirrors ``Engine.unregister_texture``.

        Negative slots and slots that were never handed out are ignored: putting
        a never-allocated index on the free list would later let two textures
        share one slot. Freeing the same slot twice is also a no-op.
        """
        if slot < 0 or slot >= self._next_texture_index:
            return
        self._texture_sizes.pop(slot, None)
        if slot not in self._free_texture_slots:
            self._free_texture_slots.append(slot)

    # -- command recording surface (device-independent) -------------------------

    def push_constants(self, cmd: Any, pipeline_layout: Any, data: bytes | bytearray) -> None:
        """Record ``vkCmdPushConstants``. Identical to ``Engine.push_constants``.

        Push constants are recorded into a command buffer, not bound to a device
        object, so the implementation is device-independent: the secondary's cmd
        buffer + its pipeline layout flow in from the secondary renderer. The
        range always starts at offset 0 and targets the vertex + fragment stages.
        """
        ffi = vk.ffi
        cbuf = ffi.new("char[]", data)
        vk._vulkan.lib.vkCmdPushConstants(
            cmd,
            pipeline_layout,
            vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT,
            0,
            len(data),
            cbuf,
        )

    def capture_frame(self) -> Any:
        """A secondary never owns the swapchain: capture is a primary-only op.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "capture_frame is a primary-device (swapchain) operation; a secondary "
            "renders offscreen SRUs only. Capture on the primary after compositing."
        )

    # -- teardown ---------------------------------------------------------------

    def destroy(self) -> None:
        """Free the secondary-device resources this facade owns (not the device).

        The secondary ``VkDevice`` itself is owned + destroyed by
        :class:`~.multi_device.MultiDeviceManager`; this only releases the command
        pool, samplers, descriptor pool, uploaded images, and mesh registry the
        facade created on that device. Slot bookkeeping and the cached texture
        manager are reset too, so nothing refers to the destroyed bindless table.
        Does nothing when the slot has no device.
        """
        dev = self._slot.device
        if dev is None:
            return

        if self._renderer is not None and hasattr(self._renderer, "cleanup"):
            self._renderer.cleanup()
        self._renderer = None

        # Only free a render pass this facade created itself.
        if self._owns_render_pass and self._render_pass is not None:
            vk.vkDestroyRenderPass(dev, self._render_pass, None)
            self._render_pass = None
        self._owns_render_pass = False

        for image, memory, view in self._user_images:
            vk.vkDestroyImageView(dev, view, None)
            vk.vkDestroyImage(dev, image, None)
            vk.vkFreeMemory(dev, memory, None)
        self._user_images.clear()

        for sampler in self._user_samplers:
            vk.vkDestroySampler(dev, sampler, None)
        self._user_samplers.clear()
        self._nearest_sampler = None
        self._default_sampler = None

        if self._texture_descriptor_pool is not None:
            vk.vkDestroyDescriptorPool(dev, self._texture_descriptor_pool, None)
            self._texture_descriptor_pool = None
        if self._texture_descriptor_layout is not None:
            vk.vkDestroyDescriptorSetLayout(dev, self._texture_descriptor_layout, None)
            self._texture_descriptor_layout = None
        self._texture_descriptor_set = None

        # The bindless table is gone: restart slot allocation from scratch and
        # drop the manager so it cannot resolve against dead slots.
        self._next_texture_index = 0
        self._free_texture_slots.clear()
        self._texture_sizes.clear()
        self._texture_manager = None

        if self._mesh_registry is not None:
            self._mesh_registry.destroy()
            self._mesh_registry = None

        self._cmd_ctx.destroy()
[docs]
class SecondaryResidency:
    """Makes an offloaded SRU's meshes + textures resident on a secondary device.

    A ``VkBuffer`` / image / descriptor created on the primary device cannot be
    used on a secondary, so an offloaded SRU's geometry (vertex/index buffers via
    the per-device :class:`~..renderer.mesh_registry.MeshRegistry`) and any textures
    it samples (re-uploaded into the secondary's bindless table) must be made
    resident on the secondary device before the SRU is recorded there.

    Two cases:

    * **No-texture / vertex-colour SRU** (the minimal first rig case): only the
      meshes need mirroring; :meth:`ensure_meshes` does that and nothing else is
      required. ``samples_textures`` is ``False``.
    * **Textured SRU**: additionally the bindless uploads the SRU samples must be
      mirrored via :meth:`ensure_textures`. This requires the source pixels: the
      primary's :class:`~..materials.texture.TextureManager` retains them only when
      constructed with ``retain_pixels=True`` (the web path) and NOT on desktop, so
      full desktop textured-SRU residency is FLAGGED as the remaining step (see
      :meth:`ensure_textures`). The structure + mesh path are complete and tested.

    The mirror is keyed by the same source identity the primary uses (mesh ``id``,
    texture source key) so a mesh/texture shared across SRUs uploads once per
    secondary device.
    """

    def __init__(self, facade: SecondaryRenderContext) -> None:
        """Start with empty mirror tables bound to ``facade``."""
        self._facade = facade
        # primary mesh handle id -> secondary MeshHandle (uploaded once per device)
        self._mesh_map: dict[int, Any] = {}
        # primary tex_id -> secondary tex_id
        self._tex_map: dict[int, int] = {}

    def ensure_meshes(self, meshes: list[tuple[int, np.ndarray, np.ndarray]]) -> dict[int, Any]:
        """Register ``(id, vertices, indices)`` tuples with the secondary mesh registry.

        An id that is already mirrored is skipped, so a shared mesh is registered
        only once per device. ``vertices`` / ``indices`` are passed straight to
        the facade's per-device :class:`MeshRegistry`.

        Returns:
            The full ``primary_mesh_id -> secondary MeshHandle`` map held by this
            object (the live dict, not a copy).
        """
        # Read the registry up front, even for an empty list, exactly once.
        registry = self._facade.mesh_registry
        resident = self._mesh_map
        for primary_id, vertex_data, index_data in meshes:
            if primary_id not in resident:
                resident[primary_id] = registry.register(vertex_data, index_data)
        return resident

    def secondary_mesh(self, primary_mesh_id: int) -> Any | None:
        """The secondary :class:`MeshHandle` for a primary mesh id, or ``None``."""
        handle = self._mesh_map.get(primary_mesh_id)
        return handle

    def ensure_textures(self, textures: list[tuple[int, np.ndarray, int, int]]) -> dict[int, int]:
        """Mirror an SRU's sampled textures into the secondary bindless table.

        ``textures`` is ``(primary_tex_id, rgba_pixels, width, height)`` per source
        the SRU samples. Each is uploaded through the facade's
        ``upload_texture_pixels`` (once per device, keyed by primary tex id) and
        the returned map gives the secondary slot to rewrite material texture ids
        against.

        RESIDENCY GAP (flagged): the desktop ``TextureManager`` does NOT retain
        source pixels (``retain_pixels=False``), so the caller cannot in general
        recover ``rgba_pixels`` for an arbitrary already-uploaded primary texture.
        The remaining step for full desktop textured-SRU residency is to capture
        the source pixels at primary-upload time (or read them back from the
        primary image) and feed them here. Until then, route only no-texture /
        vertex-colour SRUs to a secondary (``samples_textures`` False); a textured
        SRU stays on the primary. This method itself is complete: given the pixels
        it mirrors correctly, and is unit-tested with a fake uploader.

        Returns:
            The full ``primary_tex_id -> secondary slot`` map held by this object
            (the live dict, not a copy).
        """
        mirrored = self._tex_map
        for primary_id, rgba, tex_width, tex_height in textures:
            if primary_id not in mirrored:
                mirrored[primary_id] = self._facade.upload_texture_pixels(rgba, tex_width, tex_height)
        return mirrored

    def secondary_texture(self, primary_tex_id: int) -> int | None:
        """The secondary bindless slot for a primary tex id, or ``None``."""
        slot = self._tex_map.get(primary_tex_id)
        return slot