Source code for simvx.graphics.renderer.bindless_draw2d_pass

"""Bindless co-batched 2D pass for the item pipeline (design §3 Decision D, P3b).

The main-framebuffer 2D submit. It carries ``texture_id`` and the ``is_msdf``
flag **per vertex** and draws through ONE unified pipeline (``ui2d.vert`` /
``ui2d.frag``). So a run of consecutive items sharing only ``(clip, blend)`` --
different-texture sprites *and* glyph runs *and* untextured fills -- collapses
into a single ``vkCmdDrawIndexed`` that selects the texture per primitive via
``ui_textures[nonuniformEXT(tex_id)]`` and branches on the ``is_msdf`` bit
(linear sample vs median-MSDF decode, the latter ported verbatim from
``text.frag`` so glyph AA is pixel-identical).

The MSDF atlas is registered into the engine's bindless ``ui_textures[]`` array
(the same array sprites use), so a glyph item just references that slot like any
other textured item. The atlas slot is refreshed when the atlas version bumps
(the view is recreated on re-upload).
"""

from __future__ import annotations

import logging
from typing import Any, NamedTuple

import numpy as np
import vulkan as vk

from ..gpu.memory import create_buffer, upload_numpy
from ..types import SHADER_DIR

# Public names exported by ``from ... import *``.
__all__ = ["BindlessDraw2DPass", "BindlessBatch"]

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)

# Geometric-growth arena (design §2.7): no fixed silent-truncation cap. Start
# modest, double at the frame boundary if a scene needs more.
# Initial capacities are element counts, not bytes: the vertex buffers start at
# ``_INITIAL_VERTS`` vertices and the index buffer at ``_INITIAL_INDICES``
# indices.
_INITIAL_VERTS = 65536
_INITIAL_INDICES = 98304
# 40-byte UI2D vertex. Used to size the vertex buffers in bytes.
# NOTE(review): the pipelines are built with ``UI2D_VERTEX_STRIDE`` from
# ``..gpu.pipeline``; presumably that is also 40 -- confirm the two agree.
_VERTEX_STRIDE = 40
# Push: vec2 screen_size + float px_range + float pad = 16 bytes (VS + FS).
_PUSH_SIZE = 16
# is_msdf bit in the per-vertex flags column (matches ui2d.frag FLAG_IS_MSDF).
FLAG_IS_MSDF = 1


class BindlessBatch(NamedTuple):
    """A single co-batched draw call over a contiguous geometry range.

    Each batch is scoped by ``(topology, clip, blend)``. Texture and
    ``is_msdf`` selection live on the vertices themselves, which is why one
    triangle batch may span many textures (sprites with different textures,
    MSDF glyph runs and untextured fills alike).

    How the numeric fields are read depends on ``line``:

    * ``line=False`` (triangles, indexed draw): ``vert_offset`` is the base
      vertex, and ``idx_offset`` / ``count`` describe the index range.
    * ``line=True`` (lines, non-indexed draw): ``vert_offset`` is the first
      vertex and ``count`` the number of vertices, both relative to the
      shared line vertex buffer.

    Lines use a different topology and so can never merge with triangles;
    a line batch ends the current run and goes through the line pipeline,
    which keeps painter order intact.
    """

    # Scissor rectangle, or ``None`` for the full screen.
    clip: tuple[int, int, int, int] | None
    # Blend-mode string; selects the per-blend pipeline.
    blend: str
    # Base vertex (triangles) or first vertex (lines).
    vert_offset: int
    # First index of the range (triangle batches).
    idx_offset: int
    # Index count (triangles) or vertex count (lines).
    count: int
    # True for a non-indexed line batch, False for an indexed triangle batch.
    line: bool = False
class BindlessDraw2DPass:
    """GPU pass that draws bindless co-batched 2D geometry (design §3 D).

    Owns the unified ``ui2d`` pipeline (one per blend mode) plus a line-list
    variant, one extended-vertex buffer + index buffer + line vertex buffer
    (host-visible, growable arena), and binds the engine's bindless texture
    descriptor set (the same ``ui_textures[]`` sprites use). The MSDF atlas is
    registered into that array via :meth:`sync_atlas_slot`; a second pass may
    instead borrow an already-registered slot via :meth:`set_atlas_slot`.
    """

    __slots__ = (
        "_engine",
        "_text_pass",
        "_pipelines",
        "_pipeline_layout",
        "_line_pipeline",
        "_line_pipeline_layout",
        "_vert_module",
        "_frag_module",
        "_vb",
        "_vb_mem",
        "_vb_capacity",
        "_ib",
        "_ib_mem",
        "_ib_capacity",
        "_line_vb",
        "_line_vb_mem",
        "_line_vb_capacity",
        "_atlas_slot",
        "_atlas_view",
        "_owned_atlas_slot",
        "_ready",
        "last_frame_draw_count",
    )

    def __init__(self, engine: Any, text_pass: Any = None) -> None:
        """Store the collaborators; no GPU objects exist until :meth:`setup`.

        Args:
            engine: Engine object providing ``ctx``, the bindless texture
                descriptor set/layout and texture (un)registration.
            text_pass: Optional text pass whose ``_atlas_view`` and
                ``px_range`` are read for MSDF glyph rendering.
        """
        # Give every slot a defined value so attribute reads never raise.
        for slot in self.__slots__:
            object.__setattr__(self, slot, None)
        self._engine = engine
        self._text_pass = text_pass
        self._ready = False
        self._atlas_slot = -1
        # Slot this pass itself obtained from ``register_texture`` (-1 if none).
        # Kept apart from ``_atlas_slot`` because that one may be borrowed.
        self._owned_atlas_slot = -1
        self.last_frame_draw_count = 0

    @staticmethod
    def _host_visible_flags() -> int:
        """Memory property flags used for every arena buffer of this pass."""
        return vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT

    def setup(self, render_pass: Any = None, extent: tuple[int, int] | None = None) -> None:
        """Compile shaders, build the pipelines and allocate the arena buffers.

        Args:
            render_pass: Render pass to build against; falls back to
                ``engine.render_pass`` when falsy.
            extent: Target extent; falls back to ``engine.extent`` when falsy.
        """
        engine = self._engine
        device = engine.ctx.device
        phys = engine.ctx.physical_device
        rp = render_pass or engine.render_pass
        ext = extent or engine.extent

        from ..gpu.pipeline import (
            UI2D_VERTEX_ATTRS,
            UI2D_VERTEX_STRIDE,
            PipelineSpec,
            build_pipeline,
            create_shader_module,
        )
        from ..materials.shader_compiler import compile_shader

        self._vert_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.vert"))
        self._frag_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.frag"))

        # The unified pipeline binds the engine's bindless texture array at set 0
        # (the same array ui.frag uses for sprites).
        tex_layout = engine.texture_descriptor_layout

        def make_spec(name: str, topology: int, blend: str) -> Any:
            # Everything except name/topology/blend is shared by all variants.
            return PipelineSpec(
                name=name,
                topology=topology,
                vertex_stride=UI2D_VERTEX_STRIDE,
                vertex_attrs=UI2D_VERTEX_ATTRS,
                cull_mode=vk.VK_CULL_MODE_NONE,
                depth_test=False,
                depth_write=False,
                blend=blend,
                dst_alpha_factor=vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
                set_layouts=(tex_layout,),
                push_size=_PUSH_SIZE,
            )

        def build(spec: Any) -> Any:
            return build_pipeline(
                device,
                spec,
                rp,
                ext,
                vert_module=self._vert_module,
                frag_module=self._frag_module,
            )

        # One pipeline per blend mode; a run that changes blend rebinds, but the
        # texture/msdf selection is per vertex so a texture change never splits
        # a run. Each entry is a ``(pipeline, layout)`` pair.
        self._pipelines = {}
        for blend in ("alpha", "add", "multiply"):
            spec = make_spec(f"ui2d_bindless_{blend}", vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, blend)
            self._pipelines[blend] = build(spec)
        self._pipeline_layout = self._pipelines["alpha"][1]

        # Line pipeline: same unified ui2d shaders + vertex format, LINE_LIST
        # topology, always alpha (line outlines are untextured: tex_id = -1 in
        # the frag's solid-colour branch). A separate pipeline because triangles
        # and lines can't share a draw; ordering is preserved by breaking the run.
        line_spec = make_spec("ui2d_bindless_line", vk.VK_PRIMITIVE_TOPOLOGY_LINE_LIST, "alpha")
        self._line_pipeline, self._line_pipeline_layout = build(line_spec)

        host_flags = self._host_visible_flags()
        self._vb_capacity = _INITIAL_VERTS
        self._ib_capacity = _INITIAL_INDICES
        self._vb, self._vb_mem = create_buffer(
            device,
            phys,
            self._vb_capacity * _VERTEX_STRIDE,
            vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
            host_flags,
        )
        # Indices are uploaded as uint32 (see the index bind in render): 4 bytes each.
        self._ib, self._ib_mem = create_buffer(
            device,
            phys,
            self._ib_capacity * 4,
            vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
            host_flags,
        )
        self._line_vb_capacity = _INITIAL_VERTS
        self._line_vb, self._line_vb_mem = create_buffer(
            device,
            phys,
            self._line_vb_capacity * _VERTEX_STRIDE,
            vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
            host_flags,
        )
        self._ready = True

    def sync_atlas_slot(self) -> int:
        """Register / refresh the MSDF atlas in the bindless array; return its slot.

        The glyph items reference this slot as their per-vertex ``tex_id``. When
        the text pass exposes a different atlas view than the one last seen, the
        existing slot is rewritten to the new view, so the slot id stays stable
        and already-built geometry stays valid.

        Returns:
            ``-1`` when there is no text pass; the current slot (possibly
            ``-1``) when the text pass has no atlas view yet; otherwise the
            registered slot.
        """
        text_pass = self._text_pass
        if text_pass is None:
            return -1
        view = getattr(text_pass, "_atlas_view", None)
        if view is None:
            return self._atlas_slot

        if self._atlas_slot < 0:
            # MSDF atlas needs LINEAR filtering for the SDF decode (text.frag uses
            # a linear sampler), which is the bindless default.
            self._atlas_slot = self._engine.register_texture(view, filter="linear")
            self._atlas_view = view
            # Remember that this pass created the registration so that cleanup
            # releases exactly this slot and never a borrowed one.
            self._owned_atlas_slot = self._atlas_slot
        elif view != self._atlas_view:
            self._engine.update_texture(self._atlas_slot, view)
            self._atlas_view = view
        return self._atlas_slot

    @property
    def atlas_slot(self) -> int:
        """The bindless slot of the MSDF atlas (-1 until :meth:`sync_atlas_slot`)."""
        return self._atlas_slot

    def set_atlas_slot(self, slot: int) -> None:
        """Borrow an already-registered MSDF atlas slot (N1, the HDR-lane pass).

        The HDR-target 2D pass and the swapchain 2D pass bind the SAME engine
        bindless ``ui_textures[]`` descriptor set, so the atlas slot index is
        valid in both. Only the swapchain pass owns the registration (calls
        :meth:`sync_atlas_slot`); the HDR pass mirrors its slot here each frame
        to avoid a redundant second descriptor write for the same atlas view.

        A slot set through this method is never unregistered by
        :meth:`cleanup`; only a slot this pass registered itself is.
        """
        self._atlas_slot = slot

    def _grow_buffer(self, buf: Any, mem: Any, cap: int, needed: int, usage: int, stride: int):
        """Reallocate a host-visible arena buffer to fit ``needed`` (geometric).

        Args:
            buf: Current buffer handle (destroyed on success).
            mem: Current memory handle (freed on success).
            cap: Current capacity in elements.
            needed: Required capacity in elements.
            usage: Vulkan buffer usage flags for the replacement.
            stride: Size of one element in bytes.

        Returns:
            ``(new_buffer, new_memory, new_capacity)``.
        """
        device = self._engine.ctx.device
        phys = self._engine.ctx.physical_device

        # Start from at least 1 so doubling always terminates.
        new_cap = max(cap, 1)
        while new_cap < needed:
            new_cap *= 2

        # Allocate the replacement first: if this raises, the caller's handles
        # still refer to live objects instead of already-destroyed ones.
        new_buf, new_mem = create_buffer(device, phys, new_cap * stride, usage, self._host_visible_flags())

        # Wait for the device to go idle before destroying the old pair.
        vk.vkDeviceWaitIdle(device)
        vk.vkDestroyBuffer(device, buf, None)
        vk.vkFreeMemory(device, mem, None)
        return new_buf, new_mem, new_cap

    def _ensure_capacity(self, n_verts: int, n_indices: int, n_line_verts: int) -> None:
        """Grow the arena buffers geometrically if a frame needs more (design §2.7)."""
        if n_verts > self._vb_capacity:
            self._vb, self._vb_mem, self._vb_capacity = self._grow_buffer(
                self._vb,
                self._vb_mem,
                self._vb_capacity,
                n_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                _VERTEX_STRIDE,
            )
        if n_indices > self._ib_capacity:
            self._ib, self._ib_mem, self._ib_capacity = self._grow_buffer(
                self._ib,
                self._ib_mem,
                self._ib_capacity,
                n_indices,
                vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
                4,  # uint32 indices
            )
        if n_line_verts > self._line_vb_capacity:
            self._line_vb, self._line_vb_mem, self._line_vb_capacity = self._grow_buffer(
                self._line_vb,
                self._line_vb_mem,
                self._line_vb_capacity,
                n_line_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                _VERTEX_STRIDE,
            )

    @staticmethod
    def _clip_to_scissor(
        clip: tuple[int, int, int, int], scale_x: float, scale_y: float
    ) -> tuple[int, int, int, int]:
        """Scale a UI-space clip rect to pixels and clamp it to be non-negative.

        A clip rect whose origin is negative would otherwise produce a negative
        scissor offset or extent. The part of the rect left of / above the
        origin is cut away and the size is shrunk by the same amount, never
        below zero.

        Returns:
            ``(x, y, width, height)`` as non-negative ints.
        """
        x = int(clip[0] * scale_x)
        y = int(clip[1] * scale_y)
        w = int(clip[2] * scale_x)
        h = int(clip[3] * scale_y)
        if x < 0:
            w += x
            x = 0
        if y < 0:
            h += y
            y = 0
        return x, y, max(w, 0), max(h, 0)

    def render(
        self,
        cmd: Any,
        width: int,
        height: int,
        ui_width: int,
        ui_height: int,
        *,
        verts: np.ndarray,
        indices: np.ndarray,
        line_verts: np.ndarray,
        batches: list[BindlessBatch],
    ) -> None:
        """Upload the co-batched geometry and issue one draw per batch.

        ``verts`` is a contiguous :data:`UI2D_VERTEX_DTYPE` array of triangle
        vertices (all batches concatenated, camera already applied), ``indices``
        the matching uint32 index stream, ``line_verts`` the (separate) line
        vertices, ``batches`` the ordered per-(topology, clip, blend) runs. A
        triangle batch is ONE ``vkCmdDrawIndexed`` regardless of how many
        textures it touches; a line batch is ONE ``vkCmdDraw``.

        ``ui_width`` / ``ui_height`` fall back to ``width`` / ``height`` when
        zero. Clip rects are given in UI units and scaled to framebuffer pixels.
        ``last_frame_draw_count`` is set to the number of draws issued (0 when
        the pass is not ready, there are no batches, or the engine has no
        texture descriptor set).
        """
        if not self._ready or not batches:
            self.last_frame_draw_count = 0
            return
        engine = self._engine
        device = engine.ctx.device
        tex_desc = engine.texture_descriptor_set
        if not tex_desc:
            self.last_frame_draw_count = 0
            return

        uw = ui_width or width
        uh = ui_height or height

        self._ensure_capacity(len(verts), len(indices), len(line_verts))
        if len(verts):
            upload_numpy(device, self._vb_mem, verts)
        if len(indices):
            upload_numpy(device, self._ib_mem, indices)
        if len(line_verts):
            upload_numpy(device, self._line_vb_mem, line_verts)

        # 4.0 is the px_range used when no text pass is attached.
        px_range = float(self._text_pass.px_range) if self._text_pass is not None else 4.0
        push = np.array([uw, uh, px_range, 0.0], dtype=np.float32).tobytes()

        vk_viewport = vk.VkViewport(
            x=0.0,
            y=0.0,
            width=float(width),
            height=float(height),
            minDepth=0.0,
            maxDepth=1.0,
        )
        full_scissor = vk.VkRect2D(
            offset=vk.VkOffset2D(x=0, y=0),
            extent=vk.VkExtent2D(width=width, height=height),
        )
        # UI units -> framebuffer pixels.
        clip_sx = width / uw if uw > 0 else 1.0
        clip_sy = height / uh if uh > 0 else 1.0

        # Unique marker meaning "no scissor set yet"; ``None`` is a valid clip.
        sentinel = object()

        # Bind state tracked across batches. ``last_kind`` is "tri" / "line" so a
        # topology switch rebinds the pipeline; a triangle blend change also
        # rebinds (lines are always alpha).
        last_kind = ""
        last_blend = ""
        last_clip: Any = sentinel
        draw_count = 0

        for clip, blend, vert_off, idx_off, count, is_line in batches:
            if count == 0:
                continue

            kind = "line" if is_line else "tri"
            need_rebind = kind != last_kind or (not is_line and blend != last_blend)
            if need_rebind:
                if is_line:
                    pipe, layout = self._line_pipeline, self._line_pipeline_layout
                    vbuf = self._line_vb
                else:
                    pipe, layout = self._pipelines[blend]
                    vbuf = self._vb
                vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, pipe)
                vk.vkCmdBindDescriptorSets(
                    cmd,
                    vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
                    layout,
                    0,
                    1,
                    [tex_desc],
                    0,
                    None,
                )
                engine.push_constants(cmd, layout, push)
                vk.vkCmdBindVertexBuffers(cmd, 0, 1, [vbuf], [0])
                if not is_line:
                    vk.vkCmdBindIndexBuffer(cmd, self._ib, 0, vk.VK_INDEX_TYPE_UINT32)
                vk.vkCmdSetViewport(cmd, 0, 1, [vk_viewport])
                # Force the scissor to be re-issued after a rebind.
                last_clip = sentinel
                last_kind = kind
                # Line batches ignore ``blend``, so they don't update the tracker.
                if not is_line:
                    last_blend = blend

            if clip != last_clip:
                if clip is None:
                    scissor = full_scissor
                else:
                    sx, sy, sw, sh = self._clip_to_scissor(clip, clip_sx, clip_sy)
                    scissor = vk.VkRect2D(
                        offset=vk.VkOffset2D(x=sx, y=sy),
                        extent=vk.VkExtent2D(width=sw, height=sh),
                    )
                vk.vkCmdSetScissor(cmd, 0, 1, [scissor])
                last_clip = clip

            if is_line:
                vk.vkCmdDraw(cmd, count, 1, vert_off, 0)
            else:
                vk.vkCmdDrawIndexed(cmd, count, 1, idx_off, vert_off, 0)
            draw_count += 1

        self.last_frame_draw_count = draw_count

    def _detach(self, attr: str) -> Any:
        """Return the handle stored in slot ``attr`` and reset the slot to ``None``."""
        handle = getattr(self, attr)
        setattr(self, attr, None)
        return handle

    def cleanup(self) -> None:
        """Destroy the GPU objects created by :meth:`setup` and release the atlas slot.

        Does nothing unless the pass is ready. Only an atlas slot that this pass
        registered itself (through :meth:`sync_atlas_slot`) is unregistered; a
        slot borrowed through :meth:`set_atlas_slot` belongs to another pass and
        is merely forgotten. All handles are reset so nothing stale remains.
        """
        if not self._ready:
            return
        device = self._engine.ctx.device

        for pipe, layout in (self._detach("_pipelines") or {}).values():
            vk.vkDestroyPipeline(device, pipe, None)
            vk.vkDestroyPipelineLayout(device, layout, None)
        # Alias of the "alpha" layout destroyed in the loop above.
        self._pipeline_layout = None

        line_pipeline = self._detach("_line_pipeline")
        if line_pipeline:
            vk.vkDestroyPipeline(device, line_pipeline, None)
        line_layout = self._detach("_line_pipeline_layout")
        if line_layout:
            vk.vkDestroyPipelineLayout(device, line_layout, None)

        for attr in ("_vert_module", "_frag_module"):
            module = self._detach(attr)
            if module:
                vk.vkDestroyShaderModule(device, module, None)

        for attr in ("_vb", "_ib", "_line_vb"):
            buf = self._detach(attr)
            if buf:
                vk.vkDestroyBuffer(device, buf, None)

        for attr in ("_vb_mem", "_ib_mem", "_line_vb_mem"):
            mem = self._detach(attr)
            if mem:
                vk.vkFreeMemory(device, mem, None)

        owned = self._owned_atlas_slot
        if owned is not None and owned >= 0:
            self._engine.unregister_texture(owned)
        self._owned_atlas_slot = -1
        self._atlas_slot = -1
        self._atlas_view = None
        self._ready = False