"""Bindless co-batched 2D pass for the item pipeline (design §3 Decision D, P3b).
The main-framebuffer 2D submit. It carries ``texture_id`` and the ``is_msdf``
flag **per vertex** and draws through ONE unified pipeline (``ui2d.vert`` /
``ui2d.frag``). So a run of consecutive items sharing only ``(clip, blend)`` --
different-texture sprites *and* glyph runs *and* untextured fills -- collapses
into a single ``vkCmdDrawIndexed`` that selects the texture per primitive via
``ui_textures[nonuniformEXT(tex_id)]`` and branches on the ``is_msdf`` bit
(linear sample vs median-MSDF decode, the latter ported verbatim from
``text.frag`` so glyph AA is pixel-identical).
The MSDF atlas is registered into the engine's bindless ``ui_textures[]`` array
(the same array sprites use), so a glyph item just references that slot like any
other textured item. The atlas slot is refreshed when the atlas version bumps
(the view is recreated on re-upload).
"""
from __future__ import annotations
import logging
from typing import Any, NamedTuple
import numpy as np
import vulkan as vk
from ..gpu.memory import create_buffer, upload_numpy
from ..types import SHADER_DIR
# Public names exported by ``from <this module> import *``.
__all__ = ["BindlessDraw2DPass", "BindlessBatch"]
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Starting capacities of the growable arena buffers (design §2.7). These are
# only initial sizes, not caps: the pass doubles a buffer whenever a frame
# needs more room than it currently has.
_INITIAL_VERTS = 1 << 16                      # 65536 vertices
_INITIAL_INDICES = _INITIAL_VERTS * 3 // 2    # 98304 indices

# Size in bytes of one UI2D vertex; used to size the vertex arenas.
_VERTEX_STRIDE = 40

# Push-constant block shared by the vertex and fragment stages:
# vec2 screen_size + float px_range + float pad -> four 4-byte floats.
_PUSH_SIZE = 4 * 4

# Bit in the per-vertex flags column marking an MSDF glyph vertex
# (matches FLAG_IS_MSDF in ui2d.frag).
FLAG_IS_MSDF = 0x1
class BindlessBatch(NamedTuple):
    """One co-batched draw: a contiguous range under a (topology, clip, blend) scope.

    The texture and ``is_msdf`` selection are PER VERTEX, so a single triangle
    batch may cover many textures (different-texture sprites + MSDF glyph runs
    + untextured fills).

    Triangle batches (``line=False``) are indexed draws: ``vert_offset`` is the
    base vertex and ``idx_offset`` / ``count`` describe the index range.

    Line batches (``line=True``) are non-indexed: ``vert_offset`` is the first
    vertex and ``count`` the vertex count in the shared line vertex buffer.
    Lines use a different topology, so they cannot merge with triangles; they
    break a run and render through the line pipeline, preserving painter order.

    The field order is part of the contract: consumers unpack batches
    positionally as ``(clip, blend, vert_offset, idx_offset, count, line)``.
    """

    # Scissor rect, or ``None`` for full-screen.
    clip: tuple[int, int, int, int] | None
    # Blend-mode string selecting the per-blend pipeline.
    blend: str
    # Base vertex (triangles) or first vertex (lines).
    vert_offset: int
    # First index of the range (triangles only).
    idx_offset: int
    # Index count (triangles) or vertex count (lines).
    count: int
    # True for a non-indexed line batch, False for an indexed triangle batch.
    line: bool = False
class BindlessDraw2DPass:
    """GPU pass that draws bindless co-batched 2D geometry (design §3 D).

    Owns the unified ``ui2d`` pipeline (one per blend mode) plus a line
    pipeline, one triangle vertex buffer + index buffer and one line vertex
    buffer (host-visible, growable arenas), and binds the engine's bindless
    texture descriptor set. The MSDF atlas is registered into that array via
    :meth:`sync_atlas_slot`, or borrowed from another pass via
    :meth:`set_atlas_slot`.
    """

    __slots__ = (
        "_engine", "_text_pass",
        "_pipelines", "_pipeline_layout", "_line_pipeline", "_line_pipeline_layout",
        "_vert_module", "_frag_module",
        "_vb", "_vb_mem", "_vb_capacity",
        "_ib", "_ib_mem", "_ib_capacity",
        "_line_vb", "_line_vb_mem", "_line_vb_capacity",
        "_atlas_slot", "_atlas_view", "_atlas_owned",
        "_ready", "last_frame_draw_count",
    )

    def __init__(self, engine: Any, text_pass: Any = None) -> None:
        """Store the engine / text pass; no GPU objects are created here.

        Args:
            engine: Engine object providing ``ctx``, the bindless texture
                layout / descriptor set and the texture registry methods.
            text_pass: Optional text pass; read for ``_atlas_view`` and
                ``px_range``. ``None`` disables atlas registration.
        """
        # Give every slot a defined value so attribute reads never raise.
        for slot in self.__slots__:
            object.__setattr__(self, slot, None)
        self._engine = engine
        self._text_pass = text_pass
        self._ready = False
        self._atlas_slot = -1
        # True only when THIS pass registered the atlas slot itself.
        self._atlas_owned = False
        self.last_frame_draw_count = 0

    def _create_host_buffer(self, size_bytes: int, usage: int) -> tuple[Any, Any]:
        """Create a host-visible, host-coherent buffer; return ``(buffer, memory)``."""
        ctx = self._engine.ctx
        host_flags = (
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
            | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
        )
        return create_buffer(ctx.device, ctx.physical_device, size_bytes, usage, host_flags)

    def setup(self, render_pass: Any = None, extent: tuple[int, int] | None = None) -> None:
        """Compile the shaders, build the pipelines and allocate the arenas.

        Args:
            render_pass: Render pass to build against; falls back to the
                engine's ``render_pass`` when falsy.
            extent: Target extent; falls back to the engine's ``extent``
                when falsy.
        """
        e = self._engine
        device = e.ctx.device
        rp = render_pass or e.render_pass
        ext = extent or e.extent

        from ..gpu.pipeline import (
            UI2D_VERTEX_ATTRS,
            UI2D_VERTEX_STRIDE,
            PipelineSpec,
            build_pipeline,
            create_shader_module,
        )
        from ..materials.shader_compiler import compile_shader

        self._vert_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.vert"))
        self._frag_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.frag"))

        # The unified pipeline binds the engine's bindless texture array at
        # set 0. Texture / msdf selection is per vertex, so only a blend or
        # topology change needs a different pipeline.
        tex_layout = e.texture_descriptor_layout

        def make_spec(name: str, topology: int, blend: str) -> Any:
            # Every ui2d pipeline shares everything but name/topology/blend.
            return PipelineSpec(
                name=name,
                topology=topology,
                vertex_stride=UI2D_VERTEX_STRIDE,
                vertex_attrs=UI2D_VERTEX_ATTRS,
                cull_mode=vk.VK_CULL_MODE_NONE,
                depth_test=False,
                depth_write=False,
                blend=blend,
                dst_alpha_factor=vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
                set_layouts=(tex_layout,),
                push_size=_PUSH_SIZE,
            )

        def build(spec: Any) -> Any:
            return build_pipeline(
                device, spec, rp, ext,
                vert_module=self._vert_module, frag_module=self._frag_module,
            )

        # One triangle pipeline per blend mode.
        self._pipelines = {}
        for blend in ("alpha", "add", "multiply"):
            self._pipelines[blend] = build(
                make_spec(f"ui2d_bindless_{blend}", vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, blend)
            )
        self._pipeline_layout = self._pipelines["alpha"][1]

        # Line pipeline: same shaders + vertex format, LINE_LIST topology,
        # always alpha. Separate because triangles and lines can't share a
        # draw; ordering is preserved by breaking the run.
        self._line_pipeline, self._line_pipeline_layout = build(
            make_spec("ui2d_bindless_line", vk.VK_PRIMITIVE_TOPOLOGY_LINE_LIST, "alpha")
        )

        # NOTE(review): the arenas are sized with the module-level
        # _VERTEX_STRIDE while the pipelines use UI2D_VERTEX_STRIDE; the two
        # are presumably equal -- confirm they cannot drift apart.
        self._vb_capacity = _INITIAL_VERTS
        self._ib_capacity = _INITIAL_INDICES
        self._vb, self._vb_mem = self._create_host_buffer(
            self._vb_capacity * _VERTEX_STRIDE, vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        )
        # 4 bytes per index: the index buffer is bound as UINT32 in render().
        self._ib, self._ib_mem = self._create_host_buffer(
            self._ib_capacity * 4, vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        )
        self._line_vb_capacity = _INITIAL_VERTS
        self._line_vb, self._line_vb_mem = self._create_host_buffer(
            self._line_vb_capacity * _VERTEX_STRIDE, vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        )
        self._ready = True

    def sync_atlas_slot(self) -> int:
        """Register / refresh the MSDF atlas in the bindless array; return its slot.

        The first call that sees an atlas view registers it (linear filter)
        and marks this pass as the slot's owner. Later calls rewrite the
        descriptor only when the text pass exposes a different view, keeping
        the slot id stable so already-built geometry stays valid.

        Returns:
            ``-1`` when there is no text pass; otherwise the current slot
            (which is still ``-1`` if no atlas view has been seen yet).
        """
        tp = self._text_pass
        if tp is None:
            return -1
        view = getattr(tp, "_atlas_view", None)
        if view is None:
            return self._atlas_slot
        if self._atlas_slot < 0:
            # The MSDF decode needs LINEAR filtering.
            self._atlas_slot = self._engine.register_texture(view, filter="linear")
            self._atlas_view = view
            self._atlas_owned = self._atlas_slot >= 0
        elif view != self._atlas_view:
            self._engine.update_texture(self._atlas_slot, view)
            self._atlas_view = view
        return self._atlas_slot

    @property
    def atlas_slot(self) -> int:
        """The bindless slot of the MSDF atlas (-1 until one is synced or set)."""
        return self._atlas_slot

    def set_atlas_slot(self, slot: int) -> None:
        """Borrow an already-registered MSDF atlas slot (N1, the HDR-lane pass).

        The HDR-target 2D pass and the swapchain 2D pass bind the SAME engine
        bindless descriptor set, so the slot index is valid in both. Only the
        pass that called :meth:`sync_atlas_slot` owns the registration; a pass
        that merely mirrors the slot here does not, and :meth:`cleanup` will
        not unregister a borrowed slot.
        """
        self._atlas_slot = slot

    def _grow_buffer(
        self, buf: Any, mem: Any, cap: int, needed: int, usage: int, stride: int,
    ) -> tuple[Any, Any, int]:
        """Reallocate a host-visible arena buffer to fit ``needed`` (geometric).

        Args:
            buf, mem: Current buffer and its memory; destroyed on success.
            cap: Current capacity in elements.
            needed: Required capacity in elements.
            usage: Vulkan buffer usage flags for the replacement.
            stride: Size of one element in bytes.

        Returns:
            ``(new_buffer, new_memory, new_capacity)``.
        """
        # Start from at least 1 so doubling always terminates.
        new_cap = max(cap, 1)
        while new_cap < needed:
            new_cap *= 2
        # Allocate the replacement FIRST: if this raises, the caller still
        # holds valid (old) handles instead of already-destroyed ones.
        new_buf, new_mem = self._create_host_buffer(new_cap * stride, usage)
        device = self._engine.ctx.device
        # Wait for the GPU before destroying a buffer it may still be using.
        vk.vkDeviceWaitIdle(device)
        vk.vkDestroyBuffer(device, buf, None)
        vk.vkFreeMemory(device, mem, None)
        return new_buf, new_mem, new_cap

    def _ensure_capacity(self, n_verts: int, n_indices: int, n_line_verts: int) -> None:
        """Grow the arena buffers geometrically if a frame needs more (design §2.7)."""
        if n_verts > self._vb_capacity:
            self._vb, self._vb_mem, self._vb_capacity = self._grow_buffer(
                self._vb, self._vb_mem, self._vb_capacity, n_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, _VERTEX_STRIDE,
            )
        if n_indices > self._ib_capacity:
            self._ib, self._ib_mem, self._ib_capacity = self._grow_buffer(
                self._ib, self._ib_mem, self._ib_capacity, n_indices,
                vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 4,
            )
        if n_line_verts > self._line_vb_capacity:
            self._line_vb, self._line_vb_mem, self._line_vb_capacity = self._grow_buffer(
                self._line_vb, self._line_vb_mem, self._line_vb_capacity, n_line_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, _VERTEX_STRIDE,
            )

    @staticmethod
    def _scissor_rect(
        clip: tuple[int, int, int, int], scale_x: float, scale_y: float,
    ) -> tuple[int, int, int, int]:
        """Scale a UI-space clip rect to a valid framebuffer scissor ``(x, y, w, h)``.

        Offset and size are scaled and truncated independently. The result is
        then clamped so the offset is never negative and the size never goes
        below zero: a rect hanging off the top/left edge keeps its far edge
        and loses only the off-screen part. Rects that are already
        non-negative come back unchanged.
        """
        x = int(clip[0] * scale_x)
        y = int(clip[1] * scale_y)
        w = int(clip[2] * scale_x)
        h = int(clip[3] * scale_y)
        right, bottom = x + w, y + h
        x = max(x, 0)
        y = max(y, 0)
        return x, y, max(right - x, 0), max(bottom - y, 0)

    def render(
        self,
        cmd: Any,
        width: int,
        height: int,
        ui_width: int,
        ui_height: int,
        *,
        verts: np.ndarray,
        indices: np.ndarray,
        line_verts: np.ndarray,
        batches: list[BindlessBatch],
    ) -> None:
        """Upload the co-batched geometry and issue one draw per batch.

        Args:
            cmd: Command buffer being recorded.
            width, height: Framebuffer size; used for viewport / full scissor.
            ui_width, ui_height: UI-space size pushed to the shaders and used
                to scale clip rects; a falsy value falls back to
                ``width`` / ``height``.
            verts: Triangle vertices for all batches, concatenated.
            indices: Index stream for the triangle batches (bound as UINT32).
            line_verts: Vertices for the line batches (separate buffer).
            batches: Ordered runs; zero-count batches are skipped.

        A triangle batch is ONE ``vkCmdDrawIndexed``; a line batch is ONE
        ``vkCmdDraw``. ``last_frame_draw_count`` is set to the number of draws
        issued (0 when the pass is not ready, there are no batches, or the
        engine has no texture descriptor set).
        """
        if not self._ready or not batches:
            self.last_frame_draw_count = 0
            return
        device = self._engine.ctx.device
        tex_desc = self._engine.texture_descriptor_set
        if not tex_desc:
            self.last_frame_draw_count = 0
            return

        uw = ui_width or width
        uh = ui_height or height

        self._ensure_capacity(len(verts), len(indices), len(line_verts))
        if len(verts):
            upload_numpy(device, self._vb_mem, verts)
        if len(indices):
            upload_numpy(device, self._ib_mem, indices)
        if len(line_verts):
            upload_numpy(device, self._line_vb_mem, line_verts)

        px_range = float(self._text_pass.px_range) if self._text_pass is not None else 4.0
        # Matches the 16-byte push block: screen_size.xy, px_range, pad.
        push = np.array([uw, uh, px_range, 0.0], dtype=np.float32).tobytes()

        vk_viewport = vk.VkViewport(
            x=0.0, y=0.0, width=float(width), height=float(height),
            minDepth=0.0, maxDepth=1.0,
        )
        full_scissor = vk.VkRect2D(
            offset=vk.VkOffset2D(x=0, y=0),
            extent=vk.VkExtent2D(width=width, height=height),
        )
        # Clip rects arrive in UI units; scale them to framebuffer pixels.
        clip_sx = width / uw if uw > 0 else 1.0
        clip_sy = height / uh if uh > 0 else 1.0

        # Bind state tracked across batches. ``last_kind`` is "tri" / "line"
        # so a topology switch rebinds the pipeline; a triangle blend change
        # also rebinds (lines are always alpha). The sentinel compares unequal
        # to every real clip value, including ``None``.
        sentinel = object()
        last_kind = ""
        last_blend = ""
        last_clip: Any = sentinel
        draw_count = 0

        for clip, blend, vert_off, idx_off, count, is_line in batches:
            if count == 0:
                continue
            kind = "line" if is_line else "tri"
            need_rebind = kind != last_kind or (not is_line and blend != last_blend)
            if need_rebind:
                if is_line:
                    pipe, layout = self._line_pipeline, self._line_pipeline_layout
                    vbuf = self._line_vb
                else:
                    pipe, layout = self._pipelines[blend]
                    vbuf = self._vb
                vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, pipe)
                vk.vkCmdBindDescriptorSets(
                    cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                    0, 1, [tex_desc], 0, None,
                )
                self._engine.push_constants(cmd, layout, push)
                vk.vkCmdBindVertexBuffers(cmd, 0, 1, [vbuf], [0])
                if not is_line:
                    vk.vkCmdBindIndexBuffer(cmd, self._ib, 0, vk.VK_INDEX_TYPE_UINT32)
                vk.vkCmdSetViewport(cmd, 0, 1, [vk_viewport])
                # Force the scissor to be re-set after a pipeline rebind.
                last_clip = sentinel
                last_kind = kind
                if not is_line:
                    last_blend = blend

            if clip != last_clip:
                if clip is not None:
                    sx, sy, sw, sh = self._scissor_rect(clip, clip_sx, clip_sy)
                    scissor = vk.VkRect2D(
                        offset=vk.VkOffset2D(x=sx, y=sy),
                        extent=vk.VkExtent2D(width=sw, height=sh),
                    )
                else:
                    scissor = full_scissor
                vk.vkCmdSetScissor(cmd, 0, 1, [scissor])
                last_clip = clip

            if is_line:
                vk.vkCmdDraw(cmd, count, 1, vert_off, 0)
            else:
                vk.vkCmdDrawIndexed(cmd, count, 1, idx_off, vert_off, 0)
            draw_count += 1

        self.last_frame_draw_count = draw_count

    def cleanup(self) -> None:
        """Destroy the GPU objects created by :meth:`setup`.

        Does nothing unless the pass is ready. Handles are reset to ``None``
        once destroyed. The atlas slot is unregistered only if this pass
        registered it via :meth:`sync_atlas_slot`; a slot borrowed through
        :meth:`set_atlas_slot` is just forgotten, since another pass owns it.
        """
        if not self._ready:
            return
        device = self._engine.ctx.device

        for pipe, layout in (self._pipelines or {}).values():
            vk.vkDestroyPipeline(device, pipe, None)
            vk.vkDestroyPipelineLayout(device, layout, None)
        self._pipelines = None
        self._pipeline_layout = None

        # (slot name, vk destroy function name), in the original teardown order.
        handle_destroyers = (
            ("_line_pipeline", "vkDestroyPipeline"),
            ("_line_pipeline_layout", "vkDestroyPipelineLayout"),
            ("_vert_module", "vkDestroyShaderModule"),
            ("_frag_module", "vkDestroyShaderModule"),
            ("_vb", "vkDestroyBuffer"),
            ("_ib", "vkDestroyBuffer"),
            ("_line_vb", "vkDestroyBuffer"),
            ("_vb_mem", "vkFreeMemory"),
            ("_ib_mem", "vkFreeMemory"),
            ("_line_vb_mem", "vkFreeMemory"),
        )
        for attr, destroy_name in handle_destroyers:
            handle = getattr(self, attr)
            if handle:
                getattr(vk, destroy_name)(device, handle, None)
            setattr(self, attr, None)

        if self._atlas_slot >= 0 and self._atlas_owned:
            self._engine.unregister_texture(self._atlas_slot)
        self._atlas_slot = -1
        self._atlas_view = None
        self._atlas_owned = False
        self._ready = False