Source code for simvx.graphics.renderer.bindless_draw2d_pass

"""Bindless co-batched 2D pass for the item pipeline (design §3 Decision D, P3b).

The main-framebuffer 2D submit. It carries ``texture_id`` and the ``is_msdf``
flag **per vertex** and draws through ONE unified pipeline (``ui2d.vert`` /
``ui2d.frag``). So a run of consecutive items sharing only ``(clip, blend)`` --
different-texture sprites *and* glyph runs *and* untextured fills -- collapses
into a single ``vkCmdDrawIndexed`` that selects the texture per primitive via
``ui_textures[nonuniformEXT(tex_id)]`` and branches on the ``is_msdf`` bit
(linear sample vs median-MSDF decode, the latter ported verbatim from
``text.frag`` so glyph AA is pixel-identical).

The MSDF atlas is registered into the engine's bindless ``ui_textures[]`` array
(the same array sprites use), so a glyph item just references that slot like any
other textured item. The atlas slot is refreshed when the atlas version bumps
(the view is recreated on re-upload).
"""

from __future__ import annotations

import logging
from typing import Any, NamedTuple

import numpy as np
import vulkan as vk

from ..gpu.memory import create_buffer, upload_numpy
from ..types import SHADER_DIR

__all__ = ["BindlessDraw2DPass", "BindlessBatch"]

# Module logger, named after this module.
log = logging.getLogger(__name__)

# Geometric-growth arena (design §2.7): no fixed silent-truncation cap. Start
# modest, double at the frame boundary if a scene needs more.
# Initial element counts (not bytes): ``_INITIAL_VERTS`` sizes both the triangle
# and the line vertex buffers; ``_INITIAL_INDICES`` sizes the index buffer
# (4 bytes per index, bound as UINT32).
_INITIAL_VERTS = 65536
_INITIAL_INDICES = 98304
# 40-byte UI2D vertex. Used to size the vertex arenas in bytes.
# NOTE(review): presumably must equal ``UI2D_VERTEX_STRIDE`` from
# ``..gpu.pipeline`` (the stride the pipelines are built with) -- confirm.
_VERTEX_STRIDE = 40
# Push: vec2 screen_size + float px_range + float pad = 16 bytes (VS + FS).
_PUSH_SIZE = 16
# is_msdf bit in the per-vertex flags column (matches ui2d.frag FLAG_IS_MSDF).
FLAG_IS_MSDF = 1



[docs]
class BindlessBatch(NamedTuple):
    """One co-batched draw: a contiguous range under a (topology, clip, blend) scope.

    ``clip`` is the scissor rect (or ``None`` for full-screen); ``blend`` is the
    blend-mode string selecting the per-blend pipeline. The texture and is_msdf
    selection are PER VERTEX, so a single triangle batch covers many textures
    (different-texture sprites + MSDF glyph runs + untextured fills).

    Triangle batches (``line=False``) are indexed draws: ``vert_offset`` is the
    base vertex, ``idx_offset``/``count`` the index range. Line batches
    (``line=True``) are non-indexed: ``vert_offset`` is the first vertex and
    ``count`` the vertex count in the shared line vertex buffer. Lines use a
    different topology so they cannot merge with triangles; they break a run and
    render through the line pipeline, preserving painter order.

    The field order is part of the contract: the pass unpacks each batch
    positionally as ``(clip, blend, vert_offset, idx_offset, count, line)``.
    """

    # Scissor rect as four ints, or None for the full framebuffer. The pass
    # reads elements 0/1 as the offset and 2/3 as the extent.
    clip: tuple[int, int, int, int] | None
    # Blend-mode key used to pick the triangle pipeline; line batches always
    # draw through the single alpha line pipeline regardless of this value.
    blend: str
    # Base vertex (triangles) or first vertex (lines).
    vert_offset: int
    # First index of the range (triangles only; not used for line draws).
    idx_offset: int
    # Index count (triangles) or vertex count (lines); 0 means "skip".
    count: int
    # True selects the non-indexed line path.
    line: bool = False




[docs]
class BindlessDraw2DPass:
    """GPU pass that draws bindless co-batched 2D geometry (design §3 D).

    Owns the unified ``ui2d`` pipeline (one per blend mode), one extended-vertex
    buffer + index buffer (host-visible, growable arena), and binds the engine's
    bindless texture descriptor set (the same ``ui_textures[]`` sprites use). The
    MSDF atlas is registered into that array via :meth:`sync_atlas_slot`.
    """

    __slots__ = (
        # Owning engine and the optional text pass (source of the MSDF atlas
        # view and ``px_range``).
        "_engine", "_text_pass",
        # Triangle pipelines keyed by blend mode (each entry is a
        # ``(pipeline, layout)`` pair), the alpha layout alias, and the
        # dedicated LINE_LIST pipeline + layout.
        "_pipelines", "_pipeline_layout", "_line_pipeline", "_line_pipeline_layout",
        # Shader modules shared by every pipeline above.
        "_vert_module", "_frag_module",
        # Triangle vertex arena: buffer, backing memory, capacity in vertices.
        "_vb", "_vb_mem", "_vb_capacity",
        # Index arena: buffer, backing memory, capacity in indices.
        "_ib", "_ib_mem", "_ib_capacity",
        # Line vertex arena: buffer, backing memory, capacity in vertices.
        "_line_vb", "_line_vb_mem", "_line_vb_capacity",
        # Bindless slot of the MSDF atlas (-1 = none) and the view last written
        # to that slot by this pass.
        "_atlas_slot", "_atlas_view",
        # Set True at the end of ``setup``; number of draws issued by the last
        # ``render`` call.
        "_ready", "last_frame_draw_count",
    )

    def __init__(self, engine: Any, text_pass: Any = None) -> None:
        for slot in self.__slots__:
            object.__setattr__(self, slot, None)
        self._engine = engine
        self._text_pass = text_pass
        self._ready = False
        self._atlas_slot = -1
        self.last_frame_draw_count = 0


[docs]
    def setup(self, render_pass: Any = None, extent: tuple[int, int] | None = None) -> None:
        """Compile the ui2d shaders, build the pipelines and allocate the arenas.

        ``render_pass`` and ``extent`` fall back to the engine's own when they
        are omitted (or falsy). On success the pass is marked ready.
        """
        engine = self._engine
        device = engine.ctx.device
        phys = engine.ctx.physical_device
        target_pass = render_pass or engine.render_pass
        target_extent = extent or engine.extent

        from ..gpu.pipeline import (
            UI2D_VERTEX_ATTRS,
            UI2D_VERTEX_STRIDE,
            PipelineSpec,
            build_pipeline,
            create_shader_module,
        )
        from ..materials.shader_compiler import compile_shader

        self._vert_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.vert"))
        self._frag_module = create_shader_module(device, compile_shader(SHADER_DIR / "ui2d.frag"))

        # Every pipeline binds the engine's bindless texture array at set 0 (the
        # same array ui.frag uses for sprites).
        tex_layout = engine.texture_descriptor_layout

        def make_spec(name: str, topology: Any, blend: str) -> Any:
            # All ui2d pipelines share everything except name, topology and blend.
            return PipelineSpec(
                name=name,
                topology=topology,
                blend=blend,
                vertex_stride=UI2D_VERTEX_STRIDE,
                vertex_attrs=UI2D_VERTEX_ATTRS,
                cull_mode=vk.VK_CULL_MODE_NONE,
                depth_test=False,
                depth_write=False,
                dst_alpha_factor=vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
                set_layouts=(tex_layout,),
                push_size=_PUSH_SIZE,
            )

        def build(spec: Any) -> Any:
            return build_pipeline(
                device, spec, target_pass, target_extent,
                vert_module=self._vert_module, frag_module=self._frag_module,
            )

        # One triangle pipeline per blend mode: a blend change inside a frame
        # rebinds, but the texture/msdf selection is per vertex so a texture
        # change never splits a run.
        self._pipelines = {}
        for blend in ("alpha", "add", "multiply"):
            self._pipelines[blend] = build(
                make_spec(f"ui2d_bindless_{blend}", vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, blend)
            )
        self._pipeline_layout = self._pipelines["alpha"][1]

        # Line pipeline: same shaders + vertex format, LINE_LIST topology, always
        # alpha (line outlines are untextured: tex_id = -1 in the frag's
        # solid-colour branch). Separate because triangles and lines can't share
        # a draw; ordering is preserved by breaking the run.
        self._line_pipeline, self._line_pipeline_layout = build(
            make_spec("ui2d_bindless_line", vk.VK_PRIMITIVE_TOPOLOGY_LINE_LIST, "alpha")
        )

        host_flags = (
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
            | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
        )

        def allocate(element_count: int, element_size: int, usage: Any) -> Any:
            # Host-visible, host-coherent arena of ``element_count`` elements.
            return create_buffer(device, phys, element_count * element_size, usage, host_flags)

        self._vb_capacity = _INITIAL_VERTS
        self._ib_capacity = _INITIAL_INDICES
        self._vb, self._vb_mem = allocate(
            self._vb_capacity, _VERTEX_STRIDE, vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        )
        self._ib, self._ib_mem = allocate(
            self._ib_capacity, 4, vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        )
        self._line_vb_capacity = _INITIAL_VERTS
        self._line_vb, self._line_vb_mem = allocate(
            self._line_vb_capacity, _VERTEX_STRIDE, vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        )
        self._ready = True



[docs]
    def sync_atlas_slot(self) -> int:
        """Register / refresh the MSDF atlas in the bindless array; return its slot.

        The glyph items reference this slot as their per-vertex ``tex_id``. The
        atlas view is recreated when the atlas version bumps (re-upload), so the
        bindless descriptor is rewritten to the new view while keeping the same
        slot id stable (so already-built geometry stays valid).
        """
        tp = self._text_pass
        if tp is None:
            return -1
        view = getattr(tp, "_atlas_view", None)
        if view is None:
            return self._atlas_slot
        if self._atlas_slot < 0:
            # MSDF atlas needs LINEAR filtering for the SDF decode (text.frag uses
            # a linear sampler), which is the bindless default.
            self._atlas_slot = self._engine.register_texture(view, filter="linear")
            self._atlas_view = view
        elif view != self._atlas_view:
            self._engine.update_texture(self._atlas_slot, view)
            self._atlas_view = view
        return self._atlas_slot



[docs]
    @property
    def atlas_slot(self) -> int:
        """The bindless slot of the MSDF atlas (-1 until :meth:`sync_atlas_slot`)."""
        return self._atlas_slot



[docs]
    def set_atlas_slot(self, slot: int) -> None:
        """Borrow an already-registered MSDF atlas slot (N1, the HDR-lane pass).

        The HDR-target 2D pass and the swapchain 2D pass bind the SAME engine
        bindless ``ui_textures[]`` descriptor set, so the atlas slot index is valid
        in both. Only the swapchain pass owns the registration (calls
        :meth:`sync_atlas_slot`); the HDR pass mirrors its slot here each frame to
        avoid a redundant second descriptor write for the same atlas view.
        """
        self._atlas_slot = slot


    def _grow_buffer(
        self, buf: Any, mem: Any, cap: int, needed: int, usage: int, stride: int,
    ) -> tuple[Any, Any, int]:
        """Reallocate a host-visible arena buffer to fit ``needed`` (geometric).

        ``cap`` and ``needed`` are element counts; ``stride`` is the element size
        in bytes and ``usage`` the Vulkan buffer-usage flags for the new buffer.
        The capacity is doubled until it is at least ``needed``.

        Returns ``(new_buffer, new_memory, new_capacity)``. The old contents are
        not copied: the caller is expected to re-upload the whole frame.

        The replacement is allocated BEFORE the old buffer is destroyed, so if
        the allocation raises, ``buf`` / ``mem`` are still live and the
        caller's attributes keep pointing at valid handles.
        """
        device = self._engine.ctx.device
        phys = self._engine.ctx.physical_device
        host_flags = (
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
            | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
        )
        # Start from at least 1 so doubling always makes progress, even if a
        # zero capacity is ever passed in.
        new_cap = max(cap, 1)
        while new_cap < needed:
            new_cap *= 2
        new_buf, new_mem = create_buffer(device, phys, new_cap * stride, usage, host_flags)
        # The old buffer may still be referenced by in-flight work; wait for
        # the device to drain before destroying it.
        vk.vkDeviceWaitIdle(device)
        vk.vkDestroyBuffer(device, buf, None)
        vk.vkFreeMemory(device, mem, None)
        return new_buf, new_mem, new_cap

    def _ensure_capacity(self, n_verts: int, n_indices: int, n_line_verts: int) -> None:
        """Grow the arena buffers geometrically if a frame needs more (design §2.7)."""
        if n_verts > self._vb_capacity:
            self._vb, self._vb_mem, self._vb_capacity = self._grow_buffer(
                self._vb, self._vb_mem, self._vb_capacity, n_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, _VERTEX_STRIDE,
            )
        if n_indices > self._ib_capacity:
            self._ib, self._ib_mem, self._ib_capacity = self._grow_buffer(
                self._ib, self._ib_mem, self._ib_capacity, n_indices,
                vk.VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 4,
            )
        if n_line_verts > self._line_vb_capacity:
            self._line_vb, self._line_vb_mem, self._line_vb_capacity = self._grow_buffer(
                self._line_vb, self._line_vb_mem, self._line_vb_capacity, n_line_verts,
                vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, _VERTEX_STRIDE,
            )


[docs]
    def render(
        self,
        cmd: Any,
        width: int,
        height: int,
        ui_width: int,
        ui_height: int,
        *,
        verts: np.ndarray,
        indices: np.ndarray,
        line_verts: np.ndarray,
        batches: list[BindlessBatch],
    ) -> None:
        """Upload the co-batched geometry and issue one draw per batch.

        ``verts`` is a contiguous :data:`UI2D_VERTEX_DTYPE` array of triangle
        vertices (all batches concatenated, camera already applied), ``indices``
        the matching uint32 index stream, ``line_verts`` the (separate) line
        vertices, ``batches`` the ordered per-(topology, clip, blend) runs. A
        triangle batch is ONE ``vkCmdDrawIndexed`` regardless of how many textures
        it touches; a line batch is ONE ``vkCmdDraw``.
        """
        if not self._ready or not batches:
            self.last_frame_draw_count = 0
            return

        device = self._engine.ctx.device
        tex_desc = self._engine.texture_descriptor_set
        if not tex_desc:
            self.last_frame_draw_count = 0
            return

        uw = ui_width or width
        uh = ui_height or height

        self._ensure_capacity(len(verts), len(indices), len(line_verts))
        if len(verts):
            upload_numpy(device, self._vb_mem, verts)
        if len(indices):
            upload_numpy(device, self._ib_mem, indices)
        if len(line_verts):
            upload_numpy(device, self._line_vb_mem, line_verts)

        px_range = float(self._text_pass.px_range) if self._text_pass is not None else 4.0
        push = np.array([uw, uh, px_range, 0.0], dtype=np.float32).tobytes()

        vk_viewport = vk.VkViewport(
            x=0.0, y=0.0, width=float(width), height=float(height),
            minDepth=0.0, maxDepth=1.0,
        )
        full_scissor = vk.VkRect2D(
            offset=vk.VkOffset2D(x=0, y=0),
            extent=vk.VkExtent2D(width=width, height=height),
        )
        clip_sx = width / uw if uw > 0 else 1.0
        clip_sy = height / uh if uh > 0 else 1.0

        sentinel = object()
        # Bind state tracked across batches. ``last_kind`` is "tri" / "line" so a
        # topology switch rebinds the pipeline; a triangle blend change also
        # rebinds (lines are always alpha).
        last_kind = ""
        last_blend = ""
        last_clip: Any = sentinel
        draw_count = 0
        for clip, blend, vert_off, idx_off, count, is_line in batches:
            if count == 0:
                continue
            kind = "line" if is_line else "tri"
            need_rebind = kind != last_kind or (not is_line and blend != last_blend)
            if need_rebind:
                if is_line:
                    pipe, layout = self._line_pipeline, self._line_pipeline_layout
                    vbuf = self._line_vb
                else:
                    pipe, layout = self._pipelines[blend]
                    vbuf = self._vb
                vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, pipe)
                vk.vkCmdBindDescriptorSets(
                    cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                    0, 1, [tex_desc], 0, None,
                )
                self._engine.push_constants(cmd, layout, push)
                vk.vkCmdBindVertexBuffers(cmd, 0, 1, [vbuf], [0])
                if not is_line:
                    vk.vkCmdBindIndexBuffer(cmd, self._ib, 0, vk.VK_INDEX_TYPE_UINT32)
                vk.vkCmdSetViewport(cmd, 0, 1, [vk_viewport])
                last_clip = sentinel
                last_kind = kind
                last_blend = blend if not is_line else last_blend

            if clip != last_clip:
                if clip is not None:
                    scissor = vk.VkRect2D(
                        offset=vk.VkOffset2D(
                            x=int(clip[0] * clip_sx), y=int(clip[1] * clip_sy),
                        ),
                        extent=vk.VkExtent2D(
                            width=int(clip[2] * clip_sx), height=int(clip[3] * clip_sy),
                        ),
                    )
                else:
                    scissor = full_scissor
                vk.vkCmdSetScissor(cmd, 0, 1, [scissor])
                last_clip = clip

            if is_line:
                vk.vkCmdDraw(cmd, count, 1, vert_off, 0)
            else:
                vk.vkCmdDrawIndexed(cmd, count, 1, idx_off, vert_off, 0)
            draw_count += 1

        self.last_frame_draw_count = draw_count



[docs]
    def cleanup(self) -> None:
        if not self._ready:
            return
        device = self._engine.ctx.device
        for pipe, layout in (self._pipelines or {}).values():
            vk.vkDestroyPipeline(device, pipe, None)
            vk.vkDestroyPipelineLayout(device, layout, None)
        for obj, fn in [
            (self._line_pipeline, vk.vkDestroyPipeline),
            (self._line_pipeline_layout, vk.vkDestroyPipelineLayout),
            (self._vert_module, vk.vkDestroyShaderModule),
            (self._frag_module, vk.vkDestroyShaderModule),
            (self._vb, vk.vkDestroyBuffer),
            (self._ib, vk.vkDestroyBuffer),
            (self._line_vb, vk.vkDestroyBuffer),
        ]:
            if obj:
                fn(device, obj, None)
        for mem in (self._vb_mem, self._ib_mem, self._line_vb_mem):
            if mem:
                vk.vkFreeMemory(device, mem, None)
        if self._atlas_slot >= 0:
            self._engine.unregister_texture(self._atlas_slot)
            self._atlas_slot = -1
        self._ready = False