# Source code for simvx.graphics.renderer.shadow_pass

"""Cascaded Shadow Map (CSM) rendering pass."""

import logging
from typing import Any

import numpy as np
import vulkan as vk

from ..gpu.pipeline import PipelineSpec, build_pipeline
from ..types import ALPHA_CUTOFF
from . import shadow_math
from .gpu_batch import GPUBatch
from .pass_helpers import load_shader_modules
from .passes import create_shadow_pass
from .vertex_layouts import POSITION_ONLY_BINDINGS, POSITION_UV_BINDINGS

__all__ = ["ShadowPass"]

log = logging.getLogger(__name__)

# Cascade limits are defined in ``shadow_math``; they are aliased here for
# local use (atlas sizing, clamping of the active cascade count).
MAX_CASCADE_COUNT = shadow_math.MAX_CASCADE_COUNT
DEFAULT_CASCADE_COUNT = shadow_math.DEFAULT_CASCADE_COUNT
# Side length, in texels, of one square cascade tile. The atlas is
# ``MAX_CASCADE_COUNT`` tiles wide and one tile tall.
SHADOW_MAP_SIZE = 2048
# Depth format shared by the atlas image and its image view.
DEPTH_FORMAT = vk.VK_FORMAT_D32_SFLOAT


def _cutoff_caster_slots(instances: list, materials: np.ndarray | None) -> list[int]:
    """Return the per-instance slots whose material is alpha-tested.

    Empty (byte-identical opaque path) when no material array is supplied or no
    instance uses ``blend="cutoff"``.
    """
    if materials is None or len(materials) == 0:
        return []
    n = len(materials)
    slots: list[int] = []
    for i, (_mh, _t, mat_id, _vid, _rl) in enumerate(instances):
        if 0 <= mat_id < n and materials[mat_id]["alpha_mode"] == ALPHA_CUTOFF:
            slots.append(i)
    return slots



[docs]
class ShadowPass:
    """Renders depth from directional light's POV into a cascaded shadow map atlas.

    Atlas layout: ``MAX_CASCADE_COUNT`` images side-by-side horizontally.
    Total size: ``SHADOW_MAP_SIZE * MAX_CASCADE_COUNT × SHADOW_MAP_SIZE``.
    The fragment shader's loop iterates ``active_cascade_count`` slices so
    1–4 cascades can be selected at runtime via
    ``WorldEnvironment.shadow_cascade_count`` without recompiling shaders.
    """

    def __init__(self, engine: Any):
        """Create an uninitialised pass bound to ``engine``.

        Only CPU-side state is prepared here. Every Vulkan handle starts as
        ``None`` and the pass stays not-ready until :meth:`setup` has run.
        """
        self._engine = engine
        self._ready = False

        # Atlas image, its backing memory, and the objects used to render
        # into it and sample from it.
        self._depth_image: Any = None
        self._depth_memory: Any = None
        self._depth_view: Any = None
        self._sampler: Any = None
        self._render_pass: Any = None
        self._framebuffer: Any = None
        # Slot in the engine's bindless texture array; -1 means "not registered".
        self._texture_index: int = -1

        # Opaque caster pipeline and its shader modules.
        self._vert_module: Any = None
        self._frag_module: Any = None
        self._pipeline: Any = None
        self._pipeline_layout: Any = None

        # Alpha-tested (cutout) caster pipeline. It is only bound when a
        # cutout caster is present, so opaque-only frames never touch it and
        # the opaque path stays byte-identical.
        self._cutoff_vert: Any = None
        self._cutoff_frag: Any = None
        self._cutoff_pipeline: Any = None
        self._cutoff_layout: Any = None

        # Draw-command batch used by ``render``; created in ``setup``.
        self._batch: GPUBatch | None = None

        # Number of cascades currently in use. Driven by
        # ``WorldEnvironment.shadow_cascade_count`` via the env-sync spec;
        # the atlas and SSBO are sized for the maximum regardless.
        self.cascade_count: int = DEFAULT_CASCADE_COUNT

        # Per-cascade outputs, always sized for MAX_CASCADE_COUNT so changing
        # ``cascade_count`` at runtime needs no reallocation. Slots beyond
        # ``cascade_count`` are zeroed each frame by ``ShadowRenderer``.
        self.cascade_splits = np.zeros(MAX_CASCADE_COUNT + 1, dtype=np.float32)
        self.cascade_vps = np.zeros((MAX_CASCADE_COUNT, 4, 4), dtype=np.float32)

        # Debug aid: when True the fragment shader tints lit surfaces by
        # cascade index (red=near, green=mid, blue=far). The flag reaches the
        # shader through the shadow SSBO (``debug_cascades_flag``), written by
        # ``ShadowRenderer.render_shadows``.
        self.debug_cascades: bool = False


[docs]
    def setup(self, ssbo_layout: Any) -> None:
        """Create every GPU resource the shadow pass needs and mark it ready.

        In order: the depth-only render pass, the device-local depth atlas
        (``SHADOW_MAP_SIZE * MAX_CASCADE_COUNT`` wide, ``SHADOW_MAP_SIZE``
        tall) with its view and framebuffer, the sampler, the bindless texture
        registration, the opaque and cutout pipelines, and the draw batch.

        Args:
            ssbo_layout: Descriptor set layout used as set 0 by both the
                opaque and the cutout shadow pipelines.

        Side effects on the engine: initialises the texture system if it has
        no descriptor set yet, and consumes one bindless slot by incrementing
        ``engine._next_texture_index``.
        """
        e = self._engine
        device = e.ctx.device
        phys = e.ctx.physical_device
        atlas_w = SHADOW_MAP_SIZE * MAX_CASCADE_COUNT
        atlas_h = SHADOW_MAP_SIZE

        # Render pass (depth-only)
        self._render_pass = create_shadow_pass(device)

        # Depth image (atlas): one 2D image holding all cascade tiles side by
        # side. It is written as a depth attachment and later sampled, hence
        # both usage bits.
        img_info = vk.VkImageCreateInfo(
            imageType=vk.VK_IMAGE_TYPE_2D,
            format=DEPTH_FORMAT,
            extent=vk.VkExtent3D(width=atlas_w, height=atlas_h, depth=1),
            mipLevels=1,
            arrayLayers=1,
            samples=vk.VK_SAMPLE_COUNT_1_BIT,
            tiling=vk.VK_IMAGE_TILING_OPTIMAL,
            usage=(vk.VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | vk.VK_IMAGE_USAGE_SAMPLED_BIT),
            sharingMode=vk.VK_SHARING_MODE_EXCLUSIVE,
            initialLayout=vk.VK_IMAGE_LAYOUT_UNDEFINED,
        )
        self._depth_image = vk.vkCreateImage(device, img_info, None)

        # Back the image with device-local memory sized from its requirements.
        mem_reqs = vk.vkGetImageMemoryRequirements(device, self._depth_image)
        from ..gpu.memory import _find_memory_type

        alloc_info = vk.VkMemoryAllocateInfo(
            allocationSize=mem_reqs.size,
            memoryTypeIndex=_find_memory_type(
                phys,
                mem_reqs.memoryTypeBits,
                vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
            ),
        )
        self._depth_memory = vk.vkAllocateMemory(device, alloc_info, None)
        vk.vkBindImageMemory(device, self._depth_image, self._depth_memory, 0)

        # Image view (depth aspect only, single mip / single layer)
        view_ci = vk.VkImageViewCreateInfo(
            image=self._depth_image,
            viewType=vk.VK_IMAGE_VIEW_TYPE_2D,
            format=DEPTH_FORMAT,
            subresourceRange=vk.VkImageSubresourceRange(
                aspectMask=vk.VK_IMAGE_ASPECT_DEPTH_BIT,
                baseMipLevel=0,
                levelCount=1,
                baseArrayLayer=0,
                layerCount=1,
            ),
        )
        self._depth_view = vk.vkCreateImageView(device, view_ci, None)

        # Framebuffer covering the whole atlas; per-cascade tiles are selected
        # at draw time with viewport/scissor.
        fb_ci = vk.VkFramebufferCreateInfo(
            renderPass=self._render_pass,
            attachmentCount=1,
            pAttachments=[self._depth_view],
            width=atlas_w,
            height=atlas_h,
            layers=1,
        )
        self._framebuffer = vk.vkCreateFramebuffer(device, fb_ci, None)

        # Sampler for reading the atlas: linear filtering, clamp-to-border with
        # an opaque-white border. Hardware depth comparison is disabled
        # (``compareEnable=VK_FALSE``), so this is a plain sampler, not a
        # comparison sampler.
        # NOTE(review): the previous comment called this a "comparison sampler
        # for PCF"; presumably the shader does the depth compare itself -- confirm.
        sampler_ci = vk.VkSamplerCreateInfo(
            magFilter=vk.VK_FILTER_LINEAR,
            minFilter=vk.VK_FILTER_LINEAR,
            addressModeU=vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
            addressModeV=vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
            addressModeW=vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
            borderColor=vk.VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
            compareEnable=vk.VK_FALSE,
            anisotropyEnable=vk.VK_FALSE,
            unnormalizedCoordinates=vk.VK_FALSE,
            mipmapMode=vk.VK_SAMPLER_MIPMAP_MODE_NEAREST,
        )
        self._sampler = vk.vkCreateSampler(device, sampler_ci, None)

        # Register shadow map in bindless texture array (with own sampler for depth)
        from ..gpu.descriptors import write_texture_descriptor

        # The texture system is created on demand if nothing initialised it yet.
        if not e.texture_descriptor_set:
            e._init_texture_system()
        # Claim the next free bindless slot for the atlas.
        self._texture_index = e._next_texture_index
        write_texture_descriptor(
            device,
            e.texture_descriptor_set,
            self._texture_index,
            self._depth_view,
            self._sampler,
        )
        e._next_texture_index += 1

        # Shadow pipeline
        self._vert_module, self._frag_module = load_shader_modules(
            device,
            e.shader_dir,
            "shadow.vert",
            "shadow.frag",
        )

        self._create_pipeline(device, self._render_pass, ssbo_layout)

        # Cutout caster pipeline: position + shading (UV) streams, plus
        # the material SSBO (set 0) and bindless textures (set 1) for the albedo
        # alpha discard. Setup cost only; zero per-frame cost when no cutout
        # caster is present.
        self._cutoff_vert, self._cutoff_frag = load_shader_modules(
            device,
            e.shader_dir,
            "shadow_cutoff.vert",
            "shadow_cutoff.frag",
        )
        self._create_cutoff_pipeline(device, self._render_pass, ssbo_layout, e.texture_descriptor_layout)

        # Draw-command batch used by ``render``; capacity is fixed at 10 000 draws.
        self._batch = GPUBatch(e, device, phys, max_draws=10_000)

        # Only set once everything above succeeded; ``render`` checks this flag.
        self._ready = True
        log.debug("Shadow pass initialized (%dx%d atlas, max %d cascades)", atlas_w, atlas_h, MAX_CASCADE_COUNT)


    def _create_pipeline(self, device: Any, render_pass: Any, ssbo_layout: Any) -> None:
        """Build the opaque depth-only caster pipeline and store it with its layout.

        The pipeline consumes only the position vertex stream (12-byte stride,
        D5) and has no colour attachment (``blend="none"``). Front faces are
        culled under ``CLOCKWISE`` winding to reduce peter-panning, and a
        constant + slope depth bias counters shadow acne. The single push
        constant is the 64-byte ``mat4`` light-space view-projection, visible
        to the vertex stage. Viewport and scissor are dynamic, so the extent
        handed to the builder is only a placeholder.

        Results are written to ``self._pipeline`` and ``self._pipeline_layout``.
        """
        # Placeholder extent: one cascade tile. The real region is set per draw.
        tile_extent = (SHADOW_MAP_SIZE, SHADOW_MAP_SIZE)

        opaque_spec = PipelineSpec(
            name="shadow",
            # Vertex input: position stream alone.
            vertex_bindings=POSITION_ONLY_BINDINGS,
            topology=vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            # Rasteriser: front-cull against peter-panning; the bias pair is
            # (constantFactor, slopeFactor) against acne.
            front_face=vk.VK_FRONT_FACE_CLOCKWISE,
            cull_mode=vk.VK_CULL_MODE_FRONT_BIT,
            depth_bias=(1.25, 1.75),
            # Depth state; nothing is written to a colour target.
            depth_test=True,
            depth_write=True,
            depth_compare=vk.VK_COMPARE_OP_LESS_OR_EQUAL,
            blend="none",
            # Resources: SSBO set plus the mat4 light_vp push constant.
            set_layouts=(ssbo_layout,),
            push_size=64,
            push_stages=vk.VK_SHADER_STAGE_VERTEX_BIT,
        )
        pipeline, layout = build_pipeline(
            device,
            opaque_spec,
            render_pass,
            tile_extent,
            vert_module=self._vert_module,
            frag_module=self._frag_module,
        )
        self._pipeline = pipeline
        self._pipeline_layout = layout

    def _create_cutoff_pipeline(self, device: Any, render_pass: Any, ssbo_layout: Any, texture_layout: Any) -> None:
        """Build the alpha-tested (cutout) caster pipeline and store it with its layout.

        Fixed-function state mirrors the opaque shadow pipeline (front-cull,
        same depth bias, no colour attachment) so cutout and opaque shadows
        line up. The differences are the vertex input, which adds the shading
        (UV) stream to the position stream, and a second descriptor set layout
        for the bindless textures, which lets the fragment shader sample the
        albedo alpha and discard holed fragments.

        Results are written to ``self._cutoff_pipeline`` and
        ``self._cutoff_layout``.
        """
        # Placeholder extent: one cascade tile.
        tile_extent = (SHADOW_MAP_SIZE, SHADOW_MAP_SIZE)

        cutout_spec = PipelineSpec(
            name="shadow_cutoff",
            # Vertex input: position + UV (no normal).
            vertex_bindings=POSITION_UV_BINDINGS,
            topology=vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            # Rasteriser state kept equal to the opaque pipeline.
            front_face=vk.VK_FRONT_FACE_CLOCKWISE,
            cull_mode=vk.VK_CULL_MODE_FRONT_BIT,
            depth_bias=(1.25, 1.75),
            # Depth state; nothing is written to a colour target.
            depth_test=True,
            depth_write=True,
            depth_compare=vk.VK_COMPARE_OP_LESS_OR_EQUAL,
            blend="none",
            # Set 0: SSBOs, set 1: bindless textures; push constant is mat4 light_vp.
            set_layouts=(ssbo_layout, texture_layout),
            push_size=64,
            push_stages=vk.VK_SHADER_STAGE_VERTEX_BIT,
        )
        pipeline, layout = build_pipeline(
            device,
            cutout_spec,
            render_pass,
            tile_extent,
            vert_module=self._cutoff_vert,
            frag_module=self._cutoff_frag,
        )
        self._cutoff_pipeline = pipeline
        self._cutoff_layout = layout


[docs]
    @property
    def shadow_texture_index(self) -> int:
        """Bindless index of the shadow map atlas texture."""
        return self._texture_index



[docs]
    def compute_cascades(
        self,
        view: np.ndarray,
        proj: np.ndarray,
        light_dir: np.ndarray,
        near: float = 0.0,
        far: float = 0.0,
    ) -> None:
        """Refresh ``cascade_splits`` and ``cascade_vps`` for the current camera and light.

        The work is done by ``shadow_math.compute_cascades`` for the active
        number of cascades (``self.cascade_count`` clamped to
        ``1..MAX_CASCADE_COUNT``). Each returned light-space VP matrix is
        stored transposed, i.e. in GLSL column-major order, ready for the SSBO
        upload. Entries past the active count are left at zero so the shader's
        ``active_cascade_count`` clamp never reads stale data.

        ``near`` / ``far`` are forwarded only when positive; a non-positive
        value is passed on as ``None``.
        """
        active = max(1, min(MAX_CASCADE_COUNT, int(self.cascade_count)))
        near_override = near if near > 0 else None
        far_override = far if far > 0 else None

        light_vps, split_distances = shadow_math.compute_cascades(
            view,
            proj,
            light_dir,
            near=near_override,
            far=far_override,
            cascade_count=active,
        )

        # Clear first so slots that are inactive this frame end up zeroed.
        self.cascade_splits.fill(0.0)
        self.cascade_splits[: active + 1] = split_distances

        self.cascade_vps.fill(0.0)
        for slot in range(active):
            self.cascade_vps[slot] = light_vps[slot].T



[docs]
    def render(
        self,
        cmd: Any,
        instances: list,
        ssbo_set: Any,
        mesh_registry: Any,
        multimesh_draws: list | None = None,
        materials: np.ndarray | None = None,
    ) -> None:
        """Record shadow depth rendering commands for per-instance and MultiMesh draws.

        Args:
            cmd: Command buffer the shadow pass is recorded into.
            instances: Per-instance casters as 5-tuples. Field 0 is the mesh
                handle and field 2 the material id; a tuple's position in the
                list is its instance slot.
            ssbo_set: Descriptor set bound as set 0.
            mesh_registry: Source of vertex/index buffers via
                ``get_buffers(mesh_handle)``.
            multimesh_draws: Optional 9-tuples
                (mesh_handle, base, count, vp_id, double_sided, centres, radii,
                union_min, union_max). Each block is drawn as ONE instanced
                shadow command (first_instance=base, instance_count=count) per
                cascade -- coarse, no per-instance cull.
            materials: The renderer's material SSBO array. Per-instance casters
                whose material is alpha-tested (``blend="cutoff"``) are routed
                through the cutout pipeline, which discards holed fragments so
                the shadow shows the cutout. With no cutout caster present the
                cutout list is empty and the opaque draw set is byte-identical
                to the plain opaque pass.

        Nothing is recorded when the pass is not ready or there is nothing to
        draw. In that case the render pass is not begun, so the atlas is
        neither cleared nor rewritten.
        NOTE(review): confirm callers do not sample a stale atlas on that path.
        """
        if not self._ready or (not instances and not multimesh_draws):
            return

        cutoff_slots = _cutoff_caster_slots(instances, materials)
        cutoff_lookup = set(cutoff_slots)

        atlas_w = SHADOW_MAP_SIZE * MAX_CASCADE_COUNT
        atlas_h = SHADOW_MAP_SIZE
        active = max(1, min(MAX_CASCADE_COUNT, int(self.cascade_count)))

        # Group per-instance by mesh (opaque casters only; cutout casters are
        # drawn separately through the cutout pipeline below).
        mesh_groups: dict[int, list[int]] = {}
        for i, (mesh_handle, _, _, _, _) in enumerate(instances):
            if i in cutoff_lookup:
                continue
            mesh_groups.setdefault(mesh_handle.id, []).append(i)

        # Build the indirect draw list ONCE. It does not depend on the cascade
        # (the shadow pass does no per-cascade or per-instance culling), so
        # rebuilding and re-uploading it for every cascade was redundant CPU
        # work. Doing it before the render pass begins also means a batching
        # failure cannot leave a render pass open on ``cmd``.
        # Contiguous slot runs are coalesced into instanced draws (a MultiMesh
        # -> one draw); collapsing instances is safe for a depth-only pass.
        self._batch.reset()
        group_ranges: list[tuple[Any, int, int]] = []  # (handle, batch_offset, command_count)
        for indices in mesh_groups.values():
            mesh_handle = instances[indices[0]][0]
            batch_offset, count = self._batch.add_instanced_runs(mesh_handle.index_count, indices)
            group_ranges.append((mesh_handle, batch_offset, count))
        # MultiMesh blocks: one instanced command per block (first_instance=base,
        # instance_count=count). All instances in the block cast shadows together.
        if multimesh_draws:
            for draw in multimesh_draws:
                mm_mesh, mm_base, mm_count = draw[0], draw[1], draw[2]
                slots = np.arange(mm_base, mm_base + mm_count, dtype=np.uint32)
                batch_offset, ncmds = self._batch.add_instanced_runs(mm_mesh.index_count, slots)
                group_ranges.append((mm_mesh, batch_offset, ncmds))
        self._batch.upload()

        # Begin shadow render pass; the whole atlas is cleared to the far depth.
        clear = vk.VkClearValue(
            depthStencil=vk.VkClearDepthStencilValue(depth=1.0, stencil=0),
        )
        rp_info = vk.VkRenderPassBeginInfo(
            renderPass=self._render_pass,
            framebuffer=self._framebuffer,
            renderArea=vk.VkRect2D(
                offset=vk.VkOffset2D(x=0, y=0),
                extent=vk.VkExtent2D(width=atlas_w, height=atlas_h),
            ),
            clearValueCount=1,
            pClearValues=[clear],
        )
        vk.vkCmdBeginRenderPass(cmd, rp_info, vk.VK_SUBPASS_CONTENTS_INLINE)

        vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, self._pipeline)

        # Bind transform SSBO
        vk.vkCmdBindDescriptorSets(
            cmd,
            vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
            self._pipeline_layout,
            0,
            1,
            [ssbo_set],
            0,
            None,
        )

        ffi = vk.ffi
        for cascade in range(active):
            # A previous cascade's cutout draws leave the cutout pipeline bound;
            # restore the opaque pipeline. Skipped entirely when no cutout caster
            # is present, so the opaque path stays byte-identical.
            if cutoff_slots:
                vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, self._pipeline)

            # Restrict rendering to this cascade's tile of the atlas.
            vk_vp = vk.VkViewport(
                x=float(cascade * SHADOW_MAP_SIZE),
                y=0.0,
                width=float(SHADOW_MAP_SIZE),
                height=float(SHADOW_MAP_SIZE),
                minDepth=0.0,
                maxDepth=1.0,
            )
            vk.vkCmdSetViewport(cmd, 0, 1, [vk_vp])
            scissor = vk.VkRect2D(
                offset=vk.VkOffset2D(x=cascade * SHADOW_MAP_SIZE, y=0),
                extent=vk.VkExtent2D(width=SHADOW_MAP_SIZE, height=SHADOW_MAP_SIZE),
            )
            vk.vkCmdSetScissor(cmd, 0, 1, [scissor])

            # Push light VP matrix for this cascade (column-major, already transposed)
            pc_data = np.ascontiguousarray(self.cascade_vps[cascade]).tobytes()
            cbuf = ffi.new("char[]", pc_data)
            vk._vulkan.lib.vkCmdPushConstants(
                cmd,
                self._pipeline_layout,
                vk.VK_SHADER_STAGE_VERTEX_BIT,
                0,
                64,
                cbuf,
            )

            # Replay the shared draw list into this cascade's tile.
            for mesh_handle, batch_offset, count in group_ranges:
                bufs = mesh_registry.get_buffers(mesh_handle)
                vk.vkCmdBindVertexBuffers(cmd, 0, 1, [bufs.position], [0])
                vk.vkCmdBindIndexBuffer(cmd, bufs.index, 0, vk.VK_INDEX_TYPE_UINT32)
                self._batch.draw_range(cmd, batch_offset, count)

            # Cutout casters: switch to the cutout pipeline (adds the
            # bindless texture set) and draw each per instance so its material
            # index is dynamically uniform for the albedo sample. Cutout casters
            # are the exception, so per-instance draws are cheap.
            if cutoff_slots:
                self._render_cutoff(cmd, cascade, instances, cutoff_slots, ssbo_set, mesh_registry)

        vk.vkCmdEndRenderPass(cmd)


    def _render_cutoff(
        self,
        cmd: Any,
        cascade: int,
        instances: list,
        cutoff_slots: list[int],
        ssbo_set: Any,
        mesh_registry: Any,
    ) -> None:
        """Record the alpha-tested (cutout) caster draws for a single cascade.

        Binds the cutout pipeline with ``ssbo_set`` as set 0 and the engine's
        bindless texture set as set 1, pushes the cascade's light VP matrix,
        then issues one indexed draw per slot in ``cutoff_slots``. Each draw
        uses the slot as ``firstInstance`` and binds both the position and the
        shading vertex streams. The cutout pipeline is left bound on return.
        """
        bindless_set = self._engine.texture_descriptor_set
        graphics = vk.VK_PIPELINE_BIND_POINT_GRAPHICS
        layout = self._cutoff_layout

        vk.vkCmdBindPipeline(cmd, graphics, self._cutoff_pipeline)
        # Set 0 = SSBOs, set 1 = bindless textures; bound one set per call.
        for set_index, descriptor_set in enumerate((ssbo_set, bindless_set)):
            vk.vkCmdBindDescriptorSets(cmd, graphics, layout, set_index, 1, [descriptor_set], 0, None)

        # Light-space VP for this cascade, pushed against the cutout layout.
        light_vp_bytes = np.ascontiguousarray(self.cascade_vps[cascade]).tobytes()
        push_buf = vk.ffi.new("char[]", light_vp_bytes)
        vk._vulkan.lib.vkCmdPushConstants(
            cmd,
            layout,
            vk.VK_SHADER_STAGE_VERTEX_BIT,
            0,
            64,
            push_buf,
        )

        for slot in cutoff_slots:
            caster_mesh = instances[slot][0]
            buffers = mesh_registry.get_buffers(caster_mesh)
            vk.vkCmdBindVertexBuffers(cmd, 0, 2, [buffers.position, buffers.shading], [0, 0])
            vk.vkCmdBindIndexBuffer(cmd, buffers.index, 0, vk.VK_INDEX_TYPE_UINT32)
            # One instance per draw, addressed by its slot.
            vk.vkCmdDrawIndexed(cmd, caster_mesh.index_count, 1, 0, 0, slot)


[docs]
    def cleanup(self) -> None:
        """Release all GPU resources."""
        if not self._ready:
            return
        device = self._engine.ctx.device
        for obj, fn in [
            (self._framebuffer, vk.vkDestroyFramebuffer),
            (self._pipeline, vk.vkDestroyPipeline),
            (self._pipeline_layout, vk.vkDestroyPipelineLayout),
            (self._cutoff_pipeline, vk.vkDestroyPipeline),
            (self._cutoff_layout, vk.vkDestroyPipelineLayout),
            (self._cutoff_vert, vk.vkDestroyShaderModule),
            (self._cutoff_frag, vk.vkDestroyShaderModule),
            (self._vert_module, vk.vkDestroyShaderModule),
            (self._frag_module, vk.vkDestroyShaderModule),
            (self._depth_view, vk.vkDestroyImageView),
            (self._depth_image, vk.vkDestroyImage),
            (self._sampler, vk.vkDestroySampler),
            (self._render_pass, vk.vkDestroyRenderPass),
        ]:
            if obj:
                fn(device, obj, None)
        if self._depth_memory:
            vk.vkFreeMemory(device, self._depth_memory, None)
        if self._batch:
            self._batch.destroy()
        self._ready = False