Source code for simvx.graphics.renderer.post_process

"""Post-processing pass: HDR tone mapping, FXAA, bloom, DoF, motion blur, film grain, vignette, chromatic aberration.

"""

import logging
import time
from typing import Any

import numpy as np
import vulkan as vk

from ..gpu.memory import create_buffer, create_sampler
from ..gpu.pipeline import create_shader_module
from ..materials.shader_compiler import compile_shader
from .bloom_pass import BloomPass
from .render_target import RenderTarget

# Only the pass class is exported via ``from ... import *``.
__all__ = ["PostProcessPass"]

log = logging.getLogger(__name__)

# Push constant layout (112 bytes), as packed by PostProcessPass.render_tonemap().
# Offsets below are the byte offsets written into the push-constant buffer:
#   offset  type   name
#   0       vec2   screen_size
#   8       float  exposure
#   12      uint   flags                   (bitwise OR of the FLAG_* values below)
#   16      float  bloom_intensity
#   20      float  dof_focus_distance
#   24      float  dof_focus_range
#   28      float  dof_max_blur            (pixels)
#   32      float  grain_intensity
#   36      float  vignette_intensity
#   40      float  vignette_smoothness
#   44      float  chromatic_intensity
#   48      float  time                    (seconds since the pass object was constructed)
#   52      float  motion_blur_intensity   (clamped to [0, 2] when packed)
#   56      uint   motion_blur_samples     (clamped to [4, 32] when packed)
#   60      (4 bytes padding for vec4 alignment)
#   64      vec4   fog_colour              (rgb + density in alpha)
#   80      vec4   fog_params              (start, end, mode, fog_enabled)
#   96      vec4   tonemap_params          (mode[float], white, pad, pad)
# Total = 112 bytes
# Used both as the pipeline layout's push-constant range size and as the size
# passed to vkCmdPushConstants, so the two always agree.
_PC_SIZE = 112

# Flag bits, OR-ed together by PostProcessPass._build_flags() into the ``flags``
# push constant (offset 12).
FLAG_FXAA = 1 << 0
FLAG_BLOOM = 1 << 1  # only set when bloom is enabled AND a bloom pass exists
FLAG_SSAO = 1 << 2
FLAG_DOF = 1 << 3
FLAG_GRAIN = 1 << 4
FLAG_VIGNETTE = 1 << 5
FLAG_CHROMATIC = 1 << 6
FLAG_MOTION_BLUR = 1 << 7  # only set once a previous view-projection matrix is recorded
# Set when the volumetric fog pass composited this frame: suppresses the
# analytic distance-fog branch in tonemap.frag (mutually exclusive).
FLAG_VOLUMETRIC_FOG = 1 << 8


[docs]
class PostProcessPass:
    """Renders HDR scene to offscreen target, then tone-maps to swapchain.

    The pass owns an HDR render target. When enabled:
    - 3D scene renders to the HDR target (pre_render phase)
    - Bloom extraction + blur (optional)
    - Tone mapping + FXAA + cinematic effects renders to swapchain (main render pass)
    """

    def __init__(self, engine: Any):
        """Record the engine and initialise every setting to its default.

        No GPU resources are created here; all Vulkan handles start as ``None``
        and are created by ``setup()``.

        Args:
            engine: Owning engine object. Other methods read ``ctx``, ``extent``,
                ``shader_dir``, ``render_pass`` and (optionally) ``clear_colour``
                from it.
        """
        self._engine = engine
        self._enabled = False
        # Reference point for the ``time`` push constant sent to the shader.
        self._start_time = time.perf_counter()

        # HDR render target
        self._hdr_target: RenderTarget | None = None

        # Tonemap pipeline
        self._pipeline: Any = None
        self._pipeline_layout: Any = None
        self._vert_module: Any = None
        self._frag_module: Any = None

        # HDR + bloom + depth texture descriptors
        self._sampler: Any = None
        self._depth_sampler: Any = None
        self._descriptor_pool: Any = None
        self._descriptor_layout: Any = None
        self._descriptor_set: Any = None

        # Bloom (setup() creates the pass and switches _bloom_enabled on)
        self._bloom_pass: BloomPass | None = None
        self._bloom_enabled = False
        self.bloom_intensity = 0.3
        self.bloom_threshold = 1.0

        # Settings: core
        self.exposure = 1.0
        # Match WorldEnvironment.fxaa_enabled's default (False). FXAA is now
        # driven by the env spec rather than being hard-on.
        self.fxaa_enabled = False

        # Tonemap operator selection. Mode enumeration matches the web WGSL
        # tonemap (0=aces, 1=neutral, 2=reinhard, 3=uchimura). ``tonemap_white``
        # is the luminance that maps to 1.0 (white-point scaling).
        self.tonemap_mode = 0
        self.tonemap_white = 1.0

        # Depth of Field
        self.dof_enabled = False
        self.dof_focus_distance = 0.5
        self.dof_focus_range = 0.1
        self.dof_max_blur = 6.0

        # Film Grain
        self.grain_enabled = False
        self.grain_intensity = 0.05

        # Vignette
        self.vignette_enabled = False
        self.vignette_intensity = 0.8
        self.vignette_smoothness = 0.4

        # Chromatic Aberration
        self.chromatic_aberration_enabled = False
        self.chromatic_aberration_intensity = 0.005

        # SSAO
        self.ssao_enabled = False

        # Motion Blur (intensity/samples are clamped when packed into push constants)
        self._motion_blur_enabled = False
        self.motion_blur_intensity = 1.0
        self.motion_blur_samples = 8

        # Fog (applied in tonemap shader, post-ACES, in LDR space)
        self.fog_enabled = False
        self.fog_colour: tuple[float, float, float] = (0.7, 0.8, 0.9)
        self.fog_density: float = 0.03
        self.fog_start: float = 10.0
        self.fog_end: float = 100.0
        self.fog_mode: float = 1.0  # 0=linear, 1=exp, 2=exp2
        # Set by EnvironmentSync when WorldEnvironment.volumetric_fog_enabled:
        # gates off the analytic fog branch so the two don't double up.
        self.volumetric_fog_active = False

        # Camera / motion blur UBO handles. setup() allocates 160 bytes:
        # inv_vp(mat4=64) + prev_vp(mat4=64) + camera_pos_near(vec4=16)
        # + clip_planes(vec4=16).
        self._mb_ubo_buf: Any = None
        self._mb_ubo_mem: Any = None
        # Previous frame's view-projection matrix; only meaningful once
        # _has_prev_vp is True (set by update_motion_blur_matrices()).
        self._prev_vp: np.ndarray = np.eye(4, dtype=np.float32)
        self._has_prev_vp = False


[docs]
    @property
    def enabled(self) -> bool:
        """Whether the pass is active; set True at the end of setup() and False by cleanup()."""
        return self._enabled



[docs]
    @property
    def hdr_target(self) -> RenderTarget | None:
        """The offscreen HDR render target, or ``None`` until setup() creates it."""
        return self._hdr_target


    @property
    def bloom_enabled(self) -> bool:
        """Whether bloom is requested.

        The bloom flag only reaches the shader when a bloom pass also exists
        (see ``_build_flags``).
        """
        return self._bloom_enabled


[docs]
    @bloom_enabled.setter
    def bloom_enabled(self, value: bool) -> None:
        """Turn bloom on or off. Note that setup() sets this to True after creating the bloom pass."""
        self._bloom_enabled = value


    @property
    def motion_blur_enabled(self) -> bool:
        """Whether motion blur is requested.

        The shader flag is only set once a previous view-projection matrix has
        been recorded (see ``_build_flags``).
        """
        return self._motion_blur_enabled


[docs]
    @motion_blur_enabled.setter
    def motion_blur_enabled(self, value: bool) -> None:
        """Turn motion blur on or off."""
        self._motion_blur_enabled = value


    def _build_flags(self) -> int:
        """Compute flags bitmask from current settings."""
        flags = 0
        if self.fxaa_enabled:
            flags |= FLAG_FXAA
        if self._bloom_enabled and self._bloom_pass:
            flags |= FLAG_BLOOM
        if self.ssao_enabled:
            flags |= FLAG_SSAO
        if self.dof_enabled:
            flags |= FLAG_DOF
        if self.grain_enabled:
            flags |= FLAG_GRAIN
        if self.vignette_enabled:
            flags |= FLAG_VIGNETTE
        if self.chromatic_aberration_enabled:
            flags |= FLAG_CHROMATIC
        if self._motion_blur_enabled and self._has_prev_vp:
            flags |= FLAG_MOTION_BLUR
        if self.volumetric_fog_active:
            flags |= FLAG_VOLUMETRIC_FOG
        return flags


[docs]
    def setup(self) -> None:
        """Create every GPU resource the pass needs and mark it enabled.

        In order: the HDR render target (16-bit float colour plus samplable
        depth), the colour and depth samplers, the 160-byte camera UBO, the
        descriptor layout/pool/set, the tonemap shader modules and pipeline,
        and finally the bloom pass. Bloom is switched on as a side effect.
        """
        engine = self._engine
        ctx = engine.ctx
        device = ctx.device
        width, height = engine.extent

        # Offscreen HDR target; depth must be samplable because the tonemap
        # shader reads it back.
        self._hdr_target = RenderTarget(
            device, ctx.physical_device, width, height,
            colour_format=vk.VK_FORMAT_R16G16B16A16_SFLOAT,
            use_depth=True,
            samplable_depth=True,
            queue=ctx.graphics_queue, command_pool=ctx.command_pool,
        )

        self._sampler = create_sampler(device)
        self._depth_sampler = self._create_depth_sampler(device)

        # Camera UBO: inv_vp(mat4=64) + prev_vp(mat4=64) + camera_pos_near(vec4=16)
        # + clip_planes(vec4=16) = 160 bytes. Shared by motion blur (matrices),
        # DoF + analytic fog (camera_pos_near.w = near, clip_planes.x = far for
        # depth linearisation). All four members must be uploaded or DoF/fog read
        # zeroed near/far and produce no blur / wrong distances.
        host_visible = (
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
        )
        self._mb_ubo_buf, self._mb_ubo_mem = create_buffer(
            device, ctx.physical_device, 160,
            vk.VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
            host_visible,
        )

        # Descriptor layout, all fragment-stage:
        #   0 = HDR colour, 1 = bloom, 2 = depth, 3 = camera UBO, 4 = SSAO.
        def fragment_binding(slot: int, descriptor_type: Any) -> Any:
            return vk.VkDescriptorSetLayoutBinding(
                binding=slot,
                descriptorType=descriptor_type,
                descriptorCount=1,
                stageFlags=vk.VK_SHADER_STAGE_FRAGMENT_BIT,
            )

        image_type = vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
        layout_bindings = [fragment_binding(slot, image_type) for slot in range(3)]
        layout_bindings.append(fragment_binding(3, vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER))
        layout_bindings.append(fragment_binding(4, image_type))
        layout_info = vk.VkDescriptorSetLayoutCreateInfo(
            bindingCount=len(layout_bindings), pBindings=layout_bindings,
        )
        self._descriptor_layout = vk.vkCreateDescriptorSetLayout(device, layout_info, None)

        # Pool sized for exactly one set: 4 image samplers + 1 uniform buffer.
        pool_sizes = [
            vk.VkDescriptorPoolSize(type=image_type, descriptorCount=4),
            vk.VkDescriptorPoolSize(type=vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptorCount=1),
        ]
        pool_info = vk.VkDescriptorPoolCreateInfo(
            maxSets=1, poolSizeCount=len(pool_sizes), pPoolSizes=pool_sizes,
        )
        self._descriptor_pool = vk.vkCreateDescriptorPool(device, pool_info, None)

        alloc_info = vk.VkDescriptorSetAllocateInfo(
            descriptorPool=self._descriptor_pool,
            descriptorSetCount=1,
            pSetLayouts=[self._descriptor_layout],
        )
        self._descriptor_set = vk.vkAllocateDescriptorSets(device, alloc_info)[0]

        # Fill every binding; bloom (1) and SSAO (4) get placeholders for now.
        self._write_descriptors(device)

        # Tonemap shader modules.
        shader_dir = engine.shader_dir
        self._vert_module = create_shader_module(device, compile_shader(shader_dir / "tonemap.vert"))
        self._frag_module = create_shader_module(device, compile_shader(shader_dir / "tonemap.frag"))

        # Fullscreen tonemap pipeline targeting the engine's render pass.
        self._create_pipeline(device, engine.render_pass, (width, height))

        # Bloom pass reads from the HDR colour attachment.
        bloom = BloomPass(self._engine)
        self._bloom_pass = bloom
        bloom.setup(self._hdr_target.colour_view)
        bloom.threshold = self.bloom_threshold
        self._bloom_enabled = True

        # Swap the bloom placeholder for the real bloom output.
        self._update_bloom_descriptor()

        self._enabled = True
        log.debug("Post-processing pass initialized (%dx%d HDR, bloom=%s)", width, height, self._bloom_enabled)


    def _create_depth_sampler(self, device: Any) -> Any:
        """Build the sampler used to read the HDR target's depth attachment.

        Linear min/mag filtering, nearest mip selection, clamp-to-edge on all
        three axes, and the LOD range pinned to 0.

        Args:
            device: Vulkan logical device that will own the sampler.

        Returns:
            The new sampler handle.
        """
        linear = vk.VK_FILTER_LINEAR
        clamp = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE
        create_info = vk.VkSamplerCreateInfo(
            magFilter=linear,
            minFilter=linear,
            mipmapMode=vk.VK_SAMPLER_MIPMAP_MODE_NEAREST,
            addressModeU=clamp,
            addressModeV=clamp,
            addressModeW=clamp,
            minLod=0.0,
            maxLod=0.0,
        )
        return vk.vkCreateSampler(device, create_info, None)

    def _write_descriptors(self, device: Any) -> None:
        """(Re)write every binding of the tonemap descriptor set.

        Bindings 0 (HDR colour) and 2 (depth) point at the current HDR target.
        Bindings 1 (bloom) and 4 (SSAO) are filled with the HDR colour view as
        a placeholder, so any previously written bloom/SSAO view is replaced.
        Binding 3 (camera UBO) is written only when the buffer exists.

        Args:
            device: Vulkan logical device owning the descriptor set.
        """
        target = self._hdr_target
        colour_layout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
        depth_layout = vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL

        # (binding, sampler, image view, image layout) for each sampled image.
        image_slots = (
            (0, self._sampler, target.colour_view, colour_layout),        # HDR colour
            (1, self._sampler, target.colour_view, colour_layout),        # bloom placeholder
            (2, self._depth_sampler, target.depth_view, depth_layout),    # scene depth
            (4, self._sampler, target.colour_view, colour_layout),        # SSAO placeholder
        )
        image_infos = [
            vk.VkDescriptorImageInfo(sampler=sampler, imageView=view, imageLayout=layout)
            for _, sampler, view, layout in image_slots
        ]
        pending = [
            vk.VkWriteDescriptorSet(
                dstSet=self._descriptor_set, dstBinding=slot, dstArrayElement=0,
                descriptorCount=1,
                descriptorType=vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                pImageInfo=[info],
            )
            for (slot, *_), info in zip(image_slots, image_infos)
        ]

        # Camera / motion blur UBO at binding 3 (whole 160-byte buffer).
        if self._mb_ubo_buf:
            camera_info = vk.VkDescriptorBufferInfo(
                buffer=self._mb_ubo_buf, offset=0, range=160,
            )
            pending.append(vk.VkWriteDescriptorSet(
                dstSet=self._descriptor_set, dstBinding=3, dstArrayElement=0,
                descriptorCount=1,
                descriptorType=vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                pBufferInfo=[camera_info],
            ))

        vk.vkUpdateDescriptorSets(device, len(pending), pending, 0, None)

    def _update_bloom_descriptor(self) -> None:
        """Update binding 1 with the bloom pass output image view."""
        if not self._bloom_pass or not self._bloom_pass.bloom_image_view:
            return
        bloom_info = vk.VkDescriptorImageInfo(
            sampler=self._sampler,
            imageView=self._bloom_pass.bloom_image_view,
            imageLayout=vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        )
        vk.vkUpdateDescriptorSets(self._engine.ctx.device, 1, [vk.VkWriteDescriptorSet(
            dstSet=self._descriptor_set, dstBinding=1, dstArrayElement=0,
            descriptorCount=1,
            descriptorType=vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            pImageInfo=[bloom_info],
        )], 0, None)


[docs]
    def update_ssao_descriptor(self, ao_view: Any) -> None:
        """Update binding 4 with the SSAO output image view."""
        if not self._descriptor_set or not self._sampler:
            return
        # SSAO compute output stays in GENERAL layout; use GENERAL here to match.
        ssao_info = vk.VkDescriptorImageInfo(
            sampler=self._sampler,
            imageView=ao_view,
            imageLayout=vk.VK_IMAGE_LAYOUT_GENERAL,
        )
        vk.vkUpdateDescriptorSets(self._engine.ctx.device, 1, [vk.VkWriteDescriptorSet(
            dstSet=self._descriptor_set, dstBinding=4, dstArrayElement=0,
            descriptorCount=1,
            descriptorType=vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            pImageInfo=[ssao_info],
        )], 0, None)


    @staticmethod
    def _near_far_from_proj(proj: np.ndarray) -> tuple[float, float]:
        """Recover (near, far) from a perspective projection matrix.

        For SimVX's perspective (``perspective()`` in core.math): row-major,
        ``m[2,2] = (f+n)/(n-f)``, ``m[2,3] = 2fn/(n-f)`` (the Vulkan Y-flip only
        touches ``m[1,1]``). Inverting gives near/far. Falls back to a sane
        0.1/100 if the matrix is degenerate (e.g. orthographic).
        """
        m22 = float(proj[2, 2])
        m23 = float(proj[2, 3])
        denom_n = m22 - 1.0
        denom_f = m22 + 1.0
        if abs(denom_n) < 1e-9 or abs(denom_f) < 1e-9:
            return 0.1, 100.0
        near = m23 / denom_n
        far = m23 / denom_f
        if not (0.0 < near < far):
            return 0.1, 100.0
        return near, far


[docs]
    def update_motion_blur_matrices(self, view: np.ndarray, proj: np.ndarray) -> None:
        """Update the motion blur UBO with current inverse VP and previous VP.

        Call once per frame before render_tonemap() with the current camera matrices.
        Uses temporal smoothing to prevent sudden jumps from frame time variance.
        """
        vp = (proj @ view).astype(np.float32)
        try:
            inv_vp = np.linalg.inv(vp).astype(np.float32)
        except np.linalg.LinAlgError:
            inv_vp = np.eye(4, dtype=np.float32)

        # On first frame, set prev_vp = current to avoid a massive initial velocity spike
        if not self._has_prev_vp:
            self._prev_vp = vp.copy()

        # Recover camera world position from the inverse view matrix (its
        # translation column) and near/far from the projection matrix, so DoF
        # and analytic fog can linearise depth. Both backends and the shader
        # expect camera_pos_near.w = near and clip_planes.x = far.
        try:
            inv_view = np.linalg.inv(view).astype(np.float32)
            cam_pos = inv_view[:3, 3]
        except np.linalg.LinAlgError:
            cam_pos = np.zeros(3, dtype=np.float32)
        near, far = self._near_far_from_proj(proj)

        # Upload to GPU
        if self._mb_ubo_mem:
            inv_vp_t = np.ascontiguousarray(inv_vp.T, dtype=np.float32)
            prev_vp_t = np.ascontiguousarray(self._prev_vp.T, dtype=np.float32)
            cam_pos_near = np.array([cam_pos[0], cam_pos[1], cam_pos[2], near], dtype=np.float32)
            clip_planes = np.array([far, 0.0, 0.0, 0.0], dtype=np.float32)

            from ..gpu.memory import upload_numpy
            data = np.concatenate([
                inv_vp_t.ravel(), prev_vp_t.ravel(), cam_pos_near, clip_planes
            ])
            upload_numpy(self._engine.ctx.device, self._mb_ubo_mem, data)

        # Store current VP for next frame
        self._prev_vp = vp.copy()
        self._has_prev_vp = True


    def _create_pipeline(self, device: Any, render_pass: Any, extent: tuple[int, int]) -> None:
        """Create the tone mapping fullscreen pipeline.

        Builds the pipeline layout (one descriptor set layout plus a
        fragment-stage push-constant range of ``_PC_SIZE`` bytes) and the
        graphics pipeline, storing them in ``self._pipeline_layout`` and
        ``self._pipeline``. All create-info structs are filled in through cffi
        directly.

        NOTE(review): every ``ffi.new`` result here is referenced by pointer
        from another struct; the locals presumably must stay alive until the
        create call returns, so keep them in this function's scope — confirm
        against cffi's ownership rules before splitting this into helpers.

        Args:
            device: Vulkan logical device.
            render_pass: Render pass the pipeline will be used in.
            extent: ``(width, height)`` used for the static viewport/scissor
                values (viewport and scissor are also declared dynamic and are
                set per draw in render_tonemap()).

        Raises:
            RuntimeError: If pipeline layout or pipeline creation does not
                return ``VK_SUCCESS``.
        """
        ffi = vk.ffi

        # Push constants are only visible to the fragment shader.
        push_range = ffi.new("VkPushConstantRange*")
        push_range.stageFlags = vk.VK_SHADER_STAGE_FRAGMENT_BIT
        push_range.offset = 0
        push_range.size = _PC_SIZE

        # Pipeline layout
        layout_ci = ffi.new("VkPipelineLayoutCreateInfo*")
        layout_ci.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
        set_layouts = ffi.new("VkDescriptorSetLayout[1]", [self._descriptor_layout])
        layout_ci.setLayoutCount = 1
        layout_ci.pSetLayouts = set_layouts
        layout_ci.pushConstantRangeCount = 1
        layout_ci.pPushConstantRanges = push_range

        layout_out = ffi.new("VkPipelineLayout*")
        result = vk._vulkan._callApi(
            vk._vulkan.lib.vkCreatePipelineLayout,
            device, layout_ci, ffi.NULL, layout_out,
        )
        if result != vk.VK_SUCCESS:
            raise RuntimeError(f"vkCreatePipelineLayout failed: {result}")
        self._pipeline_layout = layout_out[0]

        # Pipeline create info
        pi = ffi.new("VkGraphicsPipelineCreateInfo*")
        pi.sType = vk.VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO

        # Shader stages (both use the "main" entry point)
        stages = ffi.new("VkPipelineShaderStageCreateInfo[2]")
        main_name = ffi.new("char[]", b"main")
        stages[0].sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
        stages[0].stage = vk.VK_SHADER_STAGE_VERTEX_BIT
        stages[0].module = self._vert_module
        stages[0].pName = main_name
        stages[1].sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
        stages[1].stage = vk.VK_SHADER_STAGE_FRAGMENT_BIT
        stages[1].module = self._frag_module
        stages[1].pName = main_name
        pi.stageCount = 2
        pi.pStages = stages

        # No vertex input (fullscreen triangle generated in shader)
        vi = ffi.new("VkPipelineVertexInputStateCreateInfo*")
        vi.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO
        pi.pVertexInputState = vi

        # Input assembly
        ia = ffi.new("VkPipelineInputAssemblyStateCreateInfo*")
        ia.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO
        ia.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST
        pi.pInputAssemblyState = ia

        # Viewport state (static values; viewport/scissor are also listed as
        # dynamic state below and set at draw time)
        vps = ffi.new("VkPipelineViewportStateCreateInfo*")
        vps.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO
        vps.viewportCount = 1
        viewport = ffi.new("VkViewport*")
        viewport.width = float(extent[0])
        viewport.height = float(extent[1])
        viewport.maxDepth = 1.0
        vps.pViewports = viewport
        scissor = ffi.new("VkRect2D*")
        scissor.extent.width = extent[0]
        scissor.extent.height = extent[1]
        vps.scissorCount = 1
        vps.pScissors = scissor
        pi.pViewportState = vps

        # Rasterization (filled, no culling)
        rs = ffi.new("VkPipelineRasterizationStateCreateInfo*")
        rs.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO
        rs.polygonMode = vk.VK_POLYGON_MODE_FILL
        rs.lineWidth = 1.0
        rs.cullMode = vk.VK_CULL_MODE_NONE
        pi.pRasterizationState = rs

        # Multisample (single sample)
        ms = ffi.new("VkPipelineMultisampleStateCreateInfo*")
        ms.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO
        ms.rasterizationSamples = vk.VK_SAMPLE_COUNT_1_BIT
        pi.pMultisampleState = ms

        # No depth test
        dss = ffi.new("VkPipelineDepthStencilStateCreateInfo*")
        dss.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO
        dss.depthTestEnable = 0
        dss.depthWriteEnable = 0
        pi.pDepthStencilState = dss

        # Colour blend (no blending, all RGBA channels written)
        cba = ffi.new("VkPipelineColorBlendAttachmentState*")
        cba.colorWriteMask = (
            vk.VK_COLOR_COMPONENT_R_BIT | vk.VK_COLOR_COMPONENT_G_BIT
            | vk.VK_COLOR_COMPONENT_B_BIT | vk.VK_COLOR_COMPONENT_A_BIT
        )
        cb = ffi.new("VkPipelineColorBlendStateCreateInfo*")
        cb.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO
        cb.attachmentCount = 1
        cb.pAttachments = cba
        pi.pColorBlendState = cb

        # Dynamic state: viewport and scissor are supplied by render_tonemap()
        dyn_states = ffi.new("VkDynamicState[2]", [vk.VK_DYNAMIC_STATE_VIEWPORT, vk.VK_DYNAMIC_STATE_SCISSOR])
        ds = ffi.new("VkPipelineDynamicStateCreateInfo*")
        ds.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO
        ds.dynamicStateCount = 2
        ds.pDynamicStates = dyn_states
        pi.pDynamicState = ds

        pi.layout = self._pipeline_layout
        pi.renderPass = render_pass

        pipeline_out = ffi.new("VkPipeline*")
        result = vk._vulkan._callApi(
            vk._vulkan.lib.vkCreateGraphicsPipelines,
            device, ffi.NULL, 1, pi, ffi.NULL, pipeline_out,
        )
        if result != vk.VK_SUCCESS:
            raise RuntimeError(f"vkCreateGraphicsPipelines failed: {result}")
        self._pipeline = pipeline_out[0]


[docs]
    def begin_hdr_pass(self, cmd: Any) -> None:
        """Begin the HDR render pass (call before 3D rendering)."""
        if not self._enabled or not self._hdr_target:
            return
        rt = self._hdr_target
        cc = getattr(self._engine, "clear_colour", [0.0, 0.0, 0.0, 1.0])
        clear_values = [
            vk.VkClearValue(color=vk.VkClearColorValue(float32=cc)),
            vk.VkClearValue(depthStencil=vk.VkClearDepthStencilValue(depth=1.0, stencil=0)),
        ]
        rp_begin = vk.VkRenderPassBeginInfo(
            renderPass=rt.render_pass,
            framebuffer=rt.framebuffer,
            renderArea=vk.VkRect2D(
                offset=vk.VkOffset2D(x=0, y=0),
                extent=vk.VkExtent2D(width=rt.width, height=rt.height),
            ),
            clearValueCount=len(clear_values),
            pClearValues=clear_values,
        )
        vk.vkCmdBeginRenderPass(cmd, rp_begin, vk.VK_SUBPASS_CONTENTS_INLINE)



[docs]
    def end_hdr_pass(self, cmd: Any) -> None:
        """End the HDR render pass."""
        if not self._enabled:
            return
        vk.vkCmdEndRenderPass(cmd)



[docs]
    def render_bloom(self, cmd: Any) -> None:
        """Execute bloom pass (extract + blur). Call after end_hdr_pass."""
        if not self._bloom_enabled or not self._bloom_pass:
            return
        self._bloom_pass.threshold = self.bloom_threshold
        self._bloom_pass.render(cmd)



[docs]
    def render_tonemap(self, cmd: Any, width: int, height: int) -> None:
        """Render tone-mapped fullscreen quad to current render pass (swapchain)."""
        if not self._enabled or not self._pipeline:
            return

        # Set viewport/scissor
        vk_viewport = vk.VkViewport(
            x=0.0, y=0.0,
            width=float(width), height=float(height),
            minDepth=0.0, maxDepth=1.0,
        )
        vk.vkCmdSetViewport(cmd, 0, 1, [vk_viewport])
        scissor = vk.VkRect2D(
            offset=vk.VkOffset2D(x=0, y=0),
            extent=vk.VkExtent2D(width=width, height=height),
        )
        vk.vkCmdSetScissor(cmd, 0, 1, [scissor])

        # Bind pipeline and descriptor
        vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, self._pipeline)
        vk.vkCmdBindDescriptorSets(
            cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, self._pipeline_layout,
            0, 1, [self._descriptor_set], 0, None,
        )

        # Build push constants (see layout table at module top: 112 bytes)
        flags = self._build_flags()
        elapsed = time.perf_counter() - self._start_time

        pc_data = bytearray(_PC_SIZE)
        pc_data[0:8] = np.array([float(width), float(height)], dtype=np.float32).tobytes()
        pc_data[8:12] = np.array([self.exposure], dtype=np.float32).tobytes()
        pc_data[12:16] = np.array([flags], dtype=np.uint32).tobytes()
        pc_data[16:20] = np.array([self.bloom_intensity], dtype=np.float32).tobytes()
        pc_data[20:24] = np.array([self.dof_focus_distance], dtype=np.float32).tobytes()
        pc_data[24:28] = np.array([self.dof_focus_range], dtype=np.float32).tobytes()
        pc_data[28:32] = np.array([self.dof_max_blur], dtype=np.float32).tobytes()
        pc_data[32:36] = np.array([self.grain_intensity], dtype=np.float32).tobytes()
        pc_data[36:40] = np.array([self.vignette_intensity], dtype=np.float32).tobytes()
        pc_data[40:44] = np.array([self.vignette_smoothness], dtype=np.float32).tobytes()
        pc_data[44:48] = np.array([self.chromatic_aberration_intensity], dtype=np.float32).tobytes()
        pc_data[48:52] = np.array([elapsed], dtype=np.float32).tobytes()
        mb_intensity = max(0.0, min(2.0, self.motion_blur_intensity))
        mb_samples = max(4, min(32, self.motion_blur_samples))
        pc_data[52:56] = np.array([mb_intensity], dtype=np.float32).tobytes()
        pc_data[56:60] = np.array([mb_samples], dtype=np.uint32).tobytes()
        # bytes 60-63: padding for vec4 alignment (zeroed by bytearray init)
        # Fog params (applied post-tonemap in LDR space)
        fc = self.fog_colour
        pc_data[64:80] = np.array([fc[0], fc[1], fc[2], self.fog_density], dtype=np.float32).tobytes()
        fog_enabled_f = 1.0 if self.fog_enabled else 0.0
        pc_data[80:96] = np.array([self.fog_start, self.fog_end, self.fog_mode, fog_enabled_f], dtype=np.float32).tobytes()
        # Tonemap params: mode + white-point + 2 pad floats. Mode is carried as
        # a float (like fog_mode) and rounded back to an int in the shader.
        pc_data[96:112] = np.array(
            [float(self.tonemap_mode), max(1e-4, self.tonemap_white), 0.0, 0.0], dtype=np.float32
        ).tobytes()

        ffi = vk.ffi
        cbuf = ffi.new("char[]", bytes(pc_data))
        vk._vulkan.lib.vkCmdPushConstants(
            cmd, self._pipeline_layout,
            vk.VK_SHADER_STAGE_FRAGMENT_BIT,
            0, _PC_SIZE, cbuf,
        )

        # Draw fullscreen triangle (3 vertices, no vertex buffer)
        vk.vkCmdDraw(cmd, 3, 1, 0, 0)



[docs]
    def resize(self, width: int, height: int) -> None:
        """Recreate HDR target and pipeline for new dimensions."""
        if not self._enabled:
            return
        e = self._engine
        device = e.ctx.device

        # Destroy old resources
        if self._pipeline:
            vk.vkDestroyPipeline(device, self._pipeline, None)
        if self._pipeline_layout:
            vk.vkDestroyPipelineLayout(device, self._pipeline_layout, None)
        if self._hdr_target:
            self._hdr_target.destroy()

        # Recreate with samplable depth
        self._hdr_target = RenderTarget(
            device, e.ctx.physical_device, width, height,
            colour_format=vk.VK_FORMAT_R16G16B16A16_SFLOAT,
            use_depth=True,
            samplable_depth=True,
            queue=e.ctx.graphics_queue, command_pool=e.ctx.command_pool,
        )

        # Update descriptors (HDR + depth)
        self._write_descriptors(device)

        # Resize bloom pass
        if self._bloom_pass:
            self._bloom_pass.resize(width, height, self._hdr_target.colour_view)
            self._update_bloom_descriptor()

        self._create_pipeline(device, e.render_pass, (width, height))



[docs]
    def cleanup(self) -> None:
        """Release all GPU resources."""
        if not self._enabled:
            return
        device = self._engine.ctx.device
        if self._bloom_pass:
            self._bloom_pass.cleanup()
        if self._pipeline:
            vk.vkDestroyPipeline(device, self._pipeline, None)
        if self._pipeline_layout:
            vk.vkDestroyPipelineLayout(device, self._pipeline_layout, None)
        if self._vert_module:
            vk.vkDestroyShaderModule(device, self._vert_module, None)
        if self._frag_module:
            vk.vkDestroyShaderModule(device, self._frag_module, None)
        if self._descriptor_pool:
            vk.vkDestroyDescriptorPool(device, self._descriptor_pool, None)
        if self._descriptor_layout:
            vk.vkDestroyDescriptorSetLayout(device, self._descriptor_layout, None)
        if self._sampler:
            vk.vkDestroySampler(device, self._sampler, None)
        if self._depth_sampler:
            vk.vkDestroySampler(device, self._depth_sampler, None)
        if self._mb_ubo_buf:
            vk.vkDestroyBuffer(device, self._mb_ubo_buf, None)
        if self._mb_ubo_mem:
            vk.vkFreeMemory(device, self._mb_ubo_mem, None)
        if self._hdr_target:
            self._hdr_target.destroy()
        self._enabled = False