"""GPU-batched TileMap renderer — single draw call per layer via instanced SSBO quads."""
from __future__ import annotations
import logging
from typing import Any
import numpy as np
import vulkan as vk
from ..gpu.memory import create_buffer, upload_numpy
from ..gpu.pipeline import create_shader_module
from ..materials.shader_compiler import compile_shader
__all__ = ["TileMapPass"]
log = logging.getLogger(__name__)
# Per-tile record layout. Must match TileInstance struct in tilemap.vert
TILE_INSTANCE_DTYPE = np.dtype(
    [
        ("position", "f4", (2,)),  # world position (x, y)
        ("tile_uv_offset", "f4", (2,)),  # UV offset into tileset atlas
        ("tile_uv_size", "f4", (2,)),  # UV size of one tile in atlas
        ("flip_h", "u4"),  # horizontal flip flag
        ("flip_v", "u4"),  # vertical flip flag
    ]
)

# Max tiles per frame across all layers (2**16 == 65,536)
MAX_TILES = 1 << 16

# Size in bytes of one tile record: 6 float32 + 2 uint32 = 32 bytes
_TILE_STRIDE = TILE_INSTANCE_DTYPE.itemsize
class TileMapPass:
    """Renders tilemap layers as instanced quads via SSBO.

    Each layer is submitted as a contiguous block of tile instances.
    All layers share a single SSBO upload; each layer draws with an offset.

    Typical usage: call ``setup()`` once, then per frame ``begin_frame()``,
    ``submit_layer()`` for each layer and ``render()``; call ``cleanup()``
    to release the GPU resources.
    """

    def __init__(self, engine: Any) -> None:
        """Store the engine; no GPU resources are created until ``setup()``.

        Args:
            engine: Engine object providing ``ctx``, ``shader_dir``,
                ``render_pass``, ``texture_descriptor_layout`` and
                ``texture_descriptor_set``.
        """
        self._engine = engine
        self._pipeline: Any = None
        self._pipeline_layout: Any = None
        self._vert_module: Any = None
        self._frag_module: Any = None
        self._ssbo_layout: Any = None
        self._ssbo_pool: Any = None
        self._ssbo_set: Any = None
        self._tile_buf: Any = None
        self._tile_mem: Any = None
        self._ready = False
        # Per-frame submissions: (tile_data, tileset_texture_id, tile_size)
        self._submissions: list[tuple[np.ndarray, int, tuple[float, float]]] = []

    def setup(self) -> None:
        """Create GPU resources: SSBO, pipeline, descriptors.

        Raises:
            Exception: Whatever the failing creation step raised. Resources
                created before the failure are destroyed first, so a failed
                setup does not leak GPU objects.
        """
        e = self._engine
        device = e.ctx.device
        phys = e.ctx.physical_device

        from ..gpu.descriptors import (
            allocate_descriptor_set,
            create_descriptor_pool,
            create_ssbo_layout,
            write_ssbo_descriptor,
        )

        try:
            # Tile instance SSBO
            buf_size = MAX_TILES * _TILE_STRIDE
            self._tile_buf, self._tile_mem = create_buffer(
                device,
                phys,
                buf_size,
                vk.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            )

            # Descriptor set for tile SSBO (set 0, binding 0)
            self._ssbo_layout = create_ssbo_layout(device, binding_count=1)
            self._ssbo_pool = create_descriptor_pool(device, max_sets=1)
            self._ssbo_set = allocate_descriptor_set(device, self._ssbo_pool, self._ssbo_layout)
            write_ssbo_descriptor(device, self._ssbo_set, 0, self._tile_buf, buf_size)

            # Compile shaders
            shader_dir = e.shader_dir
            vert_spv = compile_shader(shader_dir / "tilemap.vert")
            frag_spv = compile_shader(shader_dir / "tilemap.frag")
            self._vert_module = create_shader_module(device, vert_spv)
            self._frag_module = create_shader_module(device, frag_spv)

            # Pipeline
            tex_layout = e.texture_descriptor_layout
            self._pipeline, self._pipeline_layout = _create_tilemap_pipeline(
                device,
                self._vert_module,
                self._frag_module,
                e.render_pass,
                self._ssbo_layout,
                tex_layout,
            )
        except Exception:
            # Release whatever was created before the failure, then re-raise.
            self.cleanup()
            raise

        self._ready = True
        log.debug("TileMap pass initialized (max %d tiles)", MAX_TILES)

    def begin_frame(self) -> None:
        """Clear per-frame submissions."""
        self._submissions.clear()

    def submit_layer(
        self,
        tile_data: np.ndarray,
        tileset_texture_id: int,
        tile_size: tuple[float, float],
    ) -> None:
        """Queue a tile layer for rendering.

        Empty layers are ignored.

        Args:
            tile_data: Structured array with TILE_INSTANCE_DTYPE.
            tileset_texture_id: Bindless texture index for the tileset atlas.
            tile_size: (width, height) of each tile in world units.
        """
        # len() rather than truthiness: bool() of a multi-element array raises.
        if len(tile_data) == 0:
            return
        self._submissions.append((tile_data, tileset_texture_id, tile_size))

    def render(self, cmd: Any, view_matrix: np.ndarray, extent: tuple[int, int]) -> None:
        """Record draw commands for all queued tile layers.

        Does nothing when the pass is not set up or no layer was submitted.
        Tiles beyond MAX_TILES (counted across all layers, in submission
        order) are dropped with a warning.

        Args:
            cmd: Command buffer to record into.
            view_matrix: View matrix; it is transposed before being pushed
                to the shader. Must hold 16 elements.
            extent: (width, height) used for the viewport and scissor.
        """
        if not self._ready or not self._submissions:
            return
        e = self._engine

        # Concatenate all layer data into one upload
        all_data = np.concatenate([s[0] for s in self._submissions])
        total = min(len(all_data), MAX_TILES)
        if total < len(all_data):
            log.warning("TileMap overflow: %d tiles (max %d)", len(all_data), MAX_TILES)
        upload_numpy(e.ctx.device, self._tile_mem, all_data[:total])

        # Viewport + scissor
        vk_vp = vk.VkViewport(
            x=0.0,
            y=0.0,
            width=float(extent[0]),
            height=float(extent[1]),
            minDepth=0.0,
            maxDepth=1.0,
        )
        vk.vkCmdSetViewport(cmd, 0, 1, [vk_vp])
        scissor = vk.VkRect2D(
            offset=vk.VkOffset2D(x=0, y=0),
            extent=vk.VkExtent2D(width=extent[0], height=extent[1]),
        )
        vk.vkCmdSetScissor(cmd, 0, 1, [scissor])

        # Bind pipeline
        vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_GRAPHICS, self._pipeline)

        # Bind SSBO descriptor (set 0)
        vk.vkCmdBindDescriptorSets(
            cmd,
            vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
            self._pipeline_layout,
            0,
            1,
            [self._ssbo_set],
            0,
            None,
        )

        # Bind texture array descriptor (set 1) from engine
        # NOTE(review): when the engine has no texture descriptor set, set 1
        # stays unbound but the draws below are still recorded - confirm
        # this is intended.
        tex_ds = e.texture_descriptor_set
        if tex_ds:
            vk.vkCmdBindDescriptorSets(
                cmd,
                vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
                self._pipeline_layout,
                1,
                1,
                [tex_ds],
                0,
                None,
            )

        # Transpose view matrix for column-major GLSL
        view_transposed = np.ascontiguousarray(view_matrix.T, dtype=np.float32)

        # Push constants: mat4(64) + vec2 tile_size(8) + int tex_id(4) + pad(4) = 80 bytes.
        # The matrix is the same for every layer, so it is written once here;
        # only tile_size and tex_id are patched per layer.
        pc = np.zeros(20, dtype=np.float32)
        pc[:16] = view_transposed.ravel()
        pc_u32 = pc.view(np.uint32)  # same memory, for writing the integer texture ID

        # Draw each layer with its own push constants
        offset = 0
        ffi = vk.ffi
        stage_flags = vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT
        for tile_data, tex_id, tile_size in self._submissions:
            # Clamp the last drawable layer to what actually fit in the SSBO.
            count = min(len(tile_data), total - offset)
            if count <= 0:
                break
            pc[16] = tile_size[0]
            pc[17] = tile_size[1]
            # Pack texture ID as uint32 at float index 18
            pc_u32[18] = np.uint32(tex_id)
            pc_bytes = pc.tobytes()
            cbuf = ffi.new("char[]", pc_bytes)
            vk._vulkan.lib.vkCmdPushConstants(
                cmd,
                self._pipeline_layout,
                stage_flags,
                0,
                len(pc_bytes),
                cbuf,
            )
            # 6 vertices per tile (quad); firstVertex selects this layer's
            # slice of the shared upload.
            vk.vkCmdDraw(cmd, count * 6, 1, offset * 6, 0)
            offset += count

    def cleanup(self) -> None:
        """Destroy all GPU resources.

        Safe to call when ``setup()`` was never run, failed partway, or when
        cleanup already ran: only handles that are currently held are
        destroyed, and each is reset to ``None`` afterwards.
        """
        held = (
            self._pipeline,
            self._pipeline_layout,
            self._vert_module,
            self._frag_module,
            self._ssbo_layout,
            self._ssbo_pool,
            self._tile_buf,
            self._tile_mem,
        )
        self._ready = False
        if not any(held):
            # Nothing to release; avoid touching the engine or device at all.
            self._ssbo_set = None
            return

        device = self._engine.ctx.device
        for attr, destroy in (
            ("_pipeline", vk.vkDestroyPipeline),
            ("_pipeline_layout", vk.vkDestroyPipelineLayout),
            ("_vert_module", vk.vkDestroyShaderModule),
            ("_frag_module", vk.vkDestroyShaderModule),
            ("_ssbo_layout", vk.vkDestroyDescriptorSetLayout),
            ("_ssbo_pool", vk.vkDestroyDescriptorPool),
            ("_tile_buf", vk.vkDestroyBuffer),
            ("_tile_mem", vk.vkFreeMemory),
        ):
            handle = getattr(self, attr)
            if handle:
                destroy(device, handle, None)
                setattr(self, attr, None)
        # The descriptor set was allocated from the pool destroyed above.
        self._ssbo_set = None
def _create_tilemap_pipeline(
    device: Any,
    vert_module: Any,
    frag_module: Any,
    render_pass: Any,
    ssbo_layout: Any,
    texture_layout: Any,
) -> tuple[Any, Any]:
    """Create tilemap pipeline: alpha blend, no depth test/write, no vertex input.

    Set 0: tile SSBO, Set 1: bindless texture array.
    Push constants: mat4 view(64) + vec2 tile_size(8) + int tex_id(4) + pad(4) = 80 bytes.

    Args:
        device: Logical device handle.
        vert_module: Vertex shader module.
        frag_module: Fragment shader module.
        render_pass: Render pass the pipeline is created for.
        ssbo_layout: Descriptor set layout used for set 0.
        texture_layout: Descriptor set layout used for set 1.

    Returns:
        ``(pipeline, pipeline_layout)``.

    Raises:
        RuntimeError: If layout or pipeline creation does not return
            ``VK_SUCCESS``. The pipeline layout is destroyed before the
            error propagates when pipeline creation fails.
    """
    pipeline_layout = _create_tilemap_pipeline_layout(device, ssbo_layout, texture_layout)
    try:
        pipeline = _build_tilemap_graphics_pipeline(
            device, vert_module, frag_module, render_pass, pipeline_layout
        )
    except Exception:
        # The caller never receives the layout on failure, so release it here.
        vk.vkDestroyPipelineLayout(device, pipeline_layout, None)
        raise
    log.debug("TileMap pipeline created")
    return pipeline, pipeline_layout


def _create_tilemap_pipeline_layout(device: Any, ssbo_layout: Any, texture_layout: Any) -> Any:
    """Create the pipeline layout: two descriptor sets plus one 80-byte push-constant range."""
    ffi = vk.ffi

    # Push constant range: 80 bytes, vertex + fragment
    push_range = ffi.new("VkPushConstantRange*")
    push_range.stageFlags = vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT
    push_range.offset = 0
    push_range.size = 80

    # Pipeline layout: set 0 = SSBO, set 1 = textures
    set_layouts = ffi.new("VkDescriptorSetLayout[2]", [ssbo_layout, texture_layout])
    layout_ci = ffi.new("VkPipelineLayoutCreateInfo*")
    layout_ci.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO
    layout_ci.setLayoutCount = 2
    layout_ci.pSetLayouts = set_layouts
    layout_ci.pushConstantRangeCount = 1
    layout_ci.pPushConstantRanges = push_range

    layout_out = ffi.new("VkPipelineLayout*")
    result = vk._vulkan._callApi(
        vk._vulkan.lib.vkCreatePipelineLayout,
        device,
        layout_ci,
        ffi.NULL,
        layout_out,
    )
    if result != vk.VK_SUCCESS:
        raise RuntimeError(f"vkCreatePipelineLayout failed: {result}")
    return layout_out[0]


def _build_tilemap_graphics_pipeline(
    device: Any,
    vert_module: Any,
    frag_module: Any,
    render_pass: Any,
    pipeline_layout: Any,
) -> Any:
    """Fill the graphics pipeline create-info and create the pipeline.

    Every ``ffi.new`` allocation is bound to a local name: storing a pointer
    in a struct field does not keep the pointed-to memory alive, so the
    locals must outlive the ``vkCreateGraphicsPipelines`` call.
    """
    ffi = vk.ffi

    pi = ffi.new("VkGraphicsPipelineCreateInfo*")
    pi.sType = vk.VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO

    # Shader stages
    stages = ffi.new("VkPipelineShaderStageCreateInfo[2]")
    main_name = ffi.new("char[]", b"main")
    stages[0].sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
    stages[0].stage = vk.VK_SHADER_STAGE_VERTEX_BIT
    stages[0].module = vert_module
    stages[0].pName = main_name
    stages[1].sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO
    stages[1].stage = vk.VK_SHADER_STAGE_FRAGMENT_BIT
    stages[1].module = frag_module
    stages[1].pName = main_name
    pi.stageCount = 2
    pi.pStages = stages

    # No vertex input (vertices generated in shader from SSBO)
    vi = ffi.new("VkPipelineVertexInputStateCreateInfo*")
    vi.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO
    pi.pVertexInputState = vi

    # Input assembly: triangle list
    ia = ffi.new("VkPipelineInputAssemblyStateCreateInfo*")
    ia.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO
    ia.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST
    pi.pInputAssemblyState = ia

    # Viewport (dynamic). The placeholder structs are held in locals so the
    # pointers stored below stay valid until the pipeline has been created.
    placeholder_viewport = ffi.new("VkViewport*")
    placeholder_scissor = ffi.new("VkRect2D*")
    vps = ffi.new("VkPipelineViewportStateCreateInfo*")
    vps.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO
    vps.viewportCount = 1
    vps.pViewports = placeholder_viewport
    vps.scissorCount = 1
    vps.pScissors = placeholder_scissor
    pi.pViewportState = vps

    # Rasterization: no culling for 2D tiles
    rs = ffi.new("VkPipelineRasterizationStateCreateInfo*")
    rs.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO
    rs.polygonMode = vk.VK_POLYGON_MODE_FILL
    rs.lineWidth = 1.0
    rs.cullMode = vk.VK_CULL_MODE_NONE
    rs.frontFace = vk.VK_FRONT_FACE_CLOCKWISE
    pi.pRasterizationState = rs

    # Multisample
    ms = ffi.new("VkPipelineMultisampleStateCreateInfo*")
    ms.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO
    ms.rasterizationSamples = vk.VK_SAMPLE_COUNT_1_BIT
    pi.pMultisampleState = ms

    # Depth: test and write are both disabled, so tiles neither read nor
    # modify the depth buffer.
    dss = ffi.new("VkPipelineDepthStencilStateCreateInfo*")
    dss.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO
    dss.depthTestEnable = 0
    dss.depthWriteEnable = 0
    pi.pDepthStencilState = dss

    # Alpha blending
    blend_att = ffi.new("VkPipelineColorBlendAttachmentState*")
    blend_att.blendEnable = 1
    blend_att.srcColorBlendFactor = vk.VK_BLEND_FACTOR_SRC_ALPHA
    blend_att.dstColorBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
    blend_att.colorBlendOp = vk.VK_BLEND_OP_ADD
    blend_att.srcAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE
    blend_att.dstAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA
    blend_att.alphaBlendOp = vk.VK_BLEND_OP_ADD
    blend_att.colorWriteMask = 0xF  # write all four RGBA channels
    cb = ffi.new("VkPipelineColorBlendStateCreateInfo*")
    cb.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO
    cb.attachmentCount = 1
    cb.pAttachments = blend_att
    pi.pColorBlendState = cb

    # Dynamic state: viewport + scissor
    dyn_states = ffi.new(
        "VkDynamicState[2]",
        [
            vk.VK_DYNAMIC_STATE_VIEWPORT,
            vk.VK_DYNAMIC_STATE_SCISSOR,
        ],
    )
    ds = ffi.new("VkPipelineDynamicStateCreateInfo*")
    ds.sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO
    ds.dynamicStateCount = 2
    ds.pDynamicStates = dyn_states
    pi.pDynamicState = ds

    pi.layout = pipeline_layout
    pi.renderPass = render_pass

    pipeline_out = ffi.new("VkPipeline*")
    result = vk._vulkan._callApi(
        vk._vulkan.lib.vkCreateGraphicsPipelines,
        device,
        ffi.NULL,
        1,
        pi,
        ffi.NULL,
        pipeline_out,
    )
    if result != vk.VK_SUCCESS:
        raise RuntimeError(f"vkCreateGraphicsPipelines failed: {result}")
    return pipeline_out[0]