From 8fd99405886f7378f507326e04ebadcdb9b67139 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 2 Apr 2026 00:08:18 +0100 Subject: [PATCH 1/5] feat: add GPU frustum culling compute shader and CullingSystem infrastructure (#375) Implements the GPU-side frustum culling infrastructure: - culling.comp: compute shader testing chunk AABBs against 6 frustum planes - culling_system.zig: buffer management, dispatch, and readback - Registered shader path in shader_registry.zig - Added glslangValidator check in build.zig Closes #375 --- assets/shaders/vulkan/culling.comp | 76 ++++ build.zig | 2 + src/engine/graphics/vulkan/culling_system.zig | 393 ++++++++++++++++++ .../graphics/vulkan/shader_registry.zig | 2 + 4 files changed, 473 insertions(+) create mode 100644 assets/shaders/vulkan/culling.comp create mode 100644 src/engine/graphics/vulkan/culling_system.zig diff --git a/assets/shaders/vulkan/culling.comp b/assets/shaders/vulkan/culling.comp new file mode 100644 index 00000000..e03ae786 --- /dev/null +++ b/assets/shaders/vulkan/culling.comp @@ -0,0 +1,76 @@ +#version 450 + +layout(local_size_x = 64) in; + +struct DrawIndirectCommand { + uint vertexCount; + uint instanceCount; + uint firstVertex; + uint firstInstance; +}; + +struct ChunkAABB { + vec4 min_point; + vec4 max_point; +}; + +layout(std430, binding = 0) readonly buffer ChunkAABBs { + ChunkAABB chunks[]; +} aabb_buffer; + +layout(std430, binding = 1) writeonly buffer DrawCommands { + uint visible_count; + uint _pad0; + uint _pad1; + uint _pad2; + DrawIndirectCommand commands[]; +} cmd_buffer; + +layout(std430, binding = 2) buffer VisibleCountBuffer { + uint count; + uint _pad_v0; + uint _pad_v1; + uint _pad_v2; +} visible_counter; + +layout(push_constant) uniform FrustumPlanes { + vec4 planes[6]; +} frustum; + +bool aabbVisible(vec3 aabb_min, vec3 aabb_max) { + for (int i = 0; i < 6; i++) { + vec3 normal = frustum.planes[i].xyz; + float dist = frustum.planes[i].w; + + vec3 positive_vertex = vec3( + 
(normal.x > 0.0) ? aabb_max.x : aabb_min.x, + (normal.y > 0.0) ? aabb_max.y : aabb_min.y, + (normal.z > 0.0) ? aabb_max.z : aabb_min.z + ); + + if (dot(normal, positive_vertex) + dist < 0.0) { + return false; + } + } + return true; +} + +void main() { + uint idx = gl_GlobalInvocationID.x; + + if (idx >= aabb_buffer.chunks.length()) { + return; + } + + vec3 aabb_min = aabb_buffer.chunks[idx].min_point.xyz; + vec3 aabb_max = aabb_buffer.chunks[idx].max_point.xyz; + + if (aabbVisible(aabb_min, aabb_max)) { + uint slot = atomicAdd(visible_counter.count, 1); + + cmd_buffer.commands[slot].vertexCount = 0; + cmd_buffer.commands[slot].instanceCount = 1; + cmd_buffer.commands[slot].firstVertex = 0; + cmd_buffer.commands[slot].firstInstance = idx; + } +} diff --git a/build.zig b/build.zig index 7fab3da4..3ff6c46d 100644 --- a/build.zig +++ b/build.zig @@ -183,6 +183,7 @@ pub fn build(b: *std.Build) void { const validate_vulkan_taa_frag = b.addSystemCommand(&.{ "glslangValidator", "-V", "assets/shaders/vulkan/taa.frag" }); const validate_vulkan_lpv_inject_comp = b.addSystemCommand(&.{ "glslangValidator", "-V", "assets/shaders/vulkan/lpv_inject.comp" }); const validate_vulkan_lpv_propagate_comp = b.addSystemCommand(&.{ "glslangValidator", "-V", "assets/shaders/vulkan/lpv_propagate.comp" }); + const validate_vulkan_culling_comp = b.addSystemCommand(&.{ "glslangValidator", "-V", "assets/shaders/vulkan/culling.comp" }); test_step.dependOn(&validate_vulkan_terrain_vert.step); test_step.dependOn(&validate_vulkan_terrain_frag.step); @@ -206,4 +207,5 @@ pub fn build(b: *std.Build) void { test_step.dependOn(&validate_vulkan_taa_frag.step); test_step.dependOn(&validate_vulkan_lpv_inject_comp.step); test_step.dependOn(&validate_vulkan_lpv_propagate_comp.step); + test_step.dependOn(&validate_vulkan_culling_comp.step); } diff --git a/src/engine/graphics/vulkan/culling_system.zig b/src/engine/graphics/vulkan/culling_system.zig new file mode 100644 index 00000000..a1f387de --- /dev/null +++ 
b/src/engine/graphics/vulkan/culling_system.zig @@ -0,0 +1,393 @@ +const std = @import("std"); +const c = @import("../../c.zig").c; +const rhi_pkg = @import("../rhi.zig"); +const log = @import("../core/log.zig"); +const Mat4 = @import("../math/mat4.zig").Mat4; +const VulkanContext = @import("vulkan/rhi_context_types.zig").VulkanContext; +const Utils = @import("vulkan/utils.zig"); + +pub const CULLING_SHADER_PATH = "assets/shaders/vulkan/culling.comp.spv"; +pub const MAX_CULLABLE_CHUNKS: usize = 16384; +pub const WORKGROUP_SIZE: u32 = 64; + +pub const ChunkCullData = extern struct { + min_point: [4]f32, + max_point: [4]f32, +}; + +const FrustumPushConstants = extern struct { + planes: [6][4]f32, +}; + +pub const CullingSystem = struct { + allocator: std.mem.Allocator, + rhi: rhi_pkg.RHI, + vk_ctx: *VulkanContext, + + aabb_buffer: Utils.VulkanBuffer = .{}, + command_buffer: Utils.VulkanBuffer = .{}, + counter_buffer: Utils.VulkanBuffer = .{}, + counter_readback_buffer: Utils.VulkanBuffer = .{}, + + descriptor_pool: c.VkDescriptorPool = null, + descriptor_set_layout: c.VkDescriptorSetLayout = null, + descriptor_set: c.VkDescriptorSet = null, + pipeline_layout: c.VkPipelineLayout = null, + pipeline: c.VkPipeline = null, + + max_chunks: usize, + + pub fn init( + allocator: std.mem.Allocator, + rhi: rhi_pkg.RHI, + max_chunks: usize, + ) !*CullingSystem { + const self = try allocator.create(CullingSystem); + errdefer allocator.destroy(self); + + const vk_ctx: *VulkanContext = @ptrCast(@alignCast(rhi.ptr)); + const clamped_max = std.math.clamp(max_chunks, 1, MAX_CULLABLE_CHUNKS); + + self.* = .{ + .allocator = allocator, + .rhi = rhi, + .vk_ctx = vk_ctx, + .max_chunks = clamped_max, + }; + + const aabb_size = clamped_max * @sizeOf(ChunkCullData); + self.aabb_buffer = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + aabb_size, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + ); + errdefer 
self.destroyBuffers(); + + const cmd_size = @sizeOf(u32) * 4 + clamped_max * @sizeOf(c.VkDrawIndirectCommand); + self.command_buffer = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + cmd_size, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, + c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + ); + errdefer self.destroyBuffers(); + + self.counter_buffer = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + 16, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_TRANSFER_DST_BIT, + c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + ); + errdefer self.destroyBuffers(); + + self.counter_readback_buffer = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + 16, + c.VK_BUFFER_USAGE_TRANSFER_DST_BIT, + c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + ); + errdefer self.destroyBuffers(); + + try ensureShaderFileExists(CULLING_SHADER_PATH); + + errdefer self.deinitComputeResources(); + try self.initComputeResources(); + + return self; + } + + pub fn deinit(self: *CullingSystem) void { + self.deinitComputeResources(); + self.destroyBuffers(); + self.allocator.destroy(self); + } + + pub fn updateAABBData(self: *CullingSystem, chunks: []const ChunkCullData) void { + if (self.aabb_buffer.mapped_ptr == null) return; + const copy_len = @min(chunks.len, self.max_chunks) * @sizeOf(ChunkCullData); + if (copy_len == 0) return; + @memcpy( + @as([*]u8, @ptrCast(self.aabb_buffer.mapped_ptr.?))[0..copy_len], + @as([*]const u8, @ptrCast(chunks.ptr))[0..copy_len], + ); + } + + pub fn dispatch( + self: *CullingSystem, + view_proj: Mat4, + chunk_count: u32, + ) void { + if (chunk_count == 0) return; + const cmd = self.vk_ctx.frames.command_buffers[self.vk_ctx.frames.current_frame]; + if (cmd == null) return; + + self.resetCounter(cmd); + + var host_barrier = std.mem.zeroes(c.VkMemoryBarrier); + host_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; + host_barrier.srcAccessMask = c.VK_ACCESS_HOST_WRITE_BIT; + 
host_barrier.dstAccessMask = c.VK_ACCESS_SHADER_READ_BIT; + c.vkCmdPipelineBarrier( + cmd, + c.VK_PIPELINE_STAGE_HOST_BIT, + c.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, + &host_barrier, + 0, + null, + 0, + null, + ); + + const push = extractFrustumPlanes(view_proj); + + c.vkCmdBindPipeline(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline); + c.vkCmdBindDescriptorSets(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline_layout, 0, 1, &self.descriptor_set, 0, null); + c.vkCmdPushConstants(cmd, self.pipeline_layout, c.VK_SHADER_STAGE_COMPUTE_BIT, 0, @sizeOf(FrustumPushConstants), &push); + + const groups = divCeil(chunk_count, WORKGROUP_SIZE); + c.vkCmdDispatch(cmd, groups, 1, 1); + + var compute_barrier = std.mem.zeroes(c.VkMemoryBarrier); + compute_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; + compute_barrier.srcAccessMask = c.VK_ACCESS_SHADER_WRITE_BIT; + compute_barrier.dstAccessMask = c.VK_ACCESS_INDIRECT_COMMAND_READ_BIT | c.VK_ACCESS_TRANSFER_READ_BIT; + c.vkCmdPipelineBarrier( + cmd, + c.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + c.VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | c.VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 1, + &compute_barrier, + 0, + null, + 0, + null, + ); + } + + pub fn readVisibleCount(self: *CullingSystem) u32 { + if (self.counter_readback_buffer.mapped_ptr == null) return 0; + const ptr: *align(1) u32 = @ptrCast(@alignCast(self.counter_readback_buffer.mapped_ptr.?)); + return ptr.*; + } + + fn resetCounter(self: *CullingSystem, cmd: c.VkCommandBuffer) void { + const zero: u32 = 0; + c.vkCmdFillBuffer(cmd, self.counter_buffer.buffer, 0, 16, zero); + + var fill_barrier = std.mem.zeroes(c.VkMemoryBarrier); + fill_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; + fill_barrier.srcAccessMask = c.VK_ACCESS_TRANSFER_WRITE_BIT; + fill_barrier.dstAccessMask = c.VK_ACCESS_SHADER_READ_BIT | c.VK_ACCESS_SHADER_WRITE_BIT; + c.vkCmdPipelineBarrier( + cmd, + c.VK_PIPELINE_STAGE_TRANSFER_BIT, + c.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + 1, + 
&fill_barrier, + 0, + null, + 0, + null, + ); + } + + fn initComputeResources(self: *CullingSystem) !void { + const vk = self.vk_ctx.vulkan_device.vk_device; + + var pool_sizes = [_]c.VkDescriptorPoolSize{ + .{ .type = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 3 }, + }; + + var pool_info = std.mem.zeroes(c.VkDescriptorPoolCreateInfo); + pool_info.sType = c.VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_info.maxSets = 1; + pool_info.poolSizeCount = pool_sizes.len; + pool_info.pPoolSizes = &pool_sizes; + try Utils.checkVk(c.vkCreateDescriptorPool(vk, &pool_info, null, &self.descriptor_pool)); + + const bindings = [_]c.VkDescriptorSetLayoutBinding{ + .{ .binding = 0, .descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = c.VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = null }, + .{ .binding = 1, .descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = c.VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = null }, + .{ .binding = 2, .descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = c.VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = null }, + }; + + var layout_info = std.mem.zeroes(c.VkDescriptorSetLayoutCreateInfo); + layout_info.sType = c.VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layout_info.bindingCount = bindings.len; + layout_info.pBindings = &bindings; + try Utils.checkVk(c.vkCreateDescriptorSetLayout(vk, &layout_info, null, &self.descriptor_set_layout)); + + var alloc_info = std.mem.zeroes(c.VkDescriptorSetAllocateInfo); + alloc_info.sType = c.VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + alloc_info.descriptorPool = self.descriptor_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &self.descriptor_set_layout; + try Utils.checkVk(c.vkAllocateDescriptorSets(vk, &alloc_info, &self.descriptor_set)); + + self.updateDescriptorSets(); + + var pc_range = std.mem.zeroes(c.VkPushConstantRange); + 
pc_range.stageFlags = c.VK_SHADER_STAGE_COMPUTE_BIT; + pc_range.offset = 0; + pc_range.size = @sizeOf(FrustumPushConstants); + + var pipeline_layout_info = std.mem.zeroes(c.VkPipelineLayoutCreateInfo); + pipeline_layout_info.sType = c.VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_info.setLayoutCount = 1; + pipeline_layout_info.pSetLayouts = &self.descriptor_set_layout; + pipeline_layout_info.pushConstantRangeCount = 1; + pipeline_layout_info.pPushConstantRanges = &pc_range; + try Utils.checkVk(c.vkCreatePipelineLayout(vk, &pipeline_layout_info, null, &self.pipeline_layout)); + + const shader_module = try loadShaderModule(vk, CULLING_SHADER_PATH, self.allocator); + defer c.vkDestroyShaderModule(vk, shader_module, null); + + var stage = std.mem.zeroes(c.VkPipelineShaderStageCreateInfo); + stage.sType = c.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage.stage = c.VK_SHADER_STAGE_COMPUTE_BIT; + stage.module = shader_module; + stage.pName = "main"; + + var pipeline_info = std.mem.zeroes(c.VkComputePipelineCreateInfo); + pipeline_info.sType = c.VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_info.stage = stage; + pipeline_info.layout = self.pipeline_layout; + try Utils.checkVk(c.vkCreateComputePipelines(vk, null, 1, &pipeline_info, null, &self.pipeline)); + } + + fn updateDescriptorSets(self: *CullingSystem) void { + const vk = self.vk_ctx.vulkan_device.vk_device; + + var aabb_info = c.VkDescriptorBufferInfo{ + .buffer = self.aabb_buffer.buffer, + .offset = 0, + .range = self.max_chunks * @sizeOf(ChunkCullData), + }; + var cmd_info = c.VkDescriptorBufferInfo{ + .buffer = self.command_buffer.buffer, + .offset = 0, + .range = c.VK_WHOLE_SIZE, + }; + var counter_info = c.VkDescriptorBufferInfo{ + .buffer = self.counter_buffer.buffer, + .offset = 0, + .range = 16, + }; + + var writes: [3]c.VkWriteDescriptorSet = undefined; + + writes[0] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[0].sType = 
c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[0].dstSet = self.descriptor_set; + writes[0].dstBinding = 0; + writes[0].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[0].descriptorCount = 1; + writes[0].pBufferInfo = &aabb_info; + + writes[1] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[1].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[1].dstSet = self.descriptor_set; + writes[1].dstBinding = 1; + writes[1].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[1].descriptorCount = 1; + writes[1].pBufferInfo = &cmd_info; + + writes[2] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[2].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[2].dstSet = self.descriptor_set; + writes[2].dstBinding = 2; + writes[2].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[2].descriptorCount = 1; + writes[2].pBufferInfo = &counter_info; + + c.vkUpdateDescriptorSets(vk, writes.len, &writes[0], 0, null); + } + + fn deinitComputeResources(self: *CullingSystem) void { + const vk = self.vk_ctx.vulkan_device.vk_device; + if (self.pipeline != null) c.vkDestroyPipeline(vk, self.pipeline, null); + if (self.pipeline_layout != null) c.vkDestroyPipelineLayout(vk, self.pipeline_layout, null); + if (self.descriptor_set_layout != null) c.vkDestroyDescriptorSetLayout(vk, self.descriptor_set_layout, null); + if (self.descriptor_pool != null) c.vkDestroyDescriptorPool(vk, self.descriptor_pool, null); + + self.pipeline = null; + self.pipeline_layout = null; + self.descriptor_set_layout = null; + self.descriptor_pool = null; + self.descriptor_set = null; + } + + fn destroyBuffers(self: *CullingSystem) void { + const vk = self.vk_ctx.vulkan_device.vk_device; + + if (self.aabb_buffer.buffer != null) c.vkDestroyBuffer(vk, self.aabb_buffer.buffer, null); + if (self.aabb_buffer.memory != null) c.vkFreeMemory(vk, self.aabb_buffer.memory, null); + if (self.command_buffer.buffer != null) c.vkDestroyBuffer(vk, self.command_buffer.buffer, 
null); + if (self.command_buffer.memory != null) c.vkFreeMemory(vk, self.command_buffer.memory, null); + if (self.counter_buffer.buffer != null) c.vkDestroyBuffer(vk, self.counter_buffer.buffer, null); + if (self.counter_buffer.memory != null) c.vkFreeMemory(vk, self.counter_buffer.memory, null); + if (self.counter_readback_buffer.buffer != null) c.vkDestroyBuffer(vk, self.counter_readback_buffer.buffer, null); + if (self.counter_readback_buffer.memory != null) c.vkFreeMemory(vk, self.counter_readback_buffer.memory, null); + + self.aabb_buffer = .{}; + self.command_buffer = .{}; + self.counter_buffer = .{}; + self.counter_readback_buffer = .{}; + } +}; + +fn extractFrustumPlanes(vp: Mat4) FrustumPushConstants { + const m = vp.data; + var planes: [6][4]f32 = undefined; + + planes[0] = .{ m[3][0] + m[0][0], m[3][1] + m[0][1], m[3][2] + m[0][2], m[3][3] + m[0][3] }; + planes[1] = .{ m[3][0] - m[0][0], m[3][1] - m[0][1], m[3][2] - m[0][2], m[3][3] - m[0][3] }; + planes[2] = .{ m[3][0] - m[1][0], m[3][1] - m[1][1], m[3][2] - m[1][2], m[3][3] - m[1][3] }; + planes[3] = .{ m[3][0] + m[1][0], m[3][1] + m[1][1], m[3][2] + m[1][2], m[3][3] + m[1][3] }; + planes[4] = .{ m[3][0] + m[2][0], m[3][1] + m[2][1], m[3][2] + m[2][2], m[3][3] + m[2][3] }; + planes[5] = .{ m[3][0] - m[2][0], m[3][1] - m[2][1], m[3][2] - m[2][2], m[3][3] - m[2][3] }; + + for (&planes) |*plane| { + const len = @sqrt(plane[0] * plane[0] + plane[1] * plane[1] + plane[2] * plane[2]); + if (len > 0.0001) { + plane[0] /= len; + plane[1] /= len; + plane[2] /= len; + plane[3] /= len; + } + } + + return .{ .planes = planes }; +} + +fn loadShaderModule(vk: c.VkDevice, path: []const u8, allocator: std.mem.Allocator) !c.VkShaderModule { + const bytes = try std.fs.cwd().readFileAlloc(path, allocator, @enumFromInt(16 * 1024 * 1024)); + defer allocator.free(bytes); + if (bytes.len % 4 != 0) return error.InvalidShader; + + var info = std.mem.zeroes(c.VkShaderModuleCreateInfo); + info.sType = 
c.VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.codeSize = bytes.len; + info.pCode = @ptrCast(@alignCast(bytes.ptr)); + + var module: c.VkShaderModule = null; + try Utils.checkVk(c.vkCreateShaderModule(vk, &info, null, &module)); + return module; +} + +fn ensureShaderFileExists(path: []const u8) !void { + std.fs.cwd().access(path, .{}) catch |err| { + log.log.errWithTrace("Culling shader artifact missing: {s} ({})", .{ path, err }); + log.log.err("Run `nix develop --command zig build` to regenerate Vulkan SPIR-V shaders.", .{}); + return err; + }; +} + +fn divCeil(v: u32, d: u32) u32 { + return @divFloor(v + d - 1, d); +} diff --git a/src/engine/graphics/vulkan/shader_registry.zig b/src/engine/graphics/vulkan/shader_registry.zig index 2c020718..39f8e96d 100644 --- a/src/engine/graphics/vulkan/shader_registry.zig +++ b/src/engine/graphics/vulkan/shader_registry.zig @@ -37,3 +37,5 @@ pub const DEBUG_SHADOW_FRAG = "assets/shaders/vulkan/debug_shadow.frag.spv"; pub const CLOUD_VERT = "assets/shaders/vulkan/cloud.vert.spv"; pub const CLOUD_FRAG = "assets/shaders/vulkan/cloud.frag.spv"; + +pub const CULLING_COMP = "assets/shaders/vulkan/culling.comp.spv"; From 307963caf3888c05d3d75bbddee8fd1a3b117ac7 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 2 Apr 2026 00:20:58 +0100 Subject: [PATCH 2/5] fix: address review feedback on GPU frustum culling system - Double-buffer all GPU resources with MAX_FRAMES_IN_FLIGHT to prevent data races when frames overlap - Add compute-to-transfer barrier before resetCounter to avoid racing with previous frame's shader writes - Add vkUnmapMemory before vkFreeMemory in buffer teardown - Add copyCounterToReadback with vkCmdCopyBuffer + host-read barrier so readVisibleCount returns actual GPU-visible count - Per-frame descriptor sets for correct buffer binding isolation - Fix relative import paths (rhi_context_types, utils) --- src/engine/graphics/vulkan/culling_system.zig | 305 +++++++++++------- 1 file changed, 191 
insertions(+), 114 deletions(-) diff --git a/src/engine/graphics/vulkan/culling_system.zig b/src/engine/graphics/vulkan/culling_system.zig index a1f387de..a946eb77 100644 --- a/src/engine/graphics/vulkan/culling_system.zig +++ b/src/engine/graphics/vulkan/culling_system.zig @@ -3,12 +3,13 @@ const c = @import("../../c.zig").c; const rhi_pkg = @import("../rhi.zig"); const log = @import("../core/log.zig"); const Mat4 = @import("../math/mat4.zig").Mat4; -const VulkanContext = @import("vulkan/rhi_context_types.zig").VulkanContext; -const Utils = @import("vulkan/utils.zig"); +const VulkanContext = @import("rhi_context_types.zig").VulkanContext; +const Utils = @import("utils.zig"); pub const CULLING_SHADER_PATH = "assets/shaders/vulkan/culling.comp.spv"; pub const MAX_CULLABLE_CHUNKS: usize = 16384; pub const WORKGROUP_SIZE: u32 = 64; +const MAX_FRAMES_IN_FLIGHT = rhi_pkg.MAX_FRAMES_IN_FLIGHT; pub const ChunkCullData = extern struct { min_point: [4]f32, @@ -24,14 +25,14 @@ pub const CullingSystem = struct { rhi: rhi_pkg.RHI, vk_ctx: *VulkanContext, - aabb_buffer: Utils.VulkanBuffer = .{}, - command_buffer: Utils.VulkanBuffer = .{}, - counter_buffer: Utils.VulkanBuffer = .{}, - counter_readback_buffer: Utils.VulkanBuffer = .{}, + aabb_buffers: [MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer, + command_buffers: [MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer, + counter_buffers: [MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer, + counter_readback_buffers: [MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer, descriptor_pool: c.VkDescriptorPool = null, descriptor_set_layout: c.VkDescriptorSetLayout = null, - descriptor_set: c.VkDescriptorSet = null, + descriptor_sets: [MAX_FRAMES_IN_FLIGHT]c.VkDescriptorSet, pipeline_layout: c.VkPipelineLayout = null, pipeline: c.VkPipeline = null, @@ -53,41 +54,46 @@ pub const CullingSystem = struct { .rhi = rhi, .vk_ctx = vk_ctx, .max_chunks = clamped_max, + .aabb_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer), + .command_buffers = 
std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer), + .counter_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer), + .counter_readback_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer), + .descriptor_sets = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]c.VkDescriptorSet), }; + errdefer self.destroyAllBuffers(); const aabb_size = clamped_max * @sizeOf(ChunkCullData); - self.aabb_buffer = try Utils.createVulkanBuffer( - &vk_ctx.vulkan_device, - aabb_size, - c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - ); - errdefer self.destroyBuffers(); - const cmd_size = @sizeOf(u32) * 4 + clamped_max * @sizeOf(c.VkDrawIndirectCommand); - self.command_buffer = try Utils.createVulkanBuffer( - &vk_ctx.vulkan_device, - cmd_size, - c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, - c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - ); - errdefer self.destroyBuffers(); - self.counter_buffer = try Utils.createVulkanBuffer( - &vk_ctx.vulkan_device, - 16, - c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_TRANSFER_DST_BIT, - c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - ); - errdefer self.destroyBuffers(); - - self.counter_readback_buffer = try Utils.createVulkanBuffer( - &vk_ctx.vulkan_device, - 16, - c.VK_BUFFER_USAGE_TRANSFER_DST_BIT, - c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - ); - errdefer self.destroyBuffers(); + for (0..MAX_FRAMES_IN_FLIGHT) |i| { + self.aabb_buffers[i] = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + aabb_size, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + ); + + self.command_buffers[i] = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + cmd_size, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, + c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + ); + + self.counter_buffers[i] = try 
Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + 16, + c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_TRANSFER_DST_BIT | c.VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + ); + + self.counter_readback_buffers[i] = try Utils.createVulkanBuffer( + &vk_ctx.vulkan_device, + 16, + c.VK_BUFFER_USAGE_TRANSFER_DST_BIT, + c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + ); + } try ensureShaderFileExists(CULLING_SHADER_PATH); @@ -99,16 +105,17 @@ pub const CullingSystem = struct { pub fn deinit(self: *CullingSystem) void { self.deinitComputeResources(); - self.destroyBuffers(); + self.destroyAllBuffers(); self.allocator.destroy(self); } - pub fn updateAABBData(self: *CullingSystem, chunks: []const ChunkCullData) void { - if (self.aabb_buffer.mapped_ptr == null) return; + pub fn updateAABBData(self: *CullingSystem, frame_index: usize, chunks: []const ChunkCullData) void { + const buf = &self.aabb_buffers[frame_index]; + if (buf.mapped_ptr == null) return; const copy_len = @min(chunks.len, self.max_chunks) * @sizeOf(ChunkCullData); if (copy_len == 0) return; @memcpy( - @as([*]u8, @ptrCast(self.aabb_buffer.mapped_ptr.?))[0..copy_len], + @as([*]u8, @ptrCast(buf.mapped_ptr.?))[0..copy_len], @as([*]const u8, @ptrCast(chunks.ptr))[0..copy_len], ); } @@ -119,10 +126,28 @@ pub const CullingSystem = struct { chunk_count: u32, ) void { if (chunk_count == 0) return; - const cmd = self.vk_ctx.frames.command_buffers[self.vk_ctx.frames.current_frame]; + const fi = self.vk_ctx.frames.current_frame; + const cmd = self.vk_ctx.frames.command_buffers[fi]; if (cmd == null) return; - self.resetCounter(cmd); + var prev_barrier = std.mem.zeroes(c.VkMemoryBarrier); + prev_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; + prev_barrier.srcAccessMask = c.VK_ACCESS_SHADER_WRITE_BIT; + prev_barrier.dstAccessMask = c.VK_ACCESS_TRANSFER_WRITE_BIT; + c.vkCmdPipelineBarrier( + cmd, + c.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 
c.VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 1, + &prev_barrier, + 0, + null, + 0, + null, + ); + + self.resetCounter(cmd, fi); var host_barrier = std.mem.zeroes(c.VkMemoryBarrier); host_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; @@ -144,7 +169,7 @@ pub const CullingSystem = struct { const push = extractFrustumPlanes(view_proj); c.vkCmdBindPipeline(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline); - c.vkCmdBindDescriptorSets(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline_layout, 0, 1, &self.descriptor_set, 0, null); + c.vkCmdBindDescriptorSets(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline_layout, 0, 1, &self.descriptor_sets[fi], 0, null); c.vkCmdPushConstants(cmd, self.pipeline_layout, c.VK_SHADER_STAGE_COMPUTE_BIT, 0, @sizeOf(FrustumPushConstants), &push); const groups = divCeil(chunk_count, WORKGROUP_SIZE); @@ -166,17 +191,48 @@ pub const CullingSystem = struct { 0, null, ); + + self.copyCounterToReadback(cmd, fi); } - pub fn readVisibleCount(self: *CullingSystem) u32 { - if (self.counter_readback_buffer.mapped_ptr == null) return 0; - const ptr: *align(1) u32 = @ptrCast(@alignCast(self.counter_readback_buffer.mapped_ptr.?)); + pub fn readVisibleCount(self: *CullingSystem, frame_index: usize) u32 { + const buf = &self.counter_readback_buffers[frame_index]; + if (buf.mapped_ptr == null) return 0; + const ptr: *align(1) u32 = @ptrCast(@alignCast(buf.mapped_ptr.?)); return ptr.*; } - fn resetCounter(self: *CullingSystem, cmd: c.VkCommandBuffer) void { + fn copyCounterToReadback(self: *CullingSystem, cmd: c.VkCommandBuffer, frame_index: usize) void { + const src = self.counter_buffers[frame_index]; + const dst = self.counter_readback_buffers[frame_index]; + + var copy_region = std.mem.zeroes(c.VkBufferCopy); + copy_region.srcOffset = 0; + copy_region.dstOffset = 0; + copy_region.size = 16; + c.vkCmdCopyBuffer(cmd, src.buffer, dst.buffer, 1, ©_region); + + var copy_barrier = std.mem.zeroes(c.VkMemoryBarrier); + copy_barrier.sType = 
c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; + copy_barrier.srcAccessMask = c.VK_ACCESS_TRANSFER_WRITE_BIT; + copy_barrier.dstAccessMask = c.VK_ACCESS_HOST_READ_BIT; + c.vkCmdPipelineBarrier( + cmd, + c.VK_PIPELINE_STAGE_TRANSFER_BIT, + c.VK_PIPELINE_STAGE_HOST_BIT, + 0, + 1, + ©_barrier, + 0, + null, + 0, + null, + ); + } + + fn resetCounter(self: *CullingSystem, cmd: c.VkCommandBuffer, frame_index: usize) void { const zero: u32 = 0; - c.vkCmdFillBuffer(cmd, self.counter_buffer.buffer, 0, 16, zero); + c.vkCmdFillBuffer(cmd, self.counter_buffers[frame_index].buffer, 0, 16, zero); var fill_barrier = std.mem.zeroes(c.VkMemoryBarrier); fill_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; @@ -200,12 +256,12 @@ pub const CullingSystem = struct { const vk = self.vk_ctx.vulkan_device.vk_device; var pool_sizes = [_]c.VkDescriptorPoolSize{ - .{ .type = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 3 }, + .{ .type = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 3 * MAX_FRAMES_IN_FLIGHT }, }; var pool_info = std.mem.zeroes(c.VkDescriptorPoolCreateInfo); pool_info.sType = c.VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - pool_info.maxSets = 1; + pool_info.maxSets = MAX_FRAMES_IN_FLIGHT; pool_info.poolSizeCount = pool_sizes.len; pool_info.pPoolSizes = &pool_sizes; try Utils.checkVk(c.vkCreateDescriptorPool(vk, &pool_info, null, &self.descriptor_pool)); @@ -222,14 +278,17 @@ pub const CullingSystem = struct { layout_info.pBindings = &bindings; try Utils.checkVk(c.vkCreateDescriptorSetLayout(vk, &layout_info, null, &self.descriptor_set_layout)); + var layouts = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]c.VkDescriptorSetLayout); + for (&layouts) |*l| l.* = self.descriptor_set_layout; + var alloc_info = std.mem.zeroes(c.VkDescriptorSetAllocateInfo); alloc_info.sType = c.VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; alloc_info.descriptorPool = self.descriptor_pool; - alloc_info.descriptorSetCount = 1; - alloc_info.pSetLayouts = &self.descriptor_set_layout; - try 
Utils.checkVk(c.vkAllocateDescriptorSets(vk, &alloc_info, &self.descriptor_set)); + alloc_info.descriptorSetCount = MAX_FRAMES_IN_FLIGHT; + alloc_info.pSetLayouts = &layouts; + try Utils.checkVk(c.vkAllocateDescriptorSets(vk, &alloc_info, &self.descriptor_sets)); - self.updateDescriptorSets(); + self.updateAllDescriptorSets(); var pc_range = std.mem.zeroes(c.VkPushConstantRange); pc_range.stageFlags = c.VK_SHADER_STAGE_COMPUTE_BIT; @@ -260,52 +319,62 @@ pub const CullingSystem = struct { try Utils.checkVk(c.vkCreateComputePipelines(vk, null, 1, &pipeline_info, null, &self.pipeline)); } - fn updateDescriptorSets(self: *CullingSystem) void { + fn updateAllDescriptorSets(self: *CullingSystem) void { const vk = self.vk_ctx.vulkan_device.vk_device; + const aabb_range = self.max_chunks * @sizeOf(ChunkCullData); + + var writes: [3 * MAX_FRAMES_IN_FLIGHT]c.VkWriteDescriptorSet = undefined; + var aabb_infos: [MAX_FRAMES_IN_FLIGHT]c.VkDescriptorBufferInfo = undefined; + var cmd_infos: [MAX_FRAMES_IN_FLIGHT]c.VkDescriptorBufferInfo = undefined; + var counter_infos: [MAX_FRAMES_IN_FLIGHT]c.VkDescriptorBufferInfo = undefined; + var n: usize = 0; + + for (0..MAX_FRAMES_IN_FLIGHT) |i| { + aabb_infos[i] = c.VkDescriptorBufferInfo{ + .buffer = self.aabb_buffers[i].buffer, + .offset = 0, + .range = aabb_range, + }; + cmd_infos[i] = c.VkDescriptorBufferInfo{ + .buffer = self.command_buffers[i].buffer, + .offset = 0, + .range = c.VK_WHOLE_SIZE, + }; + counter_infos[i] = c.VkDescriptorBufferInfo{ + .buffer = self.counter_buffers[i].buffer, + .offset = 0, + .range = 16, + }; + + writes[n] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[n].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[n].dstSet = self.descriptor_sets[i]; + writes[n].dstBinding = 0; + writes[n].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[n].descriptorCount = 1; + writes[n].pBufferInfo = &aabb_infos[i]; + n += 1; + + writes[n] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[n].sType = 
c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[n].dstSet = self.descriptor_sets[i]; + writes[n].dstBinding = 1; + writes[n].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[n].descriptorCount = 1; + writes[n].pBufferInfo = &cmd_infos[i]; + n += 1; + + writes[n] = std.mem.zeroes(c.VkWriteDescriptorSet); + writes[n].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[n].dstSet = self.descriptor_sets[i]; + writes[n].dstBinding = 2; + writes[n].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[n].descriptorCount = 1; + writes[n].pBufferInfo = &counter_infos[i]; + n += 1; + } - var aabb_info = c.VkDescriptorBufferInfo{ - .buffer = self.aabb_buffer.buffer, - .offset = 0, - .range = self.max_chunks * @sizeOf(ChunkCullData), - }; - var cmd_info = c.VkDescriptorBufferInfo{ - .buffer = self.command_buffer.buffer, - .offset = 0, - .range = c.VK_WHOLE_SIZE, - }; - var counter_info = c.VkDescriptorBufferInfo{ - .buffer = self.counter_buffer.buffer, - .offset = 0, - .range = 16, - }; - - var writes: [3]c.VkWriteDescriptorSet = undefined; - - writes[0] = std.mem.zeroes(c.VkWriteDescriptorSet); - writes[0].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[0].dstSet = self.descriptor_set; - writes[0].dstBinding = 0; - writes[0].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[0].descriptorCount = 1; - writes[0].pBufferInfo = &aabb_info; - - writes[1] = std.mem.zeroes(c.VkWriteDescriptorSet); - writes[1].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[1].dstSet = self.descriptor_set; - writes[1].dstBinding = 1; - writes[1].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[1].descriptorCount = 1; - writes[1].pBufferInfo = &cmd_info; - - writes[2] = std.mem.zeroes(c.VkWriteDescriptorSet); - writes[2].sType = c.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[2].dstSet = self.descriptor_set; - writes[2].dstBinding = 2; - writes[2].descriptorType = c.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - 
writes[2].descriptorCount = 1; - writes[2].pBufferInfo = &counter_info; - - c.vkUpdateDescriptorSets(vk, writes.len, &writes[0], 0, null); + c.vkUpdateDescriptorSets(vk, @intCast(n), &writes[0], 0, null); } fn deinitComputeResources(self: *CullingSystem) void { @@ -319,28 +388,36 @@ pub const CullingSystem = struct { self.pipeline_layout = null; self.descriptor_set_layout = null; self.descriptor_pool = null; - self.descriptor_set = null; + self.descriptor_sets = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]c.VkDescriptorSet); } - fn destroyBuffers(self: *CullingSystem) void { + fn destroyAllBuffers(self: *CullingSystem) void { const vk = self.vk_ctx.vulkan_device.vk_device; - if (self.aabb_buffer.buffer != null) c.vkDestroyBuffer(vk, self.aabb_buffer.buffer, null); - if (self.aabb_buffer.memory != null) c.vkFreeMemory(vk, self.aabb_buffer.memory, null); - if (self.command_buffer.buffer != null) c.vkDestroyBuffer(vk, self.command_buffer.buffer, null); - if (self.command_buffer.memory != null) c.vkFreeMemory(vk, self.command_buffer.memory, null); - if (self.counter_buffer.buffer != null) c.vkDestroyBuffer(vk, self.counter_buffer.buffer, null); - if (self.counter_buffer.memory != null) c.vkFreeMemory(vk, self.counter_buffer.memory, null); - if (self.counter_readback_buffer.buffer != null) c.vkDestroyBuffer(vk, self.counter_readback_buffer.buffer, null); - if (self.counter_readback_buffer.memory != null) c.vkFreeMemory(vk, self.counter_readback_buffer.memory, null); - - self.aabb_buffer = .{}; - self.command_buffer = .{}; - self.counter_buffer = .{}; - self.counter_readback_buffer = .{}; + for (0..MAX_FRAMES_IN_FLIGHT) |i| { + unmapAndDestroy(vk, &self.aabb_buffers[i]); + unmapAndDestroy(vk, &self.command_buffers[i]); + unmapAndDestroy(vk, &self.counter_buffers[i]); + unmapAndDestroy(vk, &self.counter_readback_buffers[i]); + } + + self.aabb_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer); + self.command_buffers = 
std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer); + self.counter_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer); + self.counter_readback_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer); } }; +fn unmapAndDestroy(vk: c.VkDevice, buf: *Utils.VulkanBuffer) void { + if (buf.mapped_ptr != null) { + c.vkUnmapMemory(vk, buf.memory); + buf.mapped_ptr = null; + } + if (buf.buffer != null) c.vkDestroyBuffer(vk, buf.buffer, null); + if (buf.memory != null) c.vkFreeMemory(vk, buf.memory, null); + buf.* = .{}; +} + fn extractFrustumPlanes(vp: Mat4) FrustumPushConstants { const m = vp.data; var planes: [6][4]f32 = undefined; From acecc9fba1e4b09230e7a17a89556ab8c13d6a66 Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 2 Apr 2026 00:45:03 +0100 Subject: [PATCH 3/5] fix: bounds-check visible slot in culling shader, clamp chunk_count - Add bounds check before writing to cmd_buffer.commands[slot] to prevent GPU out-of-bounds writes when visible count exceeds buffer - Clamp chunk_count to max_chunks before divCeil to prevent u32 overflow in workgroup calculation --- assets/shaders/vulkan/culling.comp | 10 ++++++---- src/engine/graphics/vulkan/culling_system.zig | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/assets/shaders/vulkan/culling.comp b/assets/shaders/vulkan/culling.comp index e03ae786..0bf868b0 100644 --- a/assets/shaders/vulkan/culling.comp +++ b/assets/shaders/vulkan/culling.comp @@ -68,9 +68,11 @@ void main() { if (aabbVisible(aabb_min, aabb_max)) { uint slot = atomicAdd(visible_counter.count, 1); - cmd_buffer.commands[slot].vertexCount = 0; - cmd_buffer.commands[slot].instanceCount = 1; - cmd_buffer.commands[slot].firstVertex = 0; - cmd_buffer.commands[slot].firstInstance = idx; + if (slot < cmd_buffer.commands.length()) { + cmd_buffer.commands[slot].vertexCount = 0; + cmd_buffer.commands[slot].instanceCount = 1; + cmd_buffer.commands[slot].firstVertex = 0; + 
cmd_buffer.commands[slot].firstInstance = idx; + } } } diff --git a/src/engine/graphics/vulkan/culling_system.zig b/src/engine/graphics/vulkan/culling_system.zig index a946eb77..b8911f2c 100644 --- a/src/engine/graphics/vulkan/culling_system.zig +++ b/src/engine/graphics/vulkan/culling_system.zig @@ -172,7 +172,8 @@ pub const CullingSystem = struct { c.vkCmdBindDescriptorSets(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline_layout, 0, 1, &self.descriptor_sets[fi], 0, null); c.vkCmdPushConstants(cmd, self.pipeline_layout, c.VK_SHADER_STAGE_COMPUTE_BIT, 0, @sizeOf(FrustumPushConstants), &push); - const groups = divCeil(chunk_count, WORKGROUP_SIZE); + const clamped_count = @min(chunk_count, @as(u32, @intCast(self.max_chunks))); + const groups = divCeil(clamped_count, WORKGROUP_SIZE); c.vkCmdDispatch(cmd, groups, 1, 1); var compute_barrier = std.mem.zeroes(c.VkMemoryBarrier); From 72f58c1e0ab4e5d5fbbe5df5a7e02f784cb94c9b Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 2 Apr 2026 00:53:47 +0100 Subject: [PATCH 4/5] fix: add coherent qualifiers to culling shader SSBOs Ensures memory visibility between shader invocations for the atomic counter and command output buffer under high contention. 
--- assets/shaders/vulkan/culling.comp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/shaders/vulkan/culling.comp b/assets/shaders/vulkan/culling.comp index 0bf868b0..8865da64 100644 --- a/assets/shaders/vulkan/culling.comp +++ b/assets/shaders/vulkan/culling.comp @@ -18,7 +18,7 @@ layout(std430, binding = 0) readonly buffer ChunkAABBs { ChunkAABB chunks[]; } aabb_buffer; -layout(std430, binding = 1) writeonly buffer DrawCommands { +layout(std430, binding = 1) coherent writeonly buffer DrawCommands { uint visible_count; uint _pad0; uint _pad1; @@ -26,7 +26,7 @@ layout(std430, binding = 1) writeonly buffer DrawCommands { DrawIndirectCommand commands[]; } cmd_buffer; -layout(std430, binding = 2) buffer VisibleCountBuffer { +layout(std430, binding = 2) coherent buffer VisibleCountBuffer { uint count; uint _pad_v0; uint _pad_v1; From 864624dd810e03664335cc908c35c9c64ce76e0a Mon Sep 17 00:00:00 2001 From: MichaelFisher1997 Date: Thu, 2 Apr 2026 01:11:37 +0100 Subject: [PATCH 5/5] fix: replace hardcoded counter sizes, cache frustum planes - Use @sizeOf(u32) instead of magic 16 for counter buffer sizes, copy regions, fill commands, and descriptor ranges - Cache extracted frustum planes across dispatch calls, only re-extract when view_proj matrix changes --- src/engine/graphics/vulkan/culling_system.zig | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/engine/graphics/vulkan/culling_system.zig b/src/engine/graphics/vulkan/culling_system.zig index b8911f2c..0b2a7393 100644 --- a/src/engine/graphics/vulkan/culling_system.zig +++ b/src/engine/graphics/vulkan/culling_system.zig @@ -36,6 +36,10 @@ pub const CullingSystem = struct { pipeline_layout: c.VkPipelineLayout = null, pipeline: c.VkPipeline = null, + cached_view_proj: Mat4 = Mat4.zero, + cached_planes: FrustumPushConstants = undefined, + planes_cached: bool = false, + max_chunks: usize, pub fn init( @@ -59,6 +63,7 @@ pub const CullingSystem = struct { 
.counter_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer),
+        .counter_readback_buffers = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]Utils.VulkanBuffer),
+        .descriptor_sets = std.mem.zeroes([MAX_FRAMES_IN_FLIGHT]c.VkDescriptorSet),
+        .planes_cached = false,
     };
 
     errdefer self.destroyAllBuffers();
@@ -82,14 +87,14 @@
 
         self.counter_buffers[i] = try Utils.createVulkanBuffer(
             &vk_ctx.vulkan_device,
-            16,
+            @sizeOf(u32),
             c.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | c.VK_BUFFER_USAGE_TRANSFER_DST_BIT | c.VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
             c.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         );
 
         self.counter_readback_buffers[i] = try Utils.createVulkanBuffer(
             &vk_ctx.vulkan_device,
-            16,
+            @sizeOf(u32),
             c.VK_BUFFER_USAGE_TRANSFER_DST_BIT,
             c.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | c.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         );
@@ -166,7 +171,12 @@
             null,
         );
 
-        const push = extractFrustumPlanes(view_proj);
+        if (!self.planes_cached or !mat4Equal(self.cached_view_proj, view_proj)) {
+            self.cached_planes = extractFrustumPlanes(view_proj);
+            self.cached_view_proj = view_proj;
+            self.planes_cached = true;
+        }
+        const push = self.cached_planes;
 
         c.vkCmdBindPipeline(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline);
         c.vkCmdBindDescriptorSets(cmd, c.VK_PIPELINE_BIND_POINT_COMPUTE, self.pipeline_layout, 0, 1, &self.descriptor_sets[fi], 0, null);
@@ -210,7 +220,7 @@
             var copy_region = std.mem.zeroes(c.VkBufferCopy);
             copy_region.srcOffset = 0;
             copy_region.dstOffset = 0;
-            copy_region.size = 16;
+            copy_region.size = @sizeOf(u32);
             c.vkCmdCopyBuffer(cmd, src.buffer, dst.buffer, 1, &copy_region);
 
             var copy_barrier = std.mem.zeroes(c.VkMemoryBarrier);
@@ -233,7 +243,7 @@
 
     fn resetCounter(self: *CullingSystem, cmd: c.VkCommandBuffer, frame_index: usize) void {
         const zero: u32 = 0;
-        c.vkCmdFillBuffer(cmd, self.counter_buffers[frame_index].buffer, 0, 16, zero);
+ 
c.vkCmdFillBuffer(cmd, self.counter_buffers[frame_index].buffer, 0, @sizeOf(u32), zero); var fill_barrier = std.mem.zeroes(c.VkMemoryBarrier); fill_barrier.sType = c.VK_STRUCTURE_TYPE_MEMORY_BARRIER; @@ -344,7 +354,7 @@ pub const CullingSystem = struct { counter_infos[i] = c.VkDescriptorBufferInfo{ .buffer = self.counter_buffers[i].buffer, .offset = 0, - .range = 16, + .range = @sizeOf(u32), }; writes[n] = std.mem.zeroes(c.VkWriteDescriptorSet); @@ -409,6 +419,15 @@ pub const CullingSystem = struct { } }; +fn mat4Equal(a: Mat4, b: Mat4) bool { + for (0..4) |col| { + for (0..4) |row| { + if (a.data[col][row] != b.data[col][row]) return false; + } + } + return true; +} + fn unmapAndDestroy(vk: c.VkDevice, buf: *Utils.VulkanBuffer) void { if (buf.mapped_ptr != null) { c.vkUnmapMemory(vk, buf.memory);