From 846c058b13b0b3c2f3988e59ce91d4cd785c9c4b Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 2 Mar 2026 23:36:06 -0500 Subject: [PATCH 1/3] Respect MemoryAccessVolatileMask on OpLoad to prevent forwarding When an OpLoad carries the Volatile memory access flag, the loaded value must not be forwarded (inlined) at each use site. Without this, SPIRV-Cross re-evaluates the pointer dereference expression at every use, which produces wrong results when the pointed-to memory is modified between the original load and subsequent uses (e.g. an insertion sort shifting elements in the same array accessed via PhysicalStorageBuffer pointers). Made-with: Cursor --- spirv_glsl.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 8a3c21da6..da33cbec5 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -12429,6 +12429,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // If an expression is mutable and forwardable, we speculate that it is immutable. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + // Volatile memory access requires the value be read exactly once from + // memory. Do not forward the expression so that re-evaluation at each + // use site cannot re-read potentially modified memory. + if (forward && length >= 4 && (ops[3] & MemoryAccessVolatileMask) != 0) + forward = false; + // If loading a non-native row-major matrix, mark the expression as need_transpose. bool need_transpose = false; bool old_need_transpose = false; From 2dcbe8d7d19ee78e2ab999ad62f8cf001a560c84 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Tue, 3 Mar 2026 12:44:09 -0500 Subject: [PATCH 2/3] Add regression tests for volatile PhysicalStorageBuffer load Add MSL and Vulkan GLSL test cases that verify an OpLoad with Volatile|Aligned from a PhysicalStorageBuffer pointer is not forwarded. Without the previous commit, the loaded value would be re-dereferenced at each use site. Made-with: Cursor --- ...le-phys-buf-load-no-forward.asm.msl24.comp | 19 ++++++ ...uf-load-no-forward.nocompat.vk.asm.comp.vk | 30 ++++++++++ ...le-phys-buf-load-no-forward.asm.msl24.comp | 60 +++++++++++++++++++ ...s-buf-load-no-forward.nocompat.vk.asm.comp | 60 +++++++++++++++++++ 4 files changed, 169 insertions(+) create mode 100644 reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp create mode 100644 reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk create mode 100644 shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp create mode 100644 shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp diff --git a/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp b/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp new file mode 100644 index 000000000..2a513c35a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct Registers +{ + ulong addr; + ulong addr2; +}; + +kernel void main0(constant Registers& registers [[buffer(0)]]) +{ + device int* _21 = reinterpret_cast(registers.addr2); + int _22 = *(reinterpret_cast(registers.addr)); + *_21 = _22; + *(reinterpret_cast(reinterpret_cast(_21) + 4ul)) = _22; +} + diff --git a/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk new file mode 100644 index 000000000..ceb86a172 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp.vk @@ -0,0 +1,30 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif +#extension GL_EXT_buffer_reference2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer intPointer; + +layout(buffer_reference, buffer_reference_align = 4) buffer intPointer +{ + int value; +}; + +layout(push_constant, std430) uniform Registers +{ + uint64_t addr; + uint64_t addr2; +} registers; + +void main() +{ + intPointer _21 = intPointer(registers.addr2); + int _22 = intPointer(registers.addr).value; + _21.value = _22; + intPointer(uint64_t(_21) + 4ul).value = _22; +} + diff --git a/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp b/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp new file mode 100644 index 000000000..7770ef68d --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/volatile-phys-buf-load-no-forward.asm.msl24.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "addr" + OpMemberName %Registers 1 "addr2" + OpName %registers "registers" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ulong = OpTypeInt 64 0 + %Registers = OpTypeStruct %ulong %ulong +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %ulong_4 = OpConstant %ulong 4 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong +%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int + %main = OpFunction %void None %3 + %5 = OpLabel + + %pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %addr0 = OpLoad %ulong %pc0 + %src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0 + + %pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1 + %addr1 = OpLoad %ulong %pc1 + %dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1 + + ; Volatile load from src — must NOT be forwarded + %ld = OpLoad %int %src_p Volatile|Aligned 4 + + ; Store the loaded value into dst (first use) + OpStore %dst_p %ld Aligned 4 + + ; Compute dst + 1 element via integer arithmetic + %dst_u64 = OpConvertPtrToU %ulong %dst_p + %dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4 + %dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4 + + ; Store the same loaded value again (second use) + OpStore %dst_p2 %ld Aligned 4 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp b/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp new file mode 100644 index 000000000..7770ef68d --- /dev/null +++ b/shaders-no-opt/asm/comp/volatile-phys-buf-load-no-forward.nocompat.vk.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "addr" + OpMemberName %Registers 1 "addr2" + OpName %registers "registers" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ulong = OpTypeInt 64 0 + %Registers = OpTypeStruct %ulong %ulong +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %ulong_4 = OpConstant %ulong 4 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong +%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int + %main = OpFunction %void None %3 + %5 = OpLabel + + %pc0 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %addr0 = OpLoad %ulong %pc0 + %src_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr0 + + %pc1 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_1 + %addr1 = OpLoad %ulong %pc1 + %dst_p = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %addr1 + + ; Volatile load from src — must NOT be forwarded + %ld = OpLoad %int %src_p Volatile|Aligned 4 + + ; Store the loaded value into dst (first use) + OpStore %dst_p %ld Aligned 4 + + ; Compute dst + 1 element via integer arithmetic + %dst_u64 = OpConvertPtrToU %ulong %dst_p + %dst_plus4 = OpIAdd %ulong %dst_u64 %ulong_4 + %dst_p2 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_int %dst_plus4 + + ; Store the same loaded value again (second use) + OpStore %dst_p2 %ld Aligned 4 + + OpReturn + OpFunctionEnd From 6933ffc8d6eb7bf3dd0559c3bb1141a447102645 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 23 Mar 2026 09:55:05 -0400 Subject: [PATCH 3/3] Update spirv_glsl.cpp Co-authored-by: Hans-Kristian Arntzen --- spirv_glsl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index da33cbec5..aa8bb4ebc 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -12432,6 +12432,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Volatile memory access requires the value be read exactly once from // memory. Do not forward the expression so that re-evaluation at each // use site cannot re-read potentially modified memory. + // FIXME: To force implementations to actually respect the volatile nature of the load, + // the block itself must be marked volatile, or VulkanMM is used to do an explicit volatile load. if (forward && length >= 4 && (ops[3] & MemoryAccessVolatileMask) != 0) forward = false;