Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/light/light.hh
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,11 @@ public:
setting_bool novanilla;
setting_scalar gate;
setting_int32 sunsamples;
setting_bool gpusunmerge; // -gpusunmerge: approximate-merge nearby GPU sun jitter rays
setting_scalar gpusunmergequality; // -gpusunmergequality: 0 fast/rough, 1 slow/high quality
setting_bool gpusourcecull; // -gpusourcecull: use approximate GPU per-face source culling
setting_scalar gpusourcecullquality; // -gpusourcecullquality: 0 fast/aggressive, 1 safest/conservative
settings::setting_bool gpu; // -gpu: use Vulkan GPU ray-query backend when available
setting_bool arghradcompat;
setting_bool nolighting;
setting_vec3 debugface;
Expand Down
47 changes: 47 additions & 0 deletions include/light/trace_embree.hh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/

#pragma once
#include <light/trace_gpu.hh>

#include <common/aligned_allocator.hh>
#include <common/qvec.hh>
Expand Down Expand Up @@ -280,6 +281,52 @@ public:
if (!_rays.size())
return;

#if defined(HAVE_GPU_LIGHT)
// Optional large-batch occlusion path. v5 direct lighting uses
// direct_phase.comp; small fallback raystreams stay on Embree.
constexpr size_t GPU_OCCLUSION_MIN_BATCH = 262144;

if (_rays.size() >= GPU_OCCLUSION_MIN_BATCH && GPU_TraceAvailable()) {
std::vector<gpu_light::ray_t> gpu_rays;
std::vector<gpu_light::occlusion_result_t> gpu_results;

gpu_rays.resize(_rays.size());
gpu_results.resize(_rays.size());

for (size_t i = 0; i < _rays.size(); ++i) {
const auto &src = _rays[i].ray.ray;
auto &dst = gpu_rays[i];

dst.origin[0] = src.org_x;
dst.origin[1] = src.org_y;
dst.origin[2] = src.org_z;
dst.tmin = src.tnear;

dst.direction[0] = src.dir_x;
dst.direction[1] = src.dir_y;
dst.direction[2] = src.dir_z;
dst.tmax = src.tfar;

dst.shadow_mask = static_cast<std::uint32_t>(shadowmask);
dst.user_index = static_cast<std::uint32_t>(i);
}

if (gpu_light::trace_occlusion_batch(
self,
static_cast<std::uint32_t>(shadowmask),
gpu_rays.data(),
gpu_results.data(),
gpu_rays.size())) {
for (size_t i = 0; i < _rays.size(); ++i) {
if (gpu_results[i].occluded) {
_rays[i].ray.ray.tfar = -std::abs(_rays[i].ray.ray.tfar);
}
}
return;
}
}
#endif

ray_source_info ctx2(this, self, shadowmask);
RTCOccludedArguments embree4_args = ctx2.setup_occluded_arguments();
for (auto &ray : _rays)
Expand Down
152 changes: 152 additions & 0 deletions include/light/trace_gpu.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/* GPU trace backend
* Prototype overlay generated for Linux/Vulkan ray-query development.
*/
#pragma once

#include <cstdint>
#include <cstddef>
#include <vector>

struct mbsp_t;
class modelinfo_t;

// GPU_LIGHT_COMPILED mirrors the HAVE_GPU_LIGHT build definition as a 0/1
// value so it can be used in ordinary expressions as well as in #if.
// It is evaluated per translation unit: a file that includes this header
// without HAVE_GPU_LIGHT defined sees 0.
#ifndef HAVE_GPU_LIGHT
#define GPU_LIGHT_COMPILED 0
#else
#define GPU_LIGHT_COMPILED 1
#endif

namespace gpu_light {

// A single occlusion ray passed to trace_occlusion_batch().
// The Embree raystream fills it from an RTCRay: org_* -> origin,
// tnear -> tmin, dir_* -> direction, tfar -> tmax.
// NOTE(review): member order and the 40-byte size presumably have to match
// the GPU-side buffer declaration; confirm against the shader before
// reordering or adding fields.
struct ray_t {
    float origin[3]{0.0f, 0.0f, 0.0f}; // ray start (x, y, z)
    float tmin{0.01f}; // near limit along the ray
    float direction[3]{0.0f, 0.0f, 1.0f}; // ray direction (x, y, z)
    float tmax{0.0f}; // far limit along the ray
    std::uint32_t shadow_mask{0xffffffffu}; // per-ray copy of the batch shadow mask
    std::uint32_t user_index{0u}; // caller-defined id; the raystream stores the ray's batch index
};

// Per-ray output of trace_occlusion_batch(); one entry per input ray.
// NOTE(review): layout presumably mirrors a GPU-side buffer; confirm against
// the shader before changing it.
struct occlusion_result_t {
    std::uint32_t occluded{0u}; // non-zero when the ray was blocked
    std::uint32_t reserved0{0u}; // unused
    // Defaults to fully transmitting (1, 1, 1).
    // NOTE(review): not read by the Embree raystream caller; confirm intended use.
    float transmittance[3]{1.0f, 1.0f, 1.0f};
};

// One job record for trace_direct_accumulate_batch(): four groups of four
// floats followed by four 32-bit words (80 bytes total).
// NOTE(review): field meanings are inferred from the names (o* origin,
// d* direction, c* colour, n* normal-related); confirm against the backend.
struct direct_job_t {
    // group 0
    float ox{0.0f};
    float oy{0.0f};
    float oz{0.0f};
    float tmin{0.01f};

    // group 1
    float dx{0.0f};
    float dy{0.0f};
    float dz{1.0f};
    float tmax{0.0f};

    // group 2
    float cr{0.0f};
    float cg{0.0f};
    float cb{0.0f};
    float pad0{0.0f};

    // group 3
    float nr{0.0f};
    float ng{0.0f};
    float nb{0.0f};
    float pad1{0.0f};

    // trailing words
    std::uint32_t sample_index{0u};
    std::uint32_t flags{0u};
    std::uint32_t reserved0{0u};
    std::uint32_t reserved1{0u};
};

// A (first, count) pair passed as `ranges` to trace_direct_accumulate_batch().
// NOTE(review): presumably selects a contiguous run of direct_job_t entries
// per sample; confirm against the backend.
struct direct_sample_range_t {
    std::uint32_t first{0u};
    std::uint32_t count{0u};
};

// Accumulator record written by the direct-light batch calls: two groups of
// four floats followed by four 32-bit words (48 bytes total). Also used under
// the name direct_phase_accum_t.
// NOTE(review): c* / n* are presumably accumulated colour and normal-related
// terms; confirm against the backend.
struct direct_accum_t {
    float cr{0.0f};
    float cg{0.0f};
    float cb{0.0f};
    float pad0{0.0f};

    float nr{0.0f};
    float ng{0.0f};
    float nb{0.0f};
    float pad1{0.0f};

    std::uint32_t hit{0u};
    std::uint32_t reserved0{0u};
    std::uint32_t reserved1{0u};
    std::uint32_t reserved2{0u};
};

// One sample record for trace_direct_phase_batch(): two groups of four
// floats followed by four 32-bit words (48 bytes total).
// NOTE(review): p* / n* are presumably the sample position and surface
// normal; confirm against the backend.
struct direct_phase_sample_t {
    float px{0.0f};
    float py{0.0f};
    float pz{0.0f};
    float occlusion{1.0f};

    float nx{0.0f};
    float ny{0.0f};
    float nz{1.0f};
    float twosided{0.0f}; // stored as a float; presumably a 0/1 flag (confirm)

    std::uint32_t face_index{0u};
    std::uint32_t reserved0{0u};
    std::uint32_t reserved1{0u};
    std::uint32_t reserved2{0u};
};

// A (begin, count) pair passed as `face_ranges` to trace_direct_phase_batch().
// NOTE(review): presumably indexes into the face_source_indices array given
// to the same call; confirm against the backend.
struct direct_phase_face_range_t {
    std::uint32_t source_begin{0u};
    std::uint32_t source_count{0u};
};

// One light-source record for trace_direct_phase_batch(): three groups of
// four floats, four 32-bit words, then four more floats (80 bytes total).
// NOTE(review): p* / d* / c* are presumably position, direction and colour;
// confirm against the backend.
struct direct_phase_source_t {
    float px{0.0f};
    float py{0.0f};
    float pz{0.0f};
    float light{0.0f};

    float dx{0.0f};
    float dy{0.0f};
    float dz{1.0f};
    float dist{65536.0f};

    float cr{1.0f};
    float cg{1.0f};
    float cb{1.0f};
    float atten{1.0f};

    std::uint32_t type{0u}; // 0 = point, 1 = sun
    std::uint32_t formula{0u}; // light_formula_t for point lights
    std::uint32_t flags{0u}; // bit 0: dirt
    std::uint32_t reserved0{0u};

    float anglescale{1.0f};
    float dirt{0.0f};
    float falloff{0.0f};
    float pad0{0.0f};
};

// The direct-phase path writes its results into the same record type as the
// direct-accumulate path; this alias only gives it a phase-specific name.
using direct_phase_accum_t = direct_accum_t;


// Lifecycle state reported by gpu_light::state().
// NOTE(review): the meanings below are inferred from the enumerator names;
// confirm against the backend implementation.
enum class backend_state_t {
    unavailable = 0, // presumably: not initialised / not usable
    initialized = 1, // presumably: init() succeeded
    failed = 2 // presumably: init() was attempted and failed
};

// Counter snapshot returned by gpu_light::stats(). All counters start at 0.
// NOTE(review): exactly what each counter counts is inferred from its name;
// confirm against the backend implementation.
struct stats_t {
    std::uint64_t batches{0u};
    std::uint64_t rays{0u};
    std::uint64_t gpu_batches{0u};
    std::uint64_t fallback_batches{0u};
};

// Backend status queries. Only declared here; the definitions live outside
// this header.
// NOTE(review): requested() presumably reports whether the user asked for
// the GPU backend (the -gpu setting); confirm in the implementation.
bool requested();
// Current backend lifecycle state.
backend_state_t state();
// NOTE(review): presumably a printable name for state(); confirm.
const char *state_string();
// NOTE(review): presumably a description of the most recent backend failure;
// ownership/lifetime of the returned string is not visible here.
const char *last_error();
// Returns the counters by value.
stats_t stats();

// Backend lifecycle for the given BSP.
// NOTE(review): init() presumably returns true on success; confirm.
bool init(const mbsp_t *bsp);
void shutdown();

// Traces `count` occlusion rays in one batch.
//
// Returns true when the batch was handled by the GPU backend. Returns false to
// tell the caller to run the existing CPU/Embree path.
//
// self        - model the rays originate from (forwarded by the raystream).
// shadow_mask - batch-wide shadow mask; the raystream also copies it into
//               every ray_t::shadow_mask.
// rays        - input array of `count` rays.
// results     - output array of `count` entries; the raystream treats a
//               non-zero `occluded` as "ray blocked".
// NOTE(review): whether `results` is left untouched when false is returned is
// not visible here; callers should not read it in that case.
bool trace_occlusion_batch(
const modelinfo_t *self,
std::uint32_t shadow_mask,
const ray_t *rays,
occlusion_result_t *results,
std::size_t count);


// Direct-lighting batch: takes arrays of light sources and samples and writes
// into `accum`.
// NOTE(review): by analogy with trace_occlusion_batch, the return value
// presumably means "handled by the GPU backend"; confirm.
//
// sources / source_count                        - light source records.
// samples / sample_count                        - sample records.
// accum                                         - output records; presumably
//                                                 one per sample (confirm).
// face_ranges / face_range_count                - (source_begin, source_count)
//                                                 pairs.
// face_source_indices / face_source_index_count - index array; presumably the
//                                                 target of face_ranges and
//                                                 holding indices into
//                                                 `sources` (confirm).
bool trace_direct_phase_batch(
const direct_phase_source_t *sources,
std::size_t source_count,
const direct_phase_sample_t *samples,
direct_phase_accum_t *accum,
std::size_t sample_count,
const direct_phase_face_range_t *face_ranges,
std::size_t face_range_count,
const std::uint32_t *face_source_indices,
std::size_t face_source_index_count);

// Direct-lighting batch driven by per-ray jobs: takes `job_count` jobs plus
// per-sample ranges and writes into `accum`.
// NOTE(review): by analogy with trace_occlusion_batch, the return value
// presumably means "handled by the GPU backend"; confirm.
//
// self / shadow_mask - same roles as in trace_occlusion_batch.
// jobs / job_count   - job records.
// ranges             - (first, count) pairs; presumably `sample_count` of
//                      them, each selecting a run of `jobs` (confirm).
// accum              - output records; presumably `sample_count` of them
//                      (confirm).
bool trace_direct_accumulate_batch(
const modelinfo_t *self,
std::uint32_t shadow_mask,
const direct_job_t *jobs,
std::size_t job_count,
const direct_sample_range_t *ranges,
direct_accum_t *accum,
std::size_t sample_count);

} // namespace gpu_light

// Global-namespace entry points to the GPU backend, declared outside
// gpu_light.
// NOTE(review): these presumably wrap gpu_light::init / shutdown / state /
// last_error; confirm in the implementation.
bool GPU_TraceInit(const mbsp_t *bsp);
void GPU_TraceShutdown();
// Checked by the Embree raystream before it submits a large occlusion batch
// to gpu_light::trace_occlusion_batch().
bool GPU_TraceAvailable();
const char *GPU_TraceLastError();

// Flushes pending sample-driven direct-light work.
void GPU_DirectQueue_Flush(const mbsp_t *bsp);
34 changes: 33 additions & 1 deletion light/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
option(LIGHT_ENABLE_VULKAN_GPU "Enable Vulkan GPU ray-query backend for light" OFF)
option(SKIP_TBB_INSTALL "Skip TBB Library Installation" OFF)
option(SKIP_EMBREE_INSTALL "Skip Embree Library Installation" OFF)

Expand All @@ -9,7 +10,7 @@ set(LIGHT_INCLUDES
../include/light/bounce.hh
../include/light/surflight.hh
../include/light/ltface.hh
../include/light/trace.hh
../include/light/trace.hh ../include/light/trace_gpu.hh
../include/light/write.hh
../include/light/spatialindex.hh
)
Expand Down Expand Up @@ -47,9 +48,40 @@ endif(embree_FOUND)

add_library(liblight STATIC ${LIGHT_SOURCES})

# Optional Vulkan ray-query backend: adds the GPU sources to liblight and
# compiles the compute shaders to SPIR-V in the build tree.
if (LIGHT_ENABLE_VULKAN_GPU)
    find_package(Vulkan REQUIRED)
    find_program(GLSLANG_VALIDATOR glslangValidator REQUIRED)

    target_sources(liblight PRIVATE
        trace_gpu.cc
        trace_gpu_vulkan.cc
    )

    # PUBLIC rather than PRIVATE: HAVE_GPU_LIGHT is tested inside public
    # headers (include/light/trace_gpu.hh derives GPU_LIGHT_COMPILED from it,
    # and include/light/trace_embree.hh switches an inline function body on it).
    # Every target that links liblight must therefore see the same definition;
    # otherwise consumers get GPU_LIGHT_COMPILED == 0 and a different inline
    # body than liblight was built with (an ODR violation).
    target_compile_definitions(liblight PUBLIC HAVE_GPU_LIGHT=1)
    target_link_libraries(liblight PRIVATE Vulkan::Vulkan)

    # One custom command per compute shader: gpu_shaders/<name>.comp is
    # compiled to <build dir>/gpu_shaders/<name>.comp.spv.
    set(GPU_SHADER_SPVS)
    foreach(GPU_SHADER_NAME occlusion direct_phase)
        set(GPU_SHADER_SRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu_shaders/${GPU_SHADER_NAME}.comp")
        set(GPU_SHADER_SPV "${CMAKE_CURRENT_BINARY_DIR}/gpu_shaders/${GPU_SHADER_NAME}.comp.spv")
        add_custom_command(
            OUTPUT "${GPU_SHADER_SPV}"
            COMMAND "${CMAKE_COMMAND}" -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/gpu_shaders"
            COMMAND "${GLSLANG_VALIDATOR}" -V "${GPU_SHADER_SRC}" -o "${GPU_SHADER_SPV}"
            DEPENDS "${GPU_SHADER_SRC}"
            VERBATIM)
        list(APPEND GPU_SHADER_SPVS "${GPU_SHADER_SPV}")
    endforeach()

    # Building liblight also (re)builds the SPIR-V files.
    add_custom_target(light_gpu_shaders DEPENDS ${GPU_SHADER_SPVS})
    add_dependencies(liblight light_gpu_shaders)
endif()

target_link_libraries(liblight PRIVATE common ${CMAKE_THREAD_LIBS_INIT} fmt::fmt jsoncpp_static)

add_executable(light main.cc)
if (LIGHT_ENABLE_VULKAN_GPU)
add_dependencies(light light_gpu_shaders)
endif()

target_link_libraries(light PRIVATE common liblight)

if (embree_FOUND)
Expand Down
Loading