diff --git a/.jules/bolt.md b/.jules/bolt.md index a7a7902..5277584 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,7 @@ ## 2025-03-10 - [PPM Framebuffer IO Optimization] **Learning:** `std::fwrite` has overhead for every call. Writing 3 bytes per pixel individually for an 800x600 image causes 480,000 library calls per frame. Buffering an entire row of pixels (or the whole image) and making one `fwrite` call per row (or per image) provides a massive performance boost (nearly 2x speedup for the offline demo application). **Action:** Always batch I/O operations. When writing image files or any large binary data, buffer the data in memory and write in large chunks rather than making many small `fwrite` calls. + +## 2025-03-17 - [Renderer Dynamic Vector Allocation Avoidance] +**Learning:** Constructing local `std::vector`s inside `Renderer::drawMesh` performs heap allocations and deallocations on every draw call, which adds significant overhead when the function is called thousands of times per frame. Keeping the vectors as class members and calling `.resize()` reuses their existing capacity across calls. +**Action:** Always keep scratch vectors used in render loops as class members, and prefer `.resize()` over constructing new locals when the vector sizes match vertex or index counts. 
diff --git a/include/soft_render/render/renderer.hpp b/include/soft_render/render/renderer.hpp index ca757f2..29f3827 100644 --- a/include/soft_render/render/renderer.hpp +++ b/include/soft_render/render/renderer.hpp @@ -55,6 +55,8 @@ class Renderer { pipeline::FragmentShader fs_; pipeline::Uniforms uniforms_; pipeline::SceneLighting lighting_; + std::vector transformedBuffer_; + std::vector triangleBuffer_; }; } diff --git a/src/render/renderer.cpp b/src/render/renderer.cpp index 4a0cebf..b3f1ab7 100644 --- a/src/render/renderer.cpp +++ b/src/render/renderer.cpp @@ -58,16 +58,16 @@ void Renderer::drawMesh(const pipeline::Vertex* verts, int vCount, const uint32_t* indices, int iCount, const pipeline::Material& mat) { // Transform all vertices - std::vector transformed(vCount); - vp_.processBatch(verts, transformed.data(), vCount, uniforms_); + transformedBuffer_.resize(vCount); + vp_.processBatch(verts, transformedBuffer_.data(), vCount, uniforms_); // Build triangle list int triCount = iCount / 3; - std::vector tris(triCount); + triangleBuffer_.resize(triCount); for (int i = 0; i < triCount; ++i) { - tris[i].v[0] = transformed[indices[i*3 + 0]]; - tris[i].v[1] = transformed[indices[i*3 + 1]]; - tris[i].v[2] = transformed[indices[i*3 + 2]]; + triangleBuffer_[i].v[0] = transformedBuffer_[indices[i*3 + 0]]; + triangleBuffer_[i].v[1] = transformedBuffer_[indices[i*3 + 1]]; + triangleBuffer_[i].v[2] = transformedBuffer_[indices[i*3 + 2]]; } // Fragment shader @@ -76,7 +76,7 @@ void Renderer::drawMesh(const pipeline::Vertex* verts, int vCount, // Rasterize pipeline::Rasterizer rast(fb_); - rast.rasterizeBatch(tris.data(), triCount, fragCb); + rast.rasterizeBatch(triangleBuffer_.data(), triCount, fragCb); } void Renderer::draw(const DrawCall& call) {