From 3615f9986e8959e705da37f262ad340a5da9a5db Mon Sep 17 00:00:00 2001 From: Cary Phillips Date: Tue, 17 Mar 2026 17:37:13 -0700 Subject: [PATCH 1/2] Fix 32-bit ARM SIGBUS: align elastic allocator payload for coded_lists::buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: OpenJPH’s mem_elastic_allocator lays out memory as: [ stores_list header ][ payload... ] Payload holds placement-new `coded_lists` instances via get_buffer(): p = new (cur_store->data) coded_lists(needed_bytes); Each `coded_lists` sets its bitstream pointer as: buf = (ui8*)this + sizeof(coded_lists) So the address of `buf` is: slab + offset(stores_list → data) + sizeof(coded_lists) Previously, `data` was set to immediately after the `stores_list` object (i.e. offset sizeof(stores_list) from the slab start). The compiler-chosen size of `stores_list` is not guaranteed to be a multiple of 8 or 16. For some layouts, the first `coded_lists` header therefore landed at an offset such that `buf` ended up at an address congruent to 4 (mod 8): 4-byte aligned but not 8-byte aligned. On strict 32-bit ARM, libc memcpy used when copying or flushing these bitstreams (e.g. wide loads such as LDRD) can require 8-byte alignment. Misaligned `buf` then triggers SIGBUS. This showed up in practice when OpenJPH was used from OpenEXR (Huffman-table / compressed data flush paths) on armv6 and armv7 machines. Root cause: The failure is not in `coded_lists` itself but in where the payload region begins relative to the slab. Padding only inside `coded_lists` or rounding the *user* allocation size is insufficient if the *first* byte of the payload region is still at a bad offset from the malloc base. Solution -------- 1. stores_list: Introduce offset16() = round_up(sizeof(stores_list), 16). Construct with: data = orig_data = (ui8*)this + offset16() so the first byte available for `coded_lists` is 16-byte aligned from the start of the slab. eval_store_bytes() adds this offset so malloc allocates enough space for header + padding + payload. 2. get_buffer: Round (needed_bytes + sizeof(coded_lists)) up to a multiple of 16 so successive `coded_lists` regions in the same store keep consistent alignment as `data` advances. Together, `coded_lists` headers and their `buf` pointers stay suitably aligned for memcpy/fwrite on 32-bit ARM while preserving existing allocator behavior on other platforms. Analysis and solution made with the help of Cursor / Claude Opus 4.5 Signed-off-by: Cary Phillips --- src/core/openjph/ojph_mem.h | 12 ++++++++++-- src/core/others/ojph_mem.cpp | 5 ++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/core/openjph/ojph_mem.h b/src/core/openjph/ojph_mem.h index 599f36d..202da8d 100644 --- a/src/core/openjph/ojph_mem.h +++ b/src/core/openjph/ojph_mem.h @@ -245,11 +245,19 @@ namespace ojph { private: struct stores_list { + // Payload (coded_lists + bitstream) must start at a multiple of 16 bytes. + // Otherwise coded_lists::buf can be 4 mod 8, which causes misalignment + // on 32-bit architectures. So round sizeof(stores_list) to next + // multiple of 16. + static constexpr ui32 offset16() + { + return (ui32) ((sizeof (stores_list) + 15u) & ~15u); + } stores_list(ui32 available_bytes) { this->next_store = NULL; this->orig_size = this->available = available_bytes; - this->orig_data = this->data = (ui8*)this + sizeof(stores_list); + this->orig_data = this->data = (ui8*)this + offset16(); } void restart() { @@ -259,7 +267,7 @@ namespace ojph { } static ui32 eval_store_bytes(ui32 available_bytes) { // calculates how many bytes need to be allocated - return available_bytes + (ui32)sizeof(stores_list); + return available_bytes + offset16(); } stores_list *next_store; ui8 *orig_data, *data; diff --git a/src/core/others/ojph_mem.cpp b/src/core/others/ojph_mem.cpp index fcb9e63..ca7a561 100644 --- a/src/core/others/ojph_mem.cpp +++ b/src/core/others/ojph_mem.cpp @@ -112,7 +112,10 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// void mem_elastic_allocator::get_buffer(ui32 needed_bytes, coded_lists* &p) { - ui32 extended_bytes = needed_bytes + (ui32)sizeof(coded_lists); + // Round up so each coded_lists (and coded_lists::buf) stays 16-byte aligned + // within the store; avoids alignment fault on 32-bit architectures + ui32 raw = needed_bytes + (ui32)sizeof (coded_lists); + ui32 extended_bytes = (raw + 15u) & ~15u; if (store == NULL) cur_store = store = allocate(&store, extended_bytes); From a81d88c1c86c9eac4ed182059f8206a0388b15c3 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 19 Mar 2026 22:57:28 +1100 Subject: [PATCH 2/2] Changed offset16() to stores_list_size16() in ojph_mem.h Changed function name from offset16() to the more descriptive name stores_list_size16(). --- src/core/openjph/ojph_mem.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/openjph/ojph_mem.h b/src/core/openjph/ojph_mem.h index 202da8d..d9f22b5 100644 --- a/src/core/openjph/ojph_mem.h +++ b/src/core/openjph/ojph_mem.h @@ -249,7 +249,7 @@ namespace ojph { // Otherwise coded_lists::buf can be 4 mod 8, which causes misalignment // on 32-bit architectures. So round sizeof(stores_list) to next // multiple of 16. - static constexpr ui32 offset16() + static constexpr ui32 stores_list_size16() { return (ui32) ((sizeof (stores_list) + 15u) & ~15u); } @@ -257,7 +257,7 @@ namespace ojph { { this->next_store = NULL; this->orig_size = this->available = available_bytes; - this->orig_data = this->data = (ui8*)this + offset16(); + this->orig_data = this->data = (ui8*)this + stores_list_size16(); } void restart() { @@ -267,7 +267,7 @@ namespace ojph { } static ui32 eval_store_bytes(ui32 available_bytes) { // calculates how many bytes need to be allocated - return available_bytes + offset16(); + return available_bytes + stores_list_size16(); } stores_list *next_store; ui8 *orig_data, *data;