From 6e95843a3a47ed7e6fab99cf7fbae992bdd13242 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:52:47 +0200 Subject: [PATCH 01/37] resolve conflicts --- .gitignore | 3 + LocalPackages/DynamicTest/.gitignore | 8 + LocalPackages/DynamicTest/Package.swift | 22 ++ .../Sources/DynamicTest/DynamicTest.swift | 3 + LocalPackages/MallocInterposerC/.gitignore | 8 + LocalPackages/MallocInterposerC/Package.swift | 28 ++ .../MallocInterposerC/include/interposer.h | 77 ++++ .../MallocInterposerC/src/interposer-darwin.c | 266 +++++++++++++ .../MallocInterposerC/src/interposer-unix.c | 289 ++++++++++++++ .../MallocInterposerTests.swift | 29 ++ .../MallocInterposerSwift/Package.resolved | 15 + .../MallocInterposerSwift/Package.swift | 33 ++ .../Sources/MallocInterposerSwift/.swift | 1 + .../MallocInterposerSwift.swift | 140 +++++++ .../SwiftTestClient/SwiftTestClient.swift | 30 ++ Package.resolved | 21 +- Package.swift | 35 +- Sources/Benchmark/BenchmarkExecutor.swift | 34 +- .../Benchmark/BenchmarkMetric+Defaults.swift | 117 +++--- Sources/Benchmark/BenchmarkMetric.swift | 92 +++-- .../MallocStats+jemalloc-support.swift | 363 ------------------ .../OperatingSystemAndMallocTests.swift | 3 + 22 files changed, 1094 insertions(+), 523 deletions(-) create mode 100644 LocalPackages/DynamicTest/.gitignore create mode 100644 LocalPackages/DynamicTest/Package.swift create mode 100644 LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift create mode 100644 LocalPackages/MallocInterposerC/.gitignore create mode 100644 LocalPackages/MallocInterposerC/Package.swift create mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h create mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c create mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c create mode 100644 
LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift create mode 100644 LocalPackages/MallocInterposerSwift/Package.resolved create mode 100644 LocalPackages/MallocInterposerSwift/Package.swift create mode 100644 LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift create mode 100644 LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift create mode 100644 LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift delete mode 100644 Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift diff --git a/.gitignore b/.gitignore index 0dea6af5..8d9bea4a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore + +.DS_Store + ## User settings xcuserdata/ diff --git a/LocalPackages/DynamicTest/.gitignore b/LocalPackages/DynamicTest/.gitignore new file mode 100644 index 00000000..0023a534 --- /dev/null +++ b/LocalPackages/DynamicTest/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +/.build +/Packages +xcuserdata/ +DerivedData/ +.swiftpm/configuration/registries.json +.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata +.netrc diff --git a/LocalPackages/DynamicTest/Package.swift b/LocalPackages/DynamicTest/Package.swift new file mode 100644 index 00000000..5f586fd5 --- /dev/null +++ b/LocalPackages/DynamicTest/Package.swift @@ -0,0 +1,22 @@ +// swift-tools-version: 6.1 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "DynamicTest", + products: [ + // Products define the executables and libraries a package produces, making them visible to other packages. 
+ .library( + name: "DynamicTest", + type: .dynamic, + targets: ["DynamicTest"]), + ], + targets: [ + // Targets are the basic building blocks of a package, defining a module or a test suite. + // Targets can depend on other targets in this package and products from dependencies. + .target( + name: "DynamicTest" + ) + ] +) diff --git a/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift b/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift new file mode 100644 index 00000000..7876a26c --- /dev/null +++ b/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift @@ -0,0 +1,3 @@ +public func dynamicTest() { + print("Dynamic Test") +} diff --git a/LocalPackages/MallocInterposerC/.gitignore b/LocalPackages/MallocInterposerC/.gitignore new file mode 100644 index 00000000..0023a534 --- /dev/null +++ b/LocalPackages/MallocInterposerC/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +/.build +/Packages +xcuserdata/ +DerivedData/ +.swiftpm/configuration/registries.json +.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata +.netrc diff --git a/LocalPackages/MallocInterposerC/Package.swift b/LocalPackages/MallocInterposerC/Package.swift new file mode 100644 index 00000000..fe9152b4 --- /dev/null +++ b/LocalPackages/MallocInterposerC/Package.swift @@ -0,0 +1,28 @@ +// swift-tools-version: 6.1 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "MallocInterposer", + products: [ + // Products define the executables and libraries a package produces, making them visible to other packages. + .library( + name: "MallocInterposerC", + type: .dynamic, + targets: ["MallocInterposerC"]), + ], + targets: [ + // Targets are the basic building blocks of a package, defining a module or a test suite. + // Targets can depend on other targets in this package and products from dependencies. 
+ .target( + name: "MallocInterposerC", + linkerSettings: [ + .linkedLibrary("dl"), + ]), + .testTarget( + name: "MallocInterposerTests", + dependencies: ["MallocInterposerC"] + ), + ] +) diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h new file mode 100644 index 00000000..09a13dac --- /dev/null +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -0,0 +1,77 @@ +#ifndef HOOKED_FREE +#define HOOKED_FREE + +#include +#include +#include +#if __APPLE__ +# include +#endif + +// Hook function types +typedef void (*malloc_hook_t)(size_t size); +typedef void (*free_hook_t)(void* ptr); +typedef void (*calloc_hook_t)(size_t nmemb, size_t size); +typedef void (*realloc_hook_t)(void* ptr, size_t size); +typedef void (*valloc_hook_t)(size_t size); +typedef void (*posix_memalign_hook_t)(void **memptr, size_t alignment, size_t size); + +#if __APPLE__ +typedef void (*malloc_zone_hook_t)(malloc_zone_t *zone, size_t size); +typedef void (*malloc_zone_calloc_hook_t)(malloc_zone_t *zone, size_t num_items, size_t size); +typedef void (*malloc_zone_realloc_hook_t)(malloc_zone_t *zone, void *ptr, size_t size); +typedef void (*malloc_zone_memalign_hook_t)(malloc_zone_t *zone, size_t alignment, size_t size); +typedef void (*malloc_zone_valloc_hook_t)(malloc_zone_t *zone, size_t size); +typedef void (*malloc_zone_free_hook_t)(malloc_zone_t *zone, void *ptr); +#endif + + +// Hook management functions +void set_malloc_hook(malloc_hook_t hook); +void set_free_hook(free_hook_t hook); +void set_calloc_hook(calloc_hook_t hook); +void set_realloc_hook(realloc_hook_t hook); + +#if __APPLE__ +void set_malloc_zone_hook(malloc_zone_hook_t hook); +void set_malloc_zone_calloc_hook(malloc_zone_calloc_hook_t hook); +void set_malloc_zone_realloc_hook(malloc_zone_realloc_hook_t hook); +void set_malloc_zone_memalign_hook(malloc_zone_memalign_hook_t 
hook); +void set_malloc_zone_valloc_hook(malloc_zone_valloc_hook_t hook); +void set_malloc_zone_free_hook(malloc_zone_free_hook_t hook); +#endif + +void clear_malloc_hook(void); +void clear_free_hook(void); +void clear_calloc_hook(void); +void clear_realloc_hook(void); + +#if __APPLE__ +void clear_malloc_zone_hook(void); +void clear_malloc_zone_calloc_hook(void); +void clear_malloc_zone_realloc_hook(void); +void clear_malloc_zone_memalign_hook(void); +void clear_malloc_zone_valloc_hook(void); +void clear_malloc_zone_free_hook(void); +#endif + +// Replacement functions +void *replacement_malloc(size_t size); +void replacement_free(void *ptr); +void *replacement_calloc(size_t nmemb, size_t size); +void *replacement_realloc(void *ptr, size_t size); +void *replacement_reallocf(void *ptr, size_t size); +void *replacement_valloc(size_t size); +int replacement_posix_memalign(void **memptr, size_t alignment, size_t size); + + +#if __APPLE__ +void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size); +void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size); +void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size); +void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size); +void replacement_malloc_zone_free(malloc_zone_t *zone, void *ptr); +#endif + +#endif diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c new file mode 100644 index 00000000..5f2c6dd6 --- /dev/null +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c @@ -0,0 +1,266 @@ +#include +#if __APPLE__ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +// Global hooks +static malloc_hook_t g_malloc_hook = NULL; +static free_hook_t g_free_hook = NULL; +static calloc_hook_t g_calloc_hook = NULL; +static realloc_hook_t g_realloc_hook = NULL; +static valloc_hook_t g_valloc_hook = NULL; +static posix_memalign_hook_t g_posix_memalign_hook = NULL; +static malloc_zone_hook_t g_malloc_zone_hook = NULL; +static malloc_zone_realloc_hook_t g_malloc_zone_realloc_hook = NULL; +static malloc_zone_calloc_hook_t g_malloc_zone_calloc_hook = NULL; +static malloc_zone_valloc_hook_t g_malloc_zone_valloc_hook = NULL; +static malloc_zone_memalign_hook_t g_malloc_zone_memalign_hook = NULL; +static malloc_zone_free_hook_t g_malloc_zone_free_hook = NULL; + +// Statistics +static pthread_mutex_t hook_mutex = PTHREAD_MUTEX_INITIALIZER; + +#define DYLD_INTERPOSE(_replacement,_replacee) \ + __attribute__((used)) static struct { const void *replacement; const void *replacee; } _interpose_##_replacee \ + __attribute__ ((section("__DATA,__interpose"))) = { (const void *)(unsigned long)&_replacement, (const void *)(unsigned long)&_replacee }; + +/* on Darwin calling the original function is super easy, just call it, done. */ +#define JUMP_INTO_LIBC_FUN(_fun, ...) 
/* \ +*/ do { /* \ +*/ return _fun(__VA_ARGS__); /* \ +*/ } while(0) + +// Hook management functions +void set_malloc_hook(malloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_free_hook(free_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_free_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_calloc_hook(calloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_calloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_realloc_hook(realloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_realloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_valloc_hook(valloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_valloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_posix_memalign_hook(posix_memalign_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_posix_memalign_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_hook(malloc_zone_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_zone_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_realloc_hook(malloc_zone_realloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_zone_realloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_calloc_hook(malloc_zone_calloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_zone_calloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_valloc_hook(malloc_zone_valloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_zone_valloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_memalign_hook(malloc_zone_memalign_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_zone_memalign_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_malloc_zone_free_hook(malloc_zone_free_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + 
g_malloc_zone_free_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +// Clear hooks +void clear_malloc_hook(void) { set_malloc_hook(NULL); } +void clear_free_hook(void) { set_free_hook(NULL); } +void clear_calloc_hook(void) { set_calloc_hook(NULL); } +void clear_realloc_hook(void) { set_realloc_hook(NULL); } +void clear_valloc_hook(void) { set_valloc_hook(NULL); } +void clear_posix_memalign_hook(void) { set_posix_memalign_hook(NULL); } +void clear_malloc_zone_hook(void) { set_malloc_zone_hook(NULL); } +void clear_malloc_zone_realloc_hook(void) { set_malloc_zone_realloc_hook(NULL); } +void clear_malloc_zone_calloc_hook(void) { set_malloc_zone_calloc_hook(NULL); } +void clear_malloc_zone_valloc_hook(void) { set_malloc_zone_valloc_hook(NULL); } +void clear_malloc_zone_memalign_hook(void) { set_malloc_zone_memalign_hook(NULL); } +void clear_malloc_zone_free_hook(void) { set_malloc_zone_free_hook(NULL); } + +// Replacement functions +void replacement_free(void *ptr) { + + // Call hook if set + if (g_free_hook) { + g_free_hook(ptr); + } + + JUMP_INTO_LIBC_FUN(free, ptr); +} + +void *replacement_malloc(size_t size) { + + // Call hook if set + if (g_malloc_hook) { + g_malloc_hook(size); + } + + JUMP_INTO_LIBC_FUN(malloc, size); +} + +void *replacement_realloc(void *ptr, size_t size) { + if (g_realloc_hook) { + g_realloc_hook(ptr, size); + } + + JUMP_INTO_LIBC_FUN(realloc, ptr, size); +} + +void *replacement_calloc(size_t count, size_t size) { + if (g_calloc_hook) { + g_calloc_hook(count, size); + } + + JUMP_INTO_LIBC_FUN(calloc, count, size); +} + +void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size) { + if (g_malloc_zone_hook) { + g_malloc_zone_hook(zone, size); + } + + JUMP_INTO_LIBC_FUN(malloc_zone_malloc, zone, size); +} + +void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) { + if (g_malloc_zone_calloc_hook) { + g_malloc_zone_calloc_hook(zone, num_items, size); + } + + JUMP_INTO_LIBC_FUN(malloc_zone_calloc, 
zone, num_items, size); +} + +void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size) { + if (g_malloc_zone_valloc_hook) { + g_malloc_zone_valloc_hook(zone, size); + } + + JUMP_INTO_LIBC_FUN(malloc_zone_valloc, zone, size); +} + +void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { + if (0 == size) { + replacement_free(ptr); + return NULL; + } + if (!ptr) { + return replacement_malloc(size); + } + + if (g_malloc_zone_realloc_hook) { + g_malloc_zone_realloc_hook(zone, ptr, size); + } + + JUMP_INTO_LIBC_FUN(realloc, ptr, size); +} + +void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { + if (g_malloc_zone_memalign_hook) { + g_malloc_zone_memalign_hook(zone, alignment, size); + } + + JUMP_INTO_LIBC_FUN(malloc_zone_memalign, zone, alignment, size); +} + +void replacement_malloc_zone_free(malloc_zone_t *zone, void *ptr) { + if (g_malloc_zone_free_hook) { + g_malloc_zone_free_hook(zone, ptr); + } + + JUMP_INTO_LIBC_FUN(malloc_zone_free, zone, ptr); +} + +void *replacement_reallocf(void *ptr, size_t size) { + void *new_ptr = replacement_realloc(ptr, size); + if (!new_ptr) { + replacement_free(new_ptr); + } + return new_ptr; +} + +void *replacement_valloc(size_t size) { + if (g_valloc_hook) { + g_valloc_hook(size); + } + + JUMP_INTO_LIBC_FUN(valloc, size); +} + +int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { + if (g_posix_memalign_hook) { + g_posix_memalign_hook(memptr, alignment, size); + } + + JUMP_INTO_LIBC_FUN(posix_memalign, memptr, alignment, size); +} + +DYLD_INTERPOSE(replacement_free, free) +DYLD_INTERPOSE(replacement_malloc, malloc) +DYLD_INTERPOSE(replacement_realloc, realloc) +DYLD_INTERPOSE(replacement_calloc, calloc) +DYLD_INTERPOSE(replacement_reallocf, reallocf) +DYLD_INTERPOSE(replacement_valloc, valloc) +DYLD_INTERPOSE(replacement_posix_memalign, posix_memalign) +DYLD_INTERPOSE(replacement_malloc_zone_malloc, malloc_zone_malloc) 
+DYLD_INTERPOSE(replacement_malloc_zone_calloc, malloc_zone_calloc) +DYLD_INTERPOSE(replacement_malloc_zone_valloc, malloc_zone_valloc) +DYLD_INTERPOSE(replacement_malloc_zone_realloc, malloc_zone_realloc) +DYLD_INTERPOSE(replacement_malloc_zone_memalign, malloc_zone_memalign) +DYLD_INTERPOSE(replacement_malloc_zone_free, malloc_zone_free) +#endif diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c new file mode 100644 index 00000000..319a4e22 --- /dev/null +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -0,0 +1,289 @@ +#ifndef __APPLE__ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* a big block of memory that we'll use for recursive mallocs */ +static char g_recursive_malloc_mem[10 * 1024 * 1024] = {0}; +/* the index of the first free byte */ +static _Atomic ptrdiff_t g_recursive_malloc_next_free_ptr = ATOMIC_VAR_INIT(0); + +#define LIBC_SYMBOL(_fun) "" # _fun + +/* Some thread-local flags we use to check if we're recursively in a hooked function. */ +static __thread bool g_in_malloc = false; +static __thread bool g_in_realloc = false; +static __thread bool g_in_free = false; +static __thread bool g_in_socket = false; +static __thread bool g_in_accept = false; +static __thread bool g_in_accept4 = false; +static __thread bool g_in_close = false; + +/* The types of the variables holding the libc function pointers. 
*/ +typedef void *(*type_libc_malloc)(size_t); +typedef void *(*type_libc_realloc)(void *, size_t); +typedef void (*type_libc_free)(void *); +typedef int (*type_libc_socket)(int, int, int); +typedef int (*type_libc_accept)(int, struct sockaddr*, socklen_t *); +typedef int (*type_libc_accept4)(int, struct sockaddr *, socklen_t *, int); +typedef int (*type_libc_close)(int); + +/* The (atomic) globals holding the pointer to the original libc implementation. */ +_Atomic type_libc_malloc g_libc_malloc; +_Atomic type_libc_realloc g_libc_realloc; +_Atomic type_libc_free g_libc_free; +_Atomic type_libc_socket g_libc_socket; +_Atomic type_libc_accept g_libc_accept; +_Atomic type_libc_accept4 g_libc_accept4; +_Atomic type_libc_close g_libc_close; + +// Global hooks +static malloc_hook_t g_malloc_hook = NULL; +static free_hook_t g_free_hook = NULL; +static calloc_hook_t g_calloc_hook = NULL; +static realloc_hook_t g_realloc_hook = NULL; +static valloc_hook_t g_valloc_hook = NULL; +static posix_memalign_hook_t g_posix_memalign_hook = NULL; + +// Statistics +static pthread_mutex_t hook_mutex = PTHREAD_MUTEX_INITIALIZER; + +// Hook management functions +void set_malloc_hook(malloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_malloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_free_hook(free_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_free_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_calloc_hook(calloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_calloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_realloc_hook(realloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_realloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_valloc_hook(valloc_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_valloc_hook = hook; + pthread_mutex_unlock(&hook_mutex); +} + +void set_posix_memalign_hook(posix_memalign_hook_t hook) { + pthread_mutex_lock(&hook_mutex); + g_posix_memalign_hook = 
hook; + pthread_mutex_unlock(&hook_mutex); +} + +// Clear hooks +void clear_malloc_hook(void) { set_malloc_hook(NULL); } +void clear_free_hook(void) { set_free_hook(NULL); } +void clear_calloc_hook(void) { set_calloc_hook(NULL); } +void clear_realloc_hook(void) { set_realloc_hook(NULL); } +void clear_valloc_hook(void) { set_valloc_hook(NULL); } +void clear_posix_memalign_hook(void) { set_posix_memalign_hook(NULL); } + +// this is called if malloc is called whilst trying to resolve libc's realloc. +// we just vend out pointers to a large block in the BSS (which we never free). +// This block should be large enough because it's only used when malloc is +// called from dlsym which should only happen once per thread. +static void *recursive_malloc(size_t size_in) { + size_t size = size_in; + if ((size & 0xf) != 0) { + // make size 16 byte aligned + size = (size + 0xf) & (~(size_t)0xf); + } + + ptrdiff_t next = atomic_fetch_add_explicit(&g_recursive_malloc_next_free_ptr, + size, + memory_order_relaxed); + if ((size_t)next >= sizeof(g_recursive_malloc_mem)) { + // we ran out of memory + return NULL; + } + return (void *)((intptr_t)g_recursive_malloc_mem + next); +} + +static bool is_recursive_malloc_block(void *ptr) { + uintptr_t block_begin = (uintptr_t)g_recursive_malloc_mem; + uintptr_t block_end = block_begin + sizeof(g_recursive_malloc_mem); + uintptr_t user_ptr = (uintptr_t)ptr; + + return user_ptr >= block_begin && user_ptr < block_end; +} + +// this is called if realloc is called whilst trying to resolve libc's realloc. +static void *recursive_realloc(void *ptr, size_t size) { + // not implemented yet... + abort(); +} + +// this is called if free is called whilst trying to resolve libc's free. +static void recursive_free(void *ptr) { + // not implemented yet... + abort(); +} + +// this is called if socket is called whilst trying to resolve libc's socket. 
+static int recursive_socket(int domain, int type, int protocol) { + // not possible + abort(); +} + +// this is called if accept is called whilst trying to resolve libc's accept. +static int recursive_accept(int socket, struct sockaddr *restrict address, socklen_t *restrict address_len) { + // not possible + abort(); +} + +// this is called if accept4 is called whilst trying to resolve libc's accept4. +static int recursive_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) { + // not possible + abort(); +} + +// this is called if close is called whilst trying to resolve libc's close. +static int recursive_close(int fildes) { + // not possible + abort(); +} + +/* On Apple platforms getting to the original libc function from a hooked + * function is easy. On other UNIX systems this is slightly harder because we + * have to look up the function with the dynamic linker. Because that isn't + * super performant we cache the lookup result in an (atomic) global. + * + * Calling into the libc function if we have already cached it is easy, we + * (atomically) load it and call into it. If have not yet cached it, we need to + * resolve it which we do by using dlsym and then write it into the (atomic) + * global. There's only one slight problem: dlsym might call back into the + * function we're just trying to resolve (dlsym does call malloc). In that case + * we need to emulate that function (named recursive_*). But that's all then. + */ +#define JUMP_INTO_LIBC_FUN(_fun, ...) /* \ +*/ do { /* \ +*/ /* Let's see if somebody else already resolved that function for us */ /* \ +*/ type_libc_ ## _fun local_fun = atomic_load(&g_libc_ ## _fun); /* \ +*/ if (!local_fun) { /* \ +*/ /* No, we're the first ones to use this function. */ /* \ +*/ if (!g_in_ ## _fun) { /* \ +*/ g_in_ ## _fun = true; /* \ +*/ /* If we're here, we're at least not recursively in ourselves. */ /* \ +*/ /* That means we can use dlsym to resolve the libc function. 
*/ /* \ +*/ type_libc_ ## _fun desired = dlsym(RTLD_NEXT, LIBC_SYMBOL(_fun)); /* \ +*/ if (atomic_compare_exchange_strong(&g_libc_ ## _fun, &local_fun, desired)) { /* \ +*/ /* If we're here, we won the race, so let's use our resolved function. */ /* \ +*/ local_fun = desired; /* \ +*/ } else { /* \ +*/ /* Lost the race, let's load the global again */ /* \ +*/ local_fun = atomic_load(&g_libc_ ## _fun); /* \ +*/ } /* \ +*/ } else { /* \ +*/ /* Okay, we can't jump into libc here and need to use our own version. */ /* \ +*/ return recursive_ ## _fun (__VA_ARGS__); /* \ +*/ } /* \ +*/ } /* \ +*/ return local_fun(__VA_ARGS__); /* \ +*/ } while(0) + +void replacement_free(void *ptr) { + if (ptr) { + + if (g_free_hook) { + g_free_hook(ptr); + } + + if (!is_recursive_malloc_block(ptr)) { + JUMP_INTO_LIBC_FUN(free, ptr); + } + } +} + +void *replacement_malloc(size_t size) { + if (g_malloc_hook) { + g_malloc_hook(size); + } + + JUMP_INTO_LIBC_FUN(malloc, size); +} + +void *replacement_realloc(void *ptr, size_t size) { + if (0 == size) { + replacement_free(ptr); + return NULL; + } + if (!ptr) { + return replacement_malloc(size); + } + + if (g_realloc_hook) { + g_realloc_hook(ptr, size); + } + + JUMP_INTO_LIBC_FUN(realloc, ptr, size); +} + +void *replacement_calloc(size_t count, size_t size) { + void *ptr = replacement_malloc(count * size); + memset(ptr, 0, count * size); + + if (g_calloc_hook) { + g_calloc_hook(count, size); + } + + return ptr; +} + +void *replacement_reallocf(void *ptr, size_t size) { + void *new_ptr = replacement_realloc(ptr, size); + if (!new_ptr) { + replacement_free(new_ptr); + } + return new_ptr; +} + +void *replacement_valloc(size_t size) { + if (g_valloc_hook) { + g_valloc_hook(size); + } + // not aligning correctly (should be PAGE_SIZE) but good enough + return replacement_malloc(size); +} + +int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { + if (g_posix_memalign_hook) { + g_posix_memalign_hook(memptr, alignment, size); + 
} + + // not aligning correctly (should be `alignment`) but good enough + void *ptr = replacement_malloc(size); + if (ptr && memptr) { + *memptr = ptr; + return 0; + } else { + return 1; + } +} +#endif diff --git a/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift b/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift new file mode 100644 index 00000000..31d77848 --- /dev/null +++ b/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift @@ -0,0 +1,29 @@ +//import Testing +//@testable import MallocInterposer +//import Darwin +// +//final class Foo { +// var bar: Int = 0 +// +// init() {} +//} +// +//@Test func example() async throws { +// var hookCalled = false +// var allocSize = 0 +// +// MallocHooks.setMallocHook { size, originalResult in +// hookCalled = true +// allocSize = size +// return originalResult +// } +// +// let foo = Foo() +// print(foo.bar) +// +// #expect(hookCalled == true) +// #expect(allocSize == 1024) +// +// let stats = MallocInterposer.shared.getStatistics() +// #expect(stats.mallocCount == 1) +//} diff --git a/LocalPackages/MallocInterposerSwift/Package.resolved b/LocalPackages/MallocInterposerSwift/Package.resolved new file mode 100644 index 00000000..30a41cda --- /dev/null +++ b/LocalPackages/MallocInterposerSwift/Package.resolved @@ -0,0 +1,15 @@ +{ + "originHash" : "f9d52b4684b4f378f6711fa01082569f9206a98fc7e9e15cb2fc72bbeafb9737", + "pins" : [ + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" + } + } + ], + "version" : 3 +} diff --git a/LocalPackages/MallocInterposerSwift/Package.swift b/LocalPackages/MallocInterposerSwift/Package.swift new file mode 100644 index 00000000..5f165b9f --- /dev/null +++ b/LocalPackages/MallocInterposerSwift/Package.swift @@ -0,0 
+1,33 @@ +// swift-tools-version: 6.1 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "MallocInterposerSwift", + products: [ + // Products define the executables and libraries a package produces, making them visible to other packages. + .library( + name: "MallocInterposerSwift", + type: .dynamic, + targets: ["MallocInterposerSwift"]), + ], + dependencies: [ + .package(path: "../MallocInterposerC"), + .package(url: "https://github.com/apple/swift-atomics.git", from: "1.3.0") + ], + targets: [ + // Targets are the basic building blocks of a package, defining a module or a test suite. + // Targets can depend on other targets in this package and products from dependencies. + .target( + name: "MallocInterposerSwift", + dependencies: [ + .product(name: "MallocInterposerC", package: "MallocInterposerC"), + .product(name: "Atomics", package: "swift-atomics") + ]), + .executableTarget( + name: "SwiftTestClient", + dependencies: ["MallocInterposerSwift"] + ), + ] +) diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift @@ -0,0 +1 @@ + diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift new file mode 100644 index 00000000..0f7ae955 --- /dev/null +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift @@ -0,0 +1,140 @@ +import Foundation +import Atomics +import MallocInterposerC + +/// Swift-friendly hook types +public typealias MallocHook = @convention(c) (Int) -> Void +public typealias FreeHook = @convention(c) 
(UnsafeMutableRawPointer?) -> Void +public typealias CallocHook = @convention(c) (Int, Int) -> Void +public typealias ReallocHook = @convention(c) (UnsafeMutableRawPointer?, Int) -> Void + +#if canImport(Darwin) +public typealias MallocZoneHook = @convention(c) (UnsafeMutablePointer?, Int) -> Void +public typealias MallocZoneFreeHook = @convention(c) (UnsafeMutablePointer?, UnsafeMutableRawPointer?) -> Void +public typealias MallocZoneCallocHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void +public typealias MallocZoneReallocHook = @convention(c) (UnsafeMutablePointer?, UnsafeMutableRawPointer?, Int) -> Void +public typealias MallocZoneVallocHook = @convention(c) (UnsafeMutablePointer?, Int) -> Void +public typealias MallocZoneMemalignHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void +#endif + +/// Main class for managing malloc interposition +public class MallocInterposerSwift: @unchecked Sendable { + /// We use `UnsafeAtomic` in order to avoid malloc calls during interposition + nonisolated(unsafe) private static var mallocCountStorage = UnsafeAtomic.Storage.init(0) + static let mallocCount = UnsafeAtomic.init(at: &mallocCountStorage) + nonisolated(unsafe) private static var mallocBytesCountStorage = UnsafeAtomic.Storage.init(0) + static let mallocBytesCount = UnsafeAtomic.init(at: &mallocBytesCountStorage) + nonisolated(unsafe) private static var freeCountStorage = UnsafeAtomic.Storage(0) + static let freeCount = UnsafeAtomic.init(at: &freeCountStorage) + /// Clear all counters + private static func clearAllCounters() { + mallocCount.store(0, ordering: .relaxed) + mallocBytesCount.store(0, ordering: .relaxed) + freeCount.store(0, ordering: .relaxed) + } + + private init() {} + + public static func hook() { + clearAllCounters() + + let mallocHook: MallocHook = { size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + 
+ let freeHook: FreeHook = { pointer in + MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) + } + + let callocHook: CallocHook = { num, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: num * size, ordering: .relaxed) + } + + let reallocHook: ReallocHook = { pointer, size in + MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + + #if canImport(Darwin) + let mallocZoneHook: MallocZoneHook = { zone, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + let mallocZoneFreeHook: MallocZoneFreeHook = { zone, pointer in + MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) + } + let mallocZoneCallocHook: MallocZoneCallocHook = { zone, num, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: num * size, ordering: .relaxed) + } + let mallocZoneReallocHook: MallocZoneReallocHook = { zone, pointer, size in + MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + let mallocZoneVallocHook: MallocZoneVallocHook = { zone, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + let mallocZoneMemalignHook: MallocZoneMemalignHook = { zone, alignment, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) +
MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + } + + set_malloc_zone_hook(mallocZoneHook) + set_malloc_zone_free_hook(mallocZoneFreeHook) + set_malloc_zone_calloc_hook(mallocZoneCallocHook) + set_malloc_zone_realloc_hook(mallocZoneReallocHook) + set_malloc_zone_valloc_hook(mallocZoneVallocHook) + set_malloc_zone_memalign_hook(mallocZoneMemalignHook) + #endif + + set_malloc_hook(mallocHook) + set_free_hook(freeHook) + set_calloc_hook(callocHook) + set_realloc_hook(reallocHook) + } + + public static func unhook() { + set_malloc_hook(nil) + set_free_hook(nil) + set_calloc_hook(nil) + set_realloc_hook(nil) + + #if canImport(Darwin) + set_malloc_zone_hook(nil) + set_malloc_zone_free_hook(nil) + set_malloc_zone_calloc_hook(nil) + set_malloc_zone_realloc_hook(nil) + set_malloc_zone_valloc_hook(nil) + set_malloc_zone_memalign_hook(nil) + #endif + } + + public static func getStatistics() -> Statistics { + let stats = Statistics( + mallocCount: mallocCount.load(ordering: .relaxed), + mallocBytesCount: mallocBytesCount.load(ordering: .relaxed), + freeCount: freeCount.load(ordering: .relaxed) + ) + + return stats + } +} + +public extension MallocInterposerSwift { + struct Statistics { + public let mallocCount: Int + public let mallocBytesCount: Int + public let freeCount: Int + + public init(mallocCount: Int, mallocBytesCount: Int, freeCount: Int) { + self.mallocCount = mallocCount + self.mallocBytesCount = mallocBytesCount + self.freeCount = freeCount + } + } +} diff --git a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift new file mode 100644 index 00000000..3433ca99 --- /dev/null +++ b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift @@ -0,0 +1,30 @@ +import Foundation +import MallocInterposerSwift +import MallocInterposerC + +@main +enum TestClient { + @_optimize(none) + static func 
main() { + print("=== MallocInterposerSwift Test ===" ) + // Reset statistics to start clean + MallocInterposerSwift.hook() + + let ptr = malloc(1000) + let ptr2 = malloc(500) + + free(ptr) + free(ptr2) + + MallocInterposerSwift.unhook() + + // Print final statistics + let stats = MallocInterposerSwift.getStatistics() + + print("Total malloc count: \(stats.mallocCount)") + print("Total allocated memory: \(stats.mallocBytesCount) bytes") + print("Total free count: \(stats.freeCount)") + + print("\n--- Test complete ---") + } +} diff --git a/Package.resolved b/Package.resolved index 9f4c1b20..4b021085 100644 --- a/Package.resolved +++ b/Package.resolved @@ -9,22 +9,13 @@ "version" : "0.1.3" } }, - { - "identity" : "package-jemalloc", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-jemalloc.git", - "state" : { - "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", - "version" : "1.0.0" - } - }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-argument-parser.git", "state" : { - "revision" : "41982a3656a71c768319979febd796c6fd111d5c", - "version" : "1.5.0" + "revision" : "309a47b2b1d9b5e991f36961c983ecec72275be3", + "version" : "1.6.1" } }, { @@ -32,8 +23,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-atomics.git", "state" : { - "revision" : "cd142fd2f64be2100422d658e7411e39489da985", - "version" : "1.2.0" + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" } }, { @@ -50,8 +41,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-system.git", "state" : { - "revision" : "a34201439c74b53f0fd71ef11741af7e7caf01e1", - "version" : "1.4.2" + "revision" : "890830fff1a577dc83134890c7984020c5f6b43b", + "version" : "1.6.2" } }, { diff --git a/Package.swift b/Package.swift index e6790598..2da8394d 100644 --- a/Package.swift +++ b/Package.swift @@ -2,11 +2,6 @@ import 
PackageDescription -import class Foundation.ProcessInfo - -// If the environment variable BENCHMARK_DISABLE_JEMALLOC is set, we'll build the package without Jemalloc support -let disableJemalloc = ProcessInfo.processInfo.environment["BENCHMARK_DISABLE_JEMALLOC"] - let package = Package( name: "Benchmark", platforms: [ @@ -27,6 +22,7 @@ let package = Package( .package(url: "https://github.com/ordo-one/TextTable.git", .upToNextMajor(from: "0.0.1")), .package(url: "https://github.com/HdrHistogram/hdrhistogram-swift.git", .upToNextMajor(from: "0.1.0")), .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.0.0")), + .package(path: "LocalPackages/MallocInterposerSwift") ], targets: [ // Plugins used by users of the package @@ -115,23 +111,8 @@ let package = Package( ), ] ) -// Check if this is a SPI build, then we need to disable jemalloc for macOS - -let macOSSPIBuild: Bool // Disables jemalloc for macOS SPI builds as the infrastructure doesn't have jemalloc there - -#if canImport(Darwin) -if let spiBuildEnvironment = ProcessInfo.processInfo.environment["SPI_BUILD"], spiBuildEnvironment == "1" { - macOSSPIBuild = true - print("Building for SPI@macOS, disabling Jemalloc") -} else { - macOSSPIBuild = false -} -#else -macOSSPIBuild = false -#endif // Add Benchmark target dynamically - // Shared dependencies var dependencies: [PackageDescription.Target.Dependency] = [ .product(name: "Histogram", package: "hdrhistogram-swift"), @@ -142,19 +123,7 @@ var dependencies: [PackageDescription.Target.Dependency] = [ .product(name: "Atomics", package: "swift-atomics"), "SwiftRuntimeHooks", "BenchmarkShared", + "MallocInterposerSwift" ] -if macOSSPIBuild == false { // jemalloc always disable for macOSSPIBuild - if let disableJemalloc, disableJemalloc != "false", disableJemalloc != "0" { - print("Jemalloc disabled through environment variable.") - } else { - package.dependencies += [ - .package(url: "https://github.com/ordo-one/package-jemalloc.git", 
.upToNextMajor(from: "1.0.0")) - ] - dependencies += [ - .product(name: "jemalloc", package: "package-jemalloc", condition: .when(platforms: [.macOS, .linux])) - ] - } -} - package.targets += [.target(name: "Benchmark", dependencies: dependencies)] diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 99e07086..5fea4f23 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -11,6 +11,7 @@ #if canImport(OSLog) import OSLog #endif +import MallocInterposerSwift // swiftlint:disable file_length @@ -25,8 +26,11 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length // swiftlint:disable cyclomatic_complexity function_body_length func run(_ benchmark: Benchmark) -> [BenchmarkResult] { var wallClockDuration: Duration = .zero - var startMallocStats = MallocStats() - var stopMallocStats = MallocStats() + var _mallocStats = MallocInterposerSwift.Statistics( + mallocCount: 0, + mallocBytesCount: 0, + freeCount: 0 + ) var startOperatingSystemStats = OperatingSystemStats() var stopOperatingSystemStats = OperatingSystemStats() var startPerformanceCounters = PerformanceCounters() @@ -106,9 +110,6 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length var iterations = 0 let initialStartTime = BenchmarkClock.now - // 'Warmup' to remove initial mallocs from stats in p100 - _ = MallocStatsProducer.makeMallocStats() // baselineMallocStats - // Calculate typical sys call check overhead and deduct that to get 'clean' stats for the actual benchmark var operatingSystemStatsOverhead = OperatingSystemStats() var baselinePeakMemoryResidentDelta = 0 @@ -154,7 +155,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length #endif if mallocStatsRequested { - startMallocStats = MallocStatsProducer.makeMallocStats() + MallocInterposerSwift.hook() } if arcStatsRequested { @@ -191,7 +192,8 @@ struct BenchmarkExecutor { // swiftlint:disable:this 
type_body_length } if mallocStatsRequested { - stopMallocStats = MallocStatsProducer.makeMallocStats() + MallocInterposerSwift.unhook() + _mallocStats = MallocInterposerSwift.getStatistics() } #if canImport(OSLog) @@ -239,21 +241,15 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { - delta = stopMallocStats.mallocCountTotal - startMallocStats.mallocCountTotal - statistics[BenchmarkMetric.mallocCountTotal.index].add(Int(delta)) - - delta = stopMallocStats.mallocCountSmall - startMallocStats.mallocCountSmall - statistics[BenchmarkMetric.mallocCountSmall.index].add(Int(delta)) + statistics[BenchmarkMetric.mallocCountTotal.index].add(Int(_mallocStats.mallocCount)) - delta = stopMallocStats.mallocCountLarge - startMallocStats.mallocCountLarge - statistics[BenchmarkMetric.mallocCountLarge.index].add(Int(delta)) + statistics[BenchmarkMetric.freeCountTotal.index].add(Int(_mallocStats.freeCount)) - delta = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory - statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) + // TODO: figure out how to get resident memory +// delta = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory +// statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) - // delta = stopMallocStats.allocatedResidentMemory - baselineMallocStats.allocatedResidentMemory // baselineMallocStats! 
- statistics[BenchmarkMetric.allocatedResidentMemory.index] - .add(Int(stopMallocStats.allocatedResidentMemory)) + statistics[BenchmarkMetric.mallocBytesCount.index].add(Int(_mallocStats.mallocBytesCount)) } if operatingSystemStatsRequested { diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index 6ec46b88..c95126b8 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -30,43 +30,42 @@ public extension BenchmarkMetric { /// There is also an convenience extension on Array defined such that you can write just `.default` rather than `BenchmarkMetric.default` /// static var `default`: [BenchmarkMetric] { - [ - .wallClock, - .cpuTotal, - .mallocCountTotal, - .throughput, - .instructions, - .peakMemoryResident, - ] + [.wallClock, + .cpuTotal, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .throughput, + .instructions, + .peakMemoryResident] } /// A collection of extended system benchmarks. static var extended: [BenchmarkMetric] { - [ - .wallClock, - .cpuUser, - .cpuTotal, - .mallocCountTotal, - .throughput, - .peakMemoryResident, - .memoryLeaked, - .syscalls, - .instructions, - ] + [.wallClock, + .cpuUser, + .cpuTotal, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .throughput, + .peakMemoryResident, + .memoryLeaked, + .syscalls, + .instructions] } /// A collection of memory benchmarks. 
static var memory: [BenchmarkMetric] { - [ - .peakMemoryResident, - .peakMemoryResidentDelta, - .peakMemoryVirtual, - .mallocCountSmall, - .mallocCountLarge, - .mallocCountTotal, - .memoryLeaked, - .allocatedResidentMemory, - ] + [.peakMemoryResident, + .peakMemoryResidentDelta, + .peakMemoryVirtual, + .mallocCountSmall, + .mallocCountLarge, + .mallocCountTotal, + .mallocBytesCount, + .memoryLeaked, + .allocatedResidentMemory] } /// A collection of ARC metrics @@ -105,36 +104,36 @@ public extension BenchmarkMetric { /// A collection of all benchmarks supported by this library. static var all: [BenchmarkMetric] { - [ - .cpuUser, - .cpuSystem, - .cpuTotal, - .wallClock, - .throughput, - .peakMemoryResident, - .peakMemoryResidentDelta, - .peakMemoryVirtual, - .mallocCountSmall, - .mallocCountLarge, - .mallocCountTotal, - .memoryLeaked, - .syscalls, - .contextSwitches, - .threads, - .threadsRunning, - .readSyscalls, - .writeSyscalls, - .readBytesLogical, - .writeBytesLogical, - .readBytesPhysical, - .writeBytesPhysical, - .instructions, - .allocatedResidentMemory, - .objectAllocCount, - .retainCount, - .releaseCount, - .retainReleaseDelta, - ] + [.cpuUser, + .cpuSystem, + .cpuTotal, + .wallClock, + .throughput, + .peakMemoryResident, + .peakMemoryResidentDelta, + .peakMemoryVirtual, + .mallocCountSmall, + .mallocCountLarge, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .memoryLeaked, + .syscalls, + .contextSwitches, + .threads, + .threadsRunning, + .readSyscalls, + .writeSyscalls, + .readBytesLogical, + .writeBytesLogical, + .readBytesPhysical, + .writeBytesPhysical, + .instructions, + .allocatedResidentMemory, + .objectAllocCount, + .retainCount, + .releaseCount, + .retainReleaseDelta] } } diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index b5d06096..0154eb13 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -37,6 +37,10 @@ public enum BenchmarkMetric: 
Hashable, Equatable, Codable, CustomStringConvertib case mallocCountLarge /// Number of small+large mallocs case mallocCountTotal + /// Number of totatl free calls + case freeCountTotal + /// The amount of memory allocated in bytes through malloc calls + case mallocBytesCount /// The amount of allocated resident memory according to the memory allocator /// by the application (does not include metadata overhead etc) case allocatedResidentMemory @@ -175,6 +179,8 @@ public extension BenchmarkMetric { return "Malloc (large)" case .mallocCountTotal: return "Malloc (total)" + case .mallocBytesCount: + return "Malloc (bytes total)" case .allocatedResidentMemory: return "Memory (allocated resident)" case .memoryLeaked: @@ -215,6 +221,8 @@ public extension BenchmarkMetric { return "Δ %" case let .custom(name, _, _): return name + case .freeCountTotal: + return "Free (total)" } } @@ -244,47 +252,51 @@ public extension BenchmarkMetric { return 10 case .mallocCountTotal: return 11 - case .allocatedResidentMemory: + case .mallocBytesCount: return 12 - case .memoryLeaked: + case .allocatedResidentMemory: return 13 - case .syscalls: + case .memoryLeaked: return 14 - case .contextSwitches: + case .syscalls: return 15 - case .threads: + case .contextSwitches: return 16 - case .threadsRunning: + case .threads: return 17 - case .readSyscalls: + case .threadsRunning: return 18 - case .writeSyscalls: + case .readSyscalls: return 19 - case .readBytesLogical: + case .writeSyscalls: return 20 - case .writeBytesLogical: + case .readBytesLogical: return 21 - case .readBytesPhysical: + case .writeBytesLogical: return 22 - case .writeBytesPhysical: + case .readBytesPhysical: return 23 - case .objectAllocCount: + case .writeBytesPhysical: return 24 - case .retainCount: + case .objectAllocCount: return 25 - case .releaseCount: + case .retainCount: return 26 - case .retainReleaseDelta: + case .releaseCount: return 27 - case .instructions: + case .retainReleaseDelta: return 28 + case 
.instructions: + return 29 + case .freeCountTotal: + return 30 default: return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 28 } // + static var maxIndex: Int { 30 } // // Used by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) @@ -313,39 +325,43 @@ public extension BenchmarkMetric { case 11: return .mallocCountTotal case 12: - return .allocatedResidentMemory + return .mallocBytesCount case 13: - return .memoryLeaked + return .allocatedResidentMemory case 14: - return .syscalls + return .memoryLeaked case 15: - return .contextSwitches + return .syscalls case 16: - return .threads + return .contextSwitches case 17: - return .threadsRunning + return .threads case 18: - return .readSyscalls + return .threadsRunning case 19: - return .writeSyscalls + return .readSyscalls case 20: - return .readBytesLogical + return .writeSyscalls case 21: - return .writeBytesLogical + return .readBytesLogical case 22: - return .readBytesPhysical + return .writeBytesLogical case 23: - return .writeBytesPhysical + return .readBytesPhysical case 24: - return .objectAllocCount + return .writeBytesPhysical case 25: - return .retainCount + return .objectAllocCount case 26: - return .releaseCount + return .retainCount case 27: - return .retainReleaseDelta + return .releaseCount case 28: + return .retainReleaseDelta + case 29: return .instructions + case 30: + return .freeCountTotal default: break } @@ -379,6 +395,8 @@ public extension BenchmarkMetric { return "mallocCountLarge" case .mallocCountTotal: return "mallocCountTotal" + case .mallocBytesCount: + return "mallocBytesCount" case .allocatedResidentMemory: return "allocatedResidentMemory" case .memoryLeaked: @@ -419,6 +437,8 @@ public extension BenchmarkMetric { return "Δ %" case let .custom(name, _, _): return name + case .freeCountTotal: + return "freeCountTotal" } } } @@ -451,6 +471,8 @@ public extension 
BenchmarkMetric { self = BenchmarkMetric.mallocCountLarge case "mallocCountTotal": self = BenchmarkMetric.mallocCountTotal + case "mallocBytesCount": + self = BenchmarkMetric.mallocBytesCount case "allocatedResidentMemory": self = BenchmarkMetric.allocatedResidentMemory case "memoryLeaked": @@ -485,6 +507,8 @@ public extension BenchmarkMetric { self = BenchmarkMetric.releaseCount case "retainReleaseDelta": self = BenchmarkMetric.retainReleaseDelta + case "freeCountTotal": + self = BenchmarkMetric.freeCountTotal default: self = BenchmarkMetric.custom(argument) } diff --git a/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift b/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift deleted file mode 100644 index 38e34761..00000000 --- a/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift +++ /dev/null @@ -1,363 +0,0 @@ -// swiftlint:disable all -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -// This file was generated from JSON Schema using quicktype, do not modify it directly. 
- -// Generated using https://app.quicktype.io with paired down output from -// let optionString = "J" -// malloc_stats_print(nil, nil, optionString) - -// MARK: - Pokedex - -struct Pokedex: Codable { - let jemalloc: Jemalloc -} - -// MARK: - Jemalloc - -struct Jemalloc: Codable { - let version: String - let config: Config - let opt: Opt - let arenas: Arenas - let stats: Stats - let statsArenas: StatsArenas - - enum CodingKeys: String, CodingKey { - case version, config, opt, arenas, stats - case statsArenas = "stats.arenas" - } -} - -// MARK: - Arenas - -struct Arenas: Codable { - let narenas, dirtyDecayMS, muzzyDecayMS, quantum: Int - let page, tcacheMax, nbins, nhbins: Int - let bin: [ArenasBin] - let nlextents: Int - let lextent: [ArenasLextent] - - enum CodingKeys: String, CodingKey { - case narenas - case dirtyDecayMS = "dirty_decay_ms" - case muzzyDecayMS = "muzzy_decay_ms" - case quantum, page - case tcacheMax = "tcache_max" - case nbins, nhbins, bin, nlextents, lextent - } -} - -// MARK: - ArenasBin - -struct ArenasBin: Codable { - let size, nregs, slabSize, nshards: Int - - enum CodingKeys: String, CodingKey { - case size, nregs - case slabSize = "slab_size" - case nshards - } -} - -// MARK: - ArenasLextent - -struct ArenasLextent: Codable { - let size: Double -} - -// MARK: - Config - -struct Config: Codable { - let cacheOblivious, debug, fill, lazyLock: Bool - let mallocConf: String - let optSafetyChecks, prof, profLibgcc, profLibunwind: Bool - let stats, utrace, xmalloc: Bool - - enum CodingKeys: String, CodingKey { - case cacheOblivious = "cache_oblivious" - case debug, fill - case lazyLock = "lazy_lock" - case mallocConf = "malloc_conf" - case optSafetyChecks = "opt_safety_checks" - case prof - case profLibgcc = "prof_libgcc" - case profLibunwind = "prof_libunwind" - case stats, utrace, xmalloc - } -} - -// MARK: - Opt - -struct Opt: Codable { - let abort, abortConf, cacheOblivious, confirmConf: Bool - let retain: Bool - let dss: String - let 
narenas: Int - let percpuArena: String - let oversizeThreshold: Int - let hpa: Bool - let hpaSlabMaxAlloc, hpaHugificationThreshold, hpaHugifyDelayMS, hpaMinPurgeIntervalMS: Int - let hpaDirtyMult: String - let hpaSECNshards, hpaSECMaxAlloc, hpaSECMaxBytes, hpaSECBytesAfterFlush: Int - let hpaSECBatchFillExtra: Int - let metadataThp: String - let mutexMaxSpin, dirtyDecayMS, muzzyDecayMS, lgExtentMaxActiveFit: Int - let junk: String - let zero, experimentalInfallibleNew, tcache: Bool - let tcacheMax, tcacheNslotsSmallMin, tcacheNslotsSmallMax, tcacheNslotsLarge: Int - let lgTcacheNslotsMul, tcacheGcIncrBytes, tcacheGcDelayBytes, lgTcacheFlushSmallDiv: Int - let lgTcacheFlushLargeDiv: Int - let thp: String - let statsPrint: Bool - let statsPrintOpts: String - let statsInterval: Int - let statsIntervalOpts, zeroRealloc: String - - enum CodingKeys: String, CodingKey { - case abort - case abortConf = "abort_conf" - case cacheOblivious = "cache_oblivious" - case confirmConf = "confirm_conf" - case retain, dss, narenas - case percpuArena = "percpu_arena" - case oversizeThreshold = "oversize_threshold" - case hpa - case hpaSlabMaxAlloc = "hpa_slab_max_alloc" - case hpaHugificationThreshold = "hpa_hugification_threshold" - case hpaHugifyDelayMS = "hpa_hugify_delay_ms" - case hpaMinPurgeIntervalMS = "hpa_min_purge_interval_ms" - case hpaDirtyMult = "hpa_dirty_mult" - case hpaSECNshards = "hpa_sec_nshards" - case hpaSECMaxAlloc = "hpa_sec_max_alloc" - case hpaSECMaxBytes = "hpa_sec_max_bytes" - case hpaSECBytesAfterFlush = "hpa_sec_bytes_after_flush" - case hpaSECBatchFillExtra = "hpa_sec_batch_fill_extra" - case metadataThp = "metadata_thp" - case mutexMaxSpin = "mutex_max_spin" - case dirtyDecayMS = "dirty_decay_ms" - case muzzyDecayMS = "muzzy_decay_ms" - case lgExtentMaxActiveFit = "lg_extent_max_active_fit" - case junk, zero - case experimentalInfallibleNew = "experimental_infallible_new" - case tcache - case tcacheMax = "tcache_max" - case tcacheNslotsSmallMin = 
"tcache_nslots_small_min" - case tcacheNslotsSmallMax = "tcache_nslots_small_max" - case tcacheNslotsLarge = "tcache_nslots_large" - case lgTcacheNslotsMul = "lg_tcache_nslots_mul" - case tcacheGcIncrBytes = "tcache_gc_incr_bytes" - case tcacheGcDelayBytes = "tcache_gc_delay_bytes" - case lgTcacheFlushSmallDiv = "lg_tcache_flush_small_div" - case lgTcacheFlushLargeDiv = "lg_tcache_flush_large_div" - case thp - case statsPrint = "stats_print" - case statsPrintOpts = "stats_print_opts" - case statsInterval = "stats_interval" - case statsIntervalOpts = "stats_interval_opts" - case zeroRealloc = "zero_realloc" - } -} - -// MARK: - Stats - -struct Stats: Codable { - let allocated, active, metadata, metadataThp: Int - let resident, mapped, retained, zeroReallocs: Int - let backgroundThread: StatsBackgroundThread - let mutexes: Mutexes - - enum CodingKeys: String, CodingKey { - case allocated, active, metadata - case metadataThp = "metadata_thp" - case resident, mapped, retained - case zeroReallocs = "zero_reallocs" - case backgroundThread = "background_thread" - case mutexes - } -} - -// MARK: - StatsBackgroundThread - -struct StatsBackgroundThread: Codable { - let numThreads, numRuns, runInterval: Int - - enum CodingKeys: String, CodingKey { - case numThreads = "num_threads" - case numRuns = "num_runs" - case runInterval = "run_interval" - } -} - -// MARK: - Mutexes - -struct Mutexes: Codable { - let backgroundThread, maxPerBgThd, ctl, prof: BackgroundThreadValue - let profThdsData, profDump, profRecentAlloc, profRecentDump: BackgroundThreadValue - let profStats: BackgroundThreadValue - - enum CodingKeys: String, CodingKey { - case backgroundThread = "background_thread" - case maxPerBgThd = "max_per_bg_thd" - case ctl, prof - case profThdsData = "prof_thds_data" - case profDump = "prof_dump" - case profRecentAlloc = "prof_recent_alloc" - case profRecentDump = "prof_recent_dump" - case profStats = "prof_stats" - } -} - -// MARK: - BackgroundThreadValue - -struct 
BackgroundThreadValue: Codable { - let numOps, numWait, numSpinAcq, numOwnerSwitch: Int - let totalWaitTime, maxWaitTime, maxNumThds: Int - - enum CodingKeys: String, CodingKey { - case numOps = "num_ops" - case numWait = "num_wait" - case numSpinAcq = "num_spin_acq" - case numOwnerSwitch = "num_owner_switch" - case totalWaitTime = "total_wait_time" - case maxWaitTime = "max_wait_time" - case maxNumThds = "max_num_thds" - } -} - -// MARK: - StatsArenas - -struct StatsArenas: Codable { - let merged: Merged -} - -// MARK: - Merged - -struct Merged: Codable { - let nthreads, uptimeNS: Int - let dss: String - let dirtyDecayMS, muzzyDecayMS, pactive, pdirty: Int - let pmuzzy, dirtyNpurge, dirtyNmadvise, dirtyPurged: Int - let muzzyNpurge, muzzyNmadvise, muzzyPurged: Int - let small, large: Large - let mapped, retained, base, mergedInternal: Int - let metadataThp, tcacheBytes, tcacheStashedBytes, resident: Int - let abandonedVM, extentAvail: Int - let mutexes: [String: BackgroundThreadValue] - let bins: [MergedBin] - let lextents: [MergedLextent] - let extents: [Extent] - let secBytes: Int - let hpaShard: HpaShard - - enum CodingKeys: String, CodingKey { - case nthreads - case uptimeNS = "uptime_ns" - case dss - case dirtyDecayMS = "dirty_decay_ms" - case muzzyDecayMS = "muzzy_decay_ms" - case pactive, pdirty, pmuzzy - case dirtyNpurge = "dirty_npurge" - case dirtyNmadvise = "dirty_nmadvise" - case dirtyPurged = "dirty_purged" - case muzzyNpurge = "muzzy_npurge" - case muzzyNmadvise = "muzzy_nmadvise" - case muzzyPurged = "muzzy_purged" - case small, large, mapped, retained, base - case mergedInternal = "internal" - case metadataThp = "metadata_thp" - case tcacheBytes = "tcache_bytes" - case tcacheStashedBytes = "tcache_stashed_bytes" - case resident - case abandonedVM = "abandoned_vm" - case extentAvail = "extent_avail" - case mutexes, bins, lextents, extents - case secBytes = "sec_bytes" - case hpaShard = "hpa_shard" - } -} - -// MARK: - MergedBin - -struct MergedBin: 
Codable { - let nmalloc, ndalloc, curregs, nrequests: Int - let nfills, nflushes, nreslabs, curslabs: Int - let nonfullSlabs: Int - let mutex: BackgroundThreadValue - - enum CodingKeys: String, CodingKey { - case nmalloc, ndalloc, curregs, nrequests, nfills, nflushes, nreslabs, curslabs - case nonfullSlabs = "nonfull_slabs" - case mutex - } -} - -// MARK: - Extent - -struct Extent: Codable { - let ndirty, nmuzzy, nretained, dirtyBytes: Int - let muzzyBytes, retainedBytes: Int - - enum CodingKeys: String, CodingKey { - case ndirty, nmuzzy, nretained - case dirtyBytes = "dirty_bytes" - case muzzyBytes = "muzzy_bytes" - case retainedBytes = "retained_bytes" - } -} - -// MARK: - HpaShard - -struct HpaShard: Codable { - let npurgePasses, npurges, nhugifies, ndehugifies: Int - let fullSlabs, emptySlabs: EmptySlabs - let nonfullSlabs: [EmptySlabs] - - enum CodingKeys: String, CodingKey { - case npurgePasses = "npurge_passes" - case npurges, nhugifies, ndehugifies - case fullSlabs = "full_slabs" - case emptySlabs = "empty_slabs" - case nonfullSlabs = "nonfull_slabs" - } -} - -// MARK: - EmptySlabs - -struct EmptySlabs: Codable { - let npageslabsHuge, nactiveHuge, npageslabsNonhuge, nactiveNonhuge: Int - let ndirtyNonhuge: Int - let ndirtyHuge: Int? 
- - enum CodingKeys: String, CodingKey { - case npageslabsHuge = "npageslabs_huge" - case nactiveHuge = "nactive_huge" - case npageslabsNonhuge = "npageslabs_nonhuge" - case nactiveNonhuge = "nactive_nonhuge" - case ndirtyNonhuge = "ndirty_nonhuge" - case ndirtyHuge = "ndirty_huge" - } -} - -// MARK: - Large - -struct Large: Codable { - let allocated, nmalloc, ndalloc, nrequests: Int - let nfills, nflushes: Int -} - -// MARK: - MergedLextent - -struct MergedLextent: Codable { - let curlextents: Int -} - -// swiftlint:enable all diff --git a/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift b/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift index 11ab6f83..e2562304 100644 --- a/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift +++ b/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift @@ -60,6 +60,7 @@ final class OperatingSystemAndMallocTests: XCTestCase { blackHole(operatingSystemStatsProducer.metricSupported(.throughput)) } +<<<<<<< HEAD #if canImport(jemalloc) func testMallocProducerLeaks() throws { let startMallocStats = MallocStatsProducer.makeMallocStats() @@ -78,6 +79,8 @@ final class OperatingSystemAndMallocTests: XCTestCase { } #endif +======= +>>>>>>> 66b6a42 (feat(major): [sc-23696] replace jemalloc with custom malloc interposer) func testARCStatsProducer() throws { let array = [3] ARCStatsProducer.hook() From 2ee5b648942f2b07b25de3e4dbb49d728a195d5e Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Wed, 27 Aug 2025 20:11:12 +0200 Subject: [PATCH 02/37] linux fix --- .../MallocInterposerC/include/interposer.h | 17 +++++++++++--- .../MallocInterposerC/src/interposer-unix.c | 23 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h index 09a13dac..ffa42718 100644 --- 
a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -1,5 +1,5 @@ -#ifndef HOOKED_FREE -#define HOOKED_FREE +#ifndef INTERPOSER_H +#define INTERPOSER_H #include #include @@ -25,7 +25,6 @@ typedef void (*malloc_zone_valloc_hook_t)(malloc_zone_t *zone, size_t size); typedef void (*malloc_zone_free_hook_t)(malloc_zone_t *zone, void *ptr); #endif - // Hook management functions void set_malloc_hook(malloc_hook_t hook); void set_free_hook(free_hook_t hook); @@ -64,6 +63,18 @@ void *replacement_reallocf(void *ptr, size_t size); void *replacement_valloc(size_t size); int replacement_posix_memalign(void **memptr, size_t alignment, size_t size); +// On Linux we use LD_PRELOAD to interpose the standard malloc functions +// and we have to declare them ourselves +#if !__APPLE__ +void free(void *ptr); +void *malloc(size_t size); +void *calloc(size_t nmemb, size_t size); +void *realloc(void *ptr, size_t size); +void *reallocf(void *ptr, size_t size); +void *valloc(size_t size); +int posix_memalign(void **memptr, size_t alignment, size_t size); +#endif + #if __APPLE__ void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size); diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c index 319a4e22..a13ba0c8 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -286,4 +287,26 @@ int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { return 1; } } + +void free(void *ptr) { + replacement_free(ptr); +} +void *malloc(size_t size) { + return replacement_malloc(size); +} +void *calloc(size_t nmemb, size_t size) { + return 
replacement_calloc(nmemb, size); +} +void *realloc(void *ptr, size_t size) { + return replacement_realloc(ptr, size); +} +void *reallocf(void *ptr, size_t size) { + return replacement_reallocf(ptr, size); +} +void *valloc(size_t size) { + return replacement_valloc(size); +} +int posix_memalign(void **memptr, size_t alignment, size_t size) { + return replacement_posix_memalign(memptr, alignment, size); +} #endif From 5d4506122bbcd38baab050cc5b1e23a77b40870c Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 12:21:02 +0200 Subject: [PATCH 03/37] drop malloc small & large --- .../BenchmarkExecutor+Extensions.swift | 6 +- Sources/Benchmark/BenchmarkExecutor.swift | 5 +- .../Benchmark/BenchmarkMetric+Defaults.swift | 5 +- Sources/Benchmark/BenchmarkMetric.swift | 108 +++++++----------- 4 files changed, 47 insertions(+), 77 deletions(-) diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift index 559741f2..f1ee9568 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -23,16 +23,14 @@ extension BenchmarkExecutor { extension BenchmarkExecutor { func mallocStatsProducerNeeded(_ metric: BenchmarkMetric) -> Bool { switch metric { - case .mallocCountLarge: - return true case .memoryLeaked: return true - case .mallocCountSmall: - return true case .mallocCountTotal: return true case .allocatedResidentMemory: return true + case .freeCountTotal: + return true default: return false } diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 5fea4f23..e4b56ff5 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -245,9 +245,8 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length 
statistics[BenchmarkMetric.freeCountTotal.index].add(Int(_mallocStats.freeCount)) - // TODO: figure out how to get resident memory -// delta = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory -// statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) + delta = _mallocStats.mallocCount - _mallocStats.freeCount + statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) statistics[BenchmarkMetric.mallocBytesCount.index].add(Int(_mallocStats.mallocBytesCount)) } diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index c95126b8..f6988d66 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -35,6 +35,7 @@ public extension BenchmarkMetric { .mallocCountTotal, .freeCountTotal, .mallocBytesCount, + .memoryLeaked, .throughput, .instructions, .peakMemoryResident] @@ -60,8 +61,6 @@ public extension BenchmarkMetric { [.peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, - .mallocCountSmall, - .mallocCountLarge, .mallocCountTotal, .mallocBytesCount, .memoryLeaked, @@ -112,8 +111,6 @@ public extension BenchmarkMetric { .peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, - .mallocCountSmall, - .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index 0154eb13..88566cd0 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -31,11 +31,7 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib case peakMemoryResidentDelta /// Measure virtual memory usage - sampled during runtime case peakMemoryVirtual - /// Number of small malloc calls - case mallocCountSmall - /// Number of large malloc calls - case mallocCountLarge - /// Number of small+large mallocs + /// Number of total mallocs case 
mallocCountTotal /// Number of totatl free calls case freeCountTotal @@ -124,7 +120,7 @@ public extension BenchmarkMetric { switch self { case .cpuSystem, .cpuTotal, .cpuUser, .wallClock: return true - case .mallocCountLarge, .mallocCountSmall, .mallocCountTotal, .memoryLeaked: + case .mallocCountTotal, .memoryLeaked: return true case .syscalls: return true @@ -173,10 +169,6 @@ public extension BenchmarkMetric { return "Memory Δ (resident peak)" case .peakMemoryVirtual: return "Memory (virtual peak)" - case .mallocCountSmall: - return "Malloc (small)" - case .mallocCountLarge: - return "Malloc (large)" case .mallocCountTotal: return "Malloc (total)" case .mallocBytesCount: @@ -246,57 +238,53 @@ public extension BenchmarkMetric { return 7 case .peakMemoryVirtual: return 8 - case .mallocCountSmall: + case .mallocCountTotal: return 9 - case .mallocCountLarge: + case .freeCountTotal: return 10 - case .mallocCountTotal: - return 11 case .mallocBytesCount: - return 12 + return 11 case .allocatedResidentMemory: - return 13 + return 12 case .memoryLeaked: - return 14 + return 13 case .syscalls: - return 15 + return 14 case .contextSwitches: - return 16 + return 15 case .threads: - return 17 + return 16 case .threadsRunning: - return 18 + return 17 case .readSyscalls: - return 19 + return 18 case .writeSyscalls: - return 20 + return 19 case .readBytesLogical: - return 21 + return 20 case .writeBytesLogical: - return 22 + return 21 case .readBytesPhysical: - return 23 + return 22 case .writeBytesPhysical: - return 24 + return 23 case .objectAllocCount: - return 25 + return 24 case .retainCount: - return 26 + return 25 case .releaseCount: - return 27 + return 26 case .retainReleaseDelta: - return 28 + return 27 case .instructions: - return 29 - case .freeCountTotal: - return 30 + return 28 default: return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 30 } // + static var maxIndex: Int { 28 } // // Used 
by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) @@ -319,49 +307,45 @@ public extension BenchmarkMetric { case 8: return .peakMemoryVirtual case 9: - return .mallocCountSmall + return .mallocCountTotal case 10: - return .mallocCountLarge + return .freeCountTotal case 11: - return .mallocCountTotal - case 12: return .mallocBytesCount - case 13: + case 12: return .allocatedResidentMemory - case 14: + case 13: return .memoryLeaked - case 15: + case 14: return .syscalls - case 16: + case 15: return .contextSwitches - case 17: + case 16: return .threads - case 18: + case 17: return .threadsRunning - case 19: + case 18: return .readSyscalls - case 20: + case 19: return .writeSyscalls - case 21: + case 20: return .readBytesLogical - case 22: + case 21: return .writeBytesLogical - case 23: + case 22: return .readBytesPhysical - case 24: + case 23: return .writeBytesPhysical - case 25: + case 24: return .objectAllocCount - case 26: + case 25: return .retainCount - case 27: + case 26: return .releaseCount - case 28: + case 27: return .retainReleaseDelta - case 29: + case 28: return .instructions - case 30: - return .freeCountTotal default: break } @@ -389,10 +373,6 @@ public extension BenchmarkMetric { return "peakMemoryResidentDelta" case .peakMemoryVirtual: return "peakMemoryVirtual" - case .mallocCountSmall: - return "mallocCountSmall" - case .mallocCountLarge: - return "mallocCountLarge" case .mallocCountTotal: return "mallocCountTotal" case .mallocBytesCount: @@ -465,10 +445,6 @@ public extension BenchmarkMetric { self = BenchmarkMetric.peakMemoryResidentDelta case "peakMemoryVirtual": self = BenchmarkMetric.peakMemoryVirtual - case "mallocCountSmall": - self = BenchmarkMetric.mallocCountSmall - case "mallocCountLarge": - self = BenchmarkMetric.mallocCountLarge case "mallocCountTotal": self = BenchmarkMetric.mallocCountTotal case "mallocBytesCount": From d17af85e3b3955ab74a85604a6aadf6fefff42da Mon Sep 17 00:00:00 2001 
From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:55:00 +0200 Subject: [PATCH 04/37] resolve --- .../BenchmarkCommandPlugin.swift | 56 +++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index c4f55e78..eb1963e7 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -12,6 +12,7 @@ // Running the `BenchmarkTool` for each benchmark target. import PackagePlugin +import Foundation #if canImport(Darwin) import Darwin @@ -149,6 +150,7 @@ import Glibc let packageBenchmarkIdentifier = "package-benchmark" let benchmarkToolName = "BenchmarkTool" let benchmarkTool: PackagePlugin.Path // = try context.tool(named: benchmarkToolName) + let interposerLib: PackagePlugin.Path var args: [String] = [ benchmarkToolName, @@ -360,11 +362,38 @@ import Glibc } // Build the BenchmarkTool manually in release mode to work around https://github.com/apple/swift-package-manager/issues/7210 - guard - let benchmarkToolModule = benchmarkToolModuleTargets.first(where: { - $0.kind == .executable && $0.name == benchmarkToolName - }) - else { + if let benchmarkToolModule = benchmarkToolModuleTargets.first(where: { $0.kind == .executable && $0.name == benchmarkToolName}) { + if outputFormat == .text { + if quietRunning == 0 { + print("Building \(benchmarkToolModule.name) in release mode...") + } + } + + var buildParameters = PackageManager.BuildParameters(configuration: .release) + + buildParameters.otherSwiftcFlags.append(contentsOf: otherSwiftFlagsSpecified.map { "-\($0)" }) + + let buildResult = try packageManager.build( + .product(benchmarkToolModule.name), + parameters: buildParameters + ) + + guard buildResult.succeeded else { + print(buildResult.logText) + print("Benchmark failed to build the BenchmarkTool in 
release mode.") + throw MyError.buildFailed + } + + let tool = buildResult.builtArtifacts.first(where: {$0.kind == .executable && $0.path.lastComponent == benchmarkToolName }) + let lib = buildResult.builtArtifacts.first(where: { $0.kind == .dynamicLibrary && $0.path.lastComponent.contains("libMallocInterposerC") }) + + guard let tool, let lib else { + throw MyError.buildFailed + } + + benchmarkTool = tool.path + interposerLib = lib.path + } else { print("Benchmark failed to find the BenchmarkTool target.") throw MyError.buildFailed } @@ -471,8 +500,25 @@ import Glibc return } + #if os(Linux) + var environment = ProcessInfo.processInfo.environment + environment["LD_PRELOAD"] = interposerLib.string + + let envp = environment.map { "\($0.key)=\($0.value)" }.map { $0.withCString(strdup) } + [nil] + defer { + for i in 0.. Date: Thu, 28 Aug 2025 13:44:41 +0200 Subject: [PATCH 05/37] fix linux env path --- .../BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index eb1963e7..fa1f4b9a 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -150,7 +150,7 @@ import Glibc let packageBenchmarkIdentifier = "package-benchmark" let benchmarkToolName = "BenchmarkTool" let benchmarkTool: PackagePlugin.Path // = try context.tool(named: benchmarkToolName) - let interposerLib: PackagePlugin.Path + let interposerLib: String var args: [String] = [ benchmarkToolName, @@ -385,14 +385,13 @@ import Glibc } let tool = buildResult.builtArtifacts.first(where: {$0.kind == .executable && $0.path.lastComponent == benchmarkToolName }) - let lib = buildResult.builtArtifacts.first(where: { $0.kind == .dynamicLibrary && $0.path.lastComponent.contains("libMallocInterposerC") }) - guard let tool, let lib else { + guard 
let tool else { throw MyError.buildFailed } benchmarkTool = tool.path - interposerLib = lib.path + interposerLib = tool.path.removingLastComponent().appending(subpath: "libMallocInterposerC.so").string } else { print("Benchmark failed to find the BenchmarkTool target.") throw MyError.buildFailed @@ -502,7 +501,7 @@ import Glibc #if os(Linux) var environment = ProcessInfo.processInfo.environment - environment["LD_PRELOAD"] = interposerLib.string + environment["LD_PRELOAD"] = interposerLib let envp = environment.map { "\($0.key)=\($0.value)" }.map { $0.withCString(strdup) } + [nil] defer { From ba18ccd75861c84e18033c0ad6c5b1a845da9bc8 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:46:43 +0200 Subject: [PATCH 06/37] add comment --- Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index fa1f4b9a..ae599ce5 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -499,6 +499,8 @@ import Glibc return } + // On Linux we need to set LD_PRELOAD to get the malloc interposer working + // while on Darwin this is done with DYLD interpose mechanism #if os(Linux) var environment = ProcessInfo.processInfo.environment environment["LD_PRELOAD"] = interposerLib From 3a3e0f249a7af4890a69db02315ad38b292ea13d Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:57:40 +0200 Subject: [PATCH 07/37] rebase with main --- .../BenchmarkCommandPlugin.swift | 33 ++----------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index 
ae599ce5..3daa68af 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -362,37 +362,7 @@ import Glibc } // Build the BenchmarkTool manually in release mode to work around https://github.com/apple/swift-package-manager/issues/7210 - if let benchmarkToolModule = benchmarkToolModuleTargets.first(where: { $0.kind == .executable && $0.name == benchmarkToolName}) { - if outputFormat == .text { - if quietRunning == 0 { - print("Building \(benchmarkToolModule.name) in release mode...") - } - } - - var buildParameters = PackageManager.BuildParameters(configuration: .release) - - buildParameters.otherSwiftcFlags.append(contentsOf: otherSwiftFlagsSpecified.map { "-\($0)" }) - - let buildResult = try packageManager.build( - .product(benchmarkToolModule.name), - parameters: buildParameters - ) - - guard buildResult.succeeded else { - print(buildResult.logText) - print("Benchmark failed to build the BenchmarkTool in release mode.") - throw MyError.buildFailed - } - - let tool = buildResult.builtArtifacts.first(where: {$0.kind == .executable && $0.path.lastComponent == benchmarkToolName }) - - guard let tool else { - throw MyError.buildFailed - } - - benchmarkTool = tool.path - interposerLib = tool.path.removingLastComponent().appending(subpath: "libMallocInterposerC.so").string - } else { + guard let benchmarkToolModule = benchmarkToolModuleTargets.first(where: { $0.kind == .executable && $0.name == benchmarkToolName}) else { print("Benchmark failed to find the BenchmarkTool target.") throw MyError.buildFailed } @@ -426,6 +396,7 @@ import Glibc } benchmarkTool = tool.path + interposerLib = tool.path.removingLastComponent().appending(subpath: "libMallocInterposerC.so").string let filteredTargets = swiftSourceModuleTargets From 837209437ab7ca7bd03164ee2960abe27c7987af Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 
13:58:13 +0200 Subject: [PATCH 08/37] remove dynamic test --- LocalPackages/DynamicTest/.gitignore | 8 ------- LocalPackages/DynamicTest/Package.swift | 22 ------------------- .../Sources/DynamicTest/DynamicTest.swift | 3 --- 3 files changed, 33 deletions(-) delete mode 100644 LocalPackages/DynamicTest/.gitignore delete mode 100644 LocalPackages/DynamicTest/Package.swift delete mode 100644 LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift diff --git a/LocalPackages/DynamicTest/.gitignore b/LocalPackages/DynamicTest/.gitignore deleted file mode 100644 index 0023a534..00000000 --- a/LocalPackages/DynamicTest/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.DS_Store -/.build -/Packages -xcuserdata/ -DerivedData/ -.swiftpm/configuration/registries.json -.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata -.netrc diff --git a/LocalPackages/DynamicTest/Package.swift b/LocalPackages/DynamicTest/Package.swift deleted file mode 100644 index 5f586fd5..00000000 --- a/LocalPackages/DynamicTest/Package.swift +++ /dev/null @@ -1,22 +0,0 @@ -// swift-tools-version: 6.1 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "DynamicTest", - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "DynamicTest", - type: .dynamic, - targets: ["DynamicTest"]), - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. 
- .target( - name: "DynamicTest" - ) - ] -) diff --git a/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift b/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift deleted file mode 100644 index 7876a26c..00000000 --- a/LocalPackages/DynamicTest/Sources/DynamicTest/DynamicTest.swift +++ /dev/null @@ -1,3 +0,0 @@ -public func dynamicTest() { - print("Dynamic Test") -} From 95c613c2ab4d20963b4bec68ae417f8846da6deb Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 13:59:18 +0200 Subject: [PATCH 09/37] run swift format --- .../Basic/BenchmarkRunner+Basic.swift | 2 +- .../Benchmarks/Histogram/Histogram.swift | 4 +- LocalPackages/MallocInterposerC/Package.swift | 4 +- .../MallocInterposerSwift/Package.swift | 6 +- .../MallocInterposerSwift.swift | 6 +- .../SwiftTestClient/SwiftTestClient.swift | 4 +- Package.swift | 4 +- .../BenchmarkBoilerplateGenerator.swift | 2 +- .../BenchmarkCommandPlugin.swift | 17 +-- .../BenchmarkTool+Baselines.swift | 44 +++---- .../BenchmarkTool+CreateBenchmark.swift | 16 +-- .../BenchmarkTool+Export+JMHFormatter.swift | 4 +- .../BenchmarkTool/BenchmarkTool+Machine.swift | 4 +- .../BenchmarkTool+Operations.swift | 6 +- .../BenchmarkTool+PrettyPrinting.swift | 4 +- ...chmarkTool+ReadP90AbsoluteThresholds.swift | 2 +- Plugins/BenchmarkTool/BenchmarkTool.swift | 12 +- .../BenchmarkTool/FilePath+Additions.swift | 2 +- .../Benchmark+ConvenienceInitializers.swift | 8 +- Sources/Benchmark/Benchmark.swift | 20 +-- Sources/Benchmark/BenchmarkClock.swift | 2 +- Sources/Benchmark/BenchmarkExecutor.swift | 21 ++-- Sources/Benchmark/BenchmarkInternals.swift | 8 +- .../Benchmark/BenchmarkMetric+Defaults.swift | 118 ++++++++++-------- Sources/Benchmark/BenchmarkMetric.swift | 10 +- Sources/Benchmark/BenchmarkResult.swift | 22 ++-- Sources/Benchmark/BenchmarkRunner.swift | 2 +- Sources/Benchmark/Blackhole.swift | 4 +- .../Benchmark/MallocStats/MallocStats.swift | 2 
+- .../OperatingSystemStatsProducer+Darwin.swift | 4 +- .../OperatingSystemStatsProducer+Linux.swift | 8 +- .../Benchmark/Progress/ProgressElements.swift | 2 +- Sources/Benchmark/Statistics.swift | 18 +-- .../BenchmarkTests/BenchmarkRunnerTests.swift | 2 +- 34 files changed, 202 insertions(+), 192 deletions(-) diff --git a/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift b/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift index 3e7b4114..837c0cb1 100644 --- a/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift +++ b/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift @@ -125,7 +125,7 @@ let benchmarks: @Sendable () -> Void = { } } - let parameterization = (0...5).map { 1 << $0 } // 1, 2, 4, ... + let parameterization = (0...5).map { 1 << $0 } // 1, 2, 4, ... parameterization.forEach { count in Benchmark("Parameterized", configuration: .init(tags: ["count": count.description])) { benchmark in diff --git a/Benchmarks/Benchmarks/Histogram/Histogram.swift b/Benchmarks/Benchmarks/Histogram/Histogram.swift index 71fd3f26..e41cc7f1 100644 --- a/Benchmarks/Benchmarks/Histogram/Histogram.swift +++ b/Benchmarks/Benchmarks/Histogram/Histogram.swift @@ -35,7 +35,7 @@ let benchmarks: @Sendable () -> Void = { var histogram = Histogram(highestTrackableValue: maxValue, numberOfSignificantValueDigits: .three) - let numValues = 1_024 // so compiler can optimize modulo below + let numValues = 1_024 // so compiler can optimize modulo below let values = [UInt64]((0.. Void = { benchmark.startMeasurement() var histogram = Histogram(numberOfSignificantValueDigits: .three) - let numValues = 1_024 // so compiler can optimize modulo below + let numValues = 1_024 // so compiler can optimize modulo below let values = [UInt64]((0.. 
Bool { @@ -48,8 +48,8 @@ struct BenchmarkIdentifier: Codable, Hashable { self.name = name } - var target: String // The name of the executable benchmark target id - var name: String // The name of the benchmark + var target: String // The name of the executable benchmark target id + var name: String // The name of the benchmark public func hash(into hasher: inout Hasher) { hasher.combine(target) @@ -178,7 +178,7 @@ let baselinesDirectory: String = ".benchmarkBaselines" extension BenchmarkTool { func printAllBaselines() { var storagePath = FilePath(baselineStoragePath) - storagePath.append(baselinesDirectory) // package/.benchmarkBaselines + storagePath.append(baselinesDirectory) // package/.benchmarkBaselines for file in storagePath.directoryEntries { if file.ends(with: ".") == false, file.ends(with: "..") == false @@ -206,7 +206,7 @@ extension BenchmarkTool { var storagePath = FilePath(baselineStoragePath) let filemanager = FileManager.default - storagePath.append(baselinesDirectory) // package/.benchmarkBaselines + storagePath.append(baselinesDirectory) // package/.benchmarkBaselines for file in storagePath.directoryEntries { if file.ends(with: ".") == false, file.ends(with: "..") == false @@ -256,14 +256,14 @@ extension BenchmarkTool { /* We store the baselines in a .benchmarkBaselines directory, by default in the package root path unless otherwise specified. - + The 'default' folder is used when no specific named baseline have been specified with the command line. Specified 'named' baselines is useful for convenient A/B/C testing and comparisons. Unless a host identifier have been specified on the command line (or in an environment variable), we by default store results in 'results.json', otherwise we will use the environment variable or command line to optionally specify a 'hostIdentifier' that allow for separation between different hosts if checking in baselines in repos. 
- + .benchmarkBaselines ├── target1 │ ├── default @@ -284,14 +284,14 @@ extension BenchmarkTool { │ └── ... └── ... */ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // subpath rooted in package used for directory creation - subPath.append(baselinesDirectory) // package/.benchmarkBaselines - subPath.append("\(target)") // package/.benchmarkBaselines/myTarget1 - subPath.append(baselineName) // package/.benchmarkBaselines/myTarget1/named1 + subPath.append(baselinesDirectory) // package/.benchmarkBaselines + subPath.append("\(target)") // package/.benchmarkBaselines/myTarget1 + subPath.append(baselineName) // package/.benchmarkBaselines/myTarget1/named1 - outputPath.createSubPath(subPath) // Create destination subpath if needed + outputPath.createSubPath(subPath) // Create destination subpath if needed outputPath.append(subPath.components) @@ -348,13 +348,13 @@ extension BenchmarkTool { baselineIdentifier: String? = nil ) throws -> BenchmarkBaseline? { var path = FilePath(baselineStoragePath) - path.append(baselinesDirectory) // package/.benchmarkBaselines - path.append(FilePath.Component(target)!) // package/.benchmarkBaselines/myTarget1 + path.append(baselinesDirectory) // package/.benchmarkBaselines + path.append(FilePath.Component(target)!) 
// package/.benchmarkBaselines/myTarget1 if let baselineIdentifier { - path.append(baselineIdentifier) // package/.benchmarkBaselines/myTarget1/named1 + path.append(baselineIdentifier) // package/.benchmarkBaselines/myTarget1/named1 } else { - path.append("default") // // package/.benchmarkBaselines/myTarget1/default + path.append("default") // // package/.benchmarkBaselines/myTarget1/default } if let hostIdentifier { @@ -376,7 +376,7 @@ extension BenchmarkTool { let bufferSize = 16 * 1_024 * 1_024 var done = false - while done == false { // readBytes.count < bufferLength { + while done == false { // readBytes.count < bufferLength { let nextBytes = try [UInt8](unsafeUninitializedCapacity: bufferSize) { buf, count in count = try fd.read(into: UnsafeMutableRawBufferPointer(buf)) if count == 0 { @@ -396,7 +396,7 @@ extension BenchmarkTool { print("Failed to close fd for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. when no baselines exist + if errno != ENOENT { // file not found is ok, e.g. 
when no baselines exist print("Failed to open file \(path), errno = [\(errno)]") } } @@ -522,11 +522,11 @@ extension BenchmarkBaseline: Equatable { for (lhsBenchmarkIdentifier, lhsBenchmarkResults) in lhs.results { for lhsBenchmarkResult in lhsBenchmarkResults { - guard let rhsResults = rhs.results.first(where: { $0.key == lhsBenchmarkIdentifier }) else { // We couldn't find a result for one of the tests + guard let rhsResults = rhs.results.first(where: { $0.key == lhsBenchmarkIdentifier }) else { // We couldn't find a result for one of the tests return false } guard let rhsBenchmarkResult = rhsResults.value.first(where: { $0.metric == lhsBenchmarkResult.metric }) - else { // We couldn't find the specific metric + else { // We couldn't find the specific metric return false } if lhsBenchmarkResult != rhsBenchmarkResult { diff --git a/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift b/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift index 86c7c574..e48f8f61 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift @@ -44,9 +44,9 @@ extension BenchmarkTool { ] """ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation - subPath.append("Package.swift") // package/Benchmarks/targetName + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // subpath rooted in package used for directory creation + subPath.append("Package.swift") // package/Benchmarks/targetName outputPath.append(subPath.components) print("Adding new executable target \(targetName) to \(outputPath.description)") @@ -110,13 +110,13 @@ extension BenchmarkTool { """ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // 
subpath rooted in package used for directory creation - subPath.append(benchmarksDirectory) // package/Benchmarks - subPath.append("\(targetName)") // package/Benchmarks/targetName + subPath.append(benchmarksDirectory) // package/Benchmarks + subPath.append("\(targetName)") // package/Benchmarks/targetName - outputPath.createSubPath(subPath) // Create destination subpath if needed + outputPath.createSubPath(subPath) // Create destination subpath if needed outputPath.append(subPath.components) outputPath.append("\(targetName).swift") diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift b/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift index 15d00576..40d00caf 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift @@ -56,7 +56,7 @@ extension JMHPrimaryMetric { if result.metric.countable { scoreUnit = result.metric == .throughput ? "# / s" : "#" } else { - scoreUnit = "μs" // result.timeUnits.description + scoreUnit = "μs" // result.timeUnits.description } rawData = [recordedValues] } @@ -66,7 +66,7 @@ extension BenchmarkTool { func convertToJMH(_ baseline: BenchmarkBaseline) throws -> String { var resultString = "" var jmhElements: [JMHElement] = [] - var secondaryMetrics: [String: JMHPrimaryMetric] = [:] // could move to OrderedDictionary for consistent output + var secondaryMetrics: [String: JMHPrimaryMetric] = [:] // could move to OrderedDictionary for consistent output baseline.targets.forEach { benchmarkTarget in diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift b/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift index 08934f49..82418fee 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift @@ -23,7 +23,7 @@ import Glibc extension BenchmarkTool { func benchmarkMachine() -> BenchmarkMachine { let processors = sysconf(Int32(_SC_NPROCESSORS_ONLN)) - let memory = 
sysconf(Int32(_SC_PHYS_PAGES)) / 1_024 * sysconf(Int32(_SC_PAGESIZE)) / (1_024 * 1_024) // avoid overflow + let memory = sysconf(Int32(_SC_PHYS_PAGES)) / 1_024 * sysconf(Int32(_SC_PAGESIZE)) / (1_024 * 1_024) // avoid overflow var uuname = utsname() _ = uname(&uuname) @@ -48,7 +48,7 @@ extension BenchmarkTool { String(cString: $0) } } - + let releaseSize = MemoryLayout.size(ofValue: uuname.release) let release = withUnsafePointer(to: &uuname.release) { $0.withMemoryRebound(to: UInt8.self, capacity: releaseSize) { diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift b/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift index 28275c8d..7f573afc 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift @@ -129,7 +129,7 @@ extension BenchmarkTool { return } - if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare + if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare currentBaseline.results.keys.forEach { baselineKey in if let benchmark: Benchmark = .init(baselineKey.name, closure: { _ in }) { benchmark.target = baselineKey.target @@ -282,7 +282,7 @@ extension BenchmarkTool { return } - if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare + if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare currentBaseline.results.keys.forEach { baselineKey in if let benchmark: Benchmark = .init(baselineKey.name, closure: { _ in }) { benchmark.target = baselineKey.target @@ -302,7 +302,7 @@ extension BenchmarkTool { var p90Thresholds: [BenchmarkIdentifier: [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]] = [:] - if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90 + if let benchmarkPath = checkAbsolutePath { 
// load statically defined thresholds for .p90 benchmarks.forEach { benchmark in if let thresholds = BenchmarkTool.makeBenchmarkThresholds( path: benchmarkPath, diff --git a/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift b/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift index a47ead05..147b3683 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift @@ -39,7 +39,7 @@ extension BenchmarkTool { } private func formatLargeNumber(_ value: Int) -> String { - if abs(value) >= 10_000_000 { // 8 digits or more + if abs(value) >= 10_000_000 { // 8 digits or more let doubleValue = Double(value) return String(format: "%.2e", doubleValue) } @@ -193,7 +193,7 @@ extension BenchmarkTool { func prettyPrint( _ baseline: BenchmarkBaseline, - header: String, // = "Benchmark results", + header: String, // = "Benchmark results", hostIdentifier _: String? = nil ) { guard quiet == false else { return } diff --git a/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift b/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift index 5fad206b..066dd554 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift @@ -89,7 +89,7 @@ extension BenchmarkTool { print("Failed to close fd for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. no thresholds found, then silently return nil + if errno != ENOENT { // file not found is ok, e.g. 
no thresholds found, then silently return nil print("Failed to open file \(path), errno = [\(errno)] \(Errno(rawValue: errno).description)") } } diff --git a/Plugins/BenchmarkTool/BenchmarkTool.swift b/Plugins/BenchmarkTool/BenchmarkTool.swift index a2ecf655..6bb6233c 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool.swift @@ -28,7 +28,7 @@ enum BenchmarkOperation: String, ExpressibleByArgument { case thresholds case list case run - case query // query all benchmarks from target, used internally in tool + case query // query all benchmarks from target, used internally in tool case `init` } @@ -127,7 +127,7 @@ struct BenchmarkTool: AsyncParsableCommand { var outputFD: CInt = 0 var benchmarks: [Benchmark] = [] - var benchmarkBaselines: [BenchmarkBaseline] = [] // The baselines read from disk, merged + current run if needed + var benchmarkBaselines: [BenchmarkBaseline] = [] // The baselines read from disk, merged + current run if needed var comparisonBaseline: BenchmarkBaseline? var checkBaseline: BenchmarkBaseline? @@ -192,9 +192,9 @@ struct BenchmarkTool: AsyncParsableCommand { mutating func readBaselines() throws { func readBaseline(_ baselineName: String) throws -> BenchmarkBaseline? 
{ // read all specified baselines - var readBaselines: [BenchmarkBaseline] = [] // The baselines read from disk + var readBaselines: [BenchmarkBaseline] = [] // The baselines read from disk - try targets.forEach { target in // read from all the targets (baselines are stored separately) + try targets.forEach { target in // read from all the targets (baselines are stored separately) let currentBaseline = try read(target: target, baselineIdentifier: baselineName) if let currentBaseline { @@ -214,7 +214,7 @@ struct BenchmarkTool: AsyncParsableCommand { return nil } - try baseline.forEach { baselineName in // for all specified baselines at command line + try baseline.forEach { baselineName in // for all specified baselines at command line if let baseline = try readBaseline(baselineName) { benchmarkBaselines.append(baseline) } else { @@ -401,7 +401,7 @@ struct BenchmarkTool: AsyncParsableCommand { case .`init`: fatalError("Should never come here") case .query: - try queryBenchmarks(benchmarkPath) // Get all available benchmarks first + try queryBenchmarks(benchmarkPath) // Get all available benchmarks first case .list: try listBenchmarks() case .baseline, .thresholds, .run: diff --git a/Plugins/BenchmarkTool/FilePath+Additions.swift b/Plugins/BenchmarkTool/FilePath+Additions.swift index 7659e7cc..228bc7a6 100644 --- a/Plugins/BenchmarkTool/FilePath+Additions.swift +++ b/Plugins/BenchmarkTool/FilePath+Additions.swift @@ -38,7 +38,7 @@ public extension FilePath { } catch { print("failed close directory") } } catch { switch errno { - case ENOENT: // doesn't exist, let's create it + case ENOENT: // doesn't exist, let's create it if mkdir(creationPath.string, S_IRWXU | S_IRWXG | S_IRWXO) == -1 { if errno == EPERM { print("Lacking permissions to write to \(creationPath)") diff --git a/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift b/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift index e40eb651..9921c169 100644 --- 
a/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift +++ b/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift @@ -17,7 +17,7 @@ public extension Benchmark { teardown: BenchmarkTeardownHook? = nil ) { self.init(name, configuration: configuration) { benchmark in - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast closure(benchmark, setupResult) } teardown: { try await teardown?() @@ -46,7 +46,7 @@ public extension Benchmark { teardown: BenchmarkTeardownHook? = nil ) { self.init(name, configuration: configuration) { benchmark in - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast await closure(benchmark, setupResult) } teardown: { try await teardown?() @@ -79,7 +79,7 @@ public extension Benchmark { configuration: configuration, closure: { benchmark in do { - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast try closure(benchmark, setupResult) } catch { benchmark.error("Benchmark \(name) failed with \(String(reflecting: error))") @@ -115,7 +115,7 @@ public extension Benchmark { configuration: configuration, closure: { benchmark in do { - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! 
SetupResult // swiftlint:disable:this force_cast try await closure(benchmark, setupResult) } catch { benchmark.error("Benchmark \(name) failed with \(String(reflecting: error))") diff --git a/Sources/Benchmark/Benchmark.swift b/Sources/Benchmark/Benchmark.swift index 44e811f2..c0bc7f44 100644 --- a/Sources/Benchmark/Benchmark.swift +++ b/Sources/Benchmark/Benchmark.swift @@ -14,7 +14,7 @@ import Foundation // swiftlint:disable file_length identifier_name /// Defines a benchmark -public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type_body_length +public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type_body_length @_documentation(visibility: internal) public typealias BenchmarkClosure = (_ benchmark: Benchmark) -> Void @_documentation(visibility: internal) @@ -36,11 +36,11 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) - public static var _startupHook: BenchmarkSetupHook? // Should be removed when going to 2.0, just kept for API compatiblity + public static var _startupHook: BenchmarkSetupHook? // Should be removed when going to 2.0, just kept for API compatiblity @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) - public static var _shutdownHook: BenchmarkTeardownHook? // Should be removed when going to 2.0, just kept for API compatiblity + public static var _shutdownHook: BenchmarkTeardownHook? 
// Should be removed when going to 2.0, just kept for API compatiblity @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) @@ -111,7 +111,7 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type public static var checkAbsoluteThresholds = false @_documentation(visibility: internal) - public static var benchmarks: [Benchmark] = [] // Bookkeeping of all registered benchmarks + public static var benchmarks: [Benchmark] = [] // Bookkeeping of all registered benchmarks /// The name of the benchmark without any of the tags appended public var baseName: String @@ -150,9 +150,9 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type @_documentation(visibility: internal) public var executablePath: String? /// closure: The actual benchmark closure that will be measured - var closure: BenchmarkClosure? // The actual benchmark to run + var closure: BenchmarkClosure? // The actual benchmark to run /// asyncClosure: The actual benchmark (async) closure that will be measured - var asyncClosure: BenchmarkAsyncClosure? // The actual benchmark to run + var asyncClosure: BenchmarkAsyncClosure? // The actual benchmark to run // setup/teardown hooks for the instance var setup: BenchmarkSetupHook? var teardown: BenchmarkTeardownHook? 
@@ -205,8 +205,8 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type } #endif - static var testSkipBenchmarkRegistrations = false // true in test to avoid bench registration fail - var measurementCompleted = false // Keep track so we skip multiple 'end of measurement' + static var testSkipBenchmarkRegistrations = false // true in test to avoid bench registration fail + var measurementCompleted = false // Keep track so we skip multiple 'end of measurement' enum CodingKeys: String, CodingKey { case baseName = "name" @@ -406,7 +406,7 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type } private func _stopMeasurement(_ explicitStartStop: Bool) { - guard measurementCompleted == false else { // This is to skip the implicit stop if we did an explicit before + guard measurementCompleted == false else { // This is to skip the implicit stop if we did an explicit before return } @@ -560,7 +560,7 @@ public extension Benchmark { /// } /// } /// ``` - @_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf + @_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf static func blackHole(_: some Any) {} } diff --git a/Sources/Benchmark/BenchmarkClock.swift b/Sources/Benchmark/BenchmarkClock.swift index 7f44c3b5..fa3e7604 100644 --- a/Sources/Benchmark/BenchmarkClock.swift +++ b/Sources/Benchmark/BenchmarkClock.swift @@ -78,7 +78,7 @@ extension BenchmarkClock: Clock { /// The current continuous instant. 
public static var now: BenchmarkClock.Instant { #if canImport(Darwin) - let nanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) // to get ns resolution on macOS + let nanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) // to get ns resolution on macOS let seconds: UInt64 = nanos / 1_000_000_000 let attoseconds: UInt64 = (nanos % 1_000_000_000) * 1_000_000_000 diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index e4b56ff5..06c31bef 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -8,14 +8,15 @@ // http://www.apache.org/licenses/LICENSE-2.0 // +import MallocInterposerSwift + #if canImport(OSLog) import OSLog #endif -import MallocInterposerSwift // swiftlint:disable file_length -struct BenchmarkExecutor { // swiftlint:disable:this type_body_length +struct BenchmarkExecutor { // swiftlint:disable:this type_body_length init(quiet: Bool = false) { self.quiet = quiet } @@ -130,7 +131,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length for _ in 0.. .zero { // macOS sometimes gives us identical timestamps so let's skip those. + if runningTime > .zero { // macOS sometimes gives us identical timestamps so let's skip those. 
let nanoSeconds = runningTime.nanoseconds() statistics[BenchmarkMetric.wallClock.index].add(Int(nanoSeconds)) @@ -230,10 +231,10 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length let objectAllocDelta = stopARCStats.objectAllocCount - startARCStats.objectAllocCount statistics[BenchmarkMetric.objectAllocCount.index].add(Int(objectAllocDelta)) - let retainDelta = stopARCStats.retainCount - startARCStats.retainCount - 1 // due to some ARC traffic in the path + let retainDelta = stopARCStats.retainCount - startARCStats.retainCount - 1 // due to some ARC traffic in the path statistics[BenchmarkMetric.retainCount.index].add(Int(retainDelta)) - let releaseDelta = stopARCStats.releaseCount - startARCStats.releaseCount - 1 // due to some ARC traffic in the path + let releaseDelta = stopARCStats.releaseCount - startARCStats.releaseCount - 1 // due to some ARC traffic in the path statistics[BenchmarkMetric.releaseCount.index].add(Int(releaseDelta)) statistics[BenchmarkMetric.retainReleaseDelta.index] @@ -336,7 +337,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length || benchmark.configuration.metrics.contains(.peakMemoryResidentDelta) || benchmark.configuration.metrics.contains(.peakMemoryVirtual) { - operatingSystemStatsProducer.startSampling(5_000) // ~5 ms + operatingSystemStatsProducer.startSampling(5_000) // ~5 ms if benchmark.configuration.metrics.contains(.peakMemoryResidentDelta) { baselinePeakMemoryResidentDelta = @@ -394,7 +395,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length iterations += 1 - if iterations < 1_000 || iterations.isMultiple(of: 500) { // only update for low iteration count benchmarks, else 1/500 + if iterations < 1_000 || iterations.isMultiple(of: 500) { // only update for low iteration count benchmarks, else 1/500 if var progressBar { let iterationsPercentage = 100.0 * Double(iterations) / Double(benchmark.configuration.maxIterations) diff --git 
a/Sources/Benchmark/BenchmarkInternals.swift b/Sources/Benchmark/BenchmarkInternals.swift index 1c624116..e942c577 100644 --- a/Sources/Benchmark/BenchmarkInternals.swift +++ b/Sources/Benchmark/BenchmarkInternals.swift @@ -17,7 +17,7 @@ public enum BenchmarkCommandRequest: Codable { case list case run(benchmark: Benchmark) - case end // exit the benchmark + case end // exit the benchmark } // Replies from benchmark under measure to benchmark runner @@ -25,10 +25,10 @@ public enum BenchmarkCommandRequest: Codable { public enum BenchmarkCommandReply: Codable { case list(benchmark: Benchmark) case ready - case result(benchmark: Benchmark, results: [BenchmarkResult]) // receives results from built-in metric collectors + case result(benchmark: Benchmark, results: [BenchmarkResult]) // receives results from built-in metric collectors case run - case end // end of query for list/result - case error(_ description: String) // error while performing operation (e.g. 'run') + case end // end of query for list/result + case error(_ description: String) // error while performing operation (e.g. 
'run') } // swiftlint:enable all diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index f6988d66..7d35412b 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -30,41 +30,47 @@ public extension BenchmarkMetric { /// There is also an convenience extension on Array defined such that you can write just `.default` rather than `BenchmarkMetric.default` /// static var `default`: [BenchmarkMetric] { - [.wallClock, - .cpuTotal, - .mallocCountTotal, - .freeCountTotal, - .mallocBytesCount, - .memoryLeaked, - .throughput, - .instructions, - .peakMemoryResident] + [ + .wallClock, + .cpuTotal, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .memoryLeaked, + .throughput, + .instructions, + .peakMemoryResident, + ] } /// A collection of extended system benchmarks. static var extended: [BenchmarkMetric] { - [.wallClock, - .cpuUser, - .cpuTotal, - .mallocCountTotal, - .freeCountTotal, - .mallocBytesCount, - .throughput, - .peakMemoryResident, - .memoryLeaked, - .syscalls, - .instructions] + [ + .wallClock, + .cpuUser, + .cpuTotal, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .throughput, + .peakMemoryResident, + .memoryLeaked, + .syscalls, + .instructions, + ] } /// A collection of memory benchmarks. static var memory: [BenchmarkMetric] { - [.peakMemoryResident, - .peakMemoryResidentDelta, - .peakMemoryVirtual, - .mallocCountTotal, - .mallocBytesCount, - .memoryLeaked, - .allocatedResidentMemory] + [ + .peakMemoryResident, + .peakMemoryResidentDelta, + .peakMemoryVirtual, + .mallocCountTotal, + .mallocBytesCount, + .memoryLeaked, + .allocatedResidentMemory, + ] } /// A collection of ARC metrics @@ -103,34 +109,36 @@ public extension BenchmarkMetric { /// A collection of all benchmarks supported by this library. 
static var all: [BenchmarkMetric] { - [.cpuUser, - .cpuSystem, - .cpuTotal, - .wallClock, - .throughput, - .peakMemoryResident, - .peakMemoryResidentDelta, - .peakMemoryVirtual, - .mallocCountTotal, - .freeCountTotal, - .mallocBytesCount, - .memoryLeaked, - .syscalls, - .contextSwitches, - .threads, - .threadsRunning, - .readSyscalls, - .writeSyscalls, - .readBytesLogical, - .writeBytesLogical, - .readBytesPhysical, - .writeBytesPhysical, - .instructions, - .allocatedResidentMemory, - .objectAllocCount, - .retainCount, - .releaseCount, - .retainReleaseDelta] + [ + .cpuUser, + .cpuSystem, + .cpuTotal, + .wallClock, + .throughput, + .peakMemoryResident, + .peakMemoryResidentDelta, + .peakMemoryVirtual, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .memoryLeaked, + .syscalls, + .contextSwitches, + .threads, + .threadsRunning, + .readSyscalls, + .writeSyscalls, + .readBytesLogical, + .writeBytesLogical, + .readBytesPhysical, + .writeBytesPhysical, + .instructions, + .allocatedResidentMemory, + .objectAllocCount, + .retainCount, + .releaseCount, + .retainReleaseDelta, + ] } } diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index 88566cd0..e915c0c8 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -96,7 +96,7 @@ public extension BenchmarkMetric { public extension BenchmarkMetric { /// A constant that states whether larger or smaller measurements, relative to a set baseline, indicate better performance. - enum Polarity: Codable, Sendable { // same naming as XCTest uses, polarity is known for all metrics except custom + enum Polarity: Codable, Sendable { // same naming as XCTest uses, polarity is known for all metrics except custom /// A performance measurement where a larger value, relative to a set baseline, indicates better performance. 
case prefersLarger /// A performance measurement where a smaller value, relative to a set baseline, indicates better performance. @@ -279,16 +279,16 @@ public extension BenchmarkMetric { case .instructions: return 28 default: - return 0 // custom payloads must be stored in dictionary + return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 28 } // + static var maxIndex: Int { 28 } // // Used by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) - func metricFor(index: Int) -> BenchmarkMetric { // swiftlint:disable:this cyclomatic_complexity function_body_length + func metricFor(index: Int) -> BenchmarkMetric { // swiftlint:disable:this cyclomatic_complexity function_body_length switch index { case 1: return .cpuUser @@ -355,7 +355,7 @@ public extension BenchmarkMetric { @_documentation(visibility: internal) public extension BenchmarkMetric { - var rawDescription: String { // As we can't have raw values due to custom support, we do this... + var rawDescription: String { // As we can't have raw values due to custom support, we do this... 
switch self { case .cpuUser: return "cpuUser" diff --git a/Sources/Benchmark/BenchmarkResult.swift b/Sources/Benchmark/BenchmarkResult.swift index 80350f2c..276f28aa 100644 --- a/Sources/Benchmark/BenchmarkResult.swift +++ b/Sources/Benchmark/BenchmarkResult.swift @@ -31,7 +31,7 @@ public enum BenchmarkTimeUnits: String, Codable, CustomStringConvertible, CaseIt case seconds case kiloseconds case megaseconds - case automatic // will pick time unit above automatically + case automatic // will pick time unit above automatically public var factor: Int { switch self { case .nanoseconds: @@ -43,7 +43,7 @@ public enum BenchmarkTimeUnits: String, Codable, CustomStringConvertible, CaseIt case .seconds: return 1 case .kiloseconds: - return 2 // Yeah, not right but we need to refactor to get rid of this, works for now + return 2 // Yeah, not right but we need to refactor to get rid of this, works for now case .megaseconds: return 3 case .automatic: @@ -98,7 +98,7 @@ public enum BenchmarkUnits: Int, Codable, CustomStringConvertible, CaseIterable case giga = 1_000_000_000 case tera = 1_000_000_000_000 case peta = 1_000_000_000_000_000 - case automatic // will pick unit above automatically + case automatic // will pick unit above automatically public var description: String { switch self { @@ -169,17 +169,17 @@ public extension BenchmarkTimeUnits { /// Use a scaling factor when running your short benchmarks to provide greater numerical stability to the results. public enum BenchmarkScalingFactor: Int, Codable { /// No scaling factor, the raw count of iterations. - case one = 1 // e.g. nanoseconds, or count + case one = 1 // e.g. nanoseconds, or count /// Scaling factor of 1e03. - case kilo = 1_000 // microseconds + case kilo = 1_000 // microseconds /// Scaling factor of 1e06. - case mega = 1_000_000 // milliseconds + case mega = 1_000_000 // milliseconds /// Scaling factor of 1e09. 
- case giga = 1_000_000_000 // seconds + case giga = 1_000_000_000 // seconds /// Scaling factor of 1e12. - case tera = 1_000_000_000_000 // 1K seconds + case tera = 1_000_000_000_000 // 1K seconds /// Scaling factor of 1e15. - case peta = 1_000_000_000_000_000 // 1M + case peta = 1_000_000_000_000_000 // 1M public var description: String { switch self { @@ -279,7 +279,7 @@ public struct BenchmarkResult: Codable, Comparable, Equatable { return .microseconds case .giga: return .nanoseconds - case .tera, .peta: // shouldn't be possible as tera is only used internally to present scaled up throughput + case .tera, .peta: // shouldn't be possible as tera is only used internally to present scaled up throughput break } default: @@ -312,7 +312,7 @@ public struct BenchmarkResult: Codable, Comparable, Equatable { return y * x } else if n.isMultiple(of: 2) { return expBySq(y, x * x, n / 2) - } else { // n is odd + } else { // n is odd return expBySq(y * x, x * x, (n - 1) / 2) } } diff --git a/Sources/Benchmark/BenchmarkRunner.swift b/Sources/Benchmark/BenchmarkRunner.swift index 8ed30363..4946da62 100644 --- a/Sources/Benchmark/BenchmarkRunner.swift +++ b/Sources/Benchmark/BenchmarkRunner.swift @@ -119,7 +119,7 @@ public struct BenchmarkRunner: AsyncParsableCommand, BenchmarkRunnerReadWrite { let suppressor = OutputSuppressor() while true { - if debug { // in debug mode we run all benchmarks matching filter/skip specified + if debug { // in debug mode we run all benchmarks matching filter/skip specified var benchmark: Benchmark? 
benchmarkCommand = .list diff --git a/Sources/Benchmark/Blackhole.swift b/Sources/Benchmark/Blackhole.swift index bd6bae62..37642626 100644 --- a/Sources/Benchmark/Blackhole.swift +++ b/Sources/Benchmark/Blackhole.swift @@ -29,10 +29,10 @@ /// } /// } /// ``` -@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf +@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf public func blackHole(_: some Any) {} -@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf +@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf public func identity(_ value: T) -> T { value } diff --git a/Sources/Benchmark/MallocStats/MallocStats.swift b/Sources/Benchmark/MallocStats/MallocStats.swift index 2056d413..14e1c10e 100644 --- a/Sources/Benchmark/MallocStats/MallocStats.swift +++ b/Sources/Benchmark/MallocStats/MallocStats.swift @@ -23,5 +23,5 @@ struct MallocStats { /// , and unused dirty pages. This is a maximum rather than precise because pages may /// not actually be physically resident if they correspond to demand-zeroed virtual memory /// that has not yet been touched. This is a multiple of the page size. 
- var allocatedResidentMemory: Int = 0 // in bytes + var allocatedResidentMemory: Int = 0 // in bytes } diff --git a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift index 7e5f6c90..bfc478dc 100644 --- a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift +++ b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift @@ -94,7 +94,7 @@ final class OperatingSystemStatsProducer { } #endif - func startSampling(_: Int = 10_000) { // sample rate in microseconds + func startSampling(_: Int = 10_000) { // sample rate in microseconds #if os(macOS) let sampleSemaphore = DispatchSemaphore(value: 0) @@ -139,7 +139,7 @@ final class OperatingSystemStatsProducer { let quit = self.runState self.lock.unlock() - if firstEventSampled == false { // allow calling thread to continue when we have captured a sample + if firstEventSampled == false { // allow calling thread to continue when we have captured a sample firstEventSampled = true sampleSemaphore.signal() } diff --git a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift index ee35d299..b882f7ab 100644 --- a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift +++ b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift @@ -75,7 +75,7 @@ final class OperatingSystemStatsProducer { print("Failed to close fileDescriptor for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. when no baselines exist + if errno != ENOENT { // file not found is ok, e.g. 
when no baselines exist print("Failed to open file \(path), errno = [\(errno)]") } } @@ -140,7 +140,7 @@ final class OperatingSystemStatsProducer { syscalls: 0, contextSwitches: 0, threads: threads, - threadsRunning: threadsRunning, // we can go dig in /proc/self/task/ later if want this + threadsRunning: threadsRunning, // we can go dig in /proc/self/task/ later if want this readSyscalls: Int(ioStats.readSyscalls), writeSyscalls: Int(ioStats.writeSyscalls), readBytesLogical: Int(ioStats.readBytesLogical), @@ -163,7 +163,7 @@ final class OperatingSystemStatsProducer { } } - func startSampling(_: Int = 10_000) { // sample rate in microseconds + func startSampling(_: Int = 10_000) { // sample rate in microseconds let sampleSemaphore = DispatchSemaphore(value: 0) DispatchQueue.global(qos: .userInitiated) @@ -205,7 +205,7 @@ final class OperatingSystemStatsProducer { self.lock.unlock() - if firstEventSampled == false { // allow calling thread to continue when we have captured a sample + if firstEventSampled == false { // allow calling thread to continue when we have captured a sample firstEventSampled = true sampleSemaphore.signal() } diff --git a/Sources/Benchmark/Progress/ProgressElements.swift b/Sources/Benchmark/Progress/ProgressElements.swift index 26c4efa5..95920beb 100644 --- a/Sources/Benchmark/Progress/ProgressElements.swift +++ b/Sources/Benchmark/Progress/ProgressElements.swift @@ -83,7 +83,7 @@ public struct ProgressPercent: ProgressElementType { while padded.count < 4 { padded = " " + padded } - return padded // "\(percentDone.format(decimalPlaces))%" + return padded // "\(percentDone.format(decimalPlaces))%" } } diff --git a/Sources/Benchmark/Statistics.swift b/Sources/Benchmark/Statistics.swift index 49dec756..6cbe7fc0 100644 --- a/Sources/Benchmark/Statistics.swift +++ b/Sources/Benchmark/Statistics.swift @@ -15,18 +15,18 @@ import Numerics // A type that provides distribution / percentile calculations of latency measurements @_documentation(visibility: 
internal) public final class Statistics: Codable { - public static let defaultMaximumMeasurement = 1_000_000_000 // 1 second in nanoseconds + public static let defaultMaximumMeasurement = 1_000_000_000 // 1 second in nanoseconds public static let defaultPercentilesToCalculate = [0.0, 25.0, 50.0, 75.0, 90.0, 99.0, 100.0] public static let defaultPercentilesToCalculateP90Index = 4 public enum Units: Int, Codable, CaseIterable { - case count = 1 // e.g. nanoseconds - case kilo = 1_000 // microseconds - case mega = 1_000_000 // milliseconds - case giga = 1_000_000_000 // seconds - case tera = 1_000_000_000_000 // 1K seconds - case peta = 1_000_000_000_000_000 // 1M seconds - case automatic = 0 // will pick time unit above automatically + case count = 1 // e.g. nanoseconds + case kilo = 1_000 // microseconds + case mega = 1_000_000 // milliseconds + case giga = 1_000_000_000 // seconds + case tera = 1_000_000_000_000 // 1K seconds + case peta = 1_000_000_000_000_000 // 1M seconds + case automatic = 0 // will pick time unit above automatically public var description: String { switch self { @@ -173,7 +173,7 @@ public final class Statistics: Codable { @inline(__always) public func add(_ measurement: Int) { guard measurement >= 0 else { - return // We sometimes got a <0 measurement, should run with fatalError and try to see how that could occur + return // We sometimes got a <0 measurement, should run with fatalError and try to see how that could occur // fatalError() } diff --git a/Tests/BenchmarkTests/BenchmarkRunnerTests.swift b/Tests/BenchmarkTests/BenchmarkRunnerTests.swift index 31f17580..a7724d65 100644 --- a/Tests/BenchmarkTests/BenchmarkRunnerTests.swift +++ b/Tests/BenchmarkTests/BenchmarkRunnerTests.swift @@ -56,7 +56,7 @@ final class BenchmarkRunnerTests: XCTestCase, BenchmarkRunnerReadWrite { runner.quiet = false runner.timeUnits = .nanoseconds try await runner.run() - XCTAssertEqual(writeCount, 6) // 3 tests results + 3 end markers + XCTAssertEqual(writeCount, 
6) // 3 tests results + 3 end markers } } From 1dc55b4739b6febe94fb110a1bd2517566a68af6 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 14:17:50 +0200 Subject: [PATCH 10/37] remove test target --- LocalPackages/MallocInterposerC/Package.swift | 6 +--- .../MallocInterposerTests.swift | 29 ------------------- 2 files changed, 1 insertion(+), 34 deletions(-) delete mode 100644 LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift diff --git a/LocalPackages/MallocInterposerC/Package.swift b/LocalPackages/MallocInterposerC/Package.swift index c1474c43..e3300781 100644 --- a/LocalPackages/MallocInterposerC/Package.swift +++ b/LocalPackages/MallocInterposerC/Package.swift @@ -19,10 +19,6 @@ let package = Package( name: "MallocInterposerC", linkerSettings: [ .linkedLibrary("dl") - ]), - .testTarget( - name: "MallocInterposerTests", - dependencies: ["MallocInterposerC"] - ), + ]) ] ) diff --git a/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift b/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift deleted file mode 100644 index 31d77848..00000000 --- a/LocalPackages/MallocInterposerC/Tests/MallocInterposerTests/MallocInterposerTests.swift +++ /dev/null @@ -1,29 +0,0 @@ -//import Testing -//@testable import MallocInterposer -//import Darwin -// -//final class Foo { -// var bar: Int = 0 -// -// init() {} -//} -// -//@Test func example() async throws { -// var hookCalled = false -// var allocSize = 0 -// -// MallocHooks.setMallocHook { size, originalResult in -// hookCalled = true -// allocSize = size -// return originalResult -// } -// -// let foo = Foo() -// print(foo.bar) -// -// #expect(hookCalled == true) -// #expect(allocSize == 1024) -// -// let stats = MallocInterposer.shared.getStatistics() -// #expect(stats.mallocCount == 1) -//} From 7aa8368b730ed7459b9d472b6e54f115eda71c60 Mon Sep 17 
00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 14:26:58 +0200 Subject: [PATCH 11/37] fix tests --- .../BenchmarkMetricsTests.swift | 8 +++---- .../OperatingSystemAndMallocTests.swift | 21 ------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/Tests/BenchmarkTests/BenchmarkMetricsTests.swift b/Tests/BenchmarkTests/BenchmarkMetricsTests.swift index e3822010..889128cf 100644 --- a/Tests/BenchmarkTests/BenchmarkMetricsTests.swift +++ b/Tests/BenchmarkTests/BenchmarkMetricsTests.swift @@ -22,9 +22,9 @@ final class BenchmarkMetricsTests: XCTestCase { .peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, - .mallocCountSmall, - .mallocCountLarge, .mallocCountTotal, + .mallocBytesCount, + .freeCountTotal, .allocatedResidentMemory, .memoryLeaked, .syscalls, @@ -55,9 +55,9 @@ final class BenchmarkMetricsTests: XCTestCase { "peakMemoryResident", "peakMemoryResidentDelta", "peakMemoryVirtual", - "mallocCountSmall", - "mallocCountLarge", "mallocCountTotal", + "mallocBytesCount", + "freeCountTotal", "allocatedResidentMemory", "memoryLeaked", "syscalls", diff --git a/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift b/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift index e2562304..60d69d75 100644 --- a/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift +++ b/Tests/BenchmarkTests/OperatingSystemAndMallocTests.swift @@ -60,27 +60,6 @@ final class OperatingSystemAndMallocTests: XCTestCase { blackHole(operatingSystemStatsProducer.metricSupported(.throughput)) } -<<<<<<< HEAD - #if canImport(jemalloc) - func testMallocProducerLeaks() throws { - let startMallocStats = MallocStatsProducer.makeMallocStats() - - for outerloop in 1...100 { - blackHole(malloc(outerloop * 1_024)) - } - - let stopMallocStats = MallocStatsProducer.makeMallocStats() - - XCTAssertGreaterThanOrEqual(stopMallocStats.mallocCountTotal - startMallocStats.mallocCountTotal, 100) - 
XCTAssertGreaterThanOrEqual( - stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory, - 100 * 1_024 - ) - } - #endif - -======= ->>>>>>> 66b6a42 (feat(major): [sc-23696] replace jemalloc with custom malloc interposer) func testARCStatsProducer() throws { let array = [3] ARCStatsProducer.hook() From 675f682419d074aa0bb6355d89da24188838f1d3 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 28 Aug 2025 14:55:30 +0200 Subject: [PATCH 12/37] fix swift lint --- Sources/Benchmark/BenchmarkExecutor.swift | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 06c31bef..63e52dc5 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -27,7 +27,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length // swiftlint:disable cyclomatic_complexity function_body_length func run(_ benchmark: Benchmark) -> [BenchmarkResult] { var wallClockDuration: Duration = .zero - var _mallocStats = MallocInterposerSwift.Statistics( + var mallocStats = MallocInterposerSwift.Statistics( mallocCount: 0, mallocBytesCount: 0, freeCount: 0 @@ -194,7 +194,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length if mallocStatsRequested { MallocInterposerSwift.unhook() - _mallocStats = MallocInterposerSwift.getStatistics() + mallocStats = MallocInterposerSwift.getStatistics() } #if canImport(OSLog) @@ -242,14 +242,14 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { - statistics[BenchmarkMetric.mallocCountTotal.index].add(Int(_mallocStats.mallocCount)) + statistics[BenchmarkMetric.mallocCountTotal.index].add(Int(mallocStats.mallocCount)) - statistics[BenchmarkMetric.freeCountTotal.index].add(Int(_mallocStats.freeCount)) + 
statistics[BenchmarkMetric.freeCountTotal.index].add(Int(mallocStats.freeCount)) - delta = _mallocStats.mallocCount - _mallocStats.freeCount + delta = mallocStats.mallocCount - mallocStats.freeCount statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) - statistics[BenchmarkMetric.mallocBytesCount.index].add(Int(_mallocStats.mallocBytesCount)) + statistics[BenchmarkMetric.mallocBytesCount.index].add(Int(mallocStats.mallocBytesCount)) } if operatingSystemStatsRequested { From 6858ed0e92ad46810c81248669a884ef4de44c94 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Mon, 1 Sep 2025 17:15:23 +0200 Subject: [PATCH 13/37] resolve pr comments --- Benchmarks/Package.resolved | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index b48e8308..b7658529 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -1,4 +1,5 @@ { + "originHash" : "f1d359a544b71b52c6788ad2e4cd2952f7f166b62ddb07316768f66be7ba4099", "pins" : [ { "identity" : "hdrhistogram-swift", @@ -18,15 +19,6 @@ "version" : "1.0.2" } }, - { - "identity" : "package-jemalloc", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-jemalloc", - "state" : { - "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", - "version" : "1.0.0" - } - }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", @@ -41,8 +33,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-atomics", "state" : { - "revision" : "cd142fd2f64be2100422d658e7411e39489da985", - "version" : "1.2.0" + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" } }, { @@ -73,5 +65,5 @@ } } ], - "version" : 2 + "version" : 3 } From 2bbc2ad00167cc9c86fb65e4776115a47f9b9421 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Mon, 1 Sep 
2025 17:15:38 +0200 Subject: [PATCH 14/37] resolve pr comments --- LocalPackages/MallocInterposerC/Package.swift | 2 +- .../MallocInterposerC/include/interposer.h | 10 ++ .../MallocInterposerC/src/interposer-darwin.c | 10 ++ .../MallocInterposerC/src/interposer-unix.c | 10 ++ .../MallocInterposerSwift/Package.swift | 2 +- .../MallocInterposerSwift.swift | 116 ++++++++++++--- .../SwiftTestClient/SwiftTestClient.swift | 10 ++ .../BenchmarkExecutor+Extensions.swift | 2 + Sources/Benchmark/BenchmarkExecutor.swift | 46 ++++-- .../Benchmark/BenchmarkMetric+Defaults.swift | 12 ++ Sources/Benchmark/BenchmarkMetric.swift | 132 +++++++++++------- 11 files changed, 273 insertions(+), 79 deletions(-) diff --git a/LocalPackages/MallocInterposerC/Package.swift b/LocalPackages/MallocInterposerC/Package.swift index e3300781..6dba7002 100644 --- a/LocalPackages/MallocInterposerC/Package.swift +++ b/LocalPackages/MallocInterposerC/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version: 6.1 +// swift-tools-version: 5.10 // The swift-tools-version declares the minimum version of Swift required to build this package. import PackageDescription diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h index ffa42718..a3e91660 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -1,3 +1,13 @@ +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + #ifndef INTERPOSER_H #define INTERPOSER_H diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c index 5f2c6dd6..03ea9a14 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c @@ -1,3 +1,13 @@ +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + #include #if __APPLE__ diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c index a13ba0c8..6e17ae55 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -1,3 +1,13 @@ +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + #ifndef __APPLE__ #define _GNU_SOURCE diff --git a/LocalPackages/MallocInterposerSwift/Package.swift b/LocalPackages/MallocInterposerSwift/Package.swift index af5d0a8c..bc7edc3d 100644 --- a/LocalPackages/MallocInterposerSwift/Package.swift +++ b/LocalPackages/MallocInterposerSwift/Package.swift @@ -1,4 +1,4 @@ -// swift-tools-version: 6.1 +// swift-tools-version: 5.10 // The swift-tools-version declares the minimum version of Swift required to build this package. 
import PackageDescription diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift index 92d25364..cfbb64ca 100644 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift @@ -1,6 +1,19 @@ +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + import Atomics import Foundation import MallocInterposerC +#if canImport(Glibc) +import Glibc +#endif /// Swift-friendly hook types public typealias MallocHook = @convention(c) (Int) -> Void @@ -20,68 +33,118 @@ public typealias MallocZoneMemalignHook = @convention(c) (UnsafeMutablePointer.Storage.init(0) - static let mallocCount = UnsafeAtomic.init(at: &mallocCountStorage) - nonisolated(unsafe) private static var mallocBytesCountStorage = UnsafeAtomic.Storage.init(0) - static let mallocBytesCount = UnsafeAtomic.init(at: &mallocBytesCountStorage) - nonisolated(unsafe) private static var freeCountStorage = UnsafeAtomic.Storage(0) - static let freeCount = UnsafeAtomic.init(at: &freeCountStorage) - /// Clear all counters - private static func clearAllCounters() { - mallocCount.store(0, ordering: .relaxed) - mallocBytesCount.store(0, ordering: .relaxed) - freeCount.store(0, ordering: .relaxed) - } + static let mallocCount = UnsafeAtomic.create(0) + static let mallocBytesCount = UnsafeAtomic.create(0) + static let freeCount = UnsafeAtomic.create(0) + static let freeBytesCount = UnsafeAtomic.create(0) + static let mallocSmallCount = UnsafeAtomic.create(0) + static let mallocLargeCount = UnsafeAtomic.create(0) + static let 
pageSize = getpagesize() private init() {} public static func hook() { - clearAllCounters() let mallocHook: MallocHook = { size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let freeHook: FreeHook = { pointer in MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) + #if canImport(Darwin) + let size = malloc_size(pointer) + #else + let size = malloc_usable_size(pointer) + #endif + MallocInterposerSwift.freeBytesCount.wrappingIncrement(by: size, ordering: .relaxed) } let callocHook: CallocHook = { num, size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + let total = num * size + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: total, ordering: .relaxed) + + if total > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let reallocHook: ReallocHook = { pointer, size in MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } #if canImport(Darwin) let mallocZoneHook: MallocZoneHook = { zone, size in 
MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let mallocZoneFreeHook: MallocZoneFreeHook = { zone, pointer in MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) } let mallocZoneCallocHook: MallocZoneCallocHook = { zone, num, size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: num * size, ordering: .relaxed) + let total = num * size + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: total, ordering: .relaxed) + + if total > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let mallocZoneReallocHook: MallocZoneReallocHook = { zone, pointer, size in MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let mallocZoneVallocHook: MallocZoneVallocHook = { zone, size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + 
MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } let mallocZoneMemalignHook: MallocZoneMemalignHook = { zone, alignment, size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } } set_malloc_zone_hook(mallocZoneHook) @@ -114,6 +177,12 @@ public class MallocInterposerSwift: @unchecked Sendable { #endif } + public static func reset() { + mallocCount.store(0, ordering: .relaxed) + mallocBytesCount.store(0, ordering: .relaxed) + freeCount.store(0, ordering: .relaxed) + } + public static func getStatistics() -> Statistics { let stats = Statistics( mallocCount: mallocCount.load(ordering: .relaxed), @@ -129,12 +198,25 @@ public extension MallocInterposerSwift { struct Statistics { public let mallocCount: Int public let mallocBytesCount: Int + public let mallocSmallCount: Int + public let mallocLargeCount: Int public let freeCount: Int - - public init(mallocCount: Int, mallocBytesCount: Int, freeCount: Int) { + public let freeBytesCount: Int + + public init( + mallocCount: Int = 0, + mallocBytesCount: Int = 0, + mallocSmallCount: Int = 0, + mallocLargeCount: Int = 0, + freeCount: Int = 0, + freeBytesCount: Int = 0 + ) { self.mallocCount = mallocCount self.mallocBytesCount = mallocBytesCount + self.mallocSmallCount = mallocSmallCount + self.mallocLargeCount = mallocLargeCount self.freeCount = freeCount + self.freeBytesCount = freeBytesCount } } } diff --git a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift index ba4c34d7..475c03c5 100644 --- 
a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift @@ -1,3 +1,13 @@ +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + import Foundation import MallocInterposerC import MallocInterposerSwift diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift index f1ee9568..72d2ab70 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -25,6 +25,8 @@ extension BenchmarkExecutor { switch metric { case .memoryLeaked: return true + case .memoryLeakedBytes: + return true case .mallocCountTotal: return true case .allocatedResidentMemory: diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 63e52dc5..4793250a 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -27,11 +27,8 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length // swiftlint:disable cyclomatic_complexity function_body_length func run(_ benchmark: Benchmark) -> [BenchmarkResult] { var wallClockDuration: Duration = .zero - var mallocStats = MallocInterposerSwift.Statistics( - mallocCount: 0, - mallocBytesCount: 0, - freeCount: 0 - ) + var startMallocStats = MallocInterposerSwift.Statistics() + var stopMallocStats = MallocInterposerSwift.Statistics() var startOperatingSystemStats = OperatingSystemStats() var stopOperatingSystemStats = OperatingSystemStats() var startPerformanceCounters = PerformanceCounters() @@ -156,7 +153,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length #endif if 
mallocStatsRequested { - MallocInterposerSwift.hook() + startMallocStats = MallocInterposerSwift.getStatistics() } if arcStatsRequested { @@ -193,8 +190,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { - MallocInterposerSwift.unhook() - mallocStats = MallocInterposerSwift.getStatistics() + stopMallocStats = MallocInterposerSwift.getStatistics() } #if canImport(OSLog) @@ -242,14 +238,30 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { - statistics[BenchmarkMetric.mallocCountTotal.index].add(Int(mallocStats.mallocCount)) + let mallocCount = stopMallocStats.mallocCount - startMallocStats.mallocCount + statistics[BenchmarkMetric.mallocCountTotal.index].add(mallocCount) - statistics[BenchmarkMetric.freeCountTotal.index].add(Int(mallocStats.freeCount)) + let mallocBytesCount = stopMallocStats.mallocBytesCount - startMallocStats.mallocBytesCount + statistics[BenchmarkMetric.mallocBytesCount.index].add(mallocBytesCount) - delta = mallocStats.mallocCount - mallocStats.freeCount - statistics[BenchmarkMetric.memoryLeaked.index].add(Int(delta)) + // For backwards compatibility we keep allocatedResidentMemory as the total malloc bytes + statistics[BenchmarkMetric.allocatedResidentMemory.index].add(mallocBytesCount) - statistics[BenchmarkMetric.mallocBytesCount.index].add(Int(mallocStats.mallocBytesCount)) + let mallocSmallCount = stopMallocStats.mallocSmallCount - startMallocStats.mallocSmallCount + statistics[BenchmarkMetric.mallocCountSmall.index].add(mallocSmallCount) + + let mallocLargeCount = stopMallocStats.mallocLargeCount - startMallocStats.mallocLargeCount + statistics[BenchmarkMetric.mallocCountLarge.index].add(mallocLargeCount) + + let freeCount = stopMallocStats.freeCount - startMallocStats.freeCount + statistics[BenchmarkMetric.freeCountTotal.index].add(freeCount) + + let memoryLeakedCount = mallocCount - freeCount + 
statistics[BenchmarkMetric.memoryLeaked.index].add(Int(memoryLeakedCount)) + + let freeBytes = stopMallocStats.freeBytesCount - startMallocStats.freeBytesCount + let memoryLeakedBytes = mallocBytesCount - freeBytes + statistics[BenchmarkMetric.memoryLeakedBytes.index].add(Int(memoryLeakedBytes)) } if operatingSystemStatsRequested { @@ -331,6 +343,10 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length ARCStatsProducer.hook() } + if mallocStatsRequested { + MallocInterposerSwift.hook() + } + if benchmark.configuration.metrics.contains(.threads) || benchmark.configuration.metrics.contains(.threadsRunning) || benchmark.configuration.metrics.contains(.peakMemoryResident) @@ -421,6 +437,10 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length ARCStatsProducer.unhook() } + if mallocStatsRequested { + MallocInterposerSwift.unhook() + } + #if canImport(OSLog) signPost.endInterval("Benchmark", benchmarkInterval, "\(iterations)") #endif diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index 7d35412b..c634b297 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -33,10 +33,13 @@ public extension BenchmarkMetric { [ .wallClock, .cpuTotal, + .mallocCountSmall, + .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, .memoryLeaked, + .memoryLeakedBytes, .throughput, .instructions, .peakMemoryResident, @@ -49,12 +52,15 @@ public extension BenchmarkMetric { .wallClock, .cpuUser, .cpuTotal, + .mallocCountSmall, + .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, .throughput, .peakMemoryResident, .memoryLeaked, + .memoryLeakedBytes, .syscalls, .instructions, ] @@ -66,9 +72,12 @@ public extension BenchmarkMetric { .peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, + .mallocCountSmall, + .mallocCountLarge, .mallocCountTotal, .mallocBytesCount, .memoryLeaked, + 
.memoryLeakedBytes, .allocatedResidentMemory, ] } @@ -118,10 +127,13 @@ public extension BenchmarkMetric { .peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, + .mallocCountSmall, + .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, .memoryLeaked, + .memoryLeakedBytes, .syscalls, .contextSwitches, .threads, diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index e915c0c8..8c605bac 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -31,7 +31,11 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib case peakMemoryResidentDelta /// Measure virtual memory usage - sampled during runtime case peakMemoryVirtual - /// Number of total mallocs + /// Number of small malloc calls + case mallocCountSmall + /// Number of large malloc calls + case mallocCountLarge + /// Number of total malloc calls (small+large) case mallocCountTotal /// Number of totatl free calls case freeCountTotal @@ -39,9 +43,13 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib case mallocBytesCount /// The amount of allocated resident memory according to the memory allocator /// by the application (does not include metadata overhead etc) + /// **Deprecated** in favour of ``mallocBytesCount``. It value is equal to ``mallocBytesCount``. 
+ @available(*, deprecated, message: "Deprecated in favor of mallocBytesCount") case allocatedResidentMemory /// Number of small+large mallocs - small+large frees in resident memory case memoryLeaked + /// Leaked memeory in bytes + case memoryLeakedBytes /// Measure number of syscalls made during the test case syscalls /// Measure number of context switches made during the test @@ -120,7 +128,7 @@ public extension BenchmarkMetric { switch self { case .cpuSystem, .cpuTotal, .cpuUser, .wallClock: return true - case .mallocCountTotal, .memoryLeaked: + case .mallocCountTotal, .memoryLeaked, .memoryLeakedBytes: return true case .syscalls: return true @@ -169,6 +177,10 @@ public extension BenchmarkMetric { return "Memory Δ (resident peak)" case .peakMemoryVirtual: return "Memory (virtual peak)" + case .mallocCountSmall: + return "Malloc (small)" + case .mallocCountLarge: + return "Malloc (large)" case .mallocCountTotal: return "Malloc (total)" case .mallocBytesCount: @@ -177,6 +189,8 @@ public extension BenchmarkMetric { return "Memory (allocated resident)" case .memoryLeaked: return "Malloc / free Δ" + case .memoryLeakedBytes: + return "Malloc / free Δ (bytes)" case .syscalls: return "Syscalls (total)" case .contextSwitches: @@ -236,55 +250,61 @@ public extension BenchmarkMetric { return 6 case .peakMemoryResidentDelta: return 7 - case .peakMemoryVirtual: + case .peakMemoryVirtual: return 8 - case .mallocCountTotal: + case .mallocCountSmall: return 9 - case .freeCountTotal: + case .mallocCountLarge: return 10 - case .mallocBytesCount: + case .mallocCountTotal: return 11 - case .allocatedResidentMemory: + case .freeCountTotal: return 12 - case .memoryLeaked: + case .mallocBytesCount: return 13 - case .syscalls: + case .allocatedResidentMemory: return 14 - case .contextSwitches: + case .memoryLeaked: return 15 - case .threads: + case .memoryLeakedBytes: return 16 - case .threadsRunning: + case .syscalls: return 17 - case .readSyscalls: + case .contextSwitches: return 18 - 
case .writeSyscalls: + case .threads: return 19 - case .readBytesLogical: + case .threadsRunning: return 20 - case .writeBytesLogical: + case .readSyscalls: return 21 - case .readBytesPhysical: + case .writeSyscalls: return 22 - case .writeBytesPhysical: + case .readBytesLogical: return 23 - case .objectAllocCount: + case .writeBytesLogical: return 24 - case .retainCount: + case .readBytesPhysical: return 25 - case .releaseCount: + case .writeBytesPhysical: return 26 - case .retainReleaseDelta: + case .objectAllocCount: return 27 - case .instructions: + case .retainCount: return 28 + case .releaseCount: + return 29 + case .retainReleaseDelta: + return 30 + case .instructions: + return 31 default: return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 28 } // + static var maxIndex: Int { 31 } // // Used by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) @@ -307,44 +327,50 @@ public extension BenchmarkMetric { case 8: return .peakMemoryVirtual case 9: - return .mallocCountTotal + return .mallocCountSmall case 10: - return .freeCountTotal + return .mallocCountLarge case 11: - return .mallocBytesCount + return .mallocCountTotal case 12: - return .allocatedResidentMemory + return .freeCountTotal case 13: - return .memoryLeaked + return .mallocBytesCount case 14: - return .syscalls + return .allocatedResidentMemory case 15: - return .contextSwitches + return .memoryLeaked case 16: - return .threads + return .memoryLeakedBytes case 17: - return .threadsRunning + return .syscalls case 18: - return .readSyscalls + return .contextSwitches case 19: - return .writeSyscalls + return .threads case 20: - return .readBytesLogical + return .threadsRunning case 21: - return .writeBytesLogical + return .readSyscalls case 22: - return .readBytesPhysical + return .writeSyscalls case 23: - return .writeBytesPhysical + return .readBytesLogical case 24: - return 
.objectAllocCount + return .writeBytesLogical case 25: - return .retainCount + return .readBytesPhysical case 26: - return .releaseCount + return .writeBytesPhysical case 27: - return .retainReleaseDelta + return .objectAllocCount case 28: + return .retainCount + case 29: + return .releaseCount + case 30: + return .retainReleaseDelta + case 31: return .instructions default: break @@ -373,14 +399,22 @@ public extension BenchmarkMetric { return "peakMemoryResidentDelta" case .peakMemoryVirtual: return "peakMemoryVirtual" + case .mallocCountSmall: + return "mallocCountSmall" + case .mallocCountLarge: + return "mallocCountLarge" case .mallocCountTotal: return "mallocCountTotal" + case .freeCountTotal: + return "freeCountTotal" case .mallocBytesCount: return "mallocBytesCount" case .allocatedResidentMemory: return "allocatedResidentMemory" case .memoryLeaked: return "memoryLeaked" + case .memoryLeakedBytes: + return "memoryLeakedBytes" case .syscalls: return "syscalls" case .contextSwitches: @@ -417,8 +451,6 @@ public extension BenchmarkMetric { return "Δ %" case let .custom(name, _, _): return name - case .freeCountTotal: - return "freeCountTotal" } } } @@ -445,14 +477,22 @@ public extension BenchmarkMetric { self = BenchmarkMetric.peakMemoryResidentDelta case "peakMemoryVirtual": self = BenchmarkMetric.peakMemoryVirtual + case "mallocCountSmall": + self = BenchmarkMetric.mallocCountSmall + case "mallocCountLarge": + self = BenchmarkMetric.mallocCountLarge case "mallocCountTotal": self = BenchmarkMetric.mallocCountTotal + case "freeCountTotal": + self = BenchmarkMetric.freeCountTotal case "mallocBytesCount": self = BenchmarkMetric.mallocBytesCount case "allocatedResidentMemory": self = BenchmarkMetric.allocatedResidentMemory case "memoryLeaked": self = BenchmarkMetric.memoryLeaked + case "memoryLeakedBytes": + self = BenchmarkMetric.memoryLeakedBytes case "syscalls": self = BenchmarkMetric.syscalls case "contextSwitches": @@ -483,8 +523,6 @@ public extension 
BenchmarkMetric { self = BenchmarkMetric.releaseCount case "retainReleaseDelta": self = BenchmarkMetric.retainReleaseDelta - case "freeCountTotal": - self = BenchmarkMetric.freeCountTotal default: self = BenchmarkMetric.custom(argument) } From 87e6ec1c772639bb9a370f7eca54bbec1448b246 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:59:58 +0200 Subject: [PATCH 15/37] fixes for malloc interposer --- .../MallocInterposerC/include/interposer.h | 1 + .../Sources/MallocInterposerSwift/.swift | 1 - .../MallocInterposerSwift.swift | 46 ++++++++++++++++--- .../SwiftTestClient/SwiftTestClient.swift | 36 +++++++++++++-- .../Benchmark/BenchmarkMetric+Defaults.swift | 1 + Sources/Benchmark/BenchmarkRunner.swift | 2 + 6 files changed, 75 insertions(+), 12 deletions(-) delete mode 100644 LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h index a3e91660..f4d4118f 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -40,6 +40,7 @@ void set_malloc_hook(malloc_hook_t hook); void set_free_hook(free_hook_t hook); void set_calloc_hook(calloc_hook_t hook); void set_realloc_hook(realloc_hook_t hook); +void set_posix_memalign_hook(posix_memalign_hook_t hook); #if __APPLE__ void set_malloc_zone_hook(malloc_zone_hook_t hook); diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift deleted file mode 100644 index 8b137891..00000000 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/.swift +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift index cfbb64ca..2396589e 100644 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift @@ -20,6 +20,7 @@ public typealias MallocHook = @convention(c) (Int) -> Void public typealias FreeHook = @convention(c) (UnsafeMutableRawPointer?) -> Void public typealias CallocHook = @convention(c) (Int, Int) -> Void public typealias ReallocHook = @convention(c) (UnsafeMutableRawPointer?, Int) -> Void +public typealias PosixMemalignHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void #if canImport(Darwin) public typealias MallocZoneHook = @convention(c) (UnsafeMutablePointer?, Int) -> Void @@ -33,15 +34,27 @@ public typealias MallocZoneMemalignHook = @convention(c) (UnsafeMutablePointer.create(0) - static let mallocBytesCount = UnsafeAtomic.create(0) - static let freeCount = UnsafeAtomic.create(0) - static let freeBytesCount = UnsafeAtomic.create(0) - static let mallocSmallCount = UnsafeAtomic.create(0) - static let mallocLargeCount = UnsafeAtomic.create(0) + nonisolated(unsafe) private static var mallocCount: ManagedAtomic! + nonisolated(unsafe) private static var mallocBytesCount: ManagedAtomic! + nonisolated(unsafe) private static var freeCount: ManagedAtomic! + nonisolated(unsafe) private static var freeBytesCount: ManagedAtomic! + nonisolated(unsafe) private static var mallocSmallCount: ManagedAtomic! + nonisolated(unsafe) private static var mallocLargeCount: ManagedAtomic! 
static let pageSize = getpagesize() private init() {} + + + // Initialize the atomic counters before hooking + // because ManagedAtomic calls into malloc + public static func initialize() { + mallocCount = ManagedAtomic(0) + mallocBytesCount = ManagedAtomic(0) + freeCount = ManagedAtomic(0) + freeBytesCount = ManagedAtomic(0) + mallocSmallCount = ManagedAtomic(0) + mallocLargeCount = ManagedAtomic(0) + } public static func hook() { @@ -90,6 +103,17 @@ public class MallocInterposerSwift: @unchecked Sendable { } } + let posixMemalignHook: PosixMemalignHook = { pointer, alignment, size in + MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) + MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) + + if size > MallocInterposerSwift.pageSize { + MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) + } else { + MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) + } + } + #if canImport(Darwin) let mallocZoneHook: MallocZoneHook = { zone, size in MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) @@ -159,6 +183,7 @@ public class MallocInterposerSwift: @unchecked Sendable { set_free_hook(freeHook) set_calloc_hook(callocHook) set_realloc_hook(reallocHook) + set_posix_memalign_hook(posixMemalignHook) } public static func unhook() { @@ -166,6 +191,7 @@ public class MallocInterposerSwift: @unchecked Sendable { set_free_hook(nil) set_calloc_hook(nil) set_realloc_hook(nil) + set_posix_memalign_hook(nil) #if canImport(Darwin) set_malloc_zone_hook(nil) @@ -181,13 +207,19 @@ public class MallocInterposerSwift: @unchecked Sendable { mallocCount.store(0, ordering: .relaxed) mallocBytesCount.store(0, ordering: .relaxed) freeCount.store(0, ordering: .relaxed) + freeBytesCount.store(0, ordering: .relaxed) + mallocSmallCount.store(0, ordering: .relaxed) + mallocLargeCount.store(0, ordering: .relaxed) } public static func getStatistics() -> Statistics { let stats = 
Statistics( mallocCount: mallocCount.load(ordering: .relaxed), mallocBytesCount: mallocBytesCount.load(ordering: .relaxed), - freeCount: freeCount.load(ordering: .relaxed) + mallocSmallCount: mallocSmallCount.load(ordering: .relaxed), + mallocLargeCount: mallocLargeCount.load(ordering: .relaxed), + freeCount: freeCount.load(ordering: .relaxed), + freeBytesCount: freeBytesCount.load(ordering: .relaxed) ) return stats diff --git a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift index 475c03c5..823dfe9f 100644 --- a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift @@ -14,17 +14,42 @@ import MallocInterposerSwift @main enum TestClient { + + @_optimize(none) + static func blackHole(_ value: Any) { + + } + + static func performAllocations(count: Int, size: Int, shouldFree: Bool = true) { + var index = 0 + repeat { + let x = malloc(size) + if shouldFree { + free(x) + } + index += 1 + } while index < count + } + + @_optimize(none) static func main() { print("=== MallocInterposerSwift Test ===") // Reset statistics to start clean + MallocInterposerSwift.initialize() MallocInterposerSwift.hook() - let ptr = malloc(1000) - let ptr2 = malloc(500) +// let ptr = malloc(1000) +// let ptr2 = malloc(500) +// +// free(ptr) +// free(ptr2) + + // let x: UnsafeMutablePointer = UnsafeMutablePointer.allocate(capacity: 5000) + + performAllocations(count: 1, size: 11 * 1024 * 1024) + //performAllocations(count: 1, size: 32 * 1024 * 1024, shouldFree: false) - free(ptr) - free(ptr2) MallocInterposerSwift.unhook() @@ -32,8 +57,11 @@ enum TestClient { let stats = MallocInterposerSwift.getStatistics() print("Total malloc count: \(stats.mallocCount)") + print("Malloc small count: \(stats.mallocSmallCount)") + print("Malloc large count: 
\(stats.mallocLargeCount)") print("Total allocated memory: \(stats.mallocBytesCount) bytes") print("Total free count: \(stats.freeCount)") + print("Total freed memory: \(stats.freeBytesCount) bytes") print("\n--- Test complete ---") } diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index c634b297..ece0725a 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -76,6 +76,7 @@ public extension BenchmarkMetric { .mallocCountLarge, .mallocCountTotal, .mallocBytesCount, + .freeCountTotal, .memoryLeaked, .memoryLeakedBytes, .allocatedResidentMemory, diff --git a/Sources/Benchmark/BenchmarkRunner.swift b/Sources/Benchmark/BenchmarkRunner.swift index 4946da62..255cc31d 100644 --- a/Sources/Benchmark/BenchmarkRunner.swift +++ b/Sources/Benchmark/BenchmarkRunner.swift @@ -10,6 +10,7 @@ import ArgumentParser import BenchmarkShared +import MallocInterposerSwift #if canImport(Darwin) import Darwin @@ -112,6 +113,7 @@ public struct BenchmarkRunner: AsyncParsableCommand, BenchmarkRunnerReadWrite { var debugIterator = Benchmark.benchmarks.makeIterator() var benchmarkCommand: BenchmarkCommandRequest + MallocInterposerSwift.initialize() let benchmarkExecutor = BenchmarkExecutor(quiet: quiet) var benchmark: Benchmark? 
var results: [BenchmarkResult] = [] From 59deb29806c2b4d970601a481bf7127d1f4ceac6 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 2 Sep 2025 15:50:31 +0200 Subject: [PATCH 16/37] fix malloc size linux --- .../Sources/MallocInterposerSwift/MallocInterposerSwift.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift index 2396589e..ab920df9 100644 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift @@ -13,6 +13,9 @@ import Foundation import MallocInterposerC #if canImport(Glibc) import Glibc +// We need to expose malloc_usable_size manually since it's not exposed through Glibc +@_silgen_name("malloc_usable_size") +func malloc_usable_size(_ ptr: UnsafeMutableRawPointer?) 
-> Int #endif /// Swift-friendly hook types From c884d7b7629fb1a7decbf7a9027a39095f90bc62 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 11 Dec 2025 13:37:25 +0100 Subject: [PATCH 17/37] run swift-format --- .../Basic/BenchmarkRunner+Basic.swift | 2 +- .../Benchmarks/Histogram/Histogram.swift | 4 +- .../MallocInterposerSwift.swift | 2 +- .../SwiftTestClient/SwiftTestClient.swift | 14 +++--- .../BenchmarkBoilerplateGenerator.swift | 2 +- .../BenchmarkCommandPlugin.swift | 10 ++--- .../BenchmarkTool+Baselines.swift | 44 +++++++++---------- .../BenchmarkTool+CreateBenchmark.swift | 16 +++---- .../BenchmarkTool+Export+JMHFormatter.swift | 4 +- .../BenchmarkTool/BenchmarkTool+Machine.swift | 4 +- .../BenchmarkTool+Operations.swift | 14 +++--- .../BenchmarkTool+PrettyPrinting.swift | 4 +- ...chmarkTool+ReadP90AbsoluteThresholds.swift | 2 +- Plugins/BenchmarkTool/BenchmarkTool.swift | 12 ++--- .../BenchmarkTool/FilePath+Additions.swift | 2 +- .../Benchmark+ConvenienceInitializers.swift | 8 ++-- Sources/Benchmark/Benchmark.swift | 20 ++++----- Sources/Benchmark/BenchmarkClock.swift | 2 +- Sources/Benchmark/BenchmarkExecutor.swift | 18 ++++---- Sources/Benchmark/BenchmarkInternals.swift | 8 ++-- Sources/Benchmark/BenchmarkMetric.swift | 20 ++++----- Sources/Benchmark/BenchmarkResult.swift | 22 +++++----- Sources/Benchmark/BenchmarkRunner.swift | 4 +- Sources/Benchmark/Blackhole.swift | 4 +- .../Benchmark/MallocStats/MallocStats.swift | 2 +- .../OperatingSystemStatsProducer+Darwin.swift | 4 +- .../OperatingSystemStatsProducer+Linux.swift | 8 ++-- .../Benchmark/Progress/ProgressElements.swift | 2 +- Sources/Benchmark/Statistics.swift | 18 ++++---- .../BenchmarkTests/BenchmarkRunnerTests.swift | 2 +- 30 files changed, 138 insertions(+), 140 deletions(-) diff --git a/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift b/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift index 837c0cb1..3e7b4114 100644 --- 
a/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift +++ b/Benchmarks/Benchmarks/Basic/BenchmarkRunner+Basic.swift @@ -125,7 +125,7 @@ let benchmarks: @Sendable () -> Void = { } } - let parameterization = (0...5).map { 1 << $0 } // 1, 2, 4, ... + let parameterization = (0...5).map { 1 << $0 } // 1, 2, 4, ... parameterization.forEach { count in Benchmark("Parameterized", configuration: .init(tags: ["count": count.description])) { benchmark in diff --git a/Benchmarks/Benchmarks/Histogram/Histogram.swift b/Benchmarks/Benchmarks/Histogram/Histogram.swift index e41cc7f1..71fd3f26 100644 --- a/Benchmarks/Benchmarks/Histogram/Histogram.swift +++ b/Benchmarks/Benchmarks/Histogram/Histogram.swift @@ -35,7 +35,7 @@ let benchmarks: @Sendable () -> Void = { var histogram = Histogram(highestTrackableValue: maxValue, numberOfSignificantValueDigits: .three) - let numValues = 1_024 // so compiler can optimize modulo below + let numValues = 1_024 // so compiler can optimize modulo below let values = [UInt64]((0.. Void = { benchmark.startMeasurement() var histogram = Histogram(numberOfSignificantValueDigits: .three) - let numValues = 1_024 // so compiler can optimize modulo below + let numValues = 1_024 // so compiler can optimize modulo below let values = [UInt64]((0.. 
= UnsafeMutablePointer.allocate(capacity: 5000) + // let x: UnsafeMutablePointer = UnsafeMutablePointer.allocate(capacity: 5000) performAllocations(count: 1, size: 11 * 1024 * 1024) //performAllocations(count: 1, size: 32 * 1024 * 1024, shouldFree: false) - MallocInterposerSwift.unhook() // Print final statistics diff --git a/Plugins/BenchmarkBoilerplateGenerator/BenchmarkBoilerplateGenerator.swift b/Plugins/BenchmarkBoilerplateGenerator/BenchmarkBoilerplateGenerator.swift index 03bcffd8..2da0884d 100644 --- a/Plugins/BenchmarkBoilerplateGenerator/BenchmarkBoilerplateGenerator.swift +++ b/Plugins/BenchmarkBoilerplateGenerator/BenchmarkBoilerplateGenerator.swift @@ -20,7 +20,7 @@ struct Benchmark: AsyncParsableCommand { var output: String mutating func run() async throws { - let outputPath = FilePath(output) // package + let outputPath = FilePath(output) // package var boilerplate = """ import Benchmark diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index 4ea934e0..9c2821cb 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -38,7 +38,7 @@ import Glibc let specifiedTargets = try argumentExtractor.extractSpecifiedTargets(in: context.package, withOption: "target") let skipTargets = try argumentExtractor.extractSpecifiedTargets(in: context.package, withOption: "skip-target") let outputFormats = argumentExtractor.extractOption(named: "format") - let pathSpecified = argumentExtractor.extractOption(named: "path") // export path + let pathSpecified = argumentExtractor.extractOption(named: "path") // export path let quietRunning = argumentExtractor.extractFlag(named: "quiet") let noProgress = argumentExtractor.extractFlag(named: "no-progress") let checkAbsoluteThresholdsPath = argumentExtractor.extractOption(named: "check-absolute-path") @@ -140,16 +140,16 @@ import Glibc ) throw 
MyError.invalidArgument } - } catch { // We will throw if we can use the target name (it's unused!) + } catch { // We will throw if we can use the target name (it's unused!) } } let swiftSourceModuleTargets: [SwiftSourceModuleTarget] - var shouldBuildTargets = true // We don't rebuild the targets when we dont need to execute them, e.g. baseline read/compare + var shouldBuildTargets = true // We don't rebuild the targets when we dont need to execute them, e.g. baseline read/compare let packageBenchmarkIdentifier = "package-benchmark" let benchmarkToolName = "BenchmarkTool" - let benchmarkTool: PackagePlugin.Path // = try context.tool(named: benchmarkToolName) + let benchmarkTool: PackagePlugin.Path // = try context.tool(named: benchmarkToolName) let interposerLib: String var args: [String] = [ @@ -435,7 +435,7 @@ import Glibc } let buildResult = try packageManager.build( - .product(target.name), // .all(includingTests: false), + .product(target.name), // .all(includingTests: false), parameters: .init(configuration: .release) ) diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift b/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift index 867a3914..9e97e2b1 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift @@ -33,8 +33,8 @@ struct BenchmarkMachine: Codable, Equatable { var hostname: String var processors: Int - var processorType: String // e.g. arm64e - var memory: Int // in GB + var processorType: String // e.g. 
arm64e + var memory: Int // in GB var kernelVersion: String public static func == (lhs: BenchmarkMachine, rhs: BenchmarkMachine) -> Bool { @@ -48,8 +48,8 @@ struct BenchmarkIdentifier: Codable, Hashable { self.name = name } - var target: String // The name of the executable benchmark target id - var name: String // The name of the benchmark + var target: String // The name of the executable benchmark target id + var name: String // The name of the benchmark public func hash(into hasher: inout Hasher) { hasher.combine(target) @@ -178,7 +178,7 @@ let baselinesDirectory: String = ".benchmarkBaselines" extension BenchmarkTool { func printAllBaselines() { var storagePath = FilePath(baselineStoragePath) - storagePath.append(baselinesDirectory) // package/.benchmarkBaselines + storagePath.append(baselinesDirectory) // package/.benchmarkBaselines for file in storagePath.directoryEntries { if file.ends(with: ".") == false, file.ends(with: "..") == false @@ -206,7 +206,7 @@ extension BenchmarkTool { var storagePath = FilePath(baselineStoragePath) let filemanager = FileManager.default - storagePath.append(baselinesDirectory) // package/.benchmarkBaselines + storagePath.append(baselinesDirectory) // package/.benchmarkBaselines for file in storagePath.directoryEntries { if file.ends(with: ".") == false, file.ends(with: "..") == false @@ -256,14 +256,14 @@ extension BenchmarkTool { /* We store the baselines in a .benchmarkBaselines directory, by default in the package root path unless otherwise specified. - + The 'default' folder is used when no specific named baseline have been specified with the command line. Specified 'named' baselines is useful for convenient A/B/C testing and comparisons. 
Unless a host identifier have been specified on the command line (or in an environment variable), we by default store results in 'results.json', otherwise we will use the environment variable or command line to optionally specify a 'hostIdentifier' that allow for separation between different hosts if checking in baselines in repos. - + .benchmarkBaselines ├── target1 │ ├── default @@ -284,14 +284,14 @@ extension BenchmarkTool { │ └── ... └── ... */ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // subpath rooted in package used for directory creation - subPath.append(baselinesDirectory) // package/.benchmarkBaselines - subPath.append("\(target)") // package/.benchmarkBaselines/myTarget1 - subPath.append(baselineName) // package/.benchmarkBaselines/myTarget1/named1 + subPath.append(baselinesDirectory) // package/.benchmarkBaselines + subPath.append("\(target)") // package/.benchmarkBaselines/myTarget1 + subPath.append(baselineName) // package/.benchmarkBaselines/myTarget1/named1 - outputPath.createSubPath(subPath) // Create destination subpath if needed + outputPath.createSubPath(subPath) // Create destination subpath if needed outputPath.append(subPath.components) @@ -348,13 +348,13 @@ extension BenchmarkTool { baselineIdentifier: String? = nil ) throws -> BenchmarkBaseline? { var path = FilePath(baselineStoragePath) - path.append(baselinesDirectory) // package/.benchmarkBaselines - path.append(FilePath.Component(target)!) // package/.benchmarkBaselines/myTarget1 + path.append(baselinesDirectory) // package/.benchmarkBaselines + path.append(FilePath.Component(target)!) 
// package/.benchmarkBaselines/myTarget1 if let baselineIdentifier { - path.append(baselineIdentifier) // package/.benchmarkBaselines/myTarget1/named1 + path.append(baselineIdentifier) // package/.benchmarkBaselines/myTarget1/named1 } else { - path.append("default") // // package/.benchmarkBaselines/myTarget1/default + path.append("default") // // package/.benchmarkBaselines/myTarget1/default } if let hostIdentifier { @@ -376,7 +376,7 @@ extension BenchmarkTool { let bufferSize = 16 * 1_024 * 1_024 var done = false - while done == false { // readBytes.count < bufferLength { + while done == false { // readBytes.count < bufferLength { let nextBytes = try [UInt8](unsafeUninitializedCapacity: bufferSize) { buf, count in count = try fd.read(into: UnsafeMutableRawBufferPointer(buf)) if count == 0 { @@ -396,7 +396,7 @@ extension BenchmarkTool { print("Failed to close fd for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. when no baselines exist + if errno != ENOENT { // file not found is ok, e.g. 
when no baselines exist print("Failed to open file \(path), errno = [\(errno)]") } } @@ -523,11 +523,11 @@ extension BenchmarkBaseline: Equatable { for (lhsBenchmarkIdentifier, lhsBenchmarkResults) in lhs.results { for lhsBenchmarkResult in lhsBenchmarkResults { - guard let rhsResults = rhs.results.first(where: { $0.key == lhsBenchmarkIdentifier }) else { // We couldn't find a result for one of the tests + guard let rhsResults = rhs.results.first(where: { $0.key == lhsBenchmarkIdentifier }) else { // We couldn't find a result for one of the tests return false } guard let rhsBenchmarkResult = rhsResults.value.first(where: { $0.metric == lhsBenchmarkResult.metric }) - else { // We couldn't find the specific metric + else { // We couldn't find the specific metric return false } if lhsBenchmarkResult != rhsBenchmarkResult { diff --git a/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift b/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift index e48f8f61..86c7c574 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+CreateBenchmark.swift @@ -44,9 +44,9 @@ extension BenchmarkTool { ] """ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation - subPath.append("Package.swift") // package/Benchmarks/targetName + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // subpath rooted in package used for directory creation + subPath.append("Package.swift") // package/Benchmarks/targetName outputPath.append(subPath.components) print("Adding new executable target \(targetName) to \(outputPath.description)") @@ -110,13 +110,13 @@ extension BenchmarkTool { """ - var outputPath = FilePath(baselineStoragePath) // package - var subPath = FilePath() // subpath rooted in package used for directory creation + var outputPath = FilePath(baselineStoragePath) // package + var subPath = FilePath() // 
subpath rooted in package used for directory creation - subPath.append(benchmarksDirectory) // package/Benchmarks - subPath.append("\(targetName)") // package/Benchmarks/targetName + subPath.append(benchmarksDirectory) // package/Benchmarks + subPath.append("\(targetName)") // package/Benchmarks/targetName - outputPath.createSubPath(subPath) // Create destination subpath if needed + outputPath.createSubPath(subPath) // Create destination subpath if needed outputPath.append(subPath.components) outputPath.append("\(targetName).swift") diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift b/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift index 40d00caf..15d00576 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift @@ -56,7 +56,7 @@ extension JMHPrimaryMetric { if result.metric.countable { scoreUnit = result.metric == .throughput ? "# / s" : "#" } else { - scoreUnit = "μs" // result.timeUnits.description + scoreUnit = "μs" // result.timeUnits.description } rawData = [recordedValues] } @@ -66,7 +66,7 @@ extension BenchmarkTool { func convertToJMH(_ baseline: BenchmarkBaseline) throws -> String { var resultString = "" var jmhElements: [JMHElement] = [] - var secondaryMetrics: [String: JMHPrimaryMetric] = [:] // could move to OrderedDictionary for consistent output + var secondaryMetrics: [String: JMHPrimaryMetric] = [:] // could move to OrderedDictionary for consistent output baseline.targets.forEach { benchmarkTarget in diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift b/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift index 82418fee..08934f49 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Machine.swift @@ -23,7 +23,7 @@ import Glibc extension BenchmarkTool { func benchmarkMachine() -> BenchmarkMachine { let processors = sysconf(Int32(_SC_NPROCESSORS_ONLN)) - let memory = 
sysconf(Int32(_SC_PHYS_PAGES)) / 1_024 * sysconf(Int32(_SC_PAGESIZE)) / (1_024 * 1_024) // avoid overflow + let memory = sysconf(Int32(_SC_PHYS_PAGES)) / 1_024 * sysconf(Int32(_SC_PAGESIZE)) / (1_024 * 1_024) // avoid overflow var uuname = utsname() _ = uname(&uuname) @@ -48,7 +48,7 @@ extension BenchmarkTool { String(cString: $0) } } - + let releaseSize = MemoryLayout.size(ofValue: uuname.release) let release = withUnsafePointer(to: &uuname.release) { $0.withMemoryRebound(to: UInt8.self, capacity: releaseSize) { diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift b/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift index 7f573afc..611eceb3 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+Operations.swift @@ -29,7 +29,7 @@ extension BenchmarkTool { let benchmarkReply = try read() switch benchmarkReply { - case let .list(benchmark): + case .list(let benchmark): benchmark.executablePath = benchmarkPath benchmark.target = FilePath(benchmarkPath).lastComponent!.description if metrics.isEmpty == false { @@ -38,7 +38,7 @@ extension BenchmarkTool { benchmarks.append(benchmark) case .end: break outerloop - case let .error(description): + case .error(let description): failBenchmark(description) break outerloop default: @@ -55,12 +55,12 @@ extension BenchmarkTool { let benchmarkReply = try read() switch benchmarkReply { - case let .result(benchmark: benchmark, results: results): + case .result(benchmark: let benchmark, results: let results): let filteredResults = results.filter { benchmark.configuration.metrics.contains($0.metric) } benchmarkResults[BenchmarkIdentifier(target: target, name: benchmark.name)] = filteredResults case .end: break outerloop - case let .error(description): + case .error(let description): failBenchmark(description, exitCode: .benchmarkJobFailed, "\(target)/\(benchmark.name)") benchmarkResults[BenchmarkIdentifier(target: target, name: benchmark.name)] = [] @@ -129,7 +129,7 @@ 
extension BenchmarkTool { return } - if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare + if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare currentBaseline.results.keys.forEach { baselineKey in if let benchmark: Benchmark = .init(baselineKey.name, closure: { _ in }) { benchmark.target = baselineKey.target @@ -282,7 +282,7 @@ extension BenchmarkTool { return } - if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare + if benchmarks.isEmpty { // if we read from baseline and didn't run them, we put in some fake entries for the compare currentBaseline.results.keys.forEach { baselineKey in if let benchmark: Benchmark = .init(baselineKey.name, closure: { _ in }) { benchmark.target = baselineKey.target @@ -302,7 +302,7 @@ extension BenchmarkTool { var p90Thresholds: [BenchmarkIdentifier: [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]] = [:] - if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90 + if let benchmarkPath = checkAbsolutePath { // load statically defined thresholds for .p90 benchmarks.forEach { benchmark in if let thresholds = BenchmarkTool.makeBenchmarkThresholds( path: benchmarkPath, diff --git a/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift b/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift index 147b3683..a47ead05 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+PrettyPrinting.swift @@ -39,7 +39,7 @@ extension BenchmarkTool { } private func formatLargeNumber(_ value: Int) -> String { - if abs(value) >= 10_000_000 { // 8 digits or more + if abs(value) >= 10_000_000 { // 8 digits or more let doubleValue = Double(value) return String(format: "%.2e", doubleValue) } @@ -193,7 +193,7 @@ extension BenchmarkTool { func prettyPrint( _ baseline: 
BenchmarkBaseline, - header: String, // = "Benchmark results", + header: String, // = "Benchmark results", hostIdentifier _: String? = nil ) { guard quiet == false else { return } diff --git a/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift b/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift index 066dd554..5fad206b 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool+ReadP90AbsoluteThresholds.swift @@ -89,7 +89,7 @@ extension BenchmarkTool { print("Failed to close fd for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. no thresholds found, then silently return nil + if errno != ENOENT { // file not found is ok, e.g. no thresholds found, then silently return nil print("Failed to open file \(path), errno = [\(errno)] \(Errno(rawValue: errno).description)") } } diff --git a/Plugins/BenchmarkTool/BenchmarkTool.swift b/Plugins/BenchmarkTool/BenchmarkTool.swift index 6bb6233c..a2ecf655 100644 --- a/Plugins/BenchmarkTool/BenchmarkTool.swift +++ b/Plugins/BenchmarkTool/BenchmarkTool.swift @@ -28,7 +28,7 @@ enum BenchmarkOperation: String, ExpressibleByArgument { case thresholds case list case run - case query // query all benchmarks from target, used internally in tool + case query // query all benchmarks from target, used internally in tool case `init` } @@ -127,7 +127,7 @@ struct BenchmarkTool: AsyncParsableCommand { var outputFD: CInt = 0 var benchmarks: [Benchmark] = [] - var benchmarkBaselines: [BenchmarkBaseline] = [] // The baselines read from disk, merged + current run if needed + var benchmarkBaselines: [BenchmarkBaseline] = [] // The baselines read from disk, merged + current run if needed var comparisonBaseline: BenchmarkBaseline? var checkBaseline: BenchmarkBaseline? 
@@ -192,9 +192,9 @@ struct BenchmarkTool: AsyncParsableCommand { mutating func readBaselines() throws { func readBaseline(_ baselineName: String) throws -> BenchmarkBaseline? { // read all specified baselines - var readBaselines: [BenchmarkBaseline] = [] // The baselines read from disk + var readBaselines: [BenchmarkBaseline] = [] // The baselines read from disk - try targets.forEach { target in // read from all the targets (baselines are stored separately) + try targets.forEach { target in // read from all the targets (baselines are stored separately) let currentBaseline = try read(target: target, baselineIdentifier: baselineName) if let currentBaseline { @@ -214,7 +214,7 @@ struct BenchmarkTool: AsyncParsableCommand { return nil } - try baseline.forEach { baselineName in // for all specified baselines at command line + try baseline.forEach { baselineName in // for all specified baselines at command line if let baseline = try readBaseline(baselineName) { benchmarkBaselines.append(baseline) } else { @@ -401,7 +401,7 @@ struct BenchmarkTool: AsyncParsableCommand { case .`init`: fatalError("Should never come here") case .query: - try queryBenchmarks(benchmarkPath) // Get all available benchmarks first + try queryBenchmarks(benchmarkPath) // Get all available benchmarks first case .list: try listBenchmarks() case .baseline, .thresholds, .run: diff --git a/Plugins/BenchmarkTool/FilePath+Additions.swift b/Plugins/BenchmarkTool/FilePath+Additions.swift index 228bc7a6..7659e7cc 100644 --- a/Plugins/BenchmarkTool/FilePath+Additions.swift +++ b/Plugins/BenchmarkTool/FilePath+Additions.swift @@ -38,7 +38,7 @@ public extension FilePath { } catch { print("failed close directory") } } catch { switch errno { - case ENOENT: // doesn't exist, let's create it + case ENOENT: // doesn't exist, let's create it if mkdir(creationPath.string, S_IRWXU | S_IRWXG | S_IRWXO) == -1 { if errno == EPERM { print("Lacking permissions to write to \(creationPath)") diff --git 
a/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift b/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift index 9921c169..e40eb651 100644 --- a/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift +++ b/Sources/Benchmark/Benchmark+ConvenienceInitializers.swift @@ -17,7 +17,7 @@ public extension Benchmark { teardown: BenchmarkTeardownHook? = nil ) { self.init(name, configuration: configuration) { benchmark in - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast closure(benchmark, setupResult) } teardown: { try await teardown?() @@ -46,7 +46,7 @@ public extension Benchmark { teardown: BenchmarkTeardownHook? = nil ) { self.init(name, configuration: configuration) { benchmark in - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast await closure(benchmark, setupResult) } teardown: { try await teardown?() @@ -79,7 +79,7 @@ public extension Benchmark { configuration: configuration, closure: { benchmark in do { - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast try closure(benchmark, setupResult) } catch { benchmark.error("Benchmark \(name) failed with \(String(reflecting: error))") @@ -115,7 +115,7 @@ public extension Benchmark { configuration: configuration, closure: { benchmark in do { - let setupResult = benchmark.setupState! as! SetupResult // swiftlint:disable:this force_cast + let setupResult = benchmark.setupState! as! 
SetupResult // swiftlint:disable:this force_cast try await closure(benchmark, setupResult) } catch { benchmark.error("Benchmark \(name) failed with \(String(reflecting: error))") diff --git a/Sources/Benchmark/Benchmark.swift b/Sources/Benchmark/Benchmark.swift index c0bc7f44..44e811f2 100644 --- a/Sources/Benchmark/Benchmark.swift +++ b/Sources/Benchmark/Benchmark.swift @@ -14,7 +14,7 @@ import Foundation // swiftlint:disable file_length identifier_name /// Defines a benchmark -public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type_body_length +public final class Benchmark: Codable, Hashable { // swiftlint:disable:this type_body_length @_documentation(visibility: internal) public typealias BenchmarkClosure = (_ benchmark: Benchmark) -> Void @_documentation(visibility: internal) @@ -36,11 +36,11 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this typ @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) - public static var _startupHook: BenchmarkSetupHook? // Should be removed when going to 2.0, just kept for API compatiblity + public static var _startupHook: BenchmarkSetupHook? // Should be removed when going to 2.0, just kept for API compatiblity @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) - public static var _shutdownHook: BenchmarkTeardownHook? // Should be removed when going to 2.0, just kept for API compatiblity + public static var _shutdownHook: BenchmarkTeardownHook? 
// Should be removed when going to 2.0, just kept for API compatiblity @_documentation(visibility: internal) @ThreadSafeProperty(wrappedValue: nil, lock: setupTeardownLock) @@ -111,7 +111,7 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this typ public static var checkAbsoluteThresholds = false @_documentation(visibility: internal) - public static var benchmarks: [Benchmark] = [] // Bookkeeping of all registered benchmarks + public static var benchmarks: [Benchmark] = [] // Bookkeeping of all registered benchmarks /// The name of the benchmark without any of the tags appended public var baseName: String @@ -150,9 +150,9 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this typ @_documentation(visibility: internal) public var executablePath: String? /// closure: The actual benchmark closure that will be measured - var closure: BenchmarkClosure? // The actual benchmark to run + var closure: BenchmarkClosure? // The actual benchmark to run /// asyncClosure: The actual benchmark (async) closure that will be measured - var asyncClosure: BenchmarkAsyncClosure? // The actual benchmark to run + var asyncClosure: BenchmarkAsyncClosure? // The actual benchmark to run // setup/teardown hooks for the instance var setup: BenchmarkSetupHook? var teardown: BenchmarkTeardownHook? 
@@ -205,8 +205,8 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this typ } #endif - static var testSkipBenchmarkRegistrations = false // true in test to avoid bench registration fail - var measurementCompleted = false // Keep track so we skip multiple 'end of measurement' + static var testSkipBenchmarkRegistrations = false // true in test to avoid bench registration fail + var measurementCompleted = false // Keep track so we skip multiple 'end of measurement' enum CodingKeys: String, CodingKey { case baseName = "name" @@ -406,7 +406,7 @@ public final class Benchmark: Codable, Hashable { // swiftlint:disable:this typ } private func _stopMeasurement(_ explicitStartStop: Bool) { - guard measurementCompleted == false else { // This is to skip the implicit stop if we did an explicit before + guard measurementCompleted == false else { // This is to skip the implicit stop if we did an explicit before return } @@ -560,7 +560,7 @@ public extension Benchmark { /// } /// } /// ``` - @_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf + @_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf static func blackHole(_: some Any) {} } diff --git a/Sources/Benchmark/BenchmarkClock.swift b/Sources/Benchmark/BenchmarkClock.swift index fa3e7604..7f44c3b5 100644 --- a/Sources/Benchmark/BenchmarkClock.swift +++ b/Sources/Benchmark/BenchmarkClock.swift @@ -78,7 +78,7 @@ extension BenchmarkClock: Clock { /// The current continuous instant. 
public static var now: BenchmarkClock.Instant { #if canImport(Darwin) - let nanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) // to get ns resolution on macOS + let nanos = clock_gettime_nsec_np(CLOCK_UPTIME_RAW) // to get ns resolution on macOS let seconds: UInt64 = nanos / 1_000_000_000 let attoseconds: UInt64 = (nanos % 1_000_000_000) * 1_000_000_000 diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 4793250a..04f30398 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -16,7 +16,7 @@ import OSLog // swiftlint:disable file_length -struct BenchmarkExecutor { // swiftlint:disable:this type_body_length +struct BenchmarkExecutor { // swiftlint:disable:this type_body_length init(quiet: Bool = false) { self.quiet = quiet } @@ -128,7 +128,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length for _ in 0.. .zero { // macOS sometimes gives us identical timestamps so let's skip those. + if runningTime > .zero { // macOS sometimes gives us identical timestamps so let's skip those. 
let nanoSeconds = runningTime.nanoseconds() statistics[BenchmarkMetric.wallClock.index].add(Int(nanoSeconds)) @@ -227,10 +227,10 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length let objectAllocDelta = stopARCStats.objectAllocCount - startARCStats.objectAllocCount statistics[BenchmarkMetric.objectAllocCount.index].add(Int(objectAllocDelta)) - let retainDelta = stopARCStats.retainCount - startARCStats.retainCount - 1 // due to some ARC traffic in the path + let retainDelta = stopARCStats.retainCount - startARCStats.retainCount - 1 // due to some ARC traffic in the path statistics[BenchmarkMetric.retainCount.index].add(Int(retainDelta)) - let releaseDelta = stopARCStats.releaseCount - startARCStats.releaseCount - 1 // due to some ARC traffic in the path + let releaseDelta = stopARCStats.releaseCount - startARCStats.releaseCount - 1 // due to some ARC traffic in the path statistics[BenchmarkMetric.releaseCount.index].add(Int(releaseDelta)) statistics[BenchmarkMetric.retainReleaseDelta.index] @@ -353,7 +353,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length || benchmark.configuration.metrics.contains(.peakMemoryResidentDelta) || benchmark.configuration.metrics.contains(.peakMemoryVirtual) { - operatingSystemStatsProducer.startSampling(5_000) // ~5 ms + operatingSystemStatsProducer.startSampling(5_000) // ~5 ms if benchmark.configuration.metrics.contains(.peakMemoryResidentDelta) { baselinePeakMemoryResidentDelta = @@ -411,7 +411,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length iterations += 1 - if iterations < 1_000 || iterations.isMultiple(of: 500) { // only update for low iteration count benchmarks, else 1/500 + if iterations < 1_000 || iterations.isMultiple(of: 500) { // only update for low iteration count benchmarks, else 1/500 if var progressBar { let iterationsPercentage = 100.0 * Double(iterations) / Double(benchmark.configuration.maxIterations) diff --git 
a/Sources/Benchmark/BenchmarkInternals.swift b/Sources/Benchmark/BenchmarkInternals.swift index e942c577..1c624116 100644 --- a/Sources/Benchmark/BenchmarkInternals.swift +++ b/Sources/Benchmark/BenchmarkInternals.swift @@ -17,7 +17,7 @@ public enum BenchmarkCommandRequest: Codable { case list case run(benchmark: Benchmark) - case end // exit the benchmark + case end // exit the benchmark } // Replies from benchmark under measure to benchmark runner @@ -25,10 +25,10 @@ public enum BenchmarkCommandRequest: Codable { public enum BenchmarkCommandReply: Codable { case list(benchmark: Benchmark) case ready - case result(benchmark: Benchmark, results: [BenchmarkResult]) // receives results from built-in metric collectors + case result(benchmark: Benchmark, results: [BenchmarkResult]) // receives results from built-in metric collectors case run - case end // end of query for list/result - case error(_ description: String) // error while performing operation (e.g. 'run') + case end // end of query for list/result + case error(_ description: String) // error while performing operation (e.g. 'run') } // swiftlint:enable all diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index 8c605bac..0672eb05 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -104,7 +104,7 @@ public extension BenchmarkMetric { public extension BenchmarkMetric { /// A constant that states whether larger or smaller measurements, relative to a set baseline, indicate better performance. - enum Polarity: Codable, Sendable { // same naming as XCTest uses, polarity is known for all metrics except custom + enum Polarity: Codable, Sendable { // same naming as XCTest uses, polarity is known for all metrics except custom /// A performance measurement where a larger value, relative to a set baseline, indicates better performance. 
case prefersLarger /// A performance measurement where a smaller value, relative to a set baseline, indicates better performance. @@ -140,7 +140,7 @@ public extension BenchmarkMetric { return true case .objectAllocCount, .retainCount, .releaseCount, .retainReleaseDelta: return true - case let .custom(_, _, useScaleFactor): + case .custom(_, _, let useScaleFactor): return useScaleFactor default: return false @@ -152,7 +152,7 @@ public extension BenchmarkMetric { switch self { case .throughput: return .prefersLarger - case let .custom(_, polarity, _): + case .custom(_, let polarity, _): return polarity default: return .prefersSmaller @@ -225,7 +225,7 @@ public extension BenchmarkMetric { return "Δ" case .deltaPercentage: return "Δ %" - case let .custom(name, _, _): + case .custom(let name, _, _): return name case .freeCountTotal: return "Free (total)" @@ -250,7 +250,7 @@ public extension BenchmarkMetric { return 6 case .peakMemoryResidentDelta: return 7 - case .peakMemoryVirtual: + case .peakMemoryVirtual: return 8 case .mallocCountSmall: return 9 @@ -299,16 +299,16 @@ public extension BenchmarkMetric { case .instructions: return 31 default: - return 0 // custom payloads must be stored in dictionary + return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 31 } // + static var maxIndex: Int { 31 } // // Used by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) - func metricFor(index: Int) -> BenchmarkMetric { // swiftlint:disable:this cyclomatic_complexity function_body_length + func metricFor(index: Int) -> BenchmarkMetric { // swiftlint:disable:this cyclomatic_complexity function_body_length switch index { case 1: return .cpuUser @@ -381,7 +381,7 @@ public extension BenchmarkMetric { @_documentation(visibility: internal) public extension BenchmarkMetric { - var rawDescription: String { // As we can't have raw values due to custom support, we do 
this... + var rawDescription: String { // As we can't have raw values due to custom support, we do this... switch self { case .cpuUser: return "cpuUser" @@ -449,7 +449,7 @@ public extension BenchmarkMetric { return "Δ" case .deltaPercentage: return "Δ %" - case let .custom(name, _, _): + case .custom(let name, _, _): return name } } diff --git a/Sources/Benchmark/BenchmarkResult.swift b/Sources/Benchmark/BenchmarkResult.swift index 276f28aa..80350f2c 100644 --- a/Sources/Benchmark/BenchmarkResult.swift +++ b/Sources/Benchmark/BenchmarkResult.swift @@ -31,7 +31,7 @@ public enum BenchmarkTimeUnits: String, Codable, CustomStringConvertible, CaseIt case seconds case kiloseconds case megaseconds - case automatic // will pick time unit above automatically + case automatic // will pick time unit above automatically public var factor: Int { switch self { case .nanoseconds: @@ -43,7 +43,7 @@ public enum BenchmarkTimeUnits: String, Codable, CustomStringConvertible, CaseIt case .seconds: return 1 case .kiloseconds: - return 2 // Yeah, not right but we need to refactor to get rid of this, works for now + return 2 // Yeah, not right but we need to refactor to get rid of this, works for now case .megaseconds: return 3 case .automatic: @@ -98,7 +98,7 @@ public enum BenchmarkUnits: Int, Codable, CustomStringConvertible, CaseIterable case giga = 1_000_000_000 case tera = 1_000_000_000_000 case peta = 1_000_000_000_000_000 - case automatic // will pick unit above automatically + case automatic // will pick unit above automatically public var description: String { switch self { @@ -169,17 +169,17 @@ public extension BenchmarkTimeUnits { /// Use a scaling factor when running your short benchmarks to provide greater numerical stability to the results. public enum BenchmarkScalingFactor: Int, Codable { /// No scaling factor, the raw count of iterations. - case one = 1 // e.g. nanoseconds, or count + case one = 1 // e.g. nanoseconds, or count /// Scaling factor of 1e03. 
- case kilo = 1_000 // microseconds + case kilo = 1_000 // microseconds /// Scaling factor of 1e06. - case mega = 1_000_000 // milliseconds + case mega = 1_000_000 // milliseconds /// Scaling factor of 1e09. - case giga = 1_000_000_000 // seconds + case giga = 1_000_000_000 // seconds /// Scaling factor of 1e12. - case tera = 1_000_000_000_000 // 1K seconds + case tera = 1_000_000_000_000 // 1K seconds /// Scaling factor of 1e15. - case peta = 1_000_000_000_000_000 // 1M + case peta = 1_000_000_000_000_000 // 1M public var description: String { switch self { @@ -279,7 +279,7 @@ public struct BenchmarkResult: Codable, Comparable, Equatable { return .microseconds case .giga: return .nanoseconds - case .tera, .peta: // shouldn't be possible as tera is only used internally to present scaled up throughput + case .tera, .peta: // shouldn't be possible as tera is only used internally to present scaled up throughput break } default: @@ -312,7 +312,7 @@ public struct BenchmarkResult: Codable, Comparable, Equatable { return y * x } else if n.isMultiple(of: 2) { return expBySq(y, x * x, n / 2) - } else { // n is odd + } else { // n is odd return expBySq(y * x, x * x, (n - 1) / 2) } } diff --git a/Sources/Benchmark/BenchmarkRunner.swift b/Sources/Benchmark/BenchmarkRunner.swift index 255cc31d..3a14f343 100644 --- a/Sources/Benchmark/BenchmarkRunner.swift +++ b/Sources/Benchmark/BenchmarkRunner.swift @@ -121,7 +121,7 @@ public struct BenchmarkRunner: AsyncParsableCommand, BenchmarkRunnerReadWrite { let suppressor = OutputSuppressor() while true { - if debug { // in debug mode we run all benchmarks matching filter/skip specified + if debug { // in debug mode we run all benchmarks matching filter/skip specified var benchmark: Benchmark? 
benchmarkCommand = .list @@ -149,7 +149,7 @@ public struct BenchmarkRunner: AsyncParsableCommand, BenchmarkRunnerReadWrite { } try channel.write(.end) - case let .run(benchmarkToRun): + case .run(let benchmarkToRun): benchmark = Benchmark.benchmarks.first { $0.name == benchmarkToRun.name } if let benchmark { diff --git a/Sources/Benchmark/Blackhole.swift b/Sources/Benchmark/Blackhole.swift index 37642626..bd6bae62 100644 --- a/Sources/Benchmark/Blackhole.swift +++ b/Sources/Benchmark/Blackhole.swift @@ -29,10 +29,10 @@ /// } /// } /// ``` -@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf +@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf public func blackHole(_: some Any) {} -@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf +@_optimize(none) // Used after tip here: https://forums.swift.org/t/compiler-swallows-blackhole/64305/10 - see also https://github.com/apple/swift/commit/1fceeab71e79dc96f1b6f560bf745b016d7fcdcf public func identity(_ value: T) -> T { value } diff --git a/Sources/Benchmark/MallocStats/MallocStats.swift b/Sources/Benchmark/MallocStats/MallocStats.swift index 14e1c10e..2056d413 100644 --- a/Sources/Benchmark/MallocStats/MallocStats.swift +++ b/Sources/Benchmark/MallocStats/MallocStats.swift @@ -23,5 +23,5 @@ struct MallocStats { /// , and unused dirty pages. This is a maximum rather than precise because pages may /// not actually be physically resident if they correspond to demand-zeroed virtual memory /// that has not yet been touched. This is a multiple of the page size. 
- var allocatedResidentMemory: Int = 0 // in bytes + var allocatedResidentMemory: Int = 0 // in bytes } diff --git a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift index bfc478dc..7e5f6c90 100644 --- a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift +++ b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Darwin.swift @@ -94,7 +94,7 @@ final class OperatingSystemStatsProducer { } #endif - func startSampling(_: Int = 10_000) { // sample rate in microseconds + func startSampling(_: Int = 10_000) { // sample rate in microseconds #if os(macOS) let sampleSemaphore = DispatchSemaphore(value: 0) @@ -139,7 +139,7 @@ final class OperatingSystemStatsProducer { let quit = self.runState self.lock.unlock() - if firstEventSampled == false { // allow calling thread to continue when we have captured a sample + if firstEventSampled == false { // allow calling thread to continue when we have captured a sample firstEventSampled = true sampleSemaphore.signal() } diff --git a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift index b882f7ab..ee35d299 100644 --- a/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift +++ b/Sources/Benchmark/OperatingSystemStats/OperatingSystemStatsProducer+Linux.swift @@ -75,7 +75,7 @@ final class OperatingSystemStatsProducer { print("Failed to close fileDescriptor for \(path) after reading.") } } catch { - if errno != ENOENT { // file not found is ok, e.g. when no baselines exist + if errno != ENOENT { // file not found is ok, e.g. 
when no baselines exist print("Failed to open file \(path), errno = [\(errno)]") } } @@ -140,7 +140,7 @@ final class OperatingSystemStatsProducer { syscalls: 0, contextSwitches: 0, threads: threads, - threadsRunning: threadsRunning, // we can go dig in /proc/self/task/ later if want this + threadsRunning: threadsRunning, // we can go dig in /proc/self/task/ later if want this readSyscalls: Int(ioStats.readSyscalls), writeSyscalls: Int(ioStats.writeSyscalls), readBytesLogical: Int(ioStats.readBytesLogical), @@ -163,7 +163,7 @@ final class OperatingSystemStatsProducer { } } - func startSampling(_: Int = 10_000) { // sample rate in microseconds + func startSampling(_: Int = 10_000) { // sample rate in microseconds let sampleSemaphore = DispatchSemaphore(value: 0) DispatchQueue.global(qos: .userInitiated) @@ -205,7 +205,7 @@ final class OperatingSystemStatsProducer { self.lock.unlock() - if firstEventSampled == false { // allow calling thread to continue when we have captured a sample + if firstEventSampled == false { // allow calling thread to continue when we have captured a sample firstEventSampled = true sampleSemaphore.signal() } diff --git a/Sources/Benchmark/Progress/ProgressElements.swift b/Sources/Benchmark/Progress/ProgressElements.swift index 95920beb..26c4efa5 100644 --- a/Sources/Benchmark/Progress/ProgressElements.swift +++ b/Sources/Benchmark/Progress/ProgressElements.swift @@ -83,7 +83,7 @@ public struct ProgressPercent: ProgressElementType { while padded.count < 4 { padded = " " + padded } - return padded // "\(percentDone.format(decimalPlaces))%" + return padded // "\(percentDone.format(decimalPlaces))%" } } diff --git a/Sources/Benchmark/Statistics.swift b/Sources/Benchmark/Statistics.swift index 6cbe7fc0..49dec756 100644 --- a/Sources/Benchmark/Statistics.swift +++ b/Sources/Benchmark/Statistics.swift @@ -15,18 +15,18 @@ import Numerics // A type that provides distribution / percentile calculations of latency measurements @_documentation(visibility: 
internal) public final class Statistics: Codable { - public static let defaultMaximumMeasurement = 1_000_000_000 // 1 second in nanoseconds + public static let defaultMaximumMeasurement = 1_000_000_000 // 1 second in nanoseconds public static let defaultPercentilesToCalculate = [0.0, 25.0, 50.0, 75.0, 90.0, 99.0, 100.0] public static let defaultPercentilesToCalculateP90Index = 4 public enum Units: Int, Codable, CaseIterable { - case count = 1 // e.g. nanoseconds - case kilo = 1_000 // microseconds - case mega = 1_000_000 // milliseconds - case giga = 1_000_000_000 // seconds - case tera = 1_000_000_000_000 // 1K seconds - case peta = 1_000_000_000_000_000 // 1M seconds - case automatic = 0 // will pick time unit above automatically + case count = 1 // e.g. nanoseconds + case kilo = 1_000 // microseconds + case mega = 1_000_000 // milliseconds + case giga = 1_000_000_000 // seconds + case tera = 1_000_000_000_000 // 1K seconds + case peta = 1_000_000_000_000_000 // 1M seconds + case automatic = 0 // will pick time unit above automatically public var description: String { switch self { @@ -173,7 +173,7 @@ public final class Statistics: Codable { @inline(__always) public func add(_ measurement: Int) { guard measurement >= 0 else { - return // We sometimes got a <0 measurement, should run with fatalError and try to see how that could occur + return // We sometimes got a <0 measurement, should run with fatalError and try to see how that could occur // fatalError() } diff --git a/Tests/BenchmarkTests/BenchmarkRunnerTests.swift b/Tests/BenchmarkTests/BenchmarkRunnerTests.swift index a7724d65..31f17580 100644 --- a/Tests/BenchmarkTests/BenchmarkRunnerTests.swift +++ b/Tests/BenchmarkTests/BenchmarkRunnerTests.swift @@ -56,7 +56,7 @@ final class BenchmarkRunnerTests: XCTestCase, BenchmarkRunnerReadWrite { runner.quiet = false runner.timeUnits = .nanoseconds try await runner.run() - XCTAssertEqual(writeCount, 6) // 3 tests results + 3 end markers + XCTAssertEqual(writeCount, 
6) // 3 tests results + 3 end markers } } From 46e72246908a0ec8e2c5c1152bb05b8f6f5432ea Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:01:05 +0100 Subject: [PATCH 18/37] improve malloc interposer performance --- Benchmarks/Package.resolved | 4 +- .../MallocInterposerC/include/interposer.h | 57 +-- .../MallocInterposerC/src/interposer-darwin.c | 227 +++++------ .../MallocInterposerC/src/interposer-unix.c | 135 +++---- .../MallocInterposerSwift/Package.resolved | 15 - .../MallocInterposerSwift/Package.swift | 2 - .../MallocInterposerSwift.swift | 226 ++--------- Package.resolved | 7 +- Package.swift | 6 +- Package@swift-6.2.swift | 135 +++++++ Sources/Benchmark/BenchmarkExecutor.swift | 33 ++ Sources/Benchmark/BenchmarkRunner.swift | 4 + .../MallocStats+jemalloc-support.swift | 367 ++++++++++++++++++ 13 files changed, 742 insertions(+), 476 deletions(-) delete mode 100644 LocalPackages/MallocInterposerSwift/Package.resolved create mode 100644 Package@swift-6.2.swift create mode 100644 Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index b7658529..1c475535 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -6,8 +6,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/HdrHistogram/hdrhistogram-swift", "state" : { - "revision" : "a69fa24d7b70421870cafa86340ece900489e17e", - "version" : "0.1.2" + "revision" : "de0b9b8a27956b9bfc9b4dce7d1c38ad7c579f19", + "version" : "0.1.4" } }, { diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h index f4d4118f..92c9a7cf 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -11,6 +11,7 @@ 
#ifndef INTERPOSER_H #define INTERPOSER_H +#include #include #include #include @@ -18,54 +19,15 @@ # include #endif -// Hook function types -typedef void (*malloc_hook_t)(size_t size); -typedef void (*free_hook_t)(void* ptr); -typedef void (*calloc_hook_t)(size_t nmemb, size_t size); -typedef void (*realloc_hook_t)(void* ptr, size_t size); -typedef void (*valloc_hook_t)(size_t size); -typedef void (*posix_memalign_hook_t)(void **memptr, size_t alignment, size_t size); +// Enable/disable counting and reset/read stats +void malloc_interposer_enable(void); +void malloc_interposer_disable(void); +void malloc_interposer_reset(void); +void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, + int64_t *malloc_small, int64_t *malloc_large, + int64_t *free_count, int64_t *free_bytes); -#if __APPLE__ -typedef void (*malloc_zone_hook_t)(malloc_zone_t *zone, size_t size); -typedef void (*malloc_zone_calloc_hook_t)(malloc_zone_t *zone, size_t num_items, size_t size); -typedef void (*malloc_zone_realloc_hook_t)(malloc_zone_t *zone, void *ptr, size_t size); -typedef void (*malloc_zone_memalign_hook_t)(malloc_zone_t *zone, size_t alignment, size_t size); -typedef void (*malloc_zone_valloc_hook_t)(malloc_zone_t *zone, size_t size); -typedef void (*malloc_zone_free_hook_t)(malloc_zone_t *zone, void *ptr); -#endif - -// Hook management functions -void set_malloc_hook(malloc_hook_t hook); -void set_free_hook(free_hook_t hook); -void set_calloc_hook(calloc_hook_t hook); -void set_realloc_hook(realloc_hook_t hook); -void set_posix_memalign_hook(posix_memalign_hook_t hook); - -#if __APPLE__ -void set_malloc_zone_hook(malloc_zone_hook_t hook); -void set_malloc_zone_calloc_hook(malloc_zone_calloc_hook_t hook); -void set_malloc_zone_realloc_hook(malloc_zone_realloc_hook_t hook); -void set_malloc_zone_memalign_hook(malloc_zone_memalign_hook_t hook); -void set_malloc_zone_valloc_hook(malloc_zone_valloc_hook_t hook); -void set_malloc_zone_free_hook(malloc_zone_free_hook_t 
hook); -#endif - -void clear_malloc_hook(void); -void clear_free_hook(void); -void clear_calloc_hook(void); -void clear_realloc_hook(void); - -#if __APPLE__ -void clear_malloc_zone_hook(void); -void clear_malloc_zone_calloc_hook(void); -void clear_malloc_zone_realloc_hook(void); -void clear_malloc_zone_memalign_hook(void); -void clear_malloc_zone_valloc_hook(void); -void clear_malloc_zone_free_hook(void); -#endif - -// Replacement functions +// Replacement functions (used internally for DYLD_INTERPOSE and Linux overrides) void *replacement_malloc(size_t size); void replacement_free(void *ptr); void *replacement_calloc(size_t nmemb, size_t size); @@ -86,7 +48,6 @@ void *valloc(size_t size); int posix_memalign(void **memptr, size_t alignment, size_t size); #endif - #if __APPLE__ void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size); void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size); diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c index 03ea9a14..e330e22a 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c @@ -26,181 +26,137 @@ #include #include #include -#include #include -// Global hooks -static malloc_hook_t g_malloc_hook = NULL; -static free_hook_t g_free_hook = NULL; -static calloc_hook_t g_calloc_hook = NULL; -static realloc_hook_t g_realloc_hook = NULL; -static valloc_hook_t g_valloc_hook = NULL; -static posix_memalign_hook_t g_posix_memalign_hook = NULL; -static malloc_zone_hook_t g_malloc_zone_hook = NULL; -static malloc_zone_realloc_hook_t g_malloc_zone_realloc_hook = NULL; -static malloc_zone_calloc_hook_t g_malloc_zone_calloc_hook = NULL; -static malloc_zone_valloc_hook_t g_malloc_zone_valloc_hook = NULL; -static malloc_zone_memalign_hook_t 
g_malloc_zone_memalign_hook = NULL; -static malloc_zone_free_hook_t g_malloc_zone_free_hook = NULL; - -// Statistics -static pthread_mutex_t hook_mutex = PTHREAD_MUTEX_INITIALIZER; - -#define DYLD_INTERPOSE(_replacement,_replacee) \ - __attribute__((used)) static struct { const void *replacement; const void *replacee; } _interpose_##_replacee \ - __attribute__ ((section("__DATA,__interpose"))) = { (const void *)(unsigned long)&_replacement, (const void *)(unsigned long)&_replacee }; - -/* on Darwin calling the original function is super easy, just call it, done. */ -#define JUMP_INTO_LIBC_FUN(_fun, ...) /* \ -*/ do { /* \ -*/ return _fun(__VA_ARGS__); /* \ -*/ } while(0) - -// Hook management functions -void set_malloc_hook(malloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +// Counting state — all updated on the malloc hot path, so use relaxed atomics. +static _Atomic bool g_counting_enabled = false; +static _Atomic int64_t g_malloc_count = 0; +static _Atomic int64_t g_malloc_bytes = 0; +static _Atomic int64_t g_malloc_small = 0; +static _Atomic int64_t g_malloc_large = 0; +static _Atomic int64_t g_free_count = 0; +static _Atomic int64_t g_free_bytes = 0; + +// Cached page size for small/large classification +static int g_page_size = 0; + +static int get_page_size(void) { + if (__builtin_expect(g_page_size == 0, 0)) { + g_page_size = (int)getpagesize(); + } + return g_page_size; } -void set_free_hook(free_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_free_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +// Public API ---------------------------------------------------------------- -void set_calloc_hook(calloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_calloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_enable(void) { + atomic_store_explicit(&g_counting_enabled, true, memory_order_release); } -void set_realloc_hook(realloc_hook_t hook) { - 
pthread_mutex_lock(&hook_mutex); - g_realloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_disable(void) { + atomic_store_explicit(&g_counting_enabled, false, memory_order_release); } -void set_valloc_hook(valloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_valloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_reset(void) { + atomic_store_explicit(&g_malloc_count, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_bytes, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_small, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_large, 0, memory_order_relaxed); + atomic_store_explicit(&g_free_count, 0, memory_order_relaxed); + atomic_store_explicit(&g_free_bytes, 0, memory_order_relaxed); + atomic_thread_fence(memory_order_release); } -void set_posix_memalign_hook(posix_memalign_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_posix_memalign_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, + int64_t *malloc_small, int64_t *malloc_large, + int64_t *free_count, int64_t *free_bytes) { + *malloc_count = atomic_load_explicit(&g_malloc_count, memory_order_relaxed); + *malloc_bytes = atomic_load_explicit(&g_malloc_bytes, memory_order_relaxed); + *malloc_small = atomic_load_explicit(&g_malloc_small, memory_order_relaxed); + *malloc_large = atomic_load_explicit(&g_malloc_large, memory_order_relaxed); + *free_count = atomic_load_explicit(&g_free_count, memory_order_relaxed); + *free_bytes = atomic_load_explicit(&g_free_bytes, memory_order_relaxed); } -void set_malloc_zone_hook(malloc_zone_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_zone_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +// --------------------------------------------------------------------------- -void set_malloc_zone_realloc_hook(malloc_zone_realloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - 
g_malloc_zone_realloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +#define DYLD_INTERPOSE(_replacement,_replacee) \ + __attribute__((used)) static struct { const void *replacement; const void *replacee; } _interpose_##_replacee \ + __attribute__ ((section("__DATA,__interpose"))) = { (const void *)(unsigned long)&_replacement, (const void *)(unsigned long)&_replacee }; -void set_malloc_zone_calloc_hook(malloc_zone_calloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_zone_calloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +/* on Darwin calling the original function is super easy, just call it, done. */ +#define JUMP_INTO_LIBC_FUN(_fun, ...) /* \ +*/ do { /* \ +*/ return _fun(__VA_ARGS__); /* \ +*/ } while(0) -void set_malloc_zone_valloc_hook(malloc_zone_valloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_zone_valloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +// Inline counting helpers --------------------------------------------------- -void set_malloc_zone_memalign_hook(malloc_zone_memalign_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_zone_memalign_hook = hook; - pthread_mutex_unlock(&hook_mutex); +static __attribute__((always_inline)) void count_malloc(size_t size) { + atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); + if ((int)size > get_page_size()) { + atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); + } else { + atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); + } } -void set_malloc_zone_free_hook(malloc_zone_free_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_zone_free_hook = hook; - pthread_mutex_unlock(&hook_mutex); +static __attribute__((always_inline)) void count_free(void *ptr) { + size_t size = malloc_size(ptr); + atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_free_bytes, 
(int64_t)size, memory_order_relaxed); } -// Clear hooks -void clear_malloc_hook(void) { set_malloc_hook(NULL); } -void clear_free_hook(void) { set_free_hook(NULL); } -void clear_calloc_hook(void) { set_calloc_hook(NULL); } -void clear_realloc_hook(void) { set_realloc_hook(NULL); } -void clear_valloc_hook(void) { set_valloc_hook(NULL); } -void clear_posix_memalign_hook(void) { set_posix_memalign_hook(NULL); } -void clear_malloc_zone_hook(void) { set_malloc_zone_hook(NULL); } -void clear_malloc_zone_realloc_hook(void) { set_malloc_zone_realloc_hook(NULL); } -void clear_malloc_zone_calloc_hook(void) { set_malloc_zone_calloc_hook(NULL); } -void clear_malloc_zone_valloc_hook(void) { set_malloc_zone_valloc_hook(NULL); } -void clear_malloc_zone_memalign_hook(void) { set_malloc_zone_memalign_hook(NULL); } -void clear_malloc_zone_free_hook(void) { set_malloc_zone_free_hook(NULL); } +// Replacement functions ----------------------------------------------------- -// Replacement functions void replacement_free(void *ptr) { - - // Call hook if set - if (g_free_hook) { - g_free_hook(ptr); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(ptr); } - JUMP_INTO_LIBC_FUN(free, ptr); } void *replacement_malloc(size_t size) { - - // Call hook if set - if (g_malloc_hook) { - g_malloc_hook(size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - - JUMP_INTO_LIBC_FUN(malloc, size); + JUMP_INTO_LIBC_FUN(malloc, size); } void *replacement_realloc(void *ptr, size_t size) { - if (g_realloc_hook) { - g_realloc_hook(ptr, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(ptr); + count_malloc(size); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); } void *replacement_calloc(size_t count, size_t size) { - if (g_calloc_hook) { - g_calloc_hook(count, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(count * size); } - 
JUMP_INTO_LIBC_FUN(calloc, count, size); } void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size) { - if (g_malloc_zone_hook) { - g_malloc_zone_hook(zone, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(malloc_zone_malloc, zone, size); } void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) { - if (g_malloc_zone_calloc_hook) { - g_malloc_zone_calloc_hook(zone, num_items, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(num_items * size); } - JUMP_INTO_LIBC_FUN(malloc_zone_calloc, zone, num_items, size); } void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size) { - if (g_malloc_zone_valloc_hook) { - g_malloc_zone_valloc_hook(zone, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(malloc_zone_valloc, zone, size); } @@ -212,27 +168,24 @@ void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t siz if (!ptr) { return replacement_malloc(size); } - - if (g_malloc_zone_realloc_hook) { - g_malloc_zone_realloc_hook(zone, ptr, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(ptr); + count_malloc(size); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); } void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { - if (g_malloc_zone_memalign_hook) { - g_malloc_zone_memalign_hook(zone, alignment, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(malloc_zone_memalign, zone, alignment, size); } void replacement_malloc_zone_free(malloc_zone_t *zone, void *ptr) { - if (g_malloc_zone_free_hook) { - g_malloc_zone_free_hook(zone, ptr); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + atomic_fetch_add_explicit(&g_free_count, 1, 
memory_order_relaxed); } - JUMP_INTO_LIBC_FUN(malloc_zone_free, zone, ptr); } @@ -245,18 +198,16 @@ void *replacement_reallocf(void *ptr, size_t size) { } void *replacement_valloc(size_t size) { - if (g_valloc_hook) { - g_valloc_hook(size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(valloc, size); } int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - if (g_posix_memalign_hook) { - g_posix_memalign_hook(memptr, alignment, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(posix_memalign, memptr, alignment, size); } diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c index 6e17ae55..08a23669 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -60,61 +61,57 @@ _Atomic type_libc_accept g_libc_accept; _Atomic type_libc_accept4 g_libc_accept4; _Atomic type_libc_close g_libc_close; -// Global hooks -static malloc_hook_t g_malloc_hook = NULL; -static free_hook_t g_free_hook = NULL; -static calloc_hook_t g_calloc_hook = NULL; -static realloc_hook_t g_realloc_hook = NULL; -static valloc_hook_t g_valloc_hook = NULL; -static posix_memalign_hook_t g_posix_memalign_hook = NULL; - -// Statistics -static pthread_mutex_t hook_mutex = PTHREAD_MUTEX_INITIALIZER; - -// Hook management functions -void set_malloc_hook(malloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_malloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +// Counting state — all updated on the malloc hot path, so use relaxed atomics. 
+static _Atomic bool g_counting_enabled = false; +static _Atomic int64_t g_malloc_count = 0; +static _Atomic int64_t g_malloc_bytes = 0; +static _Atomic int64_t g_malloc_small = 0; +static _Atomic int64_t g_malloc_large = 0; +static _Atomic int64_t g_free_count = 0; +static _Atomic int64_t g_free_bytes = 0; + +// Cached page size for small/large classification +static int g_page_size = 0; + +static int get_page_size(void) { + if (__builtin_expect(g_page_size == 0, 0)) { + g_page_size = (int)getpagesize(); + } + return g_page_size; } -void set_free_hook(free_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_free_hook = hook; - pthread_mutex_unlock(&hook_mutex); -} +// Public API ---------------------------------------------------------------- -void set_calloc_hook(calloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_calloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_enable(void) { + atomic_store_explicit(&g_counting_enabled, true, memory_order_release); } -void set_realloc_hook(realloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_realloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_disable(void) { + atomic_store_explicit(&g_counting_enabled, false, memory_order_release); } -void set_valloc_hook(valloc_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - g_valloc_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_reset(void) { + atomic_store_explicit(&g_malloc_count, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_bytes, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_small, 0, memory_order_relaxed); + atomic_store_explicit(&g_malloc_large, 0, memory_order_relaxed); + atomic_store_explicit(&g_free_count, 0, memory_order_relaxed); + atomic_store_explicit(&g_free_bytes, 0, memory_order_relaxed); + atomic_thread_fence(memory_order_release); } -void set_posix_memalign_hook(posix_memalign_hook_t hook) { - pthread_mutex_lock(&hook_mutex); - 
g_posix_memalign_hook = hook; - pthread_mutex_unlock(&hook_mutex); +void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, + int64_t *malloc_small, int64_t *malloc_large, + int64_t *free_count, int64_t *free_bytes) { + *malloc_count = atomic_load_explicit(&g_malloc_count, memory_order_relaxed); + *malloc_bytes = atomic_load_explicit(&g_malloc_bytes, memory_order_relaxed); + *malloc_small = atomic_load_explicit(&g_malloc_small, memory_order_relaxed); + *malloc_large = atomic_load_explicit(&g_malloc_large, memory_order_relaxed); + *free_count = atomic_load_explicit(&g_free_count, memory_order_relaxed); + *free_bytes = atomic_load_explicit(&g_free_bytes, memory_order_relaxed); } -// Clear hooks -void clear_malloc_hook(void) { set_malloc_hook(NULL); } -void clear_free_hook(void) { set_free_hook(NULL); } -void clear_calloc_hook(void) { set_calloc_hook(NULL); } -void clear_realloc_hook(void) { set_realloc_hook(NULL); } -void clear_valloc_hook(void) { set_valloc_hook(NULL); } -void clear_posix_memalign_hook(void) { set_posix_memalign_hook(NULL); } +// --------------------------------------------------------------------------- // this is called if malloc is called whilst trying to resolve libc's realloc. // we just vend out pointers to a large block in the BSS (which we never free). 
@@ -219,13 +216,31 @@ static int recursive_close(int fildes) { */ return local_fun(__VA_ARGS__); /* \ */ } while(0) +// Inline counting helpers --------------------------------------------------- + +static __attribute__((always_inline)) void count_malloc(size_t size) { + atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); + if ((int)size > get_page_size()) { + atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); + } else { + atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); + } +} + +static __attribute__((always_inline)) void count_free(void *ptr) { + size_t size = malloc_usable_size(ptr); + atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); + atomic_fetch_add_explicit(&g_free_bytes, (int64_t)size, memory_order_relaxed); +} + +// Replacement functions ----------------------------------------------------- + void replacement_free(void *ptr) { if (ptr) { - - if (g_free_hook) { - g_free_hook(ptr); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(ptr); } - if (!is_recursive_malloc_block(ptr)) { JUMP_INTO_LIBC_FUN(free, ptr); } @@ -233,10 +248,9 @@ void replacement_free(void *ptr) { } void *replacement_malloc(size_t size) { - if (g_malloc_hook) { - g_malloc_hook(size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); } - JUMP_INTO_LIBC_FUN(malloc, size); } @@ -248,22 +262,16 @@ void *replacement_realloc(void *ptr, size_t size) { if (!ptr) { return replacement_malloc(size); } - - if (g_realloc_hook) { - g_realloc_hook(ptr, size); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(ptr); + count_malloc(size); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); } void *replacement_calloc(size_t count, size_t size) { void *ptr = replacement_malloc(count * size); memset(ptr, 0, count * size); - - if (g_calloc_hook) { - 
g_calloc_hook(count, size); - } - return ptr; } @@ -276,18 +284,11 @@ void *replacement_reallocf(void *ptr, size_t size) { } void *replacement_valloc(size_t size) { - if (g_valloc_hook) { - g_valloc_hook(size); - } // not aligning correctly (should be PAGE_SIZE) but good enough return replacement_malloc(size); } int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - if (g_posix_memalign_hook) { - g_posix_memalign_hook(memptr, alignment, size); - } - // not aligning correctly (should be `alignment`) but good enough void *ptr = replacement_malloc(size); if (ptr && memptr) { diff --git a/LocalPackages/MallocInterposerSwift/Package.resolved b/LocalPackages/MallocInterposerSwift/Package.resolved deleted file mode 100644 index 30a41cda..00000000 --- a/LocalPackages/MallocInterposerSwift/Package.resolved +++ /dev/null @@ -1,15 +0,0 @@ -{ - "originHash" : "f9d52b4684b4f378f6711fa01082569f9206a98fc7e9e15cb2fc72bbeafb9737", - "pins" : [ - { - "identity" : "swift-atomics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-atomics.git", - "state" : { - "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", - "version" : "1.3.0" - } - } - ], - "version" : 3 -} diff --git a/LocalPackages/MallocInterposerSwift/Package.swift b/LocalPackages/MallocInterposerSwift/Package.swift index bc7edc3d..dab0e337 100644 --- a/LocalPackages/MallocInterposerSwift/Package.swift +++ b/LocalPackages/MallocInterposerSwift/Package.swift @@ -14,7 +14,6 @@ let package = Package( ], dependencies: [ .package(path: "../MallocInterposerC"), - .package(url: "https://github.com/apple/swift-atomics.git", from: "1.3.0"), ], targets: [ // Targets are the basic building blocks of a package, defining a module or a test suite. 
@@ -23,7 +22,6 @@ let package = Package( name: "MallocInterposerSwift", dependencies: [ .product(name: "MallocInterposerC", package: "MallocInterposerC"), - .product(name: "Atomics", package: "swift-atomics"), ]), .executableTarget( name: "SwiftTestClient", diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift index 14bcb56a..3f0c5fc0 100644 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift +++ b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift @@ -8,224 +8,52 @@ // http://www.apache.org/licenses/LICENSE-2.0 // -import Atomics -import Foundation import MallocInterposerC -#if canImport(Glibc) -import Glibc -// We need to expose malloc_usable_size manually since it's not exposed through Glibc -@_silgen_name("malloc_usable_size") -func malloc_usable_size(_ ptr: UnsafeMutableRawPointer?) -> Int -#endif - -/// Swift-friendly hook types -public typealias MallocHook = @convention(c) (Int) -> Void -public typealias FreeHook = @convention(c) (UnsafeMutableRawPointer?) -> Void -public typealias CallocHook = @convention(c) (Int, Int) -> Void -public typealias ReallocHook = @convention(c) (UnsafeMutableRawPointer?, Int) -> Void -public typealias PosixMemalignHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void - -#if canImport(Darwin) -public typealias MallocZoneHook = @convention(c) (UnsafeMutablePointer?, Int) -> Void -public typealias MallocZoneFreeHook = @convention(c) (UnsafeMutablePointer?, UnsafeMutableRawPointer?) 
-> Void -public typealias MallocZoneCallocHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void -public typealias MallocZoneReallocHook = @convention(c) (UnsafeMutablePointer?, UnsafeMutableRawPointer?, Int) -> Void -public typealias MallocZoneVallocHook = @convention(c) (UnsafeMutablePointer?, Int) -> Void -public typealias MallocZoneMemalignHook = @convention(c) (UnsafeMutablePointer?, Int, Int) -> Void -#endif - -/// Main class for managing malloc interposition +/// Main class for managing malloc interposition. +/// Counting is performed entirely in C using _Atomic int64_t globals, +/// so there is no Swift dispatch overhead on the malloc hot path. public class MallocInterposerSwift: @unchecked Sendable { - /// We use `UnsafeAtomic` in order to avoid malloc calls during interposition - nonisolated(unsafe) private static var mallocCount: ManagedAtomic! - nonisolated(unsafe) private static var mallocBytesCount: ManagedAtomic! - nonisolated(unsafe) private static var freeCount: ManagedAtomic! - nonisolated(unsafe) private static var freeBytesCount: ManagedAtomic! - nonisolated(unsafe) private static var mallocSmallCount: ManagedAtomic! - nonisolated(unsafe) private static var mallocLargeCount: ManagedAtomic! - static let pageSize = getpagesize() - private init() {} - // Initialize the atomic counters before hooking - // because ManagedAtomic calls into malloc + /// Call once at startup (before hook()) to initialize C-side state. public static func initialize() { - mallocCount = ManagedAtomic(0) - mallocBytesCount = ManagedAtomic(0) - freeCount = ManagedAtomic(0) - freeBytesCount = ManagedAtomic(0) - mallocSmallCount = ManagedAtomic(0) - mallocLargeCount = ManagedAtomic(0) + malloc_interposer_reset() } + /// Start counting allocations. 
public static func hook() { - - let mallocHook: MallocHook = { size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - - let freeHook: FreeHook = { pointer in - MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) - #if canImport(Darwin) - let size = malloc_size(pointer) - #else - let size = malloc_usable_size(pointer) - #endif - MallocInterposerSwift.freeBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - } - - let callocHook: CallocHook = { num, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - let total = num * size - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: total, ordering: .relaxed) - - if total > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - - let reallocHook: ReallocHook = { pointer, size in - MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - - let posixMemalignHook: PosixMemalignHook = { pointer, alignment, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - 
if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - - #if canImport(Darwin) - let mallocZoneHook: MallocZoneHook = { zone, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - let mallocZoneFreeHook: MallocZoneFreeHook = { zone, pointer in - MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) - } - let mallocZoneCallocHook: MallocZoneCallocHook = { zone, num, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - let total = num * size - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: total, ordering: .relaxed) - - if total > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - let mallocZoneReallocHook: MallocZoneReallocHook = { zone, pointer, size in - MallocInterposerSwift.freeCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - let mallocZoneVallocHook: MallocZoneVallocHook = { zone, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - 
MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - let mallocZoneMemalignHook: MallocZoneMemalignHook = { zone, alignment, size in - MallocInterposerSwift.mallocCount.wrappingIncrement(ordering: .relaxed) - MallocInterposerSwift.mallocBytesCount.wrappingIncrement(by: size, ordering: .relaxed) - - if size > MallocInterposerSwift.pageSize { - MallocInterposerSwift.mallocLargeCount.wrappingIncrement(ordering: .relaxed) - } else { - MallocInterposerSwift.mallocSmallCount.wrappingIncrement(ordering: .relaxed) - } - } - - set_malloc_zone_hook(mallocZoneHook) - set_malloc_zone_free_hook(mallocZoneFreeHook) - set_malloc_zone_calloc_hook(mallocZoneCallocHook) - set_malloc_zone_realloc_hook(mallocZoneReallocHook) - set_malloc_zone_valloc_hook(mallocZoneVallocHook) - set_malloc_zone_memalign_hook(mallocZoneMemalignHook) - #endif - - set_malloc_hook(mallocHook) - set_free_hook(freeHook) - set_calloc_hook(callocHook) - set_realloc_hook(reallocHook) - set_posix_memalign_hook(posixMemalignHook) + malloc_interposer_reset() + malloc_interposer_enable() } + /// Stop counting allocations. public static func unhook() { - set_malloc_hook(nil) - set_free_hook(nil) - set_calloc_hook(nil) - set_realloc_hook(nil) - set_posix_memalign_hook(nil) - - #if canImport(Darwin) - set_malloc_zone_hook(nil) - set_malloc_zone_free_hook(nil) - set_malloc_zone_calloc_hook(nil) - set_malloc_zone_realloc_hook(nil) - set_malloc_zone_valloc_hook(nil) - set_malloc_zone_memalign_hook(nil) - #endif + malloc_interposer_disable() } + /// Reset all counters to zero (counting state unchanged). 
public static func reset() { - mallocCount.store(0, ordering: .relaxed) - mallocBytesCount.store(0, ordering: .relaxed) - freeCount.store(0, ordering: .relaxed) - freeBytesCount.store(0, ordering: .relaxed) - mallocSmallCount.store(0, ordering: .relaxed) - mallocLargeCount.store(0, ordering: .relaxed) + malloc_interposer_reset() } + /// Read the current counter snapshot. public static func getStatistics() -> Statistics { - let stats = Statistics( - mallocCount: mallocCount.load(ordering: .relaxed), - mallocBytesCount: mallocBytesCount.load(ordering: .relaxed), - mallocSmallCount: mallocSmallCount.load(ordering: .relaxed), - mallocLargeCount: mallocLargeCount.load(ordering: .relaxed), - freeCount: freeCount.load(ordering: .relaxed), - freeBytesCount: freeBytesCount.load(ordering: .relaxed) + var mallocCount: Int64 = 0 + var mallocBytes: Int64 = 0 + var mallocSmall: Int64 = 0 + var mallocLarge: Int64 = 0 + var freeCount: Int64 = 0 + var freeBytes: Int64 = 0 + malloc_interposer_get_stats(&mallocCount, &mallocBytes, &mallocSmall, &mallocLarge, &freeCount, &freeBytes) + return Statistics( + mallocCount: Int(mallocCount), + mallocBytesCount: Int(mallocBytes), + mallocSmallCount: Int(mallocSmall), + mallocLargeCount: Int(mallocLarge), + freeCount: Int(freeCount), + freeBytesCount: Int(freeBytes) ) - - return stats } } diff --git a/Package.resolved b/Package.resolved index 1514ff81..8d591558 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,12 +1,13 @@ { + "originHash" : "84469d781744f78b0b64fbe87db45451202dcf2fb4bf5be28fb53e29a1cac9ff", "pins" : [ { "identity" : "hdrhistogram-swift", "kind" : "remoteSourceControl", "location" : "https://github.com/HdrHistogram/hdrhistogram-swift.git", "state" : { - "revision" : "93a1618c8aa20f6a521a9da656a3e0591889e9dc", - "version" : "0.1.3" + "revision" : "de0b9b8a27956b9bfc9b4dce7d1c38ad7c579f19", + "version" : "0.1.4" } }, { @@ -64,5 +65,5 @@ } } ], - "version" : 2 + "version" : 3 } diff --git a/Package.swift b/Package.swift 
index 7ff35c07..60582315 100644 --- a/Package.swift +++ b/Package.swift @@ -26,6 +26,7 @@ let package = Package( .package(url: "https://github.com/ordo-one/TextTable.git", .upToNextMajor(from: "0.0.1")), .package(url: "https://github.com/HdrHistogram/hdrhistogram-swift.git", .upToNextMajor(from: "0.1.4")), .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.0.0")), + .package(path: "LocalPackages/MallocInterposerC"), .package(path: "LocalPackages/MallocInterposerSwift"), ], targets: [ @@ -129,9 +130,10 @@ var dependencies: [PackageDescription.Target.Dependency] = [ .product(name: "Atomics", package: "swift-atomics"), "SwiftRuntimeHooks", "BenchmarkShared", - "MallocInterposerSwift" + .product(name: "MallocInterposerC", package: "MallocInterposerC"), + "MallocInterposerSwift", ] package.targets += [ - .target(name: "Benchmark", dependencies: dependencies, swiftSettings: [.swiftLanguageMode(.v5)) + .target(name: "Benchmark", dependencies: dependencies, swiftSettings: [.swiftLanguageMode(.v5)]) ] diff --git a/Package@swift-6.2.swift b/Package@swift-6.2.swift new file mode 100644 index 00000000..30fdd98f --- /dev/null +++ b/Package@swift-6.2.swift @@ -0,0 +1,135 @@ +// swift-tools-version: 6.1 + +import PackageDescription + +let package = Package( + name: "Benchmark", + platforms: [ + .macOS(.v13), + .iOS(.v16), + ], + products: [ + .plugin(name: "BenchmarkCommandPlugin", targets: ["BenchmarkCommandPlugin"]), + .plugin(name: "BenchmarkPlugin", targets: ["BenchmarkPlugin"]), + .library( + name: "Benchmark", + targets: ["Benchmark"] + ), + ], + traits: [ + .trait(name: "Jemalloc"), + .default(enabledTraits: ["Jemalloc"]), + ], + dependencies: [ + .package(url: "https://github.com/apple/swift-system.git", .upToNextMajor(from: "1.1.0")), + .package(url: "https://github.com/apple/swift-argument-parser.git", "1.1.0"..<"1.6.0"), + .package(url: "https://github.com/ordo-one/TextTable.git", .upToNextMajor(from: "0.0.1")), + .package(url: 
"https://github.com/HdrHistogram/hdrhistogram-swift.git", .upToNextMajor(from: "0.1.4")), + .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.0.0")), + .package(url: "https://github.com/ordo-one/package-jemalloc.git", .upToNextMajor(from: "1.0.0")), + ], + targets: [ + .target( + name: "Benchmark", + dependencies: [ + .product(name: "Histogram", package: "hdrhistogram-swift"), + .product(name: "ArgumentParser", package: "swift-argument-parser"), + .product(name: "SystemPackage", package: "swift-system"), + .byNameItem(name: "CDarwinOperatingSystemStats", condition: .when(platforms: [.macOS, .iOS])), + .byNameItem(name: "CLinuxOperatingSystemStats", condition: .when(platforms: [.linux])), + .product(name: "Atomics", package: "swift-atomics"), + "SwiftRuntimeHooks", + "BenchmarkShared", + .product( + name: "jemalloc", package: "package-jemalloc", condition: .when(platforms: [.macOS, .linux], traits: ["Jemalloc"])), + ], + swiftSettings: [.swiftLanguageMode(.v5)] + ), + // Plugins used by users of the package + + // The actual 'benchmark' command plugin + .plugin( + name: "BenchmarkCommandPlugin", + capability: .command( + intent: .custom( + verb: "benchmark", + description: "Run the Benchmark performance test suite." 
+ ) + ), + dependencies: [ + "BenchmarkTool" + ], + path: "Plugins/BenchmarkCommandPlugin" + ), + + // Plugin that generates the boilerplate needed to interface with the Benchmark infrastructure + .plugin( + name: "BenchmarkPlugin", + capability: .buildTool(), + dependencies: [ + "BenchmarkBoilerplateGenerator" + ], + path: "Plugins/BenchmarkPlugin" + ), + + // Tool that the plugin executes to perform the actual work, the real benchmark driver + .executableTarget( + name: "BenchmarkTool", + dependencies: [ + .product(name: "ArgumentParser", package: "swift-argument-parser"), + .product(name: "SystemPackage", package: "swift-system"), + .product(name: "TextTable", package: "TextTable"), + "Benchmark", + "BenchmarkShared", + ], + path: "Plugins/BenchmarkTool", + swiftSettings: [.swiftLanguageMode(.v5)] + ), + + // Tool that generates the boilerplate + .executableTarget( + name: "BenchmarkBoilerplateGenerator", + dependencies: [ + .product(name: "ArgumentParser", package: "swift-argument-parser"), + .product(name: "SystemPackage", package: "swift-system"), + ], + path: "Plugins/BenchmarkBoilerplateGenerator" + ), + + // Tool that simply generates the man page for the BenchmarkPlugin as we can't use SAP in it... 
:-/ + .executableTarget( + name: "BenchmarkHelpGenerator", + dependencies: [ + .product(name: "ArgumentParser", package: "swift-argument-parser"), + "BenchmarkShared", + ], + path: "Plugins/BenchmarkHelpGenerator" + ), + + // Getting OS specific information + .target( + name: "CDarwinOperatingSystemStats", + dependencies: [], + path: "Platform/CDarwinOperatingSystemStats" + ), + + // Getting OS specific information + .target( + name: "CLinuxOperatingSystemStats", + dependencies: [], + path: "Platform/CLinuxOperatingSystemStats" + ), + + // Hooks for ARC + .target(name: "SwiftRuntimeHooks"), + + // Shared definitions + .target(name: "BenchmarkShared"), + + .testTarget( + name: "BenchmarkTests", + dependencies: ["Benchmark"], + swiftSettings: [.swiftLanguageMode(.v5)] + ), + ] +) diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 04f30398..78f5240b 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -8,7 +8,9 @@ // http://www.apache.org/licenses/LICENSE-2.0 // +#if canImport(MallocInterposerSwift) import MallocInterposerSwift +#endif #if canImport(OSLog) import OSLog @@ -27,8 +29,13 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length // swiftlint:disable cyclomatic_complexity function_body_length func run(_ benchmark: Benchmark) -> [BenchmarkResult] { var wallClockDuration: Duration = .zero + #if canImport(MallocInterposerSwift) var startMallocStats = MallocInterposerSwift.Statistics() var stopMallocStats = MallocInterposerSwift.Statistics() + #else + var startMallocStats = MallocStats() + var stopMallocStats = MallocStats() + #endif var startOperatingSystemStats = OperatingSystemStats() var stopOperatingSystemStats = OperatingSystemStats() var startPerformanceCounters = PerformanceCounters() @@ -153,7 +160,11 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length #endif if mallocStatsRequested { + #if 
canImport(MallocInterposerSwift) startMallocStats = MallocInterposerSwift.getStatistics() + #else + startMallocStats = MallocStatsProducer.makeMallocStats() + #endif } if arcStatsRequested { @@ -190,7 +201,11 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { + #if canImport(MallocInterposerSwift) stopMallocStats = MallocInterposerSwift.getStatistics() + #else + stopMallocStats = MallocStatsProducer.makeMallocStats() + #endif } #if canImport(OSLog) @@ -238,6 +253,7 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { + #if canImport(MallocInterposerSwift) let mallocCount = stopMallocStats.mallocCount - startMallocStats.mallocCount statistics[BenchmarkMetric.mallocCountTotal.index].add(mallocCount) @@ -262,6 +278,19 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length let freeBytes = stopMallocStats.freeBytesCount - startMallocStats.freeBytesCount let memoryLeakedBytes = mallocBytesCount - freeBytes statistics[BenchmarkMetric.memoryLeakedBytes.index].add(Int(memoryLeakedBytes)) + #else + let mallocCountTotal = stopMallocStats.mallocCountTotal - startMallocStats.mallocCountTotal + statistics[BenchmarkMetric.mallocCountTotal.index].add(mallocCountTotal) + + let allocatedResidentMemory = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory + statistics[BenchmarkMetric.allocatedResidentMemory.index].add(allocatedResidentMemory) + + let mallocSmallCount = stopMallocStats.mallocCountSmall - startMallocStats.mallocCountSmall + statistics[BenchmarkMetric.mallocCountSmall.index].add(mallocSmallCount) + + let mallocLargeCount = stopMallocStats.mallocCountLarge - startMallocStats.mallocCountLarge + statistics[BenchmarkMetric.mallocCountLarge.index].add(mallocLargeCount) + #endif } if operatingSystemStatsRequested { @@ -344,7 +373,9 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { 
+ #if canImport(MallocInterposerSwift) MallocInterposerSwift.hook() + #endif } if benchmark.configuration.metrics.contains(.threads) @@ -438,7 +469,9 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length } if mallocStatsRequested { + #if canImport(MallocInterposerSwift) MallocInterposerSwift.unhook() + #endif } #if canImport(OSLog) diff --git a/Sources/Benchmark/BenchmarkRunner.swift b/Sources/Benchmark/BenchmarkRunner.swift index 186b2460..ed1505bf 100644 --- a/Sources/Benchmark/BenchmarkRunner.swift +++ b/Sources/Benchmark/BenchmarkRunner.swift @@ -10,7 +10,9 @@ import ArgumentParser import BenchmarkShared +#if canImport(MallocInterposerSwift) import MallocInterposerSwift +#endif #if canImport(Darwin) import Darwin @@ -115,7 +117,9 @@ public struct BenchmarkRunner: AsyncParsableCommand, BenchmarkRunnerReadWrite { var debugIterator = Benchmark.benchmarks.makeIterator() var benchmarkCommand: BenchmarkCommandRequest + #if canImport(MallocInterposerSwift) MallocInterposerSwift.initialize() + #endif let benchmarkExecutor = BenchmarkExecutor(quiet: quiet) var benchmark: Benchmark? var results: [BenchmarkResult] = [] diff --git a/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift b/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift new file mode 100644 index 00000000..ed2dd8de --- /dev/null +++ b/Sources/Benchmark/MallocStats/MallocStats+jemalloc-support.swift @@ -0,0 +1,367 @@ +// swiftlint:disable all +// +// Copyright (c) 2022 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + +// This file was generated from JSON Schema using quicktype, do not modify it directly. 
+ +// Generated using https://app.quicktype.io with pared down output from +// let optionString = "J" +// malloc_stats_print(nil, nil, optionString) + +#if canImport(jemalloc) + +// MARK: - Pokedex + +struct Pokedex: Codable { + let jemalloc: Jemalloc +} + +// MARK: - Jemalloc + +struct Jemalloc: Codable { + let version: String + let config: Config + let opt: Opt + let arenas: Arenas + let stats: Stats + let statsArenas: StatsArenas + + enum CodingKeys: String, CodingKey { + case version, config, opt, arenas, stats + case statsArenas = "stats.arenas" + } +} + +// MARK: - Arenas + +struct Arenas: Codable { + let narenas, dirtyDecayMS, muzzyDecayMS, quantum: Int + let page, tcacheMax, nbins, nhbins: Int + let bin: [ArenasBin] + let nlextents: Int + let lextent: [ArenasLextent] + + enum CodingKeys: String, CodingKey { + case narenas + case dirtyDecayMS = "dirty_decay_ms" + case muzzyDecayMS = "muzzy_decay_ms" + case quantum, page + case tcacheMax = "tcache_max" + case nbins, nhbins, bin, nlextents, lextent + } +} + +// MARK: - ArenasBin + +struct ArenasBin: Codable { + let size, nregs, slabSize, nshards: Int + + enum CodingKeys: String, CodingKey { + case size, nregs + case slabSize = "slab_size" + case nshards + } +} + +// MARK: - ArenasLextent + +struct ArenasLextent: Codable { + let size: Double +} + +// MARK: - Config + +struct Config: Codable { + let cacheOblivious, debug, fill, lazyLock: Bool + let mallocConf: String + let optSafetyChecks, prof, profLibgcc, profLibunwind: Bool + let stats, utrace, xmalloc: Bool + + enum CodingKeys: String, CodingKey { + case cacheOblivious = "cache_oblivious" + case debug, fill + case lazyLock = "lazy_lock" + case mallocConf = "malloc_conf" + case optSafetyChecks = "opt_safety_checks" + case prof + case profLibgcc = "prof_libgcc" + case profLibunwind = "prof_libunwind" + case stats, utrace, xmalloc + } +} + +// MARK: - Opt + +struct Opt: Codable { + let abort, abortConf, cacheOblivious, confirmConf: Bool + let retain: Bool +
let dss: String + let narenas: Int + let percpuArena: String + let oversizeThreshold: Int + let hpa: Bool + let hpaSlabMaxAlloc, hpaHugificationThreshold, hpaHugifyDelayMS, hpaMinPurgeIntervalMS: Int + let hpaDirtyMult: String + let hpaSECNshards, hpaSECMaxAlloc, hpaSECMaxBytes, hpaSECBytesAfterFlush: Int + let hpaSECBatchFillExtra: Int + let metadataThp: String + let mutexMaxSpin, dirtyDecayMS, muzzyDecayMS, lgExtentMaxActiveFit: Int + let junk: String + let zero, experimentalInfallibleNew, tcache: Bool + let tcacheMax, tcacheNslotsSmallMin, tcacheNslotsSmallMax, tcacheNslotsLarge: Int + let lgTcacheNslotsMul, tcacheGcIncrBytes, tcacheGcDelayBytes, lgTcacheFlushSmallDiv: Int + let lgTcacheFlushLargeDiv: Int + let thp: String + let statsPrint: Bool + let statsPrintOpts: String + let statsInterval: Int + let statsIntervalOpts, zeroRealloc: String + + enum CodingKeys: String, CodingKey { + case abort + case abortConf = "abort_conf" + case cacheOblivious = "cache_oblivious" + case confirmConf = "confirm_conf" + case retain, dss, narenas + case percpuArena = "percpu_arena" + case oversizeThreshold = "oversize_threshold" + case hpa + case hpaSlabMaxAlloc = "hpa_slab_max_alloc" + case hpaHugificationThreshold = "hpa_hugification_threshold" + case hpaHugifyDelayMS = "hpa_hugify_delay_ms" + case hpaMinPurgeIntervalMS = "hpa_min_purge_interval_ms" + case hpaDirtyMult = "hpa_dirty_mult" + case hpaSECNshards = "hpa_sec_nshards" + case hpaSECMaxAlloc = "hpa_sec_max_alloc" + case hpaSECMaxBytes = "hpa_sec_max_bytes" + case hpaSECBytesAfterFlush = "hpa_sec_bytes_after_flush" + case hpaSECBatchFillExtra = "hpa_sec_batch_fill_extra" + case metadataThp = "metadata_thp" + case mutexMaxSpin = "mutex_max_spin" + case dirtyDecayMS = "dirty_decay_ms" + case muzzyDecayMS = "muzzy_decay_ms" + case lgExtentMaxActiveFit = "lg_extent_max_active_fit" + case junk, zero + case experimentalInfallibleNew = "experimental_infallible_new" + case tcache + case tcacheMax = "tcache_max" + case 
tcacheNslotsSmallMin = "tcache_nslots_small_min" + case tcacheNslotsSmallMax = "tcache_nslots_small_max" + case tcacheNslotsLarge = "tcache_nslots_large" + case lgTcacheNslotsMul = "lg_tcache_nslots_mul" + case tcacheGcIncrBytes = "tcache_gc_incr_bytes" + case tcacheGcDelayBytes = "tcache_gc_delay_bytes" + case lgTcacheFlushSmallDiv = "lg_tcache_flush_small_div" + case lgTcacheFlushLargeDiv = "lg_tcache_flush_large_div" + case thp + case statsPrint = "stats_print" + case statsPrintOpts = "stats_print_opts" + case statsInterval = "stats_interval" + case statsIntervalOpts = "stats_interval_opts" + case zeroRealloc = "zero_realloc" + } +} + +// MARK: - Stats + +struct Stats: Codable { + let allocated, active, metadata, metadataThp: Int + let resident, mapped, retained, zeroReallocs: Int + let backgroundThread: StatsBackgroundThread + let mutexes: Mutexes + + enum CodingKeys: String, CodingKey { + case allocated, active, metadata + case metadataThp = "metadata_thp" + case resident, mapped, retained + case zeroReallocs = "zero_reallocs" + case backgroundThread = "background_thread" + case mutexes + } +} + +// MARK: - StatsBackgroundThread + +struct StatsBackgroundThread: Codable { + let numThreads, numRuns, runInterval: Int + + enum CodingKeys: String, CodingKey { + case numThreads = "num_threads" + case numRuns = "num_runs" + case runInterval = "run_interval" + } +} + +// MARK: - Mutexes + +struct Mutexes: Codable { + let backgroundThread, maxPerBgThd, ctl, prof: BackgroundThreadValue + let profThdsData, profDump, profRecentAlloc, profRecentDump: BackgroundThreadValue + let profStats: BackgroundThreadValue + + enum CodingKeys: String, CodingKey { + case backgroundThread = "background_thread" + case maxPerBgThd = "max_per_bg_thd" + case ctl, prof + case profThdsData = "prof_thds_data" + case profDump = "prof_dump" + case profRecentAlloc = "prof_recent_alloc" + case profRecentDump = "prof_recent_dump" + case profStats = "prof_stats" + } +} + +// MARK: - 
BackgroundThreadValue + +struct BackgroundThreadValue: Codable { + let numOps, numWait, numSpinAcq, numOwnerSwitch: Int + let totalWaitTime, maxWaitTime, maxNumThds: Int + + enum CodingKeys: String, CodingKey { + case numOps = "num_ops" + case numWait = "num_wait" + case numSpinAcq = "num_spin_acq" + case numOwnerSwitch = "num_owner_switch" + case totalWaitTime = "total_wait_time" + case maxWaitTime = "max_wait_time" + case maxNumThds = "max_num_thds" + } +} + +// MARK: - StatsArenas + +struct StatsArenas: Codable { + let merged: Merged +} + +// MARK: - Merged + +struct Merged: Codable { + let nthreads, uptimeNS: Int + let dss: String + let dirtyDecayMS, muzzyDecayMS, pactive, pdirty: Int + let pmuzzy, dirtyNpurge, dirtyNmadvise, dirtyPurged: Int + let muzzyNpurge, muzzyNmadvise, muzzyPurged: Int + let small, large: Large + let mapped, retained, base, mergedInternal: Int + let metadataThp, tcacheBytes, tcacheStashedBytes, resident: Int + let abandonedVM, extentAvail: Int + let mutexes: [String: BackgroundThreadValue] + let bins: [MergedBin] + let lextents: [MergedLextent] + let extents: [Extent] + let secBytes: Int + let hpaShard: HpaShard + + enum CodingKeys: String, CodingKey { + case nthreads + case uptimeNS = "uptime_ns" + case dss + case dirtyDecayMS = "dirty_decay_ms" + case muzzyDecayMS = "muzzy_decay_ms" + case pactive, pdirty, pmuzzy + case dirtyNpurge = "dirty_npurge" + case dirtyNmadvise = "dirty_nmadvise" + case dirtyPurged = "dirty_purged" + case muzzyNpurge = "muzzy_npurge" + case muzzyNmadvise = "muzzy_nmadvise" + case muzzyPurged = "muzzy_purged" + case small, large, mapped, retained, base + case mergedInternal = "internal" + case metadataThp = "metadata_thp" + case tcacheBytes = "tcache_bytes" + case tcacheStashedBytes = "tcache_stashed_bytes" + case resident + case abandonedVM = "abandoned_vm" + case extentAvail = "extent_avail" + case mutexes, bins, lextents, extents + case secBytes = "sec_bytes" + case hpaShard = "hpa_shard" + } +} + +// MARK: - 
MergedBin + +struct MergedBin: Codable { + let nmalloc, ndalloc, curregs, nrequests: Int + let nfills, nflushes, nreslabs, curslabs: Int + let nonfullSlabs: Int + let mutex: BackgroundThreadValue + + enum CodingKeys: String, CodingKey { + case nmalloc, ndalloc, curregs, nrequests, nfills, nflushes, nreslabs, curslabs + case nonfullSlabs = "nonfull_slabs" + case mutex + } +} + +// MARK: - Extent + +struct Extent: Codable { + let ndirty, nmuzzy, nretained, dirtyBytes: Int + let muzzyBytes, retainedBytes: Int + + enum CodingKeys: String, CodingKey { + case ndirty, nmuzzy, nretained + case dirtyBytes = "dirty_bytes" + case muzzyBytes = "muzzy_bytes" + case retainedBytes = "retained_bytes" + } +} + +// MARK: - HpaShard + +struct HpaShard: Codable { + let npurgePasses, npurges, nhugifies, ndehugifies: Int + let fullSlabs, emptySlabs: EmptySlabs + let nonfullSlabs: [EmptySlabs] + + enum CodingKeys: String, CodingKey { + case npurgePasses = "npurge_passes" + case npurges, nhugifies, ndehugifies + case fullSlabs = "full_slabs" + case emptySlabs = "empty_slabs" + case nonfullSlabs = "nonfull_slabs" + } +} + +// MARK: - EmptySlabs + +struct EmptySlabs: Codable { + let npageslabsHuge, nactiveHuge, npageslabsNonhuge, nactiveNonhuge: Int + let ndirtyNonhuge: Int + let ndirtyHuge: Int? 
+ + enum CodingKeys: String, CodingKey { + case npageslabsHuge = "npageslabs_huge" + case nactiveHuge = "nactive_huge" + case npageslabsNonhuge = "npageslabs_nonhuge" + case nactiveNonhuge = "nactive_nonhuge" + case ndirtyNonhuge = "ndirty_nonhuge" + case ndirtyHuge = "ndirty_huge" + } +} + +// MARK: - Large + +struct Large: Codable { + let allocated, nmalloc, ndalloc, nrequests: Int + let nfills, nflushes: Int +} + +// MARK: - MergedLextent + +struct MergedLextent: Codable { + let curlextents: Int +} + +// swiftlint:enable all + +#endif From 0eeb00a58a1df80a19103875087f17f62508560a Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:03:36 +0100 Subject: [PATCH 19/37] delete Package.resolved --- Benchmarks/Package.resolved | 69 ------------------------------------- Package.resolved | 69 ------------------------------------- 2 files changed, 138 deletions(-) delete mode 100644 Benchmarks/Package.resolved delete mode 100644 Package.resolved diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved deleted file mode 100644 index 1c475535..00000000 --- a/Benchmarks/Package.resolved +++ /dev/null @@ -1,69 +0,0 @@ -{ - "originHash" : "f1d359a544b71b52c6788ad2e4cd2952f7f166b62ddb07316768f66be7ba4099", - "pins" : [ - { - "identity" : "hdrhistogram-swift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/HdrHistogram/hdrhistogram-swift", - "state" : { - "revision" : "de0b9b8a27956b9bfc9b4dce7d1c38ad7c579f19", - "version" : "0.1.4" - } - }, - { - "identity" : "package-datetime", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-datetime", - "state" : { - "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", - "version" : "1.0.2" - } - }, - { - "identity" : "swift-argument-parser", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-argument-parser", - "state" : { - "revision" : 
"c8ed701b513cf5177118a175d85fbbbcd707ab41", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-atomics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-atomics", - "state" : { - "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-numerics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-numerics", - "state" : { - "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b", - "version" : "1.0.2" - } - }, - { - "identity" : "swift-system", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-system", - "state" : { - "revision" : "025bcb1165deab2e20d4eaba79967ce73013f496", - "version" : "1.2.1" - } - }, - { - "identity" : "texttable", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/TextTable", - "state" : { - "revision" : "a27a07300cf4ae322e0079ca0a475c5583dd575f", - "version" : "0.0.2" - } - } - ], - "version" : 3 -} diff --git a/Package.resolved b/Package.resolved deleted file mode 100644 index 8d591558..00000000 --- a/Package.resolved +++ /dev/null @@ -1,69 +0,0 @@ -{ - "originHash" : "84469d781744f78b0b64fbe87db45451202dcf2fb4bf5be28fb53e29a1cac9ff", - "pins" : [ - { - "identity" : "hdrhistogram-swift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/HdrHistogram/hdrhistogram-swift.git", - "state" : { - "revision" : "de0b9b8a27956b9bfc9b4dce7d1c38ad7c579f19", - "version" : "0.1.4" - } - }, - { - "identity" : "package-jemalloc", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-jemalloc.git", - "state" : { - "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", - "version" : "1.0.0" - } - }, - { - "identity" : "swift-argument-parser", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-argument-parser.git", - "state" : { - "revision" : "011f0c765fb46d9cac61bca19be0527e99c98c8b", - "version" 
: "1.5.1" - } - }, - { - "identity" : "swift-atomics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-atomics.git", - "state" : { - "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-numerics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-numerics", - "state" : { - "revision" : "e0ec0f5f3af6f3e4d5e7a19d2af26b481acb6ba8", - "version" : "1.0.3" - } - }, - { - "identity" : "swift-system", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-system.git", - "state" : { - "revision" : "890830fff1a577dc83134890c7984020c5f6b43b", - "version" : "1.6.2" - } - }, - { - "identity" : "texttable", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/TextTable.git", - "state" : { - "revision" : "a27a07300cf4ae322e0079ca0a475c5583dd575f", - "version" : "0.0.2" - } - } - ], - "version" : 3 -} From 73f19c7652502111683c0b47932b4e1ad828e9e6 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 10 Mar 2026 09:49:31 +0100 Subject: [PATCH 20/37] add missing cases --- Sources/Benchmark/BenchmarkExecutor+Extensions.swift | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift index 72d2ab70..f73c4c07 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -29,6 +29,12 @@ extension BenchmarkExecutor { return true case .mallocCountTotal: return true + case .mallocCountSmall: + return true + case .mallocCountLarge: + return true + case .mallocBytesCount: + return true case .allocatedResidentMemory: return true case .freeCountTotal: From 56828a7e2852c41fe703411e0a07d0916d3663a1 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> 
Date: Tue, 10 Mar 2026 10:01:39 +0100 Subject: [PATCH 21/37] add preconcurrency on import --- Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index aeb3e3bf..944f062b 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -11,7 +11,7 @@ // 'Benchmark' plugin that is responsible for gathering command line arguments and then // Running the `BenchmarkTool` for each benchmark target. -import Foundation +@preconcurrency import Foundation import PackagePlugin #if canImport(Darwin) From 5767becf454a74a627e82abe79d0cc8802129f48 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 7 May 2026 17:20:45 +0200 Subject: [PATCH 22/37] add benchmarks for MallocInterposer --- .gitignore | 1 + .../MallocInterposer/MallocInterposer.swift | 202 ++++++++++++++++++ Benchmarks/Package.swift | 17 ++ scripts/compare-malloc-local.sh | 123 +++++++++++ 4 files changed, 343 insertions(+) create mode 100644 Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift create mode 100755 scripts/compare-malloc-local.sh diff --git a/.gitignore b/.gitignore index 5e610d3b..dba32f05 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ Package.resolved .swiftpm .DS_Store .build/ +.build-*/ # CocoaPods # diff --git a/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift new file mode 100644 index 00000000..2b7ffc3b --- /dev/null +++ b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift @@ -0,0 +1,202 @@ +// +// Copyright (c) 2026 Ordo One AB +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Regression benchmarks for the malloc interposer. Each benchmark performs +// a known, fixed number of allocations per iteration so the reported +// per-iteration counts (mallocCountTotal / freeCountTotal / etc.) line up +// with the expected values noted in the benchmark name. Drift between the +// jemalloc and interposer code paths — or between branches — shows up +// immediately as a count mismatch. +// +// Counts are scaled per iteration: with .kilo scaling, one malloc inside +// the body produces "1" in the count column, not "1000". + +import Benchmark + +#if canImport(Darwin) +import Darwin +#elseif canImport(Glibc) +import Glibc +#elseif canImport(Musl) +import Musl +#else +#error("Unsupported Platform") +#endif + +let mallocMetrics: [BenchmarkMetric] = [ + .wallClock, + .mallocCountSmall, + .mallocCountLarge, + .mallocCountTotal, + .freeCountTotal, + .mallocBytesCount, + .memoryLeaked, + .memoryLeakedBytes, +] + +let benchmarks: @Sendable () -> Void = { + Benchmark.defaultConfiguration = .init( + metrics: mallocMetrics, + warmupIterations: 1, + scalingFactor: .kilo, + maxDuration: .seconds(1), + maxIterations: 100 + ) + + // Sanity floor: an empty body should report (close to) zero allocations. + // Whatever the framework's per-iteration overhead is, it shows up here + // and is the reference for what "no allocations" looks like. + Benchmark("Noop") { benchmark in + for _ in benchmark.scaledIterations { + blackHole(0) + } + } + + // Bread-and-butter malloc/free pair, sub-page size — should land in + // mallocCountSmall, not mallocCountLarge. + // Expected per iter: malloc=1 (small=1, large=0), free=1, leaked=0. + Benchmark("Malloc 64B + free") { benchmark in + for _ in benchmark.scaledIterations { + let p = malloc(64) + blackHole(p) + free(p) + } + } + + // Larger-than-page allocation — should land in mallocCountLarge. 
+ // Expected per iter: malloc=1 (small=0, large=1), free=1. + Benchmark("Malloc 2 MiB + free") { benchmark in + for _ in benchmark.scaledIterations { + let p = malloc(2 * 1024 * 1024) + blackHole(p) + free(p) + } + } + + // calloc must be counted exactly like malloc + memset. + // Expected per iter: malloc=1, free=1. + Benchmark("Calloc 8x8 + free") { benchmark in + for _ in benchmark.scaledIterations { + let p = calloc(8, 8) + blackHole(p) + free(p) + } + } + + // realloc(grow) on success: implicit free of old + alloc of new. + // Expected per iter: malloc=2, free=2. + Benchmark("Realloc grow 64→256 + free") { benchmark in + for _ in benchmark.scaledIterations { + let p1 = malloc(64) + let p2 = realloc(p1, 256) + blackHole(p2) + free(p2) + } + } + + // realloc(NULL, size) is a pure malloc — no implicit free. + // Expected per iter: malloc=1, free=1. + Benchmark("Realloc(NULL, 128) + free") { benchmark in + for _ in benchmark.scaledIterations { + let p = realloc(nil, 128) + blackHole(p) + free(p) + } + } + + // realloc(p, 0) is implementation-defined: glibc frees p and returns NULL (pure free, no second malloc); other libcs (e.g. Darwin) may instead return a minimum-sized block, which this body never frees. + // Expected per iter (glibc): malloc=1, free=1. + Benchmark("Malloc + realloc(p, 0)") { benchmark in + for _ in benchmark.scaledIterations { + let p = malloc(64) + let r = realloc(p, 0) + blackHole(r) // nil on glibc + } + } + + // posix_memalign — separate code path that's easy to forget to count. + // Expected per iter: malloc=1, free=1. + Benchmark("posix_memalign(64, 1024) + free") { benchmark in + var ptr: UnsafeMutableRawPointer? + for _ in benchmark.scaledIterations { + _ = posix_memalign(&ptr, 64, 1024) + blackHole(ptr) + free(ptr) + } + } + + // C11 aligned_alloc — currently only intercepted on Linux. On Darwin the + // count drops because the symbol isn't in the DYLD_INTERPOSE list. Useful + // signal for that gap. + // Expected per iter (Linux): malloc=1, free=1. + // Expected per iter (Darwin): malloc=0 (not interposed), free=1.
+ #if !canImport(Darwin) + Benchmark("aligned_alloc(64, 1024) + free") { benchmark in + for _ in benchmark.scaledIterations { + let p = aligned_alloc(64, 1024) + blackHole(p) + free(p) + } + } + #endif + + // Batched mallocs in a single iteration — verifies the counter scales + // linearly and isn't accidentally collapsed/de-duplicated. + // Expected per iter: malloc=16, free=16. + Benchmark("Malloc x16 + free x16") { benchmark in + let n = 16 + let buf = UnsafeMutablePointer.allocate(capacity: n) + defer { buf.deallocate() } + buf.update(repeating: nil, count: n) + + for _ in benchmark.scaledIterations { + for i in 0.. 0 and stable + // between runs. + Benchmark("Swift Array(repeating:0, count:128)") { benchmark in + for _ in benchmark.scaledIterations { + var arr = [Int](repeating: 0, count: 128) + arr.withUnsafeMutableBufferPointer { buf in + blackHole(buf.baseAddress) + } + } + } + + // Heap-allocated String (must exceed the small-string inline limit of + // 15 bytes). Same caveat as Array — count is stdlib-dependent but must + // be stable. + Benchmark("Swift String (long, heap)") { benchmark in + for _ in benchmark.scaledIterations { + let s = String(repeating: "x", count: 256) + blackHole(s) + } + } +} diff --git a/Benchmarks/Package.swift b/Benchmarks/Package.swift index b9d33b17..bca55efd 100644 --- a/Benchmarks/Package.swift +++ b/Benchmarks/Package.swift @@ -77,3 +77,20 @@ package.targets += [ ] ) ] + +// Regression coverage for the malloc interposer: predictable allocation +// patterns (counts known per iteration) so any drift between jemalloc and +// interposer code paths is immediately visible in mallocCountTotal / +// freeCountTotal / memoryLeaked. 
+package.targets += [ + .executableTarget( + name: "MallocInterposerBenchmarks", + dependencies: [ + .product(name: "Benchmark", package: "package-benchmark") + ], + path: "Benchmarks/MallocInterposer", + plugins: [ + .plugin(name: "BenchmarkPlugin", package: "package-benchmark") + ] + ) +] diff --git a/scripts/compare-malloc-local.sh b/scripts/compare-malloc-local.sh new file mode 100755 index 00000000..7f0a7fe0 --- /dev/null +++ b/scripts/compare-malloc-local.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# +# compare-malloc-local.sh — compare malloc counts between the legacy jemalloc +# path (Swift 6.2 → Package@swift-6.2.swift) and the new custom interposer +# (Swift 6.3 → Package.swift) using THIS repo's local +# `MallocInterposerBenchmarks` target. +# +# These benchmarks have predictable per-iteration allocation counts, so any +# drift between the two code paths is a regression. For "real workload" +# comparison against swift-nio, see compare-malloc.sh instead. +# +# Mechanism: +# 1. Runs `swift package benchmark baseline update ` once per +# toolchain via swiftly. SwiftPM picks the right Package*.swift +# manifest for each toolchain automatically. +# 2. Calls `baseline compare` for the two recorded baselines. +# +# Pre-requisites: +# - swiftly with both toolchains installed. +# +# Usage: +# ./scripts/compare-malloc-local.sh [filter ...] +# +# Each positional arg becomes a `--filter` regex. With no args every +# benchmark in the target runs. +# +# Env overrides: +# TOOLCHAIN_OLD default 6.2.2 +# TOOLCHAIN_NEW default 6.3-snapshot-2026-02-27 + +set -euo pipefail + +PB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +BENCH_DIR="${PB_DIR}/Benchmarks" +TARGET="MallocInterposerBenchmarks" +TOOLCHAIN_OLD="${TOOLCHAIN_OLD:-6.2.2}" +TOOLCHAIN_NEW="${TOOLCHAIN_NEW:-6.3-snapshot-2026-02-27}" +BASELINE_OLD="jemalloc-${TOOLCHAIN_OLD}" +BASELINE_NEW="interposer-${TOOLCHAIN_NEW}" +# Per-toolchain scratch paths so each toolchain has its own .build cache. 
+# Without this, switching toolchains hits "module compiled with Swift X +# cannot be imported by Y" errors on the cached Benchmark.swiftmodule. +SCRATCH_OLD="${BENCH_DIR}/.build-${TOOLCHAIN_OLD}" +SCRATCH_NEW="${BENCH_DIR}/.build-${TOOLCHAIN_NEW}" + +step() { printf '\n\033[1;36m== %s\033[0m\n' "$*"; } +warn() { printf '\033[33m!! %s\033[0m\n' "$*" >&2; } +fail() { printf '\033[31m## %s\033[0m\n' "$*" >&2; exit 1; } + +[[ -d "$BENCH_DIR/Benchmarks/MallocInterposer" ]] \ + || fail "MallocInterposer benchmark dir missing — expected $BENCH_DIR/Benchmarks/MallocInterposer" +command -v swiftly >/dev/null || fail "swiftly required" + +cd "$BENCH_DIR" + +# Forward any positional args as --filter regexes (expanded below via ${arr[@]+"${arr[@]}"}: a bare empty-array expansion aborts under set -u on bash < 4.4, e.g. stock macOS 3.2). +declare -a FILTER_ARGS=() +for f in "$@"; do + FILTER_ARGS+=(--filter "$f") +done + +# SwiftPM #9062 workaround: copy lib*-tool.dylib → lib*.dylib so the spawned +# benchmark tool finds the interposer at the path it expects. Only relevant +# on the interposer (6.3) run. +fix_tool_dylibs() { + local search_dir="$1" + local copied=0 + while IFS= read -r src; do + local dst="${src/-tool.dylib/.dylib}" + if [[ ! -f "$dst" || "$src" -nt "$dst" ]]; then + cp -p "$src" "$dst" + copied=$((copied + 1)) + fi + done < <(find "$search_dir" -name "libMallocInterposer*-tool.dylib" 2>/dev/null) + if (( copied > 0 )); then + warn "Renamed $copied -tool.dylib → .dylib (SwiftPM #9062 workaround)" + fi +} + +run_jemalloc() { + step "Run 1: Swift $TOOLCHAIN_OLD (jemalloc) → baseline '$BASELINE_OLD' [scratch: $SCRATCH_OLD]" + swiftly run +"$TOOLCHAIN_OLD" \ + swift package \ + --scratch-path "$SCRATCH_OLD" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_OLD" \ + --target "$TARGET" \ + --quiet --no-progress \ + ${FILTER_ARGS[@]+"${FILTER_ARGS[@]}"} +} + +run_interposer() { + step "Run 2: Swift $TOOLCHAIN_NEW (interposer) → baseline '$BASELINE_NEW' [scratch: $SCRATCH_NEW]" + if !
swiftly run +"$TOOLCHAIN_NEW" \ + swift package \ + --scratch-path "$SCRATCH_NEW" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_NEW" \ + --target "$TARGET" \ + --quiet --no-progress \ + ${FILTER_ARGS[@]+"${FILTER_ARGS[@]}"}; then + warn "First attempt failed — applying SwiftPM #9062 workaround and retrying" + fix_tool_dylibs "$SCRATCH_NEW" + swiftly run +"$TOOLCHAIN_NEW" \ + swift package \ + --scratch-path "$SCRATCH_NEW" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_NEW" \ + --target "$TARGET" \ + --quiet --no-progress \ + ${FILTER_ARGS[@]+"${FILTER_ARGS[@]}"} + fi +} + +run_jemalloc +run_interposer + +step "Comparison: $BASELINE_OLD vs $BASELINE_NEW" +swiftly run +"$TOOLCHAIN_NEW" \ + swift package \ + --scratch-path "$SCRATCH_NEW" \ + benchmark baseline compare "$BASELINE_OLD" "$BASELINE_NEW" \ + --target "$TARGET" From 02323748ad11d8fca6c13dbd11aa898fa703e6ee Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 8 May 2026 16:30:37 +0200 Subject: [PATCH 23/37] add malloc and jemalloc comparison --- scripts/bench_malloc.c | 128 ++++++++++++++++++++++++++++++++++++++++ scripts/bench_malloc.sh | 88 +++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 scripts/bench_malloc.c create mode 100755 scripts/bench_malloc.sh diff --git a/scripts/bench_malloc.c b/scripts/bench_malloc.c new file mode 100644 index 00000000..0af0de62 --- /dev/null +++ b/scripts/bench_malloc.c @@ -0,0 +1,128 @@ +// bench_malloc.c — standalone wallclock benchmark for malloc/free patterns. +// +// Build once, run twice (with and without jemalloc injected via +// DYLD_INSERT_LIBRARIES on macOS / LD_PRELOAD on Linux). See +// scripts/bench_malloc.sh. +// +// Each benchmark runs an inner loop N times; we run K trials of that and +// report min / median / max ns per op so noise is visible.
+ +#include +#include +#include +#include + +#define WARMUP_ITERS 1000 +#define TRIALS 9 // odd → median is a single sample + +// Volatile sink prevents the compiler from optimizing alloc/free pairs away. +static volatile void *sink; + +static double now_ns(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec; +} + +static int cmp_double(const void *a, const void *b) { + double da = *(const double *)a, db = *(const double *)b; + return (da > db) - (da < db); +} + +// ---- benchmark bodies ---- + +#define DEFINE_BENCH(NAME, BODY) \ + static void bench_##NAME(int iters) { \ + for (int _i = 0; _i < iters; _i++) { \ + BODY \ + } \ + } + +DEFINE_BENCH(malloc_64, { + void *p = malloc(64); sink = p; free(p); +}) + +DEFINE_BENCH(malloc_2mb, { + void *p = malloc(2 * 1024 * 1024); sink = p; free(p); +}) + +DEFINE_BENCH(calloc_8x8, { + void *p = calloc(8, 8); sink = p; free(p); +}) + +DEFINE_BENCH(realloc_grow, { + void *p = malloc(64); + p = realloc(p, 256); + sink = p; + free(p); +}) + +DEFINE_BENCH(realloc_null, { + void *p = realloc(NULL, 128); sink = p; free(p); +}) + +DEFINE_BENCH(posix_memalign_1k, { + void *p = NULL; + (void)posix_memalign(&p, 64, 1024); + sink = p; + free(p); +}) + +DEFINE_BENCH(malloc_x16, { + void *ptrs[16]; + for (int i = 0; i < 16; i++) ptrs[i] = malloc(48); + sink = ptrs[0]; // defeat clang's malloc/free elision at -O2 + for (int i = 0; i < 16; i++) free(ptrs[i]); +}) + +// ---- runner ---- + +typedef void (*bench_fn)(int); + +typedef struct { + const char *name; + bench_fn fn; + int inner; // iterations inside one trial +} bench_t; + +#define B(NAME, INNER) { #NAME, bench_##NAME, INNER } + +static const bench_t benchmarks[] = { + B(malloc_64, 1000000), + B(calloc_8x8, 1000000), + B(realloc_null, 1000000), + B(realloc_grow, 500000), + B(posix_memalign_1k, 1000000), + B(malloc_x16, 200000), + B(malloc_2mb, 10000), +}; + +int main(void) { + const char *label = 
getenv("BENCH_LABEL"); + if (!label) label = "(no label)"; + + printf("== %s ==\n", label); + printf("%-22s %12s %12s %12s\n", "benchmark", "min ns/op", "median ns/op", "max ns/op"); + printf("%-22s %12s %12s %12s\n", "---------", "---------", "------------", "---------"); + + size_t n = sizeof(benchmarks) / sizeof(benchmarks[0]); + for (size_t i = 0; i < n; i++) { + const bench_t *b = &benchmarks[i]; + + // Warmup + b->fn(WARMUP_ITERS); + + double trials[TRIALS]; + for (int t = 0; t < TRIALS; t++) { + double t0 = now_ns(); + b->fn(b->inner); + double t1 = now_ns(); + trials[t] = (t1 - t0) / (double)b->inner; + } + qsort(trials, TRIALS, sizeof(double), cmp_double); + + printf("%-22s %12.2f %12.2f %12.2f\n", + b->name, trials[0], trials[TRIALS / 2], trials[TRIALS - 1]); + } + return 0; +} diff --git a/scripts/bench_malloc.sh b/scripts/bench_malloc.sh new file mode 100755 index 00000000..d8a8f58a --- /dev/null +++ b/scripts/bench_malloc.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# +# bench_malloc.sh — build scripts/bench_malloc.c once and run it twice: +# under the system allocator and under jemalloc. Uses runtime injection +# (DYLD_INSERT_LIBRARIES on macOS, LD_PRELOAD on Linux), so there's no +# link-time difference between the two runs. +# +# Pre-requisites: +# - macOS: `brew install jemalloc` (or override JEMALLOC_LIB) +# - Linux: jemalloc installed (e.g. `apt install libjemalloc2`) +# +# Usage: +# ./scripts/bench_malloc.sh +# +# Env overrides: +# JEMALLOC_LIB path to libjemalloc.{dylib,so}; auto-detected if unset. +# CC compiler; defaults to cc. +# CFLAGS extra cflags; defaults to "-O2 -Wall -Wextra". + +set -euo pipefail + +# Use clang explicitly — `cc` is aliased to other things in many shells. 
+CC="${CC:-$(command -v clang || command -v gcc || echo cc)}" +CFLAGS="${CFLAGS:--O2 -Wall -Wextra}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SRC="${SCRIPT_DIR}/bench_malloc.c" +BIN="$(mktemp -t bench_malloc.XXXXXX)" +trap 'rm -f "$BIN"' EXIT + +step() { printf '\n\033[1;36m== %s\033[0m\n' "$*"; } +fail() { printf '\033[31m## %s\033[0m\n' "$*" >&2; exit 1; } + +# --- locate jemalloc --- +if [[ -z "${JEMALLOC_LIB:-}" ]]; then + case "$(uname -s)" in + Darwin) + for cand in \ + /opt/homebrew/opt/jemalloc/lib/libjemalloc.2.dylib \ + /opt/homebrew/opt/jemalloc/lib/libjemalloc.dylib \ + /usr/local/opt/jemalloc/lib/libjemalloc.2.dylib \ + /usr/local/opt/jemalloc/lib/libjemalloc.dylib; do + if [[ -f "$cand" ]]; then JEMALLOC_LIB="$cand"; break; fi + done + ;; + Linux) + for cand in \ + /usr/lib/x86_64-linux-gnu/libjemalloc.so.2 \ + /usr/lib/aarch64-linux-gnu/libjemalloc.so.2 \ + /usr/lib64/libjemalloc.so.2 \ + /usr/lib/libjemalloc.so.2 \ + /usr/lib/x86_64-linux-gnu/libjemalloc.so \ + /usr/lib/libjemalloc.so; do + if [[ -f "$cand" ]]; then JEMALLOC_LIB="$cand"; break; fi + done + ;; + esac +fi +[[ -n "${JEMALLOC_LIB:-}" && -f "$JEMALLOC_LIB" ]] \ + || fail "jemalloc dylib not found — set JEMALLOC_LIB=/path/to/libjemalloc.{dylib,so}" + +# --- build --- +step "Compiling $SRC" +# shellcheck disable=SC2086 +"$CC" $CFLAGS -o "$BIN" "$SRC" + +# --- run system allocator --- +step "Run 1 — system allocator" +BENCH_LABEL="system" "$BIN" + +# --- run with jemalloc injected --- +step "Run 2 — jemalloc (injected: $JEMALLOC_LIB)" +case "$(uname -s)" in + Darwin) + BENCH_LABEL="jemalloc" \ + DYLD_INSERT_LIBRARIES="$JEMALLOC_LIB" \ + DYLD_FORCE_FLAT_NAMESPACE=1 \ + "$BIN" + ;; + Linux) + BENCH_LABEL="jemalloc" \ + LD_PRELOAD="$JEMALLOC_LIB" \ + "$BIN" + ;; + *) + fail "Unsupported platform: $(uname -s)" + ;; +esac From 94543e3ad87bb88823d59787dd8946400b0f344d Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: 
Fri, 8 May 2026 17:34:38 +0200 Subject: [PATCH 24/37] store requested size in custom header prefix instead of calling malloc_size() --- .../MallocInterposerC/include/interposer.h | 53 ++++ .../MallocInterposerC/src/interposer-darwin.c | 245 +++++++++++++----- .../MallocInterposerC/src/interposer-unix.c | 217 ++++++++++------ 3 files changed, 370 insertions(+), 145 deletions(-) diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h index 92c9a7cf..31f86ce0 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h @@ -11,14 +11,61 @@ #ifndef INTERPOSER_H #define INTERPOSER_H +#include #include #include +#include #include #include #if __APPLE__ # include #endif +// --------------------------------------------------------------------------- +// Header-prefix size tracking +// +// Each allocation we hand back to the caller is preceded by a 16-byte header +// that records the requested size and a magic word. On free/realloc we read +// the header instead of calling malloc_size/malloc_usable_size, eliminating +// a libc round-trip per call. Pointers that didn't go through the interposer +// (e.g., aligned-alloc slow path, allocations that pre-date hooking) are +// detected by a failing magic check and fall back to libc bookkeeping. +// +// The header is exactly 16 bytes so user_ptr inherits the 16-byte alignment +// of the underlying libc allocation. 
+// --------------------------------------------------------------------------- + +#define MALLOC_INTERPOSER_MAGIC 0xC0FFEE5AU + +typedef struct { + size_t requested_size; // offset 0 + uint32_t reserved; // offset 8 + uint32_t magic; // offset 12 — last 4 bytes for fast probe via *(user_ptr - 4) +} malloc_header_t; + +_Static_assert(sizeof(malloc_header_t) == 16, + "malloc_header_t must be 16 bytes to preserve 16-byte alignment"); + +static inline malloc_header_t *malloc_interposer_header_for(void *user_ptr) { + return (malloc_header_t *)((char *)user_ptr - sizeof(malloc_header_t)); +} + +static inline void *malloc_interposer_user_for(void *raw) { + return (char *)raw + sizeof(malloc_header_t); +} + +static inline bool malloc_interposer_is_ours(const void *user_ptr) { + if (!user_ptr) return false; + // Probe the last 4 bytes of the would-be header. For our pointers this + // reads our magic; for external pointers it reads into libc chunk + // metadata (always present and readable for libc-malloc'd pointers). 
+ uint32_t magic; + memcpy(&magic, (const char *)user_ptr - sizeof(uint32_t), sizeof(magic)); + return magic == MALLOC_INTERPOSER_MAGIC; +} + +// --------------------------------------------------------------------------- + // Enable/disable counting and reset/read stats void malloc_interposer_enable(void); void malloc_interposer_disable(void); @@ -35,6 +82,11 @@ void *replacement_realloc(void *ptr, size_t size); void *replacement_reallocf(void *ptr, size_t size); void *replacement_valloc(size_t size); int replacement_posix_memalign(void **memptr, size_t alignment, size_t size); +#if __APPLE__ +size_t replacement_malloc_size(const void *ptr); +#else +size_t replacement_malloc_usable_size(void *ptr); +#endif // On Linux we use LD_PRELOAD to interpose the standard malloc functions // and we have to declare them ourselves @@ -46,6 +98,7 @@ void *realloc(void *ptr, size_t size); void *reallocf(void *ptr, size_t size); void *valloc(size_t size); int posix_memalign(void **memptr, size_t alignment, size_t size); +size_t malloc_usable_size(void *ptr); #endif #if __APPLE__ diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c index e330e22a..8f03ba88 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c @@ -84,131 +84,249 @@ void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, __attribute__((used)) static struct { const void *replacement; const void *replacee; } _interpose_##_replacee \ __attribute__ ((section("__DATA,__interpose"))) = { (const void *)(unsigned long)&_replacement, (const void *)(unsigned long)&_replacee }; -/* on Darwin calling the original function is super easy, just call it, done. */ -#define JUMP_INTO_LIBC_FUN(_fun, ...) 
/* \ -*/ do { /* \ -*/ return _fun(__VA_ARGS__); /* \ -*/ } while(0) - // Inline counting helpers --------------------------------------------------- static __attribute__((always_inline)) void count_malloc(size_t size) { atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); - if ((int)size > get_page_size()) { + if (size > (size_t)get_page_size()) { atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); } else { atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); } } -static __attribute__((always_inline)) void count_free(void *ptr) { - size_t size = malloc_size(ptr); +static __attribute__((always_inline)) void count_free(size_t size) { atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); atomic_fetch_add_explicit(&g_free_bytes, (int64_t)size, memory_order_relaxed); } -// Replacement functions ----------------------------------------------------- +// Header-write helpers ------------------------------------------------------ -void replacement_free(void *ptr) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(ptr); - } - JUMP_INTO_LIBC_FUN(free, ptr); +static __attribute__((always_inline)) void *write_header(void *raw, size_t size) { + malloc_header_t *hdr = (malloc_header_t *)raw; + hdr->requested_size = size; + hdr->reserved = 0; + hdr->magic = MALLOC_INTERPOSER_MAGIC; + return malloc_interposer_user_for(raw); } +// Replacement functions ----------------------------------------------------- +// +// On Darwin, calls from inside this dylib resolve directly to libsystem +// (DYLD_INTERPOSE only rewrites calls in OTHER images), so plain `malloc`, +// `free` etc. below are libsystem's, not recursive into ourselves. 
+ void *replacement_malloc(size_t size) { + void *raw = malloc(size + sizeof(malloc_header_t)); + if (!raw) return NULL; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { count_malloc(size); } - JUMP_INTO_LIBC_FUN(malloc, size); + return write_header(raw, size); } -void *replacement_realloc(void *ptr, size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(ptr); - count_malloc(size); +void replacement_free(void *user_ptr) { + if (!user_ptr) return; + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(hdr->requested_size); + } + free(hdr); + } else { + // External pointer (rare on Darwin once DYLD_INTERPOSE is active). + // Fall back to libc bookkeeping for byte accounting. + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(malloc_size(user_ptr)); + } + free(user_ptr); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); } void *replacement_calloc(size_t count, size_t size) { + size_t total; + if (__builtin_mul_overflow(count, size, &total)) { + // Let libc surface the overflow exactly as the user would expect. + return calloc(count, size); + } + // libc calloc zeros the entire allocation including where the header + // sits; we then overwrite those 16 bytes. Slightly redundant but simple. 
+ void *raw = calloc(1, total + sizeof(malloc_header_t)); + if (!raw) return NULL; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(count * size); + count_malloc(total); } - JUMP_INTO_LIBC_FUN(calloc, count, size); + return write_header(raw, total); } -void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); +void *replacement_realloc(void *user_ptr, size_t new_size) { + if (!user_ptr) return replacement_malloc(new_size); + if (new_size == 0) { + replacement_free(user_ptr); + return NULL; } - JUMP_INTO_LIBC_FUN(malloc_zone_malloc, zone, size); -} -void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(num_items * size); + bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); + + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); + size_t old_size = old_hdr->requested_size; + + void *new_raw = realloc(old_hdr, new_size + sizeof(malloc_header_t)); + if (!new_raw) return NULL; + + if (counting) { + count_free(old_size); + count_malloc(new_size); + } + // realloc may have moved memory; rewrite the header unconditionally. + return write_header(new_raw, new_size); + } + + // External pointer; use libc bookkeeping. 
+ size_t old_size = malloc_size(user_ptr); + void *new_ptr = realloc(user_ptr, new_size); + if (!new_ptr) return NULL; + if (counting) { + count_free(old_size); + count_malloc(malloc_size(new_ptr)); } - JUMP_INTO_LIBC_FUN(malloc_zone_calloc, zone, num_items, size); + return new_ptr; } -void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); +void *replacement_reallocf(void *user_ptr, size_t new_size) { + void *new_ptr = replacement_realloc(user_ptr, new_size); + // reallocf semantics: if reallocation fails, free the original pointer. + // replacement_realloc handles size==0 (frees) and ptr==NULL (no original) + // itself, so only free on the actual-failure case. + if (!new_ptr && user_ptr && new_size != 0) { + replacement_free(user_ptr); } - JUMP_INTO_LIBC_FUN(malloc_zone_valloc, zone, size); + return new_ptr; } -void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (0 == size) { - replacement_free(ptr); - return NULL; +// ---- Aligned/legacy paths: alignment requirements rule out the header ---- +// We let libc place a properly-aligned chunk and use malloc_size on free +// (paid by the rare allocations that use these). Magic check on free will +// fail, falling through to the external path that reads malloc_size. 
+ +void *replacement_valloc(size_t size) { + void *ptr = valloc(size); + if (ptr && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(malloc_size(ptr)); } - if (!ptr) { - return replacement_malloc(size); + return ptr; +} + +int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { + int result = posix_memalign(memptr, alignment, size); + if (result == 0 + && memptr + && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(malloc_size(*memptr)); } + return result; +} + +// ---- Zone-level wrappers (rarely hit by user code) ------------------------ + +void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size) { + void *raw = malloc_zone_malloc(zone, size + sizeof(malloc_header_t)); + if (!raw) return NULL; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(ptr); count_malloc(size); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); + return write_header(raw, size); } -void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { +void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) { + size_t total; + if (__builtin_mul_overflow(num_items, size, &total)) { + return malloc_zone_calloc(zone, num_items, size); + } + void *raw = malloc_zone_calloc(zone, 1, total + sizeof(malloc_header_t)); + if (!raw) return NULL; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); + count_malloc(total); } - JUMP_INTO_LIBC_FUN(malloc_zone_memalign, zone, alignment, size); + return write_header(raw, total); } -void replacement_malloc_zone_free(malloc_zone_t *zone, void *ptr) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); +void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size) { + void *ptr = malloc_zone_valloc(zone, size); + if (ptr && 
atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(malloc_size(ptr)); } - JUMP_INTO_LIBC_FUN(malloc_zone_free, zone, ptr); + return ptr; } -void *replacement_reallocf(void *ptr, size_t size) { - void *new_ptr = replacement_realloc(ptr, size); - if (!new_ptr) { - replacement_free(new_ptr); +void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *user_ptr, size_t new_size) { + if (!user_ptr) return replacement_malloc_zone_malloc(zone, new_size); + if (new_size == 0) { + replacement_malloc_zone_free(zone, user_ptr); + return NULL; + } + + bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); + + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); + size_t old_size = old_hdr->requested_size; + void *new_raw = malloc_zone_realloc(zone, old_hdr, new_size + sizeof(malloc_header_t)); + if (!new_raw) return NULL; + if (counting) { + count_free(old_size); + count_malloc(new_size); + } + return write_header(new_raw, new_size); + } + + size_t old_size = malloc_size(user_ptr); + void *new_ptr = malloc_zone_realloc(zone, user_ptr, new_size); + if (!new_ptr) return NULL; + if (counting) { + count_free(old_size); + count_malloc(malloc_size(new_ptr)); } return new_ptr; } -void *replacement_valloc(size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); +void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { + void *ptr = malloc_zone_memalign(zone, alignment, size); + if (ptr && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(malloc_size(ptr)); } - JUMP_INTO_LIBC_FUN(valloc, size); + return ptr; } -int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); +void replacement_malloc_zone_free(malloc_zone_t *zone, void 
*user_ptr) { + if (!user_ptr) return; + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(hdr->requested_size); + } + malloc_zone_free(zone, hdr); + } else { + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_free(malloc_size(user_ptr)); + } + malloc_zone_free(zone, user_ptr); + } +} + +// ---- Size queries --------------------------------------------------------- +// External code that calls malloc_size on one of our pointers would see the +// offset address (not the libc chunk start), so libsystem can't find it in +// any zone. Interpose to return the requested size from the header. + +size_t replacement_malloc_size(const void *user_ptr) { + if (!user_ptr) return 0; + if (malloc_interposer_is_ours(user_ptr)) { + return malloc_interposer_header_for((void *)user_ptr)->requested_size; } - JUMP_INTO_LIBC_FUN(posix_memalign, memptr, alignment, size); + return malloc_size(user_ptr); } DYLD_INTERPOSE(replacement_free, free) @@ -218,6 +336,7 @@ DYLD_INTERPOSE(replacement_calloc, calloc) DYLD_INTERPOSE(replacement_reallocf, reallocf) DYLD_INTERPOSE(replacement_valloc, valloc) DYLD_INTERPOSE(replacement_posix_memalign, posix_memalign) +DYLD_INTERPOSE(replacement_malloc_size, malloc_size) DYLD_INTERPOSE(replacement_malloc_zone_malloc, malloc_zone_malloc) DYLD_INTERPOSE(replacement_malloc_zone_calloc, malloc_zone_calloc) DYLD_INTERPOSE(replacement_malloc_zone_valloc, malloc_zone_valloc) diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c index 08a23669..74f3a82b 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -12,6 +12,7 @@ #define _GNU_SOURCE #include 
+#include #include #include #include @@ -144,180 +145,232 @@ static bool is_recursive_malloc_block(void *ptr) { // this is called if realloc is called whilst trying to resolve libc's realloc. static void *recursive_realloc(void *ptr, size_t size) { - // not implemented yet... + (void)ptr; (void)size; abort(); } // this is called if free is called whilst trying to resolve libc's free. static void recursive_free(void *ptr) { - // not implemented yet... + (void)ptr; abort(); } -// this is called if socket is called whilst trying to resolve libc's socket. static int recursive_socket(int domain, int type, int protocol) { - // not possible + (void)domain; (void)type; (void)protocol; abort(); } - -// this is called if accept is called whilst trying to resolve libc's accept. static int recursive_accept(int socket, struct sockaddr *restrict address, socklen_t *restrict address_len) { - // not possible + (void)socket; (void)address; (void)address_len; abort(); } - -// this is called if accept4 is called whilst trying to resolve libc's accept4. static int recursive_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) { - // not possible + (void)sockfd; (void)addr; (void)addrlen; (void)flags; abort(); } - -// this is called if close is called whilst trying to resolve libc's close. static int recursive_close(int fildes) { - // not possible + (void)fildes; abort(); } -/* On Apple platforms getting to the original libc function from a hooked - * function is easy. On other UNIX systems this is slightly harder because we - * have to look up the function with the dynamic linker. Because that isn't - * super performant we cache the lookup result in an (atomic) global. - * - * Calling into the libc function if we have already cached it is easy, we - * (atomically) load it and call into it. If have not yet cached it, we need to - * resolve it which we do by using dlsym and then write it into the (atomic) - * global. 
There's only one slight problem: dlsym might call back into the - * function we're just trying to resolve (dlsym does call malloc). In that case - * we need to emulate that function (named recursive_*). But that's all then. - */ #define JUMP_INTO_LIBC_FUN(_fun, ...) /* \ */ do { /* \ -*/ /* Let's see if somebody else already resolved that function for us */ /* \ */ type_libc_ ## _fun local_fun = atomic_load(&g_libc_ ## _fun); /* \ */ if (!local_fun) { /* \ -*/ /* No, we're the first ones to use this function. */ /* \ */ if (!g_in_ ## _fun) { /* \ */ g_in_ ## _fun = true; /* \ -*/ /* If we're here, we're at least not recursively in ourselves. */ /* \ -*/ /* That means we can use dlsym to resolve the libc function. */ /* \ */ type_libc_ ## _fun desired = dlsym(RTLD_NEXT, LIBC_SYMBOL(_fun)); /* \ */ if (atomic_compare_exchange_strong(&g_libc_ ## _fun, &local_fun, desired)) { /* \ -*/ /* If we're here, we won the race, so let's use our resolved function. */ /* \ */ local_fun = desired; /* \ */ } else { /* \ -*/ /* Lost the race, let's load the global again */ /* \ */ local_fun = atomic_load(&g_libc_ ## _fun); /* \ */ } /* \ */ } else { /* \ -*/ /* Okay, we can't jump into libc here and need to use our own version. */ /* \ */ return recursive_ ## _fun (__VA_ARGS__); /* \ */ } /* \ */ } /* \ */ return local_fun(__VA_ARGS__); /* \ */ } while(0) +/* Companion to JUMP_INTO_LIBC_FUN that captures the libc result into _outvar + * instead of returning. Used when we need to inspect the result before + * returning (e.g. to write the size header). */ +#define CALL_LIBC_FUN_CAPTURE(_outvar, _fun, ...) 
\ + do { \ + type_libc_ ## _fun local_fun = atomic_load(&g_libc_ ## _fun); \ + if (!local_fun) { \ + if (!g_in_ ## _fun) { \ + g_in_ ## _fun = true; \ + type_libc_ ## _fun desired = dlsym(RTLD_NEXT, LIBC_SYMBOL(_fun)); \ + if (atomic_compare_exchange_strong(&g_libc_ ## _fun, &local_fun, desired)) { \ + local_fun = desired; \ + } else { \ + local_fun = atomic_load(&g_libc_ ## _fun); \ + } \ + } else { \ + (_outvar) = recursive_ ## _fun (__VA_ARGS__); \ + break; \ + } \ + } \ + (_outvar) = local_fun(__VA_ARGS__); \ + } while (0) + // Inline counting helpers --------------------------------------------------- static __attribute__((always_inline)) void count_malloc(size_t size) { atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); - if ((int)size > get_page_size()) { + if (size > (size_t)get_page_size()) { atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); } else { atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); } } -static __attribute__((always_inline)) void count_free(void *ptr) { - size_t size = malloc_usable_size(ptr); +static __attribute__((always_inline)) void count_free(size_t size) { atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); atomic_fetch_add_explicit(&g_free_bytes, (int64_t)size, memory_order_relaxed); } +// Header-write helper ------------------------------------------------------- + +static __attribute__((always_inline)) void *write_header(void *raw, size_t size) { + malloc_header_t *hdr = (malloc_header_t *)raw; + hdr->requested_size = size; + hdr->reserved = 0; + hdr->magic = MALLOC_INTERPOSER_MAGIC; + return malloc_interposer_user_for(raw); +} + // Replacement functions ----------------------------------------------------- -void replacement_free(void *ptr) { - if (ptr) { +void *replacement_malloc(size_t size) { + void *raw; + CALL_LIBC_FUN_CAPTURE(raw, malloc, size + sizeof(malloc_header_t)); + if 
(!raw) return NULL; + if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { + count_malloc(size); + } + return write_header(raw, size); +} + +void replacement_free(void *user_ptr) { + if (!user_ptr) return; + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(ptr); + count_free(hdr->requested_size); } - if (!is_recursive_malloc_block(ptr)) { - JUMP_INTO_LIBC_FUN(free, ptr); + // Recursive-malloc blocks live in our static buffer; never call libc free on them. + if (!is_recursive_malloc_block(hdr)) { + JUMP_INTO_LIBC_FUN(free, hdr); } + return; } -} - -void *replacement_malloc(size_t size) { + // Externally-allocated pointer (no header). + if (is_recursive_malloc_block(user_ptr)) return; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); + count_free(malloc_usable_size(user_ptr)); } - JUMP_INTO_LIBC_FUN(malloc, size); + JUMP_INTO_LIBC_FUN(free, user_ptr); } -void *replacement_realloc(void *ptr, size_t size) { - if (0 == size) { - replacement_free(ptr); +void *replacement_realloc(void *user_ptr, size_t new_size) { + if (!user_ptr) return replacement_malloc(new_size); + if (new_size == 0) { + replacement_free(user_ptr); return NULL; } - if (!ptr) { - return replacement_malloc(size); + + bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); + + if (malloc_interposer_is_ours(user_ptr)) { + malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); + size_t old_size = old_hdr->requested_size; + + void *new_raw; + CALL_LIBC_FUN_CAPTURE(new_raw, realloc, old_hdr, new_size + sizeof(malloc_header_t)); + if (!new_raw) return NULL; + + if (counting) { + count_free(old_size); + count_malloc(new_size); + } + return write_header(new_raw, new_size); } - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(ptr); - 
count_malloc(size); + + // External pointer; use libc bookkeeping. + size_t old_size = malloc_usable_size(user_ptr); + void *new_ptr; + CALL_LIBC_FUN_CAPTURE(new_ptr, realloc, user_ptr, new_size); + if (!new_ptr) return NULL; + if (counting) { + count_free(old_size); + count_malloc(malloc_usable_size(new_ptr)); } - JUMP_INTO_LIBC_FUN(realloc, ptr, size); + return new_ptr; } void *replacement_calloc(size_t count, size_t size) { - void *ptr = replacement_malloc(count * size); - memset(ptr, 0, count * size); - return ptr; + size_t total; + if (__builtin_mul_overflow(count, size, &total)) { + errno = ENOMEM; + return NULL; + } + void *user_ptr = replacement_malloc(total); + if (user_ptr) { + memset(user_ptr, 0, total); + } + return user_ptr; } -void *replacement_reallocf(void *ptr, size_t size) { - void *new_ptr = replacement_realloc(ptr, size); - if (!new_ptr) { - replacement_free(new_ptr); +void *replacement_reallocf(void *user_ptr, size_t new_size) { + void *new_ptr = replacement_realloc(user_ptr, new_size); + if (!new_ptr && user_ptr && new_size != 0) { + replacement_free(user_ptr); } return new_ptr; } +// Aligned/legacy paths skip the header (alignment requirements rule it out) +// and rely on malloc_usable_size for byte accounting. + void *replacement_valloc(size_t size) { - // not aligning correctly (should be PAGE_SIZE) but good enough + // Note: not aligning correctly (should be PAGE_SIZE) but good enough. return replacement_malloc(size); } int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - // not aligning correctly (should be `alignment`) but good enough + (void)alignment; + // Note: not aligning correctly (should be `alignment`) but good enough. 
void *ptr = replacement_malloc(size); if (ptr && memptr) { *memptr = ptr; return 0; - } else { - return 1; } + return ENOMEM; } -void free(void *ptr) { - replacement_free(ptr); -} -void *malloc(size_t size) { - return replacement_malloc(size); -} -void *calloc(size_t nmemb, size_t size) { - return replacement_calloc(nmemb, size); -} -void *realloc(void *ptr, size_t size) { - return replacement_realloc(ptr, size); -} -void *reallocf(void *ptr, size_t size) { - return replacement_reallocf(ptr, size); -} -void *valloc(size_t size) { - return replacement_valloc(size); +// Size queries -------------------------------------------------------------- + +size_t replacement_malloc_usable_size(void *user_ptr) { + if (!user_ptr) return 0; + if (malloc_interposer_is_ours(user_ptr)) { + return malloc_interposer_header_for(user_ptr)->requested_size; + } + return malloc_usable_size(user_ptr); } + +// Public symbol overrides --------------------------------------------------- + +void free(void *ptr) { replacement_free(ptr); } +void *malloc(size_t size) { return replacement_malloc(size); } +void *calloc(size_t nmemb, size_t size) { return replacement_calloc(nmemb, size); } +void *realloc(void *ptr, size_t size) { return replacement_realloc(ptr, size); } +void *reallocf(void *ptr, size_t size) { return replacement_reallocf(ptr, size); } +void *valloc(size_t size) { return replacement_valloc(size); } int posix_memalign(void **memptr, size_t alignment, size_t size) { return replacement_posix_memalign(memptr, alignment, size); } +size_t malloc_usable_size(void *ptr) { return replacement_malloc_usable_size(ptr); } + #endif From 194449704af47bba1ead003e23ce0032fd4af931 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 8 May 2026 18:21:32 +0200 Subject: [PATCH 25/37] fix linux --- .../MallocInterposerC/src/interposer-unix.c | 47 ++++++++++++++----- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git 
a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c index 74f3a82b..aa7c4143 100644 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c @@ -39,24 +39,27 @@ static _Atomic ptrdiff_t g_recursive_malloc_next_free_ptr = ATOMIC_VAR_INIT(0); static __thread bool g_in_malloc = false; static __thread bool g_in_realloc = false; static __thread bool g_in_free = false; +static __thread bool g_in_malloc_usable_size = false; static __thread bool g_in_socket = false; static __thread bool g_in_accept = false; static __thread bool g_in_accept4 = false; static __thread bool g_in_close = false; /* The types of the variables holding the libc function pointers. */ -typedef void *(*type_libc_malloc)(size_t); -typedef void *(*type_libc_realloc)(void *, size_t); -typedef void (*type_libc_free)(void *); -typedef int (*type_libc_socket)(int, int, int); -typedef int (*type_libc_accept)(int, struct sockaddr*, socklen_t *); -typedef int (*type_libc_accept4)(int, struct sockaddr *, socklen_t *, int); -typedef int (*type_libc_close)(int); +typedef void *(*type_libc_malloc)(size_t); +typedef void *(*type_libc_realloc)(void *, size_t); +typedef void (*type_libc_free)(void *); +typedef size_t (*type_libc_malloc_usable_size)(void *); +typedef int (*type_libc_socket)(int, int, int); +typedef int (*type_libc_accept)(int, struct sockaddr*, socklen_t *); +typedef int (*type_libc_accept4)(int, struct sockaddr *, socklen_t *, int); +typedef int (*type_libc_close)(int); /* The (atomic) globals holding the pointer to the original libc implementation. 
*/ _Atomic type_libc_malloc g_libc_malloc; _Atomic type_libc_realloc g_libc_realloc; _Atomic type_libc_free g_libc_free; +_Atomic type_libc_malloc_usable_size g_libc_malloc_usable_size; _Atomic type_libc_socket g_libc_socket; _Atomic type_libc_accept g_libc_accept; _Atomic type_libc_accept4 g_libc_accept4; @@ -155,6 +158,13 @@ static void recursive_free(void *ptr) { abort(); } +// If malloc_usable_size is queried during dlsym handshake, we have nothing +// useful to report — return 0. Reaching here is exceptional. +static size_t recursive_malloc_usable_size(void *ptr) { + (void)ptr; + return 0; +} + static int recursive_socket(int domain, int type, int protocol) { (void)domain; (void)type; (void)protocol; abort(); @@ -269,7 +279,9 @@ void replacement_free(void *user_ptr) { // Externally-allocated pointer (no header). if (is_recursive_malloc_block(user_ptr)) return; if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(malloc_usable_size(user_ptr)); + size_t size; + CALL_LIBC_FUN_CAPTURE(size, malloc_usable_size, user_ptr); + count_free(size); } JUMP_INTO_LIBC_FUN(free, user_ptr); } @@ -298,14 +310,18 @@ void *replacement_realloc(void *user_ptr, size_t new_size) { return write_header(new_raw, new_size); } - // External pointer; use libc bookkeeping. - size_t old_size = malloc_usable_size(user_ptr); + // External pointer; use libc bookkeeping. Route every malloc_usable_size + // call through CALL_LIBC_FUN_CAPTURE so we hit libc, not our override. 
+ size_t old_size; + CALL_LIBC_FUN_CAPTURE(old_size, malloc_usable_size, user_ptr); void *new_ptr; CALL_LIBC_FUN_CAPTURE(new_ptr, realloc, user_ptr, new_size); if (!new_ptr) return NULL; if (counting) { count_free(old_size); - count_malloc(malloc_usable_size(new_ptr)); + size_t new_usable; + CALL_LIBC_FUN_CAPTURE(new_usable, malloc_usable_size, new_ptr); + count_malloc(new_usable); } return new_ptr; } @@ -351,13 +367,20 @@ int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { } // Size queries -------------------------------------------------------------- +// +// External callers may pass our pointers to malloc_usable_size; libc would +// see an offset address and return garbage from its chunk-header probe. +// Override and route ours through the header. Internal calls go via +// CALL_LIBC_FUN_CAPTURE (dlsym-cached), bypassing our override. size_t replacement_malloc_usable_size(void *user_ptr) { if (!user_ptr) return 0; if (malloc_interposer_is_ours(user_ptr)) { return malloc_interposer_header_for(user_ptr)->requested_size; } - return malloc_usable_size(user_ptr); + size_t size; + CALL_LIBC_FUN_CAPTURE(size, malloc_usable_size, user_ptr); + return size; } // Public symbol overrides --------------------------------------------------- From 92b714ffc1302d2ca12329ce0f48b73091716f8f Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 8 May 2026 18:46:45 +0200 Subject: [PATCH 26/37] update compare script --- scripts/compare-malloc-local.sh | 35 +++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/scripts/compare-malloc-local.sh b/scripts/compare-malloc-local.sh index 7f0a7fe0..08b4e1aa 100755 --- a/scripts/compare-malloc-local.sh +++ b/scripts/compare-malloc-local.sh @@ -27,6 +27,12 @@ # Env overrides: # TOOLCHAIN_OLD default 6.2.2 # TOOLCHAIN_NEW default 6.3-snapshot-2026-02-27 +# FRESH=1 use timestamp-suffixed scratch dirs (fresh build, no +# 
cache reuse). Use this when a previous hung/zombie +# process is holding a SwiftPM lock on .build-X and you +# can't kill it. Trade-off: full rebuild each run. +# KEEP_FRESH=1 when FRESH=1, don't auto-delete the scratch dirs at +# exit (default is to clean up on success). set -euo pipefail @@ -37,11 +43,20 @@ TOOLCHAIN_OLD="${TOOLCHAIN_OLD:-6.2.2}" TOOLCHAIN_NEW="${TOOLCHAIN_NEW:-6.3-snapshot-2026-02-27}" BASELINE_OLD="jemalloc-${TOOLCHAIN_OLD}" BASELINE_NEW="interposer-${TOOLCHAIN_NEW}" + # Per-toolchain scratch paths so each toolchain has its own .build cache. # Without this, switching toolchains hits "module compiled with Swift X # cannot be imported by Y" errors on the cached Benchmark.swiftmodule. -SCRATCH_OLD="${BENCH_DIR}/.build-${TOOLCHAIN_OLD}" -SCRATCH_NEW="${BENCH_DIR}/.build-${TOOLCHAIN_NEW}" +# +# If FRESH=1 is set, append a timestamp suffix so this run can't collide +# with a SwiftPM lock held by a previous (possibly hung) process. Trade-off: +# no cache reuse — every run rebuilds from scratch. +SCRATCH_SUFFIX="" +if [[ "${FRESH:-0}" == "1" ]]; then + SCRATCH_SUFFIX="-fresh-$(date +%s)" +fi +SCRATCH_OLD="${BENCH_DIR}/.build-${TOOLCHAIN_OLD}${SCRATCH_SUFFIX}" +SCRATCH_NEW="${BENCH_DIR}/.build-${TOOLCHAIN_NEW}${SCRATCH_SUFFIX}" step() { printf '\n\033[1;36m== %s\033[0m\n' "$*"; } warn() { printf '\033[33m!! %s\033[0m\n' "$*" >&2; } @@ -51,6 +66,22 @@ fail() { printf '\033[31m## %s\033[0m\n' "$*" >&2; exit 1; } || fail "MallocInterposer benchmark dir missing — expected $BENCH_DIR/Benchmarks/MallocInterposer" command -v swiftly >/dev/null || fail "swiftly required" +# When FRESH=1, clean the throwaway scratch dirs on successful exit so they +# don't accumulate. KEEP_FRESH=1 disables this if the user wants to inspect. +if [[ "${FRESH:-0}" == "1" && "${KEEP_FRESH:-0}" != "1" ]]; then + cleanup_fresh() { + local rc=$? 
+ if (( rc == 0 )); then + rm -rf "$SCRATCH_OLD" "$SCRATCH_NEW" 2>/dev/null || true + else + warn "Run failed (exit $rc); leaving fresh scratch dirs for inspection:" + warn " $SCRATCH_OLD" + warn " $SCRATCH_NEW" + fi + } + trap cleanup_fresh EXIT +fi + cd "$BENCH_DIR" # Forward any positional args as --filter regexes. From 2a3efb3295f09a8195acaeb6ba2d649c50fb8fe5 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Thu, 14 May 2026 17:27:26 +0200 Subject: [PATCH 27/37] move interposer to seperate package --- Benchmarks/Package.resolved | 18 +- LocalPackages/MallocInterposerC/.gitignore | 8 - LocalPackages/MallocInterposerC/Package.swift | 24 -- .../MallocInterposerC/include/interposer.h | 113 ----- .../MallocInterposerC/src/interposer-darwin.c | 346 --------------- .../MallocInterposerC/src/interposer-unix.c | 399 ------------------ .../MallocInterposerSwift/Package.swift | 31 -- .../MallocInterposerSwift.swift | 85 ---- .../SwiftTestClient/SwiftTestClient.swift | 66 --- Package.swift | 20 +- .../BenchmarkCommandPlugin.swift | 2 +- scripts/compare-malloc-local.sh | 127 +++--- 12 files changed, 91 insertions(+), 1148 deletions(-) delete mode 100644 LocalPackages/MallocInterposerC/.gitignore delete mode 100644 LocalPackages/MallocInterposerC/Package.swift delete mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h delete mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c delete mode 100644 LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c delete mode 100644 LocalPackages/MallocInterposerSwift/Package.swift delete mode 100644 LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift delete mode 100644 LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index 
86e322d3..5bf6d5f5 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "f1d359a544b71b52c6788ad2e4cd2952f7f166b62ddb07316768f66be7ba4099", + "originHash" : "a9d6bb991cd82bfd91d8290469d6c06ea1ed287e5ad8a103d3ef751159aa48f7", "pins" : [ { "identity" : "hdrhistogram-swift", @@ -11,21 +11,21 @@ } }, { - "identity" : "package-datetime", + "identity" : "malloc-interposer", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-datetime", + "location" : "https://github.com/ordo-one/malloc-interposer.git", "state" : { - "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", - "version" : "1.0.2" + "revision" : "d9ca5ad6d85622fb2bd5b3d3387ba064dbcab1c2", + "version" : "1.0.0" } }, { - "identity" : "package-jemalloc", + "identity" : "package-datetime", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-jemalloc", + "location" : "https://github.com/ordo-one/package-datetime", "state" : { - "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", - "version" : "1.0.0" + "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", + "version" : "1.0.2" } }, { diff --git a/LocalPackages/MallocInterposerC/.gitignore b/LocalPackages/MallocInterposerC/.gitignore deleted file mode 100644 index 0023a534..00000000 --- a/LocalPackages/MallocInterposerC/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.DS_Store -/.build -/Packages -xcuserdata/ -DerivedData/ -.swiftpm/configuration/registries.json -.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata -.netrc diff --git a/LocalPackages/MallocInterposerC/Package.swift b/LocalPackages/MallocInterposerC/Package.swift deleted file mode 100644 index 6dba7002..00000000 --- a/LocalPackages/MallocInterposerC/Package.swift +++ /dev/null @@ -1,24 +0,0 @@ -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. 
- -import PackageDescription - -let package = Package( - name: "MallocInterposer", - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "MallocInterposerC", - type: .dynamic, - targets: ["MallocInterposerC"]) - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. - .target( - name: "MallocInterposerC", - linkerSettings: [ - .linkedLibrary("dl") - ]) - ] -) diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h deleted file mode 100644 index 31f86ce0..00000000 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/include/interposer.h +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -#ifndef INTERPOSER_H -#define INTERPOSER_H - -#include -#include -#include -#include -#include -#include -#if __APPLE__ -# include -#endif - -// --------------------------------------------------------------------------- -// Header-prefix size tracking -// -// Each allocation we hand back to the caller is preceded by a 16-byte header -// that records the requested size and a magic word. On free/realloc we read -// the header instead of calling malloc_size/malloc_usable_size, eliminating -// a libc round-trip per call. Pointers that didn't go through the interposer -// (e.g., aligned-alloc slow path, allocations that pre-date hooking) are -// detected by a failing magic check and fall back to libc bookkeeping. 
-// -// The header is exactly 16 bytes so user_ptr inherits the 16-byte alignment -// of the underlying libc allocation. -// --------------------------------------------------------------------------- - -#define MALLOC_INTERPOSER_MAGIC 0xC0FFEE5AU - -typedef struct { - size_t requested_size; // offset 0 - uint32_t reserved; // offset 8 - uint32_t magic; // offset 12 — last 4 bytes for fast probe via *(user_ptr - 4) -} malloc_header_t; - -_Static_assert(sizeof(malloc_header_t) == 16, - "malloc_header_t must be 16 bytes to preserve 16-byte alignment"); - -static inline malloc_header_t *malloc_interposer_header_for(void *user_ptr) { - return (malloc_header_t *)((char *)user_ptr - sizeof(malloc_header_t)); -} - -static inline void *malloc_interposer_user_for(void *raw) { - return (char *)raw + sizeof(malloc_header_t); -} - -static inline bool malloc_interposer_is_ours(const void *user_ptr) { - if (!user_ptr) return false; - // Probe the last 4 bytes of the would-be header. For our pointers this - // reads our magic; for external pointers it reads into libc chunk - // metadata (always present and readable for libc-malloc'd pointers). 
- uint32_t magic; - memcpy(&magic, (const char *)user_ptr - sizeof(uint32_t), sizeof(magic)); - return magic == MALLOC_INTERPOSER_MAGIC; -} - -// --------------------------------------------------------------------------- - -// Enable/disable counting and reset/read stats -void malloc_interposer_enable(void); -void malloc_interposer_disable(void); -void malloc_interposer_reset(void); -void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, - int64_t *malloc_small, int64_t *malloc_large, - int64_t *free_count, int64_t *free_bytes); - -// Replacement functions (used internally for DYLD_INTERPOSE and Linux overrides) -void *replacement_malloc(size_t size); -void replacement_free(void *ptr); -void *replacement_calloc(size_t nmemb, size_t size); -void *replacement_realloc(void *ptr, size_t size); -void *replacement_reallocf(void *ptr, size_t size); -void *replacement_valloc(size_t size); -int replacement_posix_memalign(void **memptr, size_t alignment, size_t size); -#if __APPLE__ -size_t replacement_malloc_size(const void *ptr); -#else -size_t replacement_malloc_usable_size(void *ptr); -#endif - -// On Linux we use LD_PRELOAD to interpose the standard malloc functions -// and we have to declare them ourselves -#if !__APPLE__ -void free(void *ptr); -void *malloc(size_t size); -void *calloc(size_t nmemb, size_t size); -void *realloc(void *ptr, size_t size); -void *reallocf(void *ptr, size_t size); -void *valloc(size_t size); -int posix_memalign(void **memptr, size_t alignment, size_t size); -size_t malloc_usable_size(void *ptr); -#endif - -#if __APPLE__ -void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size); -void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size); -void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size); -void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, 
size_t size); -void replacement_malloc_zone_free(malloc_zone_t *zone, void *ptr); -#endif - -#endif diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c deleted file mode 100644 index 8f03ba88..00000000 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-darwin.c +++ /dev/null @@ -1,346 +0,0 @@ -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -#include -#if __APPLE__ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Counting state — all updated on the malloc hot path, so use relaxed atomics. 
-static _Atomic bool g_counting_enabled = false; -static _Atomic int64_t g_malloc_count = 0; -static _Atomic int64_t g_malloc_bytes = 0; -static _Atomic int64_t g_malloc_small = 0; -static _Atomic int64_t g_malloc_large = 0; -static _Atomic int64_t g_free_count = 0; -static _Atomic int64_t g_free_bytes = 0; - -// Cached page size for small/large classification -static int g_page_size = 0; - -static int get_page_size(void) { - if (__builtin_expect(g_page_size == 0, 0)) { - g_page_size = (int)getpagesize(); - } - return g_page_size; -} - -// Public API ---------------------------------------------------------------- - -void malloc_interposer_enable(void) { - atomic_store_explicit(&g_counting_enabled, true, memory_order_release); -} - -void malloc_interposer_disable(void) { - atomic_store_explicit(&g_counting_enabled, false, memory_order_release); -} - -void malloc_interposer_reset(void) { - atomic_store_explicit(&g_malloc_count, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_bytes, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_small, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_large, 0, memory_order_relaxed); - atomic_store_explicit(&g_free_count, 0, memory_order_relaxed); - atomic_store_explicit(&g_free_bytes, 0, memory_order_relaxed); - atomic_thread_fence(memory_order_release); -} - -void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, - int64_t *malloc_small, int64_t *malloc_large, - int64_t *free_count, int64_t *free_bytes) { - *malloc_count = atomic_load_explicit(&g_malloc_count, memory_order_relaxed); - *malloc_bytes = atomic_load_explicit(&g_malloc_bytes, memory_order_relaxed); - *malloc_small = atomic_load_explicit(&g_malloc_small, memory_order_relaxed); - *malloc_large = atomic_load_explicit(&g_malloc_large, memory_order_relaxed); - *free_count = atomic_load_explicit(&g_free_count, memory_order_relaxed); - *free_bytes = atomic_load_explicit(&g_free_bytes, memory_order_relaxed); -} - -// 
--------------------------------------------------------------------------- - -#define DYLD_INTERPOSE(_replacement,_replacee) \ - __attribute__((used)) static struct { const void *replacement; const void *replacee; } _interpose_##_replacee \ - __attribute__ ((section("__DATA,__interpose"))) = { (const void *)(unsigned long)&_replacement, (const void *)(unsigned long)&_replacee }; - -// Inline counting helpers --------------------------------------------------- - -static __attribute__((always_inline)) void count_malloc(size_t size) { - atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); - if (size > (size_t)get_page_size()) { - atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); - } else { - atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); - } -} - -static __attribute__((always_inline)) void count_free(size_t size) { - atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_free_bytes, (int64_t)size, memory_order_relaxed); -} - -// Header-write helpers ------------------------------------------------------ - -static __attribute__((always_inline)) void *write_header(void *raw, size_t size) { - malloc_header_t *hdr = (malloc_header_t *)raw; - hdr->requested_size = size; - hdr->reserved = 0; - hdr->magic = MALLOC_INTERPOSER_MAGIC; - return malloc_interposer_user_for(raw); -} - -// Replacement functions ----------------------------------------------------- -// -// On Darwin, calls from inside this dylib resolve directly to libsystem -// (DYLD_INTERPOSE only rewrites calls in OTHER images), so plain `malloc`, -// `free` etc. below are libsystem's, not recursive into ourselves. 
- -void *replacement_malloc(size_t size) { - void *raw = malloc(size + sizeof(malloc_header_t)); - if (!raw) return NULL; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); - } - return write_header(raw, size); -} - -void replacement_free(void *user_ptr) { - if (!user_ptr) return; - if (malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(hdr->requested_size); - } - free(hdr); - } else { - // External pointer (rare on Darwin once DYLD_INTERPOSE is active). - // Fall back to libc bookkeeping for byte accounting. - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(malloc_size(user_ptr)); - } - free(user_ptr); - } -} - -void *replacement_calloc(size_t count, size_t size) { - size_t total; - if (__builtin_mul_overflow(count, size, &total)) { - // Let libc surface the overflow exactly as the user would expect. - return calloc(count, size); - } - // libc calloc zeros the entire allocation including where the header - // sits; we then overwrite those 16 bytes. Slightly redundant but simple. 
- void *raw = calloc(1, total + sizeof(malloc_header_t)); - if (!raw) return NULL; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(total); - } - return write_header(raw, total); -} - -void *replacement_realloc(void *user_ptr, size_t new_size) { - if (!user_ptr) return replacement_malloc(new_size); - if (new_size == 0) { - replacement_free(user_ptr); - return NULL; - } - - bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); - - if (malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); - size_t old_size = old_hdr->requested_size; - - void *new_raw = realloc(old_hdr, new_size + sizeof(malloc_header_t)); - if (!new_raw) return NULL; - - if (counting) { - count_free(old_size); - count_malloc(new_size); - } - // realloc may have moved memory; rewrite the header unconditionally. - return write_header(new_raw, new_size); - } - - // External pointer; use libc bookkeeping. - size_t old_size = malloc_size(user_ptr); - void *new_ptr = realloc(user_ptr, new_size); - if (!new_ptr) return NULL; - if (counting) { - count_free(old_size); - count_malloc(malloc_size(new_ptr)); - } - return new_ptr; -} - -void *replacement_reallocf(void *user_ptr, size_t new_size) { - void *new_ptr = replacement_realloc(user_ptr, new_size); - // reallocf semantics: if reallocation fails, free the original pointer. - // replacement_realloc handles size==0 (frees) and ptr==NULL (no original) - // itself, so only free on the actual-failure case. - if (!new_ptr && user_ptr && new_size != 0) { - replacement_free(user_ptr); - } - return new_ptr; -} - -// ---- Aligned/legacy paths: alignment requirements rule out the header ---- -// We let libc place a properly-aligned chunk and use malloc_size on free -// (paid by the rare allocations that use these). Magic check on free will -// fail, falling through to the external path that reads malloc_size. 
- -void *replacement_valloc(size_t size) { - void *ptr = valloc(size); - if (ptr && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(malloc_size(ptr)); - } - return ptr; -} - -int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - int result = posix_memalign(memptr, alignment, size); - if (result == 0 - && memptr - && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(malloc_size(*memptr)); - } - return result; -} - -// ---- Zone-level wrappers (rarely hit by user code) ------------------------ - -void *replacement_malloc_zone_malloc(malloc_zone_t *zone, size_t size) { - void *raw = malloc_zone_malloc(zone, size + sizeof(malloc_header_t)); - if (!raw) return NULL; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); - } - return write_header(raw, size); -} - -void *replacement_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) { - size_t total; - if (__builtin_mul_overflow(num_items, size, &total)) { - return malloc_zone_calloc(zone, num_items, size); - } - void *raw = malloc_zone_calloc(zone, 1, total + sizeof(malloc_header_t)); - if (!raw) return NULL; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(total); - } - return write_header(raw, total); -} - -void *replacement_malloc_zone_valloc(malloc_zone_t *zone, size_t size) { - void *ptr = malloc_zone_valloc(zone, size); - if (ptr && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(malloc_size(ptr)); - } - return ptr; -} - -void *replacement_malloc_zone_realloc(malloc_zone_t *zone, void *user_ptr, size_t new_size) { - if (!user_ptr) return replacement_malloc_zone_malloc(zone, new_size); - if (new_size == 0) { - replacement_malloc_zone_free(zone, user_ptr); - return NULL; - } - - bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); - - if 
(malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); - size_t old_size = old_hdr->requested_size; - void *new_raw = malloc_zone_realloc(zone, old_hdr, new_size + sizeof(malloc_header_t)); - if (!new_raw) return NULL; - if (counting) { - count_free(old_size); - count_malloc(new_size); - } - return write_header(new_raw, new_size); - } - - size_t old_size = malloc_size(user_ptr); - void *new_ptr = malloc_zone_realloc(zone, user_ptr, new_size); - if (!new_ptr) return NULL; - if (counting) { - count_free(old_size); - count_malloc(malloc_size(new_ptr)); - } - return new_ptr; -} - -void *replacement_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { - void *ptr = malloc_zone_memalign(zone, alignment, size); - if (ptr && atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(malloc_size(ptr)); - } - return ptr; -} - -void replacement_malloc_zone_free(malloc_zone_t *zone, void *user_ptr) { - if (!user_ptr) return; - if (malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(hdr->requested_size); - } - malloc_zone_free(zone, hdr); - } else { - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(malloc_size(user_ptr)); - } - malloc_zone_free(zone, user_ptr); - } -} - -// ---- Size queries --------------------------------------------------------- -// External code that calls malloc_size on one of our pointers would see the -// offset address (not the libc chunk start), so libsystem can't find it in -// any zone. Interpose to return the requested size from the header. 
- -size_t replacement_malloc_size(const void *user_ptr) { - if (!user_ptr) return 0; - if (malloc_interposer_is_ours(user_ptr)) { - return malloc_interposer_header_for((void *)user_ptr)->requested_size; - } - return malloc_size(user_ptr); -} - -DYLD_INTERPOSE(replacement_free, free) -DYLD_INTERPOSE(replacement_malloc, malloc) -DYLD_INTERPOSE(replacement_realloc, realloc) -DYLD_INTERPOSE(replacement_calloc, calloc) -DYLD_INTERPOSE(replacement_reallocf, reallocf) -DYLD_INTERPOSE(replacement_valloc, valloc) -DYLD_INTERPOSE(replacement_posix_memalign, posix_memalign) -DYLD_INTERPOSE(replacement_malloc_size, malloc_size) -DYLD_INTERPOSE(replacement_malloc_zone_malloc, malloc_zone_malloc) -DYLD_INTERPOSE(replacement_malloc_zone_calloc, malloc_zone_calloc) -DYLD_INTERPOSE(replacement_malloc_zone_valloc, malloc_zone_valloc) -DYLD_INTERPOSE(replacement_malloc_zone_realloc, malloc_zone_realloc) -DYLD_INTERPOSE(replacement_malloc_zone_memalign, malloc_zone_memalign) -DYLD_INTERPOSE(replacement_malloc_zone_free, malloc_zone_free) -#endif diff --git a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c b/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c deleted file mode 100644 index aa7c4143..00000000 --- a/LocalPackages/MallocInterposerC/Sources/MallocInterposerC/src/interposer-unix.c +++ /dev/null @@ -1,399 +0,0 @@ -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -#ifndef __APPLE__ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* a big block of memory that we'll use for recursive mallocs */ -static char g_recursive_malloc_mem[10 * 1024 * 1024] = {0}; -/* the index of the first free byte */ -static _Atomic ptrdiff_t g_recursive_malloc_next_free_ptr = ATOMIC_VAR_INIT(0); - -#define LIBC_SYMBOL(_fun) "" # _fun - -/* Some thread-local flags we use to check if we're recursively in a hooked function. */ -static __thread bool g_in_malloc = false; -static __thread bool g_in_realloc = false; -static __thread bool g_in_free = false; -static __thread bool g_in_malloc_usable_size = false; -static __thread bool g_in_socket = false; -static __thread bool g_in_accept = false; -static __thread bool g_in_accept4 = false; -static __thread bool g_in_close = false; - -/* The types of the variables holding the libc function pointers. */ -typedef void *(*type_libc_malloc)(size_t); -typedef void *(*type_libc_realloc)(void *, size_t); -typedef void (*type_libc_free)(void *); -typedef size_t (*type_libc_malloc_usable_size)(void *); -typedef int (*type_libc_socket)(int, int, int); -typedef int (*type_libc_accept)(int, struct sockaddr*, socklen_t *); -typedef int (*type_libc_accept4)(int, struct sockaddr *, socklen_t *, int); -typedef int (*type_libc_close)(int); - -/* The (atomic) globals holding the pointer to the original libc implementation. 
*/ -_Atomic type_libc_malloc g_libc_malloc; -_Atomic type_libc_realloc g_libc_realloc; -_Atomic type_libc_free g_libc_free; -_Atomic type_libc_malloc_usable_size g_libc_malloc_usable_size; -_Atomic type_libc_socket g_libc_socket; -_Atomic type_libc_accept g_libc_accept; -_Atomic type_libc_accept4 g_libc_accept4; -_Atomic type_libc_close g_libc_close; - -// Counting state — all updated on the malloc hot path, so use relaxed atomics. -static _Atomic bool g_counting_enabled = false; -static _Atomic int64_t g_malloc_count = 0; -static _Atomic int64_t g_malloc_bytes = 0; -static _Atomic int64_t g_malloc_small = 0; -static _Atomic int64_t g_malloc_large = 0; -static _Atomic int64_t g_free_count = 0; -static _Atomic int64_t g_free_bytes = 0; - -// Cached page size for small/large classification -static int g_page_size = 0; - -static int get_page_size(void) { - if (__builtin_expect(g_page_size == 0, 0)) { - g_page_size = (int)getpagesize(); - } - return g_page_size; -} - -// Public API ---------------------------------------------------------------- - -void malloc_interposer_enable(void) { - atomic_store_explicit(&g_counting_enabled, true, memory_order_release); -} - -void malloc_interposer_disable(void) { - atomic_store_explicit(&g_counting_enabled, false, memory_order_release); -} - -void malloc_interposer_reset(void) { - atomic_store_explicit(&g_malloc_count, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_bytes, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_small, 0, memory_order_relaxed); - atomic_store_explicit(&g_malloc_large, 0, memory_order_relaxed); - atomic_store_explicit(&g_free_count, 0, memory_order_relaxed); - atomic_store_explicit(&g_free_bytes, 0, memory_order_relaxed); - atomic_thread_fence(memory_order_release); -} - -void malloc_interposer_get_stats(int64_t *malloc_count, int64_t *malloc_bytes, - int64_t *malloc_small, int64_t *malloc_large, - int64_t *free_count, int64_t *free_bytes) { - *malloc_count = 
atomic_load_explicit(&g_malloc_count, memory_order_relaxed); - *malloc_bytes = atomic_load_explicit(&g_malloc_bytes, memory_order_relaxed); - *malloc_small = atomic_load_explicit(&g_malloc_small, memory_order_relaxed); - *malloc_large = atomic_load_explicit(&g_malloc_large, memory_order_relaxed); - *free_count = atomic_load_explicit(&g_free_count, memory_order_relaxed); - *free_bytes = atomic_load_explicit(&g_free_bytes, memory_order_relaxed); -} - -// --------------------------------------------------------------------------- - -// this is called if malloc is called whilst trying to resolve libc's realloc. -// we just vend out pointers to a large block in the BSS (which we never free). -// This block should be large enough because it's only used when malloc is -// called from dlsym which should only happen once per thread. -static void *recursive_malloc(size_t size_in) { - size_t size = size_in; - if ((size & 0xf) != 0) { - // make size 16 byte aligned - size = (size + 0xf) & (~(size_t)0xf); - } - - ptrdiff_t next = atomic_fetch_add_explicit(&g_recursive_malloc_next_free_ptr, - size, - memory_order_relaxed); - if ((size_t)next >= sizeof(g_recursive_malloc_mem)) { - // we ran out of memory - return NULL; - } - return (void *)((intptr_t)g_recursive_malloc_mem + next); -} - -static bool is_recursive_malloc_block(void *ptr) { - uintptr_t block_begin = (uintptr_t)g_recursive_malloc_mem; - uintptr_t block_end = block_begin + sizeof(g_recursive_malloc_mem); - uintptr_t user_ptr = (uintptr_t)ptr; - - return user_ptr >= block_begin && user_ptr < block_end; -} - -// this is called if realloc is called whilst trying to resolve libc's realloc. -static void *recursive_realloc(void *ptr, size_t size) { - (void)ptr; (void)size; - abort(); -} - -// this is called if free is called whilst trying to resolve libc's free. 
-static void recursive_free(void *ptr) { - (void)ptr; - abort(); -} - -// If malloc_usable_size is queried during dlsym handshake, we have nothing -// useful to report — return 0. Reaching here is exceptional. -static size_t recursive_malloc_usable_size(void *ptr) { - (void)ptr; - return 0; -} - -static int recursive_socket(int domain, int type, int protocol) { - (void)domain; (void)type; (void)protocol; - abort(); -} -static int recursive_accept(int socket, struct sockaddr *restrict address, socklen_t *restrict address_len) { - (void)socket; (void)address; (void)address_len; - abort(); -} -static int recursive_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) { - (void)sockfd; (void)addr; (void)addrlen; (void)flags; - abort(); -} -static int recursive_close(int fildes) { - (void)fildes; - abort(); -} - -#define JUMP_INTO_LIBC_FUN(_fun, ...) /* \ -*/ do { /* \ -*/ type_libc_ ## _fun local_fun = atomic_load(&g_libc_ ## _fun); /* \ -*/ if (!local_fun) { /* \ -*/ if (!g_in_ ## _fun) { /* \ -*/ g_in_ ## _fun = true; /* \ -*/ type_libc_ ## _fun desired = dlsym(RTLD_NEXT, LIBC_SYMBOL(_fun)); /* \ -*/ if (atomic_compare_exchange_strong(&g_libc_ ## _fun, &local_fun, desired)) { /* \ -*/ local_fun = desired; /* \ -*/ } else { /* \ -*/ local_fun = atomic_load(&g_libc_ ## _fun); /* \ -*/ } /* \ -*/ } else { /* \ -*/ return recursive_ ## _fun (__VA_ARGS__); /* \ -*/ } /* \ -*/ } /* \ -*/ return local_fun(__VA_ARGS__); /* \ -*/ } while(0) - -/* Companion to JUMP_INTO_LIBC_FUN that captures the libc result into _outvar - * instead of returning. Used when we need to inspect the result before - * returning (e.g. to write the size header). */ -#define CALL_LIBC_FUN_CAPTURE(_outvar, _fun, ...) 
\ - do { \ - type_libc_ ## _fun local_fun = atomic_load(&g_libc_ ## _fun); \ - if (!local_fun) { \ - if (!g_in_ ## _fun) { \ - g_in_ ## _fun = true; \ - type_libc_ ## _fun desired = dlsym(RTLD_NEXT, LIBC_SYMBOL(_fun)); \ - if (atomic_compare_exchange_strong(&g_libc_ ## _fun, &local_fun, desired)) { \ - local_fun = desired; \ - } else { \ - local_fun = atomic_load(&g_libc_ ## _fun); \ - } \ - } else { \ - (_outvar) = recursive_ ## _fun (__VA_ARGS__); \ - break; \ - } \ - } \ - (_outvar) = local_fun(__VA_ARGS__); \ - } while (0) - -// Inline counting helpers --------------------------------------------------- - -static __attribute__((always_inline)) void count_malloc(size_t size) { - atomic_fetch_add_explicit(&g_malloc_count, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_malloc_bytes, (int64_t)size, memory_order_relaxed); - if (size > (size_t)get_page_size()) { - atomic_fetch_add_explicit(&g_malloc_large, 1, memory_order_relaxed); - } else { - atomic_fetch_add_explicit(&g_malloc_small, 1, memory_order_relaxed); - } -} - -static __attribute__((always_inline)) void count_free(size_t size) { - atomic_fetch_add_explicit(&g_free_count, 1, memory_order_relaxed); - atomic_fetch_add_explicit(&g_free_bytes, (int64_t)size, memory_order_relaxed); -} - -// Header-write helper ------------------------------------------------------- - -static __attribute__((always_inline)) void *write_header(void *raw, size_t size) { - malloc_header_t *hdr = (malloc_header_t *)raw; - hdr->requested_size = size; - hdr->reserved = 0; - hdr->magic = MALLOC_INTERPOSER_MAGIC; - return malloc_interposer_user_for(raw); -} - -// Replacement functions ----------------------------------------------------- - -void *replacement_malloc(size_t size) { - void *raw; - CALL_LIBC_FUN_CAPTURE(raw, malloc, size + sizeof(malloc_header_t)); - if (!raw) return NULL; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_malloc(size); - } - return write_header(raw, size); -} - -void 
replacement_free(void *user_ptr) { - if (!user_ptr) return; - if (malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *hdr = malloc_interposer_header_for(user_ptr); - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - count_free(hdr->requested_size); - } - // Recursive-malloc blocks live in our static buffer; never call libc free on them. - if (!is_recursive_malloc_block(hdr)) { - JUMP_INTO_LIBC_FUN(free, hdr); - } - return; - } - // Externally-allocated pointer (no header). - if (is_recursive_malloc_block(user_ptr)) return; - if (atomic_load_explicit(&g_counting_enabled, memory_order_relaxed)) { - size_t size; - CALL_LIBC_FUN_CAPTURE(size, malloc_usable_size, user_ptr); - count_free(size); - } - JUMP_INTO_LIBC_FUN(free, user_ptr); -} - -void *replacement_realloc(void *user_ptr, size_t new_size) { - if (!user_ptr) return replacement_malloc(new_size); - if (new_size == 0) { - replacement_free(user_ptr); - return NULL; - } - - bool counting = atomic_load_explicit(&g_counting_enabled, memory_order_relaxed); - - if (malloc_interposer_is_ours(user_ptr)) { - malloc_header_t *old_hdr = malloc_interposer_header_for(user_ptr); - size_t old_size = old_hdr->requested_size; - - void *new_raw; - CALL_LIBC_FUN_CAPTURE(new_raw, realloc, old_hdr, new_size + sizeof(malloc_header_t)); - if (!new_raw) return NULL; - - if (counting) { - count_free(old_size); - count_malloc(new_size); - } - return write_header(new_raw, new_size); - } - - // External pointer; use libc bookkeeping. Route every malloc_usable_size - // call through CALL_LIBC_FUN_CAPTURE so we hit libc, not our override. 
- size_t old_size; - CALL_LIBC_FUN_CAPTURE(old_size, malloc_usable_size, user_ptr); - void *new_ptr; - CALL_LIBC_FUN_CAPTURE(new_ptr, realloc, user_ptr, new_size); - if (!new_ptr) return NULL; - if (counting) { - count_free(old_size); - size_t new_usable; - CALL_LIBC_FUN_CAPTURE(new_usable, malloc_usable_size, new_ptr); - count_malloc(new_usable); - } - return new_ptr; -} - -void *replacement_calloc(size_t count, size_t size) { - size_t total; - if (__builtin_mul_overflow(count, size, &total)) { - errno = ENOMEM; - return NULL; - } - void *user_ptr = replacement_malloc(total); - if (user_ptr) { - memset(user_ptr, 0, total); - } - return user_ptr; -} - -void *replacement_reallocf(void *user_ptr, size_t new_size) { - void *new_ptr = replacement_realloc(user_ptr, new_size); - if (!new_ptr && user_ptr && new_size != 0) { - replacement_free(user_ptr); - } - return new_ptr; -} - -// Aligned/legacy paths skip the header (alignment requirements rule it out) -// and rely on malloc_usable_size for byte accounting. - -void *replacement_valloc(size_t size) { - // Note: not aligning correctly (should be PAGE_SIZE) but good enough. - return replacement_malloc(size); -} - -int replacement_posix_memalign(void **memptr, size_t alignment, size_t size) { - (void)alignment; - // Note: not aligning correctly (should be `alignment`) but good enough. - void *ptr = replacement_malloc(size); - if (ptr && memptr) { - *memptr = ptr; - return 0; - } - return ENOMEM; -} - -// Size queries -------------------------------------------------------------- -// -// External callers may pass our pointers to malloc_usable_size; libc would -// see an offset address and return garbage from its chunk-header probe. -// Override and route ours through the header. Internal calls go via -// CALL_LIBC_FUN_CAPTURE (dlsym-cached), bypassing our override. 
- -size_t replacement_malloc_usable_size(void *user_ptr) { - if (!user_ptr) return 0; - if (malloc_interposer_is_ours(user_ptr)) { - return malloc_interposer_header_for(user_ptr)->requested_size; - } - size_t size; - CALL_LIBC_FUN_CAPTURE(size, malloc_usable_size, user_ptr); - return size; -} - -// Public symbol overrides --------------------------------------------------- - -void free(void *ptr) { replacement_free(ptr); } -void *malloc(size_t size) { return replacement_malloc(size); } -void *calloc(size_t nmemb, size_t size) { return replacement_calloc(nmemb, size); } -void *realloc(void *ptr, size_t size) { return replacement_realloc(ptr, size); } -void *reallocf(void *ptr, size_t size) { return replacement_reallocf(ptr, size); } -void *valloc(size_t size) { return replacement_valloc(size); } -int posix_memalign(void **memptr, size_t alignment, size_t size) { - return replacement_posix_memalign(memptr, alignment, size); -} -size_t malloc_usable_size(void *ptr) { return replacement_malloc_usable_size(ptr); } - -#endif diff --git a/LocalPackages/MallocInterposerSwift/Package.swift b/LocalPackages/MallocInterposerSwift/Package.swift deleted file mode 100644 index dab0e337..00000000 --- a/LocalPackages/MallocInterposerSwift/Package.swift +++ /dev/null @@ -1,31 +0,0 @@ -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "MallocInterposerSwift", - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "MallocInterposerSwift", - type: .dynamic, - targets: ["MallocInterposerSwift"]) - ], - dependencies: [ - .package(path: "../MallocInterposerC"), - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. 
- .target( - name: "MallocInterposerSwift", - dependencies: [ - .product(name: "MallocInterposerC", package: "MallocInterposerC"), - ]), - .executableTarget( - name: "SwiftTestClient", - dependencies: ["MallocInterposerSwift"] - ), - ] -) diff --git a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift b/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift deleted file mode 100644 index 3f0c5fc0..00000000 --- a/LocalPackages/MallocInterposerSwift/Sources/MallocInterposerSwift/MallocInterposerSwift.swift +++ /dev/null @@ -1,85 +0,0 @@ -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -import MallocInterposerC - -/// Main class for managing malloc interposition. -/// Counting is performed entirely in C using _Atomic int64_t globals, -/// so there is no Swift dispatch overhead on the malloc hot path. -public class MallocInterposerSwift: @unchecked Sendable { - private init() {} - - /// Call once at startup (before hook()) to initialize C-side state. - public static func initialize() { - malloc_interposer_reset() - } - - /// Start counting allocations. - public static func hook() { - malloc_interposer_reset() - malloc_interposer_enable() - } - - /// Stop counting allocations. - public static func unhook() { - malloc_interposer_disable() - } - - /// Reset all counters to zero (counting state unchanged). - public static func reset() { - malloc_interposer_reset() - } - - /// Read the current counter snapshot. 
- public static func getStatistics() -> Statistics { - var mallocCount: Int64 = 0 - var mallocBytes: Int64 = 0 - var mallocSmall: Int64 = 0 - var mallocLarge: Int64 = 0 - var freeCount: Int64 = 0 - var freeBytes: Int64 = 0 - malloc_interposer_get_stats(&mallocCount, &mallocBytes, &mallocSmall, &mallocLarge, &freeCount, &freeBytes) - return Statistics( - mallocCount: Int(mallocCount), - mallocBytesCount: Int(mallocBytes), - mallocSmallCount: Int(mallocSmall), - mallocLargeCount: Int(mallocLarge), - freeCount: Int(freeCount), - freeBytesCount: Int(freeBytes) - ) - } -} - -public extension MallocInterposerSwift { - struct Statistics { - public let mallocCount: Int - public let mallocBytesCount: Int - public let mallocSmallCount: Int - public let mallocLargeCount: Int - public let freeCount: Int - public let freeBytesCount: Int - - public init( - mallocCount: Int = 0, - mallocBytesCount: Int = 0, - mallocSmallCount: Int = 0, - mallocLargeCount: Int = 0, - freeCount: Int = 0, - freeBytesCount: Int = 0 - ) { - self.mallocCount = mallocCount - self.mallocBytesCount = mallocBytesCount - self.mallocSmallCount = mallocSmallCount - self.mallocLargeCount = mallocLargeCount - self.freeCount = freeCount - self.freeBytesCount = freeBytesCount - } - } -} diff --git a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift b/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift deleted file mode 100644 index c398d4d7..00000000 --- a/LocalPackages/MallocInterposerSwift/Sources/SwiftTestClient/SwiftTestClient.swift +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright (c) 2022 Ordo One AB. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// - -import Foundation -import MallocInterposerC -import MallocInterposerSwift - -@main -enum TestClient { - - @_optimize(none) - static func blackHole(_ value: Any) { - - } - - static func performAllocations(count: Int, size: Int, shouldFree: Bool = true) { - var index = 0 - repeat { - let x = malloc(size) - if shouldFree { - free(x) - } - index += 1 - } while index < count - } - - @_optimize(none) - static func main() { - print("=== MallocInterposerSwift Test ===") - // Reset statistics to start clean - MallocInterposerSwift.initialize() - MallocInterposerSwift.hook() - - // let ptr = malloc(1000) - // let ptr2 = malloc(500) - // - // free(ptr) - // free(ptr2) - - // let x: UnsafeMutablePointer = UnsafeMutablePointer.allocate(capacity: 5000) - - performAllocations(count: 1, size: 11 * 1024 * 1024) - //performAllocations(count: 1, size: 32 * 1024 * 1024, shouldFree: false) - - MallocInterposerSwift.unhook() - - // Print final statistics - let stats = MallocInterposerSwift.getStatistics() - - print("Total malloc count: \(stats.mallocCount)") - print("Malloc small count: \(stats.mallocSmallCount)") - print("Malloc large count: \(stats.mallocLargeCount)") - print("Total allocated memory: \(stats.mallocBytesCount) bytes") - print("Total free count: \(stats.freeCount)") - print("Total freed memory: \(stats.freeBytesCount) bytes") - - print("\n--- Test complete ---") - } -} diff --git a/Package.swift b/Package.swift index e2f05501..1f5e18ff 100644 --- a/Package.swift +++ b/Package.swift @@ -15,6 +15,20 @@ if disableJemalloc { defaultTraits = ["Jemalloc"] } +// When MALLOC_INTERPOSER_LOCAL_PATH is set, use a local checkout of the +// malloc-interposer package instead of the published GitHub URL. Useful +// when iterating on the interposer alongside this package. 
+let mallocInterposerDependency: Package.Dependency = { + if let localPath = ProcessInfo.processInfo.environment["MALLOC_INTERPOSER_LOCAL_PATH"], + localPath.isEmpty == false { + return .package(path: localPath) + } + return .package( + url: "https://github.com/ordo-one/malloc-interposer.git", + .upToNextMajor(from: "1.0.0") + ) +}() + var packageDependencies: [Package.Dependency] = [ .package(url: "https://github.com/apple/swift-system.git", .upToNextMajor(from: "1.1.0")), .package(url: "https://github.com/apple/swift-argument-parser.git", "1.1.0"..<"1.6.0"), @@ -22,8 +36,7 @@ var packageDependencies: [Package.Dependency] = [ .package(url: "https://github.com/HdrHistogram/hdrhistogram-swift.git", .upToNextMajor(from: "0.1.4")), .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.0.0")), .package(url: "https://github.com/ordo-one/package-jemalloc.git", .upToNextMajor(from: "1.0.0")), - .package(path: "LocalPackages/MallocInterposerC"), - .package(path: "LocalPackages/MallocInterposerSwift"), + mallocInterposerDependency, ] #if os(Linux) && compiler(>=6.3) @@ -41,8 +54,7 @@ var benchmarkDependencies: [Target.Dependency] = [ .product(name: "Atomics", package: "swift-atomics"), "SwiftRuntimeHooks", "BenchmarkShared", - .product(name: "MallocInterposerC", package: "MallocInterposerC"), - "MallocInterposerSwift", + .product(name: "MallocInterposerSwift", package: "malloc-interposer"), ] #if os(Linux) && compiler(>=6.3) diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift index 0cc438dd..55539314 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift @@ -423,7 +423,7 @@ import PackagePlugin } benchmarkTool = tool.path - interposerLib = tool.path.removingLastComponent().appending(subpath: "libMallocInterposerC.so").string + interposerLib = 
tool.path.removingLastComponent().appending(subpath: "libMallocInterposerSwift.so").string #if os(Linux) && compiler(>=6.3) let swiftRuntimeInterposerLib = tool.path.removingLastComponent() .appending(subpath: "libSwiftRuntimeInterposerC.so").string diff --git a/scripts/compare-malloc-local.sh b/scripts/compare-malloc-local.sh index 08b4e1aa..4e8e6728 100755 --- a/scripts/compare-malloc-local.sh +++ b/scripts/compare-malloc-local.sh @@ -39,8 +39,8 @@ set -euo pipefail PB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" BENCH_DIR="${PB_DIR}/Benchmarks" TARGET="MallocInterposerBenchmarks" -TOOLCHAIN_OLD="${TOOLCHAIN_OLD:-6.2.2}" -TOOLCHAIN_NEW="${TOOLCHAIN_NEW:-6.3-snapshot-2026-02-27}" +TOOLCHAIN_OLD="${TOOLCHAIN_OLD:-6.2.4}" +TOOLCHAIN_NEW="${TOOLCHAIN_NEW:-6.3}" BASELINE_OLD="jemalloc-${TOOLCHAIN_OLD}" BASELINE_NEW="interposer-${TOOLCHAIN_NEW}" @@ -53,33 +53,36 @@ BASELINE_NEW="interposer-${TOOLCHAIN_NEW}" # no cache reuse — every run rebuilds from scratch. SCRATCH_SUFFIX="" if [[ "${FRESH:-0}" == "1" ]]; then - SCRATCH_SUFFIX="-fresh-$(date +%s)" + SCRATCH_SUFFIX="-fresh-$(date +%s)" fi SCRATCH_OLD="${BENCH_DIR}/.build-${TOOLCHAIN_OLD}${SCRATCH_SUFFIX}" SCRATCH_NEW="${BENCH_DIR}/.build-${TOOLCHAIN_NEW}${SCRATCH_SUFFIX}" step() { printf '\n\033[1;36m== %s\033[0m\n' "$*"; } warn() { printf '\033[33m!! %s\033[0m\n' "$*" >&2; } -fail() { printf '\033[31m## %s\033[0m\n' "$*" >&2; exit 1; } +fail() { + printf '\033[31m## %s\033[0m\n' "$*" >&2 + exit 1 +} -[[ -d "$BENCH_DIR/Benchmarks/MallocInterposer" ]] \ - || fail "MallocInterposer benchmark dir missing — expected $BENCH_DIR/Benchmarks/MallocInterposer" +[[ -d "$BENCH_DIR/Benchmarks/MallocInterposer" ]] || + fail "MallocInterposer benchmark dir missing — expected $BENCH_DIR/Benchmarks/MallocInterposer" command -v swiftly >/dev/null || fail "swiftly required" # When FRESH=1, clean the throwaway scratch dirs on successful exit so they # don't accumulate. KEEP_FRESH=1 disables this if the user wants to inspect. 
if [[ "${FRESH:-0}" == "1" && "${KEEP_FRESH:-0}" != "1" ]]; then - cleanup_fresh() { - local rc=$? - if (( rc == 0 )); then - rm -rf "$SCRATCH_OLD" "$SCRATCH_NEW" 2>/dev/null || true - else - warn "Run failed (exit $rc); leaving fresh scratch dirs for inspection:" - warn " $SCRATCH_OLD" - warn " $SCRATCH_NEW" - fi - } - trap cleanup_fresh EXIT + cleanup_fresh() { + local rc=$? + if ((rc == 0)); then + rm -rf "$SCRATCH_OLD" "$SCRATCH_NEW" 2>/dev/null || true + else + warn "Run failed (exit $rc); leaving fresh scratch dirs for inspection:" + warn " $SCRATCH_OLD" + warn " $SCRATCH_NEW" + fi + } + trap cleanup_fresh EXIT fi cd "$BENCH_DIR" @@ -87,60 +90,60 @@ cd "$BENCH_DIR" # Forward any positional args as --filter regexes. declare -a FILTER_ARGS=() for f in "$@"; do - FILTER_ARGS+=(--filter "$f") + FILTER_ARGS+=(--filter "$f") done # SwiftPM #9062 workaround: copy lib*-tool.dylib → lib*.dylib so the spawned # benchmark tool finds the interposer at the path it expects. Only relevant # on the interposer (6.3) run. fix_tool_dylibs() { - local search_dir="$1" - local copied=0 - while IFS= read -r src; do - local dst="${src/-tool.dylib/.dylib}" - if [[ ! -f "$dst" || "$src" -nt "$dst" ]]; then - cp -p "$src" "$dst" - copied=$((copied + 1)) - fi - done < <(find "$search_dir" -name "libMallocInterposer*-tool.dylib" 2>/dev/null) - if (( copied > 0 )); then - warn "Renamed $copied -tool.dylib → .dylib (SwiftPM #9062 workaround)" + local search_dir="$1" + local copied=0 + while IFS= read -r src; do + local dst="${src/-tool.dylib/.dylib}" + if [[ ! 
-f "$dst" || "$src" -nt "$dst" ]]; then + cp -p "$src" "$dst" + copied=$((copied + 1)) fi + done < <(find "$search_dir" -name "libMallocInterposer*-tool.dylib" 2>/dev/null) + if ((copied > 0)); then + warn "Renamed $copied -tool.dylib → .dylib (SwiftPM #9062 workaround)" + fi } run_jemalloc() { - step "Run 1: Swift $TOOLCHAIN_OLD (jemalloc) → baseline '$BASELINE_OLD' [scratch: $SCRATCH_OLD]" - swiftly run +"$TOOLCHAIN_OLD" \ - swift package \ - --scratch-path "$SCRATCH_OLD" \ - --allow-writing-to-package-directory benchmark \ - baseline update "$BASELINE_OLD" \ - --target "$TARGET" \ - --quiet --no-progress \ - "${FILTER_ARGS[@]}" + step "Run 1: Swift $TOOLCHAIN_OLD (jemalloc) → baseline '$BASELINE_OLD' [scratch: $SCRATCH_OLD]" + swiftly run +"$TOOLCHAIN_OLD" \ + swift package \ + --scratch-path "$SCRATCH_OLD" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_OLD" \ + --target "$TARGET" \ + --quiet --no-progress \ + "${FILTER_ARGS[@]}" } run_interposer() { - step "Run 2: Swift $TOOLCHAIN_NEW (interposer) → baseline '$BASELINE_NEW' [scratch: $SCRATCH_NEW]" - if ! swiftly run +"$TOOLCHAIN_NEW" \ - swift package \ - --scratch-path "$SCRATCH_NEW" \ - --allow-writing-to-package-directory benchmark \ - baseline update "$BASELINE_NEW" \ - --target "$TARGET" \ - --quiet --no-progress \ - "${FILTER_ARGS[@]}"; then - warn "First attempt failed — applying SwiftPM #9062 workaround and retrying" - fix_tool_dylibs "$SCRATCH_NEW" - swiftly run +"$TOOLCHAIN_NEW" \ - swift package \ - --scratch-path "$SCRATCH_NEW" \ - --allow-writing-to-package-directory benchmark \ - baseline update "$BASELINE_NEW" \ - --target "$TARGET" \ - --quiet --no-progress \ - "${FILTER_ARGS[@]}" - fi + step "Run 2: Swift $TOOLCHAIN_NEW (interposer) → baseline '$BASELINE_NEW' [scratch: $SCRATCH_NEW]" + if ! 
swiftly run +"$TOOLCHAIN_NEW" \ + swift package \ + --scratch-path "$SCRATCH_NEW" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_NEW" \ + --target "$TARGET" \ + --quiet --no-progress \ + "${FILTER_ARGS[@]}"; then + warn "First attempt failed — applying SwiftPM #9062 workaround and retrying" + fix_tool_dylibs "$SCRATCH_NEW" + swiftly run +"$TOOLCHAIN_NEW" \ + swift package \ + --scratch-path "$SCRATCH_NEW" \ + --allow-writing-to-package-directory benchmark \ + baseline update "$BASELINE_NEW" \ + --target "$TARGET" \ + --quiet --no-progress \ + "${FILTER_ARGS[@]}" + fi } run_jemalloc @@ -148,7 +151,7 @@ run_interposer step "Comparison: $BASELINE_OLD vs $BASELINE_NEW" swiftly run +"$TOOLCHAIN_NEW" \ - swift package \ - --scratch-path "$SCRATCH_NEW" \ - benchmark baseline compare "$BASELINE_OLD" "$BASELINE_NEW" \ - --target "$TARGET" + swift package \ + --scratch-path "$SCRATCH_NEW" \ + benchmark baseline compare "$BASELINE_OLD" "$BASELINE_NEW" \ + --target "$TARGET" From dedd0e3de8dd30afcf3f487b4ae3e4df62aef5c3 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Fri, 29 May 2026 14:42:09 +0200 Subject: [PATCH 28/37] rename to benchmark --- Benchmarks/Package.resolved | 2 +- Benchmarks/Package.swift | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index 5bf6d5f5..56044caf 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "a9d6bb991cd82bfd91d8290469d6c06ea1ed287e5ad8a103d3ef751159aa48f7", + "originHash" : "e857c3ae5e128252a36e647f56feaed932fd105d4a88290b83e8309219f2ed92", "pins" : [ { "identity" : "hdrhistogram-swift", diff --git a/Benchmarks/Package.swift b/Benchmarks/Package.swift index e6e37705..73f9937e 100644 --- a/Benchmarks/Package.swift +++ b/Benchmarks/Package.swift @@ -86,11 +86,11 @@ package.targets += [ .executableTarget( name: 
"MallocInterposerBenchmarks", dependencies: [ - .product(name: "Benchmark", package: "package-benchmark") + .product(name: "Benchmark", package: "benchmark") ], path: "Benchmarks/MallocInterposer", plugins: [ - .plugin(name: "BenchmarkPlugin", package: "package-benchmark") + .plugin(name: "BenchmarkPlugin", package: "benchmark") ] ) ] From 3686bc11f422ff97f7f2503dbcd72cb50f882443 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:50:01 +0200 Subject: [PATCH 29/37] add overhead scripts --- scripts/wrapper_overhead.c | 73 ++++++++++++++ scripts/wrapper_overhead.sh | 71 ++++++++++++++ scripts/wrapper_overhead_passthrough.c | 128 +++++++++++++++++++++++++ 3 files changed, 272 insertions(+) create mode 100644 scripts/wrapper_overhead.c create mode 100755 scripts/wrapper_overhead.sh create mode 100644 scripts/wrapper_overhead_passthrough.c diff --git a/scripts/wrapper_overhead.c b/scripts/wrapper_overhead.c new file mode 100644 index 00000000..ba847544 --- /dev/null +++ b/scripts/wrapper_overhead.c @@ -0,0 +1,73 @@ +// wrapper_overhead.c — measure the cost of "being a wrapper" in isolation. +// +// Run the same malloc/free hot loop twice: +// 1. With nothing preloaded → user code → libc allocator. +// 2. With wrapper_overhead_passthrough.dylib preloaded → user code → our +// one-instruction tail-call wrapper → libc allocator. +// +// The wrapper does no bookkeeping at all — its `replacement_malloc` is a +// single `b _malloc` and `replacement_free` is a single `b _free`. So the +// delta between the two runs is purely the cost of inserting one extra +// PLT stub + branch into the call path. Nothing else changes. +// +// Build + drive: see wrapper_overhead.sh in the same directory. 
+
+#include <stddef.h>  /* size_t */
+#include <stdio.h>   /* printf */
+#include <stdlib.h>  /* malloc, free, getenv, qsort */
+#include <time.h>    /* clock_gettime, struct timespec */
+
+#define WARMUP_ITERS 10000
+#define INNER_ITERS 2000000
+#define TRIALS 9
+
+static void *volatile sink;  /* the pointer itself is volatile: every store is observable, so the malloc/free pairs cannot be optimized away */
+
+static double now_ns(void) {  /* current CLOCK_MONOTONIC reading, in nanoseconds */
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec;
+}
+
+static int cmp_double(const void *a, const void *b) {  /* qsort comparator, ascending */
+    double da = *(const double *)a, db = *(const double *)b;
+    return (da > db) - (da < db);
+}
+
+static void measure_pair(const char *name, size_t size) {  /* prints min / median / max ns per malloc+free pair */
+    // Warmup primes tcache and lets dyld bind any lazy stubs.
+    for (int i = 0; i < WARMUP_ITERS; i++) {
+        void *p = malloc(size);
+        sink = p;
+        free(p);
+    }
+
+    double trials[TRIALS];
+    for (int t = 0; t < TRIALS; t++) {
+        double t0 = now_ns();
+        for (int i = 0; i < INNER_ITERS; i++) {
+            void *p = malloc(size);
+            sink = p;
+            free(p);
+        }
+        trials[t] = (now_ns() - t0) / (double)INNER_ITERS;
+    }
+    qsort(trials, TRIALS, sizeof(double), cmp_double);
+
+    printf("%-18s %10.2f %10.2f %10.2f\n",
+           name, trials[0], trials[TRIALS / 2], trials[TRIALS - 1]);
+}
+
+int main(void) {
+    const char *label = getenv("BENCH_LABEL");
+    if (!label) label = "(no label)";
+    printf("== %s ==\n", label);
+    printf("%-18s %10s %10s %10s\n", "size", "min ns", "median", "max ns");
+    printf("%-18s %10s %10s %10s\n", "----", "------", "------", "------");
+
+    measure_pair("malloc(64)+free", 64);
+    measure_pair("malloc(256)+free", 256);
+    measure_pair("malloc(1024)+free", 1024);
+    measure_pair("malloc(4096)+free", 4096);
+    return 0;
+}
diff --git a/scripts/wrapper_overhead.sh b/scripts/wrapper_overhead.sh
new file mode 100755
index 00000000..bb97246d
--- /dev/null
+++ b/scripts/wrapper_overhead.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# wrapper_overhead.sh — show the irreducible cost of "being a wrapper" in
+# isolation, with no header / no counters / no enable check / nothing.
+#
+# Builds two artifacts in a throwaway mktemp dir (removed on exit):
+#   - $BUILD_DIR/wrapper_overhead                    the hot-loop bench
+#   - $BUILD_DIR/libwrapper_passthrough.{dylib,so}   a do-nothing tail-call interposer
+#
+# Runs the bench twice:
+#   1. Plain — user code → libc malloc.
+#   2. Wrapped — user code → tail-call wrapper → libc malloc.
+#
+# Whatever ns delta you see is the price of the extra function-call layer
+# alone. Anything you'd build on top (header, counters, enable check)
+# stacks on top of that.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BUILD_DIR="$(mktemp -d -t wrapper_overhead.XXXXXX)"
+trap 'rm -rf "$BUILD_DIR"' EXIT
+
+CC="${CC:-$(command -v clang || command -v gcc || echo cc)}"
+CFLAGS="${CFLAGS:--O2 -Wall -Wextra}"
+
+step() { printf '\n\033[1;36m== %s\033[0m\n' "$*"; }
+fail() { printf '\033[31m## %s\033[0m\n' "$*" >&2; exit 1; }
+
+# --- Build ---
+step "Building bench harness + pass-through wrapper"
+"$CC" $CFLAGS -o "$BUILD_DIR/wrapper_overhead" "$SCRIPT_DIR/wrapper_overhead.c"
+
+# Collect injection env vars in a bash array so they pass cleanly to `env`.
+declare -a INJECT_ENV=()
+case "$(uname -s)" in
+    Darwin)
+        WRAPPER_LIB="$BUILD_DIR/libwrapper_passthrough.dylib"
+        "$CC" $CFLAGS -dynamiclib -o "$WRAPPER_LIB" \
+            "$SCRIPT_DIR/wrapper_overhead_passthrough.c"
+        INJECT_ENV+=("DYLD_INSERT_LIBRARIES=$WRAPPER_LIB" "DYLD_FORCE_FLAT_NAMESPACE=1")
+        ;;
+    Linux)
+        WRAPPER_LIB="$BUILD_DIR/libwrapper_passthrough.so"
+        "$CC" $CFLAGS -fPIC -shared -o "$WRAPPER_LIB" \
+            "$SCRIPT_DIR/wrapper_overhead_passthrough.c" -ldl
+        INJECT_ENV+=("LD_PRELOAD=$WRAPPER_LIB")
+        ;;
+    *)
+        fail "Unsupported platform: $(uname -s)"
+        ;;
+esac
+
+# --- Run plain ---
+step "Run 1 — plain (no wrapper)"
+BENCH_LABEL="plain" "$BUILD_DIR/wrapper_overhead"
+
+# --- Run wrapped ---
+step "Run 2 — pass-through wrapper preloaded ($(basename "$WRAPPER_LIB"))"
+env BENCH_LABEL="wrapped" "${INJECT_ENV[@]}" "$BUILD_DIR/wrapper_overhead"
+
+cat <<'EOF'
+
+The delta between the two median columns above is the cost of the wrapper
+layer alone, with zero bookkeeping. On Apple Silicon you'll typically see
+~7–8 ns/pair; on Linux it's smaller because the LD_PRELOAD path goes
+through dlsym once but the per-call dispatch is a direct pointer call.
+
+Anything an interposer wants to do (header bookkeeping, counters, enable
+check, etc.) adds on top of this floor — it does not replace it.
+EOF
diff --git a/scripts/wrapper_overhead_passthrough.c b/scripts/wrapper_overhead_passthrough.c
new file mode 100644
index 00000000..4a46c559
--- /dev/null
+++ b/scripts/wrapper_overhead_passthrough.c
@@ -0,0 +1,128 @@
+// wrapper_overhead_passthrough.c — a bare malloc/free interposer that does
+// NOTHING beyond what an empty wrapper does. No header, no counters, no
+// enable check, no TLS, no atomics. Each replacement_* is a single-
+// instruction tail call to libc.
+//
+// Used by wrapper_overhead.sh to isolate the cost of the wrapper layer
+// itself — independent of any bookkeeping you might layer on top.
+// +// macOS path: DYLD_INTERPOSE entries route malloc/free through us via +// the standard __DATA,__interpose section. Internal calls to malloc/free +// inside this dylib resolve directly to libsystem. +// +// Linux path: defining `malloc` / `free` in an LD_PRELOAD'd shared object +// overrides the global symbol resolution. We forward to the real libc +// entries via dlsym(RTLD_NEXT, …). The resolve dance is a small one-time +// cost amortised away after warmup, so it doesn't pollute the measurement. + +#include + +#if defined(__APPLE__) + +#define DYLD_INTERPOSE(_replacement, _replacee) \ + __attribute__((used)) static struct { \ + const void *replacement; \ + const void *replacee; \ + } _interpose_##_replacee __attribute__((section("__DATA,__interpose"))) = { \ + (const void *)&_replacement, (const void *)&_replacee \ + }; + +void *replacement_malloc(size_t size) { return malloc(size); } +void replacement_free(void *p) { free(p); } +void *replacement_calloc(size_t n, size_t s) { return calloc(n, s); } +void *replacement_realloc(void *p, size_t s) { return realloc(p, s); } +void *replacement_reallocf(void *p, size_t s) { return reallocf(p, s); } +void *replacement_valloc(size_t s) { return valloc(s); } +int replacement_posix_memalign(void **m, size_t a, size_t s) { + return posix_memalign(m, a, s); +} + +DYLD_INTERPOSE(replacement_malloc, malloc) +DYLD_INTERPOSE(replacement_free, free) +DYLD_INTERPOSE(replacement_calloc, calloc) +DYLD_INTERPOSE(replacement_realloc, realloc) +DYLD_INTERPOSE(replacement_reallocf, reallocf) +DYLD_INTERPOSE(replacement_valloc, valloc) +DYLD_INTERPOSE(replacement_posix_memalign, posix_memalign) + +#else /* Linux */ + +#define _GNU_SOURCE +#include +#include +#include + +static _Atomic(void *(*)(size_t)) g_real_malloc; +static _Atomic(void (*)(void *)) g_real_free; +static _Atomic(void *(*)(size_t, size_t)) g_real_calloc; +static _Atomic(void *(*)(void *, size_t)) g_real_realloc; + +// Small recursion buffer for the rare case where 
dlsym itself allocates +// before we've resolved the real symbols. ~1 MiB is plenty. +static char g_bootstrap[1024 * 1024]; +static _Atomic size_t g_bootstrap_off = 0; +static int bootstrap_owns(void *p) { + return (char *)p >= g_bootstrap && + (char *)p < g_bootstrap + sizeof(g_bootstrap); +} +static void *bootstrap_alloc(size_t n) { + size_t aligned = (n + 15) & ~(size_t)15; + size_t off = atomic_fetch_add_explicit(&g_bootstrap_off, aligned, + memory_order_relaxed); + if (off + aligned > sizeof(g_bootstrap)) return NULL; + return g_bootstrap + off; +} + +#define REAL(_fn) ({ \ + typeof(g_real_##_fn) _r = atomic_load_explicit(&g_real_##_fn, \ + memory_order_relaxed); \ + if (!_r) { \ + _r = dlsym(RTLD_NEXT, #_fn); \ + atomic_store_explicit(&g_real_##_fn, _r, memory_order_relaxed); \ + } \ + _r; \ +}) + +void *malloc(size_t s) { + typeof(g_real_malloc) r = atomic_load_explicit(&g_real_malloc, + memory_order_relaxed); + if (!r) { + r = dlsym(RTLD_NEXT, "malloc"); + if (!r) return bootstrap_alloc(s); + atomic_store_explicit(&g_real_malloc, r, memory_order_relaxed); + } + return r(s); +} + +void free(void *p) { + if (!p || bootstrap_owns(p)) return; + typeof(g_real_free) r = REAL(free); + if (r) r(p); +} + +void *calloc(size_t n, size_t s) { + typeof(g_real_calloc) r = atomic_load_explicit(&g_real_calloc, + memory_order_relaxed); + if (!r) { + r = dlsym(RTLD_NEXT, "calloc"); + if (!r) { + void *p = bootstrap_alloc(n * s); + if (p) memset(p, 0, n * s); + return p; + } + atomic_store_explicit(&g_real_calloc, r, memory_order_relaxed); + } + return r(n, s); +} + +void *realloc(void *p, size_t s) { + if (bootstrap_owns(p)) { + // Can't realloc a bootstrap allocation in place; copy out. 
+ void *np = malloc(s); + if (np && p) memcpy(np, p, s); + return np; + } + return REAL(realloc)(p, s); +} + +#endif From a4989f6f40065d798cd51c72c6c776939a455c07 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:11:34 +0200 Subject: [PATCH 30/37] update overhead script --- scripts/wrapper_overhead.c | 33 +++++++++++++++++++++++++++------ scripts/wrapper_overhead.sh | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/scripts/wrapper_overhead.c b/scripts/wrapper_overhead.c index ba847544..008f22a7 100644 --- a/scripts/wrapper_overhead.c +++ b/scripts/wrapper_overhead.c @@ -1,17 +1,23 @@ -// wrapper_overhead.c — measure the cost of "being a wrapper" in isolation. +// wrapper_overhead.c — measure the cost of "being a wrapper" in isolation, +// and (optionally) the additional cost of the real interposer's bookkeeping. // -// Run the same malloc/free hot loop twice: +// Run the same malloc/free hot loop two or three times: // 1. With nothing preloaded → user code → libc allocator. // 2. With wrapper_overhead_passthrough.dylib preloaded → user code → our // one-instruction tail-call wrapper → libc allocator. +// Delta from #1 = wrapper layer cost (no bookkeeping at all). +// 3. (Optional) With the real malloc-interposer preloaded and counting +// enabled. Delta from #2 = bookkeeping cost (header + magic check + +// enable check + TLS pointer load + counter writes). // -// The wrapper does no bookkeeping at all — its `replacement_malloc` is a -// single `b _malloc` and `replacement_free` is a single `b _free`. So the -// delta between the two runs is purely the cost of inserting one extra -// PLT stub + branch into the call path. Nothing else changes. +// To enable run #3, set INTERPOSER_DYLIB in the environment to the path of +// the full interposer dylib/so. 
The harness will dlsym +// `malloc_interposer_enable` and call it at startup so counting is on for +// every measured iteration. // // Build + drive: see wrapper_overhead.sh in the same directory. +#include #include #include #include @@ -61,6 +67,21 @@ static void measure_pair(const char *name, size_t size) { int main(void) { const char *label = getenv("BENCH_LABEL"); if (!label) label = "(no label)"; + + // If the real malloc-interposer is preloaded, flip its counting on so we + // measure the full bookkeeping cost (header + magic check + enable check + // + TLS access + counter writes). dlsym returns NULL for the pass-through + // wrapper and for the plain libc run, which is exactly what we want. + void (*enable_fn)(void) = (void (*)(void))dlsym(RTLD_DEFAULT, + "malloc_interposer_enable"); + void (*reset_fn)(void) = (void (*)(void))dlsym(RTLD_DEFAULT, + "malloc_interposer_reset"); + if (enable_fn) { + if (reset_fn) reset_fn(); + enable_fn(); + fprintf(stderr, "[%s] interposer counting enabled\n", label); + } + printf("== %s ==\n", label); printf("%-18s %10s %10s %10s\n", "size", "min ns", "median", "max ns"); printf("%-18s %10s %10s %10s\n", "----", "------", "------", "------"); diff --git a/scripts/wrapper_overhead.sh b/scripts/wrapper_overhead.sh index bb97246d..a01ea412 100755 --- a/scripts/wrapper_overhead.sh +++ b/scripts/wrapper_overhead.sh @@ -59,13 +59,37 @@ BENCH_LABEL="plain" "$BUILD_DIR/wrapper_overhead" step "Run 2 — pass-through wrapper preloaded ($(basename "$WRAPPER_LIB"))" env BENCH_LABEL="wrapped" "${INJECT_ENV[@]}" "$BUILD_DIR/wrapper_overhead" +# --- Run full interposer (optional) --- +# If the caller points us at the real malloc-interposer dylib, do a third run +# with counting enabled. Delta from run #2 is the real bookkeeping cost. +if [[ -n "${INTERPOSER_DYLIB:-}" ]]; then + if [[ ! 
-f "$INTERPOSER_DYLIB" ]]; then + fail "INTERPOSER_DYLIB=$INTERPOSER_DYLIB does not exist" + fi + + declare -a FULL_INJECT=() + case "$(uname -s)" in + Darwin) + FULL_INJECT+=("DYLD_INSERT_LIBRARIES=$INTERPOSER_DYLIB" + "DYLD_FORCE_FLAT_NAMESPACE=1") + ;; + Linux) + FULL_INJECT+=("LD_PRELOAD=$INTERPOSER_DYLIB") + ;; + esac + + step "Run 3 — full malloc-interposer preloaded, counting ON" + env BENCH_LABEL="full-interposer" "${FULL_INJECT[@]}" "$BUILD_DIR/wrapper_overhead" +fi + cat <<'EOF' -The delta between the two median columns above is the cost of the wrapper -layer alone, with zero bookkeeping. On Apple Silicon you'll typically see -~7–8 ns/pair; on Linux it's smaller because the LD_PRELOAD path goes -through dlsym once but the per-call dispatch is a direct pointer call. +Reading the output: + delta(plain → wrapped) = cost of the wrapper layer alone (no logic). + delta(wrapped → full) = cost of header + magic check + enable check + + TLS pointer + counter writes (the + "bookkeeping" on top of the wrapper). + delta(plain → full) = total interposer overhead vs. raw libc. -Anything an interposer wants to do (header bookkeeping, counters, enable -check, etc.) adds on top of this floor — it does not replace it. +If only runs 1 and 2 appear, set INTERPOSER_DYLIB= to enable run 3. 
EOF From 1931a4fe2f5ab5db78293afc882415d48189810d Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:19:48 +0200 Subject: [PATCH 31/37] linux fix --- scripts/wrapper_overhead_passthrough.c | 128 +++++++++++++++---------- 1 file changed, 76 insertions(+), 52 deletions(-) diff --git a/scripts/wrapper_overhead_passthrough.c b/scripts/wrapper_overhead_passthrough.c index 4a46c559..146dae84 100644 --- a/scripts/wrapper_overhead_passthrough.c +++ b/scripts/wrapper_overhead_passthrough.c @@ -47,82 +47,106 @@ DYLD_INTERPOSE(replacement_posix_memalign, posix_memalign) #else /* Linux */ +// On Linux we resolve the real libc functions via dlsym(RTLD_NEXT, …) and +// cache the function pointers. The wrinkle: dlsym itself can call calloc +// internally during symbol resolution, which would recurse back into our +// hooks. We guard against that with a thread-local "in dlsym" flag and a +// small static bootstrap buffer that absorbs any allocations made while +// resolving. +// +// After resolution completes (which happens during the constructor, before +// the bench's hot loop runs), the steady-state hot path is just: +// ldr x_real_fn +// blr x_real_fn +// — one load, one indirect call. Same shape as glibc's own PLT stub, so +// the wrapper-layer cost is just the extra branch. + #define _GNU_SOURCE #include -#include #include -static _Atomic(void *(*)(size_t)) g_real_malloc; -static _Atomic(void (*)(void *)) g_real_free; -static _Atomic(void *(*)(size_t, size_t)) g_real_calloc; -static _Atomic(void *(*)(void *, size_t)) g_real_realloc; - -// Small recursion buffer for the rare case where dlsym itself allocates -// before we've resolved the real symbols. ~1 MiB is plenty. 
-static char g_bootstrap[1024 * 1024]; -static _Atomic size_t g_bootstrap_off = 0; -static int bootstrap_owns(void *p) { - return (char *)p >= g_bootstrap && - (char *)p < g_bootstrap + sizeof(g_bootstrap); +static void *(*real_malloc)(size_t) = NULL; +static void (*real_free)(void *) = NULL; +static void *(*real_calloc)(size_t, size_t) = NULL; +static void *(*real_realloc)(void *, size_t)= NULL; + +// TLS guard: set while we're inside dlsym so any reentrant malloc/calloc/ +// realloc/free calls go to the bootstrap path instead of recursing. +static __thread int g_in_resolve = 0; + +// Small static buffer for allocations made during dlsym resolution. +// 64 KiB is more than enough — dlsym typically allocates only a handful of +// small objects during the first call. +static char g_boot_mem[64 * 1024]; +static size_t g_boot_off = 0; + +static int boot_owns(const void *p) { + return (const char *)p >= g_boot_mem && + (const char *)p < g_boot_mem + sizeof(g_boot_mem); } -static void *bootstrap_alloc(size_t n) { + +static void *boot_alloc(size_t n) { size_t aligned = (n + 15) & ~(size_t)15; - size_t off = atomic_fetch_add_explicit(&g_bootstrap_off, aligned, - memory_order_relaxed); - if (off + aligned > sizeof(g_bootstrap)) return NULL; - return g_bootstrap + off; + if (g_boot_off + aligned > sizeof(g_boot_mem)) return NULL; + void *p = g_boot_mem + g_boot_off; + g_boot_off += aligned; + return p; } -#define REAL(_fn) ({ \ - typeof(g_real_##_fn) _r = atomic_load_explicit(&g_real_##_fn, \ - memory_order_relaxed); \ - if (!_r) { \ - _r = dlsym(RTLD_NEXT, #_fn); \ - atomic_store_explicit(&g_real_##_fn, _r, memory_order_relaxed); \ - } \ - _r; \ -}) +static void resolve_real(void) { + g_in_resolve = 1; + real_malloc = dlsym(RTLD_NEXT, "malloc"); + real_free = dlsym(RTLD_NEXT, "free"); + real_calloc = dlsym(RTLD_NEXT, "calloc"); + real_realloc = dlsym(RTLD_NEXT, "realloc"); + g_in_resolve = 0; +} + +__attribute__((constructor)) static void preresolve(void) { + 
resolve_real(); +} void *malloc(size_t s) { - typeof(g_real_malloc) r = atomic_load_explicit(&g_real_malloc, - memory_order_relaxed); - if (!r) { - r = dlsym(RTLD_NEXT, "malloc"); - if (!r) return bootstrap_alloc(s); - atomic_store_explicit(&g_real_malloc, r, memory_order_relaxed); - } - return r(s); + if (__builtin_expect(real_malloc != NULL, 1)) return real_malloc(s); + if (g_in_resolve) return boot_alloc(s); + resolve_real(); + return real_malloc ? real_malloc(s) : boot_alloc(s); } void free(void *p) { - if (!p || bootstrap_owns(p)) return; - typeof(g_real_free) r = REAL(free); - if (r) r(p); + if (!p) return; + if (boot_owns(p)) return; // bootstrap blocks have no underlying chunk + if (__builtin_expect(real_free != NULL, 1)) { real_free(p); return; } + if (g_in_resolve) return; + resolve_real(); + if (real_free) real_free(p); } void *calloc(size_t n, size_t s) { - typeof(g_real_calloc) r = atomic_load_explicit(&g_real_calloc, - memory_order_relaxed); - if (!r) { - r = dlsym(RTLD_NEXT, "calloc"); - if (!r) { - void *p = bootstrap_alloc(n * s); - if (p) memset(p, 0, n * s); - return p; - } - atomic_store_explicit(&g_real_calloc, r, memory_order_relaxed); + if (__builtin_expect(real_calloc != NULL, 1)) return real_calloc(n, s); + if (g_in_resolve) { + void *p = boot_alloc(n * s); + if (p) memset(p, 0, n * s); + return p; } - return r(n, s); + resolve_real(); + if (real_calloc) return real_calloc(n, s); + void *p = boot_alloc(n * s); + if (p) memset(p, 0, n * s); + return p; } void *realloc(void *p, size_t s) { - if (bootstrap_owns(p)) { - // Can't realloc a bootstrap allocation in place; copy out. + if (boot_owns(p)) { + // Can't realloc a bootstrap allocation in place; copy out via malloc. void *np = malloc(s); if (np && p) memcpy(np, p, s); return np; } - return REAL(realloc)(p, s); + if (__builtin_expect(real_realloc != NULL, 1)) return real_realloc(p, s); + if (g_in_resolve) return boot_alloc(s); + resolve_real(); + return real_realloc ? 
real_realloc(p, s) : boot_alloc(s); } #endif From e227d8e5f5f7bdb8d0164b7e9e0951ccdcde3cc4 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 12:38:23 +0200 Subject: [PATCH 32/37] fix --- Benchmarks/Package.resolved | 4 +- Package.swift | 9 +- .../BenchmarkExecutor+Extensions.swift | 28 +++++ Sources/Benchmark/BenchmarkExecutor.swift | 46 ++++---- .../Benchmark/BenchmarkMetric+Defaults.swift | 4 - Sources/Benchmark/BenchmarkMetric.swift | 29 +++-- .../MallocStatisticsTests.swift | 100 ++++++++++++++++++ 7 files changed, 182 insertions(+), 38 deletions(-) create mode 100644 Tests/BenchmarkTests/MallocStatisticsTests.swift diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index 56044caf..93dde97c 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -15,8 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/ordo-one/malloc-interposer.git", "state" : { - "revision" : "d9ca5ad6d85622fb2bd5b3d3387ba064dbcab1c2", - "version" : "1.0.0" + "revision" : "bcda3c88ae083adb9c372b76c825806045fff99e", + "version" : "1.2.0" } }, { diff --git a/Package.swift b/Package.swift index 1f5e18ff..1c22ba6f 100644 --- a/Package.swift +++ b/Package.swift @@ -41,7 +41,7 @@ var packageDependencies: [Package.Dependency] = [ #if os(Linux) && compiler(>=6.3) packageDependencies += [ - .package(url: "https://github.com/ordo-one/swift-runtime-interposer.git", .upToNextMajor(from: "1.0.0")), + .package(url: "https://github.com/ordo-one/swift-runtime-interposer.git", .upToNextMajor(from: "1.2.0")), ] #endif @@ -54,7 +54,12 @@ var benchmarkDependencies: [Target.Dependency] = [ .product(name: "Atomics", package: "swift-atomics"), "SwiftRuntimeHooks", "BenchmarkShared", - .product(name: "MallocInterposerSwift", package: "malloc-interposer"), + // Gated on the `Jemalloc` trait so that `--disable-default-traits` / + // BENCHMARK_DISABLE_JEMALLOC removes the malloc-stats 
backend entirely (needed for e.g. + // fully-static musl builds and sanitizer runs). On Swift 6.3+ this trait selects the + // interposer backend; on Swift <=6.2 (see Package@swift-6.2.swift) it selects jemalloc. + // When the trait is off, BenchmarkExecutor falls back to the no-op MallocStatsProducer stub. + .product(name: "MallocInterposerSwift", package: "malloc-interposer", condition: .when(traits: ["Jemalloc"])), ] #if os(Linux) && compiler(>=6.3) diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift index f73c4c07..b5f7db26 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -45,6 +45,34 @@ extension BenchmarkExecutor { } } +extension BenchmarkExecutor { + /// Maps a measured window's interposer counter deltas to the `(metric, value)` pairs to record. + /// + /// Extracted as a pure function so the leak/scaling arithmetic can be unit-tested without a live + /// interposer. `memoryLeaked` / `memoryLeakedBytes` are clamped to `0`: a net-negative window + /// (more frees than mallocs — e.g. freeing a warmup survivor, or cross-thread frees) is not a + /// leak, and clamping records a `0` sample rather than letting `Statistics.add` drop it, which + /// would desync the column's sample count and bias the average upward. 
+ static func mallocStatistics( + mallocCountDelta: Int, + mallocBytesDelta: Int, + mallocSmallDelta: Int, + mallocLargeDelta: Int, + freeCountDelta: Int, + freeBytesDelta: Int + ) -> [(metric: BenchmarkMetric, value: Int)] { + [ + (.mallocCountTotal, mallocCountDelta), + (.mallocBytesCount, mallocBytesDelta), + (.mallocCountSmall, mallocSmallDelta), + (.mallocCountLarge, mallocLargeDelta), + (.freeCountTotal, freeCountDelta), + (.memoryLeaked, max(0, mallocCountDelta - freeCountDelta)), + (.memoryLeakedBytes, max(0, mallocBytesDelta - freeBytesDelta)), + ] + } +} + extension BenchmarkExecutor { func operatingSystemsStatsProducerNeeded(_ metric: BenchmarkMetric) -> Bool { switch metric { diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index 78f5240b..f11b9231 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -254,30 +254,23 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length if mallocStatsRequested { #if canImport(MallocInterposerSwift) - let mallocCount = stopMallocStats.mallocCount - startMallocStats.mallocCount - statistics[BenchmarkMetric.mallocCountTotal.index].add(mallocCount) - - let mallocBytesCount = stopMallocStats.mallocBytesCount - startMallocStats.mallocBytesCount - statistics[BenchmarkMetric.mallocBytesCount.index].add(mallocBytesCount) - - // For backwards compatibility we keep allocatedResidentMemory as the total malloc bytes - statistics[BenchmarkMetric.allocatedResidentMemory.index].add(mallocBytesCount) - - let mallocSmallCount = stopMallocStats.mallocSmallCount - startMallocStats.mallocSmallCount - statistics[BenchmarkMetric.mallocCountSmall.index].add(mallocSmallCount) - - let mallocLargeCount = stopMallocStats.mallocLargeCount - startMallocStats.mallocLargeCount - statistics[BenchmarkMetric.mallocCountLarge.index].add(mallocLargeCount) - - let freeCount = stopMallocStats.freeCount - startMallocStats.freeCount - 
statistics[BenchmarkMetric.freeCountTotal.index].add(freeCount) - - let memoryLeakedCount = mallocCount - freeCount - statistics[BenchmarkMetric.memoryLeaked.index].add(Int(memoryLeakedCount)) - - let freeBytes = stopMallocStats.freeBytesCount - startMallocStats.freeBytesCount - let memoryLeakedBytes = mallocBytesCount - freeBytes - statistics[BenchmarkMetric.memoryLeakedBytes.index].add(Int(memoryLeakedBytes)) + // allocatedResidentMemory is intentionally not populated on the interposer path: + // the interposer cannot measure the allocator's resident set (only gross requested + // bytes). It remains produced by the jemalloc backend (Swift <=6.2). Use + // mallocBytesCount for gross allocated bytes or peakMemoryResident for OS-sampled + // resident memory. The leak/scaling arithmetic lives in BenchmarkExecutor + // .mallocStatistics(...) so it can be unit-tested without a live interposer. + let mallocMetrics = BenchmarkExecutor.mallocStatistics( + mallocCountDelta: stopMallocStats.mallocCount - startMallocStats.mallocCount, + mallocBytesDelta: stopMallocStats.mallocBytesCount - startMallocStats.mallocBytesCount, + mallocSmallDelta: stopMallocStats.mallocSmallCount - startMallocStats.mallocSmallCount, + mallocLargeDelta: stopMallocStats.mallocLargeCount - startMallocStats.mallocLargeCount, + freeCountDelta: stopMallocStats.freeCount - startMallocStats.freeCount, + freeBytesDelta: stopMallocStats.freeBytesCount - startMallocStats.freeBytesCount + ) + for (metric, value) in mallocMetrics { + statistics[metric.index].add(value) + } #else let mallocCountTotal = stopMallocStats.mallocCountTotal - startMallocStats.mallocCountTotal statistics[BenchmarkMetric.mallocCountTotal.index].add(mallocCountTotal) @@ -285,6 +278,11 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length let allocatedResidentMemory = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory 
statistics[BenchmarkMetric.allocatedResidentMemory.index].add(allocatedResidentMemory) + // jemalloc has no free counter, so memoryLeaked is reported (as on the + // pre-interposer path) as resident-byte growth rather than a malloc-minus-free + // count. Backend-dependent definition; see BenchmarkMetric.memoryLeaked docs. + statistics[BenchmarkMetric.memoryLeaked.index].add(max(0, allocatedResidentMemory)) + let mallocSmallCount = stopMallocStats.mallocCountSmall - startMallocStats.mallocCountSmall statistics[BenchmarkMetric.mallocCountSmall.index].add(mallocSmallCount) diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index ece0725a..7ffbc325 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -33,8 +33,6 @@ public extension BenchmarkMetric { [ .wallClock, .cpuTotal, - .mallocCountSmall, - .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, @@ -52,8 +50,6 @@ public extension BenchmarkMetric { .wallClock, .cpuUser, .cpuTotal, - .mallocCountSmall, - .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocBytesCount, diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index 0672eb05..f7065ad5 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -32,21 +32,37 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib /// Measure virtual memory usage - sampled during runtime case peakMemoryVirtual /// Number of small malloc calls + /// + /// > Deprecated: The small/large split is backend-dependent — the jemalloc backend + /// > (Swift ≤6.2) splits on jemalloc's size classes, while the 6.3+ interposer backend + /// > splits on a coarser `requested size > page size` threshold. Prefer ``mallocCountTotal``. 
+ @available(*, deprecated, message: "Backend-dependent small/large split; prefer mallocCountTotal") case mallocCountSmall /// Number of large malloc calls + /// + /// > Deprecated: See ``mallocCountSmall``. + @available(*, deprecated, message: "Backend-dependent small/large split; prefer mallocCountTotal") case mallocCountLarge - /// Number of total malloc calls (small+large) + /// Number of total malloc calls case mallocCountTotal /// Number of totatl free calls case freeCountTotal /// The amount of memory allocated in bytes through malloc calls case mallocBytesCount /// The amount of allocated resident memory according to the memory allocator - /// by the application (does not include metadata overhead etc) - /// **Deprecated** in favour of ``mallocBytesCount``. It value is equal to ``mallocBytesCount``. - @available(*, deprecated, message: "Deprecated in favor of mallocBytesCount") + /// by the application (does not include metadata overhead etc). + /// + /// > Deprecated: Only produced by the jemalloc backend (Swift ≤6.2). The 6.3+ + /// > interposer backend does not measure resident memory — use ``mallocBytesCount`` + /// > for gross allocated bytes, or ``peakMemoryResident`` for OS-sampled resident memory. + @available(*, deprecated, message: "Only produced by the jemalloc backend; use mallocBytesCount or peakMemoryResident") case allocatedResidentMemory - /// Number of small+large mallocs - small+large frees in resident memory + /// Net unfreed allocations within the measured region. + /// + /// Backend-dependent: the 6.3+ interposer backend reports `malloc` count minus `free` count, + /// while the jemalloc backend (Swift ≤6.2) reports resident-byte growth. Because counting is + /// process-global, this metric is only reliable for single-threaded benchmarks with quiescent + /// background allocation. 
case memoryLeaked /// Leaked memeory in bytes case memoryLeakedBytes @@ -128,7 +144,8 @@ public extension BenchmarkMetric { switch self { case .cpuSystem, .cpuTotal, .cpuUser, .wallClock: return true - case .mallocCountTotal, .memoryLeaked, .memoryLeakedBytes: + case .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, .freeCountTotal, + .mallocBytesCount, .memoryLeaked, .memoryLeakedBytes: return true case .syscalls: return true diff --git a/Tests/BenchmarkTests/MallocStatisticsTests.swift b/Tests/BenchmarkTests/MallocStatisticsTests.swift new file mode 100644 index 00000000..feb6d22f --- /dev/null +++ b/Tests/BenchmarkTests/MallocStatisticsTests.swift @@ -0,0 +1,100 @@ +// +// Copyright (c) 2026 Ordo One AB. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// + +import XCTest + +@testable import Benchmark + +/// Unit coverage for the interposer malloc-metric arithmetic and the malloc-metric scaling +/// configuration. These exercise `BenchmarkExecutor.mallocStatistics(...)` directly with +/// synthetic counter deltas, so no live interposer / allocation is required. +final class MallocStatisticsTests: XCTestCase { + private func value( + _ metrics: [(metric: BenchmarkMetric, value: Int)], + _ wanted: BenchmarkMetric + ) -> Int? 
{ + metrics.first { $0.metric == wanted }?.value + } + + func testBalancedAllocFreeReportsNoLeak() { + let metrics = BenchmarkExecutor.mallocStatistics( + mallocCountDelta: 10, mallocBytesDelta: 1_024, + mallocSmallDelta: 8, mallocLargeDelta: 2, + freeCountDelta: 10, freeBytesDelta: 1_024 + ) + XCTAssertEqual(value(metrics, .mallocCountTotal), 10) + XCTAssertEqual(value(metrics, .freeCountTotal), 10) + XCTAssertEqual(value(metrics, .mallocBytesCount), 1_024) + XCTAssertEqual(value(metrics, .memoryLeaked), 0) + XCTAssertEqual(value(metrics, .memoryLeakedBytes), 0) + } + + func testUnbalancedAllocReportsLeak() { + let metrics = BenchmarkExecutor.mallocStatistics( + mallocCountDelta: 10, mallocBytesDelta: 2_048, + mallocSmallDelta: 7, mallocLargeDelta: 3, + freeCountDelta: 6, freeBytesDelta: 1_024 + ) + XCTAssertEqual(value(metrics, .memoryLeaked), 4) // 10 mallocs - 6 frees + XCTAssertEqual(value(metrics, .memoryLeakedBytes), 1_024) // 2048 - 1024 + } + + /// A window that frees more than it allocates (e.g. freeing a warmup survivor or cross-thread + /// frees) must clamp the leak to 0 — not go negative (which `Statistics.add` would silently + /// drop, desyncing the sample count and biasing the average upward). + func testNetFreeWindowClampsLeakToZero() { + let metrics = BenchmarkExecutor.mallocStatistics( + mallocCountDelta: 3, mallocBytesDelta: 256, + mallocSmallDelta: 3, mallocLargeDelta: 0, + freeCountDelta: 5, freeBytesDelta: 4_096 + ) + XCTAssertEqual(value(metrics, .memoryLeaked), 0) + XCTAssertEqual(value(metrics, .memoryLeakedBytes), 0) + } + + /// `mallocStatistics` is a pure mapping: each counter delta must land in its own metric slot + /// unchanged, so a mis-routing of any single delta fails distinctly. (The `small + large == total` + /// invariant is a property of the interposer's counters, not of this function, so it cannot be + /// asserted at this layer.) 
+ func testDeltasRouteToCorrectMetricSlots() { + let metrics = BenchmarkExecutor.mallocStatistics( + mallocCountDelta: 10, mallocBytesDelta: 100, + mallocSmallDelta: 6, mallocLargeDelta: 4, + freeCountDelta: 3, freeBytesDelta: 48 + ) + XCTAssertEqual(value(metrics, .mallocCountTotal), 10) + XCTAssertEqual(value(metrics, .mallocCountSmall), 6) + XCTAssertEqual(value(metrics, .mallocCountLarge), 4) + XCTAssertEqual(value(metrics, .mallocBytesCount), 100) + XCTAssertEqual(value(metrics, .freeCountTotal), 3) + } + + /// The whole per-iteration malloc count/byte family must scale together, otherwise the scaled + /// output is internally inconsistent (e.g. `small + large != total`, or bytes not comparable + /// to free) under a non-unit `scalingFactor`. + func testMallocFamilyScalesConsistently() { + let scaledFamily: [BenchmarkMetric] = [ + .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, + .freeCountTotal, .mallocBytesCount, .memoryLeaked, .memoryLeakedBytes, + ] + for metric in scaledFamily { + XCTAssertTrue( + metric.useScalingFactor, + "\(metric.rawDescription) must scale with the rest of the malloc family" + ) + } + } + + /// Metric array slots must be unique so two metrics never collide on the same `statistics` slot. 
+ func testMetricIndicesAreUnique() { + let indices = BenchmarkMetric.all.map(\.index) + XCTAssertEqual(Set(indices).count, indices.count, "metric indices must be unique") + } +} From 553017e9de0a3de7c33f9feff97cb86510704692 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:38:21 +0200 Subject: [PATCH 33/37] add new metric --- .../MallocInterposer/MallocInterposer.swift | 4 +- Benchmarks/Package.resolved | 18 ++++---- Benchmarks/Package.swift | 2 +- Package.swift | 1 - .../BenchmarkPlugin+Help.swift | 5 +- .../BenchmarkHelpGenerator.swift | 4 ++ .../BenchmarkExecutor+Extensions.swift | 14 +++++- Sources/Benchmark/BenchmarkExecutor.swift | 19 ++++---- .../Benchmark/BenchmarkMetric+Defaults.swift | 46 ++++++++++++++++--- Sources/Benchmark/BenchmarkMetric.swift | 35 ++++++++++---- .../Documentation.docc/BenchmarkMetric.md | 8 ++++ .../Benchmark/Documentation.docc/Metrics.md | 12 +++-- .../Documentation.docc/RunningBenchmarks.md | 5 +- .../BenchmarkMetricsTests.swift | 4 ++ .../MallocStatisticsTests.swift | 24 ++++++++-- 15 files changed, 148 insertions(+), 53 deletions(-) diff --git a/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift index 2b7ffc3b..f53e039d 100644 --- a/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift +++ b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift @@ -36,7 +36,7 @@ let mallocMetrics: [BenchmarkMetric] = [ .mallocCountTotal, .freeCountTotal, .mallocBytesCount, - .memoryLeaked, + .mallocFreeDelta, .memoryLeakedBytes, ] @@ -165,7 +165,7 @@ let benchmarks: @Sendable () -> Void = { } } - // Deliberate leak: malloc without free. Confirms memoryLeaked / + // Deliberate leak: malloc without free. Confirms mallocFreeDelta / // memoryLeakedBytes track unbalanced flow correctly. // Expected per iter: malloc=1, free=0, leaked=1, leakedBytes≈128. 
// The accumulated leak across the run is bounded: diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index 93dde97c..3b0a2321 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "e857c3ae5e128252a36e647f56feaed932fd105d4a88290b83e8309219f2ed92", + "originHash" : "beddb8cb97cf892b8a2c00081488d118648e1609b6467ece2ea9cd075a22b282", "pins" : [ { "identity" : "hdrhistogram-swift", @@ -11,21 +11,21 @@ } }, { - "identity" : "malloc-interposer", + "identity" : "package-datetime", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/malloc-interposer.git", + "location" : "https://github.com/ordo-one/package-datetime", "state" : { - "revision" : "bcda3c88ae083adb9c372b76c825806045fff99e", - "version" : "1.2.0" + "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", + "version" : "1.0.2" } }, { - "identity" : "package-datetime", + "identity" : "package-jemalloc", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-datetime", + "location" : "https://github.com/ordo-one/package-jemalloc.git", "state" : { - "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", - "version" : "1.0.2" + "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", + "version" : "1.0.0" } }, { diff --git a/Benchmarks/Package.swift b/Benchmarks/Package.swift index 73f9937e..ec44ccb0 100644 --- a/Benchmarks/Package.swift +++ b/Benchmarks/Package.swift @@ -81,7 +81,7 @@ package.targets += [ // Regression coverage for the malloc interposer: predictable allocation // patterns (counts known per iteration) so any drift between jemalloc and // interposer code paths is immediately visible in mallocCountTotal / -// freeCountTotal / memoryLeaked. +// freeCountTotal / mallocFreeDelta / memoryLeakedBytes. 
package.targets += [ .executableTarget( name: "MallocInterposerBenchmarks", diff --git a/Package.swift b/Package.swift index c09c79a5..7b6ba30d 100644 --- a/Package.swift +++ b/Package.swift @@ -35,7 +35,6 @@ var packageDependencies: [Package.Dependency] = [ .package(url: "https://github.com/ordo-one/TextTable.git", .upToNextMajor(from: "0.0.1")), .package(url: "https://github.com/HdrHistogram/hdrhistogram-swift.git", .upToNextMajor(from: "0.1.4")), .package(url: "https://github.com/apple/swift-atomics.git", .upToNextMajor(from: "1.0.0")), - .package(url: "https://github.com/ordo-one/package-jemalloc.git", .upToNextMajor(from: "1.0.0")), mallocInterposerDependency, ] diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkPlugin+Help.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkPlugin+Help.swift index 8c91fac0..f09c4c69 100644 --- a/Plugins/BenchmarkCommandPlugin/BenchmarkPlugin+Help.swift +++ b/Plugins/BenchmarkCommandPlugin/BenchmarkPlugin+Help.swift @@ -53,8 +53,9 @@ let help = Benchmark targets matching the regexp filter that should be skipped --format The output format to use, default is 'text' (values: text, markdown, influx, jmh, jsonSmallerIsBetter, jsonBiggerIsBetter, histogramEncoded, histogram, histogramSamples, histogramPercentiles, metricP90AbsoluteThresholds) --metric Specifies that the benchmark run should use one or more specific metrics instead of the ones defined by the benchmarks. 
(values: cpuUser, cpuSystem, cpuTotal, wallClock, throughput, - peakMemoryResident, peakMemoryResidentDelta, peakMemoryVirtual, mallocCountSmall, mallocCountLarge, mallocCountTotal, allocatedResidentMemory, memoryLeaked, syscalls, contextSwitches, threads, - threadsRunning, readSyscalls, writeSyscalls, readBytesLogical, writeBytesLogical, readBytesPhysical, writeBytesPhysical, instructions, retainCount, releaseCount, retainReleaseDelta, custom) + peakMemoryResident, peakMemoryResidentDelta, peakMemoryVirtual, mallocCountSmall, mallocCountLarge, mallocCountTotal, freeCountTotal, mallocBytesCount, mallocFreeDelta, + allocatedResidentMemory, memoryLeaked, memoryLeakedBytes, syscalls, contextSwitches, threads, threadsRunning, readSyscalls, writeSyscalls, readBytesLogical, writeBytesLogical, + readBytesPhysical, writeBytesPhysical, instructions, retainCount, releaseCount, retainReleaseDelta, custom) --path The path to operate on for data export or threshold operations, default is the current directory (".") for exports and the ("./Thresholds") directory for thresholds. 
--quiet Specifies that output should be suppressed (useful for if you just want to check return code) --scale Specifies that some of the text output should be scaled using the scalingFactor (denoted by '*' in output) diff --git a/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift b/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift index 979daee2..5cc9920b 100644 --- a/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift +++ b/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift @@ -26,8 +26,12 @@ let availableMetrics = [ "mallocCountSmall", "mallocCountLarge", "mallocCountTotal", + "freeCountTotal", + "mallocBytesCount", + "mallocFreeDelta", "allocatedResidentMemory", "memoryLeaked", + "memoryLeakedBytes", "syscalls", "contextSwitches", "threads", diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift index b5f7db26..33cc84b8 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -24,9 +24,19 @@ extension BenchmarkExecutor { func mallocStatsProducerNeeded(_ metric: BenchmarkMetric) -> Bool { switch metric { case .memoryLeaked: + #if canImport(MallocInterposerSwift) + return false + #else return true + #endif case .memoryLeakedBytes: return true + case .mallocFreeDelta: + #if canImport(MallocInterposerSwift) + return true + #else + return false + #endif case .mallocCountTotal: return true case .mallocCountSmall: @@ -49,7 +59,7 @@ extension BenchmarkExecutor { /// Maps a measured window's interposer counter deltas to the `(metric, value)` pairs to record. /// /// Extracted as a pure function so the leak/scaling arithmetic can be unit-tested without a live - /// interposer. `memoryLeaked` / `memoryLeakedBytes` are clamped to `0`: a net-negative window + /// interposer. `mallocFreeDelta` / `memoryLeakedBytes` are clamped to `0`: a net-negative window /// (more frees than mallocs — e.g. 
freeing a warmup survivor, or cross-thread frees) is not a /// leak, and clamping records a `0` sample rather than letting `Statistics.add` drop it, which /// would desync the column's sample count and bias the average upward. @@ -67,7 +77,7 @@ extension BenchmarkExecutor { (.mallocCountSmall, mallocSmallDelta), (.mallocCountLarge, mallocLargeDelta), (.freeCountTotal, freeCountDelta), - (.memoryLeaked, max(0, mallocCountDelta - freeCountDelta)), + (.mallocFreeDelta, max(0, mallocCountDelta - freeCountDelta)), (.memoryLeakedBytes, max(0, mallocBytesDelta - freeBytesDelta)), ] } diff --git a/Sources/Benchmark/BenchmarkExecutor.swift b/Sources/Benchmark/BenchmarkExecutor.swift index f11b9231..ead3edc1 100644 --- a/Sources/Benchmark/BenchmarkExecutor.swift +++ b/Sources/Benchmark/BenchmarkExecutor.swift @@ -254,12 +254,13 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length if mallocStatsRequested { #if canImport(MallocInterposerSwift) - // allocatedResidentMemory is intentionally not populated on the interposer path: - // the interposer cannot measure the allocator's resident set (only gross requested - // bytes). It remains produced by the jemalloc backend (Swift <=6.2). Use - // mallocBytesCount for gross allocated bytes or peakMemoryResident for OS-sampled - // resident memory. The leak/scaling arithmetic lives in BenchmarkExecutor - // .mallocStatistics(...) so it can be unit-tested without a live interposer. + // allocatedResidentMemory and the legacy memoryLeaked metric are intentionally + // not populated on the interposer path: the interposer cannot measure the + // allocator's resident set. Use mallocBytesCount / memoryLeakedBytes for + // requested-byte accounting, mallocFreeDelta for allocation-count delta, or + // peakMemoryResident for OS-sampled resident memory. The leak/scaling arithmetic + // lives in BenchmarkExecutor.mallocStatistics(...) so it can be unit-tested + // without a live interposer. 
let mallocMetrics = BenchmarkExecutor.mallocStatistics( mallocCountDelta: stopMallocStats.mallocCount - startMallocStats.mallocCount, mallocBytesDelta: stopMallocStats.mallocBytesCount - startMallocStats.mallocBytesCount, @@ -278,9 +279,9 @@ struct BenchmarkExecutor { // swiftlint:disable:this type_body_length let allocatedResidentMemory = stopMallocStats.allocatedResidentMemory - startMallocStats.allocatedResidentMemory statistics[BenchmarkMetric.allocatedResidentMemory.index].add(allocatedResidentMemory) - // jemalloc has no free counter, so memoryLeaked is reported (as on the - // pre-interposer path) as resident-byte growth rather than a malloc-minus-free - // count. Backend-dependent definition; see BenchmarkMetric.memoryLeaked docs. + // jemalloc has no free counter, so memoryLeaked keeps the legacy resident-byte + // growth definition. The interposer backend uses mallocFreeDelta for + // malloc-minus-free count and memoryLeakedBytes for requested-byte delta. statistics[BenchmarkMetric.memoryLeaked.index].add(max(0, allocatedResidentMemory)) let mallocSmallCount = stopMallocStats.mallocCountSmall - startMallocStats.mallocCountSmall diff --git a/Sources/Benchmark/BenchmarkMetric+Defaults.swift b/Sources/Benchmark/BenchmarkMetric+Defaults.swift index 7ffbc325..8828adbc 100644 --- a/Sources/Benchmark/BenchmarkMetric+Defaults.swift +++ b/Sources/Benchmark/BenchmarkMetric+Defaults.swift @@ -30,53 +30,84 @@ public extension BenchmarkMetric { /// There is also an convenience extension on Array defined such that you can write just `.default` rather than `BenchmarkMetric.default` /// static var `default`: [BenchmarkMetric] { - [ + var metrics: [BenchmarkMetric] = [ .wallClock, .cpuTotal, .mallocCountTotal, + ] + #if canImport(MallocInterposerSwift) + metrics += [ .freeCountTotal, .mallocBytesCount, - .memoryLeaked, + .mallocFreeDelta, .memoryLeakedBytes, + ] + #else + metrics += [ + .memoryLeaked, + ] + #endif + metrics += [ .throughput, .instructions, 
.peakMemoryResident, ] + return metrics } /// A collection of extended system benchmarks. static var extended: [BenchmarkMetric] { - [ + var metrics: [BenchmarkMetric] = [ .wallClock, .cpuUser, .cpuTotal, .mallocCountTotal, + ] + #if canImport(MallocInterposerSwift) + metrics += [ .freeCountTotal, .mallocBytesCount, + .mallocFreeDelta, + .memoryLeakedBytes, + ] + #else + metrics += [ + .memoryLeaked, + ] + #endif + metrics += [ .throughput, .peakMemoryResident, - .memoryLeaked, - .memoryLeakedBytes, .syscalls, .instructions, ] + return metrics } /// A collection of memory benchmarks. static var memory: [BenchmarkMetric] { - [ + var metrics: [BenchmarkMetric] = [ .peakMemoryResident, .peakMemoryResidentDelta, .peakMemoryVirtual, .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, + ] + #if canImport(MallocInterposerSwift) + metrics += [ .mallocBytesCount, .freeCountTotal, - .memoryLeaked, + .mallocFreeDelta, .memoryLeakedBytes, + ] + #else + metrics += [ + .memoryLeaked, .allocatedResidentMemory, ] + #endif + return metrics } /// A collection of ARC metrics @@ -129,6 +160,7 @@ public extension BenchmarkMetric { .mallocCountTotal, .freeCountTotal, .mallocBytesCount, + .mallocFreeDelta, .memoryLeaked, .memoryLeakedBytes, .syscalls, diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index f7065ad5..2ddd6b93 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -45,10 +45,16 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib case mallocCountLarge /// Number of total malloc calls case mallocCountTotal - /// Number of totatl free calls + /// Number of total free calls case freeCountTotal /// The amount of memory allocated in bytes through malloc calls case mallocBytesCount + /// Net unfreed allocation count within the measured region. + /// + /// Reports `malloc` count minus `free` count from the interposer backend. 
Because counting is + /// process-global, this metric is only reliable for single-threaded benchmarks with quiescent + /// background allocation. + case mallocFreeDelta /// The amount of allocated resident memory according to the memory allocator /// by the application (does not include metadata overhead etc). /// @@ -57,14 +63,13 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib /// > for gross allocated bytes, or ``peakMemoryResident`` for OS-sampled resident memory. @available(*, deprecated, message: "Only produced by the jemalloc backend; use mallocBytesCount or peakMemoryResident") case allocatedResidentMemory - /// Net unfreed allocations within the measured region. + /// Legacy jemalloc resident-byte growth within the measured region. /// - /// Backend-dependent: the 6.3+ interposer backend reports `malloc` count minus `free` count, - /// while the jemalloc backend (Swift ≤6.2) reports resident-byte growth. Because counting is - /// process-global, this metric is only reliable for single-threaded benchmarks with quiescent - /// background allocation. + /// Only produced by the jemalloc backend (Swift ≤6.2). The 6.3+ interposer backend does not + /// produce this metric; use ``mallocFreeDelta`` for allocation-count delta or + /// ``memoryLeakedBytes`` for requested-byte delta. case memoryLeaked - /// Leaked memeory in bytes + /// Net unfreed requested bytes within the measured region. 
case memoryLeakedBytes /// Measure number of syscalls made during the test case syscalls @@ -144,7 +149,7 @@ public extension BenchmarkMetric { switch self { case .cpuSystem, .cpuTotal, .cpuUser, .wallClock: return true - case .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, .freeCountTotal, + case .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, .freeCountTotal, .mallocFreeDelta, .mallocBytesCount, .memoryLeaked, .memoryLeakedBytes: return true case .syscalls: @@ -202,10 +207,12 @@ public extension BenchmarkMetric { return "Malloc (total)" case .mallocBytesCount: return "Malloc (bytes total)" + case .mallocFreeDelta: + return "Malloc / free Δ" case .allocatedResidentMemory: return "Memory (allocated resident)" case .memoryLeaked: - return "Malloc / free Δ" + return "Memory leaked (resident)" case .memoryLeakedBytes: return "Malloc / free Δ (bytes)" case .syscalls: @@ -315,13 +322,15 @@ public extension BenchmarkMetric { return 30 case .instructions: return 31 + case .mallocFreeDelta: + return 32 default: return 0 // custom payloads must be stored in dictionary } } @_documentation(visibility: internal) - static var maxIndex: Int { 31 } // + static var maxIndex: Int { 32 } // // Used by the Benchmark Executor for efficient indexing into results @_documentation(visibility: internal) @@ -389,6 +398,8 @@ public extension BenchmarkMetric { return .retainReleaseDelta case 31: return .instructions + case 32: + return .mallocFreeDelta default: break } @@ -426,6 +437,8 @@ public extension BenchmarkMetric { return "freeCountTotal" case .mallocBytesCount: return "mallocBytesCount" + case .mallocFreeDelta: + return "mallocFreeDelta" case .allocatedResidentMemory: return "allocatedResidentMemory" case .memoryLeaked: @@ -504,6 +517,8 @@ public extension BenchmarkMetric { self = BenchmarkMetric.freeCountTotal case "mallocBytesCount": self = BenchmarkMetric.mallocBytesCount + case "mallocFreeDelta": + self = BenchmarkMetric.mallocFreeDelta case 
"allocatedResidentMemory": self = BenchmarkMetric.allocatedResidentMemory case "memoryLeaked": diff --git a/Sources/Benchmark/Documentation.docc/BenchmarkMetric.md b/Sources/Benchmark/Documentation.docc/BenchmarkMetric.md index ad9a7ab2..5413bfde 100644 --- a/Sources/Benchmark/Documentation.docc/BenchmarkMetric.md +++ b/Sources/Benchmark/Documentation.docc/BenchmarkMetric.md @@ -27,6 +27,10 @@ - ``BenchmarkMetric/wallClock`` - ``BenchmarkMetric/cpuTotal`` - ``BenchmarkMetric/mallocCountTotal`` +- ``BenchmarkMetric/freeCountTotal`` +- ``BenchmarkMetric/mallocBytesCount`` +- ``BenchmarkMetric/mallocFreeDelta`` +- ``BenchmarkMetric/memoryLeakedBytes`` - ``BenchmarkMetric/throughput`` - ``BenchmarkMetric/peakMemoryResident`` - ``BenchmarkMetric/memoryLeaked`` @@ -41,7 +45,11 @@ - ``BenchmarkMetric/mallocCountSmall`` - ``BenchmarkMetric/mallocCountLarge`` - ``BenchmarkMetric/mallocCountTotal`` +- ``BenchmarkMetric/freeCountTotal`` +- ``BenchmarkMetric/mallocBytesCount`` +- ``BenchmarkMetric/mallocFreeDelta`` - ``BenchmarkMetric/memoryLeaked`` +- ``BenchmarkMetric/memoryLeakedBytes`` - ``BenchmarkMetric/allocatedResidentMemory`` ### Reference Counting (retain/release) diff --git a/Sources/Benchmark/Documentation.docc/Metrics.md b/Sources/Benchmark/Documentation.docc/Metrics.md index a5b790bf..71ce4ad1 100644 --- a/Sources/Benchmark/Documentation.docc/Metrics.md +++ b/Sources/Benchmark/Documentation.docc/Metrics.md @@ -18,11 +18,15 @@ Currently supported metrics are: - term `peakMemoryResident`: The resident memory usage - sampled during runtime - term `peakMemoryResidentDelta`: The resident memory usage - sampled during runtime (excluding start of benchmark baseline) - term `peakMemoryVirtual`: The virtual memory usage - sampled during runtime -- term `mallocCountSmall`: The number of small malloc calls according to jemalloc -- term `mallocCountLarge`: The number of large malloc calls according to jemalloc -- term `mallocCountTotal`: The total number of mallocs according 
to jemalloc +- term `mallocCountSmall`: The number of small malloc calls according to the active malloc backend +- term `mallocCountLarge`: The number of large malloc calls according to the active malloc backend +- term `mallocCountTotal`: The total number of malloc calls according to the active malloc backend +- term `freeCountTotal`: The total number of free calls according to the interposer backend +- term `mallocBytesCount`: The total requested bytes allocated through malloc calls according to the interposer backend +- term `mallocFreeDelta`: The number of malloc calls minus free calls according to the interposer backend - term `allocatedResidentMemory`: The amount of allocated resident memory by the application (not including allocator metadata overhead etc) according to jemalloc -- term `memoryLeaked`: The number of small+large mallocs - small+large frees in resident memory (just a possible leak) +- term `memoryLeaked`: Legacy jemalloc resident-byte growth within the measured region +- term `memoryLeakedBytes`: The requested bytes allocated minus requested bytes freed according to the interposer backend - term `syscalls`: The number of syscalls made during the test -- macOS only - term `contextSwitches`: The number of context switches made during the test -- macOS only - term `threads`: The maximum number of threads in the process under the test (not exact, sampled) diff --git a/Sources/Benchmark/Documentation.docc/RunningBenchmarks.md b/Sources/Benchmark/Documentation.docc/RunningBenchmarks.md index cb48bffc..45482407 100644 --- a/Sources/Benchmark/Documentation.docc/RunningBenchmarks.md +++ b/Sources/Benchmark/Documentation.docc/RunningBenchmarks.md @@ -91,8 +91,9 @@ OPTIONS: Benchmark targets matching the regexp filter that should be skipped --format The output format to use, default is 'text' (values: text, markdown, influx, jmh, histogramEncoded, histogram, histogramSamples, histogramPercentiles, metricP90AbsoluteThresholds) --metric Specifies that the 
benchmark run should use one or more specific metrics instead of the ones defined by the benchmarks. (values: cpuUser, cpuSystem, cpuTotal, wallClock, throughput, -peakMemoryResident, peakMemoryResidentDelta, peakMemoryVirtual, mallocCountSmall, mallocCountLarge, mallocCountTotal, allocatedResidentMemory, memoryLeaked, syscalls, contextSwitches, threads, -threadsRunning, readSyscalls, writeSyscalls, readBytesLogical, writeBytesLogical, readBytesPhysical, writeBytesPhysical, instructions, retainCount, releaseCount, retainReleaseDelta, custom) +peakMemoryResident, peakMemoryResidentDelta, peakMemoryVirtual, mallocCountSmall, mallocCountLarge, mallocCountTotal, freeCountTotal, mallocBytesCount, mallocFreeDelta, allocatedResidentMemory, +memoryLeaked, memoryLeakedBytes, syscalls, contextSwitches, threads, threadsRunning, readSyscalls, writeSyscalls, readBytesLogical, writeBytesLogical, readBytesPhysical, writeBytesPhysical, instructions, +retainCount, releaseCount, retainReleaseDelta, custom) --path The path to operate on for data export or threshold operations, default is the current directory (".") for exports and the ("./Thresholds") directory for thresholds. 
--quiet Specifies that output should be suppressed (useful for if you just want to check return code) --scale Specifies that some of the text output should be scaled using the scalingFactor (denoted by '*' in output) diff --git a/Tests/BenchmarkTests/BenchmarkMetricsTests.swift b/Tests/BenchmarkTests/BenchmarkMetricsTests.swift index 889128cf..e5f82a8f 100644 --- a/Tests/BenchmarkTests/BenchmarkMetricsTests.swift +++ b/Tests/BenchmarkTests/BenchmarkMetricsTests.swift @@ -25,8 +25,10 @@ final class BenchmarkMetricsTests: XCTestCase { .mallocCountTotal, .mallocBytesCount, .freeCountTotal, + .mallocFreeDelta, .allocatedResidentMemory, .memoryLeaked, + .memoryLeakedBytes, .syscalls, .contextSwitches, .threads, @@ -58,8 +60,10 @@ final class BenchmarkMetricsTests: XCTestCase { "mallocCountTotal", "mallocBytesCount", "freeCountTotal", + "mallocFreeDelta", "allocatedResidentMemory", "memoryLeaked", + "memoryLeakedBytes", "syscalls", "contextSwitches", "threads", diff --git a/Tests/BenchmarkTests/MallocStatisticsTests.swift b/Tests/BenchmarkTests/MallocStatisticsTests.swift index feb6d22f..bd7718e9 100644 --- a/Tests/BenchmarkTests/MallocStatisticsTests.swift +++ b/Tests/BenchmarkTests/MallocStatisticsTests.swift @@ -32,8 +32,9 @@ final class MallocStatisticsTests: XCTestCase { XCTAssertEqual(value(metrics, .mallocCountTotal), 10) XCTAssertEqual(value(metrics, .freeCountTotal), 10) XCTAssertEqual(value(metrics, .mallocBytesCount), 1_024) - XCTAssertEqual(value(metrics, .memoryLeaked), 0) + XCTAssertEqual(value(metrics, .mallocFreeDelta), 0) XCTAssertEqual(value(metrics, .memoryLeakedBytes), 0) + XCTAssertNil(value(metrics, .memoryLeaked), "interposer stats must not emit the legacy jemalloc memoryLeaked metric") } func testUnbalancedAllocReportsLeak() { @@ -42,7 +43,7 @@ final class MallocStatisticsTests: XCTestCase { mallocSmallDelta: 7, mallocLargeDelta: 3, freeCountDelta: 6, freeBytesDelta: 1_024 ) - XCTAssertEqual(value(metrics, .memoryLeaked), 4) // 10 mallocs - 6 
frees + XCTAssertEqual(value(metrics, .mallocFreeDelta), 4) // 10 mallocs - 6 frees XCTAssertEqual(value(metrics, .memoryLeakedBytes), 1_024) // 2048 - 1024 } @@ -55,7 +56,7 @@ final class MallocStatisticsTests: XCTestCase { mallocSmallDelta: 3, mallocLargeDelta: 0, freeCountDelta: 5, freeBytesDelta: 4_096 ) - XCTAssertEqual(value(metrics, .memoryLeaked), 0) + XCTAssertEqual(value(metrics, .mallocFreeDelta), 0) XCTAssertEqual(value(metrics, .memoryLeakedBytes), 0) } @@ -74,6 +75,7 @@ final class MallocStatisticsTests: XCTestCase { XCTAssertEqual(value(metrics, .mallocCountLarge), 4) XCTAssertEqual(value(metrics, .mallocBytesCount), 100) XCTAssertEqual(value(metrics, .freeCountTotal), 3) + XCTAssertEqual(value(metrics, .mallocFreeDelta), 7) } /// The whole per-iteration malloc count/byte family must scale together, otherwise the scaled @@ -82,7 +84,7 @@ final class MallocStatisticsTests: XCTestCase { func testMallocFamilyScalesConsistently() { let scaledFamily: [BenchmarkMetric] = [ .mallocCountSmall, .mallocCountLarge, .mallocCountTotal, - .freeCountTotal, .mallocBytesCount, .memoryLeaked, .memoryLeakedBytes, + .freeCountTotal, .mallocBytesCount, .mallocFreeDelta, .memoryLeakedBytes, ] for metric in scaledFamily { XCTAssertTrue( @@ -92,6 +94,20 @@ final class MallocStatisticsTests: XCTestCase { } } + func testDefaultMetricsUseBackendSpecificLeakMetrics() { + #if canImport(MallocInterposerSwift) + XCTAssertTrue(BenchmarkMetric.default.contains(.mallocFreeDelta)) + XCTAssertTrue(BenchmarkMetric.default.contains(.memoryLeakedBytes)) + XCTAssertFalse( + BenchmarkMetric.default.contains(.memoryLeaked), + "interposer defaults must not emit legacy jemalloc memoryLeaked" + ) + #else + XCTAssertTrue(BenchmarkMetric.default.contains(.memoryLeaked)) + XCTAssertFalse(BenchmarkMetric.default.contains(.mallocFreeDelta)) + #endif + } + /// Metric array slots must be unique so two metrics never collide on the same `statistics` slot. 
func testMetricIndicesAreUnique() { let indices = BenchmarkMetric.all.map(\.index) From 0b9aed42cc4a09b159fc0fd19f9950fd1119a0fb Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 15:00:48 +0200 Subject: [PATCH 34/37] fix swiftlint issues --- .../MallocInterposer/MallocInterposer.swift | 64 +++++++++---------- Benchmarks/Package.resolved | 20 +++--- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift index f53e039d..eb0ad5f0 100644 --- a/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift +++ b/Benchmarks/Benchmarks/MallocInterposer/MallocInterposer.swift @@ -63,9 +63,9 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=1 (small=1, large=0), free=1, leaked=0. Benchmark("Malloc 64B + free") { benchmark in for _ in benchmark.scaledIterations { - let p = malloc(64) - blackHole(p) - free(p) + let ptr = malloc(64) + blackHole(ptr) + free(ptr) } } @@ -73,9 +73,9 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=1 (small=0, large=1), free=1. Benchmark("Malloc 2 MiB + free") { benchmark in for _ in benchmark.scaledIterations { - let p = malloc(2 * 1024 * 1024) - blackHole(p) - free(p) + let ptr = malloc(2 * 1_024 * 1_024) + blackHole(ptr) + free(ptr) } } @@ -83,9 +83,9 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=1, free=1. Benchmark("Calloc 8x8 + free") { benchmark in for _ in benchmark.scaledIterations { - let p = calloc(8, 8) - blackHole(p) - free(p) + let ptr = calloc(8, 8) + blackHole(ptr) + free(ptr) } } @@ -93,10 +93,10 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=2, free=2. 
Benchmark("Realloc grow 64→256 + free") { benchmark in for _ in benchmark.scaledIterations { - let p1 = malloc(64) - let p2 = realloc(p1, 256) - blackHole(p2) - free(p2) + let original = malloc(64) + let grown = realloc(original, 256) + blackHole(grown) + free(grown) } } @@ -104,9 +104,9 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=1, free=1. Benchmark("Realloc(NULL, 128) + free") { benchmark in for _ in benchmark.scaledIterations { - let p = realloc(nil, 128) - blackHole(p) - free(p) + let ptr = realloc(nil, 128) + blackHole(ptr) + free(ptr) } } @@ -114,9 +114,9 @@ let benchmarks: @Sendable () -> Void = { // Expected per iter: malloc=1, free=1. Benchmark("Malloc + realloc(p, 0)") { benchmark in for _ in benchmark.scaledIterations { - let p = malloc(64) - let r = realloc(p, 0) - blackHole(r) // expected nil + let ptr = malloc(64) + let resized = realloc(ptr, 0) + blackHole(resized) // expected nil } } @@ -125,7 +125,7 @@ let benchmarks: @Sendable () -> Void = { Benchmark("posix_memalign(64, 1024) + free") { benchmark in var ptr: UnsafeMutableRawPointer? for _ in benchmark.scaledIterations { - _ = posix_memalign(&ptr, 64, 1024) + _ = posix_memalign(&ptr, 64, 1_024) blackHole(ptr) free(ptr) } @@ -139,9 +139,9 @@ let benchmarks: @Sendable () -> Void = { #if !canImport(Darwin) Benchmark("aligned_alloc(64, 1024) + free") { benchmark in for _ in benchmark.scaledIterations { - let p = aligned_alloc(64, 1024) - blackHole(p) - free(p) + let ptr = aligned_alloc(64, 1_024) + blackHole(ptr) + free(ptr) } } #endif @@ -150,16 +150,16 @@ let benchmarks: @Sendable () -> Void = { // linearly and isn't accidentally collapsed/de-duplicated. // Expected per iter: malloc=16, free=16. 
Benchmark("Malloc x16 + free x16") { benchmark in - let n = 16 - let buf = UnsafeMutablePointer.allocate(capacity: n) + let count = 16 + let buf = UnsafeMutablePointer.allocate(capacity: count) defer { buf.deallocate() } - buf.update(repeating: nil, count: n) + buf.update(repeating: nil, count: count) for _ in benchmark.scaledIterations { - for i in 0.. Void = { // <= maxIterations * scalingFactor * 128 = 100 * 1000 * 128 = ~12.5 MiB. Benchmark("Leak: malloc 128B (no free)") { benchmark in for _ in benchmark.scaledIterations { - let p = malloc(128) - blackHole(p) + let ptr = malloc(128) + blackHole(ptr) } } @@ -195,8 +195,8 @@ let benchmarks: @Sendable () -> Void = { // be stable. Benchmark("Swift String (long, heap)") { benchmark in for _ in benchmark.scaledIterations { - let s = String(repeating: "x", count: 256) - blackHole(s) + let str = String(repeating: "x", count: 256) + blackHole(str) } } } diff --git a/Benchmarks/Package.resolved b/Benchmarks/Package.resolved index 3b0a2321..2c543468 100644 --- a/Benchmarks/Package.resolved +++ b/Benchmarks/Package.resolved @@ -11,21 +11,21 @@ } }, { - "identity" : "package-datetime", + "identity" : "malloc-interposer", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-datetime", + "location" : "https://github.com/ordo-one/malloc-interposer.git", "state" : { - "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", - "version" : "1.0.2" + "revision" : "d9ca5ad6d85622fb2bd5b3d3387ba064dbcab1c2", + "version" : "1.0.0" } }, { - "identity" : "package-jemalloc", + "identity" : "package-datetime", "kind" : "remoteSourceControl", - "location" : "https://github.com/ordo-one/package-jemalloc.git", + "location" : "https://github.com/ordo-one/package-datetime", "state" : { - "revision" : "e8a5db026963f5bfeac842d9d3f2cc8cde323b49", - "version" : "1.0.0" + "revision" : "d1242188c9f48aad297e6ca9b717776f8660bc31", + "version" : "1.0.2" } }, { @@ -33,8 +33,8 @@ "kind" : "remoteSourceControl", "location" 
: "https://github.com/apple/swift-argument-parser", "state" : { - "revision" : "c8ed701b513cf5177118a175d85fbbbcd707ab41", - "version" : "1.3.0" + "revision" : "6a52f3251125d74daf04fcbd5e6f08a75d074382", + "version" : "1.8.2" } }, { From e394b2705a9e94f0ce32a5d80a5b1295c63094c2 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:20:01 +0200 Subject: [PATCH 35/37] fix version --- Package.swift | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Package.swift b/Package.swift index 7b6ba30d..4002f34d 100644 --- a/Package.swift +++ b/Package.swift @@ -20,7 +20,8 @@ if disableJemalloc { // when iterating on the interposer alongside this package. let mallocInterposerDependency: Package.Dependency = { if let localPath = ProcessInfo.processInfo.environment["MALLOC_INTERPOSER_LOCAL_PATH"], - localPath.isEmpty == false { + localPath.isEmpty == false + { return .package(path: localPath) } return .package( @@ -40,7 +41,7 @@ var packageDependencies: [Package.Dependency] = [ #if os(Linux) && compiler(>=6.3) packageDependencies += [ - .package(url: "https://github.com/ordo-one/swift-runtime-interposer.git", .upToNextMajor(from: "1.2.0")), + .package(url: "https://github.com/ordo-one/swift-runtime-interposer.git", .upToNextMajor(from: "1.0.0")) ] #endif From f5f5634b4e99f7c27802fe588dac1857fc8ea4ac Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 16:40:12 +0200 Subject: [PATCH 36/37] leave mallocSmall and mallocLarge metrics --- Sources/Benchmark/BenchmarkMetric.swift | 10 +++----- scripts/compare-malloc-local.sh | 33 ++----------------------- 2 files changed, 6 insertions(+), 37 deletions(-) diff --git a/Sources/Benchmark/BenchmarkMetric.swift b/Sources/Benchmark/BenchmarkMetric.swift index 2ddd6b93..13ea5128 100644 --- a/Sources/Benchmark/BenchmarkMetric.swift +++ b/Sources/Benchmark/BenchmarkMetric.swift @@ -33,15 
+33,13 @@ public enum BenchmarkMetric: Hashable, Equatable, Codable, CustomStringConvertib case peakMemoryVirtual /// Number of small malloc calls /// - /// > Deprecated: The small/large split is backend-dependent — the jemalloc backend - /// > (Swift ≤6.2) splits on jemalloc's size classes, while the 6.3+ interposer backend - /// > splits on a coarser `requested size > page size` threshold. Prefer ``mallocCountTotal``. - @available(*, deprecated, message: "Backend-dependent small/large split; prefer mallocCountTotal") + /// The small/large split is backend-dependent: the jemalloc backend (Swift ≤6.2) splits on + /// jemalloc's size classes, while the 6.3+ interposer backend splits on a coarser + /// `requested size > page size` threshold. case mallocCountSmall /// Number of large malloc calls /// - /// > Deprecated: See ``mallocCountSmall``. - @available(*, deprecated, message: "Backend-dependent small/large split; prefer mallocCountTotal") + /// The backend-specific counterpart to ``mallocCountSmall``. case mallocCountLarge /// Number of total malloc calls case mallocCountTotal diff --git a/scripts/compare-malloc-local.sh b/scripts/compare-malloc-local.sh index 4e8e6728..4b3ab492 100755 --- a/scripts/compare-malloc-local.sh +++ b/scripts/compare-malloc-local.sh @@ -93,24 +93,6 @@ for f in "$@"; do FILTER_ARGS+=(--filter "$f") done -# SwiftPM #9062 workaround: copy lib*-tool.dylib → lib*.dylib so the spawned -# benchmark tool finds the interposer at the path it expects. Only relevant -# on the interposer (6.3) run. -fix_tool_dylibs() { - local search_dir="$1" - local copied=0 - while IFS= read -r src; do - local dst="${src/-tool.dylib/.dylib}" - if [[ ! 
-f "$dst" || "$src" -nt "$dst" ]]; then - cp -p "$src" "$dst" - copied=$((copied + 1)) - fi - done < <(find "$search_dir" -name "libMallocInterposer*-tool.dylib" 2>/dev/null) - if ((copied > 0)); then - warn "Renamed $copied -tool.dylib → .dylib (SwiftPM #9062 workaround)" - fi -} - run_jemalloc() { step "Run 1: Swift $TOOLCHAIN_OLD (jemalloc) → baseline '$BASELINE_OLD' [scratch: $SCRATCH_OLD]" swiftly run +"$TOOLCHAIN_OLD" \ @@ -125,25 +107,14 @@ run_jemalloc() { run_interposer() { step "Run 2: Swift $TOOLCHAIN_NEW (interposer) → baseline '$BASELINE_NEW' [scratch: $SCRATCH_NEW]" - if ! swiftly run +"$TOOLCHAIN_NEW" \ + swiftly run +"$TOOLCHAIN_NEW" \ swift package \ --scratch-path "$SCRATCH_NEW" \ --allow-writing-to-package-directory benchmark \ baseline update "$BASELINE_NEW" \ --target "$TARGET" \ --quiet --no-progress \ - "${FILTER_ARGS[@]}"; then - warn "First attempt failed — applying SwiftPM #9062 workaround and retrying" - fix_tool_dylibs "$SCRATCH_NEW" - swiftly run +"$TOOLCHAIN_NEW" \ - swift package \ - --scratch-path "$SCRATCH_NEW" \ - --allow-writing-to-package-directory benchmark \ - baseline update "$BASELINE_NEW" \ - --target "$TARGET" \ - --quiet --no-progress \ - "${FILTER_ARGS[@]}" - fi + "${FILTER_ARGS[@]}" } run_jemalloc From 35e81f387d2976bd1ca2981c77f92b43ec5c3f83 Mon Sep 17 00:00:00 2001 From: Mirza Ucanbarlic <56406159+supersonicbyte@users.noreply.github.com> Date: Tue, 16 Jun 2026 17:18:48 +0200 Subject: [PATCH 37/37] remove jemalloc trait form swift 6.3 manifest --- Package.swift | 34 ++++++------------- Package@swift-6.2.swift | 15 +++++++- .../BenchmarkExecutor+Extensions.swift | 2 +- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/Package.swift b/Package.swift index 4002f34d..71c4c543 100644 --- a/Package.swift +++ b/Package.swift @@ -4,17 +4,6 @@ import PackageDescription import class Foundation.ProcessInfo -// If the environment variable BENCHMARK_DISABLE_JEMALLOC is set disable Jemalloc trait (backward compatibility) 
-let disableJemalloc = ProcessInfo.processInfo.environment["BENCHMARK_DISABLE_JEMALLOC"] != nil - -let defaultTraits: Set - -if disableJemalloc { - defaultTraits = [] -} else { - defaultTraits = ["Jemalloc"] -} - // When MALLOC_INTERPOSER_LOCAL_PATH is set, use a local checkout of the // malloc-interposer package instead of the published GitHub URL. Useful // when iterating on the interposer alongside this package. @@ -54,18 +43,21 @@ var benchmarkDependencies: [Target.Dependency] = [ .product(name: "Atomics", package: "swift-atomics"), "SwiftRuntimeHooks", "BenchmarkShared", - // Gated on the `Jemalloc` trait so that `--disable-default-traits` / - // BENCHMARK_DISABLE_JEMALLOC removes the malloc-stats backend entirely (needed for e.g. - // fully-static musl builds and sanitizer runs). On Swift 6.3+ this trait selects the - // interposer backend; on Swift <=6.2 (see Package@swift-6.2.swift) it selects jemalloc. - // When the trait is off, BenchmarkExecutor falls back to the no-op MallocStatsProducer stub. 
- .product(name: "MallocInterposerSwift", package: "malloc-interposer", condition: .when(traits: ["Jemalloc"])), + .product(name: "MallocInterposerSwift", package: "malloc-interposer"), ] #if os(Linux) && compiler(>=6.3) benchmarkDependencies += [ - .product(name: "SwiftRuntimeInterposerC", package: "swift-runtime-interposer", condition: .when(platforms: [.linux])), - .product(name: "SwiftRuntimeInterposerSwift", package: "swift-runtime-interposer", condition: .when(platforms: [.linux])), + .product( + name: "SwiftRuntimeInterposerC", + package: "swift-runtime-interposer", + condition: .when(platforms: [.linux]) + ), + .product( + name: "SwiftRuntimeInterposerSwift", + package: "swift-runtime-interposer", + condition: .when(platforms: [.linux]) + ), ] #endif @@ -83,10 +75,6 @@ let package = Package( targets: ["Benchmark"] ), ], - traits: [ - .trait(name: "Jemalloc"), - .default(enabledTraits: defaultTraits), - ], dependencies: packageDependencies, targets: [ .target( diff --git a/Package@swift-6.2.swift b/Package@swift-6.2.swift index 30fdd98f..2f8c1f0a 100644 --- a/Package@swift-6.2.swift +++ b/Package@swift-6.2.swift @@ -2,6 +2,19 @@ import PackageDescription +import class Foundation.ProcessInfo + +// If the environment variable BENCHMARK_DISABLE_JEMALLOC is set disable Jemalloc trait (backward compatibility) +let disableJemalloc = ProcessInfo.processInfo.environment["BENCHMARK_DISABLE_JEMALLOC"] != nil + +let defaultTraits: Set + +if disableJemalloc { + defaultTraits = [] +} else { + defaultTraits = ["Jemalloc"] +} + let package = Package( name: "Benchmark", platforms: [ @@ -18,7 +31,7 @@ let package = Package( ], traits: [ .trait(name: "Jemalloc"), - .default(enabledTraits: ["Jemalloc"]), + .default(enabledTraits: defaultTraits), ], dependencies: [ .package(url: "https://github.com/apple/swift-system.git", .upToNextMajor(from: "1.1.0")), diff --git a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift 
index 33cc84b8..1a9d54fe 100644 --- a/Sources/Benchmark/BenchmarkExecutor+Extensions.swift +++ b/Sources/Benchmark/BenchmarkExecutor+Extensions.swift @@ -63,7 +63,7 @@ extension BenchmarkExecutor { /// (more frees than mallocs — e.g. freeing a warmup survivor, or cross-thread frees) is not a /// leak, and clamping records a `0` sample rather than letting `Statistics.add` drop it, which /// would desync the column's sample count and bias the average upward. - static func mallocStatistics( + static func mallocStatistics( // swiftlint:disable:this function_parameter_count mallocCountDelta: Int, mallocBytesDelta: Int, mallocSmallDelta: Int,