From 00f53eb3377999bab183f1beb51d6f30f97be6d6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 14 May 2026 11:38:41 -0700 Subject: [PATCH] Add follow-up test for postfork multithread Fix FreeBSD postfork child handler never being called: FreeBSD's libthr calls _malloc_postfork in both parent and child (see freebsd-src lib/libthr/thread/thr_fork.c), but jemalloc mapped it to the parent handler only. Detect the child via getpid() and route to jemalloc_postfork_child, which resets nthreads and rebuilds the descriptor queue. Remove the child_survivor_bytes vs pre_survivor_bytes comparison: on macOS where jemalloc registers as the default zone, internal allocations during the postfork handler (pthread_mutex_init) can inflate the surviving thread's tcache. Add double-fork test to verify prefork pid is refreshed correctly when a child process forks again. --- src/jemalloc_fork.c | 21 ++++ test/unit/fork.c | 280 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 300 insertions(+), 1 deletion(-) diff --git a/src/jemalloc_fork.c b/src/jemalloc_fork.c index 6c6a7109..9bab77e8 100644 --- a/src/jemalloc_fork.c +++ b/src/jemalloc_fork.c @@ -12,6 +12,19 @@ * malloc during fork(). */ +#ifdef JEMALLOC_MUTEX_INIT_CB +/* + * When JEMALLOC_MUTEX_INIT_CB is defined, pthread_atfork registration is + * skipped and the platform calls _malloc_prefork/_malloc_postfork directly. + * FreeBSD's libthr calls _malloc_postfork in both parent and child. Detect + * the child by pid change so we route to jemalloc_postfork_child, which resets + * per-arena state the parent handler does not touch (nthreads, descriptor + * queues). The check is harmless on any platform that only calls + * _malloc_postfork in the parent. 
+ */ +static pid_t jemalloc_prefork_pid; +#endif + #ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) @@ -31,6 +44,10 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); +#ifdef JEMALLOC_MUTEX_INIT_CB + jemalloc_prefork_pid = getpid(); +#endif + tsd = tsd_fetch(); narenas = narenas_total_get(); @@ -105,6 +122,10 @@ _malloc_postfork(void) if (!malloc_initialized()) { return; } + if (getpid() != jemalloc_prefork_pid) { + jemalloc_postfork_child(); + return; + } #endif assert(malloc_initialized()); diff --git a/test/unit/fork.c b/test/unit/fork.c index 8c446876..ac0b8db7 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -30,6 +30,60 @@ wait_for_child_exit(int pid) { } #endif +#ifndef _WIN32 +static void +create_arena(unsigned *arena_ind) { + size_t sz = sizeof(*arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); +} + +static void +bind_thread_arena(unsigned arena_ind) { + unsigned old_arena_ind; + size_t sz = sizeof(old_arena_ind); + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); +} + +static void +populate_tcache(void) { + void *p[16]; + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + p[i] = malloc(8); + expect_ptr_not_null(p[i], "Unexpected malloc() failure"); + } + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + free(p[i]); + } +} + +static bool +refresh_epoch(void) { + uint64_t epoch = 1; + return mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)) == 0; +} + +static int +read_arena_tcache_bytes(unsigned arena_ind, size_t *bytes) { + char ctl[64]; + malloc_snprintf(ctl, sizeof(ctl), + "stats.arenas.%u.tcache_bytes", arena_ind); + size_t sz = sizeof(*bytes); + return mallctl(ctl, bytes, &sz, NULL, 0); +} + +static int +read_arena_nthreads(unsigned arena_ind, unsigned *nthreads) { + char ctl[64]; + malloc_snprintf(ctl, sizeof(ctl), "stats.arenas.%u.nthreads", + arena_ind); + 
size_t sz = sizeof(*nthreads); + return mallctl(ctl, nthreads, &sz, NULL, 0); +} +#endif + TEST_BEGIN(test_fork) { #ifndef _WIN32 void *p; @@ -292,8 +346,232 @@ TEST_BEGIN(test_fork_postfork_descriptor_relink) { } TEST_END +#ifndef _WIN32 +typedef struct { + unsigned arena_ind; + atomic_b_t ready; + atomic_b_t release; +} fork_worker_arg_t; + +static void * +fork_worker_thd(void *arg) { + fork_worker_arg_t *worker = (fork_worker_arg_t *)arg; + + bind_thread_arena(worker->arena_ind); + populate_tcache(); + atomic_store_b(&worker->ready, true, ATOMIC_RELEASE); + + while (!atomic_load_b(&worker->release, ATOMIC_ACQUIRE)) { + sleep_ns(1000); + } + return NULL; +} +#endif + +TEST_BEGIN(test_fork_postfork_descriptor_relink_multithreaded) { +#ifndef _WIN32 + test_skip_if(!config_stats); + test_skip_if(!tcache_available(tsd_fetch())); + + unsigned survivor_arena; + unsigned peer_arena; + create_arena(&survivor_arena); + create_arena(&peer_arena); + + bind_thread_arena(survivor_arena); + populate_tcache(); + + fork_worker_arg_t survivor_worker = { + .arena_ind = survivor_arena, + .ready = ATOMIC_INIT(false), + .release = ATOMIC_INIT(false), + }; + fork_worker_arg_t peer_worker = { + .arena_ind = peer_arena, + .ready = ATOMIC_INIT(false), + .release = ATOMIC_INIT(false), + }; + thd_t survivor_thd; + thd_t peer_thd; + thd_create(&survivor_thd, fork_worker_thd, &survivor_worker); + thd_create(&peer_thd, fork_worker_thd, &peer_worker); + + while (!atomic_load_b(&survivor_worker.ready, ATOMIC_ACQUIRE) + || !atomic_load_b(&peer_worker.ready, ATOMIC_ACQUIRE)) { + sleep_ns(1000); + } + + expect_true(refresh_epoch(), "epoch refresh failed"); + + size_t pre_survivor_bytes = 0; + size_t pre_peer_bytes = 0; + unsigned pre_survivor_nthreads = 0; + unsigned pre_peer_nthreads = 0; + expect_d_eq(read_arena_tcache_bytes( + survivor_arena, &pre_survivor_bytes), + 0, "read survivor tcache_bytes pre-fork"); + expect_d_eq(read_arena_tcache_bytes(peer_arena, &pre_peer_bytes), 0, + "read peer 
tcache_bytes pre-fork"); + expect_d_eq(read_arena_nthreads( + survivor_arena, &pre_survivor_nthreads), + 0, "read survivor nthreads pre-fork"); + expect_d_eq(read_arena_nthreads(peer_arena, &pre_peer_nthreads), 0, + "read peer nthreads pre-fork"); + expect_zu_gt(pre_survivor_bytes, 0, + "Survivor arena should have cached bytes before fork"); + expect_zu_gt(pre_peer_bytes, 0, + "Peer arena should have cached bytes before fork"); + expect_u_eq(pre_survivor_nthreads, 2, + "Survivor arena should have two threads before fork"); + expect_u_eq(pre_peer_nthreads, 1, + "Peer arena should have one thread before fork"); + + pid_t pid = fork(); + if (pid == -1) { + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + if (!refresh_epoch()) { + _exit(1); + } + + size_t child_survivor_bytes = 0; + size_t child_peer_bytes = 0; + unsigned child_survivor_nthreads = 0; + unsigned child_peer_nthreads = 0; + if (read_arena_tcache_bytes( + survivor_arena, &child_survivor_bytes) != 0) { + _exit(2); + } + if (read_arena_tcache_bytes(peer_arena, &child_peer_bytes) + != 0) { + _exit(3); + } + if (read_arena_nthreads( + survivor_arena, &child_survivor_nthreads) != 0) { + _exit(4); + } + if (read_arena_nthreads(peer_arena, &child_peer_nthreads) + != 0) { + _exit(5); + } + if (child_survivor_nthreads != 1) { + _exit(6); + } + if (child_peer_nthreads != 0) { + _exit(7); + } + if (child_survivor_bytes == 0) { + _exit(8); + } + /* + * Don't compare child_survivor_bytes against pre_survivor_bytes: + * on platforms where jemalloc is the default zone allocator + * (macOS), internal allocations during the postfork handler + * (e.g. pthread_mutex_init) can inflate the surviving thread's + * tcache, making the child's bytes >= the pre-fork total. 
+ */ + if (child_peer_bytes != 0) { + _exit(9); + } + _exit(0); + } else { + wait_for_child_exit(pid); + } + + expect_true(refresh_epoch(), "epoch refresh failed"); + + size_t parent_survivor_bytes = 0; + size_t parent_peer_bytes = 0; + unsigned parent_survivor_nthreads = 0; + unsigned parent_peer_nthreads = 0; + expect_d_eq(read_arena_tcache_bytes( + survivor_arena, &parent_survivor_bytes), + 0, "read survivor tcache_bytes post-fork"); + expect_d_eq(read_arena_tcache_bytes(peer_arena, &parent_peer_bytes), 0, + "read peer tcache_bytes post-fork"); + expect_d_eq(read_arena_nthreads( + survivor_arena, &parent_survivor_nthreads), + 0, "read survivor nthreads post-fork"); + expect_d_eq(read_arena_nthreads(peer_arena, &parent_peer_nthreads), 0, + "read peer nthreads post-fork"); + expect_zu_eq(parent_survivor_bytes, pre_survivor_bytes, + "Parent survivor arena cached bytes should be unchanged"); + expect_zu_eq(parent_peer_bytes, pre_peer_bytes, + "Parent peer arena cached bytes should be unchanged"); + expect_u_eq(parent_survivor_nthreads, 2, + "Parent survivor arena should still have two threads"); + expect_u_eq(parent_peer_nthreads, 1, + "Parent peer arena should still have one thread"); + + atomic_store_b(&survivor_worker.release, true, ATOMIC_RELEASE); + atomic_store_b(&peer_worker.release, true, ATOMIC_RELEASE); + thd_join(survivor_thd, NULL); + thd_join(peer_thd, NULL); +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif +} +TEST_END + +TEST_BEGIN(test_fork_postfork_double_fork) { +#ifndef _WIN32 + test_skip_if(!config_stats); + + unsigned arena_ind; + create_arena(&arena_ind); + bind_thread_arena(arena_ind); + populate_tcache(); + + pid_t pid = fork(); + if (pid == -1) { + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* First child: verify nthreads, then fork again. 
*/ + if (!refresh_epoch()) { + _exit(1); + } + unsigned nthreads = 0; + if (read_arena_nthreads(arena_ind, &nthreads) != 0) { + _exit(2); + } + if (nthreads != 1) { + _exit(3); + } + + pid_t pid2 = fork(); + if (pid2 == -1) { + _exit(4); + } else if (pid2 == 0) { + /* Grandchild: recheck nthreads (exit codes 5-7). */ + if (!refresh_epoch()) { + _exit(5); + } + unsigned gc_nthreads = 0; + if (read_arena_nthreads(arena_ind, &gc_nthreads) + != 0) { + _exit(6); + } + _exit(gc_nthreads == 1 ? 0 : 7); + } + /* Exit with the grandchild's status so the parent sees it. */ + int status; + if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status)) { + _exit(8); + } + _exit(WEXITSTATUS(status)); + } else { + wait_for_child_exit(pid); + } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif +} +TEST_END + int main(void) { return test_no_reentrancy(test_fork, test_fork_child_usability, - test_fork_multithreaded, test_fork_postfork_descriptor_relink); + test_fork_multithreaded, test_fork_postfork_descriptor_relink, + test_fork_postfork_descriptor_relink_multithreaded, + test_fork_postfork_double_fork); }