Add follow-up tests for multithreaded and double-fork postfork handling

Fix FreeBSD postfork child handler never being called: FreeBSD's libthr
calls _malloc_postfork in both parent and child (see freebsd-src
lib/libthr/thread/thr_fork.c), but jemalloc mapped it to the parent
handler only.  Detect the child via getpid() and route to
jemalloc_postfork_child, which resets nthreads and rebuilds the
descriptor queue.

Remove the child_survivor_bytes vs pre_survivor_bytes comparison. On
macOS, where jemalloc registers as the default zone, internal allocations
made during the postfork handler (e.g. by pthread_mutex_init) can inflate
the surviving thread's tcache, so the two values are not comparable.

Add a double-fork test to verify that the recorded prefork pid is refreshed
correctly when a child process forks again.
This commit is contained in:
Slobodan Predolac 2026-05-14 11:38:41 -07:00
parent 300b58b49b
commit 00f53eb337
2 changed files with 300 additions and 1 deletions

View file

@ -12,6 +12,19 @@
* malloc during fork().
*/
#ifdef JEMALLOC_MUTEX_INIT_CB
/*
* When JEMALLOC_MUTEX_INIT_CB is defined, pthread_atfork registration is
* skipped and the platform calls _malloc_prefork/_malloc_postfork directly.
* FreeBSD's libthr calls _malloc_postfork in both parent and child. Detect
* the child by pid change so we route to jemalloc_postfork_child, which resets
* per-arena state the parent handler does not touch (nthreads, descriptor
* queues). The check is harmless on any platform that only calls
* _malloc_postfork in the parent.
*
* jemalloc_prefork_pid holds the getpid() value recorded by the prefork
* handler; the postfork handler compares it against the current getpid() and
* treats a mismatch as "running in the child".
*/
static pid_t jemalloc_prefork_pid;
#endif
#ifndef JEMALLOC_MUTEX_INIT_CB
void
jemalloc_prefork(void)
@ -31,6 +44,10 @@ _malloc_prefork(void)
#endif
assert(malloc_initialized());
#ifdef JEMALLOC_MUTEX_INIT_CB
jemalloc_prefork_pid = getpid();
#endif
tsd = tsd_fetch();
narenas = narenas_total_get();
@ -105,6 +122,10 @@ _malloc_postfork(void)
if (!malloc_initialized()) {
return;
}
if (getpid() != jemalloc_prefork_pid) {
jemalloc_postfork_child();
return;
}
#endif
assert(malloc_initialized());

View file

@ -30,6 +30,60 @@ wait_for_child_exit(int pid) {
}
#endif
#ifndef _WIN32
/*
 * Create a new arena through the "arenas.create" mallctl and store its index
 * in *arena_ind.  A mallctl failure is recorded as a failed expectation.
 */
static void
create_arena(unsigned *arena_ind) {
	size_t ind_sz = sizeof(*arena_ind);
	int err = mallctl("arenas.create", (void *)arena_ind, &ind_sz, NULL,
	    0);

	expect_d_eq(err, 0, "Unexpected mallctl() failure");
}
/*
 * Bind the calling thread to arena arena_ind via the "thread.arena" mallctl.
 * The previous binding is read back but discarded.  A mallctl failure is
 * recorded as a failed expectation.
 */
static void
bind_thread_arena(unsigned arena_ind) {
	unsigned prev_ind;
	size_t prev_sz = sizeof(prev_ind);
	int err = mallctl("thread.arena", (void *)&prev_ind, &prev_sz,
	    (void *)&arena_ind, sizeof(arena_ind));

	expect_d_eq(err, 0, "Unexpected mallctl() failure");
}
/*
 * Allocate a small batch of 8-byte objects and free them again, in allocation
 * order.  As the name suggests, the intent is to leave the freed objects
 * sitting in the calling thread's tcache; the caching itself happens inside
 * the allocator.
 */
static void
populate_tcache(void) {
	void *ptrs[16];
	const size_t nptrs = ARRAY_SIZE(ptrs);
	size_t i;

	for (i = 0; i < nptrs; i++) {
		ptrs[i] = malloc(8);
		expect_ptr_not_null(ptrs[i], "Unexpected malloc() failure");
	}

	for (i = 0; i < nptrs; i++) {
		free(ptrs[i]);
	}
}
/*
 * Write the "epoch" mallctl so that subsequently read stats are up to date.
 * Returns true when the mallctl call succeeds.
 */
static bool
refresh_epoch(void) {
	uint64_t new_epoch = 1;
	int err = mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch));

	return err == 0;
}
/*
 * Read "stats.arenas.<arena_ind>.tcache_bytes" into *bytes.
 * Returns the mallctl() result (0 on success).
 */
static int
read_arena_tcache_bytes(unsigned arena_ind, size_t *bytes) {
	char name[64];
	size_t out_sz = sizeof(*bytes);

	malloc_snprintf(name, sizeof(name), "stats.arenas.%u.tcache_bytes",
	    arena_ind);
	return mallctl(name, bytes, &out_sz, NULL, 0);
}
/*
 * Read "stats.arenas.<arena_ind>.nthreads" into *nthreads.
 * Returns the mallctl() result (0 on success).
 */
static int
read_arena_nthreads(unsigned arena_ind, unsigned *nthreads) {
	char name[64];
	size_t out_sz = sizeof(*nthreads);

	malloc_snprintf(name, sizeof(name), "stats.arenas.%u.nthreads",
	    arena_ind);
	return mallctl(name, nthreads, &out_sz, NULL, 0);
}
#endif
TEST_BEGIN(test_fork) {
#ifndef _WIN32
void *p;
@ -292,8 +346,232 @@ TEST_BEGIN(test_fork_postfork_descriptor_relink) {
}
TEST_END
#ifndef _WIN32
/* Arguments and handshake flags shared between a test and one worker thread. */
typedef struct {
/* Arena the worker binds itself to before populating its tcache. */
unsigned arena_ind;
/* Set by the worker once it has bound to the arena and done its allocations. */
atomic_b_t ready;
/* Set by the test to let the worker stop waiting and return. */
atomic_b_t release;
} fork_worker_arg_t;
/*
 * Worker thread entry point.  arg points to a fork_worker_arg_t.  The thread
 * binds to the requested arena, populates its tcache, publishes `ready`, and
 * then polls `release` (sleeping 1000 ns between polls) until the test lets
 * it go.  Always returns NULL.
 */
static void *
fork_worker_thd(void *arg) {
	fork_worker_arg_t *ctx = (fork_worker_arg_t *)arg;

	bind_thread_arena(ctx->arena_ind);
	populate_tcache();

	/* Tell the test this thread's arena binding and tcache are in place. */
	atomic_store_b(&ctx->ready, true, ATOMIC_RELEASE);

	for (;;) {
		if (atomic_load_b(&ctx->release, ATOMIC_ACQUIRE)) {
			break;
		}
		sleep_ns(1000);
	}

	return NULL;
}
#endif
/*
 * Fork while several threads hold cached memory and check per-arena stats on
 * both sides of the fork.
 *
 * Setup: the main thread and one worker are bound to the "survivor" arena, a
 * second worker is bound to the "peer" arena, and all three have run
 * populate_tcache().
 *
 * Child expectations (reported via exit code, 0 == success):
 *   1    epoch refresh failed
 *   2-5  a stats mallctl read failed
 *   6    survivor arena nthreads != 1
 *   7    peer arena nthreads != 0
 *   8    survivor arena tcache_bytes == 0
 *   9    peer arena tcache_bytes != 0
 *
 * Parent expectations: tcache_bytes and nthreads for both arenas are the same
 * after the fork as before it.
 */
TEST_BEGIN(test_fork_postfork_descriptor_relink_multithreaded) {
#ifndef _WIN32
/* The checks below read stats.* mallctls and rely on tcache being in use. */
test_skip_if(!config_stats);
test_skip_if(!tcache_available(tsd_fetch()));
unsigned survivor_arena;
unsigned peer_arena;
create_arena(&survivor_arena);
create_arena(&peer_arena);
/* The main thread (the one that calls fork()) lives in the survivor arena. */
bind_thread_arena(survivor_arena);
populate_tcache();
fork_worker_arg_t survivor_worker = {
.arena_ind = survivor_arena,
.ready = ATOMIC_INIT(false),
.release = ATOMIC_INIT(false),
};
fork_worker_arg_t peer_worker = {
.arena_ind = peer_arena,
.ready = ATOMIC_INIT(false),
.release = ATOMIC_INIT(false),
};
thd_t survivor_thd;
thd_t peer_thd;
thd_create(&survivor_thd, fork_worker_thd, &survivor_worker);
thd_create(&peer_thd, fork_worker_thd, &peer_worker);
/* Wait until both workers have bound to their arenas and allocated. */
while (!atomic_load_b(&survivor_worker.ready, ATOMIC_ACQUIRE)
|| !atomic_load_b(&peer_worker.ready, ATOMIC_ACQUIRE)) {
sleep_ns(1000);
}
/* Take the pre-fork baseline. */
expect_true(refresh_epoch(), "epoch refresh failed");
size_t pre_survivor_bytes = 0;
size_t pre_peer_bytes = 0;
unsigned pre_survivor_nthreads = 0;
unsigned pre_peer_nthreads = 0;
expect_d_eq(read_arena_tcache_bytes(
survivor_arena, &pre_survivor_bytes),
0, "read survivor tcache_bytes pre-fork");
expect_d_eq(read_arena_tcache_bytes(peer_arena, &pre_peer_bytes), 0,
"read peer tcache_bytes pre-fork");
expect_d_eq(read_arena_nthreads(
survivor_arena, &pre_survivor_nthreads),
0, "read survivor nthreads pre-fork");
expect_d_eq(read_arena_nthreads(peer_arena, &pre_peer_nthreads), 0,
"read peer nthreads pre-fork");
expect_zu_gt(pre_survivor_bytes, 0,
"Survivor arena should have cached bytes before fork");
expect_zu_gt(pre_peer_bytes, 0,
"Peer arena should have cached bytes before fork");
/* Survivor arena: main thread + survivor worker.  Peer arena: peer worker. */
expect_u_eq(pre_survivor_nthreads, 2,
"Survivor arena should have two threads before fork");
expect_u_eq(pre_peer_nthreads, 1,
"Peer arena should have one thread before fork");
pid_t pid = fork();
if (pid == -1) {
test_fail("Unexpected fork() failure");
} else if (pid == 0) {
/*
 * Child: only the forking thread exists here.  Failures are reported
 * through distinct exit codes, which the parent collects below.
 */
if (!refresh_epoch()) {
_exit(1);
}
size_t child_survivor_bytes = 0;
size_t child_peer_bytes = 0;
unsigned child_survivor_nthreads = 0;
unsigned child_peer_nthreads = 0;
if (read_arena_tcache_bytes(
survivor_arena, &child_survivor_bytes) != 0) {
_exit(2);
}
if (read_arena_tcache_bytes(peer_arena, &child_peer_bytes)
!= 0) {
_exit(3);
}
if (read_arena_nthreads(
survivor_arena, &child_survivor_nthreads) != 0) {
_exit(4);
}
if (read_arena_nthreads(peer_arena, &child_peer_nthreads)
!= 0) {
_exit(5);
}
/* Only the forking thread should still be counted in its arena. */
if (child_survivor_nthreads != 1) {
_exit(6);
}
/* The peer worker does not exist in the child. */
if (child_peer_nthreads != 0) {
_exit(7);
}
/* The forking thread's own cached bytes must still be accounted for. */
if (child_survivor_bytes == 0) {
_exit(8);
}
/*
* Don't compare child_survivor_bytes against pre_survivor_bytes:
* on platforms where jemalloc is the default zone allocator
* (macOS), internal allocations during the postfork handler
* (e.g. pthread_mutex_init) can inflate the surviving thread's
* tcache, making the child's bytes >= the pre-fork total.
*/
/* Nothing should remain attributed to the peer arena's tcaches. */
if (child_peer_bytes != 0) {
_exit(9);
}
_exit(0);
} else {
wait_for_child_exit(pid);
}
/* Parent: the fork must not have disturbed any of the baseline stats. */
expect_true(refresh_epoch(), "epoch refresh failed");
size_t parent_survivor_bytes = 0;
size_t parent_peer_bytes = 0;
unsigned parent_survivor_nthreads = 0;
unsigned parent_peer_nthreads = 0;
expect_d_eq(read_arena_tcache_bytes(
survivor_arena, &parent_survivor_bytes),
0, "read survivor tcache_bytes post-fork");
expect_d_eq(read_arena_tcache_bytes(peer_arena, &parent_peer_bytes), 0,
"read peer tcache_bytes post-fork");
expect_d_eq(read_arena_nthreads(
survivor_arena, &parent_survivor_nthreads),
0, "read survivor nthreads post-fork");
expect_d_eq(read_arena_nthreads(peer_arena, &parent_peer_nthreads), 0,
"read peer nthreads post-fork");
expect_zu_eq(parent_survivor_bytes, pre_survivor_bytes,
"Parent survivor arena cached bytes should be unchanged");
expect_zu_eq(parent_peer_bytes, pre_peer_bytes,
"Parent peer arena cached bytes should be unchanged");
expect_u_eq(parent_survivor_nthreads, 2,
"Parent survivor arena should still have two threads");
expect_u_eq(parent_peer_nthreads, 1,
"Parent peer arena should still have one thread");
/* Let both workers leave their wait loops, then reap them. */
atomic_store_b(&survivor_worker.release, true, ATOMIC_RELEASE);
atomic_store_b(&peer_worker.release, true, ATOMIC_RELEASE);
thd_join(survivor_thd, NULL);
thd_join(peer_thd, NULL);
#else
test_skip("fork(2) is irrelevant to Windows");
#endif
}
TEST_END
/*
 * Fork twice in a row (parent -> child -> grandchild) and check in each
 * descendant that the arena the forking thread is bound to reports exactly
 * one thread.  This exercises the prefork pid being re-recorded when a child
 * process itself forks.
 *
 * Results travel back to the parent as exit codes (0 == success):
 *   1-3    first child: epoch refresh failed / nthreads read failed /
 *          nthreads != 1
 *   4      first child: second fork() failed
 *   5      first child: waitpid() on the grandchild failed
 *   6      grandchild did not exit normally (e.g. killed by a signal)
 *   11-13  grandchild: same three checks as 1-3
 *
 * The first child waits for the grandchild itself and exits with the
 * grandchild's status.  Anything recorded only inside the child process would
 * be lost once the child calls _exit(0), so a grandchild failure would
 * otherwise never reach the parent.
 */
TEST_BEGIN(test_fork_postfork_double_fork) {
#ifndef _WIN32
	test_skip_if(!config_stats);

	unsigned arena_ind;
	create_arena(&arena_ind);
	bind_thread_arena(arena_ind);
	populate_tcache();

	pid_t pid = fork();
	if (pid == -1) {
		test_fail("Unexpected fork() failure");
	} else if (pid == 0) {
		/* First child: verify nthreads, then fork again. */
		if (!refresh_epoch()) {
			_exit(1);
		}
		unsigned nthreads = 0;
		if (read_arena_nthreads(arena_ind, &nthreads) != 0) {
			_exit(2);
		}
		if (nthreads != 1) {
			_exit(3);
		}

		pid_t pid2 = fork();
		if (pid2 == -1) {
			_exit(4);
		} else if (pid2 == 0) {
			/*
			 * Grandchild: verify nthreads again.  Exit codes are
			 * offset by 10 so they cannot be confused with the
			 * first child's own failures.
			 */
			if (!refresh_epoch()) {
				_exit(11);
			}
			unsigned gc_nthreads = 0;
			if (read_arena_nthreads(arena_ind, &gc_nthreads)
			    != 0) {
				_exit(12);
			}
			if (gc_nthreads != 1) {
				_exit(13);
			}
			_exit(0);
		}

		/*
		 * Still the first child: reap the grandchild and forward its
		 * outcome.  With options == 0, waitpid() only returns for a
		 * terminated child, so "not exited" means "signaled".
		 */
		int status;
		if (waitpid(pid2, &status, 0) == -1) {
			_exit(5);
		}
		if (!WIFEXITED(status)) {
			_exit(6);
		}
		_exit(WEXITSTATUS(status));
	} else {
		/* Parent: a nonzero child exit status is reported here. */
		wait_for_child_exit(pid);
	}
#else
	test_skip("fork(2) is irrelevant to Windows");
#endif
}
TEST_END
/*
 * Test driver: runs every fork test case through test_no_reentrancy() and
 * returns its result as the process exit status.
 */
int
main(void) {
	return test_no_reentrancy(test_fork, test_fork_child_usability,
	    test_fork_multithreaded, test_fork_postfork_descriptor_relink,
	    test_fork_postfork_descriptor_relink_multithreaded,
	    test_fork_postfork_double_fork);
}