From b92420d309eac59f744a736a5c9c83bf45dc569d Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 8 May 2026 10:01:00 -0700 Subject: [PATCH] Replace arena->tcache_ql with cache_bin_array_descriptor_ql walks Drop the duplicate arena->tcache_ql; stats merging walks the cache_bin_array_descriptor_ql directly. Rename the protecting mutex from tcache_ql_mtx to cache_bin_array_descriptor_ql_mtx to match. Add an assertion in test_thread_migrate_arena that the dissociate-time flush zeros cache_bin->tstats.nrequests. --- include/jemalloc/internal/arena_structs.h | 11 ++-- include/jemalloc/internal/tcache_externs.h | 3 +- include/jemalloc/internal/tcache_structs.h | 3 - include/jemalloc/internal/witness.h | 2 +- src/arena.c | 19 +++--- src/ctl.c | 2 +- src/jemalloc_init.c | 14 ++-- src/tcache.c | 42 ++++++------ test/unit/arenas_management.c | 77 ++++++++++++++++++++++ 9 files changed, 122 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 471f7692..77777f1d 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -48,15 +48,14 @@ struct arena_s { arena_stats_t stats; /* - * Lists of tcaches and cache_bin_array_descriptors for extant threads - * associated with this arena. Stats from these are merged - * incrementally, and at exit if opt_stats_print is enabled. + * List of cache_bin_array_descriptors for extant threads associated + * with this arena. Stats from these are merged incrementally, and at + * exit if opt_stats_print is enabled. * - * Synchronization: tcache_ql_mtx. + * Synchronization: cache_bin_array_descriptor_ql_mtx. */ - ql_head(tcache_slow_t) tcache_ql; ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t tcache_ql_mtx; + malloc_mutex_t cache_bin_array_descriptor_ql_mtx; /* * Represents a dss_prec_t, but atomically. 
diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 97024b2b..59ef724e 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -66,7 +66,8 @@ void tcache_arena_reassociate( tcache_t *tcache_create_explicit(tsd_t *tsd); bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_stats_merge(tsdn_t *tsdn, + cache_bin_array_descriptor_t *desc, arena_t *arena); bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 2c000de3..710286c9 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -19,9 +19,6 @@ */ struct tcache_slow_s { - /* Lets us track all the tcaches in an arena. */ - ql_elm(tcache_slow_t) link; - /* * The descriptor lets the arena find our cache bins without seeing the * tcache definition. This enables arenas to aggregate stats across diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 0a426ff5..241178b1 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -44,7 +44,7 @@ enum witness_rank_e { */ WITNESS_RANK_CORE, WITNESS_RANK_DECAY = WITNESS_RANK_CORE, - WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_CACHE_BIN_ARRAY_DESCRIPTOR_QL, WITNESS_RANK_SEC_BIN, diff --git a/src/arena.c b/src/arena.c index a76b26e8..6c55f2ab 100644 --- a/src/arena.c +++ b/src/arena.c @@ -164,7 +164,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Currently cached bytes and sanitizer-stashed bytes in tcache. 
*/ astats->tcache_bytes = 0; astats->tcache_stashed_bytes = 0; - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_lock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach (descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { @@ -183,8 +183,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[arena_prof_mutex_tcache_list], - &arena->tcache_ql_mtx); - malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); + &arena->cache_bin_array_descriptor_ql_mtx); + malloc_mutex_unlock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); #define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ malloc_mutex_lock(tsdn, &arena->mtx); \ @@ -1809,10 +1809,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { goto label_error; } - ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); - if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", - WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&arena->cache_bin_array_descriptor_ql_mtx, + "cache_bin_array_descriptor_ql", + WITNESS_RANK_CACHE_BIN_ARRAY_DESCRIPTOR_QL, + malloc_mutex_rank_exclusive)) { goto label_error; } } @@ -2022,7 +2023,7 @@ arena_prefork0(tsdn_t *tsdn, arena_t *arena) { void arena_prefork1(tsdn_t *tsdn, arena_t *arena) { if (config_stats) { - malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_prefork(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } } @@ -2075,7 +2076,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { base_postfork_parent(tsdn, arena->base); pa_shard_postfork_parent(tsdn, &arena->pa_shard); if (config_stats) { - malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } } @@ -2100,6 +2101,6 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { 
base_postfork_child(tsdn, arena->base); pa_shard_postfork_child(tsdn, &arena->pa_shard); if (config_stats) { - malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_postfork_child(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } } diff --git a/src/ctl.c b/src/ctl.c index 200f0793..c0057bd3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3974,7 +3974,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_pinned.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.decay_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.decay_muzzy.mtx); - MUTEX_PROF_RESET(arena->tcache_ql_mtx); + MUTEX_PROF_RESET(arena->cache_bin_array_descriptor_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); for (szind_t j = 0; j < SC_NBINS; j++) { diff --git a/src/jemalloc_init.c b/src/jemalloc_init.c index 0f7d0a4a..72b204bd 100644 --- a/src/jemalloc_init.c +++ b/src/jemalloc_init.c @@ -305,16 +305,16 @@ stats_print_atexit(void) { for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { - tcache_slow_t *tcache_slow; + cache_bin_array_descriptor_t *desc; - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach ( - tcache_slow, &arena->tcache_ql, link) { - tcache_stats_merge( - tsdn, tcache_slow->tcache, arena); + malloc_mutex_lock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); + ql_foreach (desc, + &arena->cache_bin_array_descriptor_ql, + link) { + tcache_stats_merge(tsdn, desc, arena); } malloc_mutex_unlock( - tsdn, &arena->tcache_ql_mtx); + tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } } } diff --git a/src/tcache.c b/src/tcache.c index c843dee7..db285b1b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -721,15 +721,12 @@ tcache_bin_ncached_max_read( } /* - * Insert tcache_slow into arena's tcache list. Caller must hold - * arena->tcache_ql_mtx, OR be in the postfork-child path (single-threaded, - * mutex re-init pending). 
config_stats must be true. + * Caller must hold arena->cache_bin_array_descriptor_ql_mtx, OR be in the postfork-child path + * (single-threaded, mutex re-init pending). config_stats must be true. */ static void tcache_arena_link(arena_t *arena, tcache_slow_t *tcache_slow, tcache_t *tcache) { - ql_elm_new(tcache_slow, link); - ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( &tcache_slow->cache_bin_array_descriptor, tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, @@ -743,9 +740,9 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_slow->arena = arena; if (config_stats) { - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_lock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); tcache_arena_link(arena, tcache_slow, tcache); - malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_unlock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } } @@ -755,24 +752,25 @@ tcache_arena_dissociate( arena_t *arena = tcache_slow->arena; assert(arena != NULL); if (config_stats) { - /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + malloc_mutex_lock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); if (config_debug) { - bool in_ql = false; - tcache_slow_t *iter; - ql_foreach (iter, &arena->tcache_ql, link) { - if (iter == tcache_slow) { + bool in_ql = false; + cache_bin_array_descriptor_t *iter; + ql_foreach (iter, + &arena->cache_bin_array_descriptor_ql, link) { + if (iter == + &tcache_slow->cache_bin_array_descriptor) { in_ql = true; break; } } assert(in_ql); } - ql_remove(&arena->tcache_ql, tcache_slow, link); ql_remove(&arena->cache_bin_array_descriptor_ql, &tcache_slow->cache_bin_array_descriptor, link); - tcache_stats_merge(tsdn, tcache_slow->tcache, arena); - malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); + tcache_stats_merge(tsdn, + &tcache_slow->cache_bin_array_descriptor, arena); + malloc_mutex_unlock(tsdn, &arena->cache_bin_array_descriptor_ql_mtx); } tcache_slow->arena = NULL; } @@ -789,7 +787,6 @@ tcache_arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { if (!config_stats) { return; } - ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); tcache_slow_t *tcache_slow = tcache_slow_get(tsdn_tsd(tsdn)); if (tcache_slow != NULL && tcache_slow->arena == arena) { @@ -811,7 +808,6 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem, tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; - memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t))); nstime_init_zero(&tcache_slow->last_gc_time); tcache_slow->next_gc_bin = 0; tcache_slow->next_gc_bin_small = 0; @@ -1277,13 +1273,13 @@ tcache_cleanup(tsd_t *tsd) { } void -tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { +tcache_stats_merge(tsdn_t *tsdn, cache_bin_array_descriptor_t *desc, + arena_t *arena) { cassert(config_stats); - /* Merge and reset tcache stats. 
*/ - for (unsigned i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { - cache_bin_t *cache_bin = &tcache->bins[i]; - if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { + for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_t *cache_bin = &desc->bins[i]; + if (cache_bin_disabled(cache_bin)) { continue; } if (i < SC_NBINS) { diff --git a/test/unit/arenas_management.c b/test/unit/arenas_management.c index b5861b4e..dc464ee7 100644 --- a/test/unit/arenas_management.c +++ b/test/unit/arenas_management.c @@ -111,6 +111,24 @@ migrate_worker(void *unused) { expect_ptr_not_null(a1, "arena1 should exist"); expect_ptr_not_null(a2, "arena2 should exist"); + /* + * Populate cache_bin tstats with explicit small allocs so the + * migrate's flush has something to merge. + */ + szind_t test_binind = sz_size2index(8); + if (config_stats && tcache_available(tsd)) { + void *p[16]; + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + p[i] = malloc(8); + } + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + free(p[i]); + } + cache_bin_t *cb = &tsd_tcachep_get(tsd)->bins[test_binind]; + expect_u64_gt(cb->tstats.nrequests, 0, + "Small allocs should accumulate cache_bin tstats"); + } + thread_migrate_arena(tsd, a1, a2); expect_ptr_eq( @@ -121,6 +139,65 @@ migrate_worker(void *unused) { "tcache should be reassociated with newarena"); } + if (config_stats && tcache_available(tsd)) { + cache_bin_t *cb = &tsd_tcachep_get(tsd)->bins[test_binind]; + expect_u64_eq(cb->tstats.nrequests, 0, + "cache_bin tstats should be 0 after migrate flush"); + } + + /* + * Symmetric check: post-migrate allocations should accumulate against + * a2, not a1. Refresh stats, allocate N items, refresh again, and + * verify a2's bin nrequests grew while a1's did not. 
+ */ + if (config_stats && tcache_available(tsd)) { + char ctl_a1[64], ctl_a2[64]; + uint64_t a1_pre, a2_pre, a1_post, a2_post; + size_t sz_u64 = sizeof(uint64_t); + uint64_t epoch = 1; + + malloc_snprintf(ctl_a1, sizeof(ctl_a1), + "stats.arenas.%u.bins.%u.nrequests", + migrate_a1_ind, test_binind); + malloc_snprintf(ctl_a2, sizeof(ctl_a2), + "stats.arenas.%u.bins.%u.nrequests", + migrate_a2_ind, test_binind); + + expect_d_eq(mallctl("epoch", NULL, NULL, &epoch, + sizeof(uint64_t)), 0, "epoch refresh"); + expect_d_eq(mallctl(ctl_a1, &a1_pre, &sz_u64, NULL, 0), 0, + "read a1 nrequests baseline"); + expect_d_eq(mallctl(ctl_a2, &a2_pre, &sz_u64, NULL, 0), 0, + "read a2 nrequests baseline"); + + void *p[24]; + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + p[i] = malloc(8); + } + for (size_t i = 0; i < ARRAY_SIZE(p); i++) { + free(p[i]); + } + + /* + * Flushing the tcache merges cache_bin tstats into the arena's + * bin stats (epoch refresh alone does not). + */ + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, + 0), 0, "thread.tcache.flush"); + + expect_d_eq(mallctl("epoch", NULL, NULL, &epoch, + sizeof(uint64_t)), 0, "epoch refresh"); + expect_d_eq(mallctl(ctl_a1, &a1_post, &sz_u64, NULL, 0), 0, + "read a1 nrequests post"); + expect_d_eq(mallctl(ctl_a2, &a2_post, &sz_u64, NULL, 0), 0, + "read a2 nrequests post"); + + expect_u64_eq(a1_post, a1_pre, + "a1 nrequests should be unchanged by post-migrate allocs"); + expect_u64_ge(a2_post - a2_pre, (uint64_t)ARRAY_SIZE(p), + "a2 nrequests should reflect post-migrate allocs"); + } + atomic_store_b(&migrate_done, true, ATOMIC_RELEASE); while (!atomic_load_b(&migrate_go_exit, ATOMIC_ACQUIRE)) {