From 6b13adf375964b44ab790c7ea2ffe267af1c6a06 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 19 May 2026 00:11:15 -0700 Subject: [PATCH] Use SEC in PAC to reduce lock contention on the ecaches Add a small extent cache in front of the PAC ecaches. Allocs and dallocs that fit are served from per-shard SEC bins without taking the ecache mutex; overflow falls through to the backing ecaches, including ecache_pinned for pinned extents. The feature is gated behind experimental_pac_sec_nshards (default 0, disabled). To support independent HPA and PAC SEC instances, sec_alloc/sec_dalloc/sec_fill take an explicit shard argument, with HPA and PAC using separate TSD shard slots. --- Makefile.in | 1 + .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/mutex_prof.h | 3 +- include/jemalloc/internal/pac.h | 12 + include/jemalloc/internal/sec.h | 14 +- include/jemalloc/internal/tsd_internals.h | 2 + src/conf.c | 11 + src/ctl.c | 41 ++ src/hpa.c | 21 +- src/jemalloc.c | 3 + src/pa.c | 1 + src/pa_extra.c | 9 + src/pac.c | 90 ++++- src/sec.c | 47 ++- src/stats.c | 38 ++ test/unit/json_stats.c | 2 +- test/unit/mallctl.c | 3 + test/unit/pac_sec_integration.c | 368 ++++++++++++++++++ test/unit/sec.c | 72 ++-- 19 files changed, 680 insertions(+), 59 deletions(-) create mode 100644 test/unit/pac_sec_integration.c diff --git a/Makefile.in b/Makefile.in index f939350f..db03a3d0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -266,6 +266,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/ncached_max.c \ $(srcroot)test/unit/oversize_threshold.c \ $(srcroot)test/unit/pa.c \ + $(srcroot)test/unit/pac_sec_integration.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b5b12e91..6203e0d6 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ 
-22,6 +22,7 @@ extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; extern sec_opts_t opt_hpa_sec_opts; +extern sec_opts_t opt_pac_sec_opts; extern const char *opt_junk; extern bool opt_junk_alloc; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 37f6a377..590b46c3 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -37,7 +37,8 @@ typedef enum { OP(tcache_list) \ OP(hpa_shard) \ OP(hpa_shard_grow) \ - OP(hpa_sec) + OP(hpa_sec) \ + OP(pac_sec) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index dc16dc1c..a031c7b6 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -8,6 +8,7 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/lockedint.h" +#include "jemalloc/internal/sec.h" #include "jemalloc/internal/tsd_types.h" #include "san_bump.h" @@ -84,12 +85,21 @@ struct pac_stats_s { /* VM space had to be leaked (undocumented). Normally 0. */ atomic_zu_t abandoned_vm; + + /* PAC SEC stats. Derived. */ + sec_stats_t pac_sec_stats; }; typedef struct pac_s pac_t; struct pac_s { + /* Small extent cache in front of PAC ecaches to reduce contention. */ + sec_t sec; + /* 0 disables PAC SEC; otherwise max size SEC will cache. */ + atomic_zu_t sec_max_alloc; + /* True once pinned memory has been seen. */ atomic_b_t has_pinned; + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. 
@@ -237,4 +247,6 @@ ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); void pac_reset(tsdn_t *tsdn, pac_t *pac); void pac_destroy(tsdn_t *tsdn, pac_t *pac); +void pac_sec_flush(tsdn_t *tsdn, pac_t *pac); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 2a7f7238..e76ca9df 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -111,10 +111,17 @@ sec_size_supported(sec_t *sec, size_t size) { void sec_calc_nallocs_for_size( sec_t *sec, size_t size, size_t *min_nallocs, size_t *max_nallocs); +/* + * Lazily picks (and caches in *idxp) a shard for the calling thread. Different + * SEC instances pass independent per-thread uint8_t slots, initialized to + * (uint8_t)-1. + */ +uint8_t sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp); + /* If sec does not have extent available, it will return NULL. */ -edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size); +edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard); void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, - edata_list_active_t *result, size_t nallocs); + edata_list_active_t *result, size_t nallocs, uint8_t shard); /* * Upon return dalloc_list may be empty if edata is consumed by sec or non-empty @@ -124,7 +131,8 @@ void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, * considered "hot" and preserved in the cache, while "colder" ones are * returned). 
*/ -void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list); +void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list, + uint8_t shard); bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts); diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 46b4930f..8d2a675a 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -76,6 +76,7 @@ typedef void (*test_callback_t)(int *); O(arena, arena_t *, arena_t *) \ O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ O(sec_shard, uint8_t, uint8_t) \ + O(pac_sec_shard, uint8_t, uint8_t) \ O(binshards, tsd_binshards_t, tsd_binshards_t) \ O(peak, peak_t, peak_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ @@ -95,6 +96,7 @@ typedef void (*test_callback_t)(int *); /* arena */ NULL, /* arena_decay_ticker */ \ TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ /* sec_shard */ (uint8_t) - 1, \ + /* pac_sec_shard */ (uint8_t) - 1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* peak */ PEAK_INITIALIZER, /* tcache_slow */ \ TCACHE_SLOW_ZERO_INITIALIZER, \ diff --git a/src/conf.c b/src/conf.c index ecef73f5..869ad432 100644 --- a/src/conf.c +++ b/src/conf.c @@ -953,6 +953,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_pac_sec_opts.nshards, + "experimental_pac_sec_nshards", 0, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_alloc, + "experimental_pac_sec_max_alloc", PAGE, + USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_bytes, + "experimental_pac_sec_max_bytes", + SEC_OPTS_MAX_BYTES_DEFAULT, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); if 
(CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { diff --git a/src/ctl.c b/src/ctl.c index e048135a..9c28ce27 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -115,6 +115,9 @@ CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) +CTL_PROTO(opt_experimental_pac_sec_nshards) +CTL_PROTO(opt_experimental_pac_sec_max_alloc) +CTL_PROTO(opt_experimental_pac_sec_max_bytes) CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) @@ -352,6 +355,11 @@ CTL_PROTO(stats_arenas_i_hpa_sec_misses) CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush) CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush) CTL_PROTO(stats_arenas_i_hpa_sec_overfills) +CTL_PROTO(stats_arenas_i_pac_sec_bytes) +CTL_PROTO(stats_arenas_i_pac_sec_hits) +CTL_PROTO(stats_arenas_i_pac_sec_misses) +CTL_PROTO(stats_arenas_i_pac_sec_dalloc_flush) +CTL_PROTO(stats_arenas_i_pac_sec_dalloc_noflush) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -495,6 +503,12 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, + {NAME("experimental_pac_sec_nshards"), + CTL(opt_experimental_pac_sec_nshards)}, + {NAME("experimental_pac_sec_max_alloc"), + CTL(opt_experimental_pac_sec_max_alloc)}, + {NAME("experimental_pac_sec_max_bytes"), + CTL(opt_experimental_pac_sec_max_bytes)}, {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -859,6 +873,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = { CTL(stats_arenas_i_hpa_sec_dalloc_noflush)}, {NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)}, {NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)}, + 
{NAME("pac_sec_bytes"), CTL(stats_arenas_i_pac_sec_bytes)}, + {NAME("pac_sec_hits"), CTL(stats_arenas_i_pac_sec_hits)}, + {NAME("pac_sec_misses"), CTL(stats_arenas_i_pac_sec_misses)}, + {NAME("pac_sec_dalloc_noflush"), + CTL(stats_arenas_i_pac_sec_dalloc_noflush)}, + {NAME("pac_sec_dalloc_flush"), CTL(stats_arenas_i_pac_sec_dalloc_flush)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -1219,6 +1239,10 @@ ctl_arena_stats_sdmerge( &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm, &astats->astats.pa_shard_stats.pac_stats.abandoned_vm); + sec_stats_accum( + &sdstats->astats.pa_shard_stats.pac_stats.pac_sec_stats, + &astats->astats.pa_shard_stats.pac_stats.pac_sec_stats); + sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; sdstats->astats.tcache_stashed_bytes += astats->astats.tcache_stashed_bytes; @@ -2208,6 +2232,12 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) +CTL_RO_NL_GEN(opt_experimental_pac_sec_nshards, + opt_pac_sec_opts.nshards, size_t) +CTL_RO_NL_GEN(opt_experimental_pac_sec_max_alloc, + opt_pac_sec_opts.max_alloc, size_t) +CTL_RO_NL_GEN(opt_experimental_pac_sec_max_bytes, + opt_pac_sec_opts.max_bytes, size_t) CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN( opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) @@ -3881,6 +3911,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills, arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_bytes, + 
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_hits, + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nhits, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_misses, + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nmisses, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_flush, + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_flush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_noflush, + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_noflush, size_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, diff --git a/src/hpa.c b/src/hpa.c index d59b7fc7..fde95afa 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -16,6 +16,18 @@ const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; bool opt_experimental_hpa_enforce_hugify = false; +/* Returns the HPA SEC shard index for the calling thread: 0 when the SEC has at most one shard or when tsdn is null; otherwise the value sec_shard_pick() returns for the slot obtained from tsd_sec_shardp_get(). */ static inline uint8_t +hpa_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + if (sec->opts.nshards <= 1) { + return 0; + } + if (tsdn_null(tsdn)) { + return 0; + } + tsd_t *tsd = tsdn_tsd(tsdn); + return sec_shard_pick(tsd, sec, tsd_sec_shardp_get(tsd)); +} + bool hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; @@ -947,7 +959,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment, && (size > shard->opts.slab_max_alloc)) { return NULL; } - edata_t *edata = sec_alloc(tsdn, &shard->sec, size); + edata_t *edata = sec_alloc(tsdn, &shard->sec, size, + hpa_sec_shard_pick(tsdn, &shard->sec)); if (edata != NULL) { return edata; } @@ -968,7 +981,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment, } if
(nsuccess > 0) { assert(sec_size_supported(&shard->sec, size)); - sec_fill(tsdn, &shard->sec, size, &results, nsuccess); + sec_fill(tsdn, &shard->sec, size, &results, nsuccess, + hpa_sec_shard_pick(tsdn, &shard->sec)); /* Unlikely rollback in case of overfill */ if (!edata_list_active_empty(&results)) { hpa_dalloc_batch( @@ -1075,7 +1089,8 @@ hpa_dalloc(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata, edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); - sec_dalloc(tsdn, &shard->sec, &dalloc_list); + sec_dalloc(tsdn, &shard->sec, &dalloc_list, + hpa_sec_shard_pick(tsdn, &shard->sec)); if (edata_list_active_empty(&dalloc_list)) { /* sec consumed the pointer */ *deferred_work_generated = false; diff --git a/src/jemalloc.c b/src/jemalloc.c index 6544657d..7378f946 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -181,6 +181,9 @@ size_t opt_calloc_madvise_threshold = CALLOC_MADVISE_THRESHOLD_DEFAULT; bool opt_hpa = false; hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; +sec_opts_t opt_pac_sec_opts = {0, + (32 * 1024) > (PAGE * 2) ? (32 * 1024) : (PAGE * 2), + SEC_OPTS_MAX_BYTES_DEFAULT}; /* False should be the common case. Set to true to trigger initialization. 
*/ bool malloc_slow = true; diff --git a/src/pa.c b/src/pa.c index f14fda81..44f89700 100644 --- a/src/pa.c +++ b/src/pa.c @@ -94,6 +94,7 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) { + pac_sec_flush(tsdn, &shard->pac); if (shard->ever_used_hpa) { hpa_shard_flush(tsdn, &shard->hpa); } diff --git a/src/pa_extra.c b/src/pa_extra.c index 17b4449a..81989ebe 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,6 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { + sec_prefork2(tsdn, &shard->pac.sec); if (shard->ever_used_hpa) { hpa_shard_prefork2(tsdn, &shard->hpa); } @@ -53,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); ecache_postfork_parent(tsdn, &shard->pac.ecache_pinned); malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); + sec_postfork_parent(tsdn, &shard->pac.sec); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { @@ -68,6 +70,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_retained); ecache_postfork_child(tsdn, &shard->pac.ecache_pinned); malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); + sec_postfork_child(tsdn, &shard->pac.sec); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { @@ -179,6 +182,9 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, if (shard->ever_used_hpa) { hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out); } + + sec_stats_merge(tsdn, &shard->pac.sec, + &pa_shard_stats_out->pac_stats.pac_sec_stats); } static void @@ -207,6 +213,9 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, 
mutex_prof_data, &shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); + sec_mutex_stats_read(tsdn, &shard->pac.sec, + &mutex_prof_data[arena_prof_mutex_pac_sec]); + if (shard->ever_used_hpa) { pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa.mtx, arena_prof_mutex_hpa_shard); diff --git a/src/pac.c b/src/pac.c index aab2bb1e..f59d2819 100644 --- a/src/pac.c +++ b/src/pac.c @@ -4,6 +4,18 @@ #include "jemalloc/internal/pac.h" #include "jemalloc/internal/san.h" +/* Returns the PAC SEC shard index for the calling thread: 0 when the SEC has at most one shard or when tsdn is null; otherwise the value sec_shard_pick() returns for the slot obtained from tsd_pac_sec_shardp_get(). */ static inline uint8_t +pac_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + if (sec->opts.nshards <= 1) { + return 0; + } + if (tsdn_null(tsdn)) { + return 0; + } + tsd_t *tsd = tsdn_tsd(tsdn); + return sec_shard_pick(tsd, sec, tsd_pac_sec_shardp_get(tsd)); +} + static inline void pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { @@ -92,6 +104,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->stats_mtx = stats_mtx; atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); + if (sec_init(tsdn, &pac->sec, base, &opt_pac_sec_opts)) { + /* Fall back to no SEC on allocation failure. */ + pac->sec.opts.nshards = 0; + } + /* sec_max_alloc gates the SEC paths by size; it stays 0 when the SEC is unused or when dirty_decay_ms is 0, and otherwise takes the configured max_alloc. */ if (!sec_is_used(&pac->sec) || dirty_decay_ms == 0) { + atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED); + } else { + atomic_store_zu(&pac->sec_max_alloc, + pac->sec.opts.max_alloc, ATOMIC_RELAXED); + } + return false; } @@ -133,6 +156,15 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata_t *edata = NULL; + /* Try the SEC first, but only for unguarded, non-zeroed requests with alignment of at most PAGE whose size does not exceed sec_max_alloc; on a miss, fall through to the code below. */ if (!guarded && !zero && alignment <= PAGE + && size <= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) { + edata = sec_alloc(tsdn, &pac->sec, size, + pac_sec_shard_pick(tsdn, &pac->sec)); + if (edata != NULL) { + return edata; + } + } + /* * Guarded allocations need surrounding guard pages, which the pinned * pool does not maintain; skip ecache_pinned in that case.
@@ -395,6 +427,32 @@ pac_dalloc(tsdn_t *tsdn, pac_t *pac, edata_t *edata, san_unguard_pages_two_sided( tsdn, ehooks, edata, pac->emap); } + } else if (edata_size_get(edata) + <= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) { + /* The size is within sec_max_alloc: clear the zeroed flag and offer the extent to the SEC as a one-element list. */ edata_zeroed_set(edata, false); + edata_list_active_t dalloc_list; + edata_list_active_init(&dalloc_list); + edata_list_active_append(&dalloc_list, edata); + sec_dalloc(tsdn, &pac->sec, &dalloc_list, + pac_sec_shard_pick(tsdn, &pac->sec)); + /* An empty list after sec_dalloc() means nothing is left to hand to the ecaches. */ if (edata_list_active_empty(&dalloc_list)) { + *deferred_work_generated = false; + return; + } + /* Flush overflow extents to their backing ecaches. */ + bool any_deferred_work = false; + edata_t *flush_edata; + while ((flush_edata = + edata_list_active_first(&dalloc_list)) != NULL) { + edata_list_active_remove(&dalloc_list, + flush_edata); + /* Only non-pinned extents count as deferred work. */ if (!edata_pinned_get(flush_edata)) { + any_deferred_work = true; + } + pac_ecache_dalloc(tsdn, pac, ehooks, flush_edata); + } + *deferred_work_generated = any_deferred_work; + return; } bool pinned = edata_pinned_get(edata); @@ -717,6 +775,13 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, return true; } + /* The SEC gate follows the dirty decay setting only, and only when the SEC is in use. For decay_ms == 0 the gate is closed (sec_max_alloc = 0) before the SEC is flushed; for any other value it is reopened at the end of this function. NOTE(review): the stores are relaxed, so a thread that loaded the old limit earlier might still reach the SEC after the flush -- confirm this is acceptable. */ bool update_pac_sec = state == extent_state_dirty + && sec_is_used(&pac->sec); + if (update_pac_sec && decay_ms == 0) { + atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED); + pac_sec_flush(tsdn, pac); + } + malloc_mutex_lock(tsdn, &decay->mtx); /* * Restart decay backlog from scratch, which may cause many dirty pages @@ -732,6 +797,11 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness); malloc_mutex_unlock(tsdn, &decay->mtx); + if (update_pac_sec && decay_ms != 0) { + atomic_store_zu(&pac->sec_max_alloc, + pac->sec.opts.max_alloc, ATOMIC_RELAXED); + } + return false; } @@ -746,12 +816,11 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) { void pac_reset(tsdn_t *tsdn, pac_t *pac) { + pac_sec_flush(tsdn, pac); /* - * No-op for now; purging is
still done at the arena-level. It should - * get moved in here, though. + * Purging is still done at the arena-level. It should get moved in + * here, though. */ - (void)tsdn; - (void)pac; } void @@ -816,3 +885,16 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) { extent_destroy_wrapper(tsdn, pac, ehooks, edata); } } + +/* Empties the PAC SEC: whatever sec_flush() places on the local to_flush list is removed one extent at a time and handed to pac_ecache_dalloc(). */ void +pac_sec_flush(tsdn_t *tsdn, pac_t *pac) { + ehooks_t *ehooks = pac_ehooks_get(pac); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + sec_flush(tsdn, &pac->sec, &to_flush); + edata_t *edata; + while ((edata = edata_list_active_first(&to_flush)) != NULL) { + edata_list_active_remove(&to_flush, edata); + pac_ecache_dalloc(tsdn, pac, ehooks, edata); + } +} diff --git a/src/sec.c b/src/sec.c index a9dfcaf8..2fdad2fe 100644 --- a/src/sec.c +++ b/src/sec.c @@ -25,6 +25,8 @@ sec_bin_init(sec_bin_t *bin) { bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) { sec->opts = *opts; + /* Give bins and npsizes defined values before the nshards == 0 early return below. */ sec->bins = NULL; + sec->npsizes = 0; if (opts->nshards == 0) { return false; } @@ -57,18 +59,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) { return false; } -static uint8_t -sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { +uint8_t +sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp) { /* * Eventually, we should implement affinity, tracking source shard using * the edata_t's newly freed up fields. For now, just randomly * distribute across all shards. + * + * Callers must ensure sec->opts.nshards > 1.
*/ - if (tsdn_null(tsdn)) { - return 0; - } - tsd_t *tsd = tsdn_tsd(tsdn); - uint8_t *idxp = tsd_sec_shardp_get(tsd); + assert(sec->opts.nshards > 1); if (*idxp == (uint8_t)-1) { /* * First use; initialize using the trick from Daniel Lemire's @@ -143,10 +143,10 @@ sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) { static edata_t * sec_multishard_trylock_alloc( - tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) { + tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind, uint8_t shard) { assert(sec->opts.nshards > 0); - uint8_t cur_shard = sec_shard_pick(tsdn, sec); + uint8_t cur_shard = shard; sec_bin_t *bin; for (size_t i = 0; i < sec->opts.nshards; ++i) { bin = sec_bin_pick(sec, cur_shard, pszind); @@ -170,7 +170,7 @@ sec_multishard_trylock_alloc( * declaring a miss. That could recover more remote-shard hits under * contention, but it also changes the allocation latency policy. */ - assert(cur_shard == sec_shard_pick(tsdn, sec)); + assert(cur_shard == shard); bin = sec_bin_pick(sec, cur_shard, pszind); malloc_mutex_lock(tsdn, &bin->mtx); edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); @@ -184,7 +184,7 @@ sec_multishard_trylock_alloc( } edata_t * -sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { +sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard) { if (!sec_size_supported(sec, size)) { return NULL; } @@ -208,7 +208,7 @@ sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { /* frequent_reuse */ 1); return edata; } - return sec_multishard_trylock_alloc(tsdn, sec, size, pszind); + return sec_multishard_trylock_alloc(tsdn, sec, size, pszind, shard); } static void @@ -248,11 +248,11 @@ sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size, static void sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, - pszind_t pszind, edata_list_active_t *dalloc_list) { + pszind_t pszind, edata_list_active_t *dalloc_list, uint8_t shard) { assert(sec->opts.nshards > 0); /* Try to dalloc 
in this threads bin first */ - uint8_t cur_shard = sec_shard_pick(tsdn, sec); + uint8_t cur_shard = shard; for (size_t i = 0; i < sec->opts.nshards; ++i) { sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { @@ -267,7 +267,7 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, } } /* No bin had alloc or had the extent */ - assert(cur_shard == sec_shard_pick(tsdn, sec)); + assert(cur_shard == shard); sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); malloc_mutex_lock(tsdn, &bin->mtx); sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); @@ -275,13 +275,11 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, } void -sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) { - if (!sec_is_used(sec)) { - return; - } +sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list, + uint8_t shard) { edata_t *edata = edata_list_active_first(dalloc_list); size_t size = edata_size_get(edata); - if (size > sec->opts.max_alloc) { + if (!sec_size_supported(sec, size)) { return; } pszind_t pszind = sz_psz2ind(size); @@ -298,20 +296,21 @@ sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) { malloc_mutex_unlock(tsdn, &bin->mtx); return; } - sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list); + sec_multishard_trylock_dalloc( + tsdn, sec, size, pszind, dalloc_list, shard); } void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result, - size_t nallocs) { + size_t nallocs, uint8_t shard) { assert((size & PAGE_MASK) == 0); - assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc); + assert(sec_size_supported(sec, size)); assert(nallocs > 0); pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); - sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind); + sec_bin_t *bin = sec_bin_pick(sec, shard, pszind); malloc_mutex_assert_not_owner(tsdn, &bin->mtx); malloc_mutex_lock(tsdn, 
&bin->mtx); size_t new_cached_bytes = nallocs * size; diff --git a/src/stats.c b/src/stats.c index 65583393..a4112fe6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -835,6 +835,37 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { &sec_overfills); } +/* Emits the five PAC SEC stats for arena i (bytes, hits, misses, dalloc_noflush, dalloc_flush), each read through its pac_sec mallctl with CTL_M2_GET. The 0 in the mallctl names is presumably a placeholder that CTL_M2_GET replaces with i -- confirm against the macro. */ static void +stats_arena_pac_sec_print(emitter_t *emitter, unsigned i) { + size_t sec_bytes; + size_t sec_hits; + size_t sec_misses; + size_t sec_dalloc_flush; + size_t sec_dalloc_noflush; + CTL_M2_GET("stats.arenas.0.pac_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "pac_sec_bytes", + "Bytes in PAC small extent cache", + emitter_type_size, &sec_bytes); + CTL_M2_GET("stats.arenas.0.pac_sec_hits", i, &sec_hits, size_t); + emitter_kv(emitter, "pac_sec_hits", + "Total hits in PAC small extent cache", + emitter_type_size, &sec_hits); + CTL_M2_GET("stats.arenas.0.pac_sec_misses", i, &sec_misses, size_t); + emitter_kv(emitter, "pac_sec_misses", + "Total misses in PAC small extent cache", + emitter_type_size, &sec_misses); + CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_noflush", i, + &sec_dalloc_noflush, size_t); + emitter_kv(emitter, "pac_sec_dalloc_noflush", + "Dalloc calls without flush in PAC small extent cache", + emitter_type_size, &sec_dalloc_noflush); + CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_flush", i, &sec_dalloc_flush, + size_t); + emitter_kv(emitter, "pac_sec_dalloc_flush", + "Dalloc calls with flush in PAC small extent cache", + emitter_type_size, &sec_dalloc_flush); +} + static void stats_arena_hpa_shard_counters_print( emitter_t *emitter, unsigned i, uint64_t uptime) { @@ -1567,6 +1598,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(extent_avail) #undef GET_AND_EMIT_MEM_STAT + /* PAC SEC stats are printed only when the option configures at least one shard. */ if (opt_pac_sec_opts.nshards > 0) { + stats_arena_pac_sec_print(emitter, i); + } + if (mutex) { stats_arena_mutexes_print(emitter, i, uptime); } @@ -1761,6 +1796,9 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_nshards")
OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") + OPT_WRITE_SIZE_T("experimental_pac_sec_nshards") + OPT_WRITE_SIZE_T("experimental_pac_sec_max_alloc") + OPT_WRITE_SIZE_T("experimental_pac_sec_max_bytes") OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_INT64("mutex_max_spin") diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index c206974b..11b397d9 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -181,7 +181,7 @@ static const size_t num_global_mutexes = sizeof(global_mutex_names) static const char *arena_mutex_names[] = {"large", "extent_avail", "extents_dirty", "extents_muzzy", "extents_retained", "decay_dirty", "decay_muzzy", "base", "tcache_list", "hpa_shard", "hpa_shard_grow", - "hpa_sec"}; + "hpa_sec", "pac_sec"}; static const size_t num_arena_mutexes = sizeof(arena_mutex_names) / sizeof(arena_mutex_names[0]); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ed3ee222..45f6996a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -312,6 +312,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); + TEST_MALLCTL_OPT(size_t, experimental_pac_sec_nshards, always); + TEST_MALLCTL_OPT(size_t, experimental_pac_sec_max_alloc, always); + TEST_MALLCTL_OPT(size_t, experimental_pac_sec_max_bytes, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always); diff --git a/test/unit/pac_sec_integration.c b/test/unit/pac_sec_integration.c new file mode 100644 index 00000000..5436e8b8 --- /dev/null +++ b/test/unit/pac_sec_integration.c @@ -0,0 +1,368 @@ +#include "test/jemalloc_test.h" + +/* + * Use 1 shard for deterministic stat assertions and a small max_bytes so + * overflow triggers quickly. 
Background threads are disabled to prevent + * asynchronous decay from interfering with precise stat checks. + */ +const char *malloc_conf = + "experimental_pac_sec_nshards:1,background_thread:false"; + +static sec_opts_t saved_pac_sec_opts; + +/* Saves the current opt_pac_sec_opts in saved_pac_sec_opts, then overrides max_alloc and max_bytes for the test. */ static void +pac_sec_test_opts_set(void) { + saved_pac_sec_opts = opt_pac_sec_opts; + /* + * The test requests SC_LARGE_MINCLASS-sized allocations; PAC may see + * sz_large_pad on top. Configure these directly so the test remains + * valid across page sizes. + */ + size_t test_extent_size = SC_LARGE_MINCLASS + sz_large_pad; + opt_pac_sec_opts.max_alloc = test_extent_size; + opt_pac_sec_opts.max_bytes = 4 * test_extent_size; +} + +/* Puts back the options captured by pac_sec_test_opts_set(). */ static void +pac_sec_test_opts_restore(void) { + opt_pac_sec_opts = saved_pac_sec_opts; +} + +/* Extent alloc hook: allocates through the default alloc hook, tries to commit the range when it came back uncommitted (undoing the allocation and returning NULL if that fails), and returns the address OR-ed with EXTENT_ALLOC_FLAG_PINNED. */ static void * +pinned_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind) { + void *ret = ehooks_default_extent_hooks.alloc( + (extent_hooks_t *)&ehooks_default_extent_hooks, new_addr, size, + alignment, zero, commit, arena_ind); + if (ret == NULL) { + return NULL; + } + if (!*commit) { + if (ehooks_default_extent_hooks.commit != NULL + && ehooks_default_extent_hooks.commit( + (extent_hooks_t *)&ehooks_default_extent_hooks, ret, + size, 0, size, arena_ind)) { + ehooks_default_extent_hooks.dalloc( + (extent_hooks_t *)&ehooks_default_extent_hooks, ret, + size, *commit, arena_ind); + return NULL; + } + /* NOTE(review): this is also reached when the default commit hook is NULL, i.e. without any commit call -- confirm that is intended. */ *commit = true; + } + return (void *)((uintptr_t)ret | EXTENT_ALLOC_FLAG_PINNED); +} + +/* Extent destroy hook: forwards to the default destroy hook. */ static void +pinned_extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + ehooks_default_extent_hooks.destroy( + (extent_hooks_t *)&ehooks_default_extent_hooks, addr, size, + committed, arena_ind); +} + +static bool +pinned_extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + return
ehooks_default_extent_hooks.split( + (extent_hooks_t *)&ehooks_default_extent_hooks, addr, size, size_a, + size_b, committed, arena_ind); +} + +/* Extent merge hook: forwards to the default merge hook. */ static bool +pinned_extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + return ehooks_default_extent_hooks.merge( + (extent_hooks_t *)&ehooks_default_extent_hooks, addr_a, size_a, + addr_b, size_b, committed, arena_ind); +} + +/* Hook table used for pinned extents; only alloc, destroy, split and merge are provided, every other slot is NULL. */ static extent_hooks_t pinned_hooks = { + pinned_extent_alloc, + NULL, /* dalloc */ + pinned_extent_destroy, + NULL, /* commit */ + NULL, /* decommit */ + NULL, /* purge_lazy */ + NULL, /* purge_forced */ + pinned_extent_split, + pinned_extent_merge +}; + +/* Writes the epoch mallctl, then reads and returns the size_t stat named by the format stats.arenas.%u.pac_sec_%s for the given arena index and field. */ static size_t +read_stat(unsigned arena_ind, const char *field) { + char cmd[128]; + size_t val; + size_t sz = sizeof(val); + uint64_t epoch = 1; + /* sz is reused: first as the length of the epoch write, then as the length of the stat read. */ sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl failure"); + sz = sizeof(val); + snprintf(cmd, sizeof(cmd), "stats.arenas.%u.pac_sec_%s", + arena_ind, field); + expect_d_eq(mallctl(cmd, (void *)&val, &sz, NULL, 0), 0, + "Unexpected mallctl failure reading pac_sec stat"); + return val; +} + +/* Returns ecache_npages_get() for the ecache_pinned of the arena with index arena_ind. */ static size_t +read_pinned_npages(unsigned arena_ind) { + tsd_t *tsd = tsd_fetch(); + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + expect_ptr_not_null(arena, "arena_get failed"); + return ecache_npages_get(&arena->pa_shard.pac.ecache_pinned); +} + +/* Writes decay_ms through the mallctl named by the format arena.%u.dirty_decay_ms and expects the call to succeed. */ static void +dirty_decay_ms_set(unsigned arena_ind, ssize_t decay_ms) { + char cmd[64]; + snprintf(cmd, sizeof(cmd), "arena.%u.dirty_decay_ms", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, (void *)&decay_ms, + sizeof(decay_ms)), 0, "dirty_decay_ms mallctl failed"); +} + +TEST_BEGIN(test_pac_sec_alloc_dalloc_cycle) { + test_skip_if(!config_stats); + test_skip_if(opt_hpa); + + pac_sec_test_opts_set(); + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void
*)&arena_ind, &sz, NULL, 0), + 0, "Unexpected arenas.create failure"); + + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + size_t alloc_size = SC_LARGE_MINCLASS; + + /* + * Read the configured max_bytes so we can compute capacity. + * With nshards=1, PAC SEC caches extents one at a time until bytes_cur + * reaches max_bytes. + */ + size_t max_bytes; + sz = sizeof(max_bytes); + expect_d_eq(mallctl("opt.experimental_pac_sec_max_bytes", + (void *)&max_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + size_t capacity = max_bytes / alloc_size; + expect_zu_gt(capacity, 0, "SEC capacity must be > 0 for this test"); + + /* Step 1: First alloc — SEC miss, served from ecache or new mapping. */ + void *p1 = mallocx(alloc_size, flags); + expect_ptr_not_null(p1, "mallocx failed"); + expect_zu_eq(read_stat(arena_ind, "misses"), 1, + "first alloc should miss SEC"); + expect_zu_eq(read_stat(arena_ind, "hits"), 0, + "no hits yet"); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "SEC should be empty (extent is active)"); + + /* Step 2: Free p1 — SEC absorbs without flush. */ + dallocx(p1, flags); + size_t cached_after_one = read_stat(arena_ind, "bytes"); + expect_zu_gt(cached_after_one, 0, + "SEC should cache the freed extent"); + /* Actual extent size may exceed alloc_size due to size class rounding. */ + size_t extent_size = cached_after_one; + expect_zu_eq(read_stat(arena_ind, "dalloc_noflush"), 1, + "one dalloc absorbed without flush"); + expect_zu_eq(read_stat(arena_ind, "dalloc_flush"), 0, + "no flush yet"); + + /* Recompute capacity based on actual extent size. */ + capacity = max_bytes / extent_size; + expect_zu_gt(capacity, 0, "SEC capacity should be positive"); + + /* Step 3: Re-alloc same size — SEC hit, reuses cached extent. 
*/ + void *p2 = mallocx(alloc_size, flags); + expect_ptr_not_null(p2, "mallocx failed"); + expect_zu_eq(read_stat(arena_ind, "hits"), 1, + "second alloc should hit SEC"); + expect_zu_eq(read_stat(arena_ind, "misses"), 1, + "misses should not increase"); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "SEC should be empty after hit"); + + dallocx(p2, flags); + + /* + * Step 4: Allocate (capacity + 2) extents, then free them all. + * The first `capacity` frees fill SEC; remaining frees overflow + * and flush cold extents to ecache_dirty. + */ + size_t nallocs = capacity + 2; + void **ptrs = mallocx(nallocs * sizeof(void *), + MALLOCX_TCACHE_NONE); + expect_ptr_not_null(ptrs, "metadata alloc failed"); + + for (size_t i = 0; i < nallocs; i++) { + ptrs[i] = mallocx(alloc_size, flags); + expect_ptr_not_null(ptrs[i], "mallocx %zu failed", i); + } + for (size_t i = 0; i < nallocs; i++) { + dallocx(ptrs[i], flags); + } + + size_t noflush = read_stat(arena_ind, "dalloc_noflush"); + size_t flush = read_stat(arena_ind, "dalloc_flush"); + size_t cached_bytes = read_stat(arena_ind, "bytes"); + + expect_zu_gt(noflush, 1, + "most dallocs should be absorbed"); + expect_zu_gt(flush, 0, + "overflow should trigger at least one flush"); + expect_zu_gt(cached_bytes, 0, + "SEC should still hold extents after partial flush"); + expect_zu_le(cached_bytes, max_bytes, + "SEC should not exceed max_bytes"); + + /* + * Step 5: Next alloc should be a SEC hit (cache is populated), + * and should not increase the miss counter. + */ + size_t misses_before = read_stat(arena_ind, "misses"); + void *p3 = mallocx(alloc_size, flags); + expect_ptr_not_null(p3, "mallocx failed"); + expect_zu_eq(read_stat(arena_ind, "misses"), misses_before, + "alloc from populated SEC should not miss"); + dallocx(p3, flags); + + /* + * Step 6: Purge flushes SEC entirely. 
+ */ + char cmd[64]; + snprintf(cmd, sizeof(cmd), "arena.%u.purge", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0, + "purge failed"); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "SEC should be empty after purge"); + + /* + * Step 7: Alloc after purge — must miss SEC again. + */ + size_t hits_before = read_stat(arena_ind, "hits"); + void *p4 = mallocx(alloc_size, flags); + expect_ptr_not_null(p4, "mallocx failed"); + expect_zu_eq(read_stat(arena_ind, "hits"), hits_before, + "alloc after purge should miss SEC"); + dallocx(p4, flags); + + dallocx(ptrs, MALLOCX_TCACHE_NONE); + snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0, + "arena destroy failed"); + pac_sec_test_opts_restore(); +} +TEST_END + +TEST_BEGIN(test_pac_sec_dirty_decay_toggle) { + test_skip_if(!config_stats); + test_skip_if(opt_hpa); + + pac_sec_test_opts_set(); + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected arenas.create failure"); + + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + size_t alloc_size = SC_LARGE_MINCLASS; + + void *p = mallocx(alloc_size, flags); + expect_ptr_not_null(p, "mallocx failed"); + dallocx(p, flags); + expect_zu_gt(read_stat(arena_ind, "bytes"), 0, + "SEC should cache when dirty decay is enabled"); + + dirty_decay_ms_set(arena_ind, 0); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "disabling dirty decay should flush SEC"); + + p = mallocx(alloc_size, flags); + expect_ptr_not_null(p, "mallocx failed"); + dallocx(p, flags); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "SEC should stay disabled while dirty decay is zero"); + + dirty_decay_ms_set(arena_ind, 100); + p = mallocx(alloc_size, flags); + expect_ptr_not_null(p, "mallocx failed"); + dallocx(p, flags); + expect_zu_gt(read_stat(arena_ind, "bytes"), 0, + "SEC should be usable after dirty decay is re-enabled"); + + char 
cmd[64]; + snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0, + "arena destroy failed"); + pac_sec_test_opts_restore(); +} +TEST_END + +TEST_BEGIN(test_pac_sec_flush_pinned) { + test_skip_if(!config_stats); + test_skip_if(opt_hpa); + + pac_sec_test_opts_set(); + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + extent_hooks_t *hooks_ptr = &pinned_hooks; + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + &hooks_ptr, sizeof(hooks_ptr)), 0, + "Unexpected arenas.create failure"); + + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + size_t alloc_size = SC_LARGE_MINCLASS; + size_t max_bytes; + sz = sizeof(max_bytes); + expect_d_eq(mallctl("opt.experimental_pac_sec_max_bytes", + (void *)&max_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + void *p = mallocx(alloc_size, flags); + expect_ptr_not_null(p, "mallocx failed"); + dallocx(p, flags); + size_t sec_bytes = read_stat(arena_ind, "bytes"); + expect_zu_gt(sec_bytes, 0, "SEC should cache the pinned extent"); + + size_t extent_size = sec_bytes; + size_t nallocs = max_bytes / extent_size + 2; + void **ptrs = mallocx(nallocs * sizeof(void *), MALLOCX_TCACHE_NONE); + expect_ptr_not_null(ptrs, "metadata alloc failed"); + for (size_t i = 0; i < nallocs; i++) { + ptrs[i] = mallocx(alloc_size, flags); + expect_ptr_not_null(ptrs[i], "mallocx %zu failed", i); + } + size_t pinned_before_overflow = read_pinned_npages(arena_ind); + for (size_t i = 0; i < nallocs; i++) { + dallocx(ptrs[i], flags); + } + expect_zu_gt(read_pinned_npages(arena_ind), pinned_before_overflow, + "SEC overflow should flush pinned extents to ecache_pinned"); + + size_t pinned_before_purge = read_pinned_npages(arena_ind); + char cmd[64]; + snprintf(cmd, sizeof(cmd), "arena.%u.purge", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0, + "purge failed"); + expect_zu_eq(read_stat(arena_ind, "bytes"), 0, + "SEC should be empty after purge"); + 
expect_zu_gt(read_pinned_npages(arena_ind), pinned_before_purge, + "PAC SEC purge should flush pinned extents to ecache_pinned"); + + dallocx(ptrs, MALLOCX_TCACHE_NONE); + snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_ind); + expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0, + "arena destroy failed"); + pac_sec_test_opts_restore(); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_pac_sec_alloc_dalloc_cycle, test_pac_sec_dirty_decay_toggle, + test_pac_sec_flush_pinned); +} diff --git a/test/unit/sec.c b/test/unit/sec.c index 8caf0a6a..bbba5988 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -31,6 +31,32 @@ destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) { base_delete(tsdn, tdata->base); } +static uint8_t +test_sec_shard(tsdn_t *tsdn, sec_t *sec) { + if (tsdn_null(tsdn) || sec->opts.nshards <= 1) { + return 0; + } + tsd_t *tsd = tsdn_tsd(tsdn); + return sec_shard_pick(tsd, sec, tsd_sec_shardp_get(tsd)); +} + +static edata_t * +sec_test_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { + return sec_alloc(tsdn, sec, size, test_sec_shard(tsdn, sec)); +} + +static void +sec_test_fill(tsdn_t *tsdn, sec_t *sec, size_t size, + edata_list_active_t *result, size_t nallocs) { + sec_fill(tsdn, sec, size, result, nallocs, test_sec_shard(tsdn, sec)); +} + +static void +sec_test_dalloc(tsdn_t *tsdn, sec_t *sec, + edata_list_active_t *dalloc_list) { + sec_dalloc(tsdn, sec, dalloc_list, test_sec_shard(tsdn, sec)); +} + TEST_BEGIN(test_max_nshards_option_zero) { test_data_t tdata; sec_opts_t opts; @@ -41,7 +67,7 @@ TEST_BEGIN(test_max_nshards_option_zero) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); test_data_init(tsdn, &tdata, &opts); - edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_null(edata, "SEC should be disabled when nshards==0"); destroy_test_data(tsdn, &tdata); } @@ -57,7 +83,7 @@ TEST_BEGIN(test_max_alloc_option_too_small) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); 
test_data_init(tsdn, &tdata, &opts); - edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE); + edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, 3 * PAGE); expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE"); destroy_test_data(tsdn, &tdata); } @@ -82,7 +108,7 @@ TEST_BEGIN(test_sec_fill) { edata_size_set(&edata2, PAGE); edata_list_active_append(&allocs, &edata1); edata_list_active_append(&allocs, &edata2); - sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2); + sec_test_fill(tsdn, &tdata.sec, PAGE, &allocs, 2); sec_stats_merge(tsdn, &tdata.sec, &stats); expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled"); expect_true(edata_list_active_empty(&allocs), @@ -97,7 +123,7 @@ TEST_BEGIN(test_sec_fill) { edata_list_active_append(&allocs, &edata3); edata_list_active_append(&allocs, &edata4); edata_list_active_append(&allocs, &edata5); - sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3); + sec_test_fill(tsdn, &tdata.sec, PAGE, &allocs, 3); sec_stats_merge(tsdn, &tdata.sec, &stats); expect_zu_eq( stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes"); @@ -118,7 +144,7 @@ TEST_BEGIN(test_sec_alloc) { test_data_init(tsdn, &tdata, &opts); /* Alloc from empty cache returns NULL */ - edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_null(edata, "SEC is empty"); /* Place two extents into the sec */ @@ -127,11 +153,11 @@ TEST_BEGIN(test_sec_alloc) { edata_t edata1, edata2; edata_size_set(&edata1, PAGE); edata_list_active_append(&allocs, &edata1); - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true(edata_list_active_empty(&allocs), ""); edata_size_set(&edata2, PAGE); edata_list_active_append(&allocs, &edata2); - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true(edata_list_active_empty(&allocs), ""); sec_stats_t stats = {0}; @@ -141,20 +167,20 @@ TEST_BEGIN(test_sec_alloc) { 
stats.bytes = 0; /* Most recently cached extent should be used on alloc */ - edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_eq(edata, &edata2, "edata2 is most recently used"); sec_stats_merge(tsdn, &tdata.sec, &stats); expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache"); stats.bytes = 0; /* Alloc can still get extents from cache */ - edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_eq(edata, &edata1, "SEC is not empty"); sec_stats_merge(tsdn, &tdata.sec, &stats); expect_zu_eq(stats.bytes, 0, "No more items after last one is popped"); /* And cache is empty again */ - edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_null(edata, "SEC is empty"); destroy_test_data(tsdn, &tdata); } @@ -178,7 +204,7 @@ TEST_BEGIN(test_sec_dalloc) { edata_list_active_append(&allocs, &edata1); /* SEC is empty, we return one pointer to it */ - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true( edata_list_active_empty(&allocs), "extents should be consumed"); @@ -187,7 +213,7 @@ TEST_BEGIN(test_sec_dalloc) { edata_size_set(&edata2, PAGE); edata_list_active_append(&allocs, &edata2); /* Sec can take one more as well and we will be exactly at max_bytes */ - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true( edata_list_active_empty(&allocs), "extents should be consumed"); @@ -205,7 +231,7 @@ TEST_BEGIN(test_sec_dalloc) { edata_t edata3; edata_size_set(&edata3, PAGE); edata_list_active_append(&allocs, &edata3); - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_false( edata_list_active_empty(&allocs), "extents should NOT be consumed"); expect_ptr_ne( @@ -236,7 +262,7 @@ TEST_BEGIN(test_max_bytes_too_low) { edata_list_active_append(&allocs, &edata1); /* SEC is empty, we return one 
pointer to it */ - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_false( edata_list_active_empty(&allocs), "extents should not be consumed"); destroy_test_data(tsdn, &tdata); @@ -266,9 +292,9 @@ TEST_BEGIN(test_sec_flush) { edata_size_set(&edata4[i], 4 * PAGE); edata_list_active_append(&allocs1, &edata1[i]); - sec_dalloc(tsdn, &tdata.sec, &allocs1); + sec_test_dalloc(tsdn, &tdata.sec, &allocs1); edata_list_active_append(&allocs4, &edata4[i]); - sec_dalloc(tsdn, &tdata.sec, &allocs4); + sec_test_dalloc(tsdn, &tdata.sec, &allocs4); } sec_stats_t stats = {0}; @@ -305,11 +331,11 @@ TEST_BEGIN(test_sec_stats) { edata_list_active_append(&allocs, &edata1); /* SEC is empty alloc fails. nmisses==1 */ - edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE); expect_ptr_null(edata, "SEC should be empty"); /* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */ - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true( edata_list_active_empty(&allocs), "extents should be consumed"); @@ -317,7 +343,7 @@ TEST_BEGIN(test_sec_stats) { edata_size_set(&edata2, PAGE); edata_list_active_append(&allocs, &edata2); /* Sec can take one more, so ndalloc_noflush=2 */ - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_true( edata_list_active_empty(&allocs), "extents should be consumed"); @@ -337,7 +363,7 @@ TEST_BEGIN(test_sec_stats) { edata_t edata3; edata_size_set(&edata3, PAGE); edata_list_active_append(&allocs, &edata3); - sec_dalloc(tsdn, &tdata.sec, &allocs); + sec_test_dalloc(tsdn, &tdata.sec, &allocs); expect_false( edata_list_active_empty(&allocs), "extents should NOT be consumed"); sec_stats_merge(tsdn, &tdata.sec, &stats); @@ -379,12 +405,12 @@ thd_trylock_test(void *varg) { *shard_idx = arg->preferred_shard; /* Fill the shard with some extents */ - sec_fill(tsdn, arg->sec, PAGE, 
&arg->fill_list, arg->fill_list_sz); + sec_test_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz); expect_true(edata_list_active_empty(&arg->fill_list), ""); for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { /* Try to allocate from SEC */ - arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE); + arg->edata[i] = sec_test_alloc(tsdn, arg->sec, PAGE); if (arg->edata[i] != NULL) { expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); } @@ -397,7 +423,7 @@ thd_trylock_test(void *varg) { arg->nallocs++; edata_list_active_append(&list, arg->edata[i]); expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); - sec_dalloc(tsdn, arg->sec, &list); + sec_test_dalloc(tsdn, arg->sec, &list); if (edata_list_active_empty(&list)) { arg->ndallocs++; } else {