This commit is contained in:
Bin Liu 2026-05-28 17:16:21 +00:00 committed by GitHub
commit 8bec73d393
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 730 additions and 95 deletions

View file

@ -266,6 +266,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/ncached_max.c \
$(srcroot)test/unit/oversize_threshold.c \
$(srcroot)test/unit/pa.c \
$(srcroot)test/unit/pac_sec_integration.c \
$(srcroot)test/unit/pack.c \
$(srcroot)test/unit/pages.c \
$(srcroot)test/unit/peak.c \

View file

@ -22,6 +22,7 @@ extern bool opt_confirm_conf;
extern bool opt_hpa;
extern hpa_shard_opts_t opt_hpa_opts;
extern sec_opts_t opt_hpa_sec_opts;
extern sec_opts_t opt_pac_sec_opts;
extern const char *opt_junk;
extern bool opt_junk_alloc;

View file

@ -37,7 +37,8 @@ typedef enum {
OP(tcache_list) \
OP(hpa_shard) \
OP(hpa_shard_grow) \
OP(hpa_sec)
OP(hpa_sec) \
OP(pac_sec)
typedef enum {
#define OP(mtx) arena_prof_mutex_##mtx,

View file

@ -8,6 +8,7 @@
#include "jemalloc/internal/edata_cache.h"
#include "jemalloc/internal/exp_grow.h"
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/sec.h"
#include "jemalloc/internal/tsd_types.h"
#include "san_bump.h"
@ -84,12 +85,21 @@ struct pac_stats_s {
/* VM space had to be leaked (undocumented). Normally 0. */
atomic_zu_t abandoned_vm;
/* PAC SEC stats. Derived. */
sec_stats_t pac_sec_stats;
};
typedef struct pac_s pac_t;
struct pac_s {
/* Small extent cache in front of PAC ecaches to reduce contention. */
sec_t sec;
/* 0 disables PAC SEC; otherwise max size SEC will cache. */
atomic_zu_t sec_max_alloc;
/* True once pinned memory has been seen. */
atomic_b_t has_pinned;
/*
* Collections of extents that were previously allocated. These are
* used when allocating extents, in an attempt to re-use address space.
@ -237,4 +247,6 @@ ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state);
void pac_reset(tsdn_t *tsdn, pac_t *pac);
void pac_destroy(tsdn_t *tsdn, pac_t *pac);
void pac_sec_flush(tsdn_t *tsdn, pac_t *pac);
#endif /* JEMALLOC_INTERNAL_PAC_H */

View file

@ -40,15 +40,6 @@ struct sec_stats_s {
sec_bin_stats_t total;
};
/*
 * Reset every counter in *stats to zero.  The five stores are independent of
 * one another, so their order is irrelevant.
 */
static inline void
sec_bin_stats_init(sec_bin_stats_t *stats) {
	/* Allocation-side counters. */
	stats->nhits = 0;
	stats->nmisses = 0;
	/* Deallocation-side counters. */
	stats->ndalloc_noflush = 0;
	stats->ndalloc_flush = 0;
	/* Fill-side counter. */
	stats->noverfills = 0;
}
static inline void
sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) {
dst->nmisses += src->nmisses;
@ -68,16 +59,20 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
typedef struct sec_bin_s sec_bin_t;
struct sec_bin_s {
/*
* Protects the data members of the bin.
* Protects the freelist and synchronizes counter updates.
*/
malloc_mutex_t mtx;
/*
* Number of bytes in this particular bin.
*/
size_t bytes_cur;
atomic_zu_t bytes_cur;
edata_list_active_t freelist;
sec_bin_stats_t stats;
atomic_zu_t nmisses;
atomic_zu_t nhits;
atomic_zu_t ndalloc_flush;
atomic_zu_t ndalloc_noflush;
atomic_zu_t noverfills;
};
typedef struct sec_s sec_t;
@ -116,10 +111,17 @@ sec_size_supported(sec_t *sec, size_t size) {
void sec_calc_nallocs_for_size(
sec_t *sec, size_t size, size_t *min_nallocs, size_t *max_nallocs);
/*
* Lazily picks (and caches in *idxp) a shard for the calling thread. Different
* SEC instances pass independent per-thread uint8_t slots, initialized to
* (uint8_t)-1.
*/
uint8_t sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp);
/* If sec does not have extent available, it will return NULL. */
edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size);
edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard);
void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
edata_list_active_t *result, size_t nallocs);
edata_list_active_t *result, size_t nallocs, uint8_t shard);
/*
* Upon return dalloc_list may be empty if edata is consumed by sec or non-empty
@ -129,7 +131,8 @@ void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
* considered "hot" and preserved in the cache, while "colder" ones are
* returned).
*/
void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list);
void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list,
uint8_t shard);
bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts);

View file

@ -76,6 +76,7 @@ typedef void (*test_callback_t)(int *);
O(arena, arena_t *, arena_t *) \
O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \
O(sec_shard, uint8_t, uint8_t) \
O(pac_sec_shard, uint8_t, uint8_t) \
O(binshards, tsd_binshards_t, tsd_binshards_t) \
O(peak, peak_t, peak_t) \
O(tcache_slow, tcache_slow_t, tcache_slow_t) \
@ -95,6 +96,7 @@ typedef void (*test_callback_t)(int *);
/* arena */ NULL, /* arena_decay_ticker */ \
TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \
/* sec_shard */ (uint8_t) - 1, \
/* pac_sec_shard */ (uint8_t) - 1, \
/* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \
/* peak */ PEAK_INITIALIZER, /* tcache_slow */ \
TCACHE_SLOW_ZERO_INITIALIZER, \

View file

@ -953,6 +953,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
"hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.nshards,
"experimental_pac_sec_nshards", 0, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_alloc,
"experimental_pac_sec_max_alloc", PAGE,
USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN,
CONF_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_bytes,
"experimental_pac_sec_max_bytes",
SEC_OPTS_MAX_BYTES_DEFAULT, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
if (CONF_MATCH("slab_sizes")) {
if (CONF_MATCH_VALUE("default")) {

View file

@ -115,6 +115,9 @@ CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
CTL_PROTO(opt_hpa_sec_max_bytes)
CTL_PROTO(opt_experimental_pac_sec_nshards)
CTL_PROTO(opt_experimental_pac_sec_max_alloc)
CTL_PROTO(opt_experimental_pac_sec_max_bytes)
CTL_PROTO(opt_huge_arena_pac_thp)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
@ -352,6 +355,11 @@ CTL_PROTO(stats_arenas_i_hpa_sec_misses)
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush)
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush)
CTL_PROTO(stats_arenas_i_hpa_sec_overfills)
CTL_PROTO(stats_arenas_i_pac_sec_bytes)
CTL_PROTO(stats_arenas_i_pac_sec_hits)
CTL_PROTO(stats_arenas_i_pac_sec_misses)
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_flush)
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_noflush)
INDEX_PROTO(stats_arenas_i)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
@ -495,6 +503,12 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
{NAME("experimental_pac_sec_nshards"),
CTL(opt_experimental_pac_sec_nshards)},
{NAME("experimental_pac_sec_max_alloc"),
CTL(opt_experimental_pac_sec_max_alloc)},
{NAME("experimental_pac_sec_max_bytes"),
CTL(opt_experimental_pac_sec_max_bytes)},
{NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
@ -859,6 +873,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)},
{NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)},
{NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)},
{NAME("pac_sec_bytes"), CTL(stats_arenas_i_pac_sec_bytes)},
{NAME("pac_sec_hits"), CTL(stats_arenas_i_pac_sec_hits)},
{NAME("pac_sec_misses"), CTL(stats_arenas_i_pac_sec_misses)},
{NAME("pac_sec_dalloc_noflush"),
CTL(stats_arenas_i_pac_sec_dalloc_noflush)},
{NAME("pac_sec_dalloc_flush"), CTL(stats_arenas_i_pac_sec_dalloc_flush)},
{NAME("small"), CHILD(named, stats_arenas_i_small)},
{NAME("large"), CHILD(named, stats_arenas_i_large)},
{NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
@ -1219,6 +1239,10 @@ ctl_arena_stats_sdmerge(
&sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm,
&astats->astats.pa_shard_stats.pac_stats.abandoned_vm);
sec_stats_accum(
&sdstats->astats.pa_shard_stats.pac_stats.pac_sec_stats,
&astats->astats.pa_shard_stats.pac_stats.pac_sec_stats);
sdstats->astats.tcache_bytes += astats->astats.tcache_bytes;
sdstats->astats.tcache_stashed_bytes +=
astats->astats.tcache_stashed_bytes;
@ -2208,6 +2232,12 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_nshards,
opt_pac_sec_opts.nshards, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_alloc,
opt_pac_sec_opts.max_alloc, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_bytes,
opt_pac_sec_opts.max_bytes, size_t)
CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
CTL_RO_NL_GEN(
opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
@ -3881,6 +3911,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush,
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills,
arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_bytes,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.bytes, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_hits,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nhits, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_misses,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nmisses, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_flush,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_flush, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_noflush,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_noflush, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
arenas_i(mib[2])->astats->allocated_small, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,

View file

@ -16,6 +16,18 @@ const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"};
bool opt_experimental_hpa_start_huge_if_thp_always = true;
bool opt_experimental_hpa_enforce_hugify = false;
/*
 * Choose the SEC shard index the caller should use for this HPA shard's SEC.
 *
 * Shard 0 is returned without touching thread-specific data when the SEC has
 * at most one shard, or when no tsd is available (tsdn is null).  Otherwise
 * the choice is delegated to sec_shard_pick(), using the calling thread's
 * sec_shard slot as the cached index.
 */
static inline uint8_t
hpa_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
	/* Same evaluation order as two separate guards: nshards, then tsdn. */
	if (sec->opts.nshards <= 1 || tsdn_null(tsdn)) {
		return 0;
	}
	tsd_t *self = tsdn_tsd(tsdn);
	uint8_t *cached_idx = tsd_sec_shardp_get(self);
	return sec_shard_pick(self, sec, cached_idx);
}
bool
hpa_hugepage_size_exceeds_limit(void) {
return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE;
@ -947,7 +959,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
&& (size > shard->opts.slab_max_alloc)) {
return NULL;
}
edata_t *edata = sec_alloc(tsdn, &shard->sec, size);
edata_t *edata = sec_alloc(tsdn, &shard->sec, size,
hpa_sec_shard_pick(tsdn, &shard->sec));
if (edata != NULL) {
return edata;
}
@ -968,7 +981,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
}
if (nsuccess > 0) {
assert(sec_size_supported(&shard->sec, size));
sec_fill(tsdn, &shard->sec, size, &results, nsuccess);
sec_fill(tsdn, &shard->sec, size, &results, nsuccess,
hpa_sec_shard_pick(tsdn, &shard->sec));
/* Unlikely rollback in case of overfill */
if (!edata_list_active_empty(&results)) {
hpa_dalloc_batch(
@ -1075,7 +1089,8 @@ hpa_dalloc(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata,
edata_list_active_init(&dalloc_list);
edata_list_active_append(&dalloc_list, edata);
sec_dalloc(tsdn, &shard->sec, &dalloc_list);
sec_dalloc(tsdn, &shard->sec, &dalloc_list,
hpa_sec_shard_pick(tsdn, &shard->sec));
if (edata_list_active_empty(&dalloc_list)) {
/* sec consumed the pointer */
*deferred_work_generated = false;

View file

@ -181,6 +181,9 @@ size_t opt_calloc_madvise_threshold = CALLOC_MADVISE_THRESHOLD_DEFAULT;
bool opt_hpa = false;
hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
sec_opts_t opt_pac_sec_opts = {0,
(32 * 1024) > (PAGE * 2) ? (32 * 1024) : (PAGE * 2),
SEC_OPTS_MAX_BYTES_DEFAULT};
/* False should be the common case. Set to true to trigger initialization. */
bool malloc_slow = true;

View file

@ -94,6 +94,7 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) {
pac_sec_flush(tsdn, &shard->pac);
if (shard->ever_used_hpa) {
hpa_shard_flush(tsdn, &shard->hpa);
}

View file

@ -16,6 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
sec_prefork2(tsdn, &shard->pac.sec);
if (shard->ever_used_hpa) {
hpa_shard_prefork2(tsdn, &shard->hpa);
}
@ -53,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_postfork_parent(tsdn, &shard->pac.ecache_retained);
ecache_postfork_parent(tsdn, &shard->pac.ecache_pinned);
malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx);
sec_postfork_parent(tsdn, &shard->pac.sec);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
@ -68,6 +70,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_postfork_child(tsdn, &shard->pac.ecache_retained);
ecache_postfork_child(tsdn, &shard->pac.ecache_pinned);
malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx);
sec_postfork_child(tsdn, &shard->pac.sec);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
@ -179,6 +182,9 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
if (shard->ever_used_hpa) {
hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out);
}
sec_stats_merge(tsdn, &shard->pac.sec,
&pa_shard_stats_out->pac_stats.pac_sec_stats);
}
static void
@ -207,6 +213,9 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy);
sec_mutex_stats_read(tsdn, &shard->pac.sec,
&mutex_prof_data[arena_prof_mutex_pac_sec]);
if (shard->ever_used_hpa) {
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa.mtx, arena_prof_mutex_hpa_shard);

View file

@ -4,6 +4,18 @@
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/san.h"
/*
 * Choose the SEC shard index the caller should use for the PAC's SEC.
 *
 * Shard 0 is returned without touching thread-specific data when the SEC has
 * at most one shard, or when no tsd is available (tsdn is null).  Otherwise
 * the choice is delegated to sec_shard_pick(), using the calling thread's
 * pac_sec_shard slot as the cached index.
 */
static inline uint8_t
pac_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
	/* Same evaluation order as two separate guards: nshards, then tsdn. */
	if (sec->opts.nshards <= 1 || tsdn_null(tsdn)) {
		return 0;
	}
	tsd_t *self = tsdn_tsd(tsdn);
	uint8_t *cached_idx = tsd_pac_sec_shardp_get(self);
	return sec_shard_pick(self, sec, cached_idx);
}
static inline void
pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay,
pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
@ -92,6 +104,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
pac->stats_mtx = stats_mtx;
atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);
if (sec_init(tsdn, &pac->sec, base, &opt_pac_sec_opts)) {
/* Fall back to no SEC on allocation failure. */
pac->sec.opts.nshards = 0;
}
if (!sec_is_used(&pac->sec) || dirty_decay_ms == 0) {
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
} else {
atomic_store_zu(&pac->sec_max_alloc,
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
}
return false;
}
@ -133,6 +156,15 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
edata_t *edata = NULL;
if (!guarded && !zero && alignment <= PAGE
&& size <= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
edata = sec_alloc(tsdn, &pac->sec, size,
pac_sec_shard_pick(tsdn, &pac->sec));
if (edata != NULL) {
return edata;
}
}
/*
* Guarded allocations need surrounding guard pages, which the pinned
* pool does not maintain; skip ecache_pinned in that case.
@ -395,6 +427,32 @@ pac_dalloc(tsdn_t *tsdn, pac_t *pac, edata_t *edata,
san_unguard_pages_two_sided(
tsdn, ehooks, edata, pac->emap);
}
} else if (edata_size_get(edata)
<= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
edata_zeroed_set(edata, false);
edata_list_active_t dalloc_list;
edata_list_active_init(&dalloc_list);
edata_list_active_append(&dalloc_list, edata);
sec_dalloc(tsdn, &pac->sec, &dalloc_list,
pac_sec_shard_pick(tsdn, &pac->sec));
if (edata_list_active_empty(&dalloc_list)) {
*deferred_work_generated = false;
return;
}
/* Flush overflow extents to their backing ecaches. */
bool any_deferred_work = false;
edata_t *flush_edata;
while ((flush_edata =
edata_list_active_first(&dalloc_list)) != NULL) {
edata_list_active_remove(&dalloc_list,
flush_edata);
if (!edata_pinned_get(flush_edata)) {
any_deferred_work = true;
}
pac_ecache_dalloc(tsdn, pac, ehooks, flush_edata);
}
*deferred_work_generated = any_deferred_work;
return;
}
bool pinned = edata_pinned_get(edata);
@ -717,6 +775,13 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
return true;
}
bool update_pac_sec = state == extent_state_dirty
&& sec_is_used(&pac->sec);
if (update_pac_sec && decay_ms == 0) {
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
pac_sec_flush(tsdn, pac);
}
malloc_mutex_lock(tsdn, &decay->mtx);
/*
* Restart decay backlog from scratch, which may cause many dirty pages
@ -732,6 +797,11 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness);
malloc_mutex_unlock(tsdn, &decay->mtx);
if (update_pac_sec && decay_ms != 0) {
atomic_store_zu(&pac->sec_max_alloc,
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
}
return false;
}
@ -746,12 +816,11 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) {
void
pac_reset(tsdn_t *tsdn, pac_t *pac) {
pac_sec_flush(tsdn, pac);
/*
* No-op for now; purging is still done at the arena-level. It should
* get moved in here, though.
* Purging is still done at the arena-level. It should get moved in
* here, though.
*/
(void)tsdn;
(void)pac;
}
void
@ -816,3 +885,16 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) {
extent_destroy_wrapper(tsdn, pac, ehooks, edata);
}
}
/*
 * Drain the PAC's SEC: collect everything sec_flush() hands back into a
 * temporary list, then pass each extent, one at a time and in list order, to
 * pac_ecache_dalloc().
 */
void
pac_sec_flush(tsdn_t *tsdn, pac_t *pac) {
	ehooks_t *ehooks = pac_ehooks_get(pac);

	edata_list_active_t drained;
	edata_list_active_init(&drained);
	sec_flush(tsdn, &pac->sec, &drained);

	/* Pop from the head until the list is empty. */
	for (edata_t *cur = edata_list_active_first(&drained); cur != NULL;
	    cur = edata_list_active_first(&drained)) {
		edata_list_active_remove(&drained, cur);
		pac_ecache_dalloc(tsdn, pac, ehooks, cur);
	}
}

114
src/sec.c
View file

@ -6,8 +6,12 @@
static bool
sec_bin_init(sec_bin_t *bin) {
bin->bytes_cur = 0;
sec_bin_stats_init(&bin->stats);
atomic_store_zu(&bin->bytes_cur, 0, ATOMIC_RELAXED);
atomic_store_zu(&bin->ndalloc_flush, 0, ATOMIC_RELAXED);
atomic_store_zu(&bin->nmisses, 0, ATOMIC_RELAXED);
atomic_store_zu(&bin->nhits, 0, ATOMIC_RELAXED);
atomic_store_zu(&bin->ndalloc_noflush, 0, ATOMIC_RELAXED);
atomic_store_zu(&bin->noverfills, 0, ATOMIC_RELAXED);
edata_list_active_init(&bin->freelist);
bool err = malloc_mutex_init(&bin->mtx, "sec_bin", WITNESS_RANK_SEC_BIN,
malloc_mutex_rank_exclusive);
@ -21,6 +25,8 @@ sec_bin_init(sec_bin_t *bin) {
bool
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
sec->opts = *opts;
sec->bins = NULL;
sec->npsizes = 0;
if (opts->nshards == 0) {
return false;
}
@ -53,18 +59,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
return false;
}
static uint8_t
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
uint8_t
sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp) {
/*
* Eventually, we should implement affinity, tracking source shard using
* the edata_t's newly freed up fields. For now, just randomly
* distribute across all shards.
*
* Callers must ensure sec->opts.nshards > 1.
*/
if (tsdn_null(tsdn)) {
return 0;
}
tsd_t *tsd = tsdn_tsd(tsdn);
uint8_t *idxp = tsd_sec_shardp_get(tsd);
assert(sec->opts.nshards > 1);
if (*idxp == (uint8_t)-1) {
/*
* First use; initialize using the trick from Daniel Lemire's
@ -128,19 +132,21 @@ sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) {
assert(!edata_list_active_empty(&bin->freelist));
edata_list_active_remove(&bin->freelist, edata);
size_t sz = edata_size_get(edata);
assert(sz <= bin->bytes_cur && sz > 0);
bin->bytes_cur -= sz;
bin->stats.nhits++;
size_t bytes_cur = atomic_load_zu(&bin->bytes_cur, ATOMIC_RELAXED);
assert(sz <= bytes_cur && sz > 0);
bytes_cur -= sz;
atomic_store_zu(&bin->bytes_cur, bytes_cur, ATOMIC_RELAXED);
atomic_load_add_store_zu(&bin->nhits, 1);
}
return edata;
}
static edata_t *
sec_multishard_trylock_alloc(
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) {
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind, uint8_t shard) {
assert(sec->opts.nshards > 0);
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
uint8_t cur_shard = shard;
sec_bin_t *bin;
for (size_t i = 0; i < sec->opts.nshards; ++i) {
bin = sec_bin_pick(sec, cur_shard, pszind);
@ -164,13 +170,13 @@ sec_multishard_trylock_alloc(
* declaring a miss. That could recover more remote-shard hits under
* contention, but it also changes the allocation latency policy.
*/
assert(cur_shard == sec_shard_pick(tsdn, sec));
assert(cur_shard == shard);
bin = sec_bin_pick(sec, cur_shard, pszind);
malloc_mutex_lock(tsdn, &bin->mtx);
edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
if (edata == NULL) {
/* Only now we know it is a miss. */
bin->stats.nmisses++;
atomic_load_add_store_zu(&bin->nmisses, 1);
}
malloc_mutex_unlock(tsdn, &bin->mtx);
JE_USDT(sec_alloc, 5, sec, bin, edata, size, /* frequent_reuse */ 1);
@ -178,7 +184,7 @@ sec_multishard_trylock_alloc(
}
edata_t *
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard) {
if (!sec_size_supported(sec, size)) {
return NULL;
}
@ -195,14 +201,14 @@ sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
malloc_mutex_lock(tsdn, &bin->mtx);
edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
if (edata == NULL) {
bin->stats.nmisses++;
atomic_load_add_store_zu(&bin->nmisses, 1);
}
malloc_mutex_unlock(tsdn, &bin->mtx);
JE_USDT(sec_alloc, 5, sec, bin, edata, size,
/* frequent_reuse */ 1);
return edata;
}
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind);
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind, shard);
}
static void
@ -210,7 +216,8 @@ sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size,
edata_list_active_t *dalloc_list) {
malloc_mutex_assert_owner(tsdn, &bin->mtx);
bin->bytes_cur += size;
size_t bytes_cur = atomic_load_zu(&bin->bytes_cur, ATOMIC_RELAXED);
bytes_cur += size;
edata_t *edata = edata_list_active_first(dalloc_list);
assert(edata != NULL);
edata_list_active_remove(dalloc_list, edata);
@ -219,31 +226,33 @@ sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size,
/* Single extent can be returned to SEC */
assert(edata_list_active_empty(dalloc_list));
if (bin->bytes_cur <= sec->opts.max_bytes) {
bin->stats.ndalloc_noflush++;
if (bytes_cur <= sec->opts.max_bytes) {
atomic_store_zu(&bin->bytes_cur, bytes_cur, ATOMIC_RELAXED);
atomic_load_add_store_zu(&bin->ndalloc_noflush, 1);
return;
}
bin->stats.ndalloc_flush++;
atomic_load_add_store_zu(&bin->ndalloc_flush, 1);
/* we want to flush 1/4 of max_bytes */
size_t bytes_target = sec->opts.max_bytes - (sec->opts.max_bytes >> 2);
while (bin->bytes_cur > bytes_target
while (bytes_cur > bytes_target
&& !edata_list_active_empty(&bin->freelist)) {
edata_t *cur = edata_list_active_last(&bin->freelist);
size_t sz = edata_size_get(cur);
assert(sz <= bin->bytes_cur && sz > 0);
bin->bytes_cur -= sz;
assert(sz <= bytes_cur && sz > 0);
bytes_cur -= sz;
edata_list_active_remove(&bin->freelist, cur);
edata_list_active_append(dalloc_list, cur);
}
atomic_store_zu(&bin->bytes_cur, bytes_cur, ATOMIC_RELAXED);
}
static void
sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
pszind_t pszind, edata_list_active_t *dalloc_list) {
pszind_t pszind, edata_list_active_t *dalloc_list, uint8_t shard) {
assert(sec->opts.nshards > 0);
/* Try to dalloc in this threads bin first */
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
uint8_t cur_shard = shard;
for (size_t i = 0; i < sec->opts.nshards; ++i) {
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
if (!malloc_mutex_trylock(tsdn, &bin->mtx)) {
@ -258,7 +267,7 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
}
}
/* No bin had alloc or had the extent */
assert(cur_shard == sec_shard_pick(tsdn, sec));
assert(cur_shard == shard);
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
malloc_mutex_lock(tsdn, &bin->mtx);
sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list);
@ -266,13 +275,11 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
}
void
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
if (!sec_is_used(sec)) {
return;
}
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list,
uint8_t shard) {
edata_t *edata = edata_list_active_first(dalloc_list);
size_t size = edata_size_get(edata);
if (size > sec->opts.max_alloc) {
if (!sec_size_supported(sec, size)) {
return;
}
pszind_t pszind = sz_psz2ind(size);
@ -289,34 +296,37 @@ sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
malloc_mutex_unlock(tsdn, &bin->mtx);
return;
}
sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list);
sec_multishard_trylock_dalloc(
tsdn, sec, size, pszind, dalloc_list, shard);
}
void
sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result,
size_t nallocs) {
size_t nallocs, uint8_t shard) {
assert((size & PAGE_MASK) == 0);
assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc);
assert(sec_size_supported(sec, size));
assert(nallocs > 0);
pszind_t pszind = sz_psz2ind(size);
assert(pszind < sec->npsizes);
sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind);
sec_bin_t *bin = sec_bin_pick(sec, shard, pszind);
malloc_mutex_assert_not_owner(tsdn, &bin->mtx);
malloc_mutex_lock(tsdn, &bin->mtx);
size_t new_cached_bytes = nallocs * size;
if (bin->bytes_cur + new_cached_bytes <= sec->opts.max_bytes) {
size_t bytes_cur = atomic_load_zu(&bin->bytes_cur, ATOMIC_RELAXED);
if (bytes_cur + new_cached_bytes <= sec->opts.max_bytes) {
assert(!edata_list_active_empty(result));
edata_list_active_concat(&bin->freelist, result);
bin->bytes_cur += new_cached_bytes;
atomic_store_zu(&bin->bytes_cur, bytes_cur + new_cached_bytes,
ATOMIC_RELAXED);
} else {
/*
* Unlikely case of many threads filling at the same time and
* going above max.
*/
bin->stats.noverfills++;
while (bin->bytes_cur + size <= sec->opts.max_bytes) {
atomic_load_add_store_zu(&bin->noverfills, 1);
while (bytes_cur + size <= sec->opts.max_bytes) {
edata_t *edata = edata_list_active_first(result);
if (edata == NULL) {
break;
@ -324,8 +334,9 @@ sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result,
edata_list_active_remove(result, edata);
assert(size == edata_size_get(edata));
edata_list_active_append(&bin->freelist, edata);
bin->bytes_cur += size;
bytes_cur += size;
}
atomic_store_zu(&bin->bytes_cur, bytes_cur, ATOMIC_RELAXED);
}
malloc_mutex_unlock(tsdn, &bin->mtx);
}
@ -339,7 +350,7 @@ sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush) {
for (pszind_t i = 0; i < ntotal_bins; i++) {
sec_bin_t *bin = &sec->bins[i];
malloc_mutex_lock(tsdn, &bin->mtx);
bin->bytes_cur = 0;
atomic_store_zu(&bin->bytes_cur, 0, ATOMIC_RELAXED);
edata_list_active_concat(to_flush, &bin->freelist);
malloc_mutex_unlock(tsdn, &bin->mtx);
}
@ -354,10 +365,17 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
size_t ntotal_bins = sec->opts.nshards * sec->npsizes;
for (pszind_t i = 0; i < ntotal_bins; i++) {
sec_bin_t *bin = &sec->bins[i];
malloc_mutex_lock(tsdn, &bin->mtx);
sum += bin->bytes_cur;
sec_bin_stats_accum(&stats->total, &bin->stats);
malloc_mutex_unlock(tsdn, &bin->mtx);
sum += atomic_load_zu(&bin->bytes_cur, ATOMIC_RELAXED);
stats->total.nmisses +=
atomic_load_zu(&bin->nmisses, ATOMIC_RELAXED);
stats->total.nhits +=
atomic_load_zu(&bin->nhits, ATOMIC_RELAXED);
stats->total.ndalloc_flush +=
atomic_load_zu(&bin->ndalloc_flush, ATOMIC_RELAXED);
stats->total.ndalloc_noflush +=
atomic_load_zu(&bin->ndalloc_noflush, ATOMIC_RELAXED);
stats->total.noverfills +=
atomic_load_zu(&bin->noverfills, ATOMIC_RELAXED);
}
stats->bytes += sum;
}

View file

@ -835,6 +835,37 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) {
&sec_overfills);
}
/*
 * Emit the PAC small-extent-cache statistics of arena i.
 *
 * Each value is read through CTL_M2_GET from the matching
 * "stats.arenas.<i>.pac_sec_*" node and emitted immediately as a key/value
 * pair.  Emission order: bytes, hits, misses, dalloc_noflush, dalloc_flush.
 */
static void
stats_arena_pac_sec_print(emitter_t *emitter, unsigned i) {
	size_t cached_bytes;
	CTL_M2_GET("stats.arenas.0.pac_sec_bytes", i, &cached_bytes, size_t);
	emitter_kv(emitter, "pac_sec_bytes",
	    "Bytes in PAC small extent cache",
	    emitter_type_size, &cached_bytes);

	size_t nhits;
	CTL_M2_GET("stats.arenas.0.pac_sec_hits", i, &nhits, size_t);
	emitter_kv(emitter, "pac_sec_hits",
	    "Total hits in PAC small extent cache",
	    emitter_type_size, &nhits);

	size_t nmisses;
	CTL_M2_GET("stats.arenas.0.pac_sec_misses", i, &nmisses, size_t);
	emitter_kv(emitter, "pac_sec_misses",
	    "Total misses in PAC small extent cache",
	    emitter_type_size, &nmisses);

	size_t ndalloc_noflush;
	CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_noflush", i,
	    &ndalloc_noflush, size_t);
	emitter_kv(emitter, "pac_sec_dalloc_noflush",
	    "Dalloc calls without flush in PAC small extent cache",
	    emitter_type_size, &ndalloc_noflush);

	size_t ndalloc_flush;
	CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_flush", i, &ndalloc_flush,
	    size_t);
	emitter_kv(emitter, "pac_sec_dalloc_flush",
	    "Dalloc calls with flush in PAC small extent cache",
	    emitter_type_size, &ndalloc_flush);
}
static void
stats_arena_hpa_shard_counters_print(
emitter_t *emitter, unsigned i, uint64_t uptime) {
@ -1567,6 +1598,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
GET_AND_EMIT_MEM_STAT(extent_avail)
#undef GET_AND_EMIT_MEM_STAT
if (opt_pac_sec_opts.nshards > 0) {
stats_arena_pac_sec_print(emitter, i);
}
if (mutex) {
stats_arena_mutexes_print(emitter, i, uptime);
}
@ -1761,6 +1796,9 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_SIZE_T("hpa_sec_nshards")
OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
OPT_WRITE_SIZE_T("experimental_pac_sec_nshards")
OPT_WRITE_SIZE_T("experimental_pac_sec_max_alloc")
OPT_WRITE_SIZE_T("experimental_pac_sec_max_bytes")
OPT_WRITE_BOOL("huge_arena_pac_thp")
OPT_WRITE_CHAR_P("metadata_thp")
OPT_WRITE_INT64("mutex_max_spin")

View file

@ -181,7 +181,7 @@ static const size_t num_global_mutexes = sizeof(global_mutex_names)
static const char *arena_mutex_names[] = {"large", "extent_avail",
"extents_dirty", "extents_muzzy", "extents_retained", "decay_dirty",
"decay_muzzy", "base", "tcache_list", "hpa_shard", "hpa_shard_grow",
"hpa_sec"};
"hpa_sec", "pac_sec"};
static const size_t num_arena_mutexes = sizeof(arena_mutex_names)
/ sizeof(arena_mutex_names[0]);

View file

@ -312,6 +312,9 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
TEST_MALLCTL_OPT(size_t, experimental_pac_sec_nshards, always);
TEST_MALLCTL_OPT(size_t, experimental_pac_sec_max_alloc, always);
TEST_MALLCTL_OPT(size_t, experimental_pac_sec_max_bytes, always);
TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always);
TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always);

View file

@ -0,0 +1,368 @@
#include "test/jemalloc_test.h"
/*
* Use 1 shard for deterministic stat assertions and a small max_bytes so
* overflow triggers quickly. Background threads are disabled to prevent
* asynchronous decay from interfering with precise stat checks.
*/
const char *malloc_conf =
"experimental_pac_sec_nshards:1,background_thread:false";
/* Copy of opt_pac_sec_opts taken by pac_sec_test_opts_set(). */
static sec_opts_t saved_pac_sec_opts;

/*
 * Remember the current PAC SEC options, then shrink the limits so that one
 * test-sized extent is cacheable (max_alloc) and four of them fill the cache
 * (max_bytes). Undo with pac_sec_test_opts_restore().
 */
static void
pac_sec_test_opts_set(void) {
	/*
	 * Tests request SC_LARGE_MINCLASS bytes, and PAC may see sz_large_pad
	 * on top of that. Deriving the limits from both keeps the test valid
	 * across page sizes.
	 */
	const size_t extent_bytes = SC_LARGE_MINCLASS + sz_large_pad;

	saved_pac_sec_opts = opt_pac_sec_opts;
	opt_pac_sec_opts.max_bytes = 4 * extent_bytes;
	opt_pac_sec_opts.max_alloc = extent_bytes;
}
/*
 * Put back the PAC SEC options that pac_sec_test_opts_set() saved, so the
 * overrides made for one test do not carry over past it.
 */
static void
pac_sec_test_opts_restore(void) {
	opt_pac_sec_opts = saved_pac_sec_opts;
}
/*
 * Extent alloc hook used by pinned_hooks.
 *
 * Delegates the mapping to the default extent hooks, makes sure the memory is
 * reported as committed, and returns the address with
 * EXTENT_ALLOC_FLAG_PINNED OR-ed into it. The extent_hooks argument is
 * ignored; the default hook table is always passed down instead.
 *
 * Returns NULL if the default alloc hook fails, or if the memory came back
 * uncommitted and the default commit hook reports failure (in which case the
 * mapping is handed back through the default dalloc hook first).
 *
 * NOTE(review): if the default hook table had no commit hook, *commit would be
 * set to true without any commit being performed -- confirm that the default
 * table always provides one.
 */
static void *
pinned_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr,
    size_t size, size_t alignment, bool *zero, bool *commit,
    unsigned arena_ind) {
	extent_hooks_t *dflt = (extent_hooks_t *)&ehooks_default_extent_hooks;

	void *mem = dflt->alloc(dflt, new_addr, size, alignment, zero, commit,
	    arena_ind);
	if (mem == NULL) {
		return NULL;
	}

	if (!*commit) {
		bool commit_failed = dflt->commit != NULL
		    && dflt->commit(dflt, mem, size, 0, size, arena_ind);
		if (commit_failed) {
			dflt->dalloc(dflt, mem, size, *commit, arena_ind);
			return NULL;
		}
		*commit = true;
	}

	return (void *)((uintptr_t)mem | EXTENT_ALLOC_FLAG_PINNED);
}
/*
 * Extent destroy hook used by pinned_hooks: forwards the request unchanged to
 * the default destroy hook. The extent_hooks argument is ignored.
 */
static void
pinned_extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size,
    bool committed, unsigned arena_ind) {
	extent_hooks_t *dflt = (extent_hooks_t *)&ehooks_default_extent_hooks;

	dflt->destroy(dflt, addr, size, committed, arena_ind);
}
/*
 * Extent split hook used by pinned_hooks: forwards the request unchanged to
 * the default split hook and returns its result. The extent_hooks argument is
 * ignored.
 */
static bool
pinned_extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size,
    size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
	extent_hooks_t *dflt = (extent_hooks_t *)&ehooks_default_extent_hooks;

	bool result = dflt->split(dflt, addr, size, size_a, size_b, committed,
	    arena_ind);
	return result;
}
/*
 * Extent merge hook used by pinned_hooks: forwards the request unchanged to
 * the default merge hook and returns its result. The extent_hooks argument is
 * ignored.
 */
static bool
pinned_extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
    void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
	extent_hooks_t *dflt = (extent_hooks_t *)&ehooks_default_extent_hooks;

	bool result = dflt->merge(dflt, addr_a, size_a, addr_b, size_b,
	    committed, arena_ind);
	return result;
}
static extent_hooks_t pinned_hooks = {
pinned_extent_alloc,
NULL, /* dalloc */
pinned_extent_destroy,
NULL, /* commit */
NULL, /* decommit */
NULL, /* purge_lazy */
NULL, /* purge_forced */
pinned_extent_split,
pinned_extent_merge
};
/*
 * Read one PAC SEC statistic of an arena.
 *
 * Writes the "epoch" mallctl first, then reads
 * "stats.arenas.<arena_ind>.pac_sec_<field>" as a size_t.
 *
 * arena_ind: index of the arena to query.
 * field:     stat suffix, e.g. "bytes", "hits", "misses".
 *
 * Returns the value read. If the read fails, the failure is reported through
 * expect_d_eq and 0 is returned rather than an indeterminate value.
 */
static size_t
read_stat(unsigned arena_ind, const char *field) {
	uint64_t epoch = 1;
	expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
	    sizeof(epoch)), 0, "Unexpected mallctl failure");

	char cmd[128];
	snprintf(cmd, sizeof(cmd), "stats.arenas.%u.pac_sec_%s",
	    arena_ind, field);

	/* Start from a defined value so a failed read cannot leak garbage. */
	size_t val = 0;
	size_t sz = sizeof(val);
	expect_d_eq(mallctl(cmd, (void *)&val, &sz, NULL, 0), 0,
	    "Unexpected mallctl failure reading pac_sec stat");
	return val;
}
/*
 * Return the page count reported by ecache_npages_get() for the
 * ecache_pinned cache of the given arena's PAC.
 *
 * arena_ind: index of the arena to inspect.
 *
 * Returns 0 if the arena cannot be looked up; the lookup failure is reported
 * through expect_ptr_not_null.
 */
static size_t
read_pinned_npages(unsigned arena_ind) {
	tsd_t *tsd = tsd_fetch();
	arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
	expect_ptr_not_null(arena, "arena_get failed");
	if (arena == NULL) {
		/*
		 * The failure has been reported above; if the expectation did
		 * not stop the test, do not dereference a NULL arena.
		 */
		return 0;
	}
	return ecache_npages_get(&arena->pa_shard.pac.ecache_pinned);
}
/*
 * Write decay_ms to the "arena.<arena_ind>.dirty_decay_ms" mallctl and expect
 * the call to succeed.
 */
static void
dirty_decay_ms_set(unsigned arena_ind, ssize_t decay_ms) {
	char name[64];
	snprintf(name, sizeof(name), "arena.%u.dirty_decay_ms", arena_ind);

	int err = mallctl(name, NULL, NULL, (void *)&decay_ms,
	    sizeof(decay_ms));
	expect_d_eq(err, 0, "dirty_decay_ms mallctl failed");
}
/*
 * Drive a freshly created arena through a PAC SEC life cycle and check the
 * stats.arenas.<i>.pac_sec_* counters after each step: first-alloc miss,
 * free absorbed by the SEC, hit on re-alloc, overflow flush, hit from the
 * populated cache, purge emptying the cache, and a non-hit after the purge.
 *
 * Checks whose failure would make the following code crash (a zero extent
 * size used as a divisor, a NULL pointer array) use assert_* so the test
 * stops there instead of continuing.
 */
TEST_BEGIN(test_pac_sec_alloc_dalloc_cycle) {
	test_skip_if(!config_stats);
	test_skip_if(opt_hpa);
	pac_sec_test_opts_set();

	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
	    0, "Unexpected arenas.create failure");
	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
	size_t alloc_size = SC_LARGE_MINCLASS;

	/*
	 * Read the configured max_bytes so we can compute capacity.
	 * With nshards=1, PAC SEC caches extents one at a time until bytes_cur
	 * reaches max_bytes. Start from 0 so a failed read leaves a defined
	 * value behind.
	 */
	size_t max_bytes = 0;
	sz = sizeof(max_bytes);
	expect_d_eq(mallctl("opt.experimental_pac_sec_max_bytes",
	    (void *)&max_bytes, &sz, NULL, 0), 0,
	    "Unexpected mallctl failure");
	size_t capacity = max_bytes / alloc_size;
	expect_zu_gt(capacity, 0, "SEC capacity must be > 0 for this test");

	/* Step 1: First alloc -- SEC miss, served from ecache or new mapping. */
	void *p1 = mallocx(alloc_size, flags);
	expect_ptr_not_null(p1, "mallocx failed");
	expect_zu_eq(read_stat(arena_ind, "misses"), 1,
	    "first alloc should miss SEC");
	expect_zu_eq(read_stat(arena_ind, "hits"), 0,
	    "no hits yet");
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "SEC should be empty (extent is active)");

	/* Step 2: Free p1 -- SEC absorbs without flush. */
	dallocx(p1, flags);
	size_t cached_after_one = read_stat(arena_ind, "bytes");
	/* Used as a divisor below, so a zero value must stop the test. */
	assert_zu_gt(cached_after_one, 0,
	    "SEC should cache the freed extent");
	/* Actual extent size may exceed alloc_size due to size class rounding. */
	size_t extent_size = cached_after_one;
	expect_zu_eq(read_stat(arena_ind, "dalloc_noflush"), 1,
	    "one dalloc absorbed without flush");
	expect_zu_eq(read_stat(arena_ind, "dalloc_flush"), 0,
	    "no flush yet");
	/* Recompute capacity based on actual extent size. */
	capacity = max_bytes / extent_size;
	expect_zu_gt(capacity, 0, "SEC capacity should be positive");

	/* Step 3: Re-alloc same size -- SEC hit, reuses cached extent. */
	void *p2 = mallocx(alloc_size, flags);
	expect_ptr_not_null(p2, "mallocx failed");
	expect_zu_eq(read_stat(arena_ind, "hits"), 1,
	    "second alloc should hit SEC");
	expect_zu_eq(read_stat(arena_ind, "misses"), 1,
	    "misses should not increase");
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "SEC should be empty after hit");
	dallocx(p2, flags);

	/*
	 * Step 4: Allocate (capacity + 2) extents, then free them all.
	 * The first `capacity` frees fill SEC; remaining frees overflow
	 * and flush cold extents to ecache_dirty.
	 */
	size_t nallocs = capacity + 2;
	void **ptrs = mallocx(nallocs * sizeof(void *),
	    MALLOCX_TCACHE_NONE);
	/* The array is written to right away; stop if it is missing. */
	assert_ptr_not_null(ptrs, "metadata alloc failed");
	for (size_t i = 0; i < nallocs; i++) {
		ptrs[i] = mallocx(alloc_size, flags);
		expect_ptr_not_null(ptrs[i], "mallocx %zu failed", i);
	}
	for (size_t i = 0; i < nallocs; i++) {
		dallocx(ptrs[i], flags);
	}
	size_t noflush = read_stat(arena_ind, "dalloc_noflush");
	size_t flush = read_stat(arena_ind, "dalloc_flush");
	size_t cached_bytes = read_stat(arena_ind, "bytes");
	expect_zu_gt(noflush, 1,
	    "most dallocs should be absorbed");
	expect_zu_gt(flush, 0,
	    "overflow should trigger at least one flush");
	expect_zu_gt(cached_bytes, 0,
	    "SEC should still hold extents after partial flush");
	expect_zu_le(cached_bytes, max_bytes,
	    "SEC should not exceed max_bytes");

	/*
	 * Step 5: Next alloc should be a SEC hit (cache is populated),
	 * and should not increase the miss counter.
	 */
	size_t misses_before = read_stat(arena_ind, "misses");
	void *p3 = mallocx(alloc_size, flags);
	expect_ptr_not_null(p3, "mallocx failed");
	expect_zu_eq(read_stat(arena_ind, "misses"), misses_before,
	    "alloc from populated SEC should not miss");
	dallocx(p3, flags);

	/*
	 * Step 6: Purge flushes SEC entirely.
	 */
	char cmd[64];
	snprintf(cmd, sizeof(cmd), "arena.%u.purge", arena_ind);
	expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0,
	    "purge failed");
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "SEC should be empty after purge");

	/*
	 * Step 7: Alloc after purge must miss SEC again.
	 */
	size_t hits_before = read_stat(arena_ind, "hits");
	void *p4 = mallocx(alloc_size, flags);
	expect_ptr_not_null(p4, "mallocx failed");
	expect_zu_eq(read_stat(arena_ind, "hits"), hits_before,
	    "alloc after purge should miss SEC");
	dallocx(p4, flags);

	dallocx(ptrs, MALLOCX_TCACHE_NONE);
	snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_ind);
	expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0,
	    "arena destroy failed");
	pac_sec_test_opts_restore();
}
TEST_END
/*
 * Check how the PAC SEC reacts to the arena's dirty decay setting: a freed
 * extent is cached with the initial setting, setting dirty_decay_ms to 0
 * empties the SEC and keeps it empty across a further alloc/free pair, and
 * setting it to 100 lets the SEC cache again.
 */
TEST_BEGIN(test_pac_sec_dirty_decay_toggle) {
	test_skip_if(!config_stats);
	test_skip_if(opt_hpa);
	pac_sec_test_opts_set();

	unsigned arena_ind;
	size_t ind_sz = sizeof(arena_ind);
	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL,
	    0), 0, "Unexpected arenas.create failure");
	int mflags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
	size_t nbytes = SC_LARGE_MINCLASS;

	/* Initial decay setting: a freed extent lands in the SEC. */
	void *ptr = mallocx(nbytes, mflags);
	expect_ptr_not_null(ptr, "mallocx failed");
	dallocx(ptr, mflags);
	expect_zu_gt(read_stat(arena_ind, "bytes"), 0,
	    "SEC should cache when dirty decay is enabled");

	/* Switching dirty decay to 0 is expected to flush the SEC... */
	dirty_decay_ms_set(arena_ind, 0);
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "disabling dirty decay should flush SEC");

	/* ...and to keep it from caching the next freed extent. */
	ptr = mallocx(nbytes, mflags);
	expect_ptr_not_null(ptr, "mallocx failed");
	dallocx(ptr, mflags);
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "SEC should stay disabled while dirty decay is zero");

	/* A positive decay time makes the SEC cache again. */
	dirty_decay_ms_set(arena_ind, 100);
	ptr = mallocx(nbytes, mflags);
	expect_ptr_not_null(ptr, "mallocx failed");
	dallocx(ptr, mflags);
	expect_zu_gt(read_stat(arena_ind, "bytes"), 0,
	    "SEC should be usable after dirty decay is re-enabled");

	char destroy_cmd[64];
	snprintf(destroy_cmd, sizeof(destroy_cmd), "arena.%u.destroy",
	    arena_ind);
	expect_d_eq(mallctl(destroy_cmd, NULL, NULL, NULL, 0), 0,
	    "arena destroy failed");
	pac_sec_test_opts_restore();
}
TEST_END
/*
 * Check that extents obtained through pinned_hooks leave the PAC SEC via
 * ecache_pinned: once when freeing more extents than the SEC can hold, and
 * again when the arena is purged.
 *
 * Checks whose failure would make the following code crash (a zero extent
 * size used as a divisor, a NULL pointer array) use assert_* so the test
 * stops there instead of continuing.
 */
TEST_BEGIN(test_pac_sec_flush_pinned) {
	test_skip_if(!config_stats);
	test_skip_if(opt_hpa);
	pac_sec_test_opts_set();

	/* Create an arena whose extents come from the pinned hook table. */
	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	extent_hooks_t *hooks_ptr = &pinned_hooks;
	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
	    &hooks_ptr, sizeof(hooks_ptr)), 0,
	    "Unexpected arenas.create failure");
	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
	size_t alloc_size = SC_LARGE_MINCLASS;

	/* Start from 0 so a failed read leaves a defined value behind. */
	size_t max_bytes = 0;
	sz = sizeof(max_bytes);
	expect_d_eq(mallctl("opt.experimental_pac_sec_max_bytes",
	    (void *)&max_bytes, &sz, NULL, 0), 0,
	    "Unexpected mallctl failure");

	/* One alloc/free pair tells us the real size of a cached extent. */
	void *p = mallocx(alloc_size, flags);
	expect_ptr_not_null(p, "mallocx failed");
	dallocx(p, flags);
	size_t sec_bytes = read_stat(arena_ind, "bytes");
	/* Used as a divisor below, so a zero value must stop the test. */
	assert_zu_gt(sec_bytes, 0, "SEC should cache the pinned extent");
	size_t extent_size = sec_bytes;

	/* Two more extents than max_bytes can hold, to force an overflow. */
	size_t nallocs = max_bytes / extent_size + 2;
	void **ptrs = mallocx(nallocs * sizeof(void *), MALLOCX_TCACHE_NONE);
	/* The array is written to right away; stop if it is missing. */
	assert_ptr_not_null(ptrs, "metadata alloc failed");
	for (size_t i = 0; i < nallocs; i++) {
		ptrs[i] = mallocx(alloc_size, flags);
		expect_ptr_not_null(ptrs[i], "mallocx %zu failed", i);
	}
	size_t pinned_before_overflow = read_pinned_npages(arena_ind);
	for (size_t i = 0; i < nallocs; i++) {
		dallocx(ptrs[i], flags);
	}
	expect_zu_gt(read_pinned_npages(arena_ind), pinned_before_overflow,
	    "SEC overflow should flush pinned extents to ecache_pinned");

	/* Purging must empty the SEC into ecache_pinned as well. */
	size_t pinned_before_purge = read_pinned_npages(arena_ind);
	char cmd[64];
	snprintf(cmd, sizeof(cmd), "arena.%u.purge", arena_ind);
	expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0,
	    "purge failed");
	expect_zu_eq(read_stat(arena_ind, "bytes"), 0,
	    "SEC should be empty after purge");
	expect_zu_gt(read_pinned_npages(arena_ind), pinned_before_purge,
	    "PAC SEC purge should flush pinned extents to ecache_pinned");

	dallocx(ptrs, MALLOCX_TCACHE_NONE);
	snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_ind);
	expect_d_eq(mallctl(cmd, NULL, NULL, NULL, 0), 0,
	    "arena destroy failed");
	pac_sec_test_opts_restore();
}
TEST_END
/*
 * Test entry point: hands the three PAC SEC test cases, in order, to
 * test_no_reentrancy() and returns its result.
 */
int
main(void) {
	return test_no_reentrancy(
	    test_pac_sec_alloc_dalloc_cycle,
	    test_pac_sec_dirty_decay_toggle,
	    test_pac_sec_flush_pinned);
}

View file

@ -31,6 +31,32 @@ destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) {
base_delete(tsdn, tdata->base);
}
/*
 * Choose the shard index that the sec_test_* wrappers pass to the SEC.
 *
 * Returns 0 when there is no thread state (null tsdn) or when the SEC is
 * configured with at most one shard; otherwise returns whatever
 * sec_shard_pick() yields for the calling thread's tsd.
 */
static uint8_t
test_sec_shard(tsdn_t *tsdn, sec_t *sec) {
	bool can_pick = !tsdn_null(tsdn) && sec->opts.nshards > 1;
	if (!can_pick) {
		return 0;
	}

	tsd_t *tsd = tsdn_tsd(tsdn);
	uint8_t *shardp = tsd_sec_shardp_get(tsd);
	return sec_shard_pick(tsd, sec, shardp);
}
/*
 * Test wrapper around sec_alloc(): supplies the shard argument from
 * test_sec_shard() and returns sec_alloc()'s result unchanged.
 */
static edata_t *
sec_test_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
	uint8_t shard = test_sec_shard(tsdn, sec);
	return sec_alloc(tsdn, sec, size, shard);
}
/*
 * Test wrapper around sec_fill(): supplies the shard argument from
 * test_sec_shard() and forwards everything else as given.
 */
static void
sec_test_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
    edata_list_active_t *result, size_t nallocs) {
	uint8_t shard = test_sec_shard(tsdn, sec);
	sec_fill(tsdn, sec, size, result, nallocs, shard);
}
/*
 * Test wrapper around sec_dalloc(): supplies the shard argument from
 * test_sec_shard() and forwards the list as given.
 */
static void
sec_test_dalloc(tsdn_t *tsdn, sec_t *sec,
    edata_list_active_t *dalloc_list) {
	uint8_t shard = test_sec_shard(tsdn, sec);
	sec_dalloc(tsdn, sec, dalloc_list, shard);
}
TEST_BEGIN(test_max_nshards_option_zero) {
test_data_t tdata;
sec_opts_t opts;
@ -41,7 +67,7 @@ TEST_BEGIN(test_max_nshards_option_zero) {
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
test_data_init(tsdn, &tdata, &opts);
edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_null(edata, "SEC should be disabled when nshards==0");
destroy_test_data(tsdn, &tdata);
}
@ -57,7 +83,7 @@ TEST_BEGIN(test_max_alloc_option_too_small) {
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
test_data_init(tsdn, &tdata, &opts);
edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE);
edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, 3 * PAGE);
expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE");
destroy_test_data(tsdn, &tdata);
}
@ -82,7 +108,7 @@ TEST_BEGIN(test_sec_fill) {
edata_size_set(&edata2, PAGE);
edata_list_active_append(&allocs, &edata1);
edata_list_active_append(&allocs, &edata2);
sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2);
sec_test_fill(tsdn, &tdata.sec, PAGE, &allocs, 2);
sec_stats_merge(tsdn, &tdata.sec, &stats);
expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled");
expect_true(edata_list_active_empty(&allocs),
@ -97,7 +123,7 @@ TEST_BEGIN(test_sec_fill) {
edata_list_active_append(&allocs, &edata3);
edata_list_active_append(&allocs, &edata4);
edata_list_active_append(&allocs, &edata5);
sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3);
sec_test_fill(tsdn, &tdata.sec, PAGE, &allocs, 3);
sec_stats_merge(tsdn, &tdata.sec, &stats);
expect_zu_eq(
stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes");
@ -118,7 +144,7 @@ TEST_BEGIN(test_sec_alloc) {
test_data_init(tsdn, &tdata, &opts);
/* Alloc from empty cache returns NULL */
edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_null(edata, "SEC is empty");
/* Place two extents into the sec */
@ -127,11 +153,11 @@ TEST_BEGIN(test_sec_alloc) {
edata_t edata1, edata2;
edata_size_set(&edata1, PAGE);
edata_list_active_append(&allocs, &edata1);
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(edata_list_active_empty(&allocs), "");
edata_size_set(&edata2, PAGE);
edata_list_active_append(&allocs, &edata2);
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(edata_list_active_empty(&allocs), "");
sec_stats_t stats = {0};
@ -141,20 +167,20 @@ TEST_BEGIN(test_sec_alloc) {
stats.bytes = 0;
/* Most recently cached extent should be used on alloc */
edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_eq(edata, &edata2, "edata2 is most recently used");
sec_stats_merge(tsdn, &tdata.sec, &stats);
expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache");
stats.bytes = 0;
/* Alloc can still get extents from cache */
edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_eq(edata, &edata1, "SEC is not empty");
sec_stats_merge(tsdn, &tdata.sec, &stats);
expect_zu_eq(stats.bytes, 0, "No more items after last one is popped");
/* And cache is empty again */
edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_null(edata, "SEC is empty");
destroy_test_data(tsdn, &tdata);
}
@ -178,7 +204,7 @@ TEST_BEGIN(test_sec_dalloc) {
edata_list_active_append(&allocs, &edata1);
/* SEC is empty, we return one pointer to it */
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(
edata_list_active_empty(&allocs), "extents should be consumed");
@ -187,7 +213,7 @@ TEST_BEGIN(test_sec_dalloc) {
edata_size_set(&edata2, PAGE);
edata_list_active_append(&allocs, &edata2);
/* Sec can take one more as well and we will be exactly at max_bytes */
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(
edata_list_active_empty(&allocs), "extents should be consumed");
@ -205,7 +231,7 @@ TEST_BEGIN(test_sec_dalloc) {
edata_t edata3;
edata_size_set(&edata3, PAGE);
edata_list_active_append(&allocs, &edata3);
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_false(
edata_list_active_empty(&allocs), "extents should NOT be consumed");
expect_ptr_ne(
@ -236,7 +262,7 @@ TEST_BEGIN(test_max_bytes_too_low) {
edata_list_active_append(&allocs, &edata1);
/* SEC is empty, we return one pointer to it */
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_false(
edata_list_active_empty(&allocs), "extents should not be consumed");
destroy_test_data(tsdn, &tdata);
@ -266,9 +292,9 @@ TEST_BEGIN(test_sec_flush) {
edata_size_set(&edata4[i], 4 * PAGE);
edata_list_active_append(&allocs1, &edata1[i]);
sec_dalloc(tsdn, &tdata.sec, &allocs1);
sec_test_dalloc(tsdn, &tdata.sec, &allocs1);
edata_list_active_append(&allocs4, &edata4[i]);
sec_dalloc(tsdn, &tdata.sec, &allocs4);
sec_test_dalloc(tsdn, &tdata.sec, &allocs4);
}
sec_stats_t stats = {0};
@ -305,11 +331,11 @@ TEST_BEGIN(test_sec_stats) {
edata_list_active_append(&allocs, &edata1);
/* SEC is empty alloc fails. nmisses==1 */
edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE);
edata_t *edata = sec_test_alloc(tsdn, &tdata.sec, PAGE);
expect_ptr_null(edata, "SEC should be empty");
/* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(
edata_list_active_empty(&allocs), "extents should be consumed");
@ -317,7 +343,7 @@ TEST_BEGIN(test_sec_stats) {
edata_size_set(&edata2, PAGE);
edata_list_active_append(&allocs, &edata2);
/* Sec can take one more, so ndalloc_noflush=2 */
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_true(
edata_list_active_empty(&allocs), "extents should be consumed");
@ -337,7 +363,7 @@ TEST_BEGIN(test_sec_stats) {
edata_t edata3;
edata_size_set(&edata3, PAGE);
edata_list_active_append(&allocs, &edata3);
sec_dalloc(tsdn, &tdata.sec, &allocs);
sec_test_dalloc(tsdn, &tdata.sec, &allocs);
expect_false(
edata_list_active_empty(&allocs), "extents should NOT be consumed");
sec_stats_merge(tsdn, &tdata.sec, &stats);
@ -379,12 +405,12 @@ thd_trylock_test(void *varg) {
*shard_idx = arg->preferred_shard;
/* Fill the shard with some extents */
sec_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz);
sec_test_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz);
expect_true(edata_list_active_empty(&arg->fill_list), "");
for (unsigned i = 0; i < NOPS_PER_THREAD; i++) {
/* Try to allocate from SEC */
arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE);
arg->edata[i] = sec_test_alloc(tsdn, arg->sec, PAGE);
if (arg->edata[i] != NULL) {
expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, "");
}
@ -397,7 +423,7 @@ thd_trylock_test(void *varg) {
arg->nallocs++;
edata_list_active_append(&list, arg->edata[i]);
expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, "");
sec_dalloc(tsdn, arg->sec, &list);
sec_test_dalloc(tsdn, arg->sec, &list);
if (edata_list_active_empty(&list)) {
arg->ndallocs++;
} else {