Use SEC in PAC to reduce lock contention on the ecaches

Add a small extent cache in front of the PAC ecaches. Allocs and dallocs
that fit are served from per-shard SEC bins without taking the ecache
mutex; overflow falls through to the backing ecaches, including
ecache_pinned for pinned extents.

The feature is gated behind experimental_pac_sec_nshards (default 0,
disabled). To support independent HPA and PAC SEC instances,
sec_alloc/sec_dalloc/sec_fill take an explicit shard argument, with HPA
and PAC using separate TSD shard slots.
This commit is contained in:
Bin Liu 2026-05-19 00:11:15 -07:00
parent 11b99d7a21
commit 6b13adf375
19 changed files with 680 additions and 59 deletions

View file

@ -953,6 +953,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
"hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.nshards,
"experimental_pac_sec_nshards", 0, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_alloc,
"experimental_pac_sec_max_alloc", PAGE,
USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN,
CONF_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_bytes,
"experimental_pac_sec_max_bytes",
SEC_OPTS_MAX_BYTES_DEFAULT, 0,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
if (CONF_MATCH("slab_sizes")) {
if (CONF_MATCH_VALUE("default")) {

View file

@ -115,6 +115,9 @@ CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
CTL_PROTO(opt_hpa_sec_max_bytes)
CTL_PROTO(opt_experimental_pac_sec_nshards)
CTL_PROTO(opt_experimental_pac_sec_max_alloc)
CTL_PROTO(opt_experimental_pac_sec_max_bytes)
CTL_PROTO(opt_huge_arena_pac_thp)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
@ -352,6 +355,11 @@ CTL_PROTO(stats_arenas_i_hpa_sec_misses)
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush)
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush)
CTL_PROTO(stats_arenas_i_hpa_sec_overfills)
CTL_PROTO(stats_arenas_i_pac_sec_bytes)
CTL_PROTO(stats_arenas_i_pac_sec_hits)
CTL_PROTO(stats_arenas_i_pac_sec_misses)
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_flush)
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_noflush)
INDEX_PROTO(stats_arenas_i)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
@ -495,6 +503,12 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
{NAME("experimental_pac_sec_nshards"),
CTL(opt_experimental_pac_sec_nshards)},
{NAME("experimental_pac_sec_max_alloc"),
CTL(opt_experimental_pac_sec_max_alloc)},
{NAME("experimental_pac_sec_max_bytes"),
CTL(opt_experimental_pac_sec_max_bytes)},
{NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
@ -859,6 +873,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)},
{NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)},
{NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)},
{NAME("pac_sec_bytes"), CTL(stats_arenas_i_pac_sec_bytes)},
{NAME("pac_sec_hits"), CTL(stats_arenas_i_pac_sec_hits)},
{NAME("pac_sec_misses"), CTL(stats_arenas_i_pac_sec_misses)},
{NAME("pac_sec_dalloc_noflush"),
CTL(stats_arenas_i_pac_sec_dalloc_noflush)},
{NAME("pac_sec_dalloc_flush"), CTL(stats_arenas_i_pac_sec_dalloc_flush)},
{NAME("small"), CHILD(named, stats_arenas_i_small)},
{NAME("large"), CHILD(named, stats_arenas_i_large)},
{NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
@ -1219,6 +1239,10 @@ ctl_arena_stats_sdmerge(
&sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm,
&astats->astats.pa_shard_stats.pac_stats.abandoned_vm);
sec_stats_accum(
&sdstats->astats.pa_shard_stats.pac_stats.pac_sec_stats,
&astats->astats.pa_shard_stats.pac_stats.pac_sec_stats);
sdstats->astats.tcache_bytes += astats->astats.tcache_bytes;
sdstats->astats.tcache_stashed_bytes +=
astats->astats.tcache_stashed_bytes;
@ -2208,6 +2232,12 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_nshards,
opt_pac_sec_opts.nshards, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_alloc,
opt_pac_sec_opts.max_alloc, size_t)
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_bytes,
opt_pac_sec_opts.max_bytes, size_t)
CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
CTL_RO_NL_GEN(
opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
@ -3881,6 +3911,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush,
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills,
arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_bytes,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.bytes, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_hits,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nhits, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_misses,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nmisses, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_flush,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_flush, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_noflush,
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_noflush, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
arenas_i(mib[2])->astats->allocated_small, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,

View file

@ -16,6 +16,18 @@ const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"};
bool opt_experimental_hpa_start_huge_if_thp_always = true;
bool opt_experimental_hpa_enforce_hugify = false;
static inline uint8_t
hpa_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
if (sec->opts.nshards <= 1) {
return 0;
}
if (tsdn_null(tsdn)) {
return 0;
}
tsd_t *tsd = tsdn_tsd(tsdn);
return sec_shard_pick(tsd, sec, tsd_sec_shardp_get(tsd));
}
bool
hpa_hugepage_size_exceeds_limit(void) {
return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE;
@ -947,7 +959,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
&& (size > shard->opts.slab_max_alloc)) {
return NULL;
}
edata_t *edata = sec_alloc(tsdn, &shard->sec, size);
edata_t *edata = sec_alloc(tsdn, &shard->sec, size,
hpa_sec_shard_pick(tsdn, &shard->sec));
if (edata != NULL) {
return edata;
}
@ -968,7 +981,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
}
if (nsuccess > 0) {
assert(sec_size_supported(&shard->sec, size));
sec_fill(tsdn, &shard->sec, size, &results, nsuccess);
sec_fill(tsdn, &shard->sec, size, &results, nsuccess,
hpa_sec_shard_pick(tsdn, &shard->sec));
/* Unlikely rollback in case of overfill */
if (!edata_list_active_empty(&results)) {
hpa_dalloc_batch(
@ -1075,7 +1089,8 @@ hpa_dalloc(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata,
edata_list_active_init(&dalloc_list);
edata_list_active_append(&dalloc_list, edata);
sec_dalloc(tsdn, &shard->sec, &dalloc_list);
sec_dalloc(tsdn, &shard->sec, &dalloc_list,
hpa_sec_shard_pick(tsdn, &shard->sec));
if (edata_list_active_empty(&dalloc_list)) {
/* sec consumed the pointer */
*deferred_work_generated = false;

View file

@ -181,6 +181,9 @@ size_t opt_calloc_madvise_threshold = CALLOC_MADVISE_THRESHOLD_DEFAULT;
bool opt_hpa = false;
hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
sec_opts_t opt_pac_sec_opts = {0,
(32 * 1024) > (PAGE * 2) ? (32 * 1024) : (PAGE * 2),
SEC_OPTS_MAX_BYTES_DEFAULT};
/* False should be the common case. Set to true to trigger initialization. */
bool malloc_slow = true;

View file

@ -94,6 +94,7 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) {
pac_sec_flush(tsdn, &shard->pac);
if (shard->ever_used_hpa) {
hpa_shard_flush(tsdn, &shard->hpa);
}

View file

@ -16,6 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
sec_prefork2(tsdn, &shard->pac.sec);
if (shard->ever_used_hpa) {
hpa_shard_prefork2(tsdn, &shard->hpa);
}
@ -53,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_postfork_parent(tsdn, &shard->pac.ecache_retained);
ecache_postfork_parent(tsdn, &shard->pac.ecache_pinned);
malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx);
sec_postfork_parent(tsdn, &shard->pac.sec);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
@ -68,6 +70,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_postfork_child(tsdn, &shard->pac.ecache_retained);
ecache_postfork_child(tsdn, &shard->pac.ecache_pinned);
malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx);
sec_postfork_child(tsdn, &shard->pac.sec);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
@ -179,6 +182,9 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
if (shard->ever_used_hpa) {
hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out);
}
sec_stats_merge(tsdn, &shard->pac.sec,
&pa_shard_stats_out->pac_stats.pac_sec_stats);
}
static void
@ -207,6 +213,9 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy);
sec_mutex_stats_read(tsdn, &shard->pac.sec,
&mutex_prof_data[arena_prof_mutex_pac_sec]);
if (shard->ever_used_hpa) {
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa.mtx, arena_prof_mutex_hpa_shard);

View file

@ -4,6 +4,18 @@
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/san.h"
static inline uint8_t
pac_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
if (sec->opts.nshards <= 1) {
return 0;
}
if (tsdn_null(tsdn)) {
return 0;
}
tsd_t *tsd = tsdn_tsd(tsdn);
return sec_shard_pick(tsd, sec, tsd_pac_sec_shardp_get(tsd));
}
static inline void
pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay,
pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
@ -92,6 +104,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
pac->stats_mtx = stats_mtx;
atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);
if (sec_init(tsdn, &pac->sec, base, &opt_pac_sec_opts)) {
/* Fall back to no SEC on allocation failure. */
pac->sec.opts.nshards = 0;
}
if (!sec_is_used(&pac->sec) || dirty_decay_ms == 0) {
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
} else {
atomic_store_zu(&pac->sec_max_alloc,
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
}
return false;
}
@ -133,6 +156,15 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
edata_t *edata = NULL;
if (!guarded && !zero && alignment <= PAGE
&& size <= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
edata = sec_alloc(tsdn, &pac->sec, size,
pac_sec_shard_pick(tsdn, &pac->sec));
if (edata != NULL) {
return edata;
}
}
/*
* Guarded allocations need surrounding guard pages, which the pinned
* pool does not maintain; skip ecache_pinned in that case.
@ -395,6 +427,32 @@ pac_dalloc(tsdn_t *tsdn, pac_t *pac, edata_t *edata,
san_unguard_pages_two_sided(
tsdn, ehooks, edata, pac->emap);
}
} else if (edata_size_get(edata)
<= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
edata_zeroed_set(edata, false);
edata_list_active_t dalloc_list;
edata_list_active_init(&dalloc_list);
edata_list_active_append(&dalloc_list, edata);
sec_dalloc(tsdn, &pac->sec, &dalloc_list,
pac_sec_shard_pick(tsdn, &pac->sec));
if (edata_list_active_empty(&dalloc_list)) {
*deferred_work_generated = false;
return;
}
/* Flush overflow extents to their backing ecaches. */
bool any_deferred_work = false;
edata_t *flush_edata;
while ((flush_edata =
edata_list_active_first(&dalloc_list)) != NULL) {
edata_list_active_remove(&dalloc_list,
flush_edata);
if (!edata_pinned_get(flush_edata)) {
any_deferred_work = true;
}
pac_ecache_dalloc(tsdn, pac, ehooks, flush_edata);
}
*deferred_work_generated = any_deferred_work;
return;
}
bool pinned = edata_pinned_get(edata);
@ -717,6 +775,13 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
return true;
}
bool update_pac_sec = state == extent_state_dirty
&& sec_is_used(&pac->sec);
if (update_pac_sec && decay_ms == 0) {
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
pac_sec_flush(tsdn, pac);
}
malloc_mutex_lock(tsdn, &decay->mtx);
/*
* Restart decay backlog from scratch, which may cause many dirty pages
@ -732,6 +797,11 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness);
malloc_mutex_unlock(tsdn, &decay->mtx);
if (update_pac_sec && decay_ms != 0) {
atomic_store_zu(&pac->sec_max_alloc,
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
}
return false;
}
@ -746,12 +816,11 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) {
void
pac_reset(tsdn_t *tsdn, pac_t *pac) {
pac_sec_flush(tsdn, pac);
/*
* No-op for now; purging is still done at the arena-level. It should
* get moved in here, though.
* Purging is still done at the arena-level. It should get moved in
* here, though.
*/
(void)tsdn;
(void)pac;
}
void
@ -816,3 +885,16 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) {
extent_destroy_wrapper(tsdn, pac, ehooks, edata);
}
}
void
pac_sec_flush(tsdn_t *tsdn, pac_t *pac) {
ehooks_t *ehooks = pac_ehooks_get(pac);
edata_list_active_t to_flush;
edata_list_active_init(&to_flush);
sec_flush(tsdn, &pac->sec, &to_flush);
edata_t *edata;
while ((edata = edata_list_active_first(&to_flush)) != NULL) {
edata_list_active_remove(&to_flush, edata);
pac_ecache_dalloc(tsdn, pac, ehooks, edata);
}
}

View file

@ -25,6 +25,8 @@ sec_bin_init(sec_bin_t *bin) {
bool
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
sec->opts = *opts;
sec->bins = NULL;
sec->npsizes = 0;
if (opts->nshards == 0) {
return false;
}
@ -57,18 +59,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
return false;
}
static uint8_t
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
uint8_t
sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp) {
/*
* Eventually, we should implement affinity, tracking source shard using
* the edata_t's newly freed up fields. For now, just randomly
* distribute across all shards.
*
* Callers must ensure sec->opts.nshards > 1.
*/
if (tsdn_null(tsdn)) {
return 0;
}
tsd_t *tsd = tsdn_tsd(tsdn);
uint8_t *idxp = tsd_sec_shardp_get(tsd);
assert(sec->opts.nshards > 1);
if (*idxp == (uint8_t)-1) {
/*
* First use; initialize using the trick from Daniel Lemire's
@ -143,10 +143,10 @@ sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) {
static edata_t *
sec_multishard_trylock_alloc(
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) {
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind, uint8_t shard) {
assert(sec->opts.nshards > 0);
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
uint8_t cur_shard = shard;
sec_bin_t *bin;
for (size_t i = 0; i < sec->opts.nshards; ++i) {
bin = sec_bin_pick(sec, cur_shard, pszind);
@ -170,7 +170,7 @@ sec_multishard_trylock_alloc(
* declaring a miss. That could recover more remote-shard hits under
* contention, but it also changes the allocation latency policy.
*/
assert(cur_shard == sec_shard_pick(tsdn, sec));
assert(cur_shard == shard);
bin = sec_bin_pick(sec, cur_shard, pszind);
malloc_mutex_lock(tsdn, &bin->mtx);
edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
@ -184,7 +184,7 @@ sec_multishard_trylock_alloc(
}
edata_t *
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard) {
if (!sec_size_supported(sec, size)) {
return NULL;
}
@ -208,7 +208,7 @@ sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
/* frequent_reuse */ 1);
return edata;
}
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind);
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind, shard);
}
static void
@ -248,11 +248,11 @@ sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size,
static void
sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
pszind_t pszind, edata_list_active_t *dalloc_list) {
pszind_t pszind, edata_list_active_t *dalloc_list, uint8_t shard) {
assert(sec->opts.nshards > 0);
/* Try to dalloc in this threads bin first */
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
uint8_t cur_shard = shard;
for (size_t i = 0; i < sec->opts.nshards; ++i) {
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
if (!malloc_mutex_trylock(tsdn, &bin->mtx)) {
@ -267,7 +267,7 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
}
}
/* No bin had alloc or had the extent */
assert(cur_shard == sec_shard_pick(tsdn, sec));
assert(cur_shard == shard);
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
malloc_mutex_lock(tsdn, &bin->mtx);
sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list);
@ -275,13 +275,11 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
}
void
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
if (!sec_is_used(sec)) {
return;
}
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list,
uint8_t shard) {
edata_t *edata = edata_list_active_first(dalloc_list);
size_t size = edata_size_get(edata);
if (size > sec->opts.max_alloc) {
if (!sec_size_supported(sec, size)) {
return;
}
pszind_t pszind = sz_psz2ind(size);
@ -298,20 +296,21 @@ sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
malloc_mutex_unlock(tsdn, &bin->mtx);
return;
}
sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list);
sec_multishard_trylock_dalloc(
tsdn, sec, size, pszind, dalloc_list, shard);
}
void
sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result,
size_t nallocs) {
size_t nallocs, uint8_t shard) {
assert((size & PAGE_MASK) == 0);
assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc);
assert(sec_size_supported(sec, size));
assert(nallocs > 0);
pszind_t pszind = sz_psz2ind(size);
assert(pszind < sec->npsizes);
sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind);
sec_bin_t *bin = sec_bin_pick(sec, shard, pszind);
malloc_mutex_assert_not_owner(tsdn, &bin->mtx);
malloc_mutex_lock(tsdn, &bin->mtx);
size_t new_cached_bytes = nallocs * size;

View file

@ -835,6 +835,37 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) {
&sec_overfills);
}
static void
stats_arena_pac_sec_print(emitter_t *emitter, unsigned i) {
size_t sec_bytes;
size_t sec_hits;
size_t sec_misses;
size_t sec_dalloc_flush;
size_t sec_dalloc_noflush;
CTL_M2_GET("stats.arenas.0.pac_sec_bytes", i, &sec_bytes, size_t);
emitter_kv(emitter, "pac_sec_bytes",
"Bytes in PAC small extent cache",
emitter_type_size, &sec_bytes);
CTL_M2_GET("stats.arenas.0.pac_sec_hits", i, &sec_hits, size_t);
emitter_kv(emitter, "pac_sec_hits",
"Total hits in PAC small extent cache",
emitter_type_size, &sec_hits);
CTL_M2_GET("stats.arenas.0.pac_sec_misses", i, &sec_misses, size_t);
emitter_kv(emitter, "pac_sec_misses",
"Total misses in PAC small extent cache",
emitter_type_size, &sec_misses);
CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_noflush", i,
&sec_dalloc_noflush, size_t);
emitter_kv(emitter, "pac_sec_dalloc_noflush",
"Dalloc calls without flush in PAC small extent cache",
emitter_type_size, &sec_dalloc_noflush);
CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_flush", i, &sec_dalloc_flush,
size_t);
emitter_kv(emitter, "pac_sec_dalloc_flush",
"Dalloc calls with flush in PAC small extent cache",
emitter_type_size, &sec_dalloc_flush);
}
static void
stats_arena_hpa_shard_counters_print(
emitter_t *emitter, unsigned i, uint64_t uptime) {
@ -1567,6 +1598,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
GET_AND_EMIT_MEM_STAT(extent_avail)
#undef GET_AND_EMIT_MEM_STAT
if (opt_pac_sec_opts.nshards > 0) {
stats_arena_pac_sec_print(emitter, i);
}
if (mutex) {
stats_arena_mutexes_print(emitter, i, uptime);
}
@ -1761,6 +1796,9 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_SIZE_T("hpa_sec_nshards")
OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
OPT_WRITE_SIZE_T("experimental_pac_sec_nshards")
OPT_WRITE_SIZE_T("experimental_pac_sec_max_alloc")
OPT_WRITE_SIZE_T("experimental_pac_sec_max_bytes")
OPT_WRITE_BOOL("huge_arena_pac_thp")
OPT_WRITE_CHAR_P("metadata_thp")
OPT_WRITE_INT64("mutex_max_spin")