mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-06-04 11:14:20 +03:00
Use SEC in PAC to reduce lock contention on the ecaches
Add a small extent cache in front of the PAC ecaches. Allocs and dallocs that fit are served from per-shard SEC bins without taking the ecache mutex; overflow falls through to the backing ecaches, including ecache_pinned for pinned extents. The feature is gated behind experimental_pac_sec_nshards (default 0, disabled). To support independent HPA and PAC SEC instances, sec_alloc/sec_dalloc/sec_fill take an explicit shard argument, with HPA and PAC using separate TSD shard slots.
This commit is contained in:
parent
11b99d7a21
commit
6b13adf375
19 changed files with 680 additions and 59 deletions
11
src/conf.c
11
src/conf.c
|
|
@ -953,6 +953,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
|||
CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
|
||||
"hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
|
||||
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
|
||||
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.nshards,
|
||||
"experimental_pac_sec_nshards", 0, 0,
|
||||
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
|
||||
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_alloc,
|
||||
"experimental_pac_sec_max_alloc", PAGE,
|
||||
USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN,
|
||||
CONF_CHECK_MAX, true);
|
||||
CONF_HANDLE_SIZE_T(opt_pac_sec_opts.max_bytes,
|
||||
"experimental_pac_sec_max_bytes",
|
||||
SEC_OPTS_MAX_BYTES_DEFAULT, 0,
|
||||
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
|
||||
|
||||
if (CONF_MATCH("slab_sizes")) {
|
||||
if (CONF_MATCH_VALUE("default")) {
|
||||
|
|
|
|||
41
src/ctl.c
41
src/ctl.c
|
|
@ -115,6 +115,9 @@ CTL_PROTO(opt_hpa_dirty_mult)
|
|||
CTL_PROTO(opt_hpa_sec_nshards)
|
||||
CTL_PROTO(opt_hpa_sec_max_alloc)
|
||||
CTL_PROTO(opt_hpa_sec_max_bytes)
|
||||
CTL_PROTO(opt_experimental_pac_sec_nshards)
|
||||
CTL_PROTO(opt_experimental_pac_sec_max_alloc)
|
||||
CTL_PROTO(opt_experimental_pac_sec_max_bytes)
|
||||
CTL_PROTO(opt_huge_arena_pac_thp)
|
||||
CTL_PROTO(opt_metadata_thp)
|
||||
CTL_PROTO(opt_retain)
|
||||
|
|
@ -352,6 +355,11 @@ CTL_PROTO(stats_arenas_i_hpa_sec_misses)
|
|||
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush)
|
||||
CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush)
|
||||
CTL_PROTO(stats_arenas_i_hpa_sec_overfills)
|
||||
CTL_PROTO(stats_arenas_i_pac_sec_bytes)
|
||||
CTL_PROTO(stats_arenas_i_pac_sec_hits)
|
||||
CTL_PROTO(stats_arenas_i_pac_sec_misses)
|
||||
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_flush)
|
||||
CTL_PROTO(stats_arenas_i_pac_sec_dalloc_noflush)
|
||||
INDEX_PROTO(stats_arenas_i)
|
||||
CTL_PROTO(stats_allocated)
|
||||
CTL_PROTO(stats_active)
|
||||
|
|
@ -495,6 +503,12 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
|
|||
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
|
||||
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
|
||||
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
|
||||
{NAME("experimental_pac_sec_nshards"),
|
||||
CTL(opt_experimental_pac_sec_nshards)},
|
||||
{NAME("experimental_pac_sec_max_alloc"),
|
||||
CTL(opt_experimental_pac_sec_max_alloc)},
|
||||
{NAME("experimental_pac_sec_max_bytes"),
|
||||
CTL(opt_experimental_pac_sec_max_bytes)},
|
||||
{NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
|
||||
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
|
||||
{NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
|
||||
|
|
@ -859,6 +873,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
|
|||
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)},
|
||||
{NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)},
|
||||
{NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)},
|
||||
{NAME("pac_sec_bytes"), CTL(stats_arenas_i_pac_sec_bytes)},
|
||||
{NAME("pac_sec_hits"), CTL(stats_arenas_i_pac_sec_hits)},
|
||||
{NAME("pac_sec_misses"), CTL(stats_arenas_i_pac_sec_misses)},
|
||||
{NAME("pac_sec_dalloc_noflush"),
|
||||
CTL(stats_arenas_i_pac_sec_dalloc_noflush)},
|
||||
{NAME("pac_sec_dalloc_flush"), CTL(stats_arenas_i_pac_sec_dalloc_flush)},
|
||||
{NAME("small"), CHILD(named, stats_arenas_i_small)},
|
||||
{NAME("large"), CHILD(named, stats_arenas_i_large)},
|
||||
{NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
|
||||
|
|
@ -1219,6 +1239,10 @@ ctl_arena_stats_sdmerge(
|
|||
&sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm,
|
||||
&astats->astats.pa_shard_stats.pac_stats.abandoned_vm);
|
||||
|
||||
sec_stats_accum(
|
||||
&sdstats->astats.pa_shard_stats.pac_stats.pac_sec_stats,
|
||||
&astats->astats.pa_shard_stats.pac_stats.pac_sec_stats);
|
||||
|
||||
sdstats->astats.tcache_bytes += astats->astats.tcache_bytes;
|
||||
sdstats->astats.tcache_stashed_bytes +=
|
||||
astats->astats.tcache_stashed_bytes;
|
||||
|
|
@ -2208,6 +2232,12 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
|
|||
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
|
||||
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
|
||||
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
|
||||
CTL_RO_NL_GEN(opt_experimental_pac_sec_nshards,
|
||||
opt_pac_sec_opts.nshards, size_t)
|
||||
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_alloc,
|
||||
opt_pac_sec_opts.max_alloc, size_t)
|
||||
CTL_RO_NL_GEN(opt_experimental_pac_sec_max_bytes,
|
||||
opt_pac_sec_opts.max_bytes, size_t)
|
||||
CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
|
||||
CTL_RO_NL_GEN(
|
||||
opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
|
||||
|
|
@ -3881,6 +3911,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush,
|
|||
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills,
|
||||
arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t)
|
||||
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_bytes,
|
||||
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.bytes, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_hits,
|
||||
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nhits, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_misses,
|
||||
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.nmisses, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_flush,
|
||||
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_flush, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_pac_sec_dalloc_noflush,
|
||||
arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.pac_sec_stats.total.ndalloc_noflush, size_t)
|
||||
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
|
||||
arenas_i(mib[2])->astats->allocated_small, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,
|
||||
|
|
|
|||
21
src/hpa.c
21
src/hpa.c
|
|
@ -16,6 +16,18 @@ const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"};
|
|||
bool opt_experimental_hpa_start_huge_if_thp_always = true;
|
||||
bool opt_experimental_hpa_enforce_hugify = false;
|
||||
|
||||
/*
 * Choose the SEC shard index that HPA passes to sec_alloc / sec_fill /
 * sec_dalloc for the calling thread.
 *
 * Returns 0 without touching thread-specific data when the SEC has at most
 * one shard or when tsdn is null; otherwise defers to sec_shard_pick(), handing
 * it the slot returned by tsd_sec_shardp_get() (the HPA slot, as opposed to the
 * tsd_pac_sec_shardp_get() slot used by the PAC variant).
 */
static inline uint8_t
|
||||
hpa_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
|
||||
/* Zero or one shard: index 0 is the only possible answer. */
if (sec->opts.nshards <= 1) {
|
||||
return 0;
|
||||
}
|
||||
/* No tsd available, so there is no per-thread slot to consult. */
if (tsdn_null(tsdn)) {
|
||||
return 0;
|
||||
}
|
||||
tsd_t *tsd = tsdn_tsd(tsdn);
|
||||
return sec_shard_pick(tsd, sec, tsd_sec_shardp_get(tsd));
|
||||
}
|
||||
|
||||
bool
|
||||
hpa_hugepage_size_exceeds_limit(void) {
|
||||
return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE;
|
||||
|
|
@ -947,7 +959,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
|
|||
&& (size > shard->opts.slab_max_alloc)) {
|
||||
return NULL;
|
||||
}
|
||||
edata_t *edata = sec_alloc(tsdn, &shard->sec, size);
|
||||
edata_t *edata = sec_alloc(tsdn, &shard->sec, size,
|
||||
hpa_sec_shard_pick(tsdn, &shard->sec));
|
||||
if (edata != NULL) {
|
||||
return edata;
|
||||
}
|
||||
|
|
@ -968,7 +981,8 @@ hpa_alloc(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t alignment,
|
|||
}
|
||||
if (nsuccess > 0) {
|
||||
assert(sec_size_supported(&shard->sec, size));
|
||||
sec_fill(tsdn, &shard->sec, size, &results, nsuccess);
|
||||
sec_fill(tsdn, &shard->sec, size, &results, nsuccess,
|
||||
hpa_sec_shard_pick(tsdn, &shard->sec));
|
||||
/* Unlikely rollback in case of overfill */
|
||||
if (!edata_list_active_empty(&results)) {
|
||||
hpa_dalloc_batch(
|
||||
|
|
@ -1075,7 +1089,8 @@ hpa_dalloc(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata,
|
|||
edata_list_active_init(&dalloc_list);
|
||||
edata_list_active_append(&dalloc_list, edata);
|
||||
|
||||
sec_dalloc(tsdn, &shard->sec, &dalloc_list);
|
||||
sec_dalloc(tsdn, &shard->sec, &dalloc_list,
|
||||
hpa_sec_shard_pick(tsdn, &shard->sec));
|
||||
if (edata_list_active_empty(&dalloc_list)) {
|
||||
/* sec consumed the pointer */
|
||||
*deferred_work_generated = false;
|
||||
|
|
|
|||
|
|
@ -181,6 +181,9 @@ size_t opt_calloc_madvise_threshold = CALLOC_MADVISE_THRESHOLD_DEFAULT;
|
|||
bool opt_hpa = false;
|
||||
hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT;
|
||||
sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT;
|
||||
/*
 * Default options for the PAC small extent cache.  The three positional
 * initializers are: 0, the larger of 32 KiB and two pages, and
 * SEC_OPTS_MAX_BYTES_DEFAULT.
 * NOTE(review): presumably these map to nshards / max_alloc / max_bytes in
 * that order (nshards == 0 leaving the feature disabled) -- confirm against
 * the sec_opts_t declaration.
 */
sec_opts_t opt_pac_sec_opts = {0,
|
||||
(32 * 1024) > (PAGE * 2) ? (32 * 1024) : (PAGE * 2),
|
||||
SEC_OPTS_MAX_BYTES_DEFAULT};
|
||||
|
||||
/* False should be the common case. Set to true to trigger initialization. */
|
||||
bool malloc_slow = true;
|
||||
|
|
|
|||
1
src/pa.c
1
src/pa.c
|
|
@ -94,6 +94,7 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
|
|||
|
||||
void
|
||||
pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) {
|
||||
pac_sec_flush(tsdn, &shard->pac);
|
||||
if (shard->ever_used_hpa) {
|
||||
hpa_shard_flush(tsdn, &shard->hpa);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
|
|||
|
||||
void
|
||||
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
|
||||
sec_prefork2(tsdn, &shard->pac.sec);
|
||||
if (shard->ever_used_hpa) {
|
||||
hpa_shard_prefork2(tsdn, &shard->hpa);
|
||||
}
|
||||
|
|
@ -53,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
|
|||
ecache_postfork_parent(tsdn, &shard->pac.ecache_retained);
|
||||
ecache_postfork_parent(tsdn, &shard->pac.ecache_pinned);
|
||||
malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx);
|
||||
sec_postfork_parent(tsdn, &shard->pac.sec);
|
||||
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
|
||||
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
|
||||
if (shard->ever_used_hpa) {
|
||||
|
|
@ -68,6 +70,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
|
|||
ecache_postfork_child(tsdn, &shard->pac.ecache_retained);
|
||||
ecache_postfork_child(tsdn, &shard->pac.ecache_pinned);
|
||||
malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx);
|
||||
sec_postfork_child(tsdn, &shard->pac.sec);
|
||||
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
|
||||
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
|
||||
if (shard->ever_used_hpa) {
|
||||
|
|
@ -179,6 +182,9 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
|
|||
if (shard->ever_used_hpa) {
|
||||
hpa_shard_stats_merge(tsdn, &shard->hpa, hpa_stats_out);
|
||||
}
|
||||
|
||||
sec_stats_merge(tsdn, &shard->pac.sec,
|
||||
&pa_shard_stats_out->pac_stats.pac_sec_stats);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -207,6 +213,9 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
|
|||
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
|
||||
&shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy);
|
||||
|
||||
sec_mutex_stats_read(tsdn, &shard->pac.sec,
|
||||
&mutex_prof_data[arena_prof_mutex_pac_sec]);
|
||||
|
||||
if (shard->ever_used_hpa) {
|
||||
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
|
||||
&shard->hpa.mtx, arena_prof_mutex_hpa_shard);
|
||||
|
|
|
|||
90
src/pac.c
90
src/pac.c
|
|
@ -4,6 +4,18 @@
|
|||
#include "jemalloc/internal/pac.h"
|
||||
#include "jemalloc/internal/san.h"
|
||||
|
||||
/*
 * Choose the SEC shard index that PAC passes to sec_alloc / sec_dalloc for the
 * calling thread.
 *
 * Returns 0 without touching thread-specific data when the SEC has at most
 * one shard or when tsdn is null; otherwise defers to sec_shard_pick(), handing
 * it the slot returned by tsd_pac_sec_shardp_get().
 */
static inline uint8_t
|
||||
pac_sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
|
||||
/* Zero or one shard: index 0 is the only possible answer. */
if (sec->opts.nshards <= 1) {
|
||||
return 0;
|
||||
}
|
||||
/* No tsd available, so there is no per-thread slot to consult. */
if (tsdn_null(tsdn)) {
|
||||
return 0;
|
||||
}
|
||||
tsd_t *tsd = tsdn_tsd(tsdn);
|
||||
return sec_shard_pick(tsd, sec, tsd_pac_sec_shardp_get(tsd));
|
||||
}
|
||||
|
||||
static inline void
|
||||
pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay,
|
||||
pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) {
|
||||
|
|
@ -92,6 +104,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
|
|||
pac->stats_mtx = stats_mtx;
|
||||
atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED);
|
||||
|
||||
if (sec_init(tsdn, &pac->sec, base, &opt_pac_sec_opts)) {
|
||||
/* Fall back to no SEC on allocation failure. */
|
||||
pac->sec.opts.nshards = 0;
|
||||
}
|
||||
if (!sec_is_used(&pac->sec) || dirty_decay_ms == 0) {
|
||||
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
|
||||
} else {
|
||||
atomic_store_zu(&pac->sec_max_alloc,
|
||||
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -133,6 +156,15 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size,
|
|||
|
||||
edata_t *edata = NULL;
|
||||
|
||||
if (!guarded && !zero && alignment <= PAGE
|
||||
&& size <= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
|
||||
edata = sec_alloc(tsdn, &pac->sec, size,
|
||||
pac_sec_shard_pick(tsdn, &pac->sec));
|
||||
if (edata != NULL) {
|
||||
return edata;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Guarded allocations need surrounding guard pages, which the pinned
|
||||
* pool does not maintain; skip ecache_pinned in that case.
|
||||
|
|
@ -395,6 +427,32 @@ pac_dalloc(tsdn_t *tsdn, pac_t *pac, edata_t *edata,
|
|||
san_unguard_pages_two_sided(
|
||||
tsdn, ehooks, edata, pac->emap);
|
||||
}
|
||||
} else if (edata_size_get(edata)
|
||||
<= atomic_load_zu(&pac->sec_max_alloc, ATOMIC_RELAXED)) {
|
||||
edata_zeroed_set(edata, false);
|
||||
edata_list_active_t dalloc_list;
|
||||
edata_list_active_init(&dalloc_list);
|
||||
edata_list_active_append(&dalloc_list, edata);
|
||||
sec_dalloc(tsdn, &pac->sec, &dalloc_list,
|
||||
pac_sec_shard_pick(tsdn, &pac->sec));
|
||||
if (edata_list_active_empty(&dalloc_list)) {
|
||||
*deferred_work_generated = false;
|
||||
return;
|
||||
}
|
||||
/* Flush overflow extents to their backing ecaches. */
|
||||
bool any_deferred_work = false;
|
||||
edata_t *flush_edata;
|
||||
while ((flush_edata =
|
||||
edata_list_active_first(&dalloc_list)) != NULL) {
|
||||
edata_list_active_remove(&dalloc_list,
|
||||
flush_edata);
|
||||
if (!edata_pinned_get(flush_edata)) {
|
||||
any_deferred_work = true;
|
||||
}
|
||||
pac_ecache_dalloc(tsdn, pac, ehooks, flush_edata);
|
||||
}
|
||||
*deferred_work_generated = any_deferred_work;
|
||||
return;
|
||||
}
|
||||
|
||||
bool pinned = edata_pinned_get(edata);
|
||||
|
|
@ -717,6 +775,13 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool update_pac_sec = state == extent_state_dirty
|
||||
&& sec_is_used(&pac->sec);
|
||||
if (update_pac_sec && decay_ms == 0) {
|
||||
atomic_store_zu(&pac->sec_max_alloc, 0, ATOMIC_RELAXED);
|
||||
pac_sec_flush(tsdn, pac);
|
||||
}
|
||||
|
||||
malloc_mutex_lock(tsdn, &decay->mtx);
|
||||
/*
|
||||
* Restart decay backlog from scratch, which may cause many dirty pages
|
||||
|
|
@ -732,6 +797,11 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
|
|||
pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness);
|
||||
malloc_mutex_unlock(tsdn, &decay->mtx);
|
||||
|
||||
if (update_pac_sec && decay_ms != 0) {
|
||||
atomic_store_zu(&pac->sec_max_alloc,
|
||||
pac->sec.opts.max_alloc, ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -746,12 +816,11 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) {
|
|||
|
||||
void
|
||||
pac_reset(tsdn_t *tsdn, pac_t *pac) {
|
||||
pac_sec_flush(tsdn, pac);
|
||||
/*
|
||||
* No-op for now; purging is still done at the arena-level. It should
|
||||
* get moved in here, though.
|
||||
* Purging is still done at the arena-level. It should get moved in
|
||||
* here, though.
|
||||
*/
|
||||
(void)tsdn;
|
||||
(void)pac;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -816,3 +885,16 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) {
|
|||
extent_destroy_wrapper(tsdn, pac, ehooks, edata);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Drain the PAC's small extent cache.
 *
 * sec_flush() is asked to move extents from pac->sec onto a local list; each
 * extent on that list is then unlinked and handed to pac_ecache_dalloc()
 * together with the PAC's ehooks.
 */
void
|
||||
pac_sec_flush(tsdn_t *tsdn, pac_t *pac) {
|
||||
ehooks_t *ehooks = pac_ehooks_get(pac);
|
||||
edata_list_active_t to_flush;
|
||||
edata_list_active_init(&to_flush);
|
||||
sec_flush(tsdn, &pac->sec, &to_flush);
|
||||
edata_t *edata;
|
||||
/* Pop from the head until the list is empty. */
while ((edata = edata_list_active_first(&to_flush)) != NULL) {
|
||||
edata_list_active_remove(&to_flush, edata);
|
||||
pac_ecache_dalloc(tsdn, pac, ehooks, edata);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
47
src/sec.c
47
src/sec.c
|
|
@ -25,6 +25,8 @@ sec_bin_init(sec_bin_t *bin) {
|
|||
bool
|
||||
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
|
||||
sec->opts = *opts;
|
||||
sec->bins = NULL;
|
||||
sec->npsizes = 0;
|
||||
if (opts->nshards == 0) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -57,18 +59,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
|
||||
uint8_t
|
||||
sec_shard_pick(tsd_t *tsd, sec_t *sec, uint8_t *idxp) {
|
||||
/*
|
||||
* Eventually, we should implement affinity, tracking source shard using
|
||||
* the edata_t's newly freed up fields. For now, just randomly
|
||||
* distribute across all shards.
|
||||
*
|
||||
* Callers must ensure sec->opts.nshards > 1.
|
||||
*/
|
||||
if (tsdn_null(tsdn)) {
|
||||
return 0;
|
||||
}
|
||||
tsd_t *tsd = tsdn_tsd(tsdn);
|
||||
uint8_t *idxp = tsd_sec_shardp_get(tsd);
|
||||
assert(sec->opts.nshards > 1);
|
||||
if (*idxp == (uint8_t)-1) {
|
||||
/*
|
||||
* First use; initialize using the trick from Daniel Lemire's
|
||||
|
|
@ -143,10 +143,10 @@ sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) {
|
|||
|
||||
static edata_t *
|
||||
sec_multishard_trylock_alloc(
|
||||
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) {
|
||||
tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind, uint8_t shard) {
|
||||
assert(sec->opts.nshards > 0);
|
||||
|
||||
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
|
||||
uint8_t cur_shard = shard;
|
||||
sec_bin_t *bin;
|
||||
for (size_t i = 0; i < sec->opts.nshards; ++i) {
|
||||
bin = sec_bin_pick(sec, cur_shard, pszind);
|
||||
|
|
@ -170,7 +170,7 @@ sec_multishard_trylock_alloc(
|
|||
* declaring a miss. That could recover more remote-shard hits under
|
||||
* contention, but it also changes the allocation latency policy.
|
||||
*/
|
||||
assert(cur_shard == sec_shard_pick(tsdn, sec));
|
||||
assert(cur_shard == shard);
|
||||
bin = sec_bin_pick(sec, cur_shard, pszind);
|
||||
malloc_mutex_lock(tsdn, &bin->mtx);
|
||||
edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size);
|
||||
|
|
@ -184,7 +184,7 @@ sec_multishard_trylock_alloc(
|
|||
}
|
||||
|
||||
edata_t *
|
||||
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
|
||||
sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size, uint8_t shard) {
|
||||
if (!sec_size_supported(sec, size)) {
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -208,7 +208,7 @@ sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) {
|
|||
/* frequent_reuse */ 1);
|
||||
return edata;
|
||||
}
|
||||
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind);
|
||||
return sec_multishard_trylock_alloc(tsdn, sec, size, pszind, shard);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -248,11 +248,11 @@ sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size,
|
|||
|
||||
static void
|
||||
sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
|
||||
pszind_t pszind, edata_list_active_t *dalloc_list) {
|
||||
pszind_t pszind, edata_list_active_t *dalloc_list, uint8_t shard) {
|
||||
assert(sec->opts.nshards > 0);
|
||||
|
||||
/* Try to dalloc in this thread's bin first */
|
||||
uint8_t cur_shard = sec_shard_pick(tsdn, sec);
|
||||
uint8_t cur_shard = shard;
|
||||
for (size_t i = 0; i < sec->opts.nshards; ++i) {
|
||||
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
|
||||
if (!malloc_mutex_trylock(tsdn, &bin->mtx)) {
|
||||
|
|
@ -267,7 +267,7 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
|
|||
}
|
||||
}
|
||||
/* No bin had alloc or had the extent */
|
||||
assert(cur_shard == sec_shard_pick(tsdn, sec));
|
||||
assert(cur_shard == shard);
|
||||
sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind);
|
||||
malloc_mutex_lock(tsdn, &bin->mtx);
|
||||
sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list);
|
||||
|
|
@ -275,13 +275,11 @@ sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size,
|
|||
}
|
||||
|
||||
void
|
||||
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
|
||||
if (!sec_is_used(sec)) {
|
||||
return;
|
||||
}
|
||||
sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list,
|
||||
uint8_t shard) {
|
||||
edata_t *edata = edata_list_active_first(dalloc_list);
|
||||
size_t size = edata_size_get(edata);
|
||||
if (size > sec->opts.max_alloc) {
|
||||
if (!sec_size_supported(sec, size)) {
|
||||
return;
|
||||
}
|
||||
pszind_t pszind = sz_psz2ind(size);
|
||||
|
|
@ -298,20 +296,21 @@ sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) {
|
|||
malloc_mutex_unlock(tsdn, &bin->mtx);
|
||||
return;
|
||||
}
|
||||
sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list);
|
||||
sec_multishard_trylock_dalloc(
|
||||
tsdn, sec, size, pszind, dalloc_list, shard);
|
||||
}
|
||||
|
||||
void
|
||||
sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result,
|
||||
size_t nallocs) {
|
||||
size_t nallocs, uint8_t shard) {
|
||||
assert((size & PAGE_MASK) == 0);
|
||||
assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc);
|
||||
assert(sec_size_supported(sec, size));
|
||||
assert(nallocs > 0);
|
||||
|
||||
pszind_t pszind = sz_psz2ind(size);
|
||||
assert(pszind < sec->npsizes);
|
||||
|
||||
sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind);
|
||||
sec_bin_t *bin = sec_bin_pick(sec, shard, pszind);
|
||||
malloc_mutex_assert_not_owner(tsdn, &bin->mtx);
|
||||
malloc_mutex_lock(tsdn, &bin->mtx);
|
||||
size_t new_cached_bytes = nallocs * size;
|
||||
|
|
|
|||
38
src/stats.c
38
src/stats.c
|
|
@ -835,6 +835,37 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) {
|
|||
&sec_overfills);
|
||||
}
|
||||
|
||||
/*
 * Emit the PAC small-extent-cache statistics.
 *
 * Five size_t values are fetched with CTL_M2_GET from the
 * "stats.arenas.0.pac_sec_*" names (bytes, hits, misses, dalloc_noflush,
 * dalloc_flush, in that order) and each is written through emitter_kv() as an
 * emitter_type_size entry with a short description.
 * NOTE(review): `i` is presumably the arena index substituted for the "0"
 * component of each name -- confirm against the CTL_M2_GET definition.
 */
static void
|
||||
stats_arena_pac_sec_print(emitter_t *emitter, unsigned i) {
|
||||
size_t sec_bytes;
|
||||
size_t sec_hits;
|
||||
size_t sec_misses;
|
||||
size_t sec_dalloc_flush;
|
||||
size_t sec_dalloc_noflush;
|
||||
CTL_M2_GET("stats.arenas.0.pac_sec_bytes", i, &sec_bytes, size_t);
|
||||
emitter_kv(emitter, "pac_sec_bytes",
|
||||
"Bytes in PAC small extent cache",
|
||||
emitter_type_size, &sec_bytes);
|
||||
CTL_M2_GET("stats.arenas.0.pac_sec_hits", i, &sec_hits, size_t);
|
||||
emitter_kv(emitter, "pac_sec_hits",
|
||||
"Total hits in PAC small extent cache",
|
||||
emitter_type_size, &sec_hits);
|
||||
CTL_M2_GET("stats.arenas.0.pac_sec_misses", i, &sec_misses, size_t);
|
||||
emitter_kv(emitter, "pac_sec_misses",
|
||||
"Total misses in PAC small extent cache",
|
||||
emitter_type_size, &sec_misses);
|
||||
CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_noflush", i,
|
||||
&sec_dalloc_noflush, size_t);
|
||||
emitter_kv(emitter, "pac_sec_dalloc_noflush",
|
||||
"Dalloc calls without flush in PAC small extent cache",
|
||||
emitter_type_size, &sec_dalloc_noflush);
|
||||
CTL_M2_GET("stats.arenas.0.pac_sec_dalloc_flush", i, &sec_dalloc_flush,
|
||||
size_t);
|
||||
emitter_kv(emitter, "pac_sec_dalloc_flush",
|
||||
"Dalloc calls with flush in PAC small extent cache",
|
||||
emitter_type_size, &sec_dalloc_flush);
|
||||
}
|
||||
|
||||
static void
|
||||
stats_arena_hpa_shard_counters_print(
|
||||
emitter_t *emitter, unsigned i, uint64_t uptime) {
|
||||
|
|
@ -1567,6 +1598,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
|
|||
GET_AND_EMIT_MEM_STAT(extent_avail)
|
||||
#undef GET_AND_EMIT_MEM_STAT
|
||||
|
||||
if (opt_pac_sec_opts.nshards > 0) {
|
||||
stats_arena_pac_sec_print(emitter, i);
|
||||
}
|
||||
|
||||
if (mutex) {
|
||||
stats_arena_mutexes_print(emitter, i, uptime);
|
||||
}
|
||||
|
|
@ -1761,6 +1796,9 @@ stats_general_print(emitter_t *emitter) {
|
|||
OPT_WRITE_SIZE_T("hpa_sec_nshards")
|
||||
OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
|
||||
OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
|
||||
OPT_WRITE_SIZE_T("experimental_pac_sec_nshards")
|
||||
OPT_WRITE_SIZE_T("experimental_pac_sec_max_alloc")
|
||||
OPT_WRITE_SIZE_T("experimental_pac_sec_max_bytes")
|
||||
OPT_WRITE_BOOL("huge_arena_pac_thp")
|
||||
OPT_WRITE_CHAR_P("metadata_thp")
|
||||
OPT_WRITE_INT64("mutex_max_spin")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue