Tcache batching: Plumbing

In the next commit, we'll start using the batcher to eliminate mutex traffic.
To avoid cluttering up that commit with the random bits of busy-work it entails,
we'll centralize them here.  This commit introduces:
- A batched bin type.
- The ability to mix batched and unbatched bins in the arena.
- Conf parsing to set batches per size and a max batched size.
- mallctl access to the corresponding opt-namespace keys.
- Stats output of the above.
David Goldblatt, 2024-02-02 13:20:14 -08:00 (committed by David Goldblatt)
commit c085530c71, parent 70c94d7474
11 changed files with 121 additions and 35 deletions
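
For orientation, a minimal sketch (not part of the commit) of how the new opt.* keys added below could be read back through mallctl, assuming they remain read-only size_t values as the ctl.c hunk suggests:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	size_t max_batched_size, remote_free_max_batch;
	size_t sz = sizeof(size_t);

	/* Read-only keys introduced by this commit. */
	if (mallctl("opt.max_batched_size", &max_batched_size, &sz, NULL,
	    0) == 0) {
		printf("opt.max_batched_size: %zu\n", max_batched_size);
	}
	sz = sizeof(size_t);
	if (mallctl("opt.remote_free_max_batch", &remote_free_max_batch,
	    &sz, NULL, 0) == 0) {
		printf("opt.remote_free_max_batch: %zu\n",
		    remote_free_max_batch);
	}
	return 0;
}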


@@ -604,10 +604,25 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
}
}
static inline bool
arena_bin_has_batch(szind_t binind) {
return binind < bin_info_nbatched_sizes;
}
static inline bin_t *
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
return shard0 + binshard;
bin_t *ret;
if (arena_bin_has_batch(binind)) {
ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard);
} else {
ret = shard0 + binshard;
}
assert(binind >= SC_NBINS - 1
|| (uintptr_t)ret < (uintptr_t)arena
+ arena_bin_offsets[binind + 1]);
return ret;
}
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
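
For context on arena_get_bin() above: the arena's trailing bin area now holds the batched size classes first (as bin_with_batch_t) and the unbatched ones after them (as plain bin_t), so both the per-class offset and the per-shard stride depend on whether a class is batched. A standalone sketch with hypothetical toy sizes (the real computation uses bin_infos[i].n_shards and sizeof(bin_with_batch_t) and lives in arena_boot(), further down in this diff):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins, for illustration only. */
enum { TOY_NBINS = 4, TOY_NBATCHED_SIZES = 2, TOY_NSHARDS = 2 };
enum { TOY_BIN_SZ = 64, TOY_BIN_WITH_BATCH_SZ = 192 };

int
main(void) {
	uint32_t offsets[TOY_NBINS];
	uint32_t cur = 0; /* Offset of shard 0 of size class 0. */
	for (int i = 0; i < TOY_NBINS; i++) {
		offsets[i] = cur;
		/* Batched classes come first and use the larger stride. */
		uint32_t stride = (i < TOY_NBATCHED_SIZES
		    ? TOY_BIN_WITH_BATCH_SZ : TOY_BIN_SZ);
		cur += (uint32_t)TOY_NSHARDS * stride;
		printf("class %d: offset %u, stride %u\n", i,
		    (unsigned)offsets[i], (unsigned)stride);
	}
	/*
	 * A lookup like arena_get_bin(class, shard) then resolves to
	 * offsets[class] + shard * stride, matching the hunk above.
	 */
	return 0;
}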


@@ -104,7 +104,7 @@ struct arena_s {
JEMALLOC_WARN_ON_USAGE("Do not use this field directly. "
"Use `arena_get_bin` instead.")
JEMALLOC_ALIGNED(CACHELINE)
bin_t all_bins[0];
bin_with_batch_t all_bins[0];
};
#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */


@@ -2,12 +2,15 @@
#define JEMALLOC_INTERNAL_BIN_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/batcher.h"
#include "jemalloc/internal/bin_stats.h"
#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sc.h"
#define BIN_REMOTE_FREE_ELEMS_MAX 16
/*
* A bin contains a set of extents that are currently being used for slab
* allocations.
@@ -42,6 +45,19 @@ struct bin_s {
edata_list_active_t slabs_full;
};
typedef struct bin_remote_free_data_s bin_remote_free_data_t;
struct bin_remote_free_data_s {
void *ptr;
edata_t *slab;
};
typedef struct bin_with_batch_s bin_with_batch_t;
struct bin_with_batch_s {
bin_t bin;
batcher_t remote_frees;
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
};
/* A set of sharded bins of the same size class. */
typedef struct bins_s bins_t;
struct bins_s {
@@ -57,9 +73,9 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
bool bin_init(bin_t *bin);
/* Forking. */
void bin_prefork(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch);
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch);
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch);
/* Stats. */
static inline void


@@ -44,6 +44,15 @@ struct bin_info_s {
bitmap_info_t bitmap_info;
};
/* The maximum size a size class can be and still get batching behavior. */
extern size_t opt_bin_info_max_batched_size;
/* The number of batches per batched size class. */
extern size_t opt_bin_info_remote_free_max_batch;
extern szind_t bin_info_nbatched_sizes;
extern unsigned bin_info_nbatched_bins;
extern unsigned bin_info_nunbatched_bins;
extern bin_info_t bin_infos[SC_NBINS];
void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);


@@ -45,7 +45,6 @@ size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
uint32_t arena_bin_offsets[SC_NBINS];
static unsigned nbins_total;
static unsigned huge_arena_ind;
@@ -1672,7 +1671,6 @@ arena_t *
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
arena_t *arena;
base_t *base;
unsigned i;
if (ind == 0) {
base = b0get();
@@ -1685,15 +1683,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
}
size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) +
sizeof(bin_t) * nbins_total;
sizeof(bin_with_batch_t) * bin_info_nbatched_bins
+ sizeof(bin_t) * bin_info_nunbatched_bins;
arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
if (arena == NULL) {
goto label_error;
}
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <=
(uintptr_t)arena + arena_size);
)
atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
@@ -1733,12 +1728,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
/* Initialize bins. */
atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
for (i = 0; i < nbins_total; i++) {
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
bool err = bin_init(&arena->all_bins[i]);
)
if (err) {
goto label_error;
for (unsigned i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_t *bin = arena_get_bin(arena, i, j);
bool err = bin_init(bin);
if (err) {
goto label_error;
}
}
}
@@ -1882,8 +1878,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
)
for (szind_t i = 0; i < SC_NBINS; i++) {
arena_bin_offsets[i] = cur_offset;
nbins_total += bin_infos[i].n_shards;
cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
uint32_t bin_sz = (i < bin_info_nbatched_sizes
? sizeof(bin_with_batch_t) : sizeof(bin_t));
cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz;
}
return pa_central_init(&arena_pa_central_global, base, hpa,
&hpa_hooks_default);
@@ -1933,19 +1930,21 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
void
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
for (unsigned i = 0; i < nbins_total; i++) {
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
bin_prefork(tsdn, &arena->all_bins[i]);
)
for (szind_t i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_t *bin = arena_get_bin(arena, i, j);
bin_prefork(tsdn, bin, arena_bin_has_batch(i));
}
}
}
void
arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
for (unsigned i = 0; i < nbins_total; i++) {
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
bin_postfork_parent(tsdn, &arena->all_bins[i]);
)
for (szind_t i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_t *bin = arena_get_bin(arena, i, j);
bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i));
}
}
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
@@ -1982,10 +1981,11 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
}
}
for (unsigned i = 0; i < nbins_total; i++) {
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
bin_postfork_child(tsdn, &arena->all_bins[i]);
)
for (szind_t i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_t *bin = arena_get_bin(arena, i, j);
bin_postfork_child(tsdn, bin, arena_bin_has_batch(i));
}
}
malloc_mutex_postfork_child(tsdn, &arena->large_mtx);


@@ -54,16 +54,28 @@ bin_init(bin_t *bin) {
}
void
bin_prefork(tsdn_t *tsdn, bin_t *bin) {
bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
malloc_mutex_prefork(tsdn, &bin->lock);
if (has_batch) {
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
batcher_prefork(tsdn, &batched->remote_frees);
}
}
void
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) {
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
malloc_mutex_postfork_parent(tsdn, &bin->lock);
if (has_batch) {
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
batcher_postfork_parent(tsdn, &batched->remote_frees);
}
}
void
bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
malloc_mutex_postfork_child(tsdn, &bin->lock);
if (has_batch) {
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
batcher_postfork_child(tsdn, &batched->remote_frees);
}
}
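
The bin_t * to bin_with_batch_t * casts above are valid only because bin is the first member of bin_with_batch_s (and because has_batch is passed only for bins that were actually allocated as bin_with_batch_t). A compile-time check one could imagine adding, written here as a plain C11 sketch rather than with jemalloc's own assertion helpers:

#include <stddef.h>
#include "jemalloc/internal/bin.h"

/* Sketch only: documents the aliasing assumption behind the casts above. */
_Static_assert(offsetof(bin_with_batch_t, bin) == 0,
    "bin must come first so a bin_with_batch_t can be reached through its "
    "embedded bin_t");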


@@ -3,8 +3,15 @@
#include "jemalloc/internal/bin_info.h"
size_t opt_bin_info_max_batched_size;
size_t opt_bin_info_remote_free_max_batch;
bin_info_t bin_infos[SC_NBINS];
szind_t bin_info_nbatched_sizes;
unsigned bin_info_nbatched_bins;
unsigned bin_info_nunbatched_bins;
static void
bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
bin_info_t infos[SC_NBINS]) {
@@ -20,6 +27,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER(
bin_info->nregs);
bin_info->bitmap_info = bitmap_info;
if (bin_info->reg_size <= opt_bin_info_max_batched_size) {
bin_info_nbatched_sizes++;
bin_info_nbatched_bins += bin_info->n_shards;
} else {
bin_info_nunbatched_bins += bin_info->n_shards;
}
}
}
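
As a worked example with made-up numbers: if opt_bin_info_max_batched_size admits the first three size classes and each of them runs two shards, the loop above ends with bin_info_nbatched_sizes == 3 and bin_info_nbatched_bins == 6, while every shard of the larger classes is counted in bin_info_nunbatched_bins. Those three totals then drive the arena sizing in arena_new() and the offset table built in arena_boot().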


@@ -129,6 +129,8 @@ CTL_PROTO(opt_zero)
CTL_PROTO(opt_utrace)
CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_experimental_infallible_new)
CTL_PROTO(opt_max_batched_size)
CTL_PROTO(opt_remote_free_max_batch)
CTL_PROTO(opt_tcache)
CTL_PROTO(opt_tcache_max)
CTL_PROTO(opt_tcache_nslots_small_min)
@@ -480,6 +482,8 @@ static const ctl_named_node_t opt_node[] = {
{NAME("xmalloc"), CTL(opt_xmalloc)},
{NAME("experimental_infallible_new"),
CTL(opt_experimental_infallible_new)},
{NAME("max_batched_size"), CTL(opt_max_batched_size)},
{NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)},
{NAME("tcache"), CTL(opt_tcache)},
{NAME("tcache_max"), CTL(opt_tcache_max)},
{NAME("tcache_nslots_small_min"),
@@ -2203,6 +2207,9 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new,
opt_experimental_infallible_new, bool)
CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t)
CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch,
size_t)
CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t)
CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min,


@@ -1325,6 +1325,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
} while (vlen_left > 0);
CONF_CONTINUE;
}
CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size,
"max_batched_size", 0, SIZE_T_MAX,
CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
/* clip */ true)
CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch,
"remote_free_max_batch", 0,
BIN_REMOTE_FREE_ELEMS_MAX,
CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
/* clip */ true)
if (CONF_MATCH("tcache_ncached_max")) {
bool err = tcache_bin_info_default_init(
v, vlen);
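
The matching option strings parsed above can be supplied through the usual jemalloc channels; a sketch using the application-provided malloc_conf symbol (the values are arbitrary examples, and this commit only adds the plumbing, not the batching behavior itself):

/*
 * Equivalent to MALLOC_CONF="max_batched_size:2048,remote_free_max_batch:8"
 * in the environment.  remote_free_max_batch is clipped to
 * BIN_REMOTE_FREE_ELEMS_MAX (16), and a max_batched_size of 0 leaves every
 * size class unbatched.
 */
const char *malloc_conf = "max_batched_size:2048,remote_free_max_batch:8";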


@@ -1555,6 +1555,8 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_BOOL("utrace")
OPT_WRITE_BOOL("xmalloc")
OPT_WRITE_BOOL("experimental_infallible_new")
OPT_WRITE_SIZE_T("max_batched_size")
OPT_WRITE_SIZE_T("remote_free_max_batch")
OPT_WRITE_BOOL("tcache")
OPT_WRITE_SIZE_T("tcache_max")
OPT_WRITE_UNSIGNED("tcache_nslots_small_min")


@@ -35,6 +35,8 @@ main(void) {
P(arena_t);
P(arena_stats_t);
P(base_t);
P(bin_t);
P(bin_with_batch_t);
P(decay_t);
P(edata_t);
P(ecache_t);