mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-27 05:12:12 +03:00
Tcache batching: Plumbing
In the next commit, we'll start using the batcher to eliminate mutex traffic. To avoid cluttering up that commit with the random bits of busy-work it entails, we'll centralize them here.

This commit introduces:
- A batched bin type.
- The ability to mix batched and unbatched bins in the arena.
- Conf parsing to set batches per size and a max batched size.
- mallctl access to the corresponding opt-namespace keys.
- Stats output of the above.
This commit is contained in:
parent
70c94d7474
commit
c085530c71
11 changed files with 121 additions and 35 deletions
|
|
@ -604,10 +604,25 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Reports whether bins of size class `binind` use the batched layout
 * (bin_with_batch_t) rather than a plain bin_t.  This is true exactly when
 * binind is below bin_info_nbatched_sizes.
 */
static inline bool
|
||||
arena_bin_has_batch(szind_t binind) {
|
||||
return binind < bin_info_nbatched_sizes;
|
||||
}
|
||||
|
||||
static inline bin_t *
|
||||
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
|
||||
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
|
||||
return shard0 + binshard;
|
||||
bin_t *ret;
|
||||
if (arena_bin_has_batch(binind)) {
|
||||
ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard);
|
||||
} else {
|
||||
ret = shard0 + binshard;
|
||||
}
|
||||
assert(binind >= SC_NBINS - 1
|
||||
|| (uintptr_t)ret < (uintptr_t)arena
|
||||
+ arena_bin_offsets[binind + 1]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ struct arena_s {
|
|||
JEMALLOC_WARN_ON_USAGE("Do not use this field directly. "
|
||||
"Use `arena_get_bin` instead.")
|
||||
JEMALLOC_ALIGNED(CACHELINE)
|
||||
bin_t all_bins[0];
|
||||
bin_with_batch_t all_bins[0];
|
||||
};
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
|
||||
|
|
|
|||
|
|
@ -2,12 +2,15 @@
|
|||
#define JEMALLOC_INTERNAL_BIN_H
|
||||
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/batcher.h"
|
||||
#include "jemalloc/internal/bin_stats.h"
|
||||
#include "jemalloc/internal/bin_types.h"
|
||||
#include "jemalloc/internal/edata.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/sc.h"
|
||||
|
||||
#define BIN_REMOTE_FREE_ELEMS_MAX 16
|
||||
|
||||
/*
|
||||
* A bin contains a set of extents that are currently being used for slab
|
||||
* allocations.
|
||||
|
|
@ -42,6 +45,19 @@ struct bin_s {
|
|||
edata_list_active_t slabs_full;
|
||||
};
|
||||
|
||||
/*
 * One (pointer, slab) record; bin_with_batch_s holds a fixed-size array of
 * these in its remote_free_data member.
 * NOTE(review): presumably `ptr` is the region being freed and `slab` is the
 * edata_t of the slab that contains it -- no code that fills or reads these
 * fields is visible here; confirm against the batcher users.
 */
typedef struct bin_remote_free_data_s bin_remote_free_data_t;
|
||||
struct bin_remote_free_data_s {
|
||||
void *ptr;
|
||||
edata_t *slab;
|
||||
};
|
||||
|
||||
/*
 * A bin_t followed by batching state: a batcher_t plus an array of
 * BIN_REMOTE_FREE_ELEMS_MAX bin_remote_free_data_t records.
 */
typedef struct bin_with_batch_s bin_with_batch_t;
|
||||
struct bin_with_batch_s {
|
||||
/*
 * Must stay the first member: arena_get_bin() and the bin fork hooks cast
 * directly between bin_t * and bin_with_batch_t *.
 */
bin_t bin;
|
||||
batcher_t remote_frees;
|
||||
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
|
||||
};
|
||||
|
||||
/* A set of sharded bins of the same size class. */
|
||||
typedef struct bins_s bins_t;
|
||||
struct bins_s {
|
||||
|
|
@ -57,9 +73,9 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
|
|||
bool bin_init(bin_t *bin);
|
||||
|
||||
/* Forking. */
|
||||
void bin_prefork(tsdn_t *tsdn, bin_t *bin);
|
||||
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
|
||||
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
|
||||
void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
||||
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
||||
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
||||
|
||||
/* Stats. */
|
||||
static inline void
|
||||
|
|
|
|||
|
|
@ -44,6 +44,15 @@ struct bin_info_s {
|
|||
bitmap_info_t bitmap_info;
|
||||
};
|
||||
|
||||
/* The maximum size a size class can be and still get batching behavior. */
|
||||
extern size_t opt_bin_info_max_batched_size;
|
||||
/* The number of batches per batched size class. */
|
||||
extern size_t opt_bin_info_remote_free_max_batch;
|
||||
|
||||
extern szind_t bin_info_nbatched_sizes;
|
||||
extern unsigned bin_info_nbatched_bins;
|
||||
extern unsigned bin_info_nunbatched_bins;
|
||||
|
||||
extern bin_info_t bin_infos[SC_NBINS];
|
||||
|
||||
void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
|
||||
|
|
|
|||
54
src/arena.c
54
src/arena.c
|
|
@ -45,7 +45,6 @@ size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
|||
size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
||||
|
||||
uint32_t arena_bin_offsets[SC_NBINS];
|
||||
static unsigned nbins_total;
|
||||
|
||||
static unsigned huge_arena_ind;
|
||||
|
||||
|
|
@ -1672,7 +1671,6 @@ arena_t *
|
|||
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
||||
arena_t *arena;
|
||||
base_t *base;
|
||||
unsigned i;
|
||||
|
||||
if (ind == 0) {
|
||||
base = b0get();
|
||||
|
|
@ -1685,15 +1683,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
|||
}
|
||||
|
||||
size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) +
|
||||
sizeof(bin_t) * nbins_total;
|
||||
sizeof(bin_with_batch_t) * bin_info_nbatched_bins
|
||||
+ sizeof(bin_t) * bin_info_nunbatched_bins;
|
||||
arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
|
||||
if (arena == NULL) {
|
||||
goto label_error;
|
||||
}
|
||||
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||
assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <=
|
||||
(uintptr_t)arena + arena_size);
|
||||
)
|
||||
|
||||
atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
|
||||
atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
|
||||
|
|
@ -1733,12 +1728,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
|||
|
||||
/* Initialize bins. */
|
||||
atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
|
||||
for (i = 0; i < nbins_total; i++) {
|
||||
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||
bool err = bin_init(&arena->all_bins[i]);
|
||||
)
|
||||
if (err) {
|
||||
goto label_error;
|
||||
for (unsigned i = 0; i < SC_NBINS; i++) {
|
||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
||||
bin_t *bin = arena_get_bin(arena, i, j);
|
||||
bool err = bin_init(bin);
|
||||
if (err) {
|
||||
goto label_error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1882,8 +1878,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
|
|||
)
|
||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
||||
arena_bin_offsets[i] = cur_offset;
|
||||
nbins_total += bin_infos[i].n_shards;
|
||||
cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
|
||||
uint32_t bin_sz = (i < bin_info_nbatched_sizes
|
||||
? sizeof(bin_with_batch_t) : sizeof(bin_t));
|
||||
cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz;
|
||||
}
|
||||
return pa_central_init(&arena_pa_central_global, base, hpa,
|
||||
&hpa_hooks_default);
|
||||
|
|
@ -1933,19 +1930,21 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
|
|||
|
||||
void
|
||||
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
|
||||
for (unsigned i = 0; i < nbins_total; i++) {
|
||||
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||
bin_prefork(tsdn, &arena->all_bins[i]);
|
||||
)
|
||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
||||
bin_t *bin = arena_get_bin(arena, i, j);
|
||||
bin_prefork(tsdn, bin, arena_bin_has_batch(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
|
||||
for (unsigned i = 0; i < nbins_total; i++) {
|
||||
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||
bin_postfork_parent(tsdn, &arena->all_bins[i]);
|
||||
)
|
||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
||||
bin_t *bin = arena_get_bin(arena, i, j);
|
||||
bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i));
|
||||
}
|
||||
}
|
||||
|
||||
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
|
||||
|
|
@ -1982,10 +1981,11 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
|
|||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < nbins_total; i++) {
|
||||
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||
bin_postfork_child(tsdn, &arena->all_bins[i]);
|
||||
)
|
||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
||||
bin_t *bin = arena_get_bin(arena, i, j);
|
||||
bin_postfork_child(tsdn, bin, arena_bin_has_batch(i));
|
||||
}
|
||||
}
|
||||
|
||||
malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
|
||||
|
|
|
|||
18
src/bin.c
18
src/bin.c
|
|
@ -54,16 +54,28 @@ bin_init(bin_t *bin) {
|
|||
}
|
||||
|
||||
void
|
||||
bin_prefork(tsdn_t *tsdn, bin_t *bin) {
|
||||
bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
||||
malloc_mutex_prefork(tsdn, &bin->lock);
|
||||
if (has_batch) {
|
||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
||||
batcher_prefork(tsdn, &batched->remote_frees);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) {
|
||||
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
||||
malloc_mutex_postfork_parent(tsdn, &bin->lock);
|
||||
if (has_batch) {
|
||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
||||
batcher_postfork_parent(tsdn, &batched->remote_frees);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
|
||||
bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
||||
malloc_mutex_postfork_child(tsdn, &bin->lock);
|
||||
if (has_batch) {
|
||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
||||
batcher_postfork_child(tsdn, &batched->remote_frees);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,15 @@
|
|||
|
||||
#include "jemalloc/internal/bin_info.h"
|
||||
|
||||
size_t opt_bin_info_max_batched_size;
|
||||
size_t opt_bin_info_remote_free_max_batch;
|
||||
|
||||
bin_info_t bin_infos[SC_NBINS];
|
||||
|
||||
szind_t bin_info_nbatched_sizes;
|
||||
unsigned bin_info_nbatched_bins;
|
||||
unsigned bin_info_nunbatched_bins;
|
||||
|
||||
static void
|
||||
bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
||||
bin_info_t infos[SC_NBINS]) {
|
||||
|
|
@ -20,6 +27,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
|||
bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER(
|
||||
bin_info->nregs);
|
||||
bin_info->bitmap_info = bitmap_info;
|
||||
if (bin_info->reg_size <= opt_bin_info_max_batched_size) {
|
||||
bin_info_nbatched_sizes++;
|
||||
bin_info_nbatched_bins += bin_info->n_shards;
|
||||
} else {
|
||||
bin_info_nunbatched_bins += bin_info->n_shards;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ CTL_PROTO(opt_zero)
|
|||
CTL_PROTO(opt_utrace)
|
||||
CTL_PROTO(opt_xmalloc)
|
||||
CTL_PROTO(opt_experimental_infallible_new)
|
||||
CTL_PROTO(opt_max_batched_size)
|
||||
CTL_PROTO(opt_remote_free_max_batch)
|
||||
CTL_PROTO(opt_tcache)
|
||||
CTL_PROTO(opt_tcache_max)
|
||||
CTL_PROTO(opt_tcache_nslots_small_min)
|
||||
|
|
@ -480,6 +482,8 @@ static const ctl_named_node_t opt_node[] = {
|
|||
{NAME("xmalloc"), CTL(opt_xmalloc)},
|
||||
{NAME("experimental_infallible_new"),
|
||||
CTL(opt_experimental_infallible_new)},
|
||||
{NAME("max_batched_size"), CTL(opt_max_batched_size)},
|
||||
{NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)},
|
||||
{NAME("tcache"), CTL(opt_tcache)},
|
||||
{NAME("tcache_max"), CTL(opt_tcache_max)},
|
||||
{NAME("tcache_nslots_small_min"),
|
||||
|
|
@ -2203,6 +2207,9 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
|
|||
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
|
||||
CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new,
|
||||
opt_experimental_infallible_new, bool)
|
||||
CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t)
|
||||
CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch,
|
||||
size_t)
|
||||
CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
|
||||
CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t)
|
||||
CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min,
|
||||
|
|
|
|||
|
|
@ -1325,6 +1325,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
|||
} while (vlen_left > 0);
|
||||
CONF_CONTINUE;
|
||||
}
|
||||
CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size,
|
||||
"max_batched_size", 0, SIZE_T_MAX,
|
||||
CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
|
||||
/* clip */ true)
|
||||
CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch,
|
||||
"remote_free_max_batch", 0,
|
||||
BIN_REMOTE_FREE_ELEMS_MAX,
|
||||
CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
|
||||
/* clip */ true)
|
||||
|
||||
if (CONF_MATCH("tcache_ncached_max")) {
|
||||
bool err = tcache_bin_info_default_init(
|
||||
v, vlen);
|
||||
|
|
|
|||
|
|
@ -1555,6 +1555,8 @@ stats_general_print(emitter_t *emitter) {
|
|||
OPT_WRITE_BOOL("utrace")
|
||||
OPT_WRITE_BOOL("xmalloc")
|
||||
OPT_WRITE_BOOL("experimental_infallible_new")
|
||||
OPT_WRITE_SIZE_T("max_batched_size")
|
||||
OPT_WRITE_SIZE_T("remote_free_max_batch")
|
||||
OPT_WRITE_BOOL("tcache")
|
||||
OPT_WRITE_SIZE_T("tcache_max")
|
||||
OPT_WRITE_UNSIGNED("tcache_nslots_small_min")
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ main(void) {
|
|||
P(arena_t);
|
||||
P(arena_stats_t);
|
||||
P(base_t);
|
||||
P(bin_t);
|
||||
P(bin_with_batch_t);
|
||||
P(decay_t);
|
||||
P(edata_t);
|
||||
P(ecache_t);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue