mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 14:41:42 +03:00
parent
ced8b3cffb
commit
2114349a4e
30 changed files with 124 additions and 1364 deletions
|
|
@ -98,7 +98,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \
|
||||||
$(srcroot)src/arena.c \
|
$(srcroot)src/arena.c \
|
||||||
$(srcroot)src/background_thread.c \
|
$(srcroot)src/background_thread.c \
|
||||||
$(srcroot)src/base.c \
|
$(srcroot)src/base.c \
|
||||||
$(srcroot)src/batcher.c \
|
|
||||||
$(srcroot)src/bin.c \
|
$(srcroot)src/bin.c \
|
||||||
$(srcroot)src/bin_info.c \
|
$(srcroot)src/bin_info.c \
|
||||||
$(srcroot)src/bitmap.c \
|
$(srcroot)src/bitmap.c \
|
||||||
|
|
@ -208,8 +207,6 @@ TESTS_UNIT := \
|
||||||
$(srcroot)test/unit/background_thread_enable.c \
|
$(srcroot)test/unit/background_thread_enable.c \
|
||||||
$(srcroot)test/unit/base.c \
|
$(srcroot)test/unit/base.c \
|
||||||
$(srcroot)test/unit/batch_alloc.c \
|
$(srcroot)test/unit/batch_alloc.c \
|
||||||
$(srcroot)test/unit/batcher.c \
|
|
||||||
$(srcroot)test/unit/bin_batching.c \
|
|
||||||
$(srcroot)test/unit/binshard.c \
|
$(srcroot)test/unit/binshard.c \
|
||||||
$(srcroot)test/unit/bitmap.c \
|
$(srcroot)test/unit/bitmap.c \
|
||||||
$(srcroot)test/unit/bit_util.c \
|
$(srcroot)test/unit/bit_util.c \
|
||||||
|
|
|
||||||
|
|
@ -588,11 +588,10 @@ arena_dalloc_bin_locked_begin(
|
||||||
* stats updates, which happen during finish (this lets running counts get left
|
* stats updates, which happen during finish (this lets running counts get left
|
||||||
* in a register).
|
* in a register).
|
||||||
*/
|
*/
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
JEMALLOC_ALWAYS_INLINE bool
|
||||||
arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||||
arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
|
arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
|
||||||
void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs,
|
void *ptr) {
|
||||||
unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) {
|
|
||||||
const bin_info_t *bin_info = &bin_infos[binind];
|
const bin_info_t *bin_info = &bin_infos[binind];
|
||||||
size_t regind = arena_slab_regind(info, binind, slab, ptr);
|
size_t regind = arena_slab_regind(info, binind, slab, ptr);
|
||||||
slab_data_t *slab_data = edata_slab_data_get(slab);
|
slab_data_t *slab_data = edata_slab_data_get(slab);
|
||||||
|
|
@ -612,17 +611,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||||
if (nfree == bin_info->nregs) {
|
if (nfree == bin_info->nregs) {
|
||||||
arena_dalloc_bin_locked_handle_newly_empty(
|
arena_dalloc_bin_locked_handle_newly_empty(
|
||||||
tsdn, arena, slab, bin);
|
tsdn, arena, slab, bin);
|
||||||
|
return true;
|
||||||
if (*dalloc_slabs_count < ndalloc_slabs) {
|
|
||||||
dalloc_slabs[*dalloc_slabs_count] = slab;
|
|
||||||
(*dalloc_slabs_count)++;
|
|
||||||
} else {
|
|
||||||
edata_list_active_append(dalloc_slabs_extra, slab);
|
|
||||||
}
|
|
||||||
} else if (nfree == 1 && slab != bin->slabcur) {
|
} else if (nfree == 1 && slab != bin->slabcur) {
|
||||||
arena_dalloc_bin_locked_handle_newly_nonempty(
|
arena_dalloc_bin_locked_handle_newly_nonempty(
|
||||||
tsdn, arena, slab, bin);
|
tsdn, arena, slab, bin);
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
JEMALLOC_ALWAYS_INLINE void
|
||||||
|
|
@ -635,148 +629,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
|
||||||
arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind,
|
|
||||||
edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count,
|
|
||||||
edata_list_active_t *dalloc_slabs_extra) {
|
|
||||||
assert(binind < bin_info_nbatched_sizes);
|
|
||||||
bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin;
|
|
||||||
size_t nelems_to_pop = batcher_pop_begin(
|
|
||||||
tsdn, &batched_bin->remote_frees);
|
|
||||||
|
|
||||||
bin_batching_test_mid_pop(nelems_to_pop);
|
|
||||||
if (nelems_to_pop == BATCHER_NO_IDX) {
|
|
||||||
malloc_mutex_assert_not_owner(
|
|
||||||
tsdn, &batched_bin->remote_frees.mtx);
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t npushes = batcher_pop_get_pushes(
|
|
||||||
tsdn, &batched_bin->remote_frees);
|
|
||||||
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
|
|
||||||
for (size_t i = 0; i < nelems_to_pop; i++) {
|
|
||||||
remote_free_data[i] = batched_bin->remote_free_data[i];
|
|
||||||
}
|
|
||||||
batcher_pop_end(tsdn, &batched_bin->remote_frees);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < nelems_to_pop; i++) {
|
|
||||||
arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info,
|
|
||||||
binind, remote_free_data[i].slab, remote_free_data[i].ptr,
|
|
||||||
dalloc_slabs, ndalloc_slabs, dalloc_count,
|
|
||||||
dalloc_slabs_extra);
|
|
||||||
}
|
|
||||||
|
|
||||||
bin->stats.batch_pops++;
|
|
||||||
bin->stats.batch_pushes += npushes;
|
|
||||||
bin->stats.batch_pushed_elems += nelems_to_pop;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t;
|
|
||||||
struct arena_bin_flush_batch_state_s {
|
|
||||||
arena_dalloc_bin_locked_info_t info;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Bin batching is subtle in that there are unusual edge cases in which
|
|
||||||
* it can trigger the deallocation of more slabs than there were items
|
|
||||||
* flushed (say, if every original deallocation triggered a slab
|
|
||||||
* deallocation, and so did every batched one). So we keep a small
|
|
||||||
* backup array for any "extra" slabs, as well as a list to allow a
|
|
||||||
* dynamic number of ones exceeding that array.
|
|
||||||
*/
|
|
||||||
edata_t *dalloc_slabs[8];
|
|
||||||
unsigned dalloc_slab_count;
|
|
||||||
edata_list_active_t dalloc_slabs_extra;
|
|
||||||
};
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE unsigned
|
|
||||||
arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) {
|
|
||||||
if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) {
|
|
||||||
return bin_batching_test_ndalloc_slabs_max;
|
|
||||||
}
|
|
||||||
return preallocated_slabs;
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
|
||||||
unsigned binind, arena_bin_flush_batch_state_t *state) {
|
|
||||||
if (binind >= bin_info_nbatched_sizes) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
arena_dalloc_bin_locked_begin(&state->info, binind);
|
|
||||||
state->dalloc_slab_count = 0;
|
|
||||||
edata_list_active_init(&state->dalloc_slabs_extra);
|
|
||||||
|
|
||||||
unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs)
|
|
||||||
/ sizeof(state->dalloc_slabs[0]));
|
|
||||||
unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs(
|
|
||||||
preallocated_slabs);
|
|
||||||
|
|
||||||
arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind,
|
|
||||||
state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count,
|
|
||||||
&state->dalloc_slabs_extra);
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
|
||||||
unsigned binind, arena_bin_flush_batch_state_t *state) {
|
|
||||||
if (binind >= bin_info_nbatched_sizes) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
arena_bin_has_batch(szind_t binind) {
|
|
||||||
return binind < bin_info_nbatched_sizes;
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
|
||||||
unsigned binind, arena_bin_flush_batch_state_t *state) {
|
|
||||||
if (!arena_bin_has_batch(binind)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* The initialization of dalloc_slabs_extra is guarded by an
|
|
||||||
* arena_bin_has_batch check higher up the stack. But the clang
|
|
||||||
* analyzer forgets this down the stack, triggering a spurious error
|
|
||||||
* reported here.
|
|
||||||
*/
|
|
||||||
JEMALLOC_CLANG_ANALYZER_SUPPRESS {
|
|
||||||
bin_batching_test_after_unlock(state->dalloc_slab_count,
|
|
||||||
edata_list_active_empty(&state->dalloc_slabs_extra));
|
|
||||||
}
|
|
||||||
for (unsigned i = 0; i < state->dalloc_slab_count; i++) {
|
|
||||||
edata_t *slab = state->dalloc_slabs[i];
|
|
||||||
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
||||||
}
|
|
||||||
while (!edata_list_active_empty(&state->dalloc_slabs_extra)) {
|
|
||||||
edata_t *slab = edata_list_active_first(
|
|
||||||
&state->dalloc_slabs_extra);
|
|
||||||
edata_list_active_remove(&state->dalloc_slabs_extra, slab);
|
|
||||||
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bin_t *
|
static inline bin_t *
|
||||||
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
|
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
|
||||||
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
|
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
|
||||||
bin_t *ret;
|
return shard0 + binshard;
|
||||||
if (arena_bin_has_batch(binind)) {
|
|
||||||
ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard);
|
|
||||||
} else {
|
|
||||||
ret = shard0 + binshard;
|
|
||||||
}
|
|
||||||
assert(binind >= SC_NBINS - 1
|
|
||||||
|| (uintptr_t)ret
|
|
||||||
< (uintptr_t)arena + arena_bin_offsets[binind + 1]);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
|
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,7 @@ struct arena_s {
|
||||||
"Do not use this field directly. "
|
"Do not use this field directly. "
|
||||||
"Use `arena_get_bin` instead.")
|
"Use `arena_get_bin` instead.")
|
||||||
JEMALLOC_ALIGNED(CACHELINE)
|
JEMALLOC_ALIGNED(CACHELINE)
|
||||||
bin_with_batch_t all_bins[0];
|
bin_t all_bins[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
|
#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
|
||||||
|
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
#ifndef JEMALLOC_INTERNAL_BATCHER_H
|
|
||||||
#define JEMALLOC_INTERNAL_BATCHER_H
|
|
||||||
|
|
||||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
||||||
#include "jemalloc/internal/atomic.h"
|
|
||||||
#include "jemalloc/internal/mutex.h"
|
|
||||||
|
|
||||||
#define BATCHER_NO_IDX ((size_t) - 1)
|
|
||||||
|
|
||||||
typedef struct batcher_s batcher_t;
|
|
||||||
struct batcher_s {
|
|
||||||
/*
|
|
||||||
* Optimize for locality -- nelems_max and nelems are always touched
|
|
||||||
* together, along with the front of the mutex. The end of the mutex is
|
|
||||||
* only touched if there's contention.
|
|
||||||
*/
|
|
||||||
atomic_zu_t nelems;
|
|
||||||
size_t nelems_max;
|
|
||||||
size_t npushes;
|
|
||||||
malloc_mutex_t mtx;
|
|
||||||
};
|
|
||||||
|
|
||||||
void batcher_init(batcher_t *batcher, size_t nelems_max);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns an index (into some user-owned array) to use for pushing, or
|
|
||||||
* BATCHER_NO_IDX if no index is free. If the former, the caller must call
|
|
||||||
* batcher_push_end once done.
|
|
||||||
*/
|
|
||||||
size_t batcher_push_begin(
|
|
||||||
tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push);
|
|
||||||
void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the number of items to pop, or BATCHER_NO_IDX if there are none.
|
|
||||||
* If the former, must be followed by a call to batcher_pop_end.
|
|
||||||
*/
|
|
||||||
size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
|
|
||||||
void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher);
|
|
||||||
|
|
||||||
#endif /* JEMALLOC_INTERNAL_BATCHER_H */
|
|
||||||
|
|
@ -2,60 +2,12 @@
|
||||||
#define JEMALLOC_INTERNAL_BIN_H
|
#define JEMALLOC_INTERNAL_BIN_H
|
||||||
|
|
||||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||||
#include "jemalloc/internal/batcher.h"
|
|
||||||
#include "jemalloc/internal/bin_stats.h"
|
#include "jemalloc/internal/bin_stats.h"
|
||||||
#include "jemalloc/internal/bin_types.h"
|
#include "jemalloc/internal/bin_types.h"
|
||||||
#include "jemalloc/internal/edata.h"
|
#include "jemalloc/internal/edata.h"
|
||||||
#include "jemalloc/internal/mutex.h"
|
#include "jemalloc/internal/mutex.h"
|
||||||
#include "jemalloc/internal/sc.h"
|
#include "jemalloc/internal/sc.h"
|
||||||
|
|
||||||
#define BIN_REMOTE_FREE_ELEMS_MAX 16
|
|
||||||
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
extern void (*bin_batching_test_after_push_hook)(size_t idx);
|
|
||||||
extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop);
|
|
||||||
extern void (*bin_batching_test_after_unlock_hook)(
|
|
||||||
unsigned slab_dalloc_count, bool list_empty);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
extern unsigned bin_batching_test_ndalloc_slabs_max;
|
|
||||||
#else
|
|
||||||
static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
bin_batching_test_after_push(size_t idx) {
|
|
||||||
(void)idx;
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
if (bin_batching_test_after_push_hook != NULL) {
|
|
||||||
bin_batching_test_after_push_hook(idx);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
bin_batching_test_mid_pop(size_t elems_to_pop) {
|
|
||||||
(void)elems_to_pop;
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
if (bin_batching_test_mid_pop_hook != NULL) {
|
|
||||||
bin_batching_test_mid_pop_hook(elems_to_pop);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
|
||||||
bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) {
|
|
||||||
(void)slab_dalloc_count;
|
|
||||||
(void)list_empty;
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
if (bin_batching_test_after_unlock_hook != NULL) {
|
|
||||||
bin_batching_test_after_unlock_hook(
|
|
||||||
slab_dalloc_count, list_empty);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A bin contains a set of extents that are currently being used for slab
|
* A bin contains a set of extents that are currently being used for slab
|
||||||
* allocations.
|
* allocations.
|
||||||
|
|
@ -90,19 +42,6 @@ struct bin_s {
|
||||||
edata_list_active_t slabs_full;
|
edata_list_active_t slabs_full;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct bin_remote_free_data_s bin_remote_free_data_t;
|
|
||||||
struct bin_remote_free_data_s {
|
|
||||||
void *ptr;
|
|
||||||
edata_t *slab;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct bin_with_batch_s bin_with_batch_t;
|
|
||||||
struct bin_with_batch_s {
|
|
||||||
bin_t bin;
|
|
||||||
batcher_t remote_frees;
|
|
||||||
bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
/* A set of sharded bins of the same size class. */
|
/* A set of sharded bins of the same size class. */
|
||||||
typedef struct bins_s bins_t;
|
typedef struct bins_s bins_t;
|
||||||
struct bins_s {
|
struct bins_s {
|
||||||
|
|
@ -115,12 +54,12 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
|
||||||
size_t end_size, size_t nshards);
|
size_t end_size, size_t nshards);
|
||||||
|
|
||||||
/* Initializes a bin to empty. Returns true on error. */
|
/* Initializes a bin to empty. Returns true on error. */
|
||||||
bool bin_init(bin_t *bin, unsigned binind);
|
bool bin_init(bin_t *bin);
|
||||||
|
|
||||||
/* Forking. */
|
/* Forking. */
|
||||||
void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
void bin_prefork(tsdn_t *tsdn, bin_t *bin);
|
||||||
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
|
||||||
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch);
|
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
|
||||||
|
|
||||||
/* Stats. */
|
/* Stats. */
|
||||||
static inline void
|
static inline void
|
||||||
|
|
@ -138,11 +77,6 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) {
|
||||||
stats->reslabs += bin->stats.reslabs;
|
stats->reslabs += bin->stats.reslabs;
|
||||||
stats->curslabs += bin->stats.curslabs;
|
stats->curslabs += bin->stats.curslabs;
|
||||||
stats->nonfull_slabs += bin->stats.nonfull_slabs;
|
stats->nonfull_slabs += bin->stats.nonfull_slabs;
|
||||||
|
|
||||||
stats->batch_failed_pushes += bin->stats.batch_failed_pushes;
|
|
||||||
stats->batch_pushes += bin->stats.batch_pushes;
|
|
||||||
stats->batch_pushed_elems += bin->stats.batch_pushed_elems;
|
|
||||||
|
|
||||||
malloc_mutex_unlock(tsdn, &bin->lock);
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,17 +44,6 @@ struct bin_info_s {
|
||||||
bitmap_info_t bitmap_info;
|
bitmap_info_t bitmap_info;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The maximum size a size class can be and still get batching behavior. */
|
|
||||||
extern size_t opt_bin_info_max_batched_size;
|
|
||||||
/* The number of batches per batched size class. */
|
|
||||||
extern size_t opt_bin_info_remote_free_max_batch;
|
|
||||||
// The max number of pending elems (across all batches)
|
|
||||||
extern size_t opt_bin_info_remote_free_max;
|
|
||||||
|
|
||||||
extern szind_t bin_info_nbatched_sizes;
|
|
||||||
extern unsigned bin_info_nbatched_bins;
|
|
||||||
extern unsigned bin_info_nunbatched_bins;
|
|
||||||
|
|
||||||
extern bin_info_t bin_infos[SC_NBINS];
|
extern bin_info_t bin_infos[SC_NBINS];
|
||||||
|
|
||||||
void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
|
void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
|
||||||
|
|
|
||||||
|
|
@ -48,11 +48,6 @@ struct bin_stats_s {
|
||||||
|
|
||||||
/* Current size of nonfull slabs heap in this bin. */
|
/* Current size of nonfull slabs heap in this bin. */
|
||||||
size_t nonfull_slabs;
|
size_t nonfull_slabs;
|
||||||
|
|
||||||
uint64_t batch_pops;
|
|
||||||
uint64_t batch_failed_pushes;
|
|
||||||
uint64_t batch_pushes;
|
|
||||||
uint64_t batch_pushed_elems;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct bin_stats_data_s bin_stats_data_t;
|
typedef struct bin_stats_data_s bin_stats_data_t;
|
||||||
|
|
|
||||||
|
|
@ -64,10 +64,9 @@ enum witness_rank_e {
|
||||||
WITNESS_RANK_BASE,
|
WITNESS_RANK_BASE,
|
||||||
WITNESS_RANK_ARENA_LARGE,
|
WITNESS_RANK_ARENA_LARGE,
|
||||||
WITNESS_RANK_HOOK,
|
WITNESS_RANK_HOOK,
|
||||||
WITNESS_RANK_BIN,
|
|
||||||
|
|
||||||
WITNESS_RANK_LEAF = 0x1000,
|
WITNESS_RANK_LEAF = 0x1000,
|
||||||
WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF,
|
WITNESS_RANK_BIN = WITNESS_RANK_LEAF,
|
||||||
WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF,
|
WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF,
|
||||||
WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF,
|
WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF,
|
||||||
WITNESS_RANK_DSS = WITNESS_RANK_LEAF,
|
WITNESS_RANK_DSS = WITNESS_RANK_LEAF,
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\arena.c" />
|
<ClCompile Include="..\..\..\..\src\arena.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\base.c" />
|
<ClCompile Include="..\..\..\..\src\base.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c" />
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c" />
|
<ClCompile Include="..\..\..\..\src\bin.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\base.c">
|
<ClCompile Include="..\..\..\..\src\base.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c">
|
<ClCompile Include="..\..\..\..\src\bin.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\arena.c" />
|
<ClCompile Include="..\..\..\..\src\arena.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\base.c" />
|
<ClCompile Include="..\..\..\..\src\base.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c" />
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c" />
|
<ClCompile Include="..\..\..\..\src\bin.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\base.c">
|
<ClCompile Include="..\..\..\..\src\base.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c">
|
<ClCompile Include="..\..\..\..\src\bin.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\arena.c" />
|
<ClCompile Include="..\..\..\..\src\arena.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\base.c" />
|
<ClCompile Include="..\..\..\..\src\base.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c" />
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c" />
|
<ClCompile Include="..\..\..\..\src\bin.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\base.c">
|
<ClCompile Include="..\..\..\..\src\base.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c">
|
<ClCompile Include="..\..\..\..\src\bin.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\arena.c" />
|
<ClCompile Include="..\..\..\..\src\arena.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
<ClCompile Include="..\..\..\..\src\background_thread.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\base.c" />
|
<ClCompile Include="..\..\..\..\src\base.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c" />
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c" />
|
<ClCompile Include="..\..\..\..\src\bin.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
<ClCompile Include="..\..\..\..\src\bin_info.c" />
|
||||||
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
<ClCompile Include="..\..\..\..\src\bitmap.c" />
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,6 @@
|
||||||
<ClCompile Include="..\..\..\..\src\base.c">
|
<ClCompile Include="..\..\..\..\src\base.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\..\..\src\batcher.c">
|
|
||||||
<Filter>Source Files</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\..\..\src\bin.c">
|
<ClCompile Include="..\..\..\..\src\bin.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|
|
||||||
89
src/arena.c
89
src/arena.c
|
|
@ -39,7 +39,8 @@ div_info_t arena_binind_div_info[SC_NBINS];
|
||||||
size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
||||||
size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
|
||||||
|
|
||||||
uint32_t arena_bin_offsets[SC_NBINS];
|
uint32_t arena_bin_offsets[SC_NBINS];
|
||||||
|
static unsigned nbins_total;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* a0 is used to handle huge requests before malloc init completes. After
|
* a0 is used to handle huge requests before malloc init completes. After
|
||||||
|
|
@ -674,17 +675,11 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) {
|
arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
|
||||||
edata_t *slab;
|
edata_t *slab;
|
||||||
|
|
||||||
malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
|
malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
|
||||||
|
|
||||||
if (arena_bin_has_batch(binind)) {
|
|
||||||
bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin;
|
|
||||||
batcher_init(
|
|
||||||
&batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bin->slabcur != NULL) {
|
if (bin->slabcur != NULL) {
|
||||||
slab = bin->slabcur;
|
slab = bin->slabcur;
|
||||||
bin->slabcur = NULL;
|
bin->slabcur = NULL;
|
||||||
|
|
@ -835,8 +830,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
|
||||||
/* Bins. */
|
/* Bins. */
|
||||||
for (unsigned i = 0; i < SC_NBINS; i++) {
|
for (unsigned i = 0; i < SC_NBINS; i++) {
|
||||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
||||||
arena_bin_reset(
|
arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j));
|
||||||
tsd, arena, arena_get_bin(arena, i, j), i);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
|
pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
|
||||||
|
|
@ -1103,19 +1097,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin,
|
||||||
unsigned binshard;
|
unsigned binshard;
|
||||||
bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard);
|
bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard);
|
||||||
|
|
||||||
/*
|
|
||||||
* This has some fields that are conditionally initialized down batch
|
|
||||||
* flush pathways. This can trigger static analysis warnings deeper
|
|
||||||
* down in the stack. The accesses are guarded by the same checks as
|
|
||||||
* the initialization, but the analysis isn't able to track that across
|
|
||||||
* multiple stack frames.
|
|
||||||
*/
|
|
||||||
arena_bin_flush_batch_state_t batch_flush_state
|
|
||||||
JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0});
|
|
||||||
label_refill:
|
label_refill:
|
||||||
malloc_mutex_lock(tsdn, &bin->lock);
|
malloc_mutex_lock(tsdn, &bin->lock);
|
||||||
arena_bin_flush_batch_after_lock(
|
|
||||||
tsdn, arena, bin, binind, &batch_flush_state);
|
|
||||||
|
|
||||||
while (filled < nfill_min) {
|
while (filled < nfill_min) {
|
||||||
/* Try batch-fill from slabcur first. */
|
/* Try batch-fill from slabcur first. */
|
||||||
|
|
@ -1176,11 +1159,7 @@ label_refill:
|
||||||
cache_bin->tstats.nrequests = 0;
|
cache_bin->tstats.nrequests = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
arena_bin_flush_batch_before_unlock(
|
|
||||||
tsdn, arena, bin, binind, &batch_flush_state);
|
|
||||||
malloc_mutex_unlock(tsdn, &bin->lock);
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
||||||
arena_bin_flush_batch_after_unlock(
|
|
||||||
tsdn, arena, bin, binind, &batch_flush_state);
|
|
||||||
|
|
||||||
if (alloc_and_retry) {
|
if (alloc_and_retry) {
|
||||||
assert(fresh_slab == NULL);
|
assert(fresh_slab == NULL);
|
||||||
|
|
@ -1474,16 +1453,12 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
|
||||||
malloc_mutex_lock(tsdn, &bin->lock);
|
malloc_mutex_lock(tsdn, &bin->lock);
|
||||||
arena_dalloc_bin_locked_info_t info;
|
arena_dalloc_bin_locked_info_t info;
|
||||||
arena_dalloc_bin_locked_begin(&info, binind);
|
arena_dalloc_bin_locked_begin(&info, binind);
|
||||||
edata_t *dalloc_slabs[1];
|
bool ret = arena_dalloc_bin_locked_step(
|
||||||
unsigned dalloc_slabs_count = 0;
|
tsdn, arena, bin, &info, binind, edata, ptr);
|
||||||
arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata,
|
|
||||||
ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count,
|
|
||||||
/* dalloc_slabs_extra */ NULL);
|
|
||||||
arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
|
arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
|
||||||
malloc_mutex_unlock(tsdn, &bin->lock);
|
malloc_mutex_unlock(tsdn, &bin->lock);
|
||||||
|
|
||||||
if (dalloc_slabs_count != 0) {
|
if (ret) {
|
||||||
assert(dalloc_slabs[0] == edata);
|
|
||||||
arena_slab_dalloc(tsdn, arena, edata);
|
arena_slab_dalloc(tsdn, arena, edata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1722,6 +1697,7 @@ arena_t *
|
||||||
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
||||||
arena_t *arena;
|
arena_t *arena;
|
||||||
base_t *base;
|
base_t *base;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
if (ind == 0) {
|
if (ind == 0) {
|
||||||
base = b0get();
|
base = b0get();
|
||||||
|
|
@ -1734,13 +1710,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE)
|
size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE)
|
||||||
+ sizeof(bin_with_batch_t) * bin_info_nbatched_bins
|
+ sizeof(bin_t) * nbins_total;
|
||||||
+ sizeof(bin_t) * bin_info_nunbatched_bins;
|
|
||||||
arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
|
arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
|
||||||
if (arena == NULL) {
|
if (arena == NULL) {
|
||||||
goto label_error;
|
goto label_error;
|
||||||
}
|
}
|
||||||
|
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||||
|
assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t)
|
||||||
|
<= (uintptr_t)arena + arena_size);)
|
||||||
atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
|
atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
|
||||||
atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
|
atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
|
||||||
arena->last_thd = NULL;
|
arena->last_thd = NULL;
|
||||||
|
|
@ -1779,13 +1756,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
|
||||||
|
|
||||||
/* Initialize bins. */
|
/* Initialize bins. */
|
||||||
atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
|
atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
|
||||||
for (unsigned i = 0; i < SC_NBINS; i++) {
|
for (i = 0; i < nbins_total; i++) {
|
||||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||||
bin_t *bin = arena_get_bin(arena, i, j);
|
bool err = bin_init(&arena->all_bins[i]);)
|
||||||
bool err = bin_init(bin, i);
|
if (err) {
|
||||||
if (err) {
|
goto label_error;
|
||||||
goto label_error;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1943,10 +1918,8 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
|
||||||
uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);)
|
uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);)
|
||||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
for (szind_t i = 0; i < SC_NBINS; i++) {
|
||||||
arena_bin_offsets[i] = cur_offset;
|
arena_bin_offsets[i] = cur_offset;
|
||||||
uint32_t bin_sz = (i < bin_info_nbatched_sizes
|
nbins_total += bin_infos[i].n_shards;
|
||||||
? sizeof(bin_with_batch_t)
|
cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
|
||||||
: sizeof(bin_t));
|
|
||||||
cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz;
|
|
||||||
}
|
}
|
||||||
return pa_central_init(
|
return pa_central_init(
|
||||||
&arena_pa_central_global, base, hpa, &hpa_hooks_default);
|
&arena_pa_central_global, base, hpa, &hpa_hooks_default);
|
||||||
|
|
@ -1996,21 +1969,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
|
||||||
|
|
||||||
void
|
void
|
||||||
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
|
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
|
||||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
for (unsigned i = 0; i < nbins_total; i++) {
|
||||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||||
bin_t *bin = arena_get_bin(arena, i, j);
|
bin_prefork(tsdn, &arena->all_bins[i]);)
|
||||||
bin_prefork(tsdn, bin, arena_bin_has_batch(i));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
|
arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
|
||||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
for (unsigned i = 0; i < nbins_total; i++) {
|
||||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||||
bin_t *bin = arena_get_bin(arena, i, j);
|
bin_postfork_parent(tsdn, &arena->all_bins[i]);)
|
||||||
bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
|
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
|
||||||
|
|
@ -2047,11 +2016,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (szind_t i = 0; i < SC_NBINS; i++) {
|
for (unsigned i = 0; i < nbins_total; i++) {
|
||||||
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
|
JEMALLOC_SUPPRESS_WARN_ON_USAGE(
|
||||||
bin_t *bin = arena_get_bin(arena, i, j);
|
bin_postfork_child(tsdn, &arena->all_bins[i]);)
|
||||||
bin_postfork_child(tsdn, bin, arena_bin_has_batch(i));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
|
malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
|
||||||
|
|
|
||||||
|
|
@ -1,98 +0,0 @@
|
||||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
|
||||||
|
|
||||||
#include "jemalloc/internal/batcher.h"
|
|
||||||
|
|
||||||
#include "jemalloc/internal/assert.h"
|
|
||||||
#include "jemalloc/internal/atomic.h"
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_init(batcher_t *batcher, size_t nelems_max) {
|
|
||||||
atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED);
|
|
||||||
batcher->nelems_max = nelems_max;
|
|
||||||
batcher->npushes = 0;
|
|
||||||
malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER,
|
|
||||||
malloc_mutex_rank_exclusive);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns an index (into some user-owned array) to use for pushing, or
|
|
||||||
* BATCHER_NO_IDX if no index is free.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) {
|
|
||||||
assert(elems_to_push > 0);
|
|
||||||
size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED);
|
|
||||||
if (nelems_guess + elems_to_push > batcher->nelems_max) {
|
|
||||||
return BATCHER_NO_IDX;
|
|
||||||
}
|
|
||||||
malloc_mutex_lock(tsdn, &batcher->mtx);
|
|
||||||
size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED);
|
|
||||||
if (nelems + elems_to_push > batcher->nelems_max) {
|
|
||||||
malloc_mutex_unlock(tsdn, &batcher->mtx);
|
|
||||||
return BATCHER_NO_IDX;
|
|
||||||
}
|
|
||||||
assert(elems_to_push <= batcher->nelems_max - nelems);
|
|
||||||
/*
|
|
||||||
* We update nelems at push time (instead of during pop) so that other
|
|
||||||
* racing accesses of the batcher can fail fast instead of trying to
|
|
||||||
* acquire a mutex only to discover that there's no space for them.
|
|
||||||
*/
|
|
||||||
atomic_store_zu(
|
|
||||||
&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED);
|
|
||||||
batcher->npushes++;
|
|
||||||
return nelems;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
malloc_mutex_assert_owner(tsdn, &batcher->mtx);
|
|
||||||
size_t npushes = batcher->npushes;
|
|
||||||
batcher->npushes = 0;
|
|
||||||
return npushes;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
malloc_mutex_assert_owner(tsdn, &batcher->mtx);
|
|
||||||
assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0);
|
|
||||||
malloc_mutex_unlock(tsdn, &batcher->mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED);
|
|
||||||
assert(nelems_guess <= batcher->nelems_max);
|
|
||||||
if (nelems_guess == 0) {
|
|
||||||
return BATCHER_NO_IDX;
|
|
||||||
}
|
|
||||||
malloc_mutex_lock(tsdn, &batcher->mtx);
|
|
||||||
size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED);
|
|
||||||
assert(nelems <= batcher->nelems_max);
|
|
||||||
if (nelems == 0) {
|
|
||||||
malloc_mutex_unlock(tsdn, &batcher->mtx);
|
|
||||||
return BATCHER_NO_IDX;
|
|
||||||
}
|
|
||||||
atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED);
|
|
||||||
return nelems;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0);
|
|
||||||
malloc_mutex_unlock(tsdn, &batcher->mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
malloc_mutex_prefork(tsdn, &batcher->mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
malloc_mutex_postfork_parent(tsdn, &batcher->mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) {
|
|
||||||
malloc_mutex_postfork_child(tsdn, &batcher->mtx);
|
|
||||||
}
|
|
||||||
48
src/bin.c
48
src/bin.c
|
|
@ -6,14 +6,6 @@
|
||||||
#include "jemalloc/internal/sc.h"
|
#include "jemalloc/internal/sc.h"
|
||||||
#include "jemalloc/internal/witness.h"
|
#include "jemalloc/internal/witness.h"
|
||||||
|
|
||||||
#ifdef JEMALLOC_JET
|
|
||||||
unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1;
|
|
||||||
void (*bin_batching_test_after_push_hook)(size_t push_idx);
|
|
||||||
void (*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop);
|
|
||||||
void (*bin_batching_test_after_unlock_hook)(
|
|
||||||
unsigned slab_dalloc_count, bool list_empty);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size,
|
bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size,
|
||||||
size_t end_size, size_t nshards) {
|
size_t end_size, size_t nshards) {
|
||||||
|
|
@ -47,7 +39,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
bin_init(bin_t *bin, unsigned binind) {
|
bin_init(bin_t *bin) {
|
||||||
if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN,
|
if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN,
|
||||||
malloc_mutex_rank_exclusive)) {
|
malloc_mutex_rank_exclusive)) {
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -58,52 +50,20 @@ bin_init(bin_t *bin, unsigned binind) {
|
||||||
if (config_stats) {
|
if (config_stats) {
|
||||||
memset(&bin->stats, 0, sizeof(bin_stats_t));
|
memset(&bin->stats, 0, sizeof(bin_stats_t));
|
||||||
}
|
}
|
||||||
if (arena_bin_has_batch(binind)) {
|
|
||||||
bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin;
|
|
||||||
batcher_init(
|
|
||||||
&batched_bin->remote_frees, opt_bin_info_remote_free_max);
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
bin_prefork(tsdn_t *tsdn, bin_t *bin) {
|
||||||
malloc_mutex_prefork(tsdn, &bin->lock);
|
malloc_mutex_prefork(tsdn, &bin->lock);
|
||||||
if (has_batch) {
|
|
||||||
/*
|
|
||||||
* The batch mutex has lower rank than the bin mutex (as it must
|
|
||||||
* -- it's acquired later). But during forking, we go
|
|
||||||
* bin-at-a-time, so that we acquire mutex on bin 0, then on
|
|
||||||
* the bin 0 batcher, then on bin 1. This is a safe ordering
|
|
||||||
* (it's ordered by the index of arenas and bins within those
|
|
||||||
* arenas), but will trigger witness errors that would
|
|
||||||
* otherwise force another level of arena forking that breaks
|
|
||||||
* bin encapsulation (because the witness API doesn't "know"
|
|
||||||
* about arena or bin ordering -- it just sees that the batcher
|
|
||||||
* has a lower rank than the bin). So instead we exclude the
|
|
||||||
* batcher mutex from witness checking during fork (which is
|
|
||||||
* the only time we touch multiple bins at once) by passing
|
|
||||||
* TSDN_NULL.
|
|
||||||
*/
|
|
||||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
|
||||||
batcher_prefork(TSDN_NULL, &batched->remote_frees);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) {
|
||||||
malloc_mutex_postfork_parent(tsdn, &bin->lock);
|
malloc_mutex_postfork_parent(tsdn, &bin->lock);
|
||||||
if (has_batch) {
|
|
||||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
|
||||||
batcher_postfork_parent(TSDN_NULL, &batched->remote_frees);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) {
|
bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
|
||||||
malloc_mutex_postfork_child(tsdn, &bin->lock);
|
malloc_mutex_postfork_child(tsdn, &bin->lock);
|
||||||
if (has_batch) {
|
|
||||||
bin_with_batch_t *batched = (bin_with_batch_t *)bin;
|
|
||||||
batcher_postfork_child(TSDN_NULL, &batched->remote_frees);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,26 +3,8 @@
|
||||||
|
|
||||||
#include "jemalloc/internal/bin_info.h"
|
#include "jemalloc/internal/bin_info.h"
|
||||||
|
|
||||||
/*
|
|
||||||
* We leave bin-batching disabled by default, with other settings chosen mostly
|
|
||||||
* empirically; across the test programs I looked at they provided the most bang
|
|
||||||
* for the buck. With other default settings, these choices for bin batching
|
|
||||||
* result in them consuming far less memory (even in the worst case) than the
|
|
||||||
* tcaches themselves, the arena, etc.
|
|
||||||
* Note that we always try to pop all bins on every arena cache bin lock
|
|
||||||
* operation, so the typical memory waste is far less than this (and only on
|
|
||||||
* hot bins, which tend to be large anyways).
|
|
||||||
*/
|
|
||||||
size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. */
|
|
||||||
size_t opt_bin_info_remote_free_max_batch = 4;
|
|
||||||
size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX;
|
|
||||||
|
|
||||||
bin_info_t bin_infos[SC_NBINS];
|
bin_info_t bin_infos[SC_NBINS];
|
||||||
|
|
||||||
szind_t bin_info_nbatched_sizes;
|
|
||||||
unsigned bin_info_nbatched_bins;
|
|
||||||
unsigned bin_info_nunbatched_bins;
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
||||||
bin_info_t infos[SC_NBINS]) {
|
bin_info_t infos[SC_NBINS]) {
|
||||||
|
|
@ -38,12 +20,6 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
||||||
bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER(
|
bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER(
|
||||||
bin_info->nregs);
|
bin_info->nregs);
|
||||||
bin_info->bitmap_info = bitmap_info;
|
bin_info->bitmap_info = bitmap_info;
|
||||||
if (bin_info->reg_size <= opt_bin_info_max_batched_size) {
|
|
||||||
bin_info_nbatched_sizes++;
|
|
||||||
bin_info_nbatched_bins += bin_info->n_shards;
|
|
||||||
} else {
|
|
||||||
bin_info_nunbatched_bins += bin_info->n_shards;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
37
src/ctl.c
37
src/ctl.c
|
|
@ -134,9 +134,6 @@ CTL_PROTO(opt_utrace)
|
||||||
CTL_PROTO(opt_xmalloc)
|
CTL_PROTO(opt_xmalloc)
|
||||||
CTL_PROTO(opt_experimental_infallible_new)
|
CTL_PROTO(opt_experimental_infallible_new)
|
||||||
CTL_PROTO(opt_experimental_tcache_gc)
|
CTL_PROTO(opt_experimental_tcache_gc)
|
||||||
CTL_PROTO(opt_max_batched_size)
|
|
||||||
CTL_PROTO(opt_remote_free_max)
|
|
||||||
CTL_PROTO(opt_remote_free_max_batch)
|
|
||||||
CTL_PROTO(opt_tcache)
|
CTL_PROTO(opt_tcache)
|
||||||
CTL_PROTO(opt_tcache_max)
|
CTL_PROTO(opt_tcache_max)
|
||||||
CTL_PROTO(opt_tcache_nslots_small_min)
|
CTL_PROTO(opt_tcache_nslots_small_min)
|
||||||
|
|
@ -248,10 +245,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs)
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
|
CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_curslabs)
|
CTL_PROTO(stats_arenas_i_bins_j_curslabs)
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs)
|
CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs)
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_batch_pops)
|
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes)
|
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_batch_pushes)
|
|
||||||
CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems)
|
|
||||||
INDEX_PROTO(stats_arenas_i_bins_j)
|
INDEX_PROTO(stats_arenas_i_bins_j)
|
||||||
CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
|
CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
|
||||||
CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
|
CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
|
||||||
|
|
@ -501,9 +494,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
|
||||||
{NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)},
|
{NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)},
|
||||||
{NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)},
|
{NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)},
|
||||||
{NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)},
|
{NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)},
|
||||||
{NAME("max_batched_size"), CTL(opt_max_batched_size)},
|
|
||||||
{NAME("remote_free_max"), CTL(opt_remote_free_max)},
|
|
||||||
{NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)},
|
|
||||||
{NAME("tcache"), CTL(opt_tcache)},
|
{NAME("tcache"), CTL(opt_tcache)},
|
||||||
{NAME("tcache_max"), CTL(opt_tcache_max)},
|
{NAME("tcache_max"), CTL(opt_tcache_max)},
|
||||||
{NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)},
|
{NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)},
|
||||||
|
|
@ -673,11 +663,6 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
|
||||||
{NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
|
{NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
|
||||||
{NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
|
{NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
|
||||||
{NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)},
|
{NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)},
|
||||||
{NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)},
|
|
||||||
{NAME("batch_failed_pushes"),
|
|
||||||
CTL(stats_arenas_i_bins_j_batch_failed_pushes)},
|
|
||||||
{NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)},
|
|
||||||
{NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)},
|
|
||||||
{NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}};
|
{NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}};
|
||||||
|
|
||||||
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
|
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
|
||||||
|
|
@ -1219,14 +1204,6 @@ ctl_arena_stats_sdmerge(
|
||||||
assert(bstats->curslabs == 0);
|
assert(bstats->curslabs == 0);
|
||||||
assert(bstats->nonfull_slabs == 0);
|
assert(bstats->nonfull_slabs == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
merged->batch_pops += bstats->batch_pops;
|
|
||||||
merged->batch_failed_pushes +=
|
|
||||||
bstats->batch_failed_pushes;
|
|
||||||
merged->batch_pushes += bstats->batch_pushes;
|
|
||||||
merged->batch_pushed_elems +=
|
|
||||||
bstats->batch_pushed_elems;
|
|
||||||
|
|
||||||
malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data,
|
malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data,
|
||||||
&astats->bstats[i].mutex_data);
|
&astats->bstats[i].mutex_data);
|
||||||
}
|
}
|
||||||
|
|
@ -2202,10 +2179,6 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
|
||||||
CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new,
|
CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new,
|
||||||
opt_experimental_infallible_new, bool)
|
opt_experimental_infallible_new, bool)
|
||||||
CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool)
|
CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool)
|
||||||
CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t)
|
|
||||||
CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t)
|
|
||||||
CTL_RO_NL_GEN(
|
|
||||||
opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, size_t)
|
|
||||||
CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
|
CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
|
||||||
CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t)
|
CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t)
|
||||||
CTL_RO_NL_GEN(
|
CTL_RO_NL_GEN(
|
||||||
|
|
@ -3982,16 +3955,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs,
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t)
|
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t)
|
||||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs,
|
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs,
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t)
|
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t)
|
||||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops,
|
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t)
|
|
||||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes,
|
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes,
|
|
||||||
uint64_t)
|
|
||||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes,
|
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t)
|
|
||||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems,
|
|
||||||
arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems,
|
|
||||||
uint64_t)
|
|
||||||
|
|
||||||
static const ctl_named_node_t *
|
static const ctl_named_node_t *
|
||||||
stats_arenas_i_bins_j_index(
|
stats_arenas_i_bins_j_index(
|
||||||
|
|
|
||||||
|
|
@ -1391,20 +1391,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
||||||
} while (vlen_left > 0);
|
} while (vlen_left > 0);
|
||||||
CONF_CONTINUE;
|
CONF_CONTINUE;
|
||||||
}
|
}
|
||||||
CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size,
|
|
||||||
"max_batched_size", 0, SIZE_T_MAX,
|
|
||||||
CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
|
|
||||||
/* clip */ true)
|
|
||||||
CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch,
|
|
||||||
"remote_free_max_batch", 0,
|
|
||||||
BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN,
|
|
||||||
CONF_CHECK_MAX,
|
|
||||||
/* clip */ true)
|
|
||||||
CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max,
|
|
||||||
"remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX,
|
|
||||||
CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
|
|
||||||
/* clip */ true)
|
|
||||||
|
|
||||||
if (CONF_MATCH("tcache_ncached_max")) {
|
if (CONF_MATCH("tcache_ncached_max")) {
|
||||||
bool err = tcache_bin_info_default_init(
|
bool err = tcache_bin_info_default_init(
|
||||||
v, vlen);
|
v, vlen);
|
||||||
|
|
|
||||||
58
src/stats.c
58
src/stats.c
|
|
@ -357,15 +357,6 @@ stats_arena_bins_print(
|
||||||
COL_HDR(row, nreslabs, NULL, right, 13, uint64)
|
COL_HDR(row, nreslabs, NULL, right, 13, uint64)
|
||||||
COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64)
|
COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64)
|
||||||
|
|
||||||
COL_HDR(row, pops, NULL, right, 10, uint64)
|
|
||||||
COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64)
|
|
||||||
COL_HDR(row, failed_push, NULL, right, 13, uint64)
|
|
||||||
COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64)
|
|
||||||
COL_HDR(row, push, NULL, right, 7, uint64)
|
|
||||||
COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64)
|
|
||||||
COL_HDR(row, push_elem, NULL, right, 12, uint64)
|
|
||||||
COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64)
|
|
||||||
|
|
||||||
/* Don't want to actually print the name. */
|
/* Don't want to actually print the name. */
|
||||||
header_justify_spacer.str_val = " ";
|
header_justify_spacer.str_val = " ";
|
||||||
col_justify_spacer.str_val = " ";
|
col_justify_spacer.str_val = " ";
|
||||||
|
|
@ -406,15 +397,13 @@ stats_arena_bins_print(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0, in_gap = false; j < nbins; j++) {
|
for (j = 0, in_gap = false; j < nbins; j++) {
|
||||||
uint64_t nslabs;
|
uint64_t nslabs;
|
||||||
size_t reg_size, slab_size, curregs;
|
size_t reg_size, slab_size, curregs;
|
||||||
size_t curslabs;
|
size_t curslabs;
|
||||||
size_t nonfull_slabs;
|
size_t nonfull_slabs;
|
||||||
uint32_t nregs, nshards;
|
uint32_t nregs, nshards;
|
||||||
uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
|
uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
|
||||||
uint64_t nreslabs;
|
uint64_t nreslabs;
|
||||||
uint64_t batch_pops, batch_failed_pushes, batch_pushes,
|
|
||||||
batch_pushed_elems;
|
|
||||||
prof_stats_t prof_live;
|
prof_stats_t prof_live;
|
||||||
prof_stats_t prof_accum;
|
prof_stats_t prof_accum;
|
||||||
|
|
||||||
|
|
@ -463,15 +452,6 @@ stats_arena_bins_print(
|
||||||
CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs,
|
CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs,
|
||||||
size_t);
|
size_t);
|
||||||
|
|
||||||
CTL_LEAF(
|
|
||||||
stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t);
|
|
||||||
CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes",
|
|
||||||
&batch_failed_pushes, uint64_t);
|
|
||||||
CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes,
|
|
||||||
uint64_t);
|
|
||||||
CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems",
|
|
||||||
&batch_pushed_elems, uint64_t);
|
|
||||||
|
|
||||||
if (mutex) {
|
if (mutex) {
|
||||||
mutex_stats_read_arena_bin(stats_arenas_mib, 5,
|
mutex_stats_read_arena_bin(stats_arenas_mib, 5,
|
||||||
col_mutex64, col_mutex32, uptime);
|
col_mutex64, col_mutex32, uptime);
|
||||||
|
|
@ -506,14 +486,6 @@ stats_arena_bins_print(
|
||||||
emitter, "curslabs", emitter_type_size, &curslabs);
|
emitter, "curslabs", emitter_type_size, &curslabs);
|
||||||
emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size,
|
emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size,
|
||||||
&nonfull_slabs);
|
&nonfull_slabs);
|
||||||
emitter_json_kv(
|
|
||||||
emitter, "batch_pops", emitter_type_uint64, &batch_pops);
|
|
||||||
emitter_json_kv(emitter, "batch_failed_pushes",
|
|
||||||
emitter_type_uint64, &batch_failed_pushes);
|
|
||||||
emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64,
|
|
||||||
&batch_pushes);
|
|
||||||
emitter_json_kv(emitter, "batch_pushed_elems",
|
|
||||||
emitter_type_uint64, &batch_pushed_elems);
|
|
||||||
if (mutex) {
|
if (mutex) {
|
||||||
emitter_json_object_kv_begin(emitter, "mutex");
|
emitter_json_object_kv_begin(emitter, "mutex");
|
||||||
mutex_stats_emit(
|
mutex_stats_emit(
|
||||||
|
|
@ -573,19 +545,6 @@ stats_arena_bins_print(
|
||||||
col_nreslabs.uint64_val = nreslabs;
|
col_nreslabs.uint64_val = nreslabs;
|
||||||
col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime);
|
col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime);
|
||||||
|
|
||||||
col_pops.uint64_val = batch_pops;
|
|
||||||
col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime);
|
|
||||||
|
|
||||||
col_failed_push.uint64_val = batch_failed_pushes;
|
|
||||||
col_failed_push_ps.uint64_val = rate_per_second(
|
|
||||||
batch_failed_pushes, uptime);
|
|
||||||
col_push.uint64_val = batch_pushes;
|
|
||||||
col_push_ps.uint64_val = rate_per_second(batch_pushes, uptime);
|
|
||||||
|
|
||||||
col_push_elem.uint64_val = batch_pushed_elems;
|
|
||||||
col_push_elem_ps.uint64_val = rate_per_second(
|
|
||||||
batch_pushed_elems, uptime);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that mutex columns were initialized above, if mutex ==
|
* Note that mutex columns were initialized above, if mutex ==
|
||||||
* true.
|
* true.
|
||||||
|
|
@ -1677,9 +1636,6 @@ stats_general_print(emitter_t *emitter) {
|
||||||
OPT_WRITE_BOOL("xmalloc")
|
OPT_WRITE_BOOL("xmalloc")
|
||||||
OPT_WRITE_BOOL("experimental_infallible_new")
|
OPT_WRITE_BOOL("experimental_infallible_new")
|
||||||
OPT_WRITE_BOOL("experimental_tcache_gc")
|
OPT_WRITE_BOOL("experimental_tcache_gc")
|
||||||
OPT_WRITE_SIZE_T("max_batched_size")
|
|
||||||
OPT_WRITE_SIZE_T("remote_free_max")
|
|
||||||
OPT_WRITE_SIZE_T("remote_free_max_batch")
|
|
||||||
OPT_WRITE_BOOL("tcache")
|
OPT_WRITE_BOOL("tcache")
|
||||||
OPT_WRITE_SIZE_T("tcache_max")
|
OPT_WRITE_SIZE_T("tcache_max")
|
||||||
OPT_WRITE_UNSIGNED("tcache_nslots_small_min")
|
OPT_WRITE_UNSIGNED("tcache_nslots_small_min")
|
||||||
|
|
|
||||||
194
src/tcache.c
194
src/tcache.c
|
|
@ -608,7 +608,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
||||||
}
|
}
|
||||||
arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind,
|
arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind,
|
||||||
/* nfill_min */
|
/* nfill_min */
|
||||||
opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill,
|
opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill,
|
||||||
/* nfill_max */ nfill);
|
/* nfill_max */ nfill);
|
||||||
tcache_slow->bin_refilled[binind] = true;
|
tcache_slow->bin_refilled[binind] = true;
|
||||||
tcache_nfill_small_burst_prepare(tcache_slow, binind);
|
tcache_nfill_small_burst_prepare(tcache_slow, binind);
|
||||||
|
|
@ -680,8 +680,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache,
|
||||||
assert(binind < SC_NBINS);
|
assert(binind < SC_NBINS);
|
||||||
arena_t *tcache_arena = tcache_slow->arena;
|
arena_t *tcache_arena = tcache_slow->arena;
|
||||||
assert(tcache_arena != NULL);
|
assert(tcache_arena != NULL);
|
||||||
unsigned tcache_binshard =
|
|
||||||
tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind];
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Variable length array must have > 0 length; the last element is never
|
* Variable length array must have > 0 length; the last element is never
|
||||||
|
|
@ -698,25 +696,12 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache,
|
||||||
unsigned dalloc_count = 0;
|
unsigned dalloc_count = 0;
|
||||||
VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
|
VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
|
||||||
|
|
||||||
/*
|
|
||||||
* There's an edge case where we need to deallocate more slabs than we
|
|
||||||
* have elements of dalloc_slabs. This can if we end up deallocating
|
|
||||||
* items batched by another thread in addition to ones flushed from the
|
|
||||||
* cache. Since this is not very likely (most small object
|
|
||||||
* deallocations don't free up a whole slab), we don't want to burn the
|
|
||||||
* stack space to keep those excess slabs in an array. Instead we'll
|
|
||||||
* maintain an overflow list.
|
|
||||||
*/
|
|
||||||
edata_list_active_t dalloc_slabs_extra;
|
|
||||||
edata_list_active_init(&dalloc_slabs_extra);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We're about to grab a bunch of locks. If one of them happens to be
|
* We're about to grab a bunch of locks. If one of them happens to be
|
||||||
* the one guarding the arena-level stats counters we flush our
|
* the one guarding the arena-level stats counters we flush our
|
||||||
* thread-local ones to, we do so under one critical section.
|
* thread-local ones to, we do so under one critical section.
|
||||||
*/
|
*/
|
||||||
bool merged_stats = false;
|
bool merged_stats = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We maintain the invariant that all edatas yet to be flushed are
|
* We maintain the invariant that all edatas yet to be flushed are
|
||||||
* contained in the half-open range [flush_start, flush_end). We'll
|
* contained in the half-open range [flush_start, flush_end). We'll
|
||||||
|
|
@ -741,7 +726,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache,
|
||||||
unsigned cur_binshard = edata_binshard_get(cur_edata);
|
unsigned cur_binshard = edata_binshard_get(cur_edata);
|
||||||
bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard);
|
bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard);
|
||||||
assert(cur_binshard < bin_infos[binind].n_shards);
|
assert(cur_binshard < bin_infos[binind].n_shards);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Start off the partition; item_edata[i] always matches itself
|
* Start off the partition; item_edata[i] always matches itself
|
||||||
* of course.
|
* of course.
|
||||||
|
|
@ -788,150 +772,43 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Actually do the flushing. */
|
||||||
* We never batch when flushing to our home-base bin shard,
|
malloc_mutex_lock(tsdn, &cur_bin->lock);
|
||||||
* since it's likely that we'll have to acquire that lock anyway
|
|
||||||
* when flushing stats.
|
|
||||||
*
|
|
||||||
* A plausible check we could add to can_batch is
|
|
||||||
* '&& arena_is_auto(cur_arena)'. The motivation would be that
|
|
||||||
* we have a higher tolerance for dubious user assumptions
|
|
||||||
* around non-auto arenas (e.g. "if I deallocate every object I
|
|
||||||
* allocated, and then call tcache.flush, then the arena stats
|
|
||||||
* must reflect zero live allocations").
|
|
||||||
*
|
|
||||||
* This is dubious for a couple reasons:
|
|
||||||
* - We already don't provide perfect fidelity for stats
|
|
||||||
* counting (e.g. for profiled allocations, whose size can
|
|
||||||
* inflate in stats).
|
|
||||||
* - Hanging load-bearing guarantees around stats impedes
|
|
||||||
* scalability in general.
|
|
||||||
*
|
|
||||||
* There are some "complete" strategies we could do instead:
|
|
||||||
* - Add a arena.<i>.quiesce call to pop all bins for users who
|
|
||||||
* do want those stats accounted for.
|
|
||||||
* - Make batchability a user-controllable per-arena option.
|
|
||||||
* - Do a batch pop after every mutex acquisition for which we
|
|
||||||
* want to provide accurate stats. This gives perfectly
|
|
||||||
* accurate stats, but can cause weird performance effects
|
|
||||||
* (because doing stats collection can now result in slabs
|
|
||||||
* becoming empty, and therefore purging, large mutex
|
|
||||||
* acquisition, etc.).
|
|
||||||
* - Propagate the "why" behind a flush down to the level of the
|
|
||||||
* batcher, and include a batch pop attempt down full tcache
|
|
||||||
* flushing pathways. This is just a lot of plumbing and
|
|
||||||
* internal complexity.
|
|
||||||
*
|
|
||||||
* We don't do any of these right now, but the decision calculus
|
|
||||||
* and tradeoffs are subtle enough that the reasoning was worth
|
|
||||||
* leaving in this comment.
|
|
||||||
*/
|
|
||||||
bool bin_is_batched = arena_bin_has_batch(binind);
|
|
||||||
bool home_binshard = (cur_arena == tcache_arena
|
|
||||||
&& cur_binshard == tcache_binshard);
|
|
||||||
bool can_batch = (flush_start - prev_flush_start
|
|
||||||
<= opt_bin_info_remote_free_max_batch)
|
|
||||||
&& !home_binshard && bin_is_batched;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We try to avoid the batching pathway if we can, so we always
|
* Flush stats first, if that was the right lock. Note that we
|
||||||
* at least *try* to lock.
|
* don't actually have to flush stats into the current thread's
|
||||||
|
* binshard. Flushing into any binshard in the same arena is
|
||||||
|
* enough; we don't expose stats on per-binshard basis (just
|
||||||
|
* per-bin).
|
||||||
*/
|
*/
|
||||||
bool locked = false;
|
if (config_stats && tcache_arena == cur_arena
|
||||||
bool batched = false;
|
&& !merged_stats) {
|
||||||
bool batch_failed = false;
|
merged_stats = true;
|
||||||
if (can_batch) {
|
cur_bin->stats.nflushes++;
|
||||||
locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock);
|
cur_bin->stats.nrequests += cache_bin->tstats.nrequests;
|
||||||
|
cache_bin->tstats.nrequests = 0;
|
||||||
}
|
}
|
||||||
if (can_batch && !locked) {
|
|
||||||
bin_with_batch_t *batched_bin = (bin_with_batch_t *)
|
|
||||||
cur_bin;
|
|
||||||
size_t push_idx = batcher_push_begin(tsdn,
|
|
||||||
&batched_bin->remote_frees,
|
|
||||||
flush_start - prev_flush_start);
|
|
||||||
bin_batching_test_after_push(push_idx);
|
|
||||||
|
|
||||||
if (push_idx != BATCHER_NO_IDX) {
|
/* Next flush objects. */
|
||||||
batched = true;
|
/* Init only to avoid used-uninitialized warning. */
|
||||||
unsigned nbatched = flush_start
|
arena_dalloc_bin_locked_info_t dalloc_bin_info = {0};
|
||||||
- prev_flush_start;
|
arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
|
||||||
for (unsigned i = 0; i < nbatched; i++) {
|
for (unsigned i = prev_flush_start; i < flush_start; i++) {
|
||||||
unsigned src_ind = prev_flush_start + i;
|
void *ptr = ptrs->ptr[i];
|
||||||
batched_bin
|
edata_t *edata = item_edata[i].edata;
|
||||||
->remote_free_data[push_idx + i]
|
if (arena_dalloc_bin_locked_step(tsdn, cur_arena,
|
||||||
.ptr = ptrs->ptr[src_ind];
|
cur_bin, &dalloc_bin_info, binind, edata,
|
||||||
batched_bin
|
ptr)) {
|
||||||
->remote_free_data[push_idx + i]
|
dalloc_slabs[dalloc_count] = edata;
|
||||||
.slab = item_edata[src_ind].edata;
|
dalloc_count++;
|
||||||
}
|
|
||||||
batcher_push_end(
|
|
||||||
tsdn, &batched_bin->remote_frees);
|
|
||||||
} else {
|
|
||||||
batch_failed = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!batched) {
|
|
||||||
if (!locked) {
|
|
||||||
malloc_mutex_lock(tsdn, &cur_bin->lock);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Unlike other stats (which only ever get flushed into
|
|
||||||
* a tcache's associated arena), batch_failed counts get
|
|
||||||
* accumulated into the bin where the push attempt
|
|
||||||
* failed.
|
|
||||||
*/
|
|
||||||
if (config_stats && batch_failed) {
|
|
||||||
cur_bin->stats.batch_failed_pushes++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
arena_dalloc_bin_locked_finish(
|
||||||
* Flush stats first, if that was the right lock. Note
|
tsdn, cur_arena, cur_bin, &dalloc_bin_info);
|
||||||
* that we don't actually have to flush stats into the
|
malloc_mutex_unlock(tsdn, &cur_bin->lock);
|
||||||
* current thread's binshard. Flushing into any binshard
|
|
||||||
* in the same arena is enough; we don't expose stats on
|
|
||||||
* per-binshard basis (just per-bin).
|
|
||||||
*/
|
|
||||||
if (config_stats && tcache_arena == cur_arena
|
|
||||||
&& !merged_stats) {
|
|
||||||
merged_stats = true;
|
|
||||||
cur_bin->stats.nflushes++;
|
|
||||||
cur_bin->stats.nrequests +=
|
|
||||||
cache_bin->tstats.nrequests;
|
|
||||||
cache_bin->tstats.nrequests = 0;
|
|
||||||
}
|
|
||||||
unsigned preallocated_slabs = nflush;
|
|
||||||
unsigned ndalloc_slabs =
|
|
||||||
arena_bin_batch_get_ndalloc_slabs(
|
|
||||||
preallocated_slabs);
|
|
||||||
|
|
||||||
/* Next flush objects our own objects. */
|
|
||||||
/* Init only to avoid used-uninitialized warning. */
|
|
||||||
arena_dalloc_bin_locked_info_t dalloc_bin_info = {0};
|
|
||||||
arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
|
|
||||||
for (unsigned i = prev_flush_start; i < flush_start;
|
|
||||||
i++) {
|
|
||||||
void *ptr = ptrs->ptr[i];
|
|
||||||
edata_t *edata = item_edata[i].edata;
|
|
||||||
arena_dalloc_bin_locked_step(tsdn, cur_arena,
|
|
||||||
cur_bin, &dalloc_bin_info, binind, edata,
|
|
||||||
ptr, dalloc_slabs, ndalloc_slabs,
|
|
||||||
&dalloc_count, &dalloc_slabs_extra);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Lastly, flush any batched objects (from other
|
|
||||||
* threads).
|
|
||||||
*/
|
|
||||||
if (bin_is_batched) {
|
|
||||||
arena_bin_flush_batch_impl(tsdn, cur_arena,
|
|
||||||
cur_bin, &dalloc_bin_info, binind,
|
|
||||||
dalloc_slabs, ndalloc_slabs, &dalloc_count,
|
|
||||||
&dalloc_slabs_extra);
|
|
||||||
}
|
|
||||||
|
|
||||||
arena_dalloc_bin_locked_finish(
|
|
||||||
tsdn, cur_arena, cur_bin, &dalloc_bin_info);
|
|
||||||
malloc_mutex_unlock(tsdn, &cur_bin->lock);
|
|
||||||
}
|
|
||||||
arena_decay_ticks(
|
arena_decay_ticks(
|
||||||
tsdn, cur_arena, flush_start - prev_flush_start);
|
tsdn, cur_arena, flush_start - prev_flush_start);
|
||||||
}
|
}
|
||||||
|
|
@ -941,18 +818,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache,
|
||||||
edata_t *slab = dalloc_slabs[i];
|
edata_t *slab = dalloc_slabs[i];
|
||||||
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
||||||
}
|
}
|
||||||
while (!edata_list_active_empty(&dalloc_slabs_extra)) {
|
|
||||||
edata_t *slab = edata_list_active_first(&dalloc_slabs_extra);
|
|
||||||
edata_list_active_remove(&dalloc_slabs_extra, slab);
|
|
||||||
arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (config_stats && !merged_stats) {
|
if (config_stats && !merged_stats) {
|
||||||
/*
|
/*
|
||||||
* The flush loop didn't happen to flush to this
|
* The flush loop didn't happen to flush to this
|
||||||
* thread's arena, so the stats didn't get merged.
|
* thread's arena, so the stats didn't get merged.
|
||||||
* Manually do so now.
|
* Manually do so now.
|
||||||
*/
|
*/
|
||||||
bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL);
|
bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL);
|
||||||
malloc_mutex_lock(tsdn, &bin->lock);
|
malloc_mutex_lock(tsdn, &bin->lock);
|
||||||
bin->stats.nflushes++;
|
bin->stats.nflushes++;
|
||||||
|
|
|
||||||
|
|
@ -34,8 +34,6 @@ main(void) {
|
||||||
P(arena_t);
|
P(arena_t);
|
||||||
P(arena_stats_t);
|
P(arena_stats_t);
|
||||||
P(base_t);
|
P(base_t);
|
||||||
P(bin_t);
|
|
||||||
P(bin_with_batch_t);
|
|
||||||
P(decay_t);
|
P(decay_t);
|
||||||
P(edata_t);
|
P(edata_t);
|
||||||
P(ecache_t);
|
P(ecache_t);
|
||||||
|
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
#ifndef JEMALLOC_TEST_FORK_H
|
|
||||||
#define JEMALLOC_TEST_FORK_H
|
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
|
|
||||||
# include <sys/wait.h>
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
fork_wait_for_child_exit(int pid) {
|
|
||||||
int status;
|
|
||||||
while (true) {
|
|
||||||
if (waitpid(pid, &status, 0) == -1) {
|
|
||||||
test_fail("Unexpected waitpid() failure.");
|
|
||||||
}
|
|
||||||
if (WIFSIGNALED(status)) {
|
|
||||||
test_fail(
|
|
||||||
"Unexpected child termination due to "
|
|
||||||
"signal %d",
|
|
||||||
WTERMSIG(status));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (WIFEXITED(status)) {
|
|
||||||
if (WEXITSTATUS(status) != 0) {
|
|
||||||
test_fail("Unexpected child exit value %d",
|
|
||||||
WEXITSTATUS(status));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* JEMALLOC_TEST_FORK_H */
|
|
||||||
|
|
@ -1,243 +0,0 @@
|
||||||
#include "test/jemalloc_test.h"
|
|
||||||
|
|
||||||
#include "jemalloc/internal/batcher.h"
|
|
||||||
|
|
||||||
TEST_BEGIN(test_simple) {
|
|
||||||
enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 };
|
|
||||||
batcher_t batcher;
|
|
||||||
size_t data[NELEMS_MAX];
|
|
||||||
for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) {
|
|
||||||
batcher_init(&batcher, nelems);
|
|
||||||
for (int run = 0; run < NRUNS; run++) {
|
|
||||||
for (int i = 0; i < NELEMS_MAX; i++) {
|
|
||||||
data[i] = (size_t)-1;
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < nelems; i++) {
|
|
||||||
size_t idx = batcher_push_begin(
|
|
||||||
TSDN_NULL, &batcher, 1);
|
|
||||||
assert_zu_eq(i, idx, "Wrong index");
|
|
||||||
assert_zu_eq((size_t)-1, data[idx],
|
|
||||||
"Expected uninitialized slot");
|
|
||||||
data[idx] = DATA_BASE_VAL + i;
|
|
||||||
batcher_push_end(TSDN_NULL, &batcher);
|
|
||||||
}
|
|
||||||
if (nelems > 0) {
|
|
||||||
size_t idx = batcher_push_begin(
|
|
||||||
TSDN_NULL, &batcher, 1);
|
|
||||||
assert_zu_eq(BATCHER_NO_IDX, idx,
|
|
||||||
"Shouldn't be able to push into a full "
|
|
||||||
"batcher");
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t npop = batcher_pop_begin(TSDN_NULL, &batcher);
|
|
||||||
if (nelems == 0) {
|
|
||||||
assert_zu_eq(npop, BATCHER_NO_IDX,
|
|
||||||
"Shouldn't get any items out of an empty "
|
|
||||||
"batcher");
|
|
||||||
} else {
|
|
||||||
assert_zu_eq(npop, nelems,
|
|
||||||
"Wrong number of elements popped");
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < nelems; i++) {
|
|
||||||
assert_zu_eq(data[i], DATA_BASE_VAL + i,
|
|
||||||
"Item popped out of order!");
|
|
||||||
}
|
|
||||||
if (nelems != 0) {
|
|
||||||
batcher_pop_end(TSDN_NULL, &batcher);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
TEST_BEGIN(test_multi_push) {
|
|
||||||
size_t idx, nelems;
|
|
||||||
batcher_t batcher;
|
|
||||||
batcher_init(&batcher, 11);
|
|
||||||
/* Push two at a time, 5 times, for 10 total. */
|
|
||||||
for (int i = 0; i < 5; i++) {
|
|
||||||
idx = batcher_push_begin(TSDN_NULL, &batcher, 2);
|
|
||||||
assert_zu_eq(2 * i, idx, "Should push in order");
|
|
||||||
batcher_push_end(TSDN_NULL, &batcher);
|
|
||||||
}
|
|
||||||
/* Pushing two more should fail -- would put us at 12 elems. */
|
|
||||||
idx = batcher_push_begin(TSDN_NULL, &batcher, 2);
|
|
||||||
assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space");
|
|
||||||
/* But one more should work */
|
|
||||||
idx = batcher_push_begin(TSDN_NULL, &batcher, 1);
|
|
||||||
assert_zu_eq(10, idx, "Should be out of space");
|
|
||||||
batcher_push_end(TSDN_NULL, &batcher);
|
|
||||||
nelems = batcher_pop_begin(TSDN_NULL, &batcher);
|
|
||||||
batcher_pop_end(TSDN_NULL, &batcher);
|
|
||||||
assert_zu_eq(11, nelems, "Should have popped everything");
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
enum {
|
|
||||||
STRESS_TEST_ELEMS = 10,
|
|
||||||
STRESS_TEST_THREADS = 4,
|
|
||||||
STRESS_TEST_OPS = 1000 * 1000,
|
|
||||||
STRESS_TEST_PUSH_TO_POP_RATIO = 5,
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct stress_test_data_s stress_test_data_t;
|
|
||||||
struct stress_test_data_s {
|
|
||||||
batcher_t batcher;
|
|
||||||
mtx_t pop_mtx;
|
|
||||||
atomic_u32_t thread_id;
|
|
||||||
|
|
||||||
uint32_t elems_data[STRESS_TEST_ELEMS];
|
|
||||||
size_t push_count[STRESS_TEST_ELEMS];
|
|
||||||
size_t pop_count[STRESS_TEST_ELEMS];
|
|
||||||
atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS];
|
|
||||||
atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS];
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Note: 0-indexed. If one element is set and you want to find it, you call
|
|
||||||
* get_nth_set(elems, 0).
|
|
||||||
*/
|
|
||||||
static size_t
|
|
||||||
get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) {
|
|
||||||
size_t ntrue = 0;
|
|
||||||
for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) {
|
|
||||||
if (elems_owned[i]) {
|
|
||||||
ntrue++;
|
|
||||||
}
|
|
||||||
if (ntrue > n) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert_not_reached(
|
|
||||||
"Asked for the %zu'th set element when < %zu are "
|
|
||||||
"set",
|
|
||||||
n, n);
|
|
||||||
/* Just to silence a compiler warning. */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void *
|
|
||||||
stress_test_thd(void *arg) {
|
|
||||||
stress_test_data_t *data = arg;
|
|
||||||
size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED);
|
|
||||||
|
|
||||||
size_t nelems_owned = 0;
|
|
||||||
bool elems_owned[STRESS_TEST_ELEMS] = {0};
|
|
||||||
size_t local_push_count[STRESS_TEST_ELEMS] = {0};
|
|
||||||
size_t local_pop_count[STRESS_TEST_ELEMS] = {0};
|
|
||||||
|
|
||||||
for (int i = 0; i < STRESS_TEST_OPS; i++) {
|
|
||||||
size_t rnd = prng_range_zu(
|
|
||||||
&prng, STRESS_TEST_PUSH_TO_POP_RATIO);
|
|
||||||
if (rnd == 0 || nelems_owned == 0) {
|
|
||||||
size_t nelems = batcher_pop_begin(
|
|
||||||
TSDN_NULL, &data->batcher);
|
|
||||||
if (nelems == BATCHER_NO_IDX) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < nelems; i++) {
|
|
||||||
uint32_t elem = data->elems_data[i];
|
|
||||||
assert_false(elems_owned[elem],
|
|
||||||
"Shouldn't already own what we just "
|
|
||||||
"popped");
|
|
||||||
elems_owned[elem] = true;
|
|
||||||
nelems_owned++;
|
|
||||||
local_pop_count[elem]++;
|
|
||||||
data->pop_count[elem]++;
|
|
||||||
}
|
|
||||||
batcher_pop_end(TSDN_NULL, &data->batcher);
|
|
||||||
} else {
|
|
||||||
size_t elem_to_push_idx = prng_range_zu(
|
|
||||||
&prng, nelems_owned);
|
|
||||||
size_t elem = get_nth_set(
|
|
||||||
elems_owned, elem_to_push_idx);
|
|
||||||
assert_true(elems_owned[elem],
|
|
||||||
"Should own element we're about to pop");
|
|
||||||
elems_owned[elem] = false;
|
|
||||||
local_push_count[elem]++;
|
|
||||||
data->push_count[elem]++;
|
|
||||||
nelems_owned--;
|
|
||||||
size_t idx = batcher_push_begin(
|
|
||||||
TSDN_NULL, &data->batcher, 1);
|
|
||||||
assert_zu_ne(idx, BATCHER_NO_IDX,
|
|
||||||
"Batcher can't be full -- we have one of its "
|
|
||||||
"elems!");
|
|
||||||
data->elems_data[idx] = (uint32_t)elem;
|
|
||||||
batcher_push_end(TSDN_NULL, &data->batcher);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Push all local elems back, flush local counts to the shared ones. */
|
|
||||||
size_t push_idx = 0;
|
|
||||||
if (nelems_owned != 0) {
|
|
||||||
push_idx = batcher_push_begin(
|
|
||||||
TSDN_NULL, &data->batcher, nelems_owned);
|
|
||||||
assert_zu_ne(
|
|
||||||
BATCHER_NO_IDX, push_idx, "Should be space to push");
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) {
|
|
||||||
if (elems_owned[i]) {
|
|
||||||
data->elems_data[push_idx] = (uint32_t)i;
|
|
||||||
push_idx++;
|
|
||||||
local_push_count[i]++;
|
|
||||||
data->push_count[i]++;
|
|
||||||
}
|
|
||||||
atomic_fetch_add_zu(&data->atomic_push_count[i],
|
|
||||||
local_push_count[i], ATOMIC_RELAXED);
|
|
||||||
atomic_fetch_add_zu(&data->atomic_pop_count[i],
|
|
||||||
local_pop_count[i], ATOMIC_RELAXED);
|
|
||||||
}
|
|
||||||
if (nelems_owned != 0) {
|
|
||||||
batcher_push_end(TSDN_NULL, &data->batcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_BEGIN(test_stress) {
|
|
||||||
stress_test_data_t data;
|
|
||||||
batcher_init(&data.batcher, STRESS_TEST_ELEMS);
|
|
||||||
bool err = mtx_init(&data.pop_mtx);
|
|
||||||
assert_false(err, "mtx_init failure");
|
|
||||||
atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED);
|
|
||||||
for (int i = 0; i < STRESS_TEST_ELEMS; i++) {
|
|
||||||
data.push_count[i] = 0;
|
|
||||||
data.pop_count[i] = 0;
|
|
||||||
atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED);
|
|
||||||
|
|
||||||
size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1);
|
|
||||||
assert_zu_eq(i, idx, "Should push in order");
|
|
||||||
data.elems_data[idx] = i;
|
|
||||||
batcher_push_end(TSDN_NULL, &data.batcher);
|
|
||||||
}
|
|
||||||
|
|
||||||
thd_t threads[STRESS_TEST_THREADS];
|
|
||||||
for (int i = 0; i < STRESS_TEST_THREADS; i++) {
|
|
||||||
thd_create(&threads[i], stress_test_thd, &data);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < STRESS_TEST_THREADS; i++) {
|
|
||||||
thd_join(threads[i], NULL);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < STRESS_TEST_ELEMS; i++) {
|
|
||||||
assert_zu_ne(
|
|
||||||
0, data.push_count[i], "Should have done something!");
|
|
||||||
assert_zu_eq(data.push_count[i], data.pop_count[i],
|
|
||||||
"every element should be pushed and popped an equal number "
|
|
||||||
"of times");
|
|
||||||
assert_zu_eq(data.push_count[i],
|
|
||||||
atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED),
|
|
||||||
"atomic and non-atomic count should be equal given proper "
|
|
||||||
"synchronization");
|
|
||||||
assert_zu_eq(data.pop_count[i],
|
|
||||||
atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED),
|
|
||||||
"atomic and non-atomic count should be equal given proper "
|
|
||||||
"synchronization");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
int
|
|
||||||
main(void) {
|
|
||||||
return test_no_reentrancy(test_simple, test_multi_push, test_stress);
|
|
||||||
}
|
|
||||||
|
|
@ -1,270 +0,0 @@
|
||||||
#include "test/jemalloc_test.h"
|
|
||||||
#include "test/fork.h"
|
|
||||||
|
|
||||||
enum {
|
|
||||||
STRESS_THREADS = 3,
|
|
||||||
STRESS_OBJECTS_PER_THREAD = 1000,
|
|
||||||
STRESS_ALLOC_SZ = PAGE / 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct stress_thread_data_s stress_thread_data_t;
|
|
||||||
struct stress_thread_data_s {
|
|
||||||
unsigned thd_id;
|
|
||||||
atomic_zu_t *ready_thds;
|
|
||||||
atomic_zu_t *done_thds;
|
|
||||||
void **to_dalloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
static atomic_zu_t push_failure_count;
|
|
||||||
static atomic_zu_t pop_attempt_results[2];
|
|
||||||
static atomic_zu_t dalloc_zero_slab_count;
|
|
||||||
static atomic_zu_t dalloc_nonzero_slab_count;
|
|
||||||
static atomic_zu_t dalloc_nonempty_list_count;
|
|
||||||
|
|
||||||
static bool
|
|
||||||
should_skip() {
|
|
||||||
return
|
|
||||||
/*
|
|
||||||
* We do batching operations on tcache flush pathways; we can't if
|
|
||||||
* caching is disabled.
|
|
||||||
*/
|
|
||||||
!opt_tcache ||
|
|
||||||
/* We rely on tcache fill/flush operations of the size we use. */
|
|
||||||
opt_tcache_max < STRESS_ALLOC_SZ
|
|
||||||
/*
|
|
||||||
* Some of the races we want to trigger are fiddly enough that they
|
|
||||||
* only show up under real concurrency. We add 1 to account for the
|
|
||||||
* main thread, which also does some work.
|
|
||||||
*/
|
|
||||||
|| ncpus < STRESS_THREADS + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
increment_push_failure(size_t push_idx) {
|
|
||||||
if (push_idx == BATCHER_NO_IDX) {
|
|
||||||
atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED);
|
|
||||||
} else {
|
|
||||||
assert_zu_lt(push_idx, 4, "Only 4 elems");
|
|
||||||
volatile size_t x = 10000;
|
|
||||||
while (--x) {
|
|
||||||
/* Spin for a while, to try to provoke a failure. */
|
|
||||||
if (x == push_idx) {
|
|
||||||
#ifdef _WIN32
|
|
||||||
SwitchToThread();
|
|
||||||
#else
|
|
||||||
sched_yield();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
increment_pop_attempt(size_t elems_to_pop) {
|
|
||||||
bool elems = (elems_to_pop != BATCHER_NO_IDX);
|
|
||||||
atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) {
|
|
||||||
if (slab_dalloc_count > 0) {
|
|
||||||
atomic_fetch_add_zu(
|
|
||||||
&dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED);
|
|
||||||
} else {
|
|
||||||
atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED);
|
|
||||||
}
|
|
||||||
if (!list_empty) {
|
|
||||||
atomic_fetch_add_zu(
|
|
||||||
&dalloc_nonempty_list_count, 1, ATOMIC_RELAXED);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
flush_tcache() {
|
|
||||||
assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
|
|
||||||
"Unexpected mallctl failure");
|
|
||||||
}
|
|
||||||
|
|
||||||
static void *
|
|
||||||
stress_thread(void *arg) {
|
|
||||||
stress_thread_data_t *data = arg;
|
|
||||||
uint64_t prng_state = data->thd_id;
|
|
||||||
atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED);
|
|
||||||
while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED)
|
|
||||||
!= STRESS_THREADS) {
|
|
||||||
/* Spin */
|
|
||||||
}
|
|
||||||
for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) {
|
|
||||||
dallocx(data->to_dalloc[i], 0);
|
|
||||||
if (prng_range_u64(&prng_state, 3) == 0) {
|
|
||||||
flush_tcache();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
flush_tcache();
|
|
||||||
atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Run main_thread_fn in conditions that trigger all the various edge cases and
|
|
||||||
* subtle race conditions.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
stress_run(void (*main_thread_fn)(), int nruns) {
|
|
||||||
bin_batching_test_ndalloc_slabs_max = 1;
|
|
||||||
bin_batching_test_after_push_hook = &increment_push_failure;
|
|
||||||
bin_batching_test_mid_pop_hook = &increment_pop_attempt;
|
|
||||||
bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count;
|
|
||||||
|
|
||||||
atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED);
|
|
||||||
atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED);
|
|
||||||
|
|
||||||
for (int run = 0; run < nruns; run++) {
|
|
||||||
thd_t thds[STRESS_THREADS];
|
|
||||||
stress_thread_data_t thd_datas[STRESS_THREADS];
|
|
||||||
atomic_zu_t ready_thds;
|
|
||||||
atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED);
|
|
||||||
atomic_zu_t done_thds;
|
|
||||||
atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED);
|
|
||||||
|
|
||||||
void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD];
|
|
||||||
for (int i = 0; i < STRESS_THREADS; i++) {
|
|
||||||
thd_datas[i].thd_id = i;
|
|
||||||
thd_datas[i].ready_thds = &ready_thds;
|
|
||||||
thd_datas[i].done_thds = &done_thds;
|
|
||||||
thd_datas[i].to_dalloc = ptrs[i];
|
|
||||||
for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) {
|
|
||||||
void *ptr = mallocx(STRESS_ALLOC_SZ, 0);
|
|
||||||
assert_ptr_not_null(ptr, "alloc failure");
|
|
||||||
ptrs[i][j] = ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < STRESS_THREADS; i++) {
|
|
||||||
thd_create(&thds[i], stress_thread, &thd_datas[i]);
|
|
||||||
}
|
|
||||||
while (atomic_load_zu(&done_thds, ATOMIC_RELAXED)
|
|
||||||
!= STRESS_THREADS) {
|
|
||||||
main_thread_fn();
|
|
||||||
}
|
|
||||||
for (int i = 0; i < STRESS_THREADS; i++) {
|
|
||||||
thd_join(thds[i], NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bin_batching_test_ndalloc_slabs_max = (unsigned)-1;
|
|
||||||
bin_batching_test_after_push_hook = NULL;
|
|
||||||
bin_batching_test_mid_pop_hook = NULL;
|
|
||||||
bin_batching_test_after_unlock_hook = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
do_allocs_frees() {
|
|
||||||
enum { NALLOCS = 32 };
|
|
||||||
flush_tcache();
|
|
||||||
void *ptrs[NALLOCS];
|
|
||||||
for (int i = 0; i < NALLOCS; i++) {
|
|
||||||
ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0);
|
|
||||||
}
|
|
||||||
for (int i = 0; i < NALLOCS; i++) {
|
|
||||||
dallocx(ptrs[i], 0);
|
|
||||||
}
|
|
||||||
flush_tcache();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
test_arena_reset_main_fn() {
|
|
||||||
do_allocs_frees();
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_BEGIN(test_arena_reset) {
|
|
||||||
int err;
|
|
||||||
unsigned arena;
|
|
||||||
unsigned old_arena;
|
|
||||||
|
|
||||||
test_skip_if(should_skip());
|
|
||||||
test_skip_if(opt_percpu_arena != percpu_arena_disabled);
|
|
||||||
|
|
||||||
size_t arena_sz = sizeof(arena);
|
|
||||||
err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0);
|
|
||||||
assert_d_eq(0, err, "Arena creation failed");
|
|
||||||
|
|
||||||
err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz);
|
|
||||||
assert_d_eq(0, err, "changing arena failed");
|
|
||||||
|
|
||||||
stress_run(&test_arena_reset_main_fn, /* nruns */ 10);
|
|
||||||
|
|
||||||
flush_tcache();
|
|
||||||
|
|
||||||
char buf[100];
|
|
||||||
malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena);
|
|
||||||
err = mallctl(buf, NULL, NULL, NULL, 0);
|
|
||||||
assert_d_eq(0, err, "Couldn't change arena");
|
|
||||||
|
|
||||||
do_allocs_frees();
|
|
||||||
|
|
||||||
err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz);
|
|
||||||
assert_d_eq(0, err, "changing arena failed");
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
static void
|
|
||||||
test_fork_main_fn() {
|
|
||||||
#ifndef _WIN32
|
|
||||||
pid_t pid = fork();
|
|
||||||
if (pid == -1) {
|
|
||||||
test_fail("Fork failure!");
|
|
||||||
} else if (pid == 0) {
|
|
||||||
/* Child */
|
|
||||||
do_allocs_frees();
|
|
||||||
_exit(0);
|
|
||||||
} else {
|
|
||||||
fork_wait_for_child_exit(pid);
|
|
||||||
do_allocs_frees();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_BEGIN(test_fork) {
|
|
||||||
#ifdef _WIN32
|
|
||||||
test_skip("No fork on windows");
|
|
||||||
#endif
|
|
||||||
test_skip_if(should_skip());
|
|
||||||
stress_run(&test_fork_main_fn, /* nruns */ 10);
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
static void
|
|
||||||
test_races_main_fn() {
|
|
||||||
do_allocs_frees();
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_BEGIN(test_races) {
|
|
||||||
test_skip_if(should_skip());
|
|
||||||
|
|
||||||
stress_run(&test_races_main_fn, /* nruns */ 400);
|
|
||||||
|
|
||||||
assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED),
|
|
||||||
"Should have seen some push failures");
|
|
||||||
assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED),
|
|
||||||
"Should have seen some pop failures");
|
|
||||||
assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED),
|
|
||||||
"Should have seen some pop successes");
|
|
||||||
assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED),
|
|
||||||
"Expected some frees that didn't empty a slab");
|
|
||||||
assert_zu_lt(0,
|
|
||||||
atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED),
|
|
||||||
"expected some frees that emptied a slab");
|
|
||||||
assert_zu_lt(0,
|
|
||||||
atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED),
|
|
||||||
"expected some frees that used the empty list");
|
|
||||||
}
|
|
||||||
TEST_END
|
|
||||||
|
|
||||||
int
|
|
||||||
main(void) {
|
|
||||||
return test_no_reentrancy(test_arena_reset, test_races, test_fork);
|
|
||||||
}
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
# This value of max_batched_size effectively requires all bins to be batched;
|
|
||||||
# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be
|
|
||||||
# at multi-gigabyte minimum page sizes.
|
|
||||||
# The reason for this sort of hacky approach is that we want to
|
|
||||||
# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" ->
|
|
||||||
# "empty" and "non-empty"-> "full" transitions often, which have special
|
|
||||||
# handling). But the value of PAGE isn't easily available in test scripts.
|
|
||||||
export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4"
|
|
||||||
|
|
@ -1,5 +1,34 @@
|
||||||
#include "test/jemalloc_test.h"
|
#include "test/jemalloc_test.h"
|
||||||
#include "test/fork.h"
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
# include <sys/wait.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
static void
|
||||||
|
wait_for_child_exit(int pid) {
|
||||||
|
int status;
|
||||||
|
while (true) {
|
||||||
|
if (waitpid(pid, &status, 0) == -1) {
|
||||||
|
test_fail("Unexpected waitpid() failure.");
|
||||||
|
}
|
||||||
|
if (WIFSIGNALED(status)) {
|
||||||
|
test_fail(
|
||||||
|
"Unexpected child termination due to "
|
||||||
|
"signal %d",
|
||||||
|
WTERMSIG(status));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (WIFEXITED(status)) {
|
||||||
|
if (WEXITSTATUS(status) != 0) {
|
||||||
|
test_fail("Unexpected child exit value %d",
|
||||||
|
WEXITSTATUS(status));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
TEST_BEGIN(test_fork) {
|
TEST_BEGIN(test_fork) {
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
|
|
@ -37,7 +66,7 @@ TEST_BEGIN(test_fork) {
|
||||||
/* Child. */
|
/* Child. */
|
||||||
_exit(0);
|
_exit(0);
|
||||||
} else {
|
} else {
|
||||||
fork_wait_for_child_exit(pid);
|
wait_for_child_exit(pid);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
test_skip("fork(2) is irrelevant to Windows");
|
test_skip("fork(2) is irrelevant to Windows");
|
||||||
|
|
@ -60,7 +89,7 @@ do_fork_thd(void *arg) {
|
||||||
test_fail("Exec failed");
|
test_fail("Exec failed");
|
||||||
} else {
|
} else {
|
||||||
/* Parent */
|
/* Parent */
|
||||||
fork_wait_for_child_exit(pid);
|
wait_for_child_exit(pid);
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -97,7 +126,7 @@ TEST_BEGIN(test_fork_multithreaded) {
|
||||||
do_test_fork_multithreaded();
|
do_test_fork_multithreaded();
|
||||||
_exit(0);
|
_exit(0);
|
||||||
} else {
|
} else {
|
||||||
fork_wait_for_child_exit(pid);
|
wait_for_child_exit(pid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue