From e2da7477f8826e9bb55d9eaa6ba41a93afa17acd Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 15 Jul 2025 15:44:14 -0700 Subject: [PATCH] Revert PR #2608: Manually revert commits 70c94d..f9c0b5 --- Makefile.in | 3 - include/jemalloc/internal/arena_inlines_b.h | 154 +--------- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/batcher.h | 46 --- include/jemalloc/internal/bin.h | 74 +---- include/jemalloc/internal/bin_info.h | 11 - include/jemalloc/internal/bin_stats.h | 5 - include/jemalloc/internal/witness.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/arena.c | 89 ++---- src/batcher.c | 98 ------- src/bin.c | 48 +--- src/bin_info.c | 24 -- src/ctl.c | 37 --- src/jemalloc.c | 14 - src/stats.c | 58 +--- src/tcache.c | 194 +++---------- test/analyze/sizes.c | 2 - test/include/test/fork.h | 34 --- test/unit/batcher.c | 243 ---------------- test/unit/bin_batching.c | 270 ------------------ test/unit/bin_batching.sh | 10 - test/unit/fork.c | 37 ++- 30 files changed, 124 insertions(+), 1364 deletions(-) delete mode 100644 include/jemalloc/internal/batcher.h delete mode 100644 src/batcher.c delete mode 100644 test/include/test/fork.h delete mode 100644 test/unit/batcher.c delete mode 100644 test/unit/bin_batching.c delete mode 100644 test/unit/bin_batching.sh diff --git a/Makefile.in b/Makefile.in index 2519ed83..4e9d0bea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,7 +98,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ - $(srcroot)src/batcher.c \ $(srcroot)src/bin.c \ 
$(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ @@ -208,8 +207,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ - $(srcroot)test/unit/batcher.c \ - $(srcroot)test/unit/bin_batching.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 549dfb8a..6276deaa 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -588,11 +588,10 @@ arena_dalloc_bin_locked_begin( * stats updates, which happen during finish (this lets running counts get left * in a register). */ -JEMALLOC_ALWAYS_INLINE void +JEMALLOC_ALWAYS_INLINE bool arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, - unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { + void *ptr) { const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(info, binind, slab, ptr); slab_data_t *slab_data = edata_slab_data_get(slab); @@ -612,17 +611,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (nfree == bin_info->nregs) { arena_dalloc_bin_locked_handle_newly_empty( tsdn, arena, slab, bin); - - if (*dalloc_slabs_count < ndalloc_slabs) { - dalloc_slabs[*dalloc_slabs_count] = slab; - (*dalloc_slabs_count)++; - } else { - edata_list_active_append(dalloc_slabs_extra, slab); - } + return true; } else if (nfree == 1 && slab != bin->slabcur) { arena_dalloc_bin_locked_handle_newly_nonempty( tsdn, arena, slab, bin); } + return false; } JEMALLOC_ALWAYS_INLINE void @@ -635,148 +629,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_impl(tsdn_t 
*tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind, - edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count, - edata_list_active_t *dalloc_slabs_extra) { - assert(binind < bin_info_nbatched_sizes); - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - size_t nelems_to_pop = batcher_pop_begin( - tsdn, &batched_bin->remote_frees); - - bin_batching_test_mid_pop(nelems_to_pop); - if (nelems_to_pop == BATCHER_NO_IDX) { - malloc_mutex_assert_not_owner( - tsdn, &batched_bin->remote_frees.mtx); - return; - } else { - malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx); - } - - size_t npushes = batcher_pop_get_pushes( - tsdn, &batched_bin->remote_frees); - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; - for (size_t i = 0; i < nelems_to_pop; i++) { - remote_free_data[i] = batched_bin->remote_free_data[i]; - } - batcher_pop_end(tsdn, &batched_bin->remote_frees); - - for (size_t i = 0; i < nelems_to_pop; i++) { - arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info, - binind, remote_free_data[i].slab, remote_free_data[i].ptr, - dalloc_slabs, ndalloc_slabs, dalloc_count, - dalloc_slabs_extra); - } - - bin->stats.batch_pops++; - bin->stats.batch_pushes += npushes; - bin->stats.batch_pushed_elems += nelems_to_pop; -} - -typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; -struct arena_bin_flush_batch_state_s { - arena_dalloc_bin_locked_info_t info; - - /* - * Bin batching is subtle in that there are unusual edge cases in which - * it can trigger the deallocation of more slabs than there were items - * flushed (say, if every original deallocation triggered a slab - * deallocation, and so did every batched one). So we keep a small - * backup array for any "extra" slabs, as well as a a list to allow a - * dynamic number of ones exceeding that array. 
- */ - edata_t *dalloc_slabs[8]; - unsigned dalloc_slab_count; - edata_list_active_t dalloc_slabs_extra; -}; - -JEMALLOC_ALWAYS_INLINE unsigned -arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) { - if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) { - return bin_batching_test_ndalloc_slabs_max; - } - return preallocated_slabs; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_begin(&state->info, binind); - state->dalloc_slab_count = 0; - edata_list_active_init(&state->dalloc_slabs_extra); - - unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs) - / sizeof(state->dalloc_slabs[0])); - unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); - - arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, - state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count, - &state->dalloc_slabs_extra); -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info); -} - -static inline bool -arena_bin_has_batch(szind_t binind) { - return binind < bin_info_nbatched_sizes; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (!arena_bin_has_batch(binind)) { - return; - } - /* - * The initialization of dalloc_slabs_extra is guarded by an - * arena_bin_has_batch check higher up the stack. But the clang - * analyzer forgets this down the stack, triggering a spurious error - * reported here. 
- */ - JEMALLOC_CLANG_ANALYZER_SUPPRESS { - bin_batching_test_after_unlock(state->dalloc_slab_count, - edata_list_active_empty(&state->dalloc_slabs_extra)); - } - for (unsigned i = 0; i < state->dalloc_slab_count; i++) { - edata_t *slab = state->dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - while (!edata_list_active_empty(&state->dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first( - &state->dalloc_slabs_extra); - edata_list_active_remove(&state->dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } -} - static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - bin_t *ret; - if (arena_bin_has_batch(binind)) { - ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard); - } else { - ret = shard0 + binshard; - } - assert(binind >= SC_NBINS - 1 - || (uintptr_t)ret - < (uintptr_t)arena + arena_bin_offsets[binind + 1]); - - return ret; + return shard0 + binshard; } #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 4778ca1b..471f7692 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -105,7 +105,7 @@ struct arena_s { "Do not use this field directly. 
" "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_with_batch_t all_bins[0]; + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h deleted file mode 100644 index 3ceb8256..00000000 --- a/include/jemalloc/internal/batcher.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BATCHER_H -#define JEMALLOC_INTERNAL_BATCHER_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex.h" - -#define BATCHER_NO_IDX ((size_t) - 1) - -typedef struct batcher_s batcher_t; -struct batcher_s { - /* - * Optimize for locality -- nelems_max and nelems are always touched - * togehter, along with the front of the mutex. The end of the mutex is - * only touched if there's contention. - */ - atomic_zu_t nelems; - size_t nelems_max; - size_t npushes; - malloc_mutex_t mtx; -}; - -void batcher_init(batcher_t *batcher, size_t nelems_max); - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. If the former, the caller must call - * batcher_push_end once done. - */ -size_t batcher_push_begin( - tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push); -void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); - -/* - * Returns the number of items to pop, or BATCHER_NO_IDX if there are none. - * If the former, must be followed by a call to batcher_pop_end. 
- */ -size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); -size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); - -void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher); - -#endif /* JEMALLOC_INTERNAL_BATCHER_H */ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index e91583d7..05a2f845 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,60 +2,12 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/batcher.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" -#define BIN_REMOTE_FREE_ELEMS_MAX 16 - -#ifdef JEMALLOC_JET -extern void (*bin_batching_test_after_push_hook)(size_t idx); -extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); -extern void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - -#ifdef JEMALLOC_JET -extern unsigned bin_batching_test_ndalloc_slabs_max; -#else -static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -#endif - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_push(size_t idx) { - (void)idx; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_push_hook != NULL) { - bin_batching_test_after_push_hook(idx); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_mid_pop(size_t elems_to_pop) { - (void)elems_to_pop; -#ifdef JEMALLOC_JET - if (bin_batching_test_mid_pop_hook != NULL) { - bin_batching_test_mid_pop_hook(elems_to_pop); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { - (void)slab_dalloc_count; - 
(void)list_empty; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_unlock_hook != NULL) { - bin_batching_test_after_unlock_hook( - slab_dalloc_count, list_empty); - } -#endif -} - /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -90,19 +42,6 @@ struct bin_s { edata_list_active_t slabs_full; }; -typedef struct bin_remote_free_data_s bin_remote_free_data_t; -struct bin_remote_free_data_s { - void *ptr; - edata_t *slab; -}; - -typedef struct bin_with_batch_s bin_with_batch_t; -struct bin_with_batch_s { - bin_t bin; - batcher_t remote_frees; - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; -}; - /* A set of sharded bins of the same size class. */ typedef struct bins_s bins_t; struct bins_s { @@ -115,12 +54,12 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); /* Initializes a bin to empty. Returns true on error. */ -bool bin_init(bin_t *bin, unsigned binind); +bool bin_init(bin_t *bin); /* Forking. */ -void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. 
*/ static inline void @@ -138,11 +77,6 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; - - stats->batch_failed_pushes += bin->stats.batch_failed_pushes; - stats->batch_pushes += bin->stats.batch_pushes; - stats->batch_pushed_elems += bin->stats.batch_pushed_elems; - malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 0022c3f7..8c563dee 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -44,17 +44,6 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; -/* The maximum size a size class can be and still get batching behavior. */ -extern size_t opt_bin_info_max_batched_size; -/* The number of batches per batched size class. */ -extern size_t opt_bin_info_remote_free_max_batch; -// The max number of pending elems (across all batches) -extern size_t opt_bin_info_remote_free_max; - -extern szind_t bin_info_nbatched_sizes; -extern unsigned bin_info_nbatched_bins; -extern unsigned bin_info_nunbatched_bins; - extern bin_info_t bin_infos[SC_NBINS]; void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index e1095f38..9900e0d1 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,11 +48,6 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. 
*/ size_t nonfull_slabs; - - uint64_t batch_pops; - uint64_t batch_failed_pushes; - uint64_t batch_pushes; - uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 73770713..7ca3c347 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -64,10 +64,9 @@ enum witness_rank_e { WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, WITNESS_RANK_HOOK, - WITNESS_RANK_BIN, WITNESS_RANK_LEAF = 0x1000, - WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF, + WITNESS_RANK_BIN = WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..9743e10b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -380,4 +379,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..c1ff11a9 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..6cb1b35e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..5c7b00a2 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/src/arena.c b/src/arena.c index 2f58b038..962a325d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -39,7 +39,8 @@ div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; -uint32_t 
arena_bin_offsets[SC_NBINS]; +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; /* * a0 is used to handle huge requests before malloc init completes. After @@ -674,17 +675,11 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { } static void -arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX); - } - if (bin->slabcur != NULL) { slab = bin->slabcur; bin->slabcur = NULL; @@ -835,8 +830,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset( - tsd, arena, arena_get_bin(arena, i, j), i); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -1103,19 +1097,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); - /* - * This has some fields that are conditionally initialized down batch - * flush pathways. This can trigger static analysis warnings deeper - * down in the static. The accesses are guarded by the same checks as - * the initialization, but the analysis isn't able to track that across - * multiple stack frames. - */ - arena_bin_flush_batch_state_t batch_flush_state - JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); - arena_bin_flush_batch_after_lock( - tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill_min) { /* Try batch-fill from slabcur first. 
*/ @@ -1176,11 +1159,7 @@ label_refill: cache_bin->tstats.nrequests = 0; } - arena_bin_flush_batch_before_unlock( - tsdn, arena, bin, binind, &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); - arena_bin_flush_batch_after_unlock( - tsdn, arena, bin, binind, &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); @@ -1474,16 +1453,12 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; arena_dalloc_bin_locked_begin(&info, binind); - edata_t *dalloc_slabs[1]; - unsigned dalloc_slabs_count = 0; - arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata, - ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count, - /* dalloc_slabs_extra */ NULL); + bool ret = arena_dalloc_bin_locked_step( + tsdn, arena, bin, &info, binind, edata, ptr); arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); - if (dalloc_slabs_count != 0) { - assert(dalloc_slabs[0] == edata); + if (ret) { arena_slab_dalloc(tsdn, arena, edata); } } @@ -1722,6 +1697,7 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; + unsigned i; if (ind == 0) { base = b0get(); @@ -1734,13 +1710,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) - + sizeof(bin_with_batch_t) * bin_info_nbatched_bins - + sizeof(bin_t) * bin_info_nunbatched_bins; + + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } - + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t) + <= (uintptr_t)arena + arena_size);) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); arena->last_thd = NULL; @@ -1779,13 +1756,11 @@ 
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. */ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin, i); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bool err = bin_init(&arena->all_bins[i]);) + if (err) { + goto label_error; } } @@ -1943,10 +1918,8 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; - uint32_t bin_sz = (i < bin_info_nbatched_sizes - ? sizeof(bin_with_batch_t) - : sizeof(bin_t)); - cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); } return pa_central_init( &arena_pa_central_global, base, hpa, &hpa_hooks_default); @@ -1996,21 +1969,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_prefork(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_prefork(tsdn, &arena->all_bins[i]);) } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_parent(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -2047,11 
+2016,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_child(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_child(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/batcher.c b/src/batcher.c deleted file mode 100644 index af71dae5..00000000 --- a/src/batcher.c +++ /dev/null @@ -1,98 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include "jemalloc/internal/batcher.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/atomic.h" - -void -batcher_init(batcher_t *batcher, size_t nelems_max) { - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - batcher->nelems_max = nelems_max; - batcher->npushes = 0; - malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, - malloc_mutex_rank_exclusive); -} - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. - */ -size_t -batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) { - assert(elems_to_push > 0); - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems_guess + elems_to_push > batcher->nelems_max) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems + elems_to_push > batcher->nelems_max) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - assert(elems_to_push <= batcher->nelems_max - nelems); - /* - * We update nelems at push time (instead of during pop) so that other - * racing accesses of the batcher can fail fast instead of trying to - * acquire a mutex only to discover that there's no space for them. 
- */ - atomic_store_zu( - &batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); - batcher->npushes++; - return nelems; -} - -size_t -batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - size_t npushes = batcher->npushes; - batcher->npushes = 0; - return npushes; -} - -void -batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -size_t -batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems_guess <= batcher->nelems_max); - if (nelems_guess == 0) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems <= batcher->nelems_max); - if (nelems == 0) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - return nelems; -} - -void -batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -void -batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_prefork(tsdn, &batcher->mtx); -} - -void -batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_parent(tsdn, &batcher->mtx); -} - -void -batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_child(tsdn, &batcher->mtx); -} diff --git a/src/bin.c b/src/bin.c index 98d1da02..a11b108e 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,14 +6,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -#ifdef JEMALLOC_JET -unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -void (*bin_batching_test_after_push_hook)(size_t push_idx); -void 
(*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); -void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -47,7 +39,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } bool -bin_init(bin_t *bin, unsigned binind) { +bin_init(bin_t *bin) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; @@ -58,52 +50,20 @@ bin_init(bin_t *bin, unsigned binind) { if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, opt_bin_info_remote_free_max); - } return false; } void -bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_prefork(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_prefork(tsdn, &bin->lock); - if (has_batch) { - /* - * The batch mutex has lower rank than the bin mutex (as it must - * -- it's acquired later). But during forking, we go - * bin-at-a-time, so that we acquire mutex on bin 0, then on - * the bin 0 batcher, then on bin 1. This is a safe ordering - * (it's ordered by the index of arenas and bins within those - * arenas), but will trigger witness errors that would - * otherwise force another level of arena forking that breaks - * bin encapsulation (because the witness API doesn't "know" - * about arena or bin ordering -- it just sees that the batcher - * has a lower rank than the bin). So instead we exclude the - * batcher mutex from witness checking during fork (which is - * the only time we touch multiple bins at once) by passing - * TSDN_NULL. 
- */ - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_prefork(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_parent(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_parent(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_child(TSDN_NULL, &batched->remote_frees); - } } diff --git a/src/bin_info.c b/src/bin_info.c index de93418a..e10042fd 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,26 +3,8 @@ #include "jemalloc/internal/bin_info.h" -/* - * We leave bin-batching disabled by default, with other settings chosen mostly - * empirically; across the test programs I looked at they provided the most bang - * for the buck. With other default settings, these choices for bin batching - * result in them consuming far less memory (even in the worst case) than the - * tcaches themselves, the arena, etc. - * Note that we always try to pop all bins on every arena cache bin lock - * operation, so the typical memory waste is far less than this (and only on - * hot bins, which tend to be large anyways). - */ -size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. 
*/ -size_t opt_bin_info_remote_free_max_batch = 4; -size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; - bin_info_t bin_infos[SC_NBINS]; -szind_t bin_info_nbatched_sizes; -unsigned bin_info_nbatched_bins; -unsigned bin_info_nunbatched_bins; - static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { @@ -38,12 +20,6 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; - if (bin_info->reg_size <= opt_bin_info_max_batched_size) { - bin_info_nbatched_sizes++; - bin_info_nbatched_bins += bin_info->n_shards; - } else { - bin_info_nunbatched_bins += bin_info->n_shards; - } } } diff --git a/src/ctl.c b/src/ctl.c index 9e9a4b43..a4c60ce0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -134,9 +134,6 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_experimental_tcache_gc) -CTL_PROTO(opt_max_batched_size) -CTL_PROTO(opt_remote_free_max) -CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -248,10 +245,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) -CTL_PROTO(stats_arenas_i_bins_j_batch_pops) -CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -501,9 +494,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, 
{NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)}, - {NAME("max_batched_size"), CTL(opt_max_batched_size)}, - {NAME("remote_free_max"), CTL(opt_remote_free_max)}, - {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, @@ -673,11 +663,6 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, - {NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)}, - {NAME("batch_failed_pushes"), - CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, - {NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)}, - {NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}}; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { @@ -1219,14 +1204,6 @@ ctl_arena_stats_sdmerge( assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } - - merged->batch_pops += bstats->batch_pops; - merged->batch_failed_pushes += - bstats->batch_failed_pushes; - merged->batch_pushes += bstats->batch_pushes; - merged->batch_pushed_elems += - bstats->batch_pushed_elems; - malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -2202,10 +2179,6 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) -CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) -CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) -CTL_RO_NL_GEN( - opt_remote_free_max_batch, 
opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN( @@ -3982,16 +3955,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, - uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, - uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index( diff --git a/src/jemalloc.c b/src/jemalloc.c index 4adcbf3c..9f59a781 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1391,20 +1391,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size, - "max_batched_size", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, - "remote_free_max_batch", 0, - BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, - "remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( v, vlen); diff --git a/src/stats.c b/src/stats.c index 
84af3911..a8a574ac 100644 --- a/src/stats.c +++ b/src/stats.c @@ -357,15 +357,6 @@ stats_arena_bins_print( COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, pops, NULL, right, 10, uint64) - COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, failed_push, NULL, right, 13, uint64) - COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push, NULL, right, 7, uint64) - COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push_elem, NULL, right, 12, uint64) - COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) - /* Don't want to actually print the name. */ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -406,15 +397,13 @@ stats_arena_bins_print( } for (j = 0, in_gap = false; j < nbins; j++) { - uint64_t nslabs; - size_t reg_size, slab_size, curregs; - size_t curslabs; - size_t nonfull_slabs; - uint32_t nregs, nshards; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nreslabs; - uint64_t batch_pops, batch_failed_pushes, batch_pushes, - batch_pushed_elems; + uint64_t nslabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + size_t nonfull_slabs; + uint32_t nregs, nshards; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreslabs; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -463,15 +452,6 @@ stats_arena_bins_print( CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); - CTL_LEAF( - stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", - &batch_failed_pushes, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes, - uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", - &batch_pushed_elems, uint64_t); - if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -506,14 +486,6 @@ stats_arena_bins_print( emitter, "curslabs", 
emitter_type_size, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); - emitter_json_kv( - emitter, "batch_pops", emitter_type_uint64, &batch_pops); - emitter_json_kv(emitter, "batch_failed_pushes", - emitter_type_uint64, &batch_failed_pushes); - emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64, - &batch_pushes); - emitter_json_kv(emitter, "batch_pushed_elems", - emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit( @@ -573,19 +545,6 @@ stats_arena_bins_print( col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); - col_pops.uint64_val = batch_pops; - col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime); - - col_failed_push.uint64_val = batch_failed_pushes; - col_failed_push_ps.uint64_val = rate_per_second( - batch_failed_pushes, uptime); - col_push.uint64_val = batch_pushes; - col_push_ps.uint64_val = rate_per_second(batch_pushes, uptime); - - col_push_elem.uint64_val = batch_pushed_elems; - col_push_elem_ps.uint64_val = rate_per_second( - batch_pushed_elems, uptime); - /* * Note that mutex columns were initialized above, if mutex == * true. @@ -1677,9 +1636,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_BOOL("experimental_tcache_gc") - OPT_WRITE_SIZE_T("max_batched_size") - OPT_WRITE_SIZE_T("remote_free_max") - OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/src/tcache.c b/src/tcache.c index 44a96841..2d73237b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -608,7 +608,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, /* nfill_min */ - opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, + opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); @@ -680,8 +680,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned tcache_binshard = - tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -698,25 +696,12 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - /* - * There's an edge case where we need to deallocate more slabs than we - * have elements of dalloc_slabs. This can if we end up deallocating - * items batched by another thread in addition to ones flushed from the - * cache. Since this is not very likely (most small object - * deallocations don't free up a whole slab), we don't want to burn the - * stack space to keep those excess slabs in an array. Instead we'll - * maintain an overflow list. - */ - edata_list_active_t dalloc_slabs_extra; - edata_list_active_init(&dalloc_slabs_extra); - /* * We're about to grab a bunch of locks. If one of them happens to be * the one guarding the arena-level stats counters we flush our * thread-local ones to, we do so under one critical section. */ bool merged_stats = false; - /* * We maintain the invariant that all edatas yet to be flushed are * contained in the half-open range [flush_start, flush_end). We'll @@ -741,7 +726,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned cur_binshard = edata_binshard_get(cur_edata); bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); - /* * Start off the partition; item_edata[i] always matches itself * of course. 
@@ -788,150 +772,43 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, } } - /* - * We never batch when flushing to our home-base bin shard, - * since it's likely that we'll have to acquire that lock anyway - * when flushing stats. - * - * A plausible check we could add to can_batch is - * '&& arena_is_auto(cur_arena)'. The motivation would be that - * we have a higher tolerance for dubious user assumptions - * around non-auto arenas (e.g. "if I deallocate every object I - * allocated, and then call tcache.flush, then the arena stats - * must reflect zero live allocations"). - * - * This is dubious for a couple reasons: - * - We already don't provide perfect fidelity for stats - * counting (e.g. for profiled allocations, whose size can - * inflate in stats). - * - Hanging load-bearing guarantees around stats impedes - * scalability in general. - * - * There are some "complete" strategies we could do instead: - * - Add a arena..quiesce call to pop all bins for users who - * do want those stats accounted for. - * - Make batchability a user-controllable per-arena option. - * - Do a batch pop after every mutex acquisition for which we - * want to provide accurate stats. This gives perfectly - * accurate stats, but can cause weird performance effects - * (because doing stats collection can now result in slabs - * becoming empty, and therefore purging, large mutex - * acquisition, etc.). - * - Propagate the "why" behind a flush down to the level of the - * batcher, and include a batch pop attempt down full tcache - * flushing pathways. This is just a lot of plumbing and - * internal complexity. - * - * We don't do any of these right now, but the decision calculus - * and tradeoffs are subtle enough that the reasoning was worth - * leaving in this comment. 
- */ - bool bin_is_batched = arena_bin_has_batch(binind); - bool home_binshard = (cur_arena == tcache_arena - && cur_binshard == tcache_binshard); - bool can_batch = (flush_start - prev_flush_start - <= opt_bin_info_remote_free_max_batch) - && !home_binshard && bin_is_batched; + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); /* - * We try to avoid the batching pathway if we can, so we always - * at least *try* to lock. + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). */ - bool locked = false; - bool batched = false; - bool batch_failed = false; - if (can_batch) { - locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } - if (can_batch && !locked) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *) - cur_bin; - size_t push_idx = batcher_push_begin(tsdn, - &batched_bin->remote_frees, - flush_start - prev_flush_start); - bin_batching_test_after_push(push_idx); - if (push_idx != BATCHER_NO_IDX) { - batched = true; - unsigned nbatched = flush_start - - prev_flush_start; - for (unsigned i = 0; i < nbatched; i++) { - unsigned src_ind = prev_flush_start + i; - batched_bin - ->remote_free_data[push_idx + i] - .ptr = ptrs->ptr[src_ind]; - batched_bin - ->remote_free_data[push_idx + i] - .slab = item_edata[src_ind].edata; - } - batcher_push_end( - tsdn, &batched_bin->remote_frees); - } else { - batch_failed = true; + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; } } - if (!batched) { - if (!locked) { - malloc_mutex_lock(tsdn, &cur_bin->lock); - } - /* - * Unlike other stats (which only ever get flushed into - * a tcache's associated arena), batch_failed counts get - * accumulated into the bin where the push attempt - * failed. - */ - if (config_stats && batch_failed) { - cur_bin->stats.batch_failed_pushes++; - } - /* - * Flush stats first, if that was the right lock. Note - * that we don't actually have to flush stats into the - * current thread's binshard. Flushing into any binshard - * in the same arena is enough; we don't expose stats on - * per-binshard basis (just per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - unsigned preallocated_slabs = nflush; - unsigned ndalloc_slabs = - arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); - /* Next flush objects our own objects. */ - /* Init only to avoid used-uninitialized warning. 
*/ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr, dalloc_slabs, ndalloc_slabs, - &dalloc_count, &dalloc_slabs_extra); - } - /* - * Lastly, flush any batched objects (from other - * threads). - */ - if (bin_is_batched) { - arena_bin_flush_batch_impl(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, - dalloc_slabs, ndalloc_slabs, &dalloc_count, - &dalloc_slabs_extra); - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - } arena_decay_ticks( tsdn, cur_arena, flush_start - prev_flush_start); } @@ -941,18 +818,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); } - while (!edata_list_active_empty(&dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first(&dalloc_slabs_extra); - edata_list_active_remove(&dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } if (config_stats && !merged_stats) { /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. 
+ */ bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index cc6c3806..b8d10629 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -34,8 +34,6 @@ main(void) { P(arena_t); P(arena_stats_t); P(base_t); - P(bin_t); - P(bin_with_batch_t); P(decay_t); P(edata_t); P(ecache_t); diff --git a/test/include/test/fork.h b/test/include/test/fork.h deleted file mode 100644 index 9e04d279..00000000 --- a/test/include/test/fork.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef JEMALLOC_TEST_FORK_H -#define JEMALLOC_TEST_FORK_H - -#ifndef _WIN32 - -# include - -static inline void -fork_wait_for_child_exit(int pid) { - int status; - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure."); - } - if (WIFSIGNALED(status)) { - test_fail( - "Unexpected child termination due to " - "signal %d", - WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail("Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } -} - -#endif - -#endif /* JEMALLOC_TEST_FORK_H */ diff --git a/test/unit/batcher.c b/test/unit/batcher.c deleted file mode 100644 index 1052ca27..00000000 --- a/test/unit/batcher.c +++ /dev/null @@ -1,243 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/batcher.h" - -TEST_BEGIN(test_simple) { - enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; - batcher_t batcher; - size_t data[NELEMS_MAX]; - for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { - batcher_init(&batcher, nelems); - for (int run = 0; run < NRUNS; run++) { - for (int i = 0; i < NELEMS_MAX; i++) { - data[i] = (size_t)-1; - } - for (size_t i = 0; i < nelems; i++) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(i, idx, "Wrong index"); - assert_zu_eq((size_t)-1, data[idx], - "Expected uninitialized slot"); - 
data[idx] = DATA_BASE_VAL + i; - batcher_push_end(TSDN_NULL, &batcher); - } - if (nelems > 0) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(BATCHER_NO_IDX, idx, - "Shouldn't be able to push into a full " - "batcher"); - } - - size_t npop = batcher_pop_begin(TSDN_NULL, &batcher); - if (nelems == 0) { - assert_zu_eq(npop, BATCHER_NO_IDX, - "Shouldn't get any items out of an empty " - "batcher"); - } else { - assert_zu_eq(npop, nelems, - "Wrong number of elements popped"); - } - for (size_t i = 0; i < nelems; i++) { - assert_zu_eq(data[i], DATA_BASE_VAL + i, - "Item popped out of order!"); - } - if (nelems != 0) { - batcher_pop_end(TSDN_NULL, &batcher); - } - } - } -} -TEST_END - -TEST_BEGIN(test_multi_push) { - size_t idx, nelems; - batcher_t batcher; - batcher_init(&batcher, 11); - /* Push two at a time, 5 times, for 10 total. */ - for (int i = 0; i < 5; i++) { - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(2 * i, idx, "Should push in order"); - batcher_push_end(TSDN_NULL, &batcher); - } - /* Pushing two more should fail -- would put us at 12 elems. 
*/ - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space"); - /* But one more should work */ - idx = batcher_push_begin(TSDN_NULL, &batcher, 1); - assert_zu_eq(10, idx, "Should be out of space"); - batcher_push_end(TSDN_NULL, &batcher); - nelems = batcher_pop_begin(TSDN_NULL, &batcher); - batcher_pop_end(TSDN_NULL, &batcher); - assert_zu_eq(11, nelems, "Should have popped everything"); -} -TEST_END - -enum { - STRESS_TEST_ELEMS = 10, - STRESS_TEST_THREADS = 4, - STRESS_TEST_OPS = 1000 * 1000, - STRESS_TEST_PUSH_TO_POP_RATIO = 5, -}; - -typedef struct stress_test_data_s stress_test_data_t; -struct stress_test_data_s { - batcher_t batcher; - mtx_t pop_mtx; - atomic_u32_t thread_id; - - uint32_t elems_data[STRESS_TEST_ELEMS]; - size_t push_count[STRESS_TEST_ELEMS]; - size_t pop_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; -}; - -/* - * Note: 0-indexed. If one element is set and you want to find it, you call - * get_nth_set(elems, 0). - */ -static size_t -get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { - size_t ntrue = 0; - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - ntrue++; - } - if (ntrue > n) { - return i; - } - } - assert_not_reached( - "Asked for the %zu'th set element when < %zu are " - "set", - n, n); - /* Just to silence a compiler warning. 
*/ - return 0; -} - -static void * -stress_test_thd(void *arg) { - stress_test_data_t *data = arg; - size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED); - - size_t nelems_owned = 0; - bool elems_owned[STRESS_TEST_ELEMS] = {0}; - size_t local_push_count[STRESS_TEST_ELEMS] = {0}; - size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; - - for (int i = 0; i < STRESS_TEST_OPS; i++) { - size_t rnd = prng_range_zu( - &prng, STRESS_TEST_PUSH_TO_POP_RATIO); - if (rnd == 0 || nelems_owned == 0) { - size_t nelems = batcher_pop_begin( - TSDN_NULL, &data->batcher); - if (nelems == BATCHER_NO_IDX) { - continue; - } - for (size_t i = 0; i < nelems; i++) { - uint32_t elem = data->elems_data[i]; - assert_false(elems_owned[elem], - "Shouldn't already own what we just " - "popped"); - elems_owned[elem] = true; - nelems_owned++; - local_pop_count[elem]++; - data->pop_count[elem]++; - } - batcher_pop_end(TSDN_NULL, &data->batcher); - } else { - size_t elem_to_push_idx = prng_range_zu( - &prng, nelems_owned); - size_t elem = get_nth_set( - elems_owned, elem_to_push_idx); - assert_true(elems_owned[elem], - "Should own element we're about to pop"); - elems_owned[elem] = false; - local_push_count[elem]++; - data->push_count[elem]++; - nelems_owned--; - size_t idx = batcher_push_begin( - TSDN_NULL, &data->batcher, 1); - assert_zu_ne(idx, BATCHER_NO_IDX, - "Batcher can't be full -- we have one of its " - "elems!"); - data->elems_data[idx] = (uint32_t)elem; - batcher_push_end(TSDN_NULL, &data->batcher); - } - } - - /* Push all local elems back, flush local counts to the shared ones. 
*/ - size_t push_idx = 0; - if (nelems_owned != 0) { - push_idx = batcher_push_begin( - TSDN_NULL, &data->batcher, nelems_owned); - assert_zu_ne( - BATCHER_NO_IDX, push_idx, "Should be space to push"); - } - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - data->elems_data[push_idx] = (uint32_t)i; - push_idx++; - local_push_count[i]++; - data->push_count[i]++; - } - atomic_fetch_add_zu(&data->atomic_push_count[i], - local_push_count[i], ATOMIC_RELAXED); - atomic_fetch_add_zu(&data->atomic_pop_count[i], - local_pop_count[i], ATOMIC_RELAXED); - } - if (nelems_owned != 0) { - batcher_push_end(TSDN_NULL, &data->batcher); - } - - return NULL; -} - -TEST_BEGIN(test_stress) { - stress_test_data_t data; - batcher_init(&data.batcher, STRESS_TEST_ELEMS); - bool err = mtx_init(&data.pop_mtx); - assert_false(err, "mtx_init failure"); - atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED); - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - data.push_count[i] = 0; - data.pop_count[i] = 0; - atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED); - atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED); - - size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1); - assert_zu_eq(i, idx, "Should push in order"); - data.elems_data[idx] = i; - batcher_push_end(TSDN_NULL, &data.batcher); - } - - thd_t threads[STRESS_TEST_THREADS]; - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_create(&threads[i], stress_test_thd, &data); - } - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_join(threads[i], NULL); - } - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - assert_zu_ne( - 0, data.push_count[i], "Should have done something!"); - assert_zu_eq(data.push_count[i], data.pop_count[i], - "every element should be pushed and popped an equal number " - "of times"); - assert_zu_eq(data.push_count[i], - atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - 
"synchronization"); - assert_zu_eq(data.pop_count[i], - atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - "synchronization"); - } -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_simple, test_multi_push, test_stress); -} diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c deleted file mode 100644 index a422586d..00000000 --- a/test/unit/bin_batching.c +++ /dev/null @@ -1,270 +0,0 @@ -#include "test/jemalloc_test.h" -#include "test/fork.h" - -enum { - STRESS_THREADS = 3, - STRESS_OBJECTS_PER_THREAD = 1000, - STRESS_ALLOC_SZ = PAGE / 2, -}; - -typedef struct stress_thread_data_s stress_thread_data_t; -struct stress_thread_data_s { - unsigned thd_id; - atomic_zu_t *ready_thds; - atomic_zu_t *done_thds; - void **to_dalloc; -}; - -static atomic_zu_t push_failure_count; -static atomic_zu_t pop_attempt_results[2]; -static atomic_zu_t dalloc_zero_slab_count; -static atomic_zu_t dalloc_nonzero_slab_count; -static atomic_zu_t dalloc_nonempty_list_count; - -static bool -should_skip() { - return - /* - * We do batching operations on tcache flush pathways; we can't if - * caching is disabled. - */ - !opt_tcache || - /* We rely on tcache fill/flush operations of the size we use. */ - opt_tcache_max < STRESS_ALLOC_SZ - /* - * Some of the races we want to trigger are fiddly enough that they - * only show up under real concurrency. We add 1 to account for the - * main thread, which also does some work. - */ - || ncpus < STRESS_THREADS + 1; -} - -static void -increment_push_failure(size_t push_idx) { - if (push_idx == BATCHER_NO_IDX) { - atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); - } else { - assert_zu_lt(push_idx, 4, "Only 4 elems"); - volatile size_t x = 10000; - while (--x) { - /* Spin for a while, to try to provoke a failure. 
*/ - if (x == push_idx) { -#ifdef _WIN32 - SwitchToThread(); -#else - sched_yield(); -#endif - } - } - } -} - -static void -increment_pop_attempt(size_t elems_to_pop) { - bool elems = (elems_to_pop != BATCHER_NO_IDX); - atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED); -} - -static void -increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { - if (slab_dalloc_count > 0) { - atomic_fetch_add_zu( - &dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED); - } else { - atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED); - } - if (!list_empty) { - atomic_fetch_add_zu( - &dalloc_nonempty_list_count, 1, ATOMIC_RELAXED); - } -} - -static void -flush_tcache() { - assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - "Unexpected mallctl failure"); -} - -static void * -stress_thread(void *arg) { - stress_thread_data_t *data = arg; - uint64_t prng_state = data->thd_id; - atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); - while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - /* Spin */ - } - for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) { - dallocx(data->to_dalloc[i], 0); - if (prng_range_u64(&prng_state, 3) == 0) { - flush_tcache(); - } - } - flush_tcache(); - atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); - return NULL; -} - -/* - * Run main_thread_fn in conditions that trigger all the various edge cases and - * subtle race conditions. 
- */ -static void -stress_run(void (*main_thread_fn)(), int nruns) { - bin_batching_test_ndalloc_slabs_max = 1; - bin_batching_test_after_push_hook = &increment_push_failure; - bin_batching_test_mid_pop_hook = &increment_pop_attempt; - bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; - - atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); - - for (int run = 0; run < nruns; run++) { - thd_t thds[STRESS_THREADS]; - stress_thread_data_t thd_datas[STRESS_THREADS]; - atomic_zu_t ready_thds; - atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); - atomic_zu_t done_thds; - atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); - - void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD]; - for (int i = 0; i < STRESS_THREADS; i++) { - thd_datas[i].thd_id = i; - thd_datas[i].ready_thds = &ready_thds; - thd_datas[i].done_thds = &done_thds; - thd_datas[i].to_dalloc = ptrs[i]; - for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) { - void *ptr = mallocx(STRESS_ALLOC_SZ, 0); - assert_ptr_not_null(ptr, "alloc failure"); - ptrs[i][j] = ptr; - } - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_create(&thds[i], stress_thread, &thd_datas[i]); - } - while (atomic_load_zu(&done_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - main_thread_fn(); - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_join(thds[i], NULL); - } - } - - bin_batching_test_ndalloc_slabs_max = (unsigned)-1; - bin_batching_test_after_push_hook = NULL; - bin_batching_test_mid_pop_hook = NULL; - bin_batching_test_after_unlock_hook = NULL; -} - -static void -do_allocs_frees() { - enum { NALLOCS = 32 }; - flush_tcache(); - void *ptrs[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - 
ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0); - } - for (int i = 0; i < NALLOCS; i++) { - dallocx(ptrs[i], 0); - } - flush_tcache(); -} - -static void -test_arena_reset_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_arena_reset) { - int err; - unsigned arena; - unsigned old_arena; - - test_skip_if(should_skip()); - test_skip_if(opt_percpu_arena != percpu_arena_disabled); - - size_t arena_sz = sizeof(arena); - err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); - assert_d_eq(0, err, "Arena creation failed"); - - err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); - - stress_run(&test_arena_reset_main_fn, /* nruns */ 10); - - flush_tcache(); - - char buf[100]; - malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena); - err = mallctl(buf, NULL, NULL, NULL, 0); - assert_d_eq(0, err, "Couldn't change arena"); - - do_allocs_frees(); - - err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); -} -TEST_END - -static void -test_fork_main_fn() { -#ifndef _WIN32 - pid_t pid = fork(); - if (pid == -1) { - test_fail("Fork failure!"); - } else if (pid == 0) { - /* Child */ - do_allocs_frees(); - _exit(0); - } else { - fork_wait_for_child_exit(pid); - do_allocs_frees(); - } -#endif -} - -TEST_BEGIN(test_fork) { -#ifdef _WIN32 - test_skip("No fork on windows"); -#endif - test_skip_if(should_skip()); - stress_run(&test_fork_main_fn, /* nruns */ 10); -} -TEST_END - -static void -test_races_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_races) { - test_skip_if(should_skip()); - - stress_run(&test_races_main_fn, /* nruns */ 400); - - assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED), - "Should have seen some push failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED), - "Should have seen some pop failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED), - 
"Should have seen some pop successes"); - assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), - "Expected some frees that didn't empty a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED), - "expected some frees that emptied a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED), - "expected some frees that used the empty list"); -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_arena_reset, test_races, test_fork); -} diff --git a/test/unit/bin_batching.sh b/test/unit/bin_batching.sh deleted file mode 100644 index fef9bdc6..00000000 --- a/test/unit/bin_batching.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -# This value of max_batched_size effectively requires all bins to be batched; -# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be -# at multi-gigabyte minimum page sizes. -# The reason for this sort of hacky approach is that we want to -# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" -> -# "empty" and "non-empty"-> "full" transitions often, which have special -# handling). But the value of PAGE isn't easily available in test scripts. 
-export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4" diff --git a/test/unit/fork.c b/test/unit/fork.c index e52d0a6c..60675b77 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,5 +1,34 @@ #include "test/jemalloc_test.h" -#include "test/fork.h" + +#ifndef _WIN32 +# include <sys/wait.h> +#endif + +#ifndef _WIN32 +static void +wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail( + "Unexpected child termination due to " + "signal %d", + WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} +#endif TEST_BEGIN(test_fork) { #ifndef _WIN32 @@ -37,7 +66,7 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } #else test_skip("fork(2) is irrelevant to Windows"); @@ -60,7 +89,7 @@ do_fork_thd(void *arg) { test_fail("Exec failed"); } else { /* Parent */ - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } return NULL; } @@ -97,7 +126,7 @@ TEST_BEGIN(test_fork_multithreaded) { do_test_fork_multithreaded(); _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } } #else