diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index 39d2099d..cf191aeb 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -61,13 +61,13 @@ bool arena_decay_ms_set(
 ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state);
 void arena_decay(
     tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all);
-uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
-void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
-void arena_reset(tsd_t *tsd, arena_t *arena);
-void arena_destroy(tsd_t *tsd, arena_t *arena);
-void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
-    cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min,
-    const cache_bin_sz_t nfill_max);
+uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
+void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
+void arena_reset(tsd_t *tsd, arena_t *arena);
+void arena_destroy(tsd_t *tsd, arena_t *arena);
+cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena,
+    szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min,
+    const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats);
 void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
     szind_t ind, bool zero, bool slab);
@@ -84,6 +84,9 @@ void arena_dalloc_bin_locked_handle_newly_empty(
 void arena_dalloc_bin_locked_handle_newly_nonempty(
     tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin);
 void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
+void arena_ptr_array_flush(tsd_t *tsd, szind_t binind,
+    cache_bin_ptr_array_t *arr, unsigned nflush, bool small,
+    arena_t *stats_arena, cache_bin_stats_t merge_stats);
 bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero, size_t *newsize);
 void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h
index 7ed2b968..c586164f 100644
--- a/include/jemalloc/internal/arena_types.h
+++ b/include/jemalloc/internal/arena_types.h
@@ -12,8 +12,7 @@
 /* Maximum length of the arena name. */
 #define ARENA_NAME_LEN 32
 
-typedef struct arena_decay_s arena_decay_t;
-typedef struct arena_s arena_t;
+typedef struct arena_s arena_t;
 
 typedef enum {
 	percpu_arena_mode_names_base = 0, /* Used for options processing. */
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index 08ee0d6a..bea3a2fc 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -690,6 +690,10 @@ cache_bin_finish_fill(
 		    nfilled * sizeof(void *));
 	}
 	bin->stack_head = empty_position - nfilled;
+	/* Reset the bin stats, as they're merged during fill. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 /*
@@ -711,6 +715,10 @@ cache_bin_finish_flush(
 	    bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *));
 	bin->stack_head += nflushed;
 	cache_bin_low_water_adjust(bin);
+	/* Reset the bin stats, as they're merged during flush. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 static inline void
@@ -731,6 +739,10 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin) {
 	/* Reset the bin local full position. */
 	bin->low_bits_full = (uint16_t)(uintptr_t)low_bound;
 	assert(cache_bin_nstashed_get_local(bin) == 0);
+	/* Reset the bin stats, as they're merged during flush. */
+	if (config_stats) {
+		bin->tstats.nrequests = 0;
+	}
 }
 
 /*
diff --git a/src/arena.c b/src/arena.c
index 224a9b63..664ed6a3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -693,7 +693,7 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
 		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 	}
 	for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL;
-	    slab = edata_list_active_first(&bin->slabs_full)) {
+	    slab = edata_list_active_first(&bin->slabs_full)) {
 		arena_bin_slabs_full_remove(arena, bin, slab);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 		arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
@@ -799,7 +799,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 	malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx);
 
 	for (edata_t *edata = edata_list_active_first(&arena->large);
-	    edata != NULL; edata = edata_list_active_first(&arena->large)) {
+	    edata != NULL; edata = edata_list_active_first(&arena->large)) {
 		void *ptr = edata_base_get(edata);
 		size_t usize;
 
@@ -1052,18 +1052,13 @@ arena_bin_choose(
 	return arena_get_bin(arena, binind, binshard);
 }
 
-void
-arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin,
-    szind_t binind, const cache_bin_sz_t nfill_min,
-    const cache_bin_sz_t nfill_max) {
-	assert(cache_bin_ncached_get_local(cache_bin) == 0);
+cache_bin_sz_t
+arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min,
+    const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats) {
 	assert(nfill_min > 0 && nfill_min <= nfill_max);
-	assert(nfill_max <= cache_bin_ncached_max_get(cache_bin));
 
 	const bin_info_t *bin_info = &bin_infos[binind];
-
-	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max);
-	cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max);
 
 	/*
 	 * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull
	 * slabs.
After both are exhausted, new slabs will be allocated through @@ -1115,7 +1110,7 @@ label_refill: } arena_slab_reg_alloc_batch( - slabcur, bin_info, cnt, &ptrs.ptr[filled]); + slabcur, bin_info, cnt, &arr->ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1153,10 +1148,9 @@ label_refill: if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; - bin->stats.nrequests += cache_bin->tstats.nrequests; + bin->stats.nrequests += merge_stats.nrequests; bin->stats.curregs += filled; bin->stats.nfills++; - cache_bin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); @@ -1184,8 +1178,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(cache_bin, &ptrs, filled); arena_decay_tick(tsdn, arena); + return filled; } size_t @@ -1472,6 +1466,357 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) { arena_decay_tick(tsdn, arena); } +static const void * +arena_ptr_array_flush_ptr_getter(void *arr_ctx, size_t ind) { + cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; + return arr->ptr[ind]; +} + +static void +arena_ptr_array_flush_metadata_visitor( + void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { + size_t *szind_sum = (size_t *)szind_sum_ctx; + *szind_sum -= alloc_ctx->szind; + util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); +} + +JEMALLOC_NOINLINE static void +arena_ptr_array_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, + size_t nptrs, emap_batch_lookup_result_t *edatas) { + bool found_mismatch = false; + for (size_t i = 0; i < nptrs; i++) { + szind_t true_szind = edata_szind_get(edatas[i].edata); + if (true_szind != szind) { + found_mismatch = true; + safety_check_fail_sized_dealloc( + /* current_dealloc */ false, + /* ptr */ arena_ptr_array_flush_ptr_getter(arr, i), + /* true_size */ sz_index2size(true_szind), + /* input_size */ sz_index2size(szind)); + } + } + assert(found_mismatch); +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * The slabs where we freed the last remaining object in the slab (and + * so need to free the slab itself). + * Used only if small == true. + */ + unsigned dalloc_count = 0; + VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + /* + * We maintain the invariant that all edatas yet to be flushed are + * contained in the half-open range [flush_start, flush_end). We'll + * repeatedly partition the array so that the unflushed items are at the + * end. + */ + unsigned flush_start = 0; + + while (flush_start < nflush) { + /* + * After our partitioning step, all objects to flush will be in + * the half-open range [prev_flush_start, flush_start), and + * flush_start will be updated to correspond to the next loop + * iteration. 
+ */ + unsigned prev_flush_start = flush_start; + + edata_t *cur_edata = item_edata[flush_start].edata; + unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + unsigned cur_binshard = edata_binshard_get(cur_edata); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); + assert(cur_binshard < bin_infos[binind].n_shards); + /* + * Start off the partition; item_edata[i] always matches itself + * of course. + */ + flush_start++; + for (unsigned i = flush_start; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + assert( + (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert( + (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + if (edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard) { + /* Swap the edatas. */ + emap_batch_lookup_result_t temp_edata = + item_edata[flush_start]; + item_edata[flush_start] = item_edata[i]; + item_edata[i] = temp_edata; + /* Swap the pointers */ + void *temp_ptr = arr->ptr[flush_start]; + arr->ptr[flush_start] = arr->ptr[i]; + arr->ptr[i] = temp_ptr; + flush_start++; + } + } + /* Make sure we implemented partitioning correctly. */ + if (config_debug) { + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + edata_t *edata = item_edata[i].edata; + unsigned arena_ind = edata_arena_ind_get(edata); + assert(arena_ind == cur_arena_ind); + unsigned binshard = edata_binshard_get(edata); + assert(binshard == cur_binshard); + } + for (unsigned i = flush_start; i < nflush; i++) { + edata_t *edata = item_edata[i].edata; + assert( + edata_arena_ind_get(edata) != cur_arena_ind + || edata_binshard_get(edata) + != cur_binshard); + } + } + + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); + + /* + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + } + + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. */ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; + } + } + + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + + arena_decay_ticks( + tsdn, cur_arena, flush_start - prev_flush_start); + } + + /* Handle all deferred slab dalloc. */ + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + + if (config_stats && *merge_stats != NULL) { + /* + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. 
+ */ + bin_t *bin = arena_bin_choose(tsdn, stats_arena, binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + malloc_mutex_unlock(tsdn, &bin->lock); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_large(tsdn_t *tsdn, szind_t binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. */ + edata_t *edata = item_edata[0].edata; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + if (!arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, + &stats_arena->stats, binind, + (*merge_stats)->nrequests); + *merge_stats = NULL; + } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + + if (edata_arena_ind_get(edata) == cur_arena_ind) { + large_dalloc_prep_locked(tsdn, edata); + } + } + if (!arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + if (edata_arena_ind_get(edata) != cur_arena_ind) { + /* + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. + */ + arr->ptr[ndeferred] = ptr; + item_edata[ndeferred].edata = edata; + ndeferred++; + continue; + } + if (large_dalloc_safety_checks( + edata, ptr, sz_index2size(binind))) { + /* See the comment in isfree. */ + continue; + } + large_dalloc_finish(tsdn, edata); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); + nflush = ndeferred; + } + + if (config_stats && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, &stats_arena->stats, + binind, (*merge_stats)->nrequests); + *merge_stats = NULL; + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl(tsd_t *tsd, szind_t binind, + cache_bin_ptr_array_t *arr, unsigned nflush, bool small, + arena_t *stats_arena, cache_bin_stats_t **merge_stats) { + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). + */ + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); + /* + * This gets compiled away when config_opt_safety_checks is false. + * Checks for sized deallocation bugs, failing early rather than + * corrupting metadata. 
+	 */
+	size_t szind_sum = binind * nflush;
+	emap_edata_lookup_batch(tsd, &arena_emap_global, nflush,
+	    &arena_ptr_array_flush_ptr_getter, (void *)arr,
+	    &arena_ptr_array_flush_metadata_visitor, (void *)&szind_sum,
+	    item_edata);
+	if (config_opt_safety_checks && unlikely(szind_sum != 0)) {
+		arena_ptr_array_flush_size_check_fail(
+		    arr, binind, nflush, item_edata);
+	}
+
+	/*
+	 * The small/large flush logic is very similar; you might conclude that
+	 * it's a good opportunity to share code. We've tried this, and by and
+	 * large found this to obscure more than it helps; there are so many
+	 * fiddly bits around things like stats handling, precisely when and
+	 * which mutexes are acquired, etc., that almost all code ends up being
+	 * gated behind 'if (small) { ... } else { ... }'. Even though the
+	 * '...' is morally equivalent, the code itself needs slight tweaks.
+	 */
+	if (small) {
+		arena_ptr_array_flush_impl_small(tsdn, binind, arr,
+		    item_edata, nflush, stats_arena, merge_stats);
+	} else {
+		arena_ptr_array_flush_impl_large(tsdn, binind, arr,
+		    item_edata, nflush, stats_arena, merge_stats);
+	}
+}
+
+/*
+ * In practice, pointers are flushed back to their original allocation arenas,
+ * so multiple arenas may be involved here. The input stats_arena simply
+ * indicates which arena the cache stats should be merged into.
+ */
+void
+arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr,
+    unsigned nflush, bool small, arena_t *stats_arena,
+    cache_bin_stats_t merge_stats) {
+	assert(arr != NULL && arr->ptr != NULL);
+	/*
+	 * The input cache bin stats represent a snapshot taken when the pointer
+	 * array is set up, and will be merged into the next-level bin stats.
+	 * The original bin stats will be reset by the caller. This separation
+	 * ensures that each layer operates independently and does not modify
+	 * another layer's data directly.
+	 */
+	cache_bin_stats_t *stats = &merge_stats;
+	unsigned nflush_batch, nflushed = 0;
+	cache_bin_ptr_array_t ptrs_batch;
+	do {
+		nflush_batch = nflush - nflushed;
+		if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) {
+			nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX;
+		}
+		assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX);
+		(&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch;
+		(&ptrs_batch)->ptr = arr->ptr + nflushed;
+		arena_ptr_array_flush_impl(tsd, binind, &ptrs_batch,
+		    nflush_batch, small, stats_arena, &stats);
+		nflushed += nflush_batch;
+	} while (nflushed < nflush);
+	assert(nflush == nflushed);
+	assert((arr->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch));
+	if (config_stats) {
+		assert(stats == NULL);
+	}
+}
+
 bool
 arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero, size_t *newsize) {
@@ -1890,7 +2235,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) {
 	/* Make sure that b0 thp auto-switch won't happen concurrently here.
*/ malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp - && metadata_thp_enabled() && (opt_thp == thp_mode_do_nothing) + && metadata_thp_enabled() + && (opt_thp == thp_mode_do_nothing) && (init_system_thp_mode == system_thp_mode_madvise); (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; diff --git a/src/tcache.c b/src/tcache.c index 2d73237b..74ff4718 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -601,15 +601,26 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); + assert(cache_bin_ncached_get_local(cache_bin) == 0); cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) >> tcache_nfill_small_lg_div_get(tcache_slow, binind); if (nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, - /* nfill_min */ - opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill, - /* nfill_max */ nfill); + cache_bin_sz_t nfill_min = opt_experimental_tcache_gc + ? ((nfill >> 1) + 1) + : nfill; + cache_bin_sz_t nfill_max = nfill; + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); + + cache_bin_sz_t filled = arena_ptr_array_fill_small(tsdn, arena, binind, + &ptrs, /* nfill_min */ nfill_min, /* nfill_max */ nfill_max, + cache_bin->tstats); + cache_bin_finish_fill(cache_bin, &ptrs, filled); + assert(filled >= nfill_min && filled <= nfill_max); + assert(cache_bin_ncached_get_local(cache_bin) == filled); + tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -617,363 +628,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -static const void * -tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { - cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; - return arr->ptr[ind]; -} - -static void -tcache_bin_flush_metadata_visitor( - void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { - size_t *szind_sum = (size_t *)szind_sum_ctx; - *szind_sum -= alloc_ctx->szind; - util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); -} - -JEMALLOC_NOINLINE static void -tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, - size_t nptrs, emap_batch_lookup_result_t *edatas) { - bool found_mismatch = false; - for (size_t i = 0; i < nptrs; i++) { - szind_t true_szind = edata_szind_get(edatas[i].edata); - if (true_szind != szind) { - found_mismatch = true; - safety_check_fail_sized_dealloc( - /* current_dealloc */ false, - /* ptr */ tcache_bin_flush_ptr_getter(arr, i), - /* true_size */ sz_index2size(true_szind), - /* input_size */ sz_index2size(szind)); - } - } - assert(found_mismatch); -} - -static void -tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, - szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { - /* - * This gets compiled away when config_opt_safety_checks is false. - * Checks for sized deallocation bugs, failing early rather than - * corrupting metadata. 
- */ - size_t szind_sum = binind * nflush; - emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, - &tcache_bin_flush_ptr_getter, (void *)arr, - &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); - if (config_opt_safety_checks && unlikely(szind_sum != 0)) { - tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < SC_NBINS); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * The slabs where we freed the last remaining object in the slab (and - * so need to free the slab itself). - * Used only if small == true. - */ - unsigned dalloc_count = 0; - VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. - */ - bool merged_stats = false; - /* - * We maintain the invariant that all edatas yet to be flushed are - * contained in the half-open range [flush_start, flush_end). We'll - * repeatedly partition the array so that the unflushed items are at the - * end. - */ - unsigned flush_start = 0; - - while (flush_start < nflush) { - /* - * After our partitioning step, all objects to flush will be in - * the half-open range [prev_flush_start, flush_start), and - * flush_start will be updated to correspond to the next loop - * iteration. - */ - unsigned prev_flush_start = flush_start; - - edata_t *cur_edata = item_edata[flush_start].edata; - unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - unsigned cur_binshard = edata_binshard_get(cur_edata); - bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); - assert(cur_binshard < bin_infos[binind].n_shards); - /* - * Start off the partition; item_edata[i] always matches itself - * of course. - */ - flush_start++; - for (unsigned i = flush_start; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - assert( - (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); - assert( - (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); - if (edata_arena_ind_get(edata) == cur_arena_ind - && edata_binshard_get(edata) == cur_binshard) { - /* Swap the edatas. */ - emap_batch_lookup_result_t temp_edata = - item_edata[flush_start]; - item_edata[flush_start] = item_edata[i]; - item_edata[i] = temp_edata; - /* Swap the pointers */ - void *temp_ptr = ptrs->ptr[flush_start]; - ptrs->ptr[flush_start] = ptrs->ptr[i]; - ptrs->ptr[i] = temp_ptr; - flush_start++; - } - } - /* Make sure we implemented partitioning correctly. 
*/ - if (config_debug) { - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - edata_t *edata = item_edata[i].edata; - unsigned arena_ind = edata_arena_ind_get(edata); - assert(arena_ind == cur_arena_ind); - unsigned binshard = edata_binshard_get(edata); - assert(binshard == cur_binshard); - } - for (unsigned i = flush_start; i < nflush; i++) { - edata_t *edata = item_edata[i].edata; - assert( - edata_arena_ind_get(edata) != cur_arena_ind - || edata_binshard_get(edata) - != cur_binshard); - } - } - - /* Actually do the flushing. */ - malloc_mutex_lock(tsdn, &cur_bin->lock); - - /* - * Flush stats first, if that was the right lock. Note that we - * don't actually have to flush stats into the current thread's - * binshard. Flushing into any binshard in the same arena is - * enough; we don't expose stats on per-binshard basis (just - * per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - - /* Next flush objects. */ - /* Init only to avoid used-uninitialized warning. */ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; - } - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - - arena_decay_ticks( - tsdn, cur_arena, flush_start - prev_flush_start); - } - - /* Handle all deferred slab dalloc. */ - for (unsigned i = 0; i < dalloc_count; i++) { - edata_t *slab = dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - - if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ - bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); - malloc_mutex_lock(tsdn, &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < tcache_nbins_get(tcache_slow)); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. 
- */ - bool merged_stats = false; - while (nflush > 0) { - /* Lock the arena, or bin, associated with the first object. */ - edata_t *edata = item_edata[0].edata; - unsigned cur_arena_ind = edata_arena_ind_get(edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - if (!arena_is_auto(cur_arena)) { - malloc_mutex_lock(tsdn, &cur_arena->large_mtx); - } - - /* - * If we acquired the right lock and have some stats to flush, - * flush them. - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } - - /* - * Large allocations need special prep done. Afterwards, we can - * drop the large lock. - */ - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == cur_arena_ind) { - large_dalloc_prep_locked(tsdn, edata); - } - } - if (!arena_is_auto(cur_arena)) { - malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); - } - - /* Deallocate whatever we can. */ - unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - if (edata_arena_ind_get(edata) != cur_arena_ind) { - /* - * The object was allocated either via a - * different arena, or a different bin in this - * arena. Either way, stash the object so that - * it can be handled in a future pass. - */ - ptrs->ptr[ndeferred] = ptr; - item_edata[ndeferred].edata = edata; - ndeferred++; - continue; - } - if (large_dalloc_safety_checks( - edata, ptr, sz_index2size(binind))) { - /* See the comment in isfree. */ - continue; - } - large_dalloc_finish(tsdn, edata); - } - arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); - nflush = ndeferred; - } - - if (config_stats && !merged_stats) { - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - assert(ptrs != NULL && ptrs->ptr != NULL); - unsigned nflush_batch, nflushed = 0; - cache_bin_ptr_array_t ptrs_batch; - do { - nflush_batch = nflush - nflushed; - if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { - nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; - } - assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); - (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; - (&ptrs_batch)->ptr = ptrs->ptr + nflushed; - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. 
- */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } - nflushed += nflush_batch; - } while (nflushed < nflush); - assert(nflush == nflushed); - assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); -} - JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { @@ -1001,8 +655,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nflush, small); + arena_ptr_array_flush(tsd, binind, &ptrs, nflush, small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush(cache_bin, &ptrs, nflush); } @@ -1054,8 +708,8 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_init_ptr_array_for_stashed( cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); + arena_ptr_array_flush(tsd, binind, &ptrs, nstashed, is_small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush_stashed(cache_bin); assert(cache_bin_nstashed_get_local(cache_bin) == 0);
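
For reference, a condensed sketch (not part of the patch) of the new fill/flush caller contract, abridged from the tcache.c hunks above: the caller now owns the cache_bin_ptr_array_t and the tstats snapshot, and the arena entry points no longer touch the cache bin directly. The helper name refill_and_flush_example, its flattened control flow, and the two umbrella includes are illustrative assumptions; the entry points and the ptr-array/tstats handshake are the ones this diff introduces.

/*
 * Illustrative sketch only; mirrors the tcache.c call sites above.
 * Assumes nfill > 0, nfill <= cache_bin_ncached_max_get(cache_bin),
 * 0 < nflush <= cache_bin_ncached_get_local(cache_bin).
 */
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

static void
refill_and_flush_example(tsd_t *tsd, arena_t *arena, cache_bin_t *cache_bin,
    szind_t binind, cache_bin_sz_t nfill, unsigned nflush, bool small) {
	tsdn_t *tsdn = tsd_tsdn(tsd);

	/* Fill: the caller sets up the ptr array and finishes the cache bin. */
	CACHE_BIN_PTR_ARRAY_DECLARE(fill_ptrs, nfill);
	cache_bin_init_ptr_array_for_fill(cache_bin, &fill_ptrs, nfill);
	cache_bin_sz_t filled = arena_ptr_array_fill_small(tsdn, arena, binind,
	    &fill_ptrs, /* nfill_min */ nfill, /* nfill_max */ nfill,
	    cache_bin->tstats);	/* stats snapshot merged by the arena */
	/* Publishes the filled ptrs and resets tstats (per this patch). */
	cache_bin_finish_fill(cache_bin, &fill_ptrs, filled);

	/* Flush: same pattern, with the stats arena passed explicitly. */
	CACHE_BIN_PTR_ARRAY_DECLARE(flush_ptrs, nflush);
	cache_bin_init_ptr_array_for_flush(cache_bin, &flush_ptrs, nflush);
	arena_ptr_array_flush(tsd, binind, &flush_ptrs, nflush, small,
	    /* stats_arena */ arena, cache_bin->tstats);
	cache_bin_finish_flush(cache_bin, &flush_ptrs, nflush);
}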