diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7f5f6bb0..ea246cc5 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -630,6 +630,8 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, &batched_bin->remote_frees.mtx); } + size_t npushes = batcher_pop_get_pushes(tsdn, + &batched_bin->remote_frees); bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; for (size_t i = 0; i < nelems_to_pop; i++) { remote_free_data[i] = batched_bin->remote_free_data[i]; @@ -642,6 +644,10 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, dalloc_slabs, ndalloc_slabs, dalloc_count, dalloc_slabs_extra); } + + bin->stats.batch_pops++; + bin->stats.batch_pushes += npushes; + bin->stats.batch_pushed_elems += nelems_to_pop; } typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h index a435f0b7..40c8b35f 100644 --- a/include/jemalloc/internal/batcher.h +++ b/include/jemalloc/internal/batcher.h @@ -16,6 +16,7 @@ struct batcher_s { */ atomic_zu_t nelems; size_t nelems_max; + size_t npushes; malloc_mutex_t mtx; }; @@ -35,6 +36,7 @@ void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); * If the former, must be followed by a call to batcher_pop_end. 
*/ size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); +size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 5b776c17..c49afea6 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -138,6 +138,12 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; + + stats->batch_pops += bin->stats.batch_pops; + stats->batch_failed_pushes += bin->stats.batch_failed_pushes; + stats->batch_pushes += bin->stats.batch_pushes; + stats->batch_pushed_elems += bin->stats.batch_pushed_elems; + malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index f95b9e9c..334c166d 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,6 +48,11 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. */ size_t nonfull_slabs; + + uint64_t batch_pops; + uint64_t batch_failed_pushes; + uint64_t batch_pushes; + uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/src/batcher.c b/src/batcher.c index 19af7d83..2570b3a9 100644 --- a/src/batcher.c +++ b/src/batcher.c @@ -9,6 +9,7 @@ void batcher_init(batcher_t *batcher, size_t nelems_max) { atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); batcher->nelems_max = nelems_max; + batcher->npushes = 0; malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, malloc_mutex_rank_exclusive); } @@ -37,9 +38,18 @@ size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, * acquire a mutex only to discover that there's no space for them. 
*/ atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + batcher->npushes++; return nelems; } +size_t +batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_assert_owner(tsdn, &batcher->mtx); + size_t npushes = batcher->npushes; + batcher->npushes = 0; + return npushes; +} + void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { malloc_mutex_assert_owner(tsdn, &batcher->mtx); diff --git a/src/ctl.c b/src/ctl.c index ab40050d..09188dd9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -239,6 +239,10 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) +CTL_PROTO(stats_arenas_i_bins_j_batch_pops) +CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -694,6 +698,14 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, + {NAME("batch_pops"), + CTL(stats_arenas_i_bins_j_batch_pops)}, + {NAME("batch_failed_pushes"), + CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, + {NAME("batch_pushes"), + CTL(stats_arenas_i_bins_j_batch_pushes)}, + {NAME("batch_pushed_elems"), + CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -1246,6 +1258,16 @@ MUTEX_PROF_ARENA_MUTEXES assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } + + merged->batch_pops + += bstats->batch_pops; + merged->batch_failed_pushes + += bstats->batch_failed_pushes; + merged->batch_pushes + += bstats->batch_pushes; + merged->batch_pushed_elems + += 
bstats->batch_pushed_elems; + malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -3957,6 +3979,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/stats.c b/src/stats.c index f057e722..8419158a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -358,6 +358,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, pops, NULL, right, 10, uint64) + COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, failed_push, NULL, right, 13, uint64) + COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push, NULL, right, 7, uint64) + COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push_elem, NULL, right, 12, uint64) + COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) + /* Don't want to actually print the name. 
*/ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -405,6 +414,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; + uint64_t batch_pops, batch_failed_pushes, batch_pushes, + batch_pushed_elems; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -453,6 +464,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pops", &batch_pops, + uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", + &batch_failed_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", + &batch_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", + &batch_pushed_elems, uint64_t); + if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -487,6 +507,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); + emitter_json_kv(emitter, "batch_pops", + emitter_type_uint64, &batch_pops); + emitter_json_kv(emitter, "batch_failed_pushes", + emitter_type_uint64, &batch_failed_pushes); + emitter_json_kv(emitter, "batch_pushes", + emitter_type_uint64, &batch_pushes); + emitter_json_kv(emitter, "batch_pushed_elems", + emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, @@ -545,6 +573,21 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); + col_pops.uint64_val = batch_pops; + col_pops_ps.uint64_val + = rate_per_second(batch_pops, uptime); + + col_failed_push.uint64_val = batch_failed_pushes; + col_failed_push_ps.uint64_val + = 
rate_per_second(batch_failed_pushes, uptime); + col_push.uint64_val = batch_pushes; + col_push_ps.uint64_val + = rate_per_second(batch_pushes, uptime); + + col_push_elem.uint64_val = batch_pushed_elems; + col_push_elem_ps.uint64_val + = rate_per_second(batch_pushed_elems, uptime); + /* * Note that mutex columns were initialized above, if mutex == * true. diff --git a/src/tcache.c b/src/tcache.c index 564b5d9c..03ec5136 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -482,6 +482,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin */ bool locked = false; bool batched = false; + bool batch_failed = false; if (can_batch) { locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } @@ -508,12 +509,24 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } batcher_push_end(tsdn, &batched_bin->remote_frees); + } else { + batch_failed = true; } } if (!batched) { if (!locked) { malloc_mutex_lock(tsdn, &cur_bin->lock); } + /* + * Unlike other stats (which only ever get flushed into + * a tcache's associated arena), batch_failed counts get + * accumulated into the bin where the push attempt + * failed. + */ + if (config_stats && batch_failed) { + cur_bin->stats.batch_failed_pushes++; + } + /* * Flush stats first, if that was the right lock. Note * that we don't actually have to flush stats into the