diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index dc7725b7..2d9dcbed 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -51,6 +51,20 @@ struct hpa_shard_nonderived_stats_s { * Guarded by mtx. */ uint64_t ndehugifies; + + /* + * Distribution of max nallocs from hpa_alloc() calls. + * + * Guarded by mtx. + */ + uint64_t alloc_batch_max_nallocs[MAX_SEC_NALLOCS + 1]; + + /* + * Distribution of successful nallocs from hpa_alloc() calls. + * + * Guarded by mtx. + */ + uint64_t alloc_batch_nallocs[MAX_SEC_NALLOCS + 1]; }; /* Completely derived; only used by CTL. */ diff --git a/src/ctl.c b/src/ctl.c index e03dca4f..d9e42d7e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -308,6 +308,9 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) +CTL_PROTO(stats_arenas_i_hpa_shard_alloc_batch_j_max_nallocs) +CTL_PROTO(stats_arenas_i_hpa_shard_alloc_batch_j_nallocs) +INDEX_PROTO(stats_arenas_i_hpa_shard_alloc_batch_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -776,6 +779,18 @@ static const ctl_named_node_t static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = {{INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)}}; +static const ctl_named_node_t stats_arenas_i_hpa_shard_alloc_batch_j_node[] = { + {NAME("max_nallocs"), + CTL(stats_arenas_i_hpa_shard_alloc_batch_j_max_nallocs)}, + {NAME("nallocs"), CTL(stats_arenas_i_hpa_shard_alloc_batch_j_nallocs)}}; + +static const ctl_named_node_t + super_stats_arenas_i_hpa_shard_alloc_batch_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_hpa_shard_alloc_batch_j)}}; + +static const ctl_indexed_node_t stats_arenas_i_hpa_shard_alloc_batch_node[] = { + {INDEX(stats_arenas_i_hpa_shard_alloc_batch_j)}}; + static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("npageslabs"), 
CTL(stats_arenas_i_hpa_shard_npageslabs)}, {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, @@ -789,6 +804,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + {NAME("alloc_batch"), CHILD(indexed, stats_arenas_i_hpa_shard_alloc_batch)}, + {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)}, {NAME("nonfull_slabs"), @@ -4082,6 +4099,15 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_alloc_batch_j_max_nallocs, + arenas_i(mib[2]) + ->astats->hpastats.nonderived_stats.alloc_batch_max_nallocs[mib[5]], + uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_alloc_batch_j_nallocs, + arenas_i(mib[2]) + ->astats->hpastats.nonderived_stats.alloc_batch_nallocs[mib[5]], + uint64_t); + /* Full, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, @@ -4175,6 +4201,15 @@ stats_arenas_i_hpa_shard_nonfull_slabs_j_index( return super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node; } +static const ctl_named_node_t * +stats_arenas_i_hpa_shard_alloc_batch_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { + if (j > MAX_SEC_NALLOCS) { + return NULL; + } + return super_stats_arenas_i_hpa_shard_alloc_batch_j_node; +} + static bool ctl_arenas_i_verify(size_t i) { size_t a = arenas_i2a_impl(i, true, true); diff --git a/src/hpa.c b/src/hpa.c index 2dd15362..9ebe02bc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -109,6 +109,10 @@ hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, shard->stats.nhugifies = 0; shard->stats.nhugify_failures 
= 0; shard->stats.ndehugifies = 0; + memset(shard->stats.alloc_batch_max_nallocs, 0, + sizeof(shard->stats.alloc_batch_max_nallocs)); + memset(shard->stats.alloc_batch_nallocs, 0, + sizeof(shard->stats.alloc_batch_nallocs)); /* * Fill these in last, so that if an hpa_shard gets used despite @@ -145,6 +149,11 @@ hpa_shard_nonderived_stats_accum( dst->nhugifies += src->nhugifies; dst->nhugify_failures += src->nhugify_failures; dst->ndehugifies += src->ndehugifies; + for (size_t i = 0; i <= MAX_SEC_NALLOCS; i++) { + dst->alloc_batch_max_nallocs[i] += + src->alloc_batch_max_nallocs[i]; + dst->alloc_batch_nallocs[i] += src->alloc_batch_nallocs[i]; + } } void @@ -751,6 +760,11 @@ hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard, hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); + assert(max_nallocs <= MAX_SEC_NALLOCS); + shard->stats.alloc_batch_max_nallocs[max_nallocs] += 1; + assert(nsuccess <= MAX_SEC_NALLOCS); + shard->stats.alloc_batch_nallocs[nsuccess] += 1; + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); return nsuccess; diff --git a/src/stats.c b/src/stats.c index ce79cb20..a9e5b168 100644 --- a/src/stats.c +++ b/src/stats.c @@ -889,9 +889,7 @@ stats_arena_hpa_shard_counters_print( " / sec)\n" " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Dehugifies: %" FMTu64 " (%" FMTu64 - " / sec)\n" - "\n", + " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n", npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive, nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge, nretained_nonhuge, npurge_passes, @@ -931,6 +929,43 @@ stats_arena_hpa_shard_counters_print( emitter_json_kv( emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_object_end(emitter); /* End "slabs" */ + + /* alloc_batch stats */ + uint64_t alloc_batch_max_nallocs[MAX_SEC_NALLOCS + 1]; + uint64_t 
alloc_batch_nallocs[MAX_SEC_NALLOCS + 1]; + + size_t alloc_batch_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(alloc_batch_mib, 0, "stats.arenas"); + alloc_batch_mib[2] = i; + CTL_LEAF_PREPARE(alloc_batch_mib, 3, "hpa_shard.alloc_batch"); + + for (size_t j = 0; j <= MAX_SEC_NALLOCS; j += 1) { + alloc_batch_mib[5] = j; + CTL_LEAF(alloc_batch_mib, 6, "max_nallocs", + &alloc_batch_max_nallocs[j], uint64_t); + CTL_LEAF(alloc_batch_mib, 6, "nallocs", &alloc_batch_nallocs[j], + uint64_t); + } + + emitter_table_printf(emitter, " Alloc batch distribution:\n"); + emitter_table_printf(emitter, " %4s %20s %20s\n", "", "max", "actual"); + for (size_t j = 0; j <= MAX_SEC_NALLOCS; j += 1) { + emitter_table_printf(emitter, + " %4zu %20" FMTu64 " %20" FMTu64 "\n", j, + alloc_batch_max_nallocs[j], alloc_batch_nallocs[j]); + } + emitter_table_printf(emitter, "\n"); + + emitter_json_array_kv_begin(emitter, "alloc_batch_distribution"); + for (size_t j = 0; j <= MAX_SEC_NALLOCS; j += 1) { + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "max", emitter_type_uint64, + &alloc_batch_max_nallocs[j]); + emitter_json_kv(emitter, "actual", emitter_type_uint64, + &alloc_batch_nallocs[j]); + emitter_json_object_end(emitter); + } + emitter_json_array_end(emitter); /* End "alloc_batch_distribution" */ } static void diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c index ea23e750..bb9fcc14 100644 --- a/test/unit/hpa_sec_integration.c +++ b/test/unit/hpa_sec_integration.c @@ -178,6 +178,9 @@ TEST_BEGIN(test_hpa_sec) { expect_zu_eq(hpa_stats.psset_stats.merged.nactive, target_nallocs, ""); expect_zu_eq(hpa_stats.secstats.bytes, (target_nallocs - 1) * PAGE, "sec should have extra pages"); + expect_u64_eq( + hpa_stats.nonderived_stats.alloc_batch_nallocs[target_nallocs], 1, + ""); /* Alloc/dealloc NALLOCS times and confirm extents are in sec. 
*/ edata_t *edatas[NALLOCS]; @@ -193,6 +196,11 @@ TEST_BEGIN(test_hpa_sec) { hpa_stats.psset_stats.merged.nactive, expected_nactive, ""); expect_zu_eq(hpa_stats.secstats.bytes, (target_nallocs - 1) * PAGE, "multiple refills (every target_nallocs allocations)"); + const size_t expected_nsuccesses = (NALLOCS + 1 + target_nallocs - 1) + / target_nallocs; + expect_u64_eq( + hpa_stats.nonderived_stats.alloc_batch_nallocs[target_nallocs], + expected_nsuccesses, ""); for (int i = 0; i < NALLOCS - 1; i++) { pai_dalloc(