From 76d6771770dd59cbaf9b3195918ce130bcdd96ab Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 19 Nov 2025 14:25:58 -0800 Subject: [PATCH] Add experimental_hpa_enforce_hugify --- .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 61 +++++++------- src/hpa.c | 10 ++- src/jemalloc.c | 6 +- src/stats.c | 3 +- test/unit/hpa.c | 83 ++++++++++++++++++- test/unit/mallctl.c | 17 ++-- 7 files changed, 133 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index a319dc81..ea739ea8 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -16,6 +16,7 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_experimental_hpa_start_huge_if_thp_always; +extern bool opt_experimental_hpa_enforce_hugify; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/src/ctl.c b/src/ctl.c index d3443a13..3e65e23f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -99,6 +99,7 @@ CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always) +CTL_PROTO(opt_experimental_hpa_enforce_hugify) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -467,6 +468,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("experimental_hpa_start_huge_if_thp_always"), CTL(opt_experimental_hpa_start_huge_if_thp_always)}, + {NAME("experimental_hpa_enforce_hugify"), + CTL(opt_experimental_hpa_enforce_hugify)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, @@ -1108,30 +1111,30 @@ 
ctl_arena_stats_sdmerge( } ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.npurge, + .decay_dirty.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.nmadvise, + .decay_dirty.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .nmadvise); + .nmadvise); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.purged, + .decay_dirty.purged, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .purged); + .purged); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.npurge, + .decay_muzzy.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.nmadvise, + .decay_muzzy.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .nmadvise); + .nmadvise); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.purged, + .decay_muzzy.purged, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .purged); + .purged); #define OP(mtx) \ malloc_mutex_prof_merge( \ @@ -1390,7 +1393,7 @@ ctl_refresh(tsdn_t *tsdn) { background_thread_lock); } else { memset(&ctl_stats->mutex_prof_data - [global_prof_mutex_background_thread], + [global_prof_mutex_background_thread], 0, sizeof(mutex_prof_data_t)); } /* We own ctl mutex already. */ @@ -2136,6 +2139,8 @@ CTL_RO_NL_GEN( CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always, opt_experimental_hpa_start_huge_if_thp_always, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_enforce_hugify, + opt_experimental_hpa_enforce_hugify, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) /* HPA options. 
*/ @@ -3770,35 +3775,29 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_base, diff --git a/src/hpa.c b/src/hpa.c index 3687e6ea..f6d46b25 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -29,6 +29,7 @@ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; +bool opt_experimental_hpa_enforce_hugify = false; bool hpa_hugepage_size_exceeds_limit(void) { @@ -430,7 +431,7 @@ hpa_update_purge_hugify_eligibility( /* Assume it is huge without the need to madvise */ hpa_assume_huge(tsdn, shard, ps); } - if (hpa_is_hugify_lazy(shard) + if ((hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) && hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; @@ -538,8 +539,9 @@ hpa_purge_actual_unlocked( static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { - return hpa_is_hugify_lazy(shard) && hpdata_huge_get(ps) - && !hpdata_empty(ps); + return (hpa_is_hugify_lazy(shard) + || opt_experimental_hpa_enforce_hugify) + && hpdata_huge_get(ps) && !hpdata_empty(ps); } /* Prepare purge of one page. Return number of dirty regular pages on it @@ -736,7 +738,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { * what user believes is the truth on the target system, but we won't * update nhugifies stat as system call is not being made. 
*/ - if (hpa_is_hugify_lazy(shard)) { + if (hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) { malloc_mutex_unlock(tsdn, &shard->mtx); bool err = shard->central->hooks.hugify( hpdata_addr_get(to_hugify), HUGEPAGE, diff --git a/src/jemalloc.c b/src/jemalloc.c index 0f6ff0c3..6844da5a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1305,6 +1305,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL( opt_experimental_hpa_start_huge_if_thp_always, "experimental_hpa_start_huge_if_thp_always") + CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, + "experimental_hpa_enforce_hugify") CONF_HANDLE_BOOL( opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { @@ -1554,7 +1556,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int m = percpu_arena_mode_names_base; - m < percpu_arena_mode_names_limit; m++) { + m < percpu_arena_mode_names_limit; m++) { if (strncmp(percpu_arena_mode_names[m], v, vlen) == 0) { @@ -1651,7 +1653,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("hpa_hugify_style", k, klen) == 0) { bool match = false; for (int m = 0; m < hpa_hugify_style_limit; - m++) { + m++) { if (strncmp(hpa_hugify_style_names[m], v, vlen) == 0) { diff --git a/src/stats.c b/src/stats.c index 4e04336e..2ccac6c9 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1115,7 +1115,7 @@ stats_arena_mutexes_print( CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena( @@ -1605,6 +1605,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always") 
+ OPT_WRITE_BOOL("experimental_hpa_enforce_hugify") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 0398e21a..5937601e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -286,7 +286,7 @@ TEST_BEGIN(test_stress) { size_t ntreenodes = 0; for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; - contents = mem_tree_next(&tree, contents)) { + contents = mem_tree_next(&tree, contents)) { ntreenodes++; node_check(&tree, contents); } @@ -1441,6 +1441,84 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { } TEST_END +TEST_BEGIN(test_experimental_hpa_enforce_hugify) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + bool old_opt_value = opt_experimental_hpa_enforce_hugify; + opt_experimental_hpa_enforce_hugify = true; + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + /* Eager style: without enforce, no hugify syscall on threshold */ + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.hugify_style = hpa_hugify_style_eager; + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + opts.min_purge_interval_ms = 0; + opts.hugification_threshold = 0.9 * HUGEPAGE; + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES * 95 / 100 }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, 
&deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "Page was already huge"); + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + /* Deallocate half to trigger purge */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + /* + * Enforce hugify should have triggered dehugify syscall during purge + * when the page is huge and not empty. + */ + expect_zu_ge(ndefer_dehugify_calls, 1, + "Should have triggered dehugify syscall with eager style"); + + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 1, "Expected one hugify syscall"); + + opt_experimental_hpa_enforce_hugify = old_opt_value; + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -1464,5 +1542,6 @@ main(void) { test_assume_huge_purge_fully, test_eager_with_purge_threshold, test_delay_when_not_allowed_deferral, test_deferred_until_time, test_eager_no_hugify_on_threshold, - test_hpa_hugify_style_none_huge_no_syscall); + test_hpa_hugify_style_none_huge_no_syscall, + test_experimental_hpa_enforce_hugify); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 2415fda1..f409f687 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -302,6 +302,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT( bool, experimental_hpa_start_huge_if_thp_always, always); + TEST_MALLCTL_OPT(bool, experimental_hpa_enforce_hugify, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, 
always); TEST_MALLCTL_OPT(bool, retain, always); @@ -616,8 +617,8 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arena.0.dirty_decay_ms", @@ -649,8 +650,8 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", @@ -869,8 +870,8 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { 0, "Expected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arenas.dirty_decay_ms", @@ -902,8 +903,8 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { 0, "Expected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arenas.muzzy_decay_ms",