From f6fc71f79ce98d360c46dd9a30bda77546a8903f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 08:36:19 -0700 Subject: [PATCH] [EASY] Encapsulate better, do not pass hpa_shard when hooks are enough, move shard independent actions to hpa_utils --- Makefile.in | 1 + include/jemalloc/internal/hpa_utils.h | 74 ++++++++++++---- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 6 ++ src/hpa.c | 87 +------------------ src/hpa_utils.c | 33 +++++++ test/unit/hpa_vectorized_madvise.c | 71 +-------------- .../unit/hpa_vectorized_madvise_large_batch.c | 71 ++++++++++++++- 14 files changed, 198 insertions(+), 175 deletions(-) create mode 100644 src/hpa_utils.c diff --git a/Makefile.in b/Makefile.in index c63e6f8f..4dd4ce85 100644 --- a/Makefile.in +++ b/Makefile.in @@ -124,6 +124,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ + $(srcroot)src/hpa_utils.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h index 53bcb670..6b006cff 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -2,8 +2,20 @@ #define JEMALLOC_INTERNAL_HPA_UTILS_H #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/extent.h" #define HPA_MIN_VAR_VEC_SIZE 8 +/* + * This is used for jemalloc internal tuning and may change in the future based + * on production traffic. + * + * This value protects two things: + * 1. Stack size + * 2. Number of huge pages that are being purged in a batch as we do not + * allow allocations while making madvise syscall. + */ +#define HPA_PURGE_BATCH_MAX 16 + #ifdef JEMALLOC_HAVE_PROCESS_MADVISE typedef struct iovec hpa_io_vector_t; #else @@ -13,27 +25,35 @@ typedef struct { } hpa_io_vector_t; #endif +static inline size_t +hpa_process_madvise_max_iovec_len(void) { + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 + ? HPA_MIN_VAR_VEC_SIZE + : opt_process_madvise_max_batch; +} + /* Actually invoke hooks. If we fail vectorized, use single purges */ static void hpa_try_vectorized_purge( - hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + hpa_hooks_t *hooks, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + && !hooks->vectorized_purge(vec, vlen, nbytes); if (!success) { /* On failure, it is safe to purge again (potential perf - * penalty) If kernel can tell exactly which regions - * failed, we could avoid that penalty. - */ + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge( - vec[i].iov_base, vec[i].iov_len); + hooks->purge(vec[i].iov_base, vec[i].iov_len); } } } /* - * This struct accumulates the regions for process_madvise. - * It invokes the hook when batch limit is reached + * This structure accumulates the regions for process_madvise. It invokes the + * hook when batch limit is reached. */ typedef struct { hpa_io_vector_t *vp; @@ -51,16 +71,16 @@ hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { } static inline void -hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { assert(ra->total_bytes > 0 && ra->cur > 0); - hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + hpa_try_vectorized_purge(hooks, ra->vp, ra->cur, ra->total_bytes); ra->cur = 0; ra->total_bytes = 0; } static inline void hpa_range_accum_add( - hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_hooks_t *hooks) { assert(ra->cur < ra->capacity); ra->vp[ra->cur].iov_base = addr; @@ -69,14 +89,14 @@ hpa_range_accum_add( ra->cur++; if (ra->cur == ra->capacity) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } static inline void -hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { if (ra->cur > 0) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } @@ -114,4 +134,28 @@ struct hpa_purge_batch_s { size_t npurged_hp_total; }; +static inline bool +hpa_batch_full(hpa_purge_batch_t *b) { + /* It's okay for ranges to go above */ + return b->npurged_hp_total == b->max_hp + || b->item_cnt == b->items_capacity + || b->nranges >= b->range_watermark; +} + +static inline void +hpa_batch_pass_start(hpa_purge_batch_t *b) { + b->item_cnt = 0; + b->nranges = 0; + b->ndirty_in_batch = 0; +} + +static inline bool +hpa_batch_empty(hpa_purge_batch_t *b) { + return b->item_cnt == 0; +} + +/* Purge pages in a batch using given hooks */ +void hpa_purge_batch( + hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz); + #endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index fff77a4b..abdeb7b7 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -380,4 +381,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 53d4af8d..1f39cb91 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 10514d35..0b1e1707 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index cda827be..54462516 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/src/hpa.c b/src/hpa.c index f6d46b25..5e3727a1 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -473,70 +473,6 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } -/* - * This is used for jemalloc internal tuning and may change in the - * future based on production traffic. - * - * This value protects two things: - * 1. Stack size - * 2. Number of huge pages that are being purged in a batch as - * we do not allow allocations while making madvise syscall. - */ -#define HPA_PURGE_BATCH_MAX_DEFAULT 16 - -#ifndef JEMALLOC_JET -# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT -#else -size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; -size_t -hpa_purge_max_batch_size_for_test_set(size_t new_size) { - size_t old_size = hpa_purge_max_batch_size_for_test; - hpa_purge_max_batch_size_for_test = new_size; - return old_size; -} -# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test -#endif - -static inline size_t -hpa_process_madvise_max_iovec_len(void) { - assert( - opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); - return opt_process_madvise_max_batch == 0 - ? HPA_MIN_VAR_VEC_SIZE - : opt_process_madvise_max_batch; -} - -static inline void -hpa_purge_actual_unlocked( - hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) { - assert(batch_sz > 0); - - size_t len = hpa_process_madvise_max_iovec_len(); - VARIABLE_ARRAY(hpa_io_vector_t, vec, len); - - hpa_range_accum_t accum; - hpa_range_accum_init(&accum, vec, len); - - for (size_t i = 0; i < batch_sz; ++i) { - /* Actually do the purging, now that the lock is dropped. */ - if (batch[i].dehugify) { - shard->central->hooks.dehugify( - hpdata_addr_get(batch[i].hp), HUGEPAGE); - } - void *purge_addr; - size_t purge_size; - size_t total_purged_on_one_hp = 0; - while (hpdata_purge_next( - batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { - total_purged_on_one_hp += purge_size; - assert(total_purged_on_one_hp <= HUGEPAGE); - hpa_range_accum_add( - &accum, purge_addr, purge_size, shard); - } - } - hpa_range_accum_finish(&accum, shard); -} - static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { return (hpa_is_hugify_lazy(shard) @@ -624,26 +560,6 @@ hpa_purge_finish_hp( psset_update_end(&shard->psset, hp_item->hp); } -static inline bool -hpa_batch_full(hpa_purge_batch_t *b) { - /* It's okay for ranges to go above */ - return b->npurged_hp_total == b->max_hp - || b->item_cnt == b->items_capacity - || b->nranges >= b->range_watermark; -} - -static inline void -hpa_batch_pass_start(hpa_purge_batch_t *b) { - b->item_cnt = 0; - b->nranges = 0; - b->ndirty_in_batch = 0; -} - -static inline bool -hpa_batch_empty(hpa_purge_batch_t *b) { - return b->item_cnt == 0; -} - /* Returns number of huge pages purged. */ static inline size_t hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { @@ -679,8 +595,9 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { if (hpa_batch_empty(&batch)) { break; } + hpa_hooks_t *hooks = &shard->central->hooks; malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt); + hpa_purge_batch(hooks, batch.items, batch.item_cnt); malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ diff --git a/src/hpa_utils.c b/src/hpa_utils.c new file mode 100644 index 00000000..59bb0d1f --- /dev/null +++ b/src/hpa_utils.c @@ -0,0 +1,33 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_utils.h" + +void +hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) { + assert(batch_sz > 0); + + size_t len = hpa_process_madvise_max_iovec_len(); + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + + for (size_t i = 0; i < batch_sz; ++i) { + /* Actually do the purging, now that the lock is dropped. */ + if (batch[i].dehugify) { + hooks->dehugify(hpdata_addr_get(batch[i].hp), HUGEPAGE); + } + void *purge_addr; + size_t purge_size; + size_t total_purged_on_one_hp = 0; + while (hpdata_purge_next( + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { + total_purged_on_one_hp += purge_size; + assert(total_purged_on_one_hp <= HUGEPAGE); + hpa_range_accum_add( + &accum, purge_addr, purge_size, hooks); + } + } + hpa_range_accum_finish(&accum, hooks); +} diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index c66811e1..e82f0ffb 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -253,77 +253,8 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { } TEST_END -size_t hpa_purge_max_batch_size_for_test_set(size_t new_size); -TEST_BEGIN(test_more_pages_than_batch_page_size) { - test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0) - || HUGEPAGE_PAGES <= 4); - - size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - opts.min_purge_interval_ms = 0; - ndefer_vec_purge_calls = 0; - ndefer_purge_calls = 0; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - enum { NALLOCS = 8 * HUGEPAGE_PAGES }; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc( - tsdn, &shard->pai, edatas[i], &deferred_work_generated); - } - - hpa_shard_do_deferred_work(tsdn, shard); - - /* - * Strict minimum purge interval is not set, we should purge as long as - * we have dirty pages. - */ - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - - /* We have page batch size = 1. - * we have 5 * HP active pages, 3 * HP dirty pages - * To achieve the balance of 25% max dirty we need to - * purge 2 pages. Since batch is 1 that must be 2 calls - * no matter what opt_process_madvise_max_batch is - */ - size_t nexpected = 2; - expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); - expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); - ndefer_vec_purge_calls = 0; - - hpa_purge_max_batch_size_for_test_set(old_page_batch); - - destroy_test_data(shard); -} -TEST_END - int main(void) { return test_no_reentrancy(test_vectorized_failure_fallback, - test_more_regions_purged_from_one_page, - test_more_pages_than_batch_page_size); + test_more_regions_purged_from_one_page); } diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 8e7be7c0..d542f72a 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/nstime.h" #define SHARD_IND 111 @@ -195,7 +196,75 @@ TEST_BEGIN(test_vectorized_purge) { } TEST_END +TEST_BEGIN(test_purge_more_than_one_batch_pages) { + test_skip_if(!hpa_supported() + || (opt_process_madvise_max_batch < HPA_PURGE_BATCH_MAX) + || HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + opts.dirty_mult = FXP_INIT_PERCENT(1); + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum { NALLOCS = HPA_PURGE_BATCH_MAX * 3 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + for (int i = 0; i < HPA_PURGE_BATCH_MAX * 2 * (int)HUGEPAGE_PAGES; + i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We have page batch size = HPA_PURGE_BATCH_MAX. We have + * HPA_PURGE_BATCH_MAX active pages, 2 * HPA_PURGE_BATCH_MAX dirty. + * To achieve the balance of 1% max dirty we need to purge more than one + * batch. + */ + size_t nexpected = 2; + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { - return test_no_reentrancy(test_vectorized_purge); + return test_no_reentrancy( + test_vectorized_purge, test_purge_more_than_one_batch_pages); }