diff --git a/Makefile.in b/Makefile.in index 1914fc28..b4102d0b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,6 +137,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ + $(srcroot)src/peak_demand.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ @@ -252,6 +253,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ + $(srcroot)test/unit/peak_demand.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index d788d051..a384d04a 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/peak_demand.h" #include "jemalloc/internal/psset.h" typedef struct hpa_central_s hpa_central_t; @@ -147,6 +148,9 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; + + /* Peak active memory sliding window statistics. */ + peak_demand_t peak_demand; }; bool hpa_hugepage_size_exceeds_limit(); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 42246172..816bb577 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -27,7 +27,8 @@ struct hpa_shard_opts_s { /* * The HPA purges whenever the number of pages exceeds dirty_mult * - * active_pages. This may be set to (fxp_t)-1 to disable purging. + * peak_active_pages. This may be set to (fxp_t)-1 to disable + * purging. */ fxp_t dirty_mult; @@ -59,6 +60,13 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. */ ssize_t experimental_max_purge_nhp; + + /* + * Sliding window duration to track active memory demand statistics. 
+ * This might be set to 0, to disable sliding window statistics + * tracking and use current number of active pages for purging instead. + */ + uint64_t peak_demand_window_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -83,7 +91,9 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1 \ + -1, \ + /* peak_demand_window_ms */ \ + 0 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h new file mode 100644 index 00000000..2664cbec --- /dev/null +++ b/include/jemalloc/internal/peak_demand.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H +#define JEMALLOC_INTERNAL_PEAK_DEMAND_H + +#include "jemalloc/internal/jemalloc_preamble.h" + +/* + * Implementation of peak active memory demand tracking. + * + * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware + * memory allocator" whitepaper. + * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf + * + * End goal is to track peak active memory usage over specified time interval. + * We do so by dividing this time interval into disjoint subintervals and + * storing value of maximum memory usage for each subinterval in a circular + * buffer. Nanoseconds resolution timestamp uniquely maps into epoch, which is + * used as an index to access circular buffer. + */ + +#define PEAK_DEMAND_LG_BUCKETS 4 +/* + * Number of buckets should be power of 2 to ensure modulo operation is + * optimized to bit masking by the compiler. + */ +#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS) + +typedef struct peak_demand_s peak_demand_t; +struct peak_demand_s { + /* + * Absolute value of current epoch, monotonically increases over time. Epoch + * value modulo number of buckets used as an index to access nactive_max + * array. + */ + uint64_t epoch; + + /* How many nanoseconds each epoch approximately takes. 
*/ + uint64_t epoch_interval_ns; + + /* + * Circular buffer to track maximum number of active pages for each + * epoch. + */ + size_t nactive_max[PEAK_DEMAND_NBUCKETS]; +}; + +void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms); + +/* Updates peak demand statistics with current number of active pages. */ +void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive); + +/* Returns maximum number of active pages in sliding window. */ +size_t peak_demand_nactive_max(peak_demand_t *peak_demand); + +#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..97a95fbf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..8529438c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..eace48ba 100644 --- 
a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..98085cfd 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index c55d9719..2c941ae8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,6 +106,7 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) +CTL_PROTO(opt_hpa_peak_demand_window_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -487,6 +488,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_peak_demand_window_ms"), + CTL(opt_hpa_peak_demand_window_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2255,6 +2258,8 @@ 
CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) +CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms, + opt_hpa_opts.peak_demand_window_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index 2a5d7e1f..c01dde13 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -63,6 +63,11 @@ hpa_supported(void) { return true; } +static bool +hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) { + return shard->opts.peak_demand_window_ms > 0; +} + static void hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); @@ -217,6 +222,11 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; + if (hpa_peak_demand_tracking_enabled(shard)) { + peak_demand_init(&shard->peak_demand, + shard->opts.peak_demand_window_ms); + } + /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just @@ -294,8 +304,37 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - return fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); + /* + * We are trying to estimate the maximum amount of active memory we'll + * need in the near future. We do so by projecting future active + * memory demand (based on peak active memory usage we observed in the + * past within sliding window) and adding slack on top of it (an + * overhead is reasonable to have in exchange for higher hugepage + * coverage). When peak demand tracking is off, the projection of future + * active memory is the active memory we have right now. 
+ * + * Estimation is essentially the same as nactive_max * (1 + + * dirty_mult), but expressed differently to factor in necessary + * implementation details. + */ + size_t nactive = psset_nactive(&shard->psset); + size_t nactive_max = nactive; + if (hpa_peak_demand_tracking_enabled(shard)) { + /* + * We release shard->mtx, when we do a syscall to purge dirty + * memory, so someone might grab shard->mtx, allocate memory + * from this shard and update psset's nactive counter, before + * peak_demand_update(...) was called and we'll get + * peak_demand_nactive_max(...) <= nactive as a result. + */ + size_t peak = peak_demand_nactive_max(&shard->peak_demand); + if (peak > nactive_max) { + nactive_max = peak; + } + } + size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult); + size_t estimation = nactive_max + slack; + return estimation - nactive; } static bool @@ -548,6 +587,16 @@ static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + + /* Update active memory demand statistics. 
*/ + if (hpa_peak_demand_tracking_enabled(shard)) { + nstime_t now; + shard->central->hooks.curtime(&now, + /* first_reading */ true); + peak_demand_update(&shard->peak_demand, &now, + psset_nactive(&shard->psset)); + } + if (!forced && shard->opts.deferral_allowed) { return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f4bc785..d08771f8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1568,6 +1568,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + CONF_HANDLE_UINT64_T( + opt_hpa_opts.peak_demand_window_ms, + "hpa_peak_demand_window_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/peak_demand.c b/src/peak_demand.c new file mode 100644 index 00000000..49f28930 --- /dev/null +++ b/src/peak_demand.c @@ -0,0 +1,74 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/peak_demand.h" + +void +peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) { + assert(interval_ms > 0); + peak_demand->epoch = 0; + uint64_t interval_ns = interval_ms * 1000 * 1000; + peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS; + memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max)); +} + +static uint64_t +peak_demand_epoch_ind(peak_demand_t *peak_demand) { + return peak_demand->epoch % PEAK_DEMAND_NBUCKETS; +} + +static nstime_t +peak_demand_next_epoch_advance(peak_demand_t *peak_demand) { + uint64_t epoch = peak_demand->epoch; + uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns; + nstime_t next; + nstime_init(&next, ns); + return next; +} + +static uint64_t +peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand, + const nstime_t *now) { + nstime_t next_epoch_advance = + 
peak_demand_next_epoch_advance(peak_demand); + if (nstime_compare(now, &next_epoch_advance) < 0) { + return peak_demand_epoch_ind(peak_demand); + } + uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns; + assert(next_epoch > peak_demand->epoch); + /* + * If we missed more epochs than the capacity of the circular buffer + * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS + * items as we don't want to zero out the same item multiple times. + */ + if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) { + peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS; + } + while (peak_demand->epoch < next_epoch) { + ++peak_demand->epoch; + uint64_t ind = peak_demand_epoch_ind(peak_demand); + peak_demand->nactive_max[ind] = 0; + } + return peak_demand_epoch_ind(peak_demand); +} + +void +peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive) { + uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now); + size_t *epoch_nactive = &peak_demand->nactive_max[ind]; + if (nactive > *epoch_nactive) { + *epoch_nactive = nactive; + } +} + +size_t +peak_demand_nactive_max(peak_demand_t *peak_demand) { + size_t nactive_max = peak_demand->nactive_max[0]; + for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) { + if (peak_demand->nactive_max[i] > nactive_max) { + nactive_max = peak_demand->nactive_max[i]; + } + } + return nactive_max; +} diff --git a/src/stats.c b/src/stats.c index 58874bf8..bd0167fb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1657,6 +1657,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") + OPT_WRITE_UINT64("hpa_peak_demand_window_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 6c42729a..ceed9bd8 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,26 +37,9 @@ static hpa_shard_opts_t 
test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1 -}; - -static hpa_shard_opts_t test_hpa_shard_opts_purge = { - /* slab_max_alloc */ - HUGEPAGE, - /* hugification_threshold */ - 0.9 * HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(11), - /* deferral_allowed */ - true, - /* hugify_delay_ms */ - 0, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 + -1, + /* peak_demand_window_ms */ + 0 }; static hpa_shard_t * @@ -480,8 +463,14 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_purge); + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.slab_max_alloc = HUGEPAGE; + opts.hugification_threshold = 0.9 * HUGEPAGE; + opts.dirty_mult = FXP_INIT_PERCENT(11); + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -489,8 +478,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. 
*/ - const size_t npages = - test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t npages = opts.hugification_threshold / PAGE + 1; const size_t size = npages * PAGE; bool deferred_work_generated = false; @@ -733,6 +721,140 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } TEST_END +TEST_BEGIN(test_demand_purge_slack) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* Allow 10% of slack. */ + opts.dirty_mult = FXP_INIT_PERCENT(10); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. */ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, so we don't need + * to purge anything just yet. 
+ */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages, we allowed to + * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we + * have 5 dirty hugepages, so we should purge 4 of them. + */ + expect_zu_eq(4, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_demand_purge_tight) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* No slack allowed. */ + opts.dirty_mult = FXP_INIT_PERCENT(0); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. 
 */ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, so we don't need + * to purge anything just yet. + */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages. We have + * hpa_dirty_mult = 0, so we are allowed to keep 11 * 0 = 0 dirty + * hugepages, but we have 5, all of them should be purged. + */ + expect_zu_eq(5, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -756,5 +878,7 @@ main(void) { test_no_min_purge_interval, test_min_purge_interval, test_purge, - test_experimental_max_purge_nhp); + test_experimental_max_purge_nhp, + test_demand_purge_slack, + test_demand_purge_tight); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 57aa59e5..366b992b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -295,6 +295,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); + TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c new file mode 100644 index 
00000000..ca2506b8 --- /dev/null +++ b/test/unit/peak_demand.c @@ -0,0 +1,162 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/peak_demand.h" + +TEST_BEGIN(test_peak_demand_init) { + peak_demand_t peak_demand; + /* + * Exact value doesn't matter here as we don't advance epoch in this + * test. + */ + uint64_t interval_ms = 1000; + peak_demand_init(&peak_demand, interval_ms); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0, + "Unexpected nactive_max value after initialization"); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_basic) { + peak_demand_t peak_demand; + /* Make each bucket exactly one second to simplify math. */ + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_skip_epochs) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1), + /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates are not evenly spread over time. When we update at + * 2 * (PEAK_DEMAND_NBUCKETS - 1) second, 1024 value is already out of + * sliding window, but 512 is still present. 
+ */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_rewrite_optimization) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX); + /* + * This update should take reasonable time if optimization is working + * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test + * will take a long time to finish. + */ + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_out_of_interval) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates frequency is lower than tracking interval, so we should + * have only last value. + */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_static_epoch) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + nstime_init_zero(&now); + + /* Big enough value to overwrite values in circular buffer. 
*/ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + /* + * We should override value in the same bucket as now value + * doesn't change between iterations. + */ + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_epoch_advance) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + /* Big enough value to overwrite values in circular buffer. */ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + uint64_t sec = nactive; + nstime_init2(&now, sec, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_peak_demand_init, + test_peak_demand_update_basic, + test_peak_demand_update_skip_epochs, + test_peak_demand_update_rewrite_optimization, + test_peak_demand_update_out_of_interval, + test_peak_demand_update_static_epoch, + test_peak_demand_update_epoch_advance); +}