diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index 360653f9..bb3462f5 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -51,6 +51,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state,
 ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state);
 void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
     bool all);
+void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
 void arena_reset(tsd_t *tsd, arena_t *arena);
 void arena_destroy(tsd_t *tsd, arena_t *arena);
 void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h
index 778d1c92..27adefc0 100644
--- a/include/jemalloc/internal/hpa.h
+++ b/include/jemalloc/internal/hpa.h
@@ -55,6 +55,7 @@ struct hpa_shard_s {
 	malloc_mutex_t mtx;
 	/* The base metadata allocator. */
 	base_t *base;
+
 	/*
 	 * This edata cache is the one we use when allocating a small extent
 	 * from a pageslab. The pageslab itself comes from the centralized
@@ -122,6 +123,10 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
 void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard);
 void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);
 
+void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
+    bool deferral_allowed);
+void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard);
+
 /*
  * We share the fork ordering with the PA and arena prefork handling; that's why
  * these are 3 and 4 rather than 0 and 1.
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h
index 5ff00725..ef162193 100644
--- a/include/jemalloc/internal/hpa_opts.h
+++ b/include/jemalloc/internal/hpa_opts.h
@@ -32,6 +32,14 @@ struct hpa_shard_opts_s {
 	 * active_pages. This may be set to (fxp_t)-1 to disable purging.
 	 */
 	fxp_t dirty_mult;
+
+	/*
+	 * Whether or not the PAI methods are allowed to defer work to a
+	 * subsequent hpa_shard_do_deferred_work() call. Practically, this
+	 * corresponds to background threads being enabled. We track this
+	 * ourselves for encapsulation purposes.
+	 */
+	bool deferral_allowed;
 };
 
 #define HPA_SHARD_OPTS_DEFAULT { \
@@ -42,7 +50,15 @@
 	/* dehugification_threshold */ \
 	HUGEPAGE * 20 / 100, \
 	/* dirty_mult */ \
-	FXP_INIT_PERCENT(25) \
+	FXP_INIT_PERCENT(25), \
+	/* \
+	 * deferral_allowed \
+	 * \
+	 * Really, this is always set by the arena during creation \
+	 * or by an hpa_shard_set_deferral_allowed call, so the value \
+	 * we put here doesn't matter. \
+	 */ \
+	false \
 }
 
 #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */
diff --git a/src/arena.c b/src/arena.c
index 3ff91572..bdc120fa 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -461,6 +461,12 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 	arena_decay_muzzy(tsdn, arena, is_background_thread, all);
 }
 
+/* Called from background threads. */
+void
+arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) {
+	arena_decay(tsdn, arena, true, false);
+}
+
 void
 arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) {
 	bool generated_dirty;
@@ -1565,7 +1571,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	 * so arena_hpa_global is not yet initialized.
 	 */
 	if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) {
-		if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &opt_hpa_opts,
+		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
+		hpa_shard_opts.deferral_allowed = background_thread_enabled();
+		if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts,
 		    &opt_hpa_sec_opts)) {
 			goto label_error;
 		}
diff --git a/src/background_thread.c b/src/background_thread.c
index 7302a303..edcf786e 100644
--- a/src/background_thread.c
+++ b/src/background_thread.c
@@ -291,7 +291,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne
 		if (!arena) {
 			continue;
 		}
-		arena_decay(tsdn, arena, true, false);
+		arena_do_deferred_work(tsdn, arena);
 		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
 			/* Min interval will be used. */
 			continue;
diff --git a/src/hpa.c b/src/hpa.c
index ba02f795..ee25e944 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -426,17 +426,29 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	return true;
 }
 
+/*
+ * Execution of deferred work is forced if it's triggered by an explicit
+ * hpa_shard_do_deferred_work() call.
+ */
 static void
-hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
+hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
+    bool forced) {
 	bool hugified;
 	bool purged;
 	size_t nloop = 0;
-	/* Just *some* bound, to impose a worst-case latency bound. */
-	size_t maxloops = 100;;
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	if (!forced && shard->opts.deferral_allowed) {
+		return;
+	}
+	/*
+	 * If we're on a background thread, do work so long as there's work to
+	 * be done. Otherwise, bound latency to not be *too* bad by doing at
+	 * most a small fixed number of operations.
+	 */
+	size_t maxloops = (forced ? (size_t)-1 : 8);
 	do {
-		malloc_mutex_assert_owner(tsdn, &shard->mtx);
 		hugified = hpa_try_hugify(tsdn, shard);
-
+		malloc_mutex_assert_owner(tsdn, &shard->mtx);
 		purged = false;
 		if (hpa_should_purge(tsdn, shard)) {
 			purged = hpa_try_purge(tsdn, shard);
@@ -528,7 +540,7 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 		edata_list_active_append(results, edata);
 	}
 
-	hpa_do_deferred_work(tsdn, shard);
+	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 	return nsuccess;
 }
@@ -740,7 +752,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
 		edata_list_active_remove(list, edata);
 		hpa_dalloc_locked(tsdn, shard, edata);
 	}
-	hpa_do_deferred_work(tsdn, shard);
+	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 
@@ -800,6 +812,26 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
 	}
 }
 
+void
+hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
+    bool deferral_allowed) {
+	malloc_mutex_lock(tsdn, &shard->mtx);
+	bool deferral_previously_allowed = shard->opts.deferral_allowed;
+	shard->opts.deferral_allowed = deferral_allowed;
+	if (deferral_previously_allowed && !deferral_allowed) {
+		hpa_shard_maybe_do_deferred_work(tsdn, shard,
+		    /* forced */ true);
+	}
+	malloc_mutex_unlock(tsdn, &shard->mtx);
+}
+
+void
+hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_lock(tsdn, &shard->mtx);
+	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true);
+	malloc_mutex_unlock(tsdn, &shard->mtx);
+}
+
 void
 hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_prefork(tsdn, &shard->grow_mtx);
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 6ff9f97b..85d68639 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1788,8 +1788,10 @@ malloc_init_hard_a0_locked() {
 			opt_hpa = false;
 		}
 	} else if (opt_hpa) {
-		if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, &opt_hpa_opts,
-		    &opt_hpa_sec_opts)) {
+		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
+		hpa_shard_opts.deferral_allowed = background_thread_enabled();
+		if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard,
+		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
			return true;
 		}
 	}
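
Note for reviewers: the new hpa_shard_maybe_do_deferred_work() has three regimes, depending on the forced flag and shard->opts.deferral_allowed. The standalone sketch below is a minimal model of just that gating decision and the loop bound it selects; it is not part of the patch, and the toy_* names are hypothetical stand-ins for the jemalloc types.

/*
 * Illustrative sketch: models only the deferral gating of
 * hpa_shard_maybe_do_deferred_work() (forced vs. deferral_allowed)
 * and its choice of loop bound.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
	bool deferral_allowed;	/* mirrors shard->opts.deferral_allowed */
} toy_shard_t;

/*
 * Returns the maximum number of hugify/purge iterations the real code
 * would attempt; 0 means "defer entirely to a later forced call".
 */
static size_t
toy_deferred_work_bound(const toy_shard_t *shard, bool forced) {
	/*
	 * Inline (non-forced) call sites defer when a background thread
	 * is expected to do the work later.
	 */
	if (!forced && shard->deferral_allowed) {
		return 0;
	}
	/* Forced calls drain all pending work; inline calls bound latency. */
	return forced ? (size_t)-1 : 8;
}

int
main(void) {
	toy_shard_t shard = { .deferral_allowed = true };
	/* Inline path with background threads enabled: fully deferred. */
	printf("inline, deferral on:  %zu\n",
	    toy_deferred_work_bound(&shard, /* forced */ false));
	/* Explicit hpa_shard_do_deferred_work(): unbounded drain. */
	printf("forced, deferral on:  %zu\n",
	    toy_deferred_work_bound(&shard, /* forced */ true));
	/* Background threads disabled: inline path does bounded work. */
	shard.deferral_allowed = false;
	printf("inline, deferral off: %zu\n",
	    toy_deferred_work_bound(&shard, /* forced */ false));
	return 0;
}

The same gating explains hpa_shard_set_deferral_allowed() in the hunk above: only the allowed-to-disallowed transition forces an immediate drain, so work queued under the assumption that background threads would handle it is not stranded when they are turned off.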