mirror of
https://github.com/jemalloc/jemalloc.git
synced 2026-04-14 14:41:42 +03:00
Add opt hpa_hugify_sync to hugify synchronously
Linux 6.1 introduced the `MADV_COLLAPSE` flag to perform a best-effort synchronous collapse of the native pages mapped by a memory range into transparent huge pages. Synchronous hugification might be beneficial for at least two reasons: we no longer rely on khugepaged, and we get instant feedback if the range wasn't hugified. If the `hpa_hugify_sync` option is on, we'll try to perform a synchronous collapse, and if it isn't successful, we'll fall back to asynchronous behaviour.
This commit is contained in:
parent
a361e886e2
commit
0ce13c6fb5
15 changed files with 141 additions and 8 deletions
10
configure.ac
10
configure.ac
|
|
@ -2491,6 +2491,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then
|
|||
if test "x${je_cv_madv_nocore}" = "xyes" ; then
|
||||
AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ])
|
||||
fi
|
||||
|
||||
dnl Check for madvise(..., MADV_COLLAPSE).
|
||||
JE_COMPILABLE([madvise(..., MADV_COLLAPSE)], [
|
||||
#include <sys/mman.h>
|
||||
], [
|
||||
madvise((void *)0, 0, MADV_COLLAPSE);
|
||||
], [je_cv_madv_collapse])
|
||||
if test "x${je_cv_madv_collapse}" = "xyes" ; then
|
||||
AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ])
|
||||
fi
|
||||
else
|
||||
dnl Check for posix_madvise.
|
||||
JE_COMPILABLE([posix_madvise], [
|
||||
|
|
|
|||
|
|
@ -61,6 +61,14 @@ struct hpa_shard_nonderived_stats_s {
|
|||
* Guarded by mtx.
|
||||
*/
|
||||
uint64_t nhugifies;
|
||||
|
||||
/*
|
||||
* The number of times we've tried to hugify a pageslab, but failed.
|
||||
*
|
||||
* Guarded by mtx.
|
||||
*/
|
||||
uint64_t nhugify_failures;
|
||||
|
||||
/*
|
||||
* The number of times we've dehugified a pageslab.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ struct hpa_hooks_s {
|
|||
void *(*map)(size_t size);
|
||||
void (*unmap)(void *ptr, size_t size);
|
||||
void (*purge)(void *ptr, size_t size);
|
||||
void (*hugify)(void *ptr, size_t size);
|
||||
bool (*hugify)(void *ptr, size_t size, bool sync);
|
||||
void (*dehugify)(void *ptr, size_t size);
|
||||
void (*curtime)(nstime_t *r_time, bool first_reading);
|
||||
uint64_t (*ms_since)(nstime_t *r_time);
|
||||
|
|
|
|||
|
|
@ -45,6 +45,11 @@ struct hpa_shard_opts_s {
|
|||
*/
|
||||
uint64_t hugify_delay_ms;
|
||||
|
||||
/*
|
||||
* Hugify pages synchronously.
|
||||
*/
|
||||
bool hugify_sync;
|
||||
|
||||
/*
|
||||
* Minimum amount of time between purges.
|
||||
*/
|
||||
|
|
@ -73,6 +78,8 @@ struct hpa_shard_opts_s {
|
|||
false, \
|
||||
/* hugify_delay_ms */ \
|
||||
10 * 1000, \
|
||||
/* hugify_sync */ \
|
||||
false, \
|
||||
/* min_purge_interval_ms */ \
|
||||
5 * 1000, \
|
||||
/* experimental_max_purge_nhp */ \
|
||||
|
|
|
|||
|
|
@ -308,6 +308,13 @@
|
|||
*/
|
||||
#undef JEMALLOC_HAVE_MADVISE_HUGE
|
||||
|
||||
/*
|
||||
* Defined if best-effort synchronous collapse of the native
|
||||
* pages mapped by the memory range into transparent huge pages is supported
|
||||
* via MADV_COLLAPSE arguments to madvise(2).
|
||||
*/
|
||||
#undef JEMALLOC_HAVE_MADVISE_COLLAPSE
|
||||
|
||||
/*
|
||||
* Methods for purging unused pages differ between operating systems.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -57,6 +57,15 @@
|
|||
# define JEMALLOC_MADV_FREE 8
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Can be defined at compile time, in cases, when it is known
|
||||
* madvise(..., MADV_COLLAPSE) feature is supported, but MADV_COLLAPSE
|
||||
* constant is not defined.
|
||||
*/
|
||||
#ifdef JEMALLOC_DEFINE_MADVISE_COLLAPSE
|
||||
# define JEMALLOC_MADV_COLLAPSE 25
|
||||
#endif
|
||||
|
||||
static const bool config_debug =
|
||||
#ifdef JEMALLOC_DEBUG
|
||||
true
|
||||
|
|
|
|||
|
|
@ -123,6 +123,7 @@ bool pages_purge_lazy(void *addr, size_t size);
|
|||
bool pages_purge_forced(void *addr, size_t size);
|
||||
bool pages_huge(void *addr, size_t size);
|
||||
bool pages_nohuge(void *addr, size_t size);
|
||||
bool pages_collapse(void *addr, size_t size);
|
||||
bool pages_dontdump(void *addr, size_t size);
|
||||
bool pages_dodump(void *addr, size_t size);
|
||||
bool pages_boot(void);
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ CTL_PROTO(opt_hpa)
|
|||
CTL_PROTO(opt_hpa_slab_max_alloc)
|
||||
CTL_PROTO(opt_hpa_hugification_threshold)
|
||||
CTL_PROTO(opt_hpa_hugify_delay_ms)
|
||||
CTL_PROTO(opt_hpa_hugify_sync)
|
||||
CTL_PROTO(opt_hpa_min_purge_interval_ms)
|
||||
CTL_PROTO(opt_experimental_hpa_max_purge_nhp)
|
||||
CTL_PROTO(opt_hpa_dirty_mult)
|
||||
|
|
@ -263,6 +264,7 @@ INDEX_PROTO(stats_arenas_i_extents_j)
|
|||
CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes)
|
||||
CTL_PROTO(stats_arenas_i_hpa_shard_npurges)
|
||||
CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies)
|
||||
CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures)
|
||||
CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies)
|
||||
|
||||
/* We have a set of stats for full slabs. */
|
||||
|
|
@ -462,6 +464,7 @@ static const ctl_named_node_t opt_node[] = {
|
|||
{NAME("hpa_hugification_threshold"),
|
||||
CTL(opt_hpa_hugification_threshold)},
|
||||
{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
|
||||
{NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)},
|
||||
{NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)},
|
||||
{NAME("experimental_hpa_max_purge_nhp"),
|
||||
CTL(opt_experimental_hpa_max_purge_nhp)},
|
||||
|
|
@ -834,6 +837,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = {
|
|||
{NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)},
|
||||
{NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)},
|
||||
{NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)},
|
||||
{NAME("nhugify_failures"),
|
||||
CTL(stats_arenas_i_hpa_shard_nhugify_failures)},
|
||||
{NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}
|
||||
};
|
||||
|
||||
|
|
@ -2200,6 +2205,7 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
|
|||
CTL_RO_NL_GEN(opt_hpa_hugification_threshold,
|
||||
opt_hpa_opts.hugification_threshold, size_t)
|
||||
CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t)
|
||||
CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool)
|
||||
CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms,
|
||||
uint64_t)
|
||||
CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp,
|
||||
|
|
@ -4061,6 +4067,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges,
|
|||
arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t);
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies,
|
||||
arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t);
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures,
|
||||
arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugify_failures,
|
||||
uint64_t);
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies,
|
||||
arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t);
|
||||
|
||||
|
|
|
|||
17
src/hpa.c
17
src/hpa.c
|
|
@ -210,6 +210,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
|
|||
shard->stats.npurge_passes = 0;
|
||||
shard->stats.npurges = 0;
|
||||
shard->stats.nhugifies = 0;
|
||||
shard->stats.nhugify_failures = 0;
|
||||
shard->stats.ndehugifies = 0;
|
||||
|
||||
/*
|
||||
|
|
@ -242,6 +243,7 @@ hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst,
|
|||
dst->npurge_passes += src->npurge_passes;
|
||||
dst->npurges += src->npurges;
|
||||
dst->nhugifies += src->nhugifies;
|
||||
dst->nhugify_failures += src->nhugify_failures;
|
||||
dst->ndehugifies += src->ndehugifies;
|
||||
}
|
||||
|
||||
|
|
@ -499,10 +501,23 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
|
|||
|
||||
malloc_mutex_unlock(tsdn, &shard->mtx);
|
||||
|
||||
shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);
|
||||
bool err = shard->central->hooks.hugify(hpdata_addr_get(to_hugify),
|
||||
HUGEPAGE, shard->opts.hugify_sync);
|
||||
|
||||
malloc_mutex_lock(tsdn, &shard->mtx);
|
||||
shard->stats.nhugifies++;
|
||||
if (err) {
|
||||
/*
|
||||
* When asynchronous hugification is used
|
||||
* (shard->opts.hugify_sync option is false), we are not
|
||||
* expecting to get here, unless something went terribly wrong.
|
||||
* Because underlying syscall is only setting kernel flag for
|
||||
* memory range (actual hugification happens asynchronously
|
||||
* and we are not getting any feedback about its outcome), we
|
||||
* expect syscall to be successful all the time.
|
||||
*/
|
||||
shard->stats.nhugify_failures++;
|
||||
}
|
||||
|
||||
psset_update_begin(&shard->psset, to_hugify);
|
||||
hpdata_hugify(to_hugify);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
static void *hpa_hooks_map(size_t size);
|
||||
static void hpa_hooks_unmap(void *ptr, size_t size);
|
||||
static void hpa_hooks_purge(void *ptr, size_t size);
|
||||
static void hpa_hooks_hugify(void *ptr, size_t size);
|
||||
static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync);
|
||||
static void hpa_hooks_dehugify(void *ptr, size_t size);
|
||||
static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading);
|
||||
static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime);
|
||||
|
|
@ -37,10 +37,27 @@ hpa_hooks_purge(void *ptr, size_t size) {
|
|||
pages_purge_forced(ptr, size);
|
||||
}
|
||||
|
||||
static void
|
||||
hpa_hooks_hugify(void *ptr, size_t size) {
|
||||
static bool
|
||||
hpa_hooks_hugify(void *ptr, size_t size, bool sync) {
|
||||
/*
|
||||
* We mark the memory range as huge regardless of which hugification
|
||||
* technique is used (synchronous or asynchronous) to have correct
|
||||
* VmFlags set for introspection and accounting purposes. If
|
||||
* synchronous hugification is enabled and pages_collapse call fails,
|
||||
* then we hope memory range will be hugified asynchronously by
|
||||
* khugepaged eventually. Right now, 3 out of 4 error return codes of
|
||||
* madvise(..., MADV_COLLAPSE) are retryable. Instead of retrying, we
|
||||
* just fallback to asynchronous khugepaged hugification to simplify
|
||||
* implementation, even if we might know khugepaged fallback will not
|
||||
* be successful (current madvise(..., MADV_COLLAPSE) implementation
|
||||
* hints, when EINVAL is returned it is likely that khugepaged won't be
|
||||
* able to collapse memory range into hugepage either).
|
||||
*/
|
||||
bool err = pages_huge(ptr, size);
|
||||
(void)err;
|
||||
if (sync) {
|
||||
err = pages_collapse(ptr, size);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -1093,6 +1093,15 @@ validate_hpa_settings(void) {
|
|||
if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) {
|
||||
had_conf_error = true;
|
||||
}
|
||||
#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE
|
||||
if (opt_hpa_opts.hugify_sync) {
|
||||
had_conf_error = true;
|
||||
malloc_printf(
|
||||
"<jemalloc>: hpa_hugify_sync config option is enabled, "
|
||||
"but MADV_COLLAPSE support was not detected at build "
|
||||
"time.");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1566,6 +1575,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
|||
0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
|
||||
false);
|
||||
|
||||
CONF_HANDLE_BOOL(
|
||||
opt_hpa_opts.hugify_sync, "hpa_hugify_sync");
|
||||
|
||||
CONF_HANDLE_UINT64_T(
|
||||
opt_hpa_opts.min_purge_interval_ms,
|
||||
"hpa_min_purge_interval_ms", 0, 0,
|
||||
|
|
|
|||
24
src/pages.c
24
src/pages.c
|
|
@ -567,6 +567,30 @@ pages_nohuge_unaligned(void *addr, size_t size) {
|
|||
return pages_nohuge_impl(addr, size, false);
|
||||
}
|
||||
|
||||
bool
|
||||
pages_collapse(void *addr, size_t size) {
|
||||
assert(PAGE_ADDR2BASE(addr) == addr);
|
||||
assert(PAGE_CEILING(size) == size);
|
||||
/*
|
||||
* There is one more MADV_COLLAPSE precondition that is not easy to
|
||||
* express with assert statement. In order to madvise(addr, size,
|
||||
* MADV_COLLAPSE) call to be successful, at least one page in the range
|
||||
* must currently be backed by physical memory. In particular, this
|
||||
* means we can't call pages_collapse on freshly mapped memory region.
|
||||
* See madvise(2) man page for more details.
|
||||
*/
|
||||
#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) && \
|
||||
(defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE))
|
||||
# if defined(MADV_COLLAPSE)
|
||||
return (madvise(addr, size, MADV_COLLAPSE) != 0);
|
||||
# elif defined(JEMALLOC_MADV_COLLAPSE)
|
||||
return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0);
|
||||
# endif
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool
|
||||
pages_dontdump(void *addr, size_t size) {
|
||||
assert(PAGE_ADDR2BASE(addr) == addr);
|
||||
|
|
|
|||
|
|
@ -844,6 +844,7 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i,
|
|||
uint64_t npurge_passes;
|
||||
uint64_t npurges;
|
||||
uint64_t nhugifies;
|
||||
uint64_t nhugify_failures;
|
||||
uint64_t ndehugifies;
|
||||
|
||||
CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes",
|
||||
|
|
@ -852,6 +853,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i,
|
|||
i, &npurges, uint64_t);
|
||||
CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies",
|
||||
i, &nhugifies, uint64_t);
|
||||
CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures",
|
||||
i, &nhugify_failures, uint64_t);
|
||||
CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies",
|
||||
i, &ndehugifies, uint64_t);
|
||||
|
||||
|
|
@ -860,11 +863,13 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i,
|
|||
" Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n"
|
||||
" Purges: %" FMTu64 " (%" FMTu64 " / sec)\n"
|
||||
" Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n"
|
||||
" Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n"
|
||||
" Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n"
|
||||
"\n",
|
||||
npurge_passes, rate_per_second(npurge_passes, uptime),
|
||||
npurges, rate_per_second(npurges, uptime),
|
||||
nhugifies, rate_per_second(nhugifies, uptime),
|
||||
nhugify_failures, rate_per_second(nhugify_failures, uptime),
|
||||
ndehugifies, rate_per_second(ndehugifies, uptime));
|
||||
|
||||
emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64,
|
||||
|
|
@ -873,6 +878,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i,
|
|||
&npurges);
|
||||
emitter_json_kv(emitter, "nhugifies", emitter_type_uint64,
|
||||
&nhugifies);
|
||||
emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64,
|
||||
&nhugify_failures);
|
||||
emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64,
|
||||
&ndehugifies);
|
||||
}
|
||||
|
|
@ -1578,6 +1585,7 @@ stats_general_print(emitter_t *emitter) {
|
|||
OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
|
||||
OPT_WRITE_SIZE_T("hpa_hugification_threshold")
|
||||
OPT_WRITE_UINT64("hpa_hugify_delay_ms")
|
||||
OPT_WRITE_BOOL("hpa_hugify_sync")
|
||||
OPT_WRITE_UINT64("hpa_min_purge_interval_ms")
|
||||
OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp")
|
||||
if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
|
|||
false,
|
||||
/* hugify_delay_ms */
|
||||
10 * 1000,
|
||||
/* hugify_sync */
|
||||
false,
|
||||
/* min_purge_interval_ms */
|
||||
5 * 1000,
|
||||
/* experimental_max_purge_nhp */
|
||||
|
|
@ -49,6 +51,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = {
|
|||
true,
|
||||
/* hugify_delay_ms */
|
||||
0,
|
||||
/* hugify_sync */
|
||||
false,
|
||||
/* min_purge_interval_ms */
|
||||
5 * 1000,
|
||||
/* experimental_max_purge_nhp */
|
||||
|
|
@ -371,9 +375,10 @@ defer_test_purge(void *ptr, size_t size) {
|
|||
}
|
||||
|
||||
static size_t ndefer_hugify_calls = 0;
|
||||
static void
|
||||
defer_test_hugify(void *ptr, size_t size) {
|
||||
static bool
|
||||
defer_test_hugify(void *ptr, size_t size, bool sync) {
|
||||
++ndefer_hugify_calls;
|
||||
return false;
|
||||
}
|
||||
|
||||
static size_t ndefer_dehugify_calls = 0;
|
||||
|
|
|
|||
|
|
@ -288,6 +288,7 @@ TEST_BEGIN(test_mallctl_opt) {
|
|||
TEST_MALLCTL_OPT(const char *, dss, always);
|
||||
TEST_MALLCTL_OPT(bool, hpa, always);
|
||||
TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always);
|
||||
TEST_MALLCTL_OPT(bool, hpa_hugify_sync, always);
|
||||
TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always);
|
||||
TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always);
|
||||
TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue