HPA: Hugification hysteresis.

We wait a while after deciding a huge extent should get hugified to see if it
gets purged before long.  This avoids hugifying extents that might shortly get
dehugified for purging.

Rename and use the hpa_dehugification_threshold option support code for this,
since it's now ignored.
This commit is contained in:
David Goldblatt 2021-06-14 14:53:23 -07:00 committed by David Goldblatt
parent 113938b6f4
commit 6630c59896
9 changed files with 234 additions and 60 deletions

View file

@@ -96,7 +96,7 @@ CTL_PROTO(opt_confirm_conf)
CTL_PROTO(opt_hpa)
CTL_PROTO(opt_hpa_slab_max_alloc)
CTL_PROTO(opt_hpa_hugification_threshold)
CTL_PROTO(opt_hpa_dehugification_threshold)
CTL_PROTO(opt_hpa_hugify_delay_ms)
CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
@@ -406,8 +406,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
{NAME("hpa_hugification_threshold"),
CTL(opt_hpa_hugification_threshold)},
{NAME("hpa_dehugification_threshold"),
CTL(opt_hpa_dehugification_threshold)},
{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@@ -2114,8 +2113,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
CTL_RO_NL_GEN(opt_hpa_hugification_threshold,
opt_hpa_opts.hugification_threshold, size_t)
CTL_RO_NL_GEN(opt_hpa_dehugification_threshold,
opt_hpa_opts.dehugification_threshold, size_t)
CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t)
/*
* This will have to change before we publicly document this option; fxp_t and
* its representation are internal implementation details.

View file

@@ -198,7 +198,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (hpdata_changing_state_get(ps)) {
hpdata_purge_allowed_set(ps, false);
hpdata_hugify_allowed_set(ps, false);
hpdata_disallow_hugify(ps);
return;
}
/*
@@ -226,7 +226,24 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
if (hpa_good_hugification_candidate(shard, ps)
&& !hpdata_huge_get(ps)) {
hpdata_hugify_allowed_set(ps, true);
nstime_t now;
shard->hooks.curtime(&now);
hpdata_allow_hugify(ps, now);
}
/*
 * Once a hugepage has become eligible for hugification, we don't mark
 * it as ineligible just because it stops meeting the criteria (this
 * could lead to situations where a hugepage that spends most of its
 * time meeting the criteria never quite gets hugified if there are
 * intervening deallocations). The idea is that the hugification delay
 * will allow them to get purged, resetting their "hugify-allowed" bit.
 * If they don't get purged, then the hugification isn't hurting and
 * might help. As an exception, we don't hugify hugepages that are now
 * empty; it definitely doesn't help there until the hugepage gets
 * reused, which is likely not for a while.
 */
if (hpdata_nactive_get(ps) == 0) {
hpdata_disallow_hugify(ps);
}
}
@@ -309,7 +326,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
assert(hpdata_alloc_allowed_get(to_purge));
hpdata_mid_purge_set(to_purge, true);
hpdata_purge_allowed_set(to_purge, false);
hpdata_hugify_allowed_set(to_purge, false);
hpdata_disallow_hugify(to_purge);
/*
* Unlike with hugification (where concurrent
* allocations are allowed), concurrent allocation out
@@ -386,6 +403,16 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
assert(hpdata_hugify_allowed_get(to_hugify));
assert(!hpdata_changing_state_get(to_hugify));
/* Make sure that it's been hugifiable for long enough. */
nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
nstime_t nstime;
shard->hooks.curtime(&nstime);
nstime_subtract(&nstime, &time_hugify_allowed);
uint64_t millis = nstime_msec(&nstime);
if (millis < shard->opts.hugify_delay_ms) {
return false;
}
/*
* Don't let anyone else purge or hugify this page while
* we're hugifying it (allocations and deallocations are
@@ -394,7 +421,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
psset_update_begin(&shard->psset, to_hugify);
hpdata_mid_hugify_set(to_hugify, true);
hpdata_purge_allowed_set(to_hugify, false);
hpdata_hugify_allowed_set(to_hugify, false);
hpdata_disallow_hugify(to_hugify);
assert(hpdata_alloc_allowed_get(to_hugify));
psset_update_end(&shard->psset, to_hugify);
@@ -421,9 +448,6 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
static void
hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
bool forced) {
bool hugified;
bool purged;
size_t nloop = 0;
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (!forced && shard->opts.deferral_allowed) {
return;
@@ -433,16 +457,29 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
* be done. Otherwise, bound latency to not be *too* bad by doing at
* most a small fixed number of operations.
*/
size_t maxloops = (forced ? (size_t)-1 : 8);
bool hugified = false;
bool purged = false;
size_t max_ops = (forced ? (size_t)-1 : 16);
size_t nops = 0;
do {
hugified = hpa_try_hugify(tsdn, shard);
malloc_mutex_assert_owner(tsdn, &shard->mtx);
/*
* Always purge before hugifying, to make sure we get some
* ability to hit our quiescence targets.
*/
purged = false;
if (hpa_should_purge(tsdn, shard)) {
while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
purged = hpa_try_purge(tsdn, shard);
if (purged) {
nops++;
}
}
hugified = hpa_try_hugify(tsdn, shard);
if (hugified) {
nops++;
}
malloc_mutex_assert_owner(tsdn, &shard->mtx);
} while ((hugified || purged) && nloop++ < maxloops);
malloc_mutex_assert_owner(tsdn, &shard->mtx);
} while ((hugified || purged) && nops < max_ops);
}
static edata_t *

View file

@@ -8,6 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size);
static void hpa_hooks_purge(void *ptr, size_t size);
static void hpa_hooks_hugify(void *ptr, size_t size);
static void hpa_hooks_dehugify(void *ptr, size_t size);
static void hpa_hooks_curtime(nstime_t *r_nstime);
hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_map,
@@ -15,6 +16,7 @@ hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_purge,
&hpa_hooks_hugify,
&hpa_hooks_dehugify,
&hpa_hooks_curtime,
};
static void *
@@ -44,3 +46,8 @@ hpa_hooks_dehugify(void *ptr, size_t size) {
bool err = pages_nohuge(ptr, size);
(void)err;
}
/*
 * Default "curtime" hook: refreshes *r_nstime to the current time via
 * nstime_update() (callers, e.g. hpa_update_purge_hugify_eligibility and
 * hpa_try_hugify, use the result as "now" for the hugify-delay check).
 */
static void
hpa_hooks_curtime(nstime_t *r_nstime) {
nstime_update(r_nstime);
}

View file

@@ -1145,6 +1145,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \
CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \
check_min, check_max, clip)
#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\
CONF_HANDLE_T_U(uint64_t, o, n, min, max, \
check_min, check_max, clip)
#define CONF_HANDLE_SSIZE_T(o, n, min, max) \
CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \
CONF_CHECK_MIN, CONF_CHECK_MAX, false)
@@ -1441,26 +1444,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_CONTINUE;
}
/* And the same for the dehugification_threshold. */
CONF_HANDLE_SIZE_T(
opt_hpa_opts.dehugification_threshold,
"hpa_dehugification_threshold", PAGE, HUGEPAGE,
CONF_CHECK_MIN, CONF_CHECK_MAX, true);
if (CONF_MATCH("hpa_dehugification_threshold_ratio")) {
fxp_t ratio;
char *end;
bool err = fxp_parse(&ratio, v,
&end);
if (err || (size_t)(end - v) != vlen
|| ratio > FXP_INIT_INT(1)) {
CONF_ERROR("Invalid conf value",
k, klen, v, vlen);
} else {
opt_hpa_opts.dehugification_threshold =
fxp_mul_frac(HUGEPAGE, ratio);
}
CONF_CONTINUE;
}
opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms",
0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
if (CONF_MATCH("hpa_dirty_mult")) {
if (CONF_MATCH_VALUE("-1")) {

View file

@@ -1376,7 +1376,7 @@ stats_general_print(emitter_t *emitter) {
uint64_t u64v;
int64_t i64v;
ssize_t ssv, ssv2;
size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz;
size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz;
bsz = sizeof(bool);
usz = sizeof(unsigned);
@@ -1385,6 +1385,7 @@ stats_general_print(emitter_t *emitter) {
cpsz = sizeof(const char *);
u32sz = sizeof(uint32_t);
i64sz = sizeof(int64_t);
u64sz = sizeof(uint64_t);
CTL_GET("version", &cpv, const char *);
emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv);
@@ -1442,6 +1443,8 @@ stats_general_print(emitter_t *emitter) {
#define OPT_WRITE_INT64(name) \
OPT_WRITE(name, i64v, i64sz, emitter_type_int64)
#define OPT_WRITE_UINT64(name) \
OPT_WRITE(name, u64v, u64sz, emitter_type_uint64)
#define OPT_WRITE_SIZE_T(name) \
OPT_WRITE(name, sv, ssz, emitter_type_size)
@@ -1468,7 +1471,7 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_BOOL("hpa")
OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
OPT_WRITE_SIZE_T("hpa_hugification_threshold")
OPT_WRITE_SIZE_T("hpa_dehugification_threshold")
OPT_WRITE_UINT64("hpa_hugify_delay_ms")
if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
== 0) {
/*