HPA: Introduce a redesigned hpa_central_t.

For now, this only handles allocating virtual address space to shards, with no reuse. This is just the framework, though; it will change over time.
2026-07-08 23:07:20 +03:00 · 2021-05-07 13:54:26 -07:00 · 2021-05-07 13:54:26 -07:00 · d93eef2f40
commit d93eef2f40
parent e09eac1d4e
10 changed files with 257 additions and 128 deletions
--- a/src/arena.c
+++ b/src/arena.c
@ -36,6 +36,7 @@ static atomic_zd_t dirty_decay_ms_default;
 static atomic_zd_t muzzy_decay_ms_default;

 emap_t arena_emap_global;
+pa_central_t arena_pa_central_global;

 const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
 #define STEP(step, h, x, y)			\
@ -1541,9 +1542,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {

 	nstime_t cur_time;
 	nstime_init_update(&cur_time);
-	if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind,
-	    &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx),
-	    &cur_time, oversize_threshold, arena_dirty_decay_ms_default_get(),
+	if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global,
+	    &arena_emap_global, base, ind, &arena->stats.pa_shard_stats,
+	    LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold,
+	    arena_dirty_decay_ms_default_get(),
 	    arena_muzzy_decay_ms_default_get())) {
 		goto label_error;
 	}
@ -1575,7 +1577,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
 		hpa_shard_opts.deferral_allowed = background_thread_enabled();
 		if (pa_shard_enable_hpa(tsdn, &arena->pa_shard,
-		    &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) {
+		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
 			goto label_error;
 		}
 	}
@ -1664,8 +1666,8 @@ arena_is_huge(unsigned arena_ind) {
 	return (arena_ind == huge_arena_ind);
 }

-void
-arena_boot(sc_data_t *sc_data) {
+bool
+arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) {
 	arena_dirty_decay_ms_default_set(opt_dirty_decay_ms);
 	arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms);
 	for (unsigned i = 0; i < SC_NBINS; i++) {
@ -1680,6 +1682,8 @@ arena_boot(sc_data_t *sc_data) {
 		nbins_total += bin_infos[i].n_shards;
 		cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
 	}
+	return pa_central_init(&arena_pa_central_global, base, hpa,
+	    &hpa_hooks_default);
 }

 void
--- a/src/hpa.c
+++ b/src/hpa.c
@ -51,9 +51,125 @@ hpa_supported() {
 }

 bool
-hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
-    edata_cache_t *edata_cache, unsigned ind,
-    const hpa_hooks_t *hooks, const hpa_shard_opts_t *opts) {
+hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) {
+	/*
+	 * Set up the central HPA state: both mutexes, the base used for
+	 * metadata, an empty eden, and a private copy of the hooks.  Returns
+	 * true if either mutex fails to initialize, false on success.
+	 *
+	 * malloc_conf processing should have filtered out the cases where the
+	 * HPA is unsupported, so we only assert here.
+	 */
+	assert(hpa_supported());
+	bool err;
+	err = malloc_mutex_init(&central->grow_mtx, "hpa_central_grow",
+	    WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive);
+	if (err) {
+		return true;
+	}
+	err = malloc_mutex_init(&central->mtx, "hpa_central",
+	    WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive);
+	if (err) {
+		return true;
+	}
+	central->base = base;
+	central->eden = NULL;
+	central->eden_len = 0;
+	central->age_counter = 0;
+	central->hooks = *hooks;
+	return false;
+}
+
+static hpdata_t *
+hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) {
+	return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t),
+	    CACHELINE);	/* Callers treat a NULL result as OOM. */
+}
+
+hpdata_t *
+hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
+    bool *oom) {
+	/*
+	 * Hand one hugepage-sized pageslab from the central eden to the
+	 * caller, mapping a fresh HPA_EDEN_SIZE region when eden is empty.
+	 * Returns NULL with *oom set to true if either the mapping or the
+	 * hpdata_t metadata allocation fails; otherwise *oom is false.
+	 *
+	 * Don't yet support big allocations; these should get filtered out.
+	 * size is only used for this check; a whole hugepage is always
+	 * extracted.
+	 */
+	assert(size <= HUGEPAGE);
+	/*
+	 * Should only try to extract from the central allocator if the local
+	 * shard is exhausted.  We should hold the grow_mtx on that shard.
+	 */
+	witness_assert_positive_depth_to_rank(
+	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW);
+
+	malloc_mutex_lock(tsdn, &central->grow_mtx);
+	*oom = false;
+
+	hpdata_t *ps = NULL;
+
+	/*
+	 * Is eden a perfect fit?  If so, hand all of it out and leave eden
+	 * empty, so that the next call maps a new region.
+	 */
+	if (central->eden != NULL && central->eden_len == HUGEPAGE) {
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		hpdata_init(ps, central->eden, central->age_counter++);
+		central->eden = NULL;
+		central->eden_len = 0;
+		malloc_mutex_unlock(tsdn, &central->grow_mtx);
+		return ps;
+	}
+
+	/*
+	 * We're about to try to allocate from eden by splitting.  If eden is
+	 * NULL, we have to allocate it too.  Otherwise, we just have to
+	 * allocate an hpdata_t for the new pageslab.
+	 */
+	if (central->eden == NULL) {
+		/*
+		 * During development, we're primarily concerned with systems
+		 * with overcommit.  Eventually, we should be more careful here.
+		 */
+		bool commit = true;
+		/*
+		 * Allocate address space, bailing if we fail.  The mapping is
+		 * only published to central->eden once the hpdata_t has been
+		 * allocated too; otherwise it is unmapped again.
+		 */
+		void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE,
+		    &commit);
+		if (new_eden == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			pages_unmap(new_eden, HPA_EDEN_SIZE);
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+		central->eden = new_eden;
+		central->eden_len = HPA_EDEN_SIZE;
+	} else {
+		/* Eden is already nonempty; only need an hpdata_t for ps. */
+		ps = hpa_alloc_ps(tsdn, central);
+		if (ps == NULL) {
+			*oom = true;
+			malloc_mutex_unlock(tsdn, &central->grow_mtx);
+			return NULL;
+		}
+	}
+	assert(ps != NULL);
+	assert(central->eden != NULL);
+	assert(central->eden_len > HUGEPAGE);
+	assert(central->eden_len % HUGEPAGE == 0);
+	assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden);
+
+	hpdata_init(ps, central->eden, central->age_counter++);
+
+	char *eden_char = (char *)central->eden;
+	eden_char += HUGEPAGE;	/* Eden shrinks from the front. */
+	central->eden = (void *)eden_char;
+	central->eden_len -= HUGEPAGE;
+
+	malloc_mutex_unlock(tsdn, &central->grow_mtx);
+
+	return ps;
+}
+
+bool
+hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
+    base_t *base, edata_cache_t *edata_cache, unsigned ind,
+    const hpa_shard_opts_t *opts) {
 	/* malloc_conf processing should have filtered out these cases. */
 	assert(hpa_supported());
 	bool err;
@ -69,13 +185,11 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
 	}

 	assert(edata_cache != NULL);
+	shard->central = central;
 	shard->base = base;
-	shard->hooks = *hooks;
 	edata_cache_small_init(&shard->ecs, edata_cache);
 	psset_init(&shard->psset);
 	shard->age_counter = 0;
-	shard->eden = NULL;
-	shard->eden_len = 0;
 	shard->ind = ind;
 	shard->emap = emap;

@ -136,12 +250,6 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
 	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 }

-static hpdata_t *
-hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) {
-	return (hpdata_t *)base_alloc(tsdn, shard->base, sizeof(hpdata_t),
-	    CACHELINE);
-}
-
 static bool
 hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
 	/*
@ -227,7 +335,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	if (hpa_good_hugification_candidate(shard, ps)
 	    && !hpdata_huge_get(ps)) {
 		nstime_t now;
-		shard->hooks.curtime(&now);
+		shard->central->hooks.curtime(&now);
 		hpdata_allow_hugify(ps, now);
 	}
 	/*
@ -247,64 +355,6 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	}
 }

-static hpdata_t *
-hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
-	hpdata_t *ps = NULL;
-
-	/* Is eden a perfect fit? */
-	if (shard->eden != NULL && shard->eden_len == HUGEPAGE) {
-		ps = hpa_alloc_ps(tsdn, shard);
-		if (ps == NULL) {
-			return NULL;
-		}
-		hpdata_init(ps, shard->eden, shard->age_counter++);
-		shard->eden = NULL;
-		shard->eden_len = 0;
-		return ps;
-	}
-
-	/*
-	 * We're about to try to allocate from eden by splitting.  If eden is
-	 * NULL, we have to allocate it too.  Otherwise, we just have to
-	 * allocate an edata_t for the new psset.
-	 */
-	if (shard->eden == NULL) {
-		/* Allocate address space, bailing if we fail. */
-		void *new_eden = shard->hooks.map(HPA_EDEN_SIZE);
-		if (new_eden == NULL) {
-			return NULL;
-		}
-		ps = hpa_alloc_ps(tsdn, shard);
-		if (ps == NULL) {
-			shard->hooks.unmap(new_eden, HPA_EDEN_SIZE);
-			return NULL;
-		}
-		shard->eden = new_eden;
-		shard->eden_len = HPA_EDEN_SIZE;
-	} else {
-		/* Eden is already nonempty; only need an edata for ps. */
-		ps = hpa_alloc_ps(tsdn, shard);
-		if (ps == NULL) {
-			return NULL;
-		}
-	}
-	assert(ps != NULL);
-	assert(shard->eden != NULL);
-	assert(shard->eden_len > HUGEPAGE);
-	assert(shard->eden_len % HUGEPAGE == 0);
-	assert(HUGEPAGE_ADDR2BASE(shard->eden) == shard->eden);
-
-	hpdata_init(ps, shard->eden, shard->age_counter++);
-
-	char *eden_char = (char *)shard->eden;
-	eden_char += HUGEPAGE;
-	shard->eden = (void *)eden_char;
-	shard->eden_len -= HUGEPAGE;
-
-	return ps;
-}
-
 /* Returns whether or not we purged anything. */
 static bool
 hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
@ -348,7 +398,8 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {

 	/* Actually do the purging, now that the lock is dropped. */
 	if (dehugify) {
-		shard->hooks.dehugify(hpdata_addr_get(to_purge), HUGEPAGE);
+		shard->central->hooks.dehugify(hpdata_addr_get(to_purge),
+		    HUGEPAGE);
 	}
 	size_t total_purged = 0;
 	uint64_t purges_this_pass = 0;
@ -359,7 +410,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 		total_purged += purge_size;
 		assert(total_purged <= HUGEPAGE);
 		purges_this_pass++;
-		shard->hooks.purge(purge_addr, purge_size);
+		shard->central->hooks.purge(purge_addr, purge_size);
 	}

 	malloc_mutex_lock(tsdn, &shard->mtx);
@ -406,7 +457,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	/* Make sure that it's been hugifiable for long enough. */
 	nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
 	nstime_t nstime;
-	shard->hooks.curtime(&nstime);
+	shard->central->hooks.curtime(&nstime);
 	nstime_subtract(&nstime, &time_hugify_allowed);
 	uint64_t millis = nstime_msec(&nstime);
 	if (millis < shard->opts.hugify_delay_ms) {
@ -427,7 +478,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {

 	malloc_mutex_unlock(tsdn, &shard->mtx);

-	shard->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);
+	shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);

 	malloc_mutex_lock(tsdn, &shard->mtx);
 	shard->stats.nhugifies++;
@ -604,7 +655,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * deallocations (and allocations of smaller sizes) may still succeed
 	 * while we're doing this potentially expensive system call.
 	 */
-	hpdata_t *ps = hpa_grow(tsdn, shard);
+	hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom);
 	if (ps == NULL) {
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 		return nsuccess;
@ -833,7 +884,7 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
 		/* There should be no allocations anywhere. */
 		assert(hpdata_empty(ps));
 		psset_remove(&shard->psset, ps);
-		shard->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE);
+		shard->central->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE);
 	}
 }

--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@ -1747,7 +1747,19 @@ malloc_init_hard_a0_locked() {
 	if (config_prof) {
 		prof_boot1();
 	}
-	arena_boot(&sc_data);
+	if (opt_hpa && !hpa_supported()) {
+		malloc_printf("<jemalloc>: HPA not supported in the current "
+		    "configuration; %s.",
+		    opt_abort_conf ? "aborting" : "disabling");
+		if (opt_abort_conf) {
+			malloc_abort_invalid_conf();
+		} else {
+			opt_hpa = false;
+		}
+	}
+	if (arena_boot(&sc_data, b0get(), opt_hpa)) {
+		return true;
+	}
 	if (tcache_boot(TSDN_NULL, b0get())) {
 		return true;
 	}
@ -1786,7 +1798,7 @@ malloc_init_hard_a0_locked() {
 		hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts;
 		hpa_shard_opts.deferral_allowed = background_thread_enabled();
 		if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard,
-		    &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) {
+		    &hpa_shard_opts, &opt_hpa_sec_opts)) {
 			return true;
 		}
 	}
--- a/src/pa.c
+++ b/src/pa.c
@ -15,10 +15,23 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) {
 }

 bool
-pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
-    unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx,
-    nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms,
-    ssize_t muzzy_decay_ms) {
+pa_central_init(pa_central_t *central, base_t *base, bool hpa,
+    hpa_hooks_t *hpa_hooks) {
+	bool err;
+	if (hpa) {	/* Otherwise central->hpa is left untouched. */
+		err = hpa_central_init(&central->hpa, base, hpa_hooks);
+		if (err) {
+			return true;	/* Propagate the init failure. */
+		}
+	}
+	return false;	/* Success (also when HPA is disabled). */
+}
+
+bool
+pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
+    emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
+    malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
+    ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
 	/* This will change eventually, but for now it should hold. */
 	assert(base_ind_get(base) == ind);
 	if (edata_cache_init(&shard->edata_cache, base)) {
@ -42,6 +55,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
 	shard->stats = stats;
 	memset(shard->stats, 0, sizeof(*shard->stats));

+	shard->central = central;
 	shard->emap = emap;
 	shard->base = base;

@ -50,10 +64,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,

 bool
 pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
-    const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts,
-    const sec_opts_t *hpa_sec_opts) {
-	if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base,
-	    &shard->edata_cache, shard->ind, hpa_hooks, hpa_opts)) {
+    const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) {
+	if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap,
+	    shard->base, &shard->edata_cache, shard->ind, hpa_opts)) {
 		return true;
 	}
 	if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai,