From c0e2a93e4d2caac4f1ed877ded9e28f33d962f1e Mon Sep 17 00:00:00 2001
From: Tony Printezis <printezis@fb.com>
Date: Mon, 2 Mar 2026 11:11:09 -0800
Subject: [PATCH] Remove hpa_sec_batch_fill_extra and calculate nallocs
 dynamically.

---
 include/jemalloc/internal/hpdata.h   |  21 +++++
 include/jemalloc/internal/sec.h      |  13 +++
 include/jemalloc/internal/sec_opts.h |   9 +--
 include/jemalloc/internal/util.h     |   8 ++
 src/conf.c                           |   3 -
 src/ctl.c                            |   4 -
 src/hpa.c                            | 111 +++++++++++++-------------
 src/hpdata.c                         | 114 +++++++++++++++++++++++++++
 src/jemalloc.c                       |  15 ++--
 src/sec.c                            |  24 ++++++
 src/stats.c                          |   1 -
 test/unit/hpa_sec_integration.c      |  40 ++++++----
 test/unit/mallctl.c                  |   1 -
 test/unit/sec.c                      |   2 -
 14 files changed, 268 insertions(+), 98 deletions(-)

diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h
index a9c507f0..2eb0d73f 100644
--- a/include/jemalloc/internal/hpdata.h
+++ b/include/jemalloc/internal/hpdata.h
@@ -432,6 +432,27 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge);
 void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
 void  hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz);
 
+typedef struct hpdata_alloc_offset_s hpdata_alloc_offset_t;
+struct hpdata_alloc_offset_s {
+	size_t index;
+	size_t len;
+};
+
+/*
+ * Given an hpdata which can serve an allocation request of size sz,
+ * find between one and max_nallocs offsets that can satisfy such
+ * an allocation request and buffer them in offsets (without actually
+ * reserving any space or updating hpdata). Return the number
+ * of offsets discovered.
+ */
+size_t hpdata_find_alloc_offsets(hpdata_t *hpdata, size_t sz,
+    hpdata_alloc_offset_t *offsets, size_t max_nallocs);
+/* Reserve the allocation for the given offset. */
+void *hpdata_reserve_alloc_offset(
+    hpdata_t *hpdata, size_t sz, hpdata_alloc_offset_t *offset);
+void hpdata_post_reserve_alloc_offsets(
+    hpdata_t *hpdata, hpdata_alloc_offset_t *offsets, size_t nallocs);
+
 /*
  * The hpdata_purge_prepare_t allows grabbing the metadata required to purge
  * subranges of a hugepage while holding a lock, drop the lock during the actual
diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h
index cc458b9d..1c39c6f8 100644
--- a/include/jemalloc/internal/sec.h
+++ b/include/jemalloc/internal/sec.h
@@ -96,6 +96,19 @@ sec_size_supported(sec_t *sec, size_t size) {
 	return sec_is_used(sec) && size <= sec->opts.max_alloc;
 }
 
+/* Max number of extents we would allocate out of a single huge page. */
+#define MAX_SEC_NALLOCS 4
+
+/* Fill the SEC up to max_bytes / MAX_BYTES_DIV */
+#define MAX_BYTES_DIV 4
+
+/*
+ * Calculate the number of extents we will try to allocate out of
+ * a single huge page for a given allocation size. The result will be
+ * in the range [1, MAX_SEC_NALLOCS].
+ */
+size_t sec_calc_nallocs_for_size(sec_t *sec, size_t size);
+
 /* If sec does not have extent available, it will return NULL. */
 edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size);
 void     sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size,
diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h
index 039d423c..90907165 100644
--- a/include/jemalloc/internal/sec_opts.h
+++ b/include/jemalloc/internal/sec_opts.h
@@ -27,16 +27,9 @@ struct sec_opts_s {
 	 * until we are 1/4 below max_bytes.
 	 */
 	size_t max_bytes;
-	/*
-	 * When we can't satisfy an allocation out of the SEC because there are
-	 * no available ones cached, allocator will allocate a batch with extra
-	 * batch_fill_extra extents of the same size.
-	 */
-	size_t batch_fill_extra;
 };
 
 #define SEC_OPTS_NSHARDS_DEFAULT 2
-#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3
 #define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024))
 #define SEC_OPTS_MAX_BYTES_DEFAULT                                             \
 	((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT)                       \
@@ -45,6 +38,6 @@ struct sec_opts_s {
 
 #define SEC_OPTS_DEFAULT                                                       \
 	{SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT,                 \
-	    SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT}
+	    SEC_OPTS_MAX_BYTES_DEFAULT}
 
 #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index ecfa76b8..f6199cd6 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -6,6 +6,13 @@
 
 #define UTIL_INLINE static inline
 
+#ifndef MIN
+#	define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#	define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
 /* Junk fill patterns. */
 #ifndef JEMALLOC_ALLOC_JUNK
 #	define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5)
@@ -153,4 +160,5 @@ util_prefetch_write_range(void *ptr, size_t sz) {
  */
 bool multi_setting_parse_next(const char **setting_segment_cur,
     size_t *len_left, size_t *key_start, size_t *key_end, size_t *value);
+
 #endif /* JEMALLOC_INTERNAL_UTIL_H */
diff --git a/src/conf.c b/src/conf.c
index 65abcd25..fedb2ede 100644
--- a/src/conf.c
+++ b/src/conf.c
@@ -952,9 +952,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes,
 			    "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0,
 			    CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
-			CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra,
-			    "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
 
 			if (CONF_MATCH("slab_sizes")) {
 				if (CONF_MATCH_VALUE("default")) {
diff --git a/src/ctl.c b/src/ctl.c
index 0b72086c..e03dca4f 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -115,7 +115,6 @@ CTL_PROTO(opt_hpa_dirty_mult)
 CTL_PROTO(opt_hpa_sec_nshards)
 CTL_PROTO(opt_hpa_sec_max_alloc)
 CTL_PROTO(opt_hpa_sec_max_bytes)
-CTL_PROTO(opt_hpa_sec_batch_fill_extra)
 CTL_PROTO(opt_huge_arena_pac_thp)
 CTL_PROTO(opt_metadata_thp)
 CTL_PROTO(opt_retain)
@@ -487,7 +486,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)},
     {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
     {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
     {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
-    {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)},
     {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)},
     {NAME("metadata_thp"), CTL(opt_metadata_thp)},
     {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)},
@@ -2173,8 +2171,6 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t)
 CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t)
-CTL_RO_NL_GEN(
-    opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t)
 CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool)
 CTL_RO_NL_GEN(
     opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *)
diff --git a/src/hpa.c b/src/hpa.c
index 7e5b5f72..2dd15362 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -651,37 +651,18 @@ hpa_shard_maybe_do_deferred_work(
 }
 
 static edata_t *
-hpa_try_alloc_one_no_grow(
-    tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
+hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
+    hpdata_t *ps, hpdata_alloc_offset_t *alloc_offset, bool *oom) {
+	assert(*oom == false);
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 
-	bool     err;
 	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
 	if (edata == NULL) {
 		*oom = true;
 		return NULL;
 	}
 
-	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
-	if (ps == NULL) {
-		edata_cache_fast_put(tsdn, &shard->ecf, edata);
-		return NULL;
-	}
-
-	psset_update_begin(&shard->psset, ps);
-
-	if (hpdata_empty(ps)) {
-		/*
-		 * If the pageslab used to be empty, treat it as though it's
-		 * brand new for fragmentation-avoidance purposes; what we're
-		 * trying to approximate is the age of the allocations *in* that
-		 * pageslab, and the allocations in the new pageslab are by
-		 * definition the youngest in this hpa shard.
-		 */
-		hpdata_age_set(ps, shard->age_counter++);
-	}
-
-	void *addr = hpdata_reserve_alloc(ps, size);
+	void *addr = hpdata_reserve_alloc_offset(ps, size, alloc_offset);
 	JE_USDT(hpa_alloc, 5, shard->ind, addr, size, hpdata_nactive_get(ps),
 	    hpdata_age_get(ps));
 	edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES,
@@ -693,12 +674,12 @@ hpa_try_alloc_one_no_grow(
 	/*
 	 * This could theoretically be moved outside of the critical section,
 	 * but that introduces the potential for a race.  Without the lock, the
 	 * (initially nonempty, since this is the reuse pathway) pageslab we
 	 * allocated out of could become otherwise empty while the lock is
 	 * dropped.  This would force us to deal with a pageslab eviction down
 	 * the error pathway, which is a pain.
 	 */
-	err = emap_register_boundary(
+	const bool err = emap_register_boundary(
 	    tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false);
 	if (err) {
 		hpdata_unreserve(
@@ -715,32 +696,61 @@ hpa_try_alloc_one_no_grow(
 		 * principle that we didn't *really* affect shard state (we
 		 * tweaked the stats, but our tweaks weren't really accurate).
 		 */
-		psset_update_end(&shard->psset, ps);
 		edata_cache_fast_put(tsdn, &shard->ecf, edata);
 		*oom = true;
 		return NULL;
 	}
 
-	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
-	psset_update_end(&shard->psset, ps);
 	return edata;
 }
 
 static size_t
 hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard,
-    size_t size, bool *oom, size_t nallocs, edata_list_active_t *results,
+    size_t size, bool *oom, edata_list_active_t *results,
     bool *deferred_work_generated) {
+	assert(size <= HUGEPAGE);
+	assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size));
+	assert(*oom == false);
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+
+	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
+	if (ps == NULL) {
+		return 0;
+	}
+
+	hpdata_alloc_offset_t alloc_offsets[MAX_SEC_NALLOCS];
+	const size_t max_nallocs = sec_calc_nallocs_for_size(&shard->sec, size);
+	const size_t nallocs = hpdata_find_alloc_offsets(
+	    ps, size, alloc_offsets, max_nallocs);
+
+	psset_update_begin(&shard->psset, ps);
+
+	if (hpdata_empty(ps)) {
+		/*
+		 * If the pageslab used to be empty, treat it as though it's
+		 * brand new for fragmentation-avoidance purposes; what we're
+		 * trying to approximate is the age of the allocations *in* that
+		 * pageslab, and the allocations in the new pageslab are by
+		 * definition the youngest in this hpa shard.
+		 */
+		hpdata_age_set(ps, shard->age_counter++);
+	}
+
 	size_t nsuccess = 0;
-	for (; nsuccess < nallocs; nsuccess++) {
+	for (; nsuccess < nallocs; nsuccess += 1) {
 		edata_t *edata = hpa_try_alloc_one_no_grow(
-		    tsdn, shard, size, oom);
+		    tsdn, shard, size, ps, (alloc_offsets + nsuccess), oom);
 		if (edata == NULL) {
 			break;
 		}
+
 		edata_list_active_append(results, edata);
 	}
 
+	hpdata_post_reserve_alloc_offsets(ps, alloc_offsets, nsuccess);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
+	psset_update_end(&shard->psset, ps);
+
 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
 	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
 	return nsuccess;
@@ -748,27 +758,22 @@ hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard,
 
 static size_t
 hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom, size_t nallocs, edata_list_active_t *results,
-    bool *deferred_work_generated) {
+    bool *oom, edata_list_active_t *results, bool *deferred_work_generated) {
 	malloc_mutex_lock(tsdn, &shard->mtx);
-	size_t nsuccess = hpa_try_alloc_batch_no_grow_locked(
-	    tsdn, shard, size, oom, nallocs, results, deferred_work_generated);
+	const size_t nsuccess = hpa_try_alloc_batch_no_grow_locked(
+	    tsdn, shard, size, oom, results, deferred_work_generated);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 	return nsuccess;
 }
 
 static size_t
 hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    size_t nallocs, edata_list_active_t *results,
-    bool *deferred_work_generated) {
-	assert(size <= HUGEPAGE);
-	assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size));
+    edata_list_active_t *results, bool *deferred_work_generated) {
 	bool oom = false;
 
 	size_t nsuccess = hpa_try_alloc_batch_no_grow(
-	    tsdn, shard, size, &oom, nallocs, results, deferred_work_generated);
-
-	if (nsuccess == nallocs || oom) {
+	    tsdn, shard, size, &oom, results, deferred_work_generated);
+	if (0 < nsuccess || oom) {
 		return nsuccess;
 	}
 
@@ -777,13 +782,14 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * try to grow.
 	 */
 	malloc_mutex_lock(tsdn, &shard->grow_mtx);
+
 	/*
 	 * Check for grow races; maybe some earlier thread expanded the psset
 	 * in between when we dropped the main mutex and grabbed the grow mutex.
 	 */
-	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results, deferred_work_generated);
-	if (nsuccess == nallocs || oom) {
+	nsuccess = hpa_try_alloc_batch_no_grow(
+	    tsdn, shard, size, &oom, results, deferred_work_generated);
+	if (0 < nsuccess || oom) {
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 		return nsuccess;
 	}
@@ -797,7 +803,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	    shard->age_counter++, hpa_is_hugify_eager(shard), &oom);
 	if (ps == NULL) {
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-		return nsuccess;
+		return 0;
 	}
 
 	/*
@@ -807,14 +813,10 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 */
 	malloc_mutex_lock(tsdn, &shard->mtx);
 	psset_insert(&shard->psset, ps);
-	nsuccess += hpa_try_alloc_batch_no_grow_locked(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results, deferred_work_generated);
+	nsuccess = hpa_try_alloc_batch_no_grow_locked(
+	    tsdn, shard, size, &oom, results, deferred_work_generated);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 
-	/*
-	 * Drop grow_mtx before doing deferred work; other threads blocked on it
-	 * should be allowed to proceed while we're working.
-	 */
 	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 
 	return nsuccess;
@@ -886,13 +888,10 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
 	if (edata != NULL) {
 		return edata;
 	}
-	size_t              nallocs = sec_size_supported(&shard->sec, size)
-	                 ? shard->sec.opts.batch_fill_extra + 1
-	                 : 1;
 	edata_list_active_t results;
 	edata_list_active_init(&results);
 	size_t nsuccess = hpa_alloc_batch_psset(
-	    tsdn, shard, size, nallocs, &results, deferred_work_generated);
+	    tsdn, shard, size, &results, deferred_work_generated);
 	hpa_assert_results(tsdn, shard, &results);
 	edata = edata_list_active_first(&results);
 
diff --git a/src/hpdata.c b/src/hpdata.c
index e17d9ecf..ee60e984 100644
--- a/src/hpdata.c
+++ b/src/hpdata.c
@@ -170,6 +170,120 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 	hpdata_assert_consistent(hpdata);
 }
 
+size_t
+hpdata_find_alloc_offsets(hpdata_t *hpdata, size_t sz,
+    hpdata_alloc_offset_t *offsets, size_t max_nallocs) {
+	hpdata_assert_consistent(hpdata);
+	assert((sz & PAGE_MASK) == 0);
+	assert(1 <= max_nallocs);
+	const size_t npages = sz >> LG_PAGE;
+	/* We should be able to find at least one allocation */
+	assert(npages <= hpdata_longest_free_range_get(hpdata));
+
+	bool   found = false;
+	size_t nallocs = 0;
+	size_t start = 0;
+	/*
+	 * These are dead stores, but the compiler will issue warnings without
+	 * them since it can't tell statically that found is always true below.
+	 */
+	size_t begin = 0;
+	size_t len = 0;
+
+	while (true) {
+		found = found
+		    || fb_urange_iter(hpdata->active_pages, HUGEPAGE_PAGES,
+		        start, &begin, &len);
+		if (!found) {
+			/* we should have found at least one */
+			assert(0 < nallocs);
+			break;
+		}
+
+		if (npages <= len) {
+			offsets->len = len;
+			offsets->index = begin;
+			offsets += 1;
+			nallocs += 1;
+
+			if (nallocs == max_nallocs) {
+				break;
+			}
+
+			begin += npages;
+			len -= npages;
+		} else {
+			found = false;
+			start = begin + len;
+			assert(start <= HUGEPAGE_PAGES);
+			if (start == HUGEPAGE_PAGES) {
+				break;
+			}
+		}
+	}
+
+	/* post-conditions */
+	assert(1 <= nallocs);
+	assert(nallocs <= max_nallocs);
+
+	return nallocs;
+}
+
+void *
+hpdata_reserve_alloc_offset(
+    hpdata_t *hpdata, size_t sz, hpdata_alloc_offset_t *offset) {
+	/*
+	 * This is a metadata change; the hpdata should therefore either not be
+	 * in the psset, or should have explicitly marked itself as being
+	 * mid-update.
+	 */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
+	assert(hpdata->h_alloc_allowed);
+	assert((sz & PAGE_MASK) == 0);
+	const size_t npages = sz >> LG_PAGE;
+	const size_t index = offset->index;
+
+	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, index, npages);
+	hpdata->h_nactive += npages;
+
+	/*
+	 * We might be about to dirty some memory for the first time; update our
+	 * count if so.
+	 */
+	size_t new_dirty = fb_ucount(
+	    hpdata->touched_pages, HUGEPAGE_PAGES, index, npages);
+	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, index, npages);
+	hpdata->h_ntouched += new_dirty;
+
+	return (void *)((byte_t *)hpdata_addr_get(hpdata) + (index << LG_PAGE));
+}
+
+void
+hpdata_post_reserve_alloc_offsets(
+    hpdata_t *hpdata, hpdata_alloc_offset_t *offsets, size_t nallocs) {
+	if (nallocs == 0) {
+		return;
+	}
+
+	size_t max_len = offsets[0].len;
+	for (size_t i = 1; i < nallocs; i += 1) {
+		const size_t len = offsets[i].len;
+		max_len = MAX(max_len, len);
+	}
+
+	/*
+	 * If we allocated out of a range that was the longest in the hpdata, it
+	 * might be the only one of that size and we'll have to adjust the
+	 * metadata.
+	 */
+	assert(max_len <= hpdata_longest_free_range_get(hpdata));
+	if (max_len == hpdata_longest_free_range_get(hpdata)) {
+		const size_t longest_unchosen_range = fb_urange_longest(
+		    hpdata->active_pages, HUGEPAGE_PAGES);
+		hpdata_longest_free_range_set(hpdata, longest_unchosen_range);
+	}
+}
+
 size_t
 hpdata_purge_begin(
     hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges) {
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 8d341ba3..5cfdca1f 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -161,13 +161,13 @@ void (*JET_MUTABLE junk_free_callback)(
     void *ptr, size_t size) = &default_junk_free;
 void (*JET_MUTABLE invalid_conf_abort)(void) = &abort;
 
-bool         opt_utrace = false;
-bool         opt_xmalloc = false;
-bool         opt_experimental_infallible_new = false;
-bool         opt_experimental_tcache_gc = true;
-bool         opt_zero = false;
-unsigned     opt_narenas = 0;
-fxp_t opt_narenas_ratio = FXP_INIT_INT(4);
+bool     opt_utrace = false;
+bool     opt_xmalloc = false;
+bool     opt_experimental_infallible_new = false;
+bool     opt_experimental_tcache_gc = true;
+bool     opt_zero = false;
+unsigned opt_narenas = 0;
+fxp_t    opt_narenas_ratio = FXP_INIT_INT(4);
 
 unsigned ncpus;
 
@@ -293,7 +293,6 @@ typedef struct {
 #	define UTRACE(a, b, c)
 #endif
 
-
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
diff --git a/src/sec.c b/src/sec.c
index a3254537..235c9f68 100644
--- a/src/sec.c
+++ b/src/sec.c
@@ -90,6 +90,30 @@ sec_bin_pick(sec_t *sec, uint8_t shard, pszind_t pszind) {
 	return &sec->bins[ind];
 }
 
+size_t
+sec_calc_nallocs_for_size(sec_t *sec, size_t size) {
+	size_t res = 1;
+
+	if (sec_size_supported(sec, size)) {
+		/*
+		 * This attempts to fill up to 1/MAX_BYTES_DIV of the SEC.
+		 * If we go much over that, we might cause purging.
+		 * This is mainly an issue when max_bytes is small (256K)
+		 * and size is large. For larger max_bytes, we will
+		 * almost always end up with MAX_SEC_NALLOCS.
+		 */
+		res = sec->opts.max_bytes / size / MAX_BYTES_DIV;
+		res = MAX(res, 1);
+		res = MIN(res, MAX_SEC_NALLOCS);
+	}
+
+	/* post-conditions */
+	assert(1 <= res);
+	assert(res <= MAX_SEC_NALLOCS);
+
+	return res;
+}
+
 static edata_t *
 sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) {
 	malloc_mutex_assert_owner(tsdn, &bin->mtx);
diff --git a/src/stats.c b/src/stats.c
index 82458fec..ce79cb20 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -1673,7 +1673,6 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("hpa_sec_nshards")
 	OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
 	OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
-	OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra")
 	OPT_WRITE_BOOL("huge_arena_pac_thp")
 	OPT_WRITE_CHAR_P("metadata_thp")
 	OPT_WRITE_INT64("mutex_max_spin")
diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c
index c54cdc0c..ea23e750 100644
--- a/test/unit/hpa_sec_integration.c
+++ b/test/unit/hpa_sec_integration.c
@@ -161,26 +161,27 @@ TEST_BEGIN(test_hpa_sec) {
 	sec_opts.nshards = 1;
 	sec_opts.max_alloc = 2 * PAGE;
 	sec_opts.max_bytes = NALLOCS * PAGE;
-	sec_opts.batch_fill_extra = 4;
 
 	hpa_shard_t *shard = create_test_data(&hooks, &opts, &sec_opts);
 	bool         deferred_work_generated = false;
 	tsdn_t      *tsdn = tsd_tsdn(tsd_fetch());
+	const size_t target_nallocs = sec_calc_nallocs_for_size(
+	    &shard->sec, PAGE);
 
-	/* alloc 1 PAGE, confirm sec has fill_extra bytes. */
+	/* Alloc 1 PAGE, confirm sec has the expected extra pages. */
 	edata_t *edata1 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false,
 	    false, &deferred_work_generated);
 	expect_ptr_not_null(edata1, "Unexpected null edata");
 	hpa_shard_stats_t hpa_stats;
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive,
-	    1 + sec_opts.batch_fill_extra, "");
-	expect_zu_eq(hpa_stats.secstats.bytes, PAGE * sec_opts.batch_fill_extra,
-	    "sec should have fill extra pages");
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, target_nallocs, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, (target_nallocs - 1) * PAGE,
+	    "sec should have extra pages");
 
 	/* Alloc/dealloc NALLOCS times and confirm extents are in sec. */
 	edata_t *edatas[NALLOCS];
+	size_t   expected_nactive = NALLOCS + target_nallocs;
 	for (int i = 0; i < NALLOCS; i++) {
 		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
 		    false, false, &deferred_work_generated);
@@ -188,8 +189,10 @@ TEST_BEGIN(test_hpa_sec) {
 	}
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, 2 + NALLOCS, "");
-	expect_zu_eq(hpa_stats.secstats.bytes, PAGE, "2 refills (at 0 and 4)");
+	expect_zu_eq(
+	    hpa_stats.psset_stats.merged.nactive, expected_nactive, "");
+	expect_zu_eq(hpa_stats.secstats.bytes, (target_nallocs - 1) * PAGE,
+	    "multiple refills (every target_nallocs allocations)");
 
 	for (int i = 0; i < NALLOCS - 1; i++) {
 		pai_dalloc(
@@ -197,17 +200,22 @@ TEST_BEGIN(test_hpa_sec) {
 	}
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (2 + NALLOCS), "");
+	expect_zu_eq(
+	    hpa_stats.psset_stats.merged.nactive, expected_nactive, "");
 	expect_zu_eq(
 	    hpa_stats.secstats.bytes, sec_opts.max_bytes, "sec should be full");
 
-	/* this one should flush 1 + 0.25 * 8 = 3 extents */
+	/* this one should flush 1 + 0.25 * NALLOCS extents */
+	const size_t flushed_extends = 1 + NALLOCS / 4;
+	const size_t expected_native_minus_flushed = expected_nactive
+	    - flushed_extends;
 	pai_dalloc(
 	    tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated);
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (NALLOCS - 1), "");
-	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive,
+	    expected_native_minus_flushed, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, flushed_extends, "");
 	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes,
 	    "sec should be full");
 
@@ -217,7 +225,8 @@ TEST_BEGIN(test_hpa_sec) {
 	expect_ptr_not_null(edata2, "Unexpected null edata");
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive,
+	    expected_native_minus_flushed, "");
 	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes - PAGE,
 	    "sec should have max_bytes minus one page that just came from it");
 
@@ -225,8 +234,9 @@ TEST_BEGIN(test_hpa_sec) {
 	pai_dalloc(tsdn, &shard->pai, edata2, &deferred_work_generated);
 	memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t));
 	hpa_shard_stats_merge(tsdn, shard, &hpa_stats);
-	expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, "");
-	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.nactive,
+	    expected_native_minus_flushed, "");
+	expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, flushed_extends, "");
 	expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, "");
 
 	destroy_test_data(shard);
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 11710c27..fc2e1350 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -313,7 +313,6 @@ TEST_BEGIN(test_mallctl_opt) {
 	TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always);
 	TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
-	TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always);
 	TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
 	TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always);
 	TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always);
diff --git a/test/unit/sec.c b/test/unit/sec.c
index 2a6a00ce..8caf0a6a 100644
--- a/test/unit/sec.c
+++ b/test/unit/sec.c
@@ -69,7 +69,6 @@ TEST_BEGIN(test_sec_fill) {
 	opts.nshards = 1;
 	opts.max_alloc = 2 * PAGE;
 	opts.max_bytes = 4 * PAGE;
-	opts.batch_fill_extra = 2;
 
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	test_data_init(tsdn, &tdata, &opts);
@@ -114,7 +113,6 @@ TEST_BEGIN(test_sec_alloc) {
 	opts.nshards = 1;
 	opts.max_alloc = 2 * PAGE;
 	opts.max_bytes = 4 * PAGE;
-	opts.batch_fill_extra = 1;
 
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	test_data_init(tsdn, &tdata, &opts);