diff --git a/Makefile.in b/Makefile.in
index 1914fc28..b4102d0b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -137,6 +137,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/pai.c \
$(srcroot)src/pac.c \
$(srcroot)src/pages.c \
+ $(srcroot)src/peak_demand.c \
$(srcroot)src/peak_event.c \
$(srcroot)src/prof.c \
$(srcroot)src/prof_data.c \
@@ -252,6 +253,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/pack.c \
$(srcroot)test/unit/pages.c \
$(srcroot)test/unit/peak.c \
+ $(srcroot)test/unit/peak_demand.c \
$(srcroot)test/unit/ph.c \
$(srcroot)test/unit/prng.c \
$(srcroot)test/unit/prof_accum.c \
diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h
index d788d051..a384d04a 100644
--- a/include/jemalloc/internal/hpa.h
+++ b/include/jemalloc/internal/hpa.h
@@ -10,6 +10,7 @@
#include "jemalloc/internal/hpa_opts.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/pai.h"
+#include "jemalloc/internal/peak_demand.h"
#include "jemalloc/internal/psset.h"
typedef struct hpa_central_s hpa_central_t;
@@ -147,6 +148,9 @@ struct hpa_shard_s {
* Last time we performed purge on this shard.
*/
nstime_t last_purge;
+
+ /* Peak active memory sliding window statistics. */
+ peak_demand_t peak_demand;
};
bool hpa_hugepage_size_exceeds_limit();
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h
index 42246172..816bb577 100644
--- a/include/jemalloc/internal/hpa_opts.h
+++ b/include/jemalloc/internal/hpa_opts.h
@@ -27,7 +27,8 @@ struct hpa_shard_opts_s {
/*
* The HPA purges whenever the number of pages exceeds dirty_mult *
- * active_pages. This may be set to (fxp_t)-1 to disable purging.
+ * peak_active_pages. This may be set to (fxp_t)-1 to disable
+ * purging.
*/
fxp_t dirty_mult;
@@ -59,6 +60,13 @@ struct hpa_shard_opts_s {
* Maximum number of hugepages to purge on each purging attempt.
*/
ssize_t experimental_max_purge_nhp;
+
+ /*
+ * Sliding window duration to track active memory demand statistics.
+	 * This may be set to 0 to disable sliding window statistics tracking
+	 * and use the current number of active pages for purging instead.
+ */
+ uint64_t peak_demand_window_ms;
};
#define HPA_SHARD_OPTS_DEFAULT { \
@@ -83,7 +91,9 @@ struct hpa_shard_opts_s {
/* min_purge_interval_ms */ \
5 * 1000, \
/* experimental_max_purge_nhp */ \
- -1 \
+ -1, \
+ /* peak_demand_window_ms */ \
+ 0 \
}
#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */
diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h
new file mode 100644
index 00000000..2664cbec
--- /dev/null
+++ b/include/jemalloc/internal/peak_demand.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H
+#define JEMALLOC_INTERNAL_PEAK_DEMAND_H
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+/*
+ * Implementation of peak active memory demand tracking.
+ *
+ * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware
+ * memory allocator" whitepaper.
+ * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf
+ *
+ * End goal is to track peak active memory usage over specified time interval.
+ * We do so by dividing this time interval into disjoint subintervals and
+ * storing value of maximum memory usage for each subinterval in a circular
+ * buffer. A nanosecond-resolution timestamp uniquely maps to an epoch, which
+ * is used as an index into the circular buffer.
+ */
+
+#define PEAK_DEMAND_LG_BUCKETS 4
+/*
+ * Number of buckets should be power of 2 to ensure modulo operation is
+ * optimized to bit masking by the compiler.
+ */
+#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS)
+
+typedef struct peak_demand_s peak_demand_t;
+struct peak_demand_s {
+ /*
+	 * Absolute value of the current epoch; monotonically increases over
+	 * time. Epoch value modulo the number of buckets is used as an index
+	 * into the nactive_max array.
+ */
+ uint64_t epoch;
+
+ /* How many nanoseconds each epoch approximately takes. */
+ uint64_t epoch_interval_ns;
+
+ /*
+ * Circular buffer to track maximum number of active pages for each
+ * epoch.
+ */
+ size_t nactive_max[PEAK_DEMAND_NBUCKETS];
+};
+
+void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms);
+
+/* Updates peak demand statistics with current number of active pages. */
+void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now,
+ size_t nactive);
+
+/* Returns maximum number of active pages in sliding window. */
+size_t peak_demand_nactive_max(peak_demand_t *peak_demand);
+
+#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index c43b30b1..97a95fbf 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -76,6 +76,7 @@
+
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index f091475e..1a89369e 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -112,6 +112,9 @@
Source Files
+
+ Source Files
+
Source Files
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index a195f6b3..8529438c 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -76,6 +76,7 @@
+
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index f091475e..1a89369e 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -112,6 +112,9 @@
Source Files
+
+ Source Files
+
Source Files
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
index cd16005d..eace48ba 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj
@@ -76,6 +76,7 @@
+
diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
index f091475e..1a89369e 100644
--- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters
@@ -112,6 +112,9 @@
Source Files
+
+ Source Files
+
Source Files
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
index 2d8c4be6..98085cfd 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj
@@ -76,6 +76,7 @@
+
diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
index f091475e..1a89369e 100644
--- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters
@@ -112,6 +112,9 @@
Source Files
+
+ Source Files
+
Source Files
diff --git a/src/ctl.c b/src/ctl.c
index c55d9719..2c941ae8 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -106,6 +106,7 @@ CTL_PROTO(opt_hpa_hugify_delay_ms)
CTL_PROTO(opt_hpa_hugify_sync)
CTL_PROTO(opt_hpa_min_purge_interval_ms)
CTL_PROTO(opt_experimental_hpa_max_purge_nhp)
+CTL_PROTO(opt_hpa_peak_demand_window_ms)
CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
@@ -487,6 +488,8 @@ static const ctl_named_node_t opt_node[] = {
{NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)},
{NAME("experimental_hpa_max_purge_nhp"),
CTL(opt_experimental_hpa_max_purge_nhp)},
+ {NAME("hpa_peak_demand_window_ms"),
+ CTL(opt_hpa_peak_demand_window_ms)},
{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@@ -2255,6 +2258,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms,
uint64_t)
CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp,
opt_hpa_opts.experimental_max_purge_nhp, ssize_t)
+CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms,
+ opt_hpa_opts.peak_demand_window_ms, uint64_t)
/*
* This will have to change before we publicly document this option; fxp_t and
diff --git a/src/hpa.c b/src/hpa.c
index 2a5d7e1f..c01dde13 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -63,6 +63,11 @@ hpa_supported(void) {
return true;
}
+static bool
+hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) {
+ return shard->opts.peak_demand_window_ms > 0;
+}
+
static void
hpa_do_consistency_checks(hpa_shard_t *shard) {
assert(shard->base != NULL);
@@ -217,6 +222,11 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
shard->stats.nhugify_failures = 0;
shard->stats.ndehugifies = 0;
+ if (hpa_peak_demand_tracking_enabled(shard)) {
+ peak_demand_init(&shard->peak_demand,
+ shard->opts.peak_demand_window_ms);
+ }
+
/*
* Fill these in last, so that if an hpa_shard gets used despite
* initialization failing, we'll at least crash instead of just
@@ -294,8 +304,37 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
if (shard->opts.dirty_mult == (fxp_t)-1) {
return (size_t)-1;
}
- return fxp_mul_frac(psset_nactive(&shard->psset),
- shard->opts.dirty_mult);
+ /*
+ * We are trying to estimate maximum amount of active memory we'll
+ * need in the near future. We do so by projecting future active
+ * memory demand (based on peak active memory usage we observed in the
+ * past within sliding window) and adding slack on top of it (an
+ * overhead is reasonable to have in exchange of higher hugepages
+	 * coverage). When peak demand tracking is off, the projection of
+	 * future active memory is the active memory we have right now.
+ *
+ * Estimation is essentially the same as nactive_max * (1 +
+ * dirty_mult), but expressed differently to factor in necessary
+ * implementation details.
+ */
+ size_t nactive = psset_nactive(&shard->psset);
+ size_t nactive_max = nactive;
+ if (hpa_peak_demand_tracking_enabled(shard)) {
+ /*
+		 * We release shard->mtx when we do a syscall to purge dirty
+		 * memory, so someone might grab shard->mtx, allocate memory
+		 * from this shard and update psset's nactive counter before
+		 * peak_demand_update(...) is called, and we'll get
+		 * peak_demand_nactive_max(...) <= nactive as a result.
+ */
+ size_t peak = peak_demand_nactive_max(&shard->peak_demand);
+ if (peak > nactive_max) {
+ nactive_max = peak;
+ }
+ }
+ size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult);
+ size_t estimation = nactive_max + slack;
+ return estimation - nactive;
}
static bool
@@ -548,6 +587,16 @@ static void
hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
bool forced) {
malloc_mutex_assert_owner(tsdn, &shard->mtx);
+
+ /* Update active memory demand statistics. */
+ if (hpa_peak_demand_tracking_enabled(shard)) {
+ nstime_t now;
+ shard->central->hooks.curtime(&now,
+ /* first_reading */ true);
+ peak_demand_update(&shard->peak_demand, &now,
+ psset_nactive(&shard->psset));
+ }
+
if (!forced && shard->opts.deferral_allowed) {
return;
}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 9f4bc785..d08771f8 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1568,6 +1568,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
opt_hpa_opts.experimental_max_purge_nhp,
"experimental_hpa_max_purge_nhp", -1, SSIZE_MAX);
+ CONF_HANDLE_UINT64_T(
+ opt_hpa_opts.peak_demand_window_ms,
+ "hpa_peak_demand_window_ms", 0, 0,
+ CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false);
+
if (CONF_MATCH("hpa_dirty_mult")) {
if (CONF_MATCH_VALUE("-1")) {
opt_hpa_opts.dirty_mult = (fxp_t)-1;
diff --git a/src/peak_demand.c b/src/peak_demand.c
new file mode 100644
index 00000000..49f28930
--- /dev/null
+++ b/src/peak_demand.c
@@ -0,0 +1,74 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/peak_demand.h"
+
+void
+peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) {
+ assert(interval_ms > 0);
+ peak_demand->epoch = 0;
+ uint64_t interval_ns = interval_ms * 1000 * 1000;
+ peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS;
+ memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max));
+}
+
+static uint64_t
+peak_demand_epoch_ind(peak_demand_t *peak_demand) {
+ return peak_demand->epoch % PEAK_DEMAND_NBUCKETS;
+}
+
+static nstime_t
+peak_demand_next_epoch_advance(peak_demand_t *peak_demand) {
+ uint64_t epoch = peak_demand->epoch;
+ uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns;
+ nstime_t next;
+ nstime_init(&next, ns);
+ return next;
+}
+
+static uint64_t
+peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand,
+ const nstime_t *now) {
+ nstime_t next_epoch_advance =
+ peak_demand_next_epoch_advance(peak_demand);
+ if (nstime_compare(now, &next_epoch_advance) < 0) {
+ return peak_demand_epoch_ind(peak_demand);
+ }
+ uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns;
+ assert(next_epoch > peak_demand->epoch);
+ /*
+	 * If we missed more epochs than the capacity of the circular buffer
+	 * (PEAK_DEMAND_NBUCKETS), rewrite no more than PEAK_DEMAND_NBUCKETS
+	 * items, as we don't want to zero out the same item multiple times.
+ */
+ if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) {
+ peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS;
+ }
+ while (peak_demand->epoch < next_epoch) {
+ ++peak_demand->epoch;
+ uint64_t ind = peak_demand_epoch_ind(peak_demand);
+ peak_demand->nactive_max[ind] = 0;
+ }
+ return peak_demand_epoch_ind(peak_demand);
+}
+
+void
+peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now,
+ size_t nactive) {
+ uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now);
+ size_t *epoch_nactive = &peak_demand->nactive_max[ind];
+ if (nactive > *epoch_nactive) {
+ *epoch_nactive = nactive;
+ }
+}
+
+size_t
+peak_demand_nactive_max(peak_demand_t *peak_demand) {
+ size_t nactive_max = peak_demand->nactive_max[0];
+ for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) {
+ if (peak_demand->nactive_max[i] > nactive_max) {
+ nactive_max = peak_demand->nactive_max[i];
+ }
+ }
+ return nactive_max;
+}
diff --git a/src/stats.c b/src/stats.c
index 58874bf8..bd0167fb 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -1657,6 +1657,7 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_BOOL("hpa_hugify_sync")
OPT_WRITE_UINT64("hpa_min_purge_interval_ms")
OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp")
+ OPT_WRITE_UINT64("hpa_peak_demand_window_ms")
if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
== 0) {
/*
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index 6c42729a..ceed9bd8 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -37,26 +37,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = {
/* min_purge_interval_ms */
5 * 1000,
/* experimental_max_purge_nhp */
- -1
-};
-
-static hpa_shard_opts_t test_hpa_shard_opts_purge = {
- /* slab_max_alloc */
- HUGEPAGE,
- /* hugification_threshold */
- 0.9 * HUGEPAGE,
- /* dirty_mult */
- FXP_INIT_PERCENT(11),
- /* deferral_allowed */
- true,
- /* hugify_delay_ms */
- 0,
- /* hugify_sync */
- false,
- /* min_purge_interval_ms */
- 5 * 1000,
- /* experimental_max_purge_nhp */
- -1
+ -1,
+ /* peak_demand_window_ms */
+ 0
};
static hpa_shard_t *
@@ -480,8 +463,14 @@ TEST_END
TEST_BEGIN(test_purge_no_infinite_loop) {
test_skip_if(!hpa_supported());
- hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
- &test_hpa_shard_opts_purge);
+ hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+ opts.slab_max_alloc = HUGEPAGE;
+ opts.hugification_threshold = 0.9 * HUGEPAGE;
+ opts.dirty_mult = FXP_INIT_PERCENT(11);
+ opts.deferral_allowed = true;
+ opts.hugify_delay_ms = 0;
+
+ hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
/*
@@ -489,8 +478,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) {
* criteria for huge page and at the same time do not allow hugify page
* without triggering a purge.
*/
- const size_t npages =
- test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1;
+ const size_t npages = opts.hugification_threshold / PAGE + 1;
const size_t size = npages * PAGE;
bool deferred_work_generated = false;
@@ -733,6 +721,140 @@ TEST_BEGIN(test_experimental_max_purge_nhp) {
}
TEST_END
+TEST_BEGIN(test_demand_purge_slack) {
+ test_skip_if(!hpa_supported());
+
+ hpa_hooks_t hooks;
+ hooks.map = &defer_test_map;
+ hooks.unmap = &defer_test_unmap;
+ hooks.purge = &defer_test_purge;
+ hooks.hugify = &defer_test_hugify;
+ hooks.dehugify = &defer_test_dehugify;
+ hooks.curtime = &defer_test_curtime;
+ hooks.ms_since = &defer_test_ms_since;
+
+ hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+ opts.deferral_allowed = true;
+ /* Allow 10% of slack. */
+ opts.dirty_mult = FXP_INIT_PERCENT(10);
+ /* Peak demand sliding window duration is 10 seconds. */
+ opts.peak_demand_window_ms = 10 * 1000;
+
+ hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+ bool deferred_work_generated = false;
+
+ nstime_init(&defer_curtime, 0);
+ tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+ enum {NALLOCS = 16 * HUGEPAGE_PAGES};
+ edata_t *edatas[NALLOCS];
+ for (int i = 0; i < NALLOCS; i++) {
+ edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+ false, false, &deferred_work_generated);
+ expect_ptr_not_null(edatas[i], "Unexpected null edata");
+ }
+
+ /* Deallocate 5 hugepages out of 16. */
+ for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) {
+ pai_dalloc(tsdn, &shard->pai, edatas[i],
+ &deferred_work_generated);
+ }
+ nstime_init2(&defer_curtime, 6, 0);
+ hpa_shard_do_deferred_work(tsdn, shard);
+
+ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+ expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+ /*
+ * Peak demand within sliding window is 16 hugepages, so we don't need
+ * to purge anything just yet.
+ */
+ expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+
+ nstime_init2(&defer_curtime, 12, 0);
+ hpa_shard_do_deferred_work(tsdn, shard);
+
+ expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification");
+ ndefer_hugify_calls = 0;
+ expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+ /*
+ * 12 seconds passed now, peak demand is 11 hugepages, we allowed to
+ * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we
+ * have 5 dirty hugepages, so we should purge 4 of them.
+ */
+ expect_zu_eq(4, ndefer_purge_calls, "Expect purges");
+ ndefer_purge_calls = 0;
+
+ destroy_test_data(shard);
+}
+TEST_END
+
+TEST_BEGIN(test_demand_purge_tight) {
+ test_skip_if(!hpa_supported());
+
+ hpa_hooks_t hooks;
+ hooks.map = &defer_test_map;
+ hooks.unmap = &defer_test_unmap;
+ hooks.purge = &defer_test_purge;
+ hooks.hugify = &defer_test_hugify;
+ hooks.dehugify = &defer_test_dehugify;
+ hooks.curtime = &defer_test_curtime;
+ hooks.ms_since = &defer_test_ms_since;
+
+ hpa_shard_opts_t opts = test_hpa_shard_opts_default;
+ opts.deferral_allowed = true;
+ /* No slack allowed. */
+ opts.dirty_mult = FXP_INIT_PERCENT(0);
+ /* Peak demand sliding window duration is 10 seconds. */
+ opts.peak_demand_window_ms = 10 * 1000;
+
+ hpa_shard_t *shard = create_test_data(&hooks, &opts);
+
+ bool deferred_work_generated = false;
+
+ nstime_init(&defer_curtime, 0);
+ tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+ enum {NALLOCS = 16 * HUGEPAGE_PAGES};
+ edata_t *edatas[NALLOCS];
+ for (int i = 0; i < NALLOCS; i++) {
+ edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+ false, false, &deferred_work_generated);
+ expect_ptr_not_null(edatas[i], "Unexpected null edata");
+ }
+
+ /* Deallocate 5 hugepages out of 16. */
+ for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) {
+ pai_dalloc(tsdn, &shard->pai, edatas[i],
+ &deferred_work_generated);
+ }
+ nstime_init2(&defer_curtime, 6, 0);
+ hpa_shard_do_deferred_work(tsdn, shard);
+
+ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early");
+ expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+ /*
+	 * Peak demand within sliding window is 16 hugepages, so we don't
+	 * need to purge anything just yet.
+ */
+ expect_zu_eq(0, ndefer_purge_calls, "Purged too early");
+
+ nstime_init2(&defer_curtime, 12, 0);
+ hpa_shard_do_deferred_work(tsdn, shard);
+
+ expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification");
+ ndefer_hugify_calls = 0;
+ expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early");
+ /*
+	 * 12 seconds passed now, peak demand is 11 hugepages. We have
+	 * hpa_dirty_mult = 0, so we are allowed to keep 11 * 0 = 0 dirty
+	 * hugepages, but we have 5, so all of them should be purged.
+ */
+ expect_zu_eq(5, ndefer_purge_calls, "Expect purges");
+ ndefer_purge_calls = 0;
+
+ destroy_test_data(shard);
+}
+TEST_END
+
int
main(void) {
/*
@@ -756,5 +878,7 @@ main(void) {
test_no_min_purge_interval,
test_min_purge_interval,
test_purge,
- test_experimental_max_purge_nhp);
+ test_experimental_max_purge_nhp,
+ test_demand_purge_slack,
+ test_demand_purge_tight);
}
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 57aa59e5..366b992b 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -295,6 +295,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always);
TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always);
+ TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);
TEST_MALLCTL_OPT(const char *, percpu_arena, always);
TEST_MALLCTL_OPT(size_t, oversize_threshold, always);
diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c
new file mode 100644
index 00000000..ca2506b8
--- /dev/null
+++ b/test/unit/peak_demand.c
@@ -0,0 +1,162 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/peak_demand.h"
+
+TEST_BEGIN(test_peak_demand_init) {
+ peak_demand_t peak_demand;
+ /*
+ * Exact value doesn't matter here as we don't advance epoch in this
+ * test.
+ */
+ uint64_t interval_ms = 1000;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0,
+ "Unexpected ndirty_max value after initialization");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_basic) {
+ peak_demand_t peak_demand;
+ /* Make each bucket exactly one second to simplify math. */
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+
+ nstime_init2(&now, /* sec */ 0, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 1024);
+
+ nstime_init2(&now, /* sec */ 1, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 512);
+
+ nstime_init2(&now, /* sec */ 2, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 256);
+
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, "");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_skip_epochs) {
+ peak_demand_t peak_demand;
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+
+ nstime_init2(&now, /* sec */ 0, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 1024);
+
+ nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 512);
+
+ nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1),
+ /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 256);
+
+ /*
+	 * Updates are not evenly spread over time. When we update at
+	 * 2 * (PEAK_DEMAND_NBUCKETS - 1) seconds, the 1024 value is already
+	 * out of the sliding window, but 512 is still present.
+ */
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, "");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_rewrite_optimization) {
+ peak_demand_t peak_demand;
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+
+ nstime_init2(&now, /* sec */ 0, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 1024);
+
+ nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX);
+ /*
+ * This update should take reasonable time if optimization is working
+ * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test
+ * will take a long time to finish.
+ */
+ peak_demand_update(&peak_demand, &now, /* nactive */ 512);
+
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, "");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_out_of_interval) {
+ peak_demand_t peak_demand;
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+
+ nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 1024);
+
+ nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 512);
+
+ nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, /* nactive */ 256);
+
+ /*
+	 * The update frequency is lower than the tracking interval, so only
+	 * the last value should remain.
+ */
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, "");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_static_epoch) {
+ peak_demand_t peak_demand;
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+ nstime_init_zero(&now);
+
+ /* Big enough value to overwrite values in circular buffer. */
+ size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS;
+ for (size_t nactive = 0; nactive <= nactive_max; ++nactive) {
+ /*
+		 * We should overwrite the value in the same bucket, as the
+		 * now value doesn't change between iterations.
+ */
+ peak_demand_update(&peak_demand, &now, nactive);
+ }
+
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, "");
+}
+TEST_END
+
+TEST_BEGIN(test_peak_demand_update_epoch_advance) {
+ peak_demand_t peak_demand;
+ uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS;
+ peak_demand_init(&peak_demand, interval_ms);
+
+ nstime_t now;
+ /* Big enough value to overwrite values in circular buffer. */
+ size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS;
+ for (size_t nactive = 0; nactive <= nactive_max; ++nactive) {
+ uint64_t sec = nactive;
+ nstime_init2(&now, sec, /* nsec */ 0);
+ peak_demand_update(&peak_demand, &now, nactive);
+ }
+
+ expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, "");
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_peak_demand_init,
+ test_peak_demand_update_basic,
+ test_peak_demand_update_skip_epochs,
+ test_peak_demand_update_rewrite_optimization,
+ test_peak_demand_update_out_of_interval,
+ test_peak_demand_update_static_epoch,
+ test_peak_demand_update_epoch_advance);
+}